@staticn0va/wigolo 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +146 -227
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +238 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +47 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +364 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +344 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/shutdown.d.ts +2 -0
- package/dist/cli/shutdown.d.ts.map +1 -0
- package/dist/cli/shutdown.js +26 -0
- package/dist/cli/shutdown.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +34 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/error-describe.d.ts +7 -0
- package/dist/fetch/error-describe.d.ts.map +1 -0
- package/dist/fetch/error-describe.js +37 -0
- package/dist/fetch/error-describe.js.map +1 -0
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +187 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +102 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
- package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/model-select.js +32 -0
- package/dist/integrations/cloud/llm/model-select.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/run.d.ts +27 -0
- package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/run.js +99 -0
- package/dist/integrations/cloud/llm/run.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.js +103 -0
- package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +17 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +41 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +6 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +246 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +19 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +62 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.d.ts.map +1 -1
- package/dist/search/filters.js +58 -54
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +94 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +177 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +174 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
package/dist/crawl/mapper.js
CHANGED
|
@@ -1,178 +1,150 @@
|
|
|
1
|
-
import { parseHTML } from
|
|
2
|
-
import { matchesPatterns } from
|
|
3
|
-
import { parseSitemap, parseSitemapIndex, extractSitemapUrlFromRobots } from
|
|
4
|
-
import { createLogger } from
|
|
5
|
-
const log = createLogger(
|
|
6
|
-
const IGNORED_PROTOCOLS = [
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const resolved = new URL(trimmed, origin);
|
|
31
|
-
// Same-origin check
|
|
32
|
-
if (resolved.origin !== parsedOrigin)
|
|
33
|
-
continue;
|
|
34
|
-
// Strip fragment, keep path + query
|
|
35
|
-
resolved.hash = '';
|
|
36
|
-
const normalized = resolved.href;
|
|
37
|
-
if (!seen.has(normalized)) {
|
|
38
|
-
seen.add(normalized);
|
|
39
|
-
links.push(normalized);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
catch {
|
|
43
|
-
log.debug('Failed to resolve URL', { href: trimmed, origin });
|
|
44
|
-
}
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { matchesPatterns } from "./url-utils.js";
|
|
3
|
+
import { parseSitemap, parseSitemapIndex, extractSitemapUrlFromRobots } from "./sitemap.js";
|
|
4
|
+
import { createLogger } from "../logger.js";
|
|
5
|
+
const log = createLogger("crawl");
|
|
6
|
+
const IGNORED_PROTOCOLS = ["javascript:", "mailto:", "tel:", "data:", "blob:", "ftp:"];
|
|
7
|
+
function extractLinks(html, origin) {
|
|
8
|
+
if (!html || !html.trim()) return [];
|
|
9
|
+
try {
|
|
10
|
+
const { document: doc } = parseHTML(html);
|
|
11
|
+
const anchors = doc.querySelectorAll("a[href]");
|
|
12
|
+
const seen = /* @__PURE__ */ new Set();
|
|
13
|
+
const links = [];
|
|
14
|
+
const parsedOrigin = new URL(origin).origin;
|
|
15
|
+
for (const anchor of Array.from(anchors)) {
|
|
16
|
+
const href = anchor.getAttribute("href");
|
|
17
|
+
if (!href) continue;
|
|
18
|
+
const trimmed = href.trim();
|
|
19
|
+
if (!trimmed) continue;
|
|
20
|
+
if (trimmed.startsWith("#")) continue;
|
|
21
|
+
if (IGNORED_PROTOCOLS.some((p) => trimmed.toLowerCase().startsWith(p))) continue;
|
|
22
|
+
try {
|
|
23
|
+
const resolved = new URL(trimmed, origin);
|
|
24
|
+
if (resolved.origin !== parsedOrigin) continue;
|
|
25
|
+
resolved.hash = "";
|
|
26
|
+
const normalized = resolved.href;
|
|
27
|
+
if (!seen.has(normalized)) {
|
|
28
|
+
seen.add(normalized);
|
|
29
|
+
links.push(normalized);
|
|
45
30
|
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
log.debug('Failed to parse HTML for link extraction', { error: String(err) });
|
|
50
|
-
return [];
|
|
31
|
+
} catch {
|
|
32
|
+
log.debug("Failed to resolve URL", { href: trimmed, origin });
|
|
33
|
+
}
|
|
51
34
|
}
|
|
35
|
+
return links;
|
|
36
|
+
} catch (err) {
|
|
37
|
+
log.debug("Failed to parse HTML for link extraction", { error: String(err) });
|
|
38
|
+
return [];
|
|
39
|
+
}
|
|
52
40
|
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
if (!queued.has(sitemapUrl)) {
|
|
84
|
-
queued.add(sitemapUrl);
|
|
85
|
-
queue.push({ url: sitemapUrl, depth: 0 });
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
41
|
+
async function mapUrls(input, fetchFn) {
|
|
42
|
+
const maxDepth = input.max_depth ?? 3;
|
|
43
|
+
const maxPages = input.max_pages ?? 200;
|
|
44
|
+
let origin;
|
|
45
|
+
try {
|
|
46
|
+
origin = new URL(input.url).origin;
|
|
47
|
+
} catch (err) {
|
|
48
|
+
return {
|
|
49
|
+
urls: [],
|
|
50
|
+
total_found: 0,
|
|
51
|
+
sitemap_found: false,
|
|
52
|
+
error: `Invalid seed URL: ${String(err)}`
|
|
53
|
+
};
|
|
54
|
+
}
|
|
55
|
+
const discovered = /* @__PURE__ */ new Set([input.url]);
|
|
56
|
+
const queued = /* @__PURE__ */ new Set([input.url]);
|
|
57
|
+
const queue = [{ url: input.url, depth: 0 }];
|
|
58
|
+
let sitemapFound = false;
|
|
59
|
+
try {
|
|
60
|
+
const sitemapUrls = await discoverSitemapUrls(origin, fetchFn);
|
|
61
|
+
if (sitemapUrls.length > 0) {
|
|
62
|
+
sitemapFound = true;
|
|
63
|
+
for (const sitemapUrl of sitemapUrls) {
|
|
64
|
+
if (discovered.size >= maxPages) break;
|
|
65
|
+
if (matchesPatterns(sitemapUrl, input.include_patterns, input.exclude_patterns)) {
|
|
66
|
+
discovered.add(sitemapUrl);
|
|
67
|
+
if (!queued.has(sitemapUrl)) {
|
|
68
|
+
queued.add(sitemapUrl);
|
|
69
|
+
queue.push({ url: sitemapUrl, depth: 0 });
|
|
70
|
+
}
|
|
89
71
|
}
|
|
72
|
+
}
|
|
90
73
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
continue;
|
|
110
|
-
discovered.add(link);
|
|
111
|
-
// Only queue for further traversal if we haven't hit max depth
|
|
112
|
-
if (current.depth + 1 <= maxDepth && !queued.has(link)) {
|
|
113
|
-
queued.add(link);
|
|
114
|
-
queue.push({ url: link, depth: current.depth + 1 });
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
catch (err) {
|
|
119
|
-
if (current.url === input.url && current.depth === 0) {
|
|
120
|
-
seedError = String(err);
|
|
121
|
-
log.warn('Seed URL fetch failed during map', { url: current.url, error: String(err) });
|
|
122
|
-
}
|
|
123
|
-
else {
|
|
124
|
-
log.debug('Child page fetch failed during map, skipping', { url: current.url, error: String(err) });
|
|
125
|
-
}
|
|
74
|
+
} catch (err) {
|
|
75
|
+
log.debug("Sitemap discovery failed, continuing with BFS only", { error: String(err) });
|
|
76
|
+
}
|
|
77
|
+
let seedError;
|
|
78
|
+
while (queue.length > 0 && discovered.size < maxPages) {
|
|
79
|
+
const current = queue.shift();
|
|
80
|
+
if (current.depth > maxDepth) continue;
|
|
81
|
+
try {
|
|
82
|
+
const { html } = await fetchFn(current.url);
|
|
83
|
+
const links = extractLinks(html, origin);
|
|
84
|
+
for (const link of links) {
|
|
85
|
+
if (discovered.size >= maxPages) break;
|
|
86
|
+
if (discovered.has(link)) continue;
|
|
87
|
+
if (!matchesPatterns(link, input.include_patterns, input.exclude_patterns)) continue;
|
|
88
|
+
discovered.add(link);
|
|
89
|
+
if (current.depth + 1 <= maxDepth && !queued.has(link)) {
|
|
90
|
+
queued.add(link);
|
|
91
|
+
queue.push({ url: link, depth: current.depth + 1 });
|
|
126
92
|
}
|
|
93
|
+
}
|
|
94
|
+
} catch (err) {
|
|
95
|
+
if (current.url === input.url && current.depth === 0) {
|
|
96
|
+
seedError = String(err);
|
|
97
|
+
log.warn("Seed URL fetch failed during map", { url: current.url, error: String(err) });
|
|
98
|
+
} else {
|
|
99
|
+
log.debug("Child page fetch failed during map, skipping", { url: current.url, error: String(err) });
|
|
100
|
+
}
|
|
127
101
|
}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
}
|
|
102
|
+
}
|
|
103
|
+
const urls = Array.from(discovered);
|
|
104
|
+
return {
|
|
105
|
+
urls,
|
|
106
|
+
total_found: urls.length,
|
|
107
|
+
sitemap_found: sitemapFound,
|
|
108
|
+
...seedError !== void 0 ? { error: seedError } : {}
|
|
109
|
+
};
|
|
135
110
|
}
|
|
136
111
|
async function discoverSitemapUrls(origin, fetchFn) {
|
|
137
|
-
|
|
138
|
-
|
|
112
|
+
const sitemapLocations = [];
|
|
113
|
+
try {
|
|
114
|
+
const { html: robotsTxt } = await fetchFn(`${origin}/robots.txt`);
|
|
115
|
+
const fromRobots = extractSitemapUrlFromRobots(robotsTxt);
|
|
116
|
+
sitemapLocations.push(...fromRobots);
|
|
117
|
+
} catch {
|
|
118
|
+
log.debug("robots.txt not found, trying default sitemap location");
|
|
119
|
+
}
|
|
120
|
+
if (sitemapLocations.length === 0) {
|
|
121
|
+
sitemapLocations.push(`${origin}/sitemap.xml`);
|
|
122
|
+
}
|
|
123
|
+
const allUrls = [];
|
|
124
|
+
for (const sitemapUrl of sitemapLocations) {
|
|
139
125
|
try {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}
|
|
151
|
-
const allUrls = [];
|
|
152
|
-
for (const sitemapUrl of sitemapLocations) {
|
|
153
|
-
try {
|
|
154
|
-
const { html: sitemapXml } = await fetchFn(sitemapUrl);
|
|
155
|
-
// Check if it's a sitemap index (contains sub-sitemaps)
|
|
156
|
-
const indexUrls = parseSitemapIndex(sitemapXml);
|
|
157
|
-
if (indexUrls.length > 0) {
|
|
158
|
-
for (const subUrl of indexUrls) {
|
|
159
|
-
try {
|
|
160
|
-
const { html: subXml } = await fetchFn(subUrl);
|
|
161
|
-
allUrls.push(...parseSitemap(subXml));
|
|
162
|
-
}
|
|
163
|
-
catch {
|
|
164
|
-
log.debug('Failed to fetch sub-sitemap', { url: subUrl });
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
else {
|
|
169
|
-
allUrls.push(...parseSitemap(sitemapXml));
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
catch {
|
|
173
|
-
log.debug('Sitemap fetch failed', { url: sitemapUrl });
|
|
126
|
+
const { html: sitemapXml } = await fetchFn(sitemapUrl);
|
|
127
|
+
const indexUrls = parseSitemapIndex(sitemapXml);
|
|
128
|
+
if (indexUrls.length > 0) {
|
|
129
|
+
for (const subUrl of indexUrls) {
|
|
130
|
+
try {
|
|
131
|
+
const { html: subXml } = await fetchFn(subUrl);
|
|
132
|
+
allUrls.push(...parseSitemap(subXml));
|
|
133
|
+
} catch {
|
|
134
|
+
log.debug("Failed to fetch sub-sitemap", { url: subUrl });
|
|
135
|
+
}
|
|
174
136
|
}
|
|
137
|
+
} else {
|
|
138
|
+
allUrls.push(...parseSitemap(sitemapXml));
|
|
139
|
+
}
|
|
140
|
+
} catch {
|
|
141
|
+
log.debug("Sitemap fetch failed", { url: sitemapUrl });
|
|
175
142
|
}
|
|
176
|
-
|
|
143
|
+
}
|
|
144
|
+
return allUrls;
|
|
177
145
|
}
|
|
146
|
+
export {
|
|
147
|
+
extractLinks,
|
|
148
|
+
mapUrls
|
|
149
|
+
};
|
|
178
150
|
//# sourceMappingURL=mapper.js.map
|
package/dist/crawl/mapper.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mapper.js","sourceRoot":"","sources":["../../src/crawl/mapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,2BAA2B,EAAE,MAAM,cAAc,CAAC;AAC5F,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG5C,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;AAYlC,MAAM,iBAAiB,GAAG,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;AAEvF,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,MAAc;IACvD,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAErC,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,GAAG,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAE5C,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,IAAI;gBAAE,SAAS;YAEpB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEvB,2BAA2B;YAC3B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YAEtC,0BAA0B;YAC1B,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBAAE,SAAS;YAEjF,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;gBAE1C,oBAAoB;gBACpB,IAAI,QAAQ,CAAC,MAAM,KAAK,YAAY;oBAAE,SAAS;gBAE/C,oCAAoC;gBACpC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC;gBACnB,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC;gBAEjC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC1B,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBACrB,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,KAAK,CAAC,uBAAuB,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC9E,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,KAAe,EAAE,OAAqB;IAClE,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,IAAI,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,IAAI,GAAG,CAAC;IAExC,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,IAAI,EAAE,EAAE;YACR,WAAW,EAAE,CAAC;YACd,aAAa,EAAE,KAAK;YACpB,KAAK,EAAE,qBAAqB,MAAM,CAAC,GAAG,CAAC,EAAE;SAC1C,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,KAAK,GAA0C,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IACpF,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,iEAAiE;IACjE,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/D,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,YAAY,GAAG,IAAI,CAAC;YACpB,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACrC,IAAI,UAAU,CAAC,IAAI,IAAI,QAAQ;oBAAE,MAAM;gBACvC,IAAI,eAAe,CAAC,UAAU,EAAE,KAAK,CAAC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC;oBAChF,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBAC3B,wEAAwE;oBACxE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;wBAC5B,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;wBACvB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;oBAC5C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,oDAAoD,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC1F,CAAC;IAED,8BAA8B;IAC9B,IAAI,SAA6B,CAAC;IAElC,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,IAAI,GAAG,QAAQ,EAAE,CAAC;QACtD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QAE/B,IAAI,OAAO,CAAC,KAAK,GAAG,QAAQ;YAAE,SAAS;QAEvC,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAC5C,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,IAAI,UAAU,CAAC,IAAI,IAAI,QAAQ;oBAAE,MAAM;gBACvC,IAAI,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;oBAAE,SAAS;gBACnC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB,CAAC;oBAAE,SAAS;gBAErF,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBAErB,+DAA+D;gBAC/D,IAAI,OAAO,CAAC,KAAK,GAAG,CAAC,IAAI,QAAQ,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACvD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACjB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtD,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,OAAO,CAAC,GAAG,KAAK,KAAK,CAAC,GAAG,IAAI,OAAO,CAAC,KAAK,KAAK,CAAC,EAAE,CAAC;gBACrD,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;gBACxB,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACzF,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,KAAK,CAAC,8CAA8C,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACtG,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAEpC,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,IAAI,CAAC,MAAM;QACxB,aAAa,EAAE,YAAY;QAC3B,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACzD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,MAAc,EAAE,OAAqB;IACtE,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,8CAA8C;IAC9C,IAAI,CAAC;QACH,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,MAAM,OAAO,CAAC,GAAG,MAAM,aAAa,CAAC,CAAC;QAClE,MAAM,UAAU,GAAG,2BAA2B,CAAC,SAAS,CAAC,CAAC;QAC1D,gBAAgB,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,GAAG,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;IACrE,CAAC;IAED,mCAAmC;IACnC,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClC,gBAAgB,CAAC,IAAI,CAAC,GAAG,MAAM,cAAc,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,UAAU,IAAI,gBAAgB,EAAE,CAAC;QAC1C,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC;YAEvD,wDAAwD;YACxD,MAAM,SAAS,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;YAChD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,KAAK,MAAM,MAAM,IAAI,SAAS,EAAE,CAAC;oBAC/B,IAAI,CAAC;wBACH,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;wBAC/C,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;oBACxC,CAAC;oBAAC,MAAM,CAAC;wBACP,GAAG,CAAC,KAAK,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC5D,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,GAAG,CAAC,KAAK,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../../src/crawl/mapper.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { matchesPatterns } from './url-utils.js';\nimport { parseSitemap, parseSitemapIndex, extractSitemapUrlFromRobots } from './sitemap.js';\nimport { createLogger } from '../logger.js';\nimport type { MapOutput } from '../types.js';\n\nconst log = createLogger('crawl');\n\ninterface MapInput {\n url: string;\n max_depth?: number;\n max_pages?: number;\n include_patterns?: string[];\n exclude_patterns?: string[];\n}\n\nexport type LightFetchFn = (url: string) => Promise<{ html: string; finalUrl: string; statusCode: number }>;\n\nconst IGNORED_PROTOCOLS = ['javascript:', 'mailto:', 'tel:', 'data:', 'blob:', 'ftp:'];\n\nexport function extractLinks(html: string, origin: string): string[] {\n if (!html || !html.trim()) return [];\n\n try {\n const { document: doc } = parseHTML(html);\n const anchors = doc.querySelectorAll('a[href]');\n const seen = new Set<string>();\n const links: string[] = [];\n const parsedOrigin = new URL(origin).origin;\n\n for (const anchor of Array.from(anchors)) {\n const href = anchor.getAttribute('href');\n if (!href) continue;\n\n const trimmed = href.trim();\n if (!trimmed) continue;\n\n // Skip fragment-only links\n if (trimmed.startsWith('#')) continue;\n\n // Skip non-http protocols\n if (IGNORED_PROTOCOLS.some((p) => trimmed.toLowerCase().startsWith(p))) continue;\n\n try {\n const resolved = new URL(trimmed, origin);\n\n // Same-origin check\n if (resolved.origin !== parsedOrigin) continue;\n\n // Strip fragment, keep path + query\n resolved.hash = '';\n const normalized = resolved.href;\n\n if (!seen.has(normalized)) {\n seen.add(normalized);\n links.push(normalized);\n }\n } catch {\n log.debug('Failed to resolve URL', { href: trimmed, origin });\n }\n }\n\n return links;\n } catch (err) {\n log.debug('Failed to parse HTML for link extraction', { error: String(err) });\n return [];\n }\n}\n\nexport async function mapUrls(input: MapInput, fetchFn: LightFetchFn): Promise<MapOutput> {\n const maxDepth = input.max_depth ?? 3;\n const maxPages = input.max_pages ?? 200;\n\n let origin: string;\n try {\n origin = new URL(input.url).origin;\n } catch (err) {\n return {\n urls: [],\n total_found: 0,\n sitemap_found: false,\n error: `Invalid seed URL: ${String(err)}`,\n };\n }\n\n const discovered = new Set<string>([input.url]);\n const queued = new Set<string>([input.url]);\n const queue: Array<{ url: string; depth: number }> = [{ url: input.url, depth: 0 }];\n let sitemapFound = false;\n\n // Phase 1: Sitemap discovery (best-effort, errors are non-fatal)\n try {\n const sitemapUrls = await discoverSitemapUrls(origin, fetchFn);\n if (sitemapUrls.length > 0) {\n sitemapFound = true;\n for (const sitemapUrl of sitemapUrls) {\n if (discovered.size >= maxPages) break;\n if (matchesPatterns(sitemapUrl, input.include_patterns, input.exclude_patterns)) {\n discovered.add(sitemapUrl);\n // Also queue sitemap URLs for BFS traversal so their links are explored\n if (!queued.has(sitemapUrl)) {\n queued.add(sitemapUrl);\n queue.push({ url: sitemapUrl, depth: 0 });\n }\n }\n }\n }\n } catch (err) {\n log.debug('Sitemap discovery failed, continuing with BFS only', { error: String(err) });\n }\n\n // Phase 2: BFS link traversal\n let seedError: string | undefined;\n\n while (queue.length > 0 && discovered.size < maxPages) {\n const current = queue.shift()!;\n\n if (current.depth > maxDepth) continue;\n\n try {\n const { html } = await fetchFn(current.url);\n const links = extractLinks(html, origin);\n\n for (const link of links) {\n if (discovered.size >= maxPages) break;\n if (discovered.has(link)) continue;\n if (!matchesPatterns(link, input.include_patterns, input.exclude_patterns)) continue;\n\n discovered.add(link);\n\n // Only queue for further traversal if we haven't hit max depth\n if (current.depth + 1 <= maxDepth && !queued.has(link)) {\n queued.add(link);\n queue.push({ url: link, depth: current.depth + 1 });\n }\n }\n } catch (err) {\n if (current.url === input.url && current.depth === 0) {\n seedError = String(err);\n log.warn('Seed URL fetch failed during map', { url: current.url, error: String(err) });\n } else {\n log.debug('Child page fetch failed during map, skipping', { url: current.url, error: String(err) });\n }\n }\n }\n\n const urls = Array.from(discovered);\n\n return {\n urls,\n total_found: urls.length,\n sitemap_found: sitemapFound,\n ...(seedError !== undefined ? { error: seedError } : {}),\n };\n}\n\nasync function discoverSitemapUrls(origin: string, fetchFn: LightFetchFn): Promise<string[]> {\n const sitemapLocations: string[] = [];\n\n // Try robots.txt first for Sitemap directives\n try {\n const { html: robotsTxt } = await fetchFn(`${origin}/robots.txt`);\n const fromRobots = extractSitemapUrlFromRobots(robotsTxt);\n sitemapLocations.push(...fromRobots);\n } catch {\n log.debug('robots.txt not found, trying default sitemap location');\n }\n\n // Fallback to default /sitemap.xml\n if (sitemapLocations.length === 0) {\n sitemapLocations.push(`${origin}/sitemap.xml`);\n }\n\n const allUrls: string[] = [];\n\n for (const sitemapUrl of sitemapLocations) {\n try {\n const { html: sitemapXml } = await fetchFn(sitemapUrl);\n\n // Check if it's a sitemap index (contains sub-sitemaps)\n const indexUrls = parseSitemapIndex(sitemapXml);\n if (indexUrls.length > 0) {\n for (const subUrl of indexUrls) {\n try {\n const { html: subXml } = await fetchFn(subUrl);\n allUrls.push(...parseSitemap(subXml));\n } catch {\n log.debug('Failed to fetch sub-sitemap', { url: subUrl });\n }\n }\n } else {\n allUrls.push(...parseSitemap(sitemapXml));\n }\n } catch {\n log.debug('Sitemap fetch failed', { url: sitemapUrl });\n }\n }\n\n return allUrls;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,uBAAuB;AAChC,SAAS,cAAc,mBAAmB,mCAAmC;AAC7E,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,OAAO;AAYhC,MAAM,oBAAoB,CAAC,eAAe,WAAW,QAAQ,SAAS,SAAS,MAAM;AAE9E,SAAS,aAAa,MAAc,QAA0B;AACnE,MAAI,CAAC,QAAQ,CAAC,KAAK,KAAK,EAAG,QAAO,CAAC;AAEnC,MAAI;AACF,UAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,UAAM,UAAU,IAAI,iBAAiB,SAAS;AAC9C,UAAM,OAAO,oBAAI,IAAY;AAC7B,UAAM,QAAkB,CAAC;AACzB,UAAM,eAAe,IAAI,IAAI,MAAM,EAAE;AAErC,eAAW,UAAU,MAAM,KAAK,OAAO,GAAG;AACxC,YAAM,OAAO,OAAO,aAAa,MAAM;AACvC,UAAI,CAAC,KAAM;AAEX,YAAM,UAAU,KAAK,KAAK;AAC1B,UAAI,CAAC,QAAS;AAGd,UAAI,QAAQ,WAAW,GAAG,EAAG;AAG7B,UAAI,kBAAkB,KAAK,CAAC,MAAM,QAAQ,YAAY,EAAE,WAAW,CAAC,CAAC,EAAG;AAExE,UAAI;AACF,cAAM,WAAW,IAAI,IAAI,SAAS,MAAM;AAGxC,YAAI,SAAS,WAAW,aAAc;AAGtC,iBAAS,OAAO;AAChB,cAAM,aAAa,SAAS;AAE5B,YAAI,CAAC,KAAK,IAAI,UAAU,GAAG;AACzB,eAAK,IAAI,UAAU;AACnB,gBAAM,KAAK,UAAU;AAAA,QACvB;AAAA,MACF,QAAQ;AACN,YAAI,MAAM,yBAAyB,EAAE,MAAM,SAAS,OAAO,CAAC;AAAA,MAC9D;AAAA,IACF;AAEA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,4CAA4C,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAC5E,WAAO,CAAC;AAAA,EACV;AACF;AAEA,eAAsB,QAAQ,OAAiB,SAA2C;AACxF,QAAM,WAAW,MAAM,aAAa;AACpC,QAAM,WAAW,MAAM,aAAa;AAEpC,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,MAAM,GAAG,EAAE;AAAA,EAC9B,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,MAAM,CAAC;AAAA,MACP,aAAa;AAAA,MACb,eAAe;AAAA,MACf,OAAO,qBAAqB,OAAO,GAAG,CAAC;AAAA,IACzC;AAAA,EACF;AAEA,QAAM,aAAa,oBAAI,IAAY,CAAC,MAAM,GAAG,CAAC;AAC9C,QAAM,SAAS,oBAAI,IAAY,CAAC,MAAM,GAAG,CAAC;AAC1C,QAAM,QAA+C,CAAC,EAAE,KAAK,MAAM,KAAK,OAAO,EAAE,CAAC;AAClF,MAAI,eAAe;AAGnB,MAAI;AACF,UAAM,cAAc,MAAM,oBAAoB,QAAQ,OAAO;AAC7D,QAAI,YAAY,SAAS,GAAG;AAC1B,qBAAe;AACf,iBAAW,cAAc,aAAa;AACpC,YAAI,WAAW,QAAQ,SAAU;AACjC,YAAI,gBAAgB,YAAY,MAAM,kBAAkB,MAAM,gBAAgB,GAAG;AAC/E,qBAAW,IAAI,UAAU;AAEzB,cAAI,CAAC,OAAO,IAAI,UAAU,GAAG;AAC3B,mBAAO,IAAI,UAAU;AACrB,kBAAM,KAAK,EAAE,KAAK,YAAY,OAAO,EAAE,CAAC;AAAA,UAC1C;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,sDAAsD,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,EACxF;AAGA,MAAI;AAEJ,SAAO,MAAM,SAAS,KAAK,WAAW,OAAO,UAAU;AACrD,UAAM,UAAU,MAAM,MAAM;AAE5B,QAAI,QAAQ,QAAQ,SAAU;AAE9B,QAAI;AACF,YAAM,EAAE,KAAK,IAAI,MAAM,QAAQ,QAAQ,GAAG;AAC1C,YAAM,QAAQ,aAAa,MAAM,MAAM;AAEvC,iBAAW,QAAQ,OAAO;AACxB,YAAI,WAAW,QAAQ,SAAU;AACjC,YAAI,WAAW,IAAI,IAAI,EAAG;AAC1B,YAAI,CAAC,gBAAgB,MAAM,MAAM,kBAAkB,MAAM,gBAAgB,EAAG;AAE5E,mBAAW,IAAI,IAAI;AAGnB,YAAI,QAAQ,QAAQ,KAAK,YAAY,CAAC,OAAO,IAAI,IAAI,GAAG;AACtD,iBAAO,IAAI,IAAI;AACf,gBAAM,KAAK,EAAE,KAAK,MAAM,OAAO,QAAQ,QAAQ,EAAE,CAAC;AAAA,QACpD;AAAA,MACF;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,QAAQ,QAAQ,MAAM,OAAO,QAAQ,UAAU,GAAG;AACpD,oBAAY,OAAO,GAAG;AACtB,YAAI,KAAK,oCAAoC,EAAE,KAAK,QAAQ,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MACvF,OAAO;AACL,YAAI,MAAM,gDAAgD,EAAE,KAAK,QAAQ,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MACpG;AAAA,IACF;AAAA,EACF;AAEA,QAAM,OAAO,MAAM,KAAK,UAAU;AAElC,SAAO;AAAA,IACL;AAAA,IACA,aAAa,KAAK;AAAA,IAClB,eAAe;AAAA,IACf,GAAI,cAAc,SAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,EACxD;AACF;AAEA,eAAe,oBAAoB,QAAgB,SAA0C;AAC3F,QAAM,mBAA6B,CAAC;AAGpC,MAAI;AACF,UAAM,EAAE,MAAM,UAAU,IAAI,MAAM,QAAQ,GAAG,MAAM,aAAa;AAChE,UAAM,aAAa,4BAA4B,SAAS;AACxD,qBAAiB,KAAK,GAAG,UAAU;AAAA,EACrC,QAAQ;AACN,QAAI,MAAM,uDAAuD;AAAA,EACnE;AAGA,MAAI,iBAAiB,WAAW,GAAG;AACjC,qBAAiB,KAAK,GAAG,MAAM,cAAc;AAAA,EAC/C;AAEA,QAAM,UAAoB,CAAC;AAE3B,aAAW,cAAc,kBAAkB;AACzC,QAAI;AACF,YAAM,EAAE,MAAM,WAAW,IAAI,MAAM,QAAQ,UAAU;AAGrD,YAAM,YAAY,kBAAkB,UAAU;AAC9C,UAAI,UAAU,SAAS,GAAG;AACxB,mBAAW,UAAU,WAAW;AAC9B,cAAI;AACF,kBAAM,EAAE,MAAM,OAAO,IAAI,MAAM,QAAQ,MAAM;AAC7C,oBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAAA,UACtC,QAAQ;AACN,gBAAI,MAAM,+BAA+B,EAAE,KAAK,OAAO,CAAC;AAAA,UAC1D;AAAA,QACF;AAAA,MACF,OAAO;AACL,gBAAQ,KAAK,GAAG,aAAa,UAAU,CAAC;AAAA,MAC1C;AAAA,IACF,QAAQ;AACN,UAAI,MAAM,wBAAwB,EAAE,KAAK,WAAW,CAAC;AAAA,IACvD;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -1,72 +1,69 @@
|
|
|
1
|
-
import { isPrivateUrl } from
|
|
2
|
-
import { getConfig } from
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import { isPrivateUrl } from "./url-utils.js";
|
|
2
|
+
import { getConfig } from "../config.js";
|
|
3
|
+
class RateLimiter {
|
|
4
|
+
domains = /* @__PURE__ */ new Map();
|
|
5
|
+
robotsDelays = /* @__PURE__ */ new Map();
|
|
6
|
+
setRobotsCrawlDelay(domain, delaySeconds) {
|
|
7
|
+
this.robotsDelays.set(domain, delaySeconds * 1e3);
|
|
8
|
+
}
|
|
9
|
+
async acquire(url) {
|
|
10
|
+
const domain = new URL(url).hostname;
|
|
11
|
+
const state = this.getOrCreateState(url, domain);
|
|
12
|
+
if (state.activeCount < state.maxConcurrency) {
|
|
13
|
+
const elapsed = Date.now() - state.lastRequestTime;
|
|
14
|
+
const remaining = state.delayMs - elapsed;
|
|
15
|
+
if (remaining > 0 && state.lastRequestTime > 0) {
|
|
16
|
+
await new Promise((r) => setTimeout(r, remaining));
|
|
17
|
+
}
|
|
18
|
+
return this.startRequest(state);
|
|
8
19
|
}
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
return new Promise((resolve) => {
|
|
21
|
+
state.queue.push(() => resolve(this.startRequest(state)));
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
getOrCreateState(url, domain) {
|
|
25
|
+
if (!this.domains.has(domain)) {
|
|
26
|
+
const config = getConfig();
|
|
27
|
+
const isPrivate = isPrivateUrl(url);
|
|
28
|
+
const configDelay = isPrivate ? config.crawlPrivateDelayMs : config.crawlDelayMs;
|
|
29
|
+
const robotsDelay2 = this.robotsDelays.get(domain) ?? 0;
|
|
30
|
+
const effectiveDelay = Math.max(configDelay, robotsDelay2);
|
|
31
|
+
this.domains.set(domain, {
|
|
32
|
+
activeCount: 0,
|
|
33
|
+
lastRequestTime: 0,
|
|
34
|
+
queue: [],
|
|
35
|
+
maxConcurrency: isPrivate ? config.crawlPrivateConcurrency : config.crawlConcurrency,
|
|
36
|
+
delayMs: effectiveDelay
|
|
37
|
+
});
|
|
25
38
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const configDelay = isPrivate ? config.crawlPrivateDelayMs : config.crawlDelayMs;
|
|
31
|
-
// Use robots.txt delay if it's higher than configured delay
|
|
32
|
-
const robotsDelay = this.robotsDelays.get(domain) ?? 0;
|
|
33
|
-
const effectiveDelay = Math.max(configDelay, robotsDelay);
|
|
34
|
-
this.domains.set(domain, {
|
|
35
|
-
activeCount: 0,
|
|
36
|
-
lastRequestTime: 0,
|
|
37
|
-
queue: [],
|
|
38
|
-
maxConcurrency: isPrivate ? config.crawlPrivateConcurrency : config.crawlConcurrency,
|
|
39
|
-
delayMs: effectiveDelay,
|
|
40
|
-
});
|
|
41
|
-
}
|
|
42
|
-
const state = this.domains.get(domain);
|
|
43
|
-
// Update delay if robots delay was set after state creation
|
|
44
|
-
const robotsDelay = this.robotsDelays.get(domain);
|
|
45
|
-
if (robotsDelay !== undefined && robotsDelay > state.delayMs) {
|
|
46
|
-
state.delayMs = robotsDelay;
|
|
47
|
-
}
|
|
48
|
-
return state;
|
|
39
|
+
const state = this.domains.get(domain);
|
|
40
|
+
const robotsDelay = this.robotsDelays.get(domain);
|
|
41
|
+
if (robotsDelay !== void 0 && robotsDelay > state.delayMs) {
|
|
42
|
+
state.delayMs = robotsDelay;
|
|
49
43
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
}
|
|
44
|
+
return state;
|
|
45
|
+
}
|
|
46
|
+
startRequest(state) {
|
|
47
|
+
state.activeCount++;
|
|
48
|
+
state.lastRequestTime = Date.now();
|
|
49
|
+
return () => {
|
|
50
|
+
state.activeCount--;
|
|
51
|
+
this.processQueue(state);
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
processQueue(state) {
|
|
55
|
+
if (state.queue.length === 0 || state.activeCount >= state.maxConcurrency) return;
|
|
56
|
+
const next = state.queue.shift();
|
|
57
|
+
const elapsed = Date.now() - state.lastRequestTime;
|
|
58
|
+
const remaining = state.delayMs - elapsed;
|
|
59
|
+
if (remaining <= 0) {
|
|
60
|
+
next();
|
|
61
|
+
} else {
|
|
62
|
+
setTimeout(next, remaining);
|
|
70
63
|
}
|
|
64
|
+
}
|
|
71
65
|
}
|
|
66
|
+
export {
|
|
67
|
+
RateLimiter
|
|
68
|
+
};
|
|
72
69
|
//# sourceMappingURL=rate-limiter.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/crawl/rate-limiter.ts"],"sourcesContent":["import { isPrivateUrl } from './url-utils.js';\nimport { getConfig } from '../config.js';\n\ninterface DomainState {\n activeCount: number;\n lastRequestTime: number;\n queue: Array<() => void>;\n maxConcurrency: number;\n delayMs: number;\n}\n\nexport class RateLimiter {\n private domains = new Map<string, DomainState>();\n private robotsDelays = new Map<string, number>();\n\n setRobotsCrawlDelay(domain: string, delaySeconds: number): void {\n this.robotsDelays.set(domain, delaySeconds * 1000);\n }\n\n async acquire(url: string): Promise<() => void> {\n const domain = new URL(url).hostname;\n const state = this.getOrCreateState(url, domain);\n\n if (state.activeCount < state.maxConcurrency) {\n // Enforce delay even when under concurrency limit\n const elapsed = Date.now() - state.lastRequestTime;\n const remaining = state.delayMs - elapsed;\n if (remaining > 0 && state.lastRequestTime > 0) {\n await new Promise<void>((r) => setTimeout(r, remaining));\n }\n return this.startRequest(state);\n }\n\n // Wait in queue\n return new Promise<() => void>((resolve) => {\n state.queue.push(() => resolve(this.startRequest(state)));\n });\n }\n\n private getOrCreateState(url: string, domain: string): DomainState {\n if (!this.domains.has(domain)) {\n const config = getConfig();\n const isPrivate = isPrivateUrl(url);\n const configDelay = isPrivate ? config.crawlPrivateDelayMs : config.crawlDelayMs;\n\n // Use robots.txt delay if it's higher than configured delay\n const robotsDelay = this.robotsDelays.get(domain) ?? 0;\n const effectiveDelay = Math.max(configDelay, robotsDelay);\n\n this.domains.set(domain, {\n activeCount: 0,\n lastRequestTime: 0,\n queue: [],\n maxConcurrency: isPrivate ? config.crawlPrivateConcurrency : config.crawlConcurrency,\n delayMs: effectiveDelay,\n });\n }\n\n const state = this.domains.get(domain)!;\n // Update delay if robots delay was set after state creation\n const robotsDelay = this.robotsDelays.get(domain);\n if (robotsDelay !== undefined && robotsDelay > state.delayMs) {\n state.delayMs = robotsDelay;\n }\n\n return state;\n }\n\n private startRequest(state: DomainState): () => void {\n state.activeCount++;\n state.lastRequestTime = Date.now();\n\n return () => {\n state.activeCount--;\n this.processQueue(state);\n };\n }\n\n private processQueue(state: DomainState): void {\n if (state.queue.length === 0 || state.activeCount >= state.maxConcurrency) return;\n\n const next = state.queue.shift()!;\n const elapsed = Date.now() - state.lastRequestTime;\n const remaining = state.delayMs - elapsed;\n\n if (remaining <= 0) {\n next();\n } else {\n setTimeout(next, remaining);\n }\n }\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,iBAAiB;AAUnB,MAAM,YAAY;AAAA,EACf,UAAU,oBAAI,IAAyB;AAAA,EACvC,eAAe,oBAAI,IAAoB;AAAA,EAE/C,oBAAoB,QAAgB,cAA4B;AAC9D,SAAK,aAAa,IAAI,QAAQ,eAAe,GAAI;AAAA,EACnD;AAAA,EAEA,MAAM,QAAQ,KAAkC;AAC9C,UAAM,SAAS,IAAI,IAAI,GAAG,EAAE;AAC5B,UAAM,QAAQ,KAAK,iBAAiB,KAAK,MAAM;AAE/C,QAAI,MAAM,cAAc,MAAM,gBAAgB;AAE5C,YAAM,UAAU,KAAK,IAAI,IAAI,MAAM;AACnC,YAAM,YAAY,MAAM,UAAU;AAClC,UAAI,YAAY,KAAK,MAAM,kBAAkB,GAAG;AAC9C,cAAM,IAAI,QAAc,CAAC,MAAM,WAAW,GAAG,SAAS,CAAC;AAAA,MACzD;AACA,aAAO,KAAK,aAAa,KAAK;AAAA,IAChC;AAGA,WAAO,IAAI,QAAoB,CAAC,YAAY;AAC1C,YAAM,MAAM,KAAK,MAAM,QAAQ,KAAK,aAAa,KAAK,CAAC,CAAC;AAAA,IAC1D,CAAC;AAAA,EACH;AAAA,EAEQ,iBAAiB,KAAa,QAA6B;AACjE,QAAI,CAAC,KAAK,QAAQ,IAAI,MAAM,GAAG;AAC7B,YAAM,SAAS,UAAU;AACzB,YAAM,YAAY,aAAa,GAAG;AAClC,YAAM,cAAc,YAAY,OAAO,sBAAsB,OAAO;AAGpE,YAAMA,eAAc,KAAK,aAAa,IAAI,MAAM,KAAK;AACrD,YAAM,iBAAiB,KAAK,IAAI,aAAaA,YAAW;AAExD,WAAK,QAAQ,IAAI,QAAQ;AAAA,QACvB,aAAa;AAAA,QACb,iBAAiB;AAAA,QACjB,OAAO,CAAC;AAAA,QACR,gBAAgB,YAAY,OAAO,0BAA0B,OAAO;AAAA,QACpE,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAEA,UAAM,QAAQ,KAAK,QAAQ,IAAI,MAAM;AAErC,UAAM,cAAc,KAAK,aAAa,IAAI,MAAM;AAChD,QAAI,gBAAgB,UAAa,cAAc,MAAM,SAAS;AAC5D,YAAM,UAAU;AAAA,IAClB;AAEA,WAAO;AAAA,EACT;AAAA,EAEQ,aAAa,OAAgC;AACnD,UAAM;AACN,UAAM,kBAAkB,KAAK,IAAI;AAEjC,WAAO,MAAM;AACX,YAAM;AACN,WAAK,aAAa,KAAK;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,aAAa,OAA0B;AAC7C,QAAI,MAAM,MAAM,WAAW,KAAK,MAAM,eAAe,MAAM,eAAgB;AAE3E,UAAM,OAAO,MAAM,MAAM,MAAM;AAC/B,UAAM,UAAU,KAAK,IAAI,IAAI,MAAM;AACnC,UAAM,YAAY,MAAM,UAAU;AAElC,QAAI,aAAa,GAAG;AAClB,WAAK;AAAA,IACP,OAAO;AACL,iBAAW,MAAM,SAAS;AAAA,IAC5B;AAAA,EACF;AACF;","names":["robotsDelay"]}
|