@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** Search category ("vertical") that a query can be classified into. */
export type Vertical = 'general' | 'news' | 'code' | 'docs' | 'papers';
|
|
2
|
+
/** Read-only list of the `Vertical` values known to the router. */
export declare const VERTICALS: readonly Vertical[];
|
|
3
|
+
/** Optional inputs accepted by `classifyIntent` and `classifyIntentDetailed`. */
export interface ClassifyOptions {
|
|
4
|
+
/** Override classifier (e.g., from `category` input on search tool). When set, it is returned as the vertical without inspecting the query's keywords. */
|
|
5
|
+
hint?: Vertical;
|
|
6
|
+
/** When date filters are present in the search input, push toward 'news'. Only consulted after the papers, code and docs checks have not matched. */
|
|
7
|
+
hasDateBound?: boolean;
|
|
8
|
+
/** Inject a clock for deterministic relative-date parsing in tests. Forwarded to `parseDateHint`, which falls back to the current time when omitted. */
|
|
9
|
+
now?: Date;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Date bounds extracted from a query by `parseDateHint`.
 * Either bound may be absent (e.g. "since 2020" yields only `fromDate`).
 */
export interface DateHint {
|
|
12
|
+
/** Lower bound, formatted `YYYY-MM-DD`. */
fromDate?: string;
|
|
13
|
+
/** Upper bound, formatted `YYYY-MM-DD`. */
toDate?: string;
|
|
14
|
+
}
|
|
15
|
+
/** Result of `classifyIntentDetailed`: the chosen vertical plus any date bounds found in the query. */
export interface DetailedClassification {
|
|
16
|
+
vertical: Vertical;
|
|
17
|
+
/** Present only when the query contained a recognised date phrase. */
dateHint?: DateHint;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Extracts date bounds from date phrases in a free-text query.
 *
 * Recognised forms, tried in this order (first match wins): "between Y and Y",
 * "from Y to Y", "since Y", "in|after|starting Y", "before Y",
 * "last N <unit>", "past N <unit>" (unit = days/weeks/months/years),
 * "today", "yesterday", "this week", "this month", "this year".
 *
 * @param query - Free-text search query.
 * @param now - Reference time for relative phrases; defaults to the current time.
 * @returns Bounds as `YYYY-MM-DD` strings, or `undefined` when no phrase is
 *   recognised or a matched year phrase fails validation (years are limited to 1990–2099).
 */
export declare function parseDateHint(query: string, now?: Date): DateHint | undefined;
|
|
20
|
+
/**
 * Classifies a query into a vertical and also reports any date bounds parsed from it.
 *
 * An explicit `opts.hint` wins outright; otherwise keyword checks are applied in the
 * order papers → code → docs → news, falling back to 'general'.
 *
 * @param query - Free-text search query.
 * @param opts - Optional override, date-filter flag and injectable clock.
 * @returns The vertical, plus `dateHint` when a date phrase was recognised.
 */
export declare function classifyIntentDetailed(query: string, opts?: ClassifyOptions): DetailedClassification;
|
|
21
|
+
/**
 * Convenience wrapper around `classifyIntentDetailed` that returns only the vertical.
 *
 * @param query - Free-text search query.
 * @param opts - Same options as `classifyIntentDetailed`.
 */
export declare function classifyIntent(query: string, opts?: ClassifyOptions): Vertical;
|
|
22
|
+
//# sourceMappingURL=intent-router.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"intent-router.d.ts","sourceRoot":"","sources":["../../../src/search/v1/intent-router.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,CAAC;AAEvE,eAAO,MAAM,SAAS,EAAE,SAAS,QAAQ,EAM/B,CAAC;AAEX,MAAM,WAAW,eAAe;IAC9B,wEAAwE;IACxE,IAAI,CAAC,EAAE,QAAQ,CAAC;IAChB,6EAA6E;IAC7E,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,uEAAuE;IACvE,GAAG,CAAC,EAAE,IAAI,CAAC;CACZ;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB;AAuCD,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,GAAE,IAAiB,GAAG,QAAQ,GAAG,SAAS,CA4FzF;AAED,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE,eAAe,GACrB,sBAAsB,CA6BxB;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,eAAe,GAAG,QAAQ,CAE9E"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/** Every vertical the router can return. */
const VERTICALS = [
  "general",
  "news",
  "code",
  "docs",
  "papers"
];

/** Academic vocabulary; a match routes the query to "papers". */
const PAPERS_RE = /\b(arxiv|paper|cite|citation|doi|preprint|whitepaper|journal|pubmed|proceedings)\b/i;

/** Unambiguous programming signals; a match alone routes the query to "code". */
const CODE_HARD_RE = /\b(github|pull request|pr #|commit|stack overflow|stackoverflow|compile error|typeerror|traceback|exception)\b/i;

/**
 * Language / tooling names. Only routes to "code" when HOWTO_VERB_RE also matches.
 *
 * "c++" is matched outside the `\b...\b` group: a trailing `\b` after "+" only
 * succeeds when a word character follows, so a bare "c++" (before a space or at
 * end of input) would never match.
 */
const LANG_TOKEN_RE = /\b(python|typescript|javascript|rust|go|npm|cargo|pip|regex|sql|bash)\b|\bc\+\+/i;

/** Troubleshooting verbs that, combined with a language token, indicate a code query. */
const HOWTO_VERB_RE = /\b(error|fix|debug|compile)\b/i;

/** Phrases that indicate the user wants documentation or a tutorial. */
const DOCS_PHRASE_RE = /(\bhow to\b|\btutorial\b|\breference\b|\bapi\b|\bdocumentation\b|\bdocs for\b|\bmdn\b|\bdevdocs\b|\bguide\b|\bgetting started\b)/i;

/**
 * Recency vocabulary; a match routes the query to "news".
 * NOTE(review): the year alternatives (2024|2025|2026) are hard-coded and will go
 * stale; consider deriving them from the current year.
 */
const NEWS_RE = /\b(latest|today|yesterday|this week|news|breaking|2024|2025|2026|recent|update|announcement)\b/i;
|
|
14
|
+
/** Inclusive range of years accepted in explicit year phrases ("since 2020", ...). */
const MIN_YEAR = 1990;
const MAX_YEAR = 2099;
/** Milliseconds in one day (24 * 60 * 60 * 1000). */
const MS_PER_DAY = 864e5;

/**
 * Formats a Date as its UTC calendar day.
 * @param d - A valid Date; `toISOString` throws RangeError on an Invalid Date.
 * @returns The first ten characters of the ISO string, i.e. `YYYY-MM-DD`.
 */
function isoDate(d) {
  return d.toISOString().slice(0, 10);
}

/**
 * Returns the UTC day that lies `days` days before `now`, as `YYYY-MM-DD`.
 * @param now - Reference instant.
 * @param days - Number of days to go back (0 yields the day of `now`).
 */
function shiftDays(now, days) {
  return isoDate(new Date(now.getTime() - days * MS_PER_DAY));
}

/** True when `y` lies within [MIN_YEAR, MAX_YEAR]. */
function validYear(y) {
  return y >= MIN_YEAR && y <= MAX_YEAR;
}

/**
 * Converts "N <unit>" (as in "last 3 weeks") into a lower date bound.
 *
 * Months are approximated as 30 days and years as 365 days.
 *
 * @param now - Reference instant the window is counted back from.
 * @param n - Number of units; must be finite and positive.
 * @param unit - Unit word starting with "day", "week", "month" or "year" (any case).
 * @returns `{ fromDate }`, or `undefined` when `n` or `unit` is unusable or the
 *   resulting date cannot be written as `YYYY-MM-DD`.
 */
function withUnit(now, n, unit) {
  if (!Number.isFinite(n) || n <= 0) return void 0;
  const u = unit.toLowerCase();
  let days;
  if (u.startsWith("day")) days = n;
  else if (u.startsWith("week")) days = 7 * n;
  else if (u.startsWith("month")) days = 30 * n;
  else if (u.startsWith("year")) days = 365 * n;
  else return void 0;
  const start = new Date(now.getTime() - days * MS_PER_DAY);
  // A huge count (the caller's regex accepts any digit run) can overflow the
  // Date range, making toISOString() throw, or land before year 0, where the
  // ISO string gains a sign prefix and no longer slices to YYYY-MM-DD.
  // NaN (Invalid Date) fails both comparisons, so it is rejected here too.
  const year = start.getUTCFullYear();
  if (!(year >= 0 && year <= 9999)) return void 0;
  return { fromDate: isoDate(start) };
}
|
|
35
|
+
/**
 * Scans a free-text query for a date phrase and turns it into date bounds.
 *
 * Rules are tried in a fixed order and the first phrase found decides the result:
 *   1. year ranges: "between Y and Y", "from Y to Y"
 *   2. single years: "since Y", "in|after|starting Y", "before Y"
 *   3. relative windows: "last N <unit>", then "past N <unit>"
 *   4. keywords: "today", "yesterday", "this week", "this month", "this year"
 *
 * A year phrase that is found but fails validation ends the search with
 * `undefined`; a relative window that yields no hint falls through to later rules.
 *
 * @param query - Free-text search query.
 * @param now - Reference time for relative phrases; defaults to the current time.
 * @returns `{ fromDate?, toDate? }` with `YYYY-MM-DD` values, or `undefined`.
 */
function parseDateHint(query, now = /* @__PURE__ */ new Date()) {
  if (!query) return void 0;

  // 1. Explicit year ranges.
  const rangePatterns = [
    /\bbetween\s+(\d{4})\s+and\s+(\d{4})\b/i,
    /\bfrom\s+(\d{4})\s+to\s+(\d{4})\b/i
  ];
  for (const pattern of rangePatterns) {
    const found = query.match(pattern);
    if (!found) continue;
    const startYear = Number(found[1]);
    const endYear = Number(found[2]);
    const usable = validYear(startYear) && validYear(endYear) && startYear <= endYear;
    return usable ? { fromDate: `${startYear}-01-01`, toDate: `${endYear}-12-31` } : void 0;
  }

  // 2. Single-year phrases; each pairs a pattern with the bound it produces.
  const fromYearStart = (year) => ({ fromDate: `${year}-01-01` });
  const yearRules = [
    [/\bsince\s+(\d{4})\b/i, fromYearStart],
    [/\b(?:in|after|starting)\s+(\d{4})\b/i, fromYearStart],
    // "before Y" is exclusive: the bound is the last day of the previous year.
    [/\bbefore\s+(\d{4})\b/i, (year) => ({ toDate: `${year - 1}-12-31` })]
  ];
  for (const [pattern, toHint] of yearRules) {
    const found = query.match(pattern);
    if (!found) continue;
    const year = Number(found[1]);
    return validYear(year) ? toHint(year) : void 0;
  }

  // 3. Relative windows ("last" is checked before "past").
  const windowPatterns = [
    /\blast\s+(\d+)\s+(days?|weeks?|months?|years?)\b/i,
    /\bpast\s+(\d+)\s+(days?|weeks?|months?|years?)\b/i
  ];
  for (const pattern of windowPatterns) {
    const found = query.match(pattern);
    if (!found) continue;
    const windowHint = withUnit(now, Number(found[1]), found[2]);
    if (windowHint) return windowHint;
  }

  // 4. Keywords.
  if (/\btoday\b/i.test(query)) {
    const day = shiftDays(now, 0);
    return { fromDate: day, toDate: day };
  }
  if (/\byesterday\b/i.test(query)) {
    const day = shiftDays(now, 1);
    return { fromDate: day, toDate: day };
  }
  // "this week" / "this month" are rolling 7- and 30-day windows, not calendar units.
  if (/\bthis\s+week\b/i.test(query)) return { fromDate: shiftDays(now, 7) };
  if (/\bthis\s+month\b/i.test(query)) return { fromDate: shiftDays(now, 30) };
  if (/\bthis\s+year\b/i.test(query)) {
    const currentYear = now.getUTCFullYear();
    return { fromDate: `${currentYear}-01-01` };
  }

  return void 0;
}
|
|
103
|
+
/**
 * Picks the vertical for a query and attaches any date bounds parsed from it.
 *
 * Precedence: explicit `opts.hint` → blank query ("general", no date hint) →
 * papers keywords → code (hard signal, or language token plus troubleshooting
 * verb) → docs (docs phrase or the word "learn") → news (`opts.hasDateBound`,
 * news keywords, or a parsed date hint) → "general".
 *
 * @param query - Free-text search query; null/undefined is treated as "".
 * @param opts - Optional `hint`, `hasDateBound` and `now`.
 * @returns `{ vertical }`, with `dateHint` added when a date phrase was recognised.
 */
function classifyIntentDetailed(query, opts) {
  const text = query ?? "";
  const dateHint = parseDateHint(text, opts?.now);
  // Only include the dateHint key when there is something to report.
  const result = (vertical) => (dateHint ? { vertical, dateHint } : { vertical });

  if (opts?.hint) return result(opts.hint);

  const trimmed = text.trim();
  if (trimmed.length === 0) return { vertical: "general" };

  if (PAPERS_RE.test(trimmed)) return result("papers");

  const looksLikeCode =
    CODE_HARD_RE.test(trimmed) ||
    (LANG_TOKEN_RE.test(trimmed) && HOWTO_VERB_RE.test(trimmed));
  if (looksLikeCode) return result("code");

  const looksLikeDocs = DOCS_PHRASE_RE.test(trimmed) || /\blearn\b/i.test(trimmed);
  if (looksLikeDocs) return result("docs");

  const looksLikeNews = opts?.hasDateBound || NEWS_RE.test(trimmed) || !!dateHint;
  return result(looksLikeNews ? "news" : "general");
}
|
|
129
|
+
/**
 * Shorthand for callers that only need the vertical.
 *
 * @param query - Free-text search query.
 * @param opts - Passed through unchanged to `classifyIntentDetailed`.
 * @returns The `vertical` field of the detailed classification.
 */
function classifyIntent(query, opts) {
  const { vertical } = classifyIntentDetailed(query, opts);
  return vertical;
}
|
|
132
|
+
export {
|
|
133
|
+
VERTICALS,
|
|
134
|
+
classifyIntent,
|
|
135
|
+
classifyIntentDetailed,
|
|
136
|
+
parseDateHint
|
|
137
|
+
};
|
|
138
|
+
//# sourceMappingURL=intent-router.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/search/v1/intent-router.ts"],"sourcesContent":["export type Vertical = 'general' | 'news' | 'code' | 'docs' | 'papers';\n\nexport const VERTICALS: readonly Vertical[] = [\n 'general',\n 'news',\n 'code',\n 'docs',\n 'papers',\n] as const;\n\nexport interface ClassifyOptions {\n /** Override classifier (e.g., from `category` input on search tool). */\n hint?: Vertical;\n /** When date filters are present in the search input, push toward 'news'. */\n hasDateBound?: boolean;\n /** Inject a clock for deterministic relative-date parsing in tests. */\n now?: Date;\n}\n\nexport interface DateHint {\n fromDate?: string;\n toDate?: string;\n}\n\nexport interface DetailedClassification {\n vertical: Vertical;\n dateHint?: DateHint;\n}\n\nconst PAPERS_RE = /\\b(arxiv|paper|cite|citation|doi|preprint|whitepaper|journal|pubmed|proceedings)\\b/i;\n\nconst CODE_HARD_RE = /\\b(github|pull request|pr #|commit|stack overflow|stackoverflow|compile error|typeerror|traceback|exception)\\b/i;\n\nconst LANG_TOKEN_RE = /\\b(python|typescript|javascript|rust|go|c\\+\\+|npm|cargo|pip|regex|sql|bash)\\b/i;\nconst HOWTO_VERB_RE = /\\b(error|fix|debug|compile)\\b/i;\n\nconst DOCS_PHRASE_RE = /(\\bhow to\\b|\\btutorial\\b|\\breference\\b|\\bapi\\b|\\bdocumentation\\b|\\bdocs for\\b|\\bmdn\\b|\\bdevdocs\\b|\\bguide\\b|\\bgetting started\\b)/i;\n\nconst NEWS_RE = /\\b(latest|today|yesterday|this week|news|breaking|2024|2025|2026|recent|update|announcement)\\b/i;\n\nconst MIN_YEAR = 1990;\nconst MAX_YEAR = 2099;\nconst MS_PER_DAY = 86_400_000;\n\nfunction isoDate(d: Date): string {\n return d.toISOString().slice(0, 10);\n}\n\nfunction shiftDays(now: Date, days: number): string {\n return isoDate(new Date(now.getTime() - days * MS_PER_DAY));\n}\n\nfunction validYear(y: number): boolean {\n return y >= MIN_YEAR && y <= MAX_YEAR;\n}\n\nfunction withUnit(now: Date, n: number, unit: string): DateHint | undefined {\n if (!Number.isFinite(n) || n <= 0) return 
undefined;\n const u = unit.toLowerCase();\n if (u.startsWith('day')) return { fromDate: shiftDays(now, n) };\n if (u.startsWith('week')) return { fromDate: shiftDays(now, 7 * n) };\n if (u.startsWith('month')) return { fromDate: shiftDays(now, 30 * n) };\n if (u.startsWith('year')) return { fromDate: shiftDays(now, 365 * n) };\n return undefined;\n}\n\nexport function parseDateHint(query: string, now: Date = new Date()): DateHint | undefined {\n if (!query) return undefined;\n const q = query;\n\n // 1. between YYYY and YYYY\n const between = q.match(/\\bbetween\\s+(\\d{4})\\s+and\\s+(\\d{4})\\b/i);\n if (between) {\n const y1 = Number(between[1]);\n const y2 = Number(between[2]);\n if (validYear(y1) && validYear(y2) && y1 <= y2) {\n return { fromDate: `${y1}-01-01`, toDate: `${y2}-12-31` };\n }\n return undefined;\n }\n\n // 2. from YYYY to YYYY\n const fromTo = q.match(/\\bfrom\\s+(\\d{4})\\s+to\\s+(\\d{4})\\b/i);\n if (fromTo) {\n const y1 = Number(fromTo[1]);\n const y2 = Number(fromTo[2]);\n if (validYear(y1) && validYear(y2) && y1 <= y2) {\n return { fromDate: `${y1}-01-01`, toDate: `${y2}-12-31` };\n }\n return undefined;\n }\n\n // 3. since YYYY\n const since = q.match(/\\bsince\\s+(\\d{4})\\b/i);\n if (since) {\n const y = Number(since[1]);\n if (validYear(y)) return { fromDate: `${y}-01-01` };\n return undefined;\n }\n\n // 4. in/after/starting YYYY\n const inAfter = q.match(/\\b(?:in|after|starting)\\s+(\\d{4})\\b/i);\n if (inAfter) {\n const y = Number(inAfter[1]);\n if (validYear(y)) return { fromDate: `${y}-01-01` };\n return undefined;\n }\n\n // 5. before YYYY\n const before = q.match(/\\bbefore\\s+(\\d{4})\\b/i);\n if (before) {\n const y = Number(before[1]);\n if (validYear(y)) return { toDate: `${y - 1}-12-31` };\n return undefined;\n }\n\n // 6-9. 
last N <unit>\n const last = q.match(/\\blast\\s+(\\d+)\\s+(days?|weeks?|months?|years?)\\b/i);\n if (last) {\n const hint = withUnit(now, Number(last[1]), last[2]);\n if (hint) return hint;\n }\n\n // 10. past N <unit>\n const past = q.match(/\\bpast\\s+(\\d+)\\s+(days?|weeks?|months?|years?)\\b/i);\n if (past) {\n const hint = withUnit(now, Number(past[1]), past[2]);\n if (hint) return hint;\n }\n\n // 11. today\n if (/\\btoday\\b/i.test(q)) {\n const d = shiftDays(now, 0);\n return { fromDate: d, toDate: d };\n }\n\n // 12. yesterday\n if (/\\byesterday\\b/i.test(q)) {\n const d = shiftDays(now, 1);\n return { fromDate: d, toDate: d };\n }\n\n // 13. this week\n if (/\\bthis\\s+week\\b/i.test(q)) {\n return { fromDate: shiftDays(now, 7) };\n }\n\n // 14. this month\n if (/\\bthis\\s+month\\b/i.test(q)) {\n return { fromDate: shiftDays(now, 30) };\n }\n\n // 15. this year\n if (/\\bthis\\s+year\\b/i.test(q)) {\n return { fromDate: `${now.getUTCFullYear()}-01-01` };\n }\n\n return undefined;\n}\n\nexport function classifyIntentDetailed(\n query: string,\n opts?: ClassifyOptions,\n): DetailedClassification {\n const q = query ?? '';\n const dateHint = parseDateHint(q, opts?.now);\n\n if (opts?.hint) {\n return dateHint ? { vertical: opts.hint, dateHint } : { vertical: opts.hint };\n }\n\n const trimmed = q.trim();\n if (trimmed.length === 0) {\n return { vertical: 'general' };\n }\n\n let vertical: Vertical;\n if (PAPERS_RE.test(trimmed)) {\n vertical = 'papers';\n } else if (CODE_HARD_RE.test(trimmed)) {\n vertical = 'code';\n } else if (LANG_TOKEN_RE.test(trimmed) && HOWTO_VERB_RE.test(trimmed)) {\n vertical = 'code';\n } else if (DOCS_PHRASE_RE.test(trimmed) || /\\blearn\\b/i.test(trimmed)) {\n vertical = 'docs';\n } else if (opts?.hasDateBound || NEWS_RE.test(trimmed) || !!dateHint) {\n vertical = 'news';\n } else {\n vertical = 'general';\n }\n\n return dateHint ? 
{ vertical, dateHint } : { vertical };\n}\n\nexport function classifyIntent(query: string, opts?: ClassifyOptions): Vertical {\n return classifyIntentDetailed(query, opts).vertical;\n}\n"],"mappings":"AAEO,MAAM,YAAiC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAqBA,MAAM,YAAY;AAElB,MAAM,eAAe;AAErB,MAAM,gBAAgB;AACtB,MAAM,gBAAgB;AAEtB,MAAM,iBAAiB;AAEvB,MAAM,UAAU;AAEhB,MAAM,WAAW;AACjB,MAAM,WAAW;AACjB,MAAM,aAAa;AAEnB,SAAS,QAAQ,GAAiB;AAChC,SAAO,EAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AACpC;AAEA,SAAS,UAAU,KAAW,MAAsB;AAClD,SAAO,QAAQ,IAAI,KAAK,IAAI,QAAQ,IAAI,OAAO,UAAU,CAAC;AAC5D;AAEA,SAAS,UAAU,GAAoB;AACrC,SAAO,KAAK,YAAY,KAAK;AAC/B;AAEA,SAAS,SAAS,KAAW,GAAW,MAAoC;AAC1E,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,KAAK,EAAG,QAAO;AAC1C,QAAM,IAAI,KAAK,YAAY;AAC3B,MAAI,EAAE,WAAW,KAAK,EAAG,QAAO,EAAE,UAAU,UAAU,KAAK,CAAC,EAAE;AAC9D,MAAI,EAAE,WAAW,MAAM,EAAG,QAAO,EAAE,UAAU,UAAU,KAAK,IAAI,CAAC,EAAE;AACnE,MAAI,EAAE,WAAW,OAAO,EAAG,QAAO,EAAE,UAAU,UAAU,KAAK,KAAK,CAAC,EAAE;AACrE,MAAI,EAAE,WAAW,MAAM,EAAG,QAAO,EAAE,UAAU,UAAU,KAAK,MAAM,CAAC,EAAE;AACrE,SAAO;AACT;AAEO,SAAS,cAAc,OAAe,MAAY,oBAAI,KAAK,GAAyB;AACzF,MAAI,CAAC,MAAO,QAAO;AACnB,QAAM,IAAI;AAGV,QAAM,UAAU,EAAE,MAAM,wCAAwC;AAChE,MAAI,SAAS;AACX,UAAM,KAAK,OAAO,QAAQ,CAAC,CAAC;AAC5B,UAAM,KAAK,OAAO,QAAQ,CAAC,CAAC;AAC5B,QAAI,UAAU,EAAE,KAAK,UAAU,EAAE,KAAK,MAAM,IAAI;AAC9C,aAAO,EAAE,UAAU,GAAG,EAAE,UAAU,QAAQ,GAAG,EAAE,SAAS;AAAA,IAC1D;AACA,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,EAAE,MAAM,oCAAoC;AAC3D,MAAI,QAAQ;AACV,UAAM,KAAK,OAAO,OAAO,CAAC,CAAC;AAC3B,UAAM,KAAK,OAAO,OAAO,CAAC,CAAC;AAC3B,QAAI,UAAU,EAAE,KAAK,UAAU,EAAE,KAAK,MAAM,IAAI;AAC9C,aAAO,EAAE,UAAU,GAAG,EAAE,UAAU,QAAQ,GAAG,EAAE,SAAS;AAAA,IAC1D;AACA,WAAO;AAAA,EACT;AAGA,QAAM,QAAQ,EAAE,MAAM,sBAAsB;AAC5C,MAAI,OAAO;AACT,UAAM,IAAI,OAAO,MAAM,CAAC,CAAC;AACzB,QAAI,UAAU,CAAC,EAAG,QAAO,EAAE,UAAU,GAAG,CAAC,SAAS;AAClD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,EAAE,MAAM,sCAAsC;AAC9D,MAAI,SAAS;AACX,UAAM,IAAI,OAAO,QAAQ,CAAC,CAAC;AAC3B,QAAI,UAAU,CAAC,EAAG,QAAO,EAAE,UAAU,GAAG,CAAC,SAAS;AAClD,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,EAAE,MAAM,uBAAuB;AAC9C,MAAI,QAA
Q;AACV,UAAM,IAAI,OAAO,OAAO,CAAC,CAAC;AAC1B,QAAI,UAAU,CAAC,EAAG,QAAO,EAAE,QAAQ,GAAG,IAAI,CAAC,SAAS;AACpD,WAAO;AAAA,EACT;AAGA,QAAM,OAAO,EAAE,MAAM,mDAAmD;AACxE,MAAI,MAAM;AACR,UAAM,OAAO,SAAS,KAAK,OAAO,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC;AACnD,QAAI,KAAM,QAAO;AAAA,EACnB;AAGA,QAAM,OAAO,EAAE,MAAM,mDAAmD;AACxE,MAAI,MAAM;AACR,UAAM,OAAO,SAAS,KAAK,OAAO,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC;AACnD,QAAI,KAAM,QAAO;AAAA,EACnB;AAGA,MAAI,aAAa,KAAK,CAAC,GAAG;AACxB,UAAM,IAAI,UAAU,KAAK,CAAC;AAC1B,WAAO,EAAE,UAAU,GAAG,QAAQ,EAAE;AAAA,EAClC;AAGA,MAAI,iBAAiB,KAAK,CAAC,GAAG;AAC5B,UAAM,IAAI,UAAU,KAAK,CAAC;AAC1B,WAAO,EAAE,UAAU,GAAG,QAAQ,EAAE;AAAA,EAClC;AAGA,MAAI,mBAAmB,KAAK,CAAC,GAAG;AAC9B,WAAO,EAAE,UAAU,UAAU,KAAK,CAAC,EAAE;AAAA,EACvC;AAGA,MAAI,oBAAoB,KAAK,CAAC,GAAG;AAC/B,WAAO,EAAE,UAAU,UAAU,KAAK,EAAE,EAAE;AAAA,EACxC;AAGA,MAAI,mBAAmB,KAAK,CAAC,GAAG;AAC9B,WAAO,EAAE,UAAU,GAAG,IAAI,eAAe,CAAC,SAAS;AAAA,EACrD;AAEA,SAAO;AACT;AAEO,SAAS,uBACd,OACA,MACwB;AACxB,QAAM,IAAI,SAAS;AACnB,QAAM,WAAW,cAAc,GAAG,MAAM,GAAG;AAE3C,MAAI,MAAM,MAAM;AACd,WAAO,WAAW,EAAE,UAAU,KAAK,MAAM,SAAS,IAAI,EAAE,UAAU,KAAK,KAAK;AAAA,EAC9E;AAEA,QAAM,UAAU,EAAE,KAAK;AACvB,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO,EAAE,UAAU,UAAU;AAAA,EAC/B;AAEA,MAAI;AACJ,MAAI,UAAU,KAAK,OAAO,GAAG;AAC3B,eAAW;AAAA,EACb,WAAW,aAAa,KAAK,OAAO,GAAG;AACrC,eAAW;AAAA,EACb,WAAW,cAAc,KAAK,OAAO,KAAK,cAAc,KAAK,OAAO,GAAG;AACrE,eAAW;AAAA,EACb,WAAW,eAAe,KAAK,OAAO,KAAK,aAAa,KAAK,OAAO,GAAG;AACrE,eAAW;AAAA,EACb,WAAW,MAAM,gBAAgB,QAAQ,KAAK,OAAO,KAAK,CAAC,CAAC,UAAU;AACpE,eAAW;AAAA,EACb,OAAO;AACL,eAAW;AAAA,EACb;AAEA,SAAO,WAAW,EAAE,UAAU,SAAS,IAAI,EAAE,SAAS;AACxD;AAEO,SAAS,eAAe,OAAe,MAAkC;AAC9E,SAAO,uBAAuB,OAAO,IAAI,EAAE;AAC7C;","names":[]}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { RawSearchResult } from '../../types.js';
|
|
2
|
+
import { type Vertical } from './intent-router.js';
|
|
3
|
+
import { type EngineOutcome } from './engine-base.js';
|
|
4
|
+
/**
 * Request accepted by `runV1Search`.
 *
 * - `query`: search text; it is trimmed, and an empty or non-string value
 *   yields an empty, degraded result without contacting any engine.
 * - `category`: optional vertical passed to the intent classifier as its hint.
 * - `fromDate` / `toDate`: optional date bounds; when set they take precedence
 *   over any date range the classifier infers from the query.
 * - `maxResults`: cap on returned results (10 when omitted); also forwarded
 *   to the engines.
 * - `timeoutMs`: timeout forwarded to the engines (10000 when omitted).
 * - `language`: forwarded to the engines unchanged.
 * - `includeDomains` / `excludeDomains`: forwarded to the engines and applied
 *   again to the fused list; an entry matches its own host and any subdomain.
 */
export interface OrchestratorInput {
|
|
5
|
+
query: string;
|
|
6
|
+
category?: Vertical;
|
|
7
|
+
fromDate?: string;
|
|
8
|
+
toDate?: string;
|
|
9
|
+
maxResults?: number;
|
|
10
|
+
timeoutMs?: number;
|
|
11
|
+
language?: string;
|
|
12
|
+
includeDomains?: string[];
|
|
13
|
+
excludeDomains?: string[];
|
|
14
|
+
}
|
|
15
|
+
/**
 * Result returned by `runV1Search`.
 *
 * - `vertical`: the vertical the query was routed to.
 * - `results`: fused results after domain filtering, truncated to the cap.
 * - `enginesUsed`: names of engines that succeeded with at least one result.
 * - `outcomes`: one outcome per dispatched engine; successful outcomes carry
 *   their results with duplicate URLs removed.
 * - `degraded`: true when every engine failed or no result survived.
 */
export interface OrchestratorOutput {
|
|
16
|
+
vertical: Vertical;
|
|
17
|
+
results: RawSearchResult[];
|
|
18
|
+
enginesUsed: string[];
|
|
19
|
+
outcomes: EngineOutcome[];
|
|
20
|
+
degraded: boolean;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Classifies the query into a vertical, runs that vertical's engines in
 * parallel, fuses their rankings, and returns the filtered, capped list.
 * Resolves with `degraded: true` (rather than rejecting) when the query is
 * empty, every engine fails, or no result survives filtering.
 */
export declare function runV1Search(input: OrchestratorInput): Promise<OrchestratorOutput>;
|
|
23
|
+
/**
 * Test-only helper: calls the reset hook of each vertical's engine module
 * (general, news, code, docs, papers).
 */
export declare function _resetOrchestratorVerticalsForTest(): void;
|
|
24
|
+
//# sourceMappingURL=orchestrator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../../src/search/v1/orchestrator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAuB,MAAM,gBAAgB,CAAC;AAE3E,OAAO,EAA0B,KAAK,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAGL,KAAK,aAAa,EACnB,MAAM,kBAAkB,CAAC;AAc1B,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,QAAQ,EAAE,aAAa,EAAE,CAAC;IAC1B,QAAQ,EAAE,OAAO,CAAC;CACnB;AA6DD,wBAAsB,WAAW,CAC/B,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,kBAAkB,CAAC,CAqH7B;AAED,wBAAgB,kCAAkC,IAAI,IAAI,CAMzD"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { createLogger } from "../../logger.js";
|
|
2
|
+
import { classifyIntentDetailed } from "./intent-router.js";
|
|
3
|
+
import {
|
|
4
|
+
runEnginesParallel
|
|
5
|
+
} from "./engine-base.js";
|
|
6
|
+
import { recencyMultiplier, hasTemporalIntent } from "./recency-boost.js";
|
|
7
|
+
import { getGeneralEngines, _resetGeneralEnginesForTest } from "./verticals/general.js";
|
|
8
|
+
import { getNewsEngines, _resetNewsEnginesForTest } from "./verticals/news.js";
|
|
9
|
+
import { getCodeEngines, _resetCodeEnginesForTest } from "./verticals/code.js";
|
|
10
|
+
import { getDocsEngines, _resetDocsEnginesForTest } from "./verticals/docs.js";
|
|
11
|
+
import { getPapersEngines, _resetPapersEnginesForTest } from "./verticals/papers.js";
|
|
12
|
+
// Module logger, created with the "search" label.
const log = createLogger("search");
|
|
13
|
+
// Constant added to each 1-based rank in the fusion score: weight / (RRF_K + rank).
const RRF_K = 60;
|
|
14
|
+
// Result cap used when the caller does not supply maxResults.
const DEFAULT_MAX_RESULTS = 10;
|
|
15
|
+
// Engine timeout (1e4 = 10000) used when the caller does not supply timeoutMs.
const DEFAULT_TIMEOUT_MS = 1e4;
|
|
16
|
+
/**
 * Looks up the engine entries registered for a search vertical.
 *
 * @param vertical One of "general", "news", "code", "docs" or "papers".
 * @returns Whatever the matching vertical module's getter returns, or
 *   `undefined` when `vertical` is not one of the five known names.
 */
function getEntriesForVertical(vertical) {
  // A Map (not a plain object) so inherited keys like "constructor" never match.
  const providers = /* @__PURE__ */ new Map([
    ["general", getGeneralEngines],
    ["news", getNewsEngines],
    ["code", getCodeEngines],
    ["docs", getDocsEngines],
    ["papers", getPapersEngines]
  ]);
  const provider = providers.get(vertical);
  return provider ? provider() : void 0;
}
|
|
30
|
+
/**
 * Extracts the lower-cased hostname from a URL string.
 *
 * A single trailing dot (the fully-qualified form, e.g. "example.com.") is
 * removed so that such hosts compare equal to the same name without the dot
 * when matched against include/exclude domain lists.
 *
 * @param url Absolute URL to parse.
 * @returns The hostname, or "" when `url` cannot be parsed.
 */
function hostnameOf(url) {
  let host;
  try {
    host = new URL(url).hostname;
  } catch {
    return "";
  }
  return host.toLowerCase().replace(/\.$/, "");
}
|
|
37
|
+
/**
 * Tests whether `host` is `domain` itself or one of its subdomains.
 *
 * The domain entry is normalised before comparison: surrounding whitespace,
 * a leading "." or "*." and a trailing "." are removed, and case is ignored.
 * An entry that is empty after normalisation (or is not a string) never
 * matches, so a blank list item cannot accidentally match hosts.
 *
 * @param host Hostname to test; "" never matches.
 * @param domain Domain entry from an include/exclude list.
 * @returns true when the host equals the domain or ends with "." + domain.
 */
function matchesDomain(host, domain) {
  if (!host || typeof domain !== "string") return false;
  const needle = domain.trim().toLowerCase().replace(/^\*?\./, "").replace(/\.$/, "");
  if (!needle) return false;
  const candidate = host.toLowerCase().replace(/\.$/, "");
  // The "." prefix keeps "notexample.com" from matching "example.com".
  return candidate === needle || candidate.endsWith(`.${needle}`);
}
|
|
42
|
+
/**
 * Applies include/exclude domain lists to a result list.
 *
 * @param results Results to filter; each is read through its `url`.
 * @param includeDomains When non-empty, only hosts matching one entry are kept.
 * @param excludeDomains When non-empty, hosts matching any entry are dropped.
 * @returns The same array when both lists are empty or missing, otherwise a
 *   new filtered array in the original order.
 */
function applyDomainFilters(results, includeDomains, excludeDomains) {
  const restrictToIncluded = Boolean(includeDomains?.length);
  const dropExcluded = Boolean(excludeDomains?.length);
  if (!restrictToIncluded && !dropExcluded) return results;
  const hostIn = (host, domains) => domains.some((d) => matchesDomain(host, d));
  return results.filter((result) => {
    const host = hostnameOf(result.url);
    const allowed = !restrictToIncluded || hostIn(host, includeDomains);
    if (!allowed) return false;
    return !(dropExcluded && hostIn(host, excludeDomains));
  });
}
|
|
55
|
+
/**
 * Removes results that repeat an earlier result's exact `url`.
 *
 * @param results One engine's results, in rank order.
 * @returns A new array holding the first occurrence of each URL, with the
 *   original relative order preserved.
 */
function dedupWithinEngine(results) {
  // Map iteration follows insertion order, so first-seen ranking is kept.
  const firstByUrl = /* @__PURE__ */ new Map();
  for (const result of results) {
    if (!firstByUrl.has(result.url)) {
      firstByUrl.set(result.url, result);
    }
  }
  return [...firstByUrl.values()];
}
|
|
65
|
+
/**
 * Runs a v1 search end to end.
 *
 * Steps: classify the query into a vertical (also picking up any date range
 * the classifier infers), choose that vertical's engines, run them in
 * parallel, fuse their rankings with weighted reciprocal-rank scoring
 * (optionally boosted by recency), then apply the domain filters and the
 * result cap.
 *
 * `maxResults` is resolved once and sanitised: a missing, NaN or negative
 * value falls back to the default of 10 and fractions are floored. A negative
 * value would otherwise reach `slice` as a negative end index and silently
 * drop results from the end of the list.
 *
 * @param input Search request (query, optional category hint, date bounds,
 *   result cap, timeout, language and domain lists).
 * @returns The chosen vertical, the fused results, the engines that returned
 *   at least one result, every engine outcome, and a `degraded` flag that is
 *   true when every engine failed or the final list is empty.
 */
async function runV1Search(input) {
  const query = typeof input.query === "string" ? input.query.trim() : "";
  if (query.length === 0) {
    log.warn("orchestrator received empty query");
    return {
      vertical: "general",
      results: [],
      enginesUsed: [],
      outcomes: [],
      degraded: true
    };
  }
  const requestedMax = Number(input.maxResults ?? DEFAULT_MAX_RESULTS);
  const maxResults = Number.isNaN(requestedMax) || requestedMax < 0 ? DEFAULT_MAX_RESULTS : Math.floor(requestedMax);
  const callerHasDateBound = !!(input.fromDate || input.toDate);
  const classification = classifyIntentDetailed(query, {
    hint: input.category,
    hasDateBound: callerHasDateBound
  });
  const vertical = classification.vertical;
  const dateHint = classification.dateHint;
  // Caller-supplied bounds win over bounds inferred from the query text.
  const effectiveFromDate = input.fromDate ?? dateHint?.fromDate;
  const effectiveToDate = input.toDate ?? dateHint?.toDate;
  const hasDateBound = !!(effectiveFromDate || effectiveToDate);
  const allEntries = getEntriesForVertical(vertical);
  // With a date bound, prefer engines that declare date-filter support, but
  // fall back to the full list rather than dispatching nothing.
  let entries = allEntries;
  if (hasDateBound) {
    const dateAware = allEntries.filter((e) => e.supportsDateFilter === true);
    entries = dateAware.length > 0 ? dateAware : allEntries;
  }
  const options = {
    maxResults,
    timeoutMs: input.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    language: input.language,
    includeDomains: input.includeDomains,
    excludeDomains: input.excludeDomains,
    fromDate: effectiveFromDate,
    toDate: effectiveToDate,
    category: vertical === "general" ? void 0 : vertical
  };
  log.info("orchestrator dispatching engines", {
    vertical,
    engineCount: entries.length,
    hasDateBound
  });
  const outcomes = await runEnginesParallel(entries, query, options);
  const wantsRecency = vertical === "news" || hasDateBound || hasTemporalIntent(query);
  const fused = /* @__PURE__ */ new Map();
  const urlToResult = /* @__PURE__ */ new Map();
  // NOTE(review): the weight lookup below assumes outcomes[i] corresponds to
  // entries[i] — confirm runEnginesParallel preserves entry order.
  for (let i = 0; i < outcomes.length; i++) {
    const outcome = outcomes[i];
    if (!outcome.ok || outcome.results.length === 0) continue;
    const dedupedResults = dedupWithinEngine(outcome.results);
    // Store the deduplicated list back so the returned outcomes match what was fused.
    outcome.results = dedupedResults;
    const weight = entries[i].weight ?? 1;
    for (let j = 0; j < dedupedResults.length; j++) {
      const r = dedupedResults[j];
      const rank = j + 1;
      const base = weight / (RRF_K + rank);
      const recMul = wantsRecency ? recencyMultiplier(r.published_date) : 1;
      fused.set(r.url, (fused.get(r.url) ?? 0) + base * recMul);
      // Keep the first result object seen for a URL as its representative.
      if (!urlToResult.has(r.url)) {
        urlToResult.set(r.url, r);
      }
    }
  }
  const sortedUrls = [...fused.entries()].sort((a, b) => b[1] - a[1]).map(([url]) => url);
  let merged = sortedUrls.map((url) => urlToResult.get(url)).filter((r) => r !== void 0);
  merged = applyDomainFilters(merged, input.includeDomains, input.excludeDomains);
  const results = merged.slice(0, maxResults);
  const enginesUsed = outcomes.filter((o) => o.ok && o.results.length > 0).map((o) => o.engine);
  const degraded = outcomes.every((o) => !o.ok) || results.length === 0;
  if (degraded) {
    log.warn("orchestrator returning degraded result", {
      vertical,
      attempted: outcomes.length,
      ok: outcomes.filter((o) => o.ok).length,
      resultCount: results.length
    });
  }
  return {
    vertical,
    results,
    enginesUsed,
    outcomes,
    degraded
  };
}
|
|
152
|
+
/**
 * Test-only helper: calls each vertical module's reset hook, in the order
 * general, news, code, docs, papers.
 */
function _resetOrchestratorVerticalsForTest() {
  const resetHooks = [
    _resetGeneralEnginesForTest,
    _resetNewsEnginesForTest,
    _resetCodeEnginesForTest,
    _resetDocsEnginesForTest,
    _resetPapersEnginesForTest
  ];
  for (const reset of resetHooks) {
    reset();
  }
}
|
|
159
|
+
export {
|
|
160
|
+
_resetOrchestratorVerticalsForTest,
|
|
161
|
+
runV1Search
|
|
162
|
+
};
|
|
163
|
+
//# sourceMappingURL=orchestrator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/search/v1/orchestrator.ts"],"sourcesContent":["import type { RawSearchResult, SearchEngineOptions } from '../../types.js';\nimport { createLogger } from '../../logger.js';\nimport { classifyIntentDetailed, type Vertical } from './intent-router.js';\nimport {\n runEnginesParallel,\n type EngineEntry,\n type EngineOutcome,\n} from './engine-base.js';\nimport { recencyMultiplier, hasTemporalIntent } from './recency-boost.js';\nimport { getGeneralEngines, _resetGeneralEnginesForTest } from './verticals/general.js';\nimport { getNewsEngines, _resetNewsEnginesForTest } from './verticals/news.js';\nimport { getCodeEngines, _resetCodeEnginesForTest } from './verticals/code.js';\nimport { getDocsEngines, _resetDocsEnginesForTest } from './verticals/docs.js';\nimport { getPapersEngines, _resetPapersEnginesForTest } from './verticals/papers.js';\n\nconst log = createLogger('search');\n\nconst RRF_K = 60;\nconst DEFAULT_MAX_RESULTS = 10;\nconst DEFAULT_TIMEOUT_MS = 10_000;\n\nexport interface OrchestratorInput {\n query: string;\n category?: Vertical;\n fromDate?: string;\n toDate?: string;\n maxResults?: number;\n timeoutMs?: number;\n language?: string;\n includeDomains?: string[];\n excludeDomains?: string[];\n}\n\nexport interface OrchestratorOutput {\n vertical: Vertical;\n results: RawSearchResult[];\n enginesUsed: string[];\n outcomes: EngineOutcome[];\n degraded: boolean;\n}\n\nfunction getEntriesForVertical(vertical: Vertical): EngineEntry[] {\n switch (vertical) {\n case 'general':\n return getGeneralEngines();\n case 'news':\n return getNewsEngines();\n case 'code':\n return getCodeEngines();\n case 'docs':\n return getDocsEngines();\n case 'papers':\n return getPapersEngines();\n }\n}\n\nfunction hostnameOf(url: string): string {\n try {\n return new URL(url).hostname.toLowerCase();\n } catch {\n return '';\n }\n}\n\nfunction matchesDomain(host: string, domain: string): boolean {\n const needle = 
domain.toLowerCase().replace(/^\\./, '');\n if (!host) return false;\n return host === needle || host.endsWith(`.${needle}`);\n}\n\nfunction applyDomainFilters(\n results: RawSearchResult[],\n includeDomains?: string[],\n excludeDomains?: string[],\n): RawSearchResult[] {\n if (!includeDomains?.length && !excludeDomains?.length) return results;\n return results.filter((r) => {\n const host = hostnameOf(r.url);\n if (includeDomains?.length && !includeDomains.some((d) => matchesDomain(host, d))) {\n return false;\n }\n if (excludeDomains?.length && excludeDomains.some((d) => matchesDomain(host, d))) {\n return false;\n }\n return true;\n });\n}\n\n// Defensive per-engine dedup: keep first occurrence by URL.\nfunction dedupWithinEngine(results: RawSearchResult[]): RawSearchResult[] {\n const seen = new Set<string>();\n const out: RawSearchResult[] = [];\n for (const r of results) {\n if (seen.has(r.url)) continue;\n seen.add(r.url);\n out.push(r);\n }\n return out;\n}\n\nexport async function runV1Search(\n input: OrchestratorInput,\n): Promise<OrchestratorOutput> {\n const query = typeof input.query === 'string' ? input.query.trim() : '';\n if (query.length === 0) {\n log.warn('orchestrator received empty query');\n return {\n vertical: 'general',\n results: [],\n enginesUsed: [],\n outcomes: [],\n degraded: true,\n };\n }\n\n const callerHasDateBound = !!(input.fromDate || input.toDate);\n const classification = classifyIntentDetailed(query, {\n hint: input.category,\n hasDateBound: callerHasDateBound,\n });\n const vertical = classification.vertical;\n const dateHint = classification.dateHint;\n\n const effectiveFromDate = input.fromDate ?? dateHint?.fromDate;\n const effectiveToDate = input.toDate ?? dateHint?.toDate;\n const hasDateBound = !!(effectiveFromDate || effectiveToDate);\n\n const allEntries = getEntriesForVertical(vertical);\n\n // Date-support filtering. 
If no engines remain, silently fall back to the\n // full entry list — the engines may still filter client-side, and a later\n // rerank step can apply temporal weighting. Better than returning empty.\n let entries = allEntries;\n if (hasDateBound) {\n const dateAware = allEntries.filter((e) => e.supportsDateFilter === true);\n entries = dateAware.length > 0 ? dateAware : allEntries;\n }\n\n const options: SearchEngineOptions = {\n maxResults: input.maxResults ?? DEFAULT_MAX_RESULTS,\n timeoutMs: input.timeoutMs ?? DEFAULT_TIMEOUT_MS,\n language: input.language,\n includeDomains: input.includeDomains,\n excludeDomains: input.excludeDomains,\n fromDate: effectiveFromDate,\n toDate: effectiveToDate,\n category: vertical === 'general' ? undefined : vertical,\n };\n\n log.info('orchestrator dispatching engines', {\n vertical,\n engineCount: entries.length,\n hasDateBound,\n });\n\n const outcomes = await runEnginesParallel(entries, query, options);\n\n const wantsRecency =\n vertical === 'news' || hasDateBound || hasTemporalIntent(query);\n\n // Per-engine dedup, then RRF with per-entry weights and optional recency boost.\n const fused = new Map<string, number>();\n const urlToResult = new Map<string, RawSearchResult>();\n\n for (let i = 0; i < outcomes.length; i++) {\n const outcome = outcomes[i];\n if (!outcome.ok || outcome.results.length === 0) continue;\n const dedupedResults = dedupWithinEngine(outcome.results);\n // Replace results in outcome to keep telemetry consistent with what we fused.\n outcome.results = dedupedResults;\n\n const weight = entries[i].weight ?? 1;\n for (let j = 0; j < dedupedResults.length; j++) {\n const r = dedupedResults[j];\n const rank = j + 1;\n const base = weight / (RRF_K + rank);\n const recMul = wantsRecency ? recencyMultiplier(r.published_date) : 1.0;\n fused.set(r.url, (fused.get(r.url) ?? 
0) + base * recMul);\n if (!urlToResult.has(r.url)) {\n urlToResult.set(r.url, r);\n }\n }\n }\n\n const sortedUrls = [...fused.entries()]\n .sort((a, b) => b[1] - a[1])\n .map(([url]) => url);\n\n let merged: RawSearchResult[] = sortedUrls\n .map((url) => urlToResult.get(url))\n .filter((r): r is RawSearchResult => r !== undefined);\n\n merged = applyDomainFilters(merged, input.includeDomains, input.excludeDomains);\n\n const maxResults = input.maxResults ?? DEFAULT_MAX_RESULTS;\n const results = merged.slice(0, maxResults);\n\n const enginesUsed = outcomes\n .filter((o) => o.ok && o.results.length > 0)\n .map((o) => o.engine);\n\n const degraded = outcomes.every((o) => !o.ok) || results.length === 0;\n\n if (degraded) {\n log.warn('orchestrator returning degraded result', {\n vertical,\n attempted: outcomes.length,\n ok: outcomes.filter((o) => o.ok).length,\n resultCount: results.length,\n });\n }\n\n return {\n vertical,\n results,\n enginesUsed,\n outcomes,\n degraded,\n };\n}\n\nexport function _resetOrchestratorVerticalsForTest(): void {\n _resetGeneralEnginesForTest();\n _resetNewsEnginesForTest();\n _resetCodeEnginesForTest();\n _resetDocsEnginesForTest();\n 
_resetPapersEnginesForTest();\n}\n"],"mappings":"AACA,SAAS,oBAAoB;AAC7B,SAAS,8BAA6C;AACtD;AAAA,EACE;AAAA,OAGK;AACP,SAAS,mBAAmB,yBAAyB;AACrD,SAAS,mBAAmB,mCAAmC;AAC/D,SAAS,gBAAgB,gCAAgC;AACzD,SAAS,gBAAgB,gCAAgC;AACzD,SAAS,gBAAgB,gCAAgC;AACzD,SAAS,kBAAkB,kCAAkC;AAE7D,MAAM,MAAM,aAAa,QAAQ;AAEjC,MAAM,QAAQ;AACd,MAAM,sBAAsB;AAC5B,MAAM,qBAAqB;AAsB3B,SAAS,sBAAsB,UAAmC;AAChE,UAAQ,UAAU;AAAA,IAChB,KAAK;AACH,aAAO,kBAAkB;AAAA,IAC3B,KAAK;AACH,aAAO,eAAe;AAAA,IACxB,KAAK;AACH,aAAO,eAAe;AAAA,IACxB,KAAK;AACH,aAAO,eAAe;AAAA,IACxB,KAAK;AACH,aAAO,iBAAiB;AAAA,EAC5B;AACF;AAEA,SAAS,WAAW,KAAqB;AACvC,MAAI;AACF,WAAO,IAAI,IAAI,GAAG,EAAE,SAAS,YAAY;AAAA,EAC3C,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,cAAc,MAAc,QAAyB;AAC5D,QAAM,SAAS,OAAO,YAAY,EAAE,QAAQ,OAAO,EAAE;AACrD,MAAI,CAAC,KAAM,QAAO;AAClB,SAAO,SAAS,UAAU,KAAK,SAAS,IAAI,MAAM,EAAE;AACtD;AAEA,SAAS,mBACP,SACA,gBACA,gBACmB;AACnB,MAAI,CAAC,gBAAgB,UAAU,CAAC,gBAAgB,OAAQ,QAAO;AAC/D,SAAO,QAAQ,OAAO,CAAC,MAAM;AAC3B,UAAM,OAAO,WAAW,EAAE,GAAG;AAC7B,QAAI,gBAAgB,UAAU,CAAC,eAAe,KAAK,CAAC,MAAM,cAAc,MAAM,CAAC,CAAC,GAAG;AACjF,aAAO;AAAA,IACT;AACA,QAAI,gBAAgB,UAAU,eAAe,KAAK,CAAC,MAAM,cAAc,MAAM,CAAC,CAAC,GAAG;AAChF,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGA,SAAS,kBAAkB,SAA+C;AACxE,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,MAAyB,CAAC;AAChC,aAAW,KAAK,SAAS;AACvB,QAAI,KAAK,IAAI,EAAE,GAAG,EAAG;AACrB,SAAK,IAAI,EAAE,GAAG;AACd,QAAI,KAAK,CAAC;AAAA,EACZ;AACA,SAAO;AACT;AAEA,eAAsB,YACpB,OAC6B;AAC7B,QAAM,QAAQ,OAAO,MAAM,UAAU,WAAW,MAAM,MAAM,KAAK,IAAI;AACrE,MAAI,MAAM,WAAW,GAAG;AACtB,QAAI,KAAK,mCAAmC;AAC5C,WAAO;AAAA,MACL,UAAU;AAAA,MACV,SAAS,CAAC;AAAA,MACV,aAAa,CAAC;AAAA,MACd,UAAU,CAAC;AAAA,MACX,UAAU;AAAA,IACZ;AAAA,EACF;AAEA,QAAM,qBAAqB,CAAC,EAAE,MAAM,YAAY,MAAM;AACtD,QAAM,iBAAiB,uBAAuB,OAAO;AAAA,IACnD,MAAM,MAAM;AAAA,IACZ,cAAc;AAAA,EAChB,CAAC;AACD,QAAM,WAAW,eAAe;AAChC,QAAM,WAAW,eAAe;AAEhC,QAAM,oBAAoB,MAAM,YAAY,UAAU;AACtD,QAAM,kBAAkB,MAAM,UAAU,UAAU;AAClD,QAAM,eAAe,CAAC,EAAE,qBAAqB;AAE7C,QAAM,aAAa,sBAAsB,QAAQ;AAKjD,MAAI,UAAU;AACd,MAAI,cAAc;AAChB,UAAM,YAAY,WAAW,OAAO,CAAC,MAAM,EAAE,uBAAuB,IAAI;AA
CxE,cAAU,UAAU,SAAS,IAAI,YAAY;AAAA,EAC/C;AAEA,QAAM,UAA+B;AAAA,IACnC,YAAY,MAAM,cAAc;AAAA,IAChC,WAAW,MAAM,aAAa;AAAA,IAC9B,UAAU,MAAM;AAAA,IAChB,gBAAgB,MAAM;AAAA,IACtB,gBAAgB,MAAM;AAAA,IACtB,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,UAAU,aAAa,YAAY,SAAY;AAAA,EACjD;AAEA,MAAI,KAAK,oCAAoC;AAAA,IAC3C;AAAA,IACA,aAAa,QAAQ;AAAA,IACrB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,mBAAmB,SAAS,OAAO,OAAO;AAEjE,QAAM,eACJ,aAAa,UAAU,gBAAgB,kBAAkB,KAAK;AAGhE,QAAM,QAAQ,oBAAI,IAAoB;AACtC,QAAM,cAAc,oBAAI,IAA6B;AAErD,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,UAAU,SAAS,CAAC;AAC1B,QAAI,CAAC,QAAQ,MAAM,QAAQ,QAAQ,WAAW,EAAG;AACjD,UAAM,iBAAiB,kBAAkB,QAAQ,OAAO;AAExD,YAAQ,UAAU;AAElB,UAAM,SAAS,QAAQ,CAAC,EAAE,UAAU;AACpC,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,YAAM,IAAI,eAAe,CAAC;AAC1B,YAAM,OAAO,IAAI;AACjB,YAAM,OAAO,UAAU,QAAQ;AAC/B,YAAM,SAAS,eAAe,kBAAkB,EAAE,cAAc,IAAI;AACpE,YAAM,IAAI,EAAE,MAAM,MAAM,IAAI,EAAE,GAAG,KAAK,KAAK,OAAO,MAAM;AACxD,UAAI,CAAC,YAAY,IAAI,EAAE,GAAG,GAAG;AAC3B,oBAAY,IAAI,EAAE,KAAK,CAAC;AAAA,MAC1B;AAAA,IACF;AAAA,EACF;AAEA,QAAM,aAAa,CAAC,GAAG,MAAM,QAAQ,CAAC,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,EAC1B,IAAI,CAAC,CAAC,GAAG,MAAM,GAAG;AAErB,MAAI,SAA4B,WAC7B,IAAI,CAAC,QAAQ,YAAY,IAAI,GAAG,CAAC,EACjC,OAAO,CAAC,MAA4B,MAAM,MAAS;AAEtD,WAAS,mBAAmB,QAAQ,MAAM,gBAAgB,MAAM,cAAc;AAE9E,QAAM,aAAa,MAAM,cAAc;AACvC,QAAM,UAAU,OAAO,MAAM,GAAG,UAAU;AAE1C,QAAM,cAAc,SACjB,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,SAAS,CAAC,EAC1C,IAAI,CAAC,MAAM,EAAE,MAAM;AAEtB,QAAM,WAAW,SAAS,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,QAAQ,WAAW;AAEpE,MAAI,UAAU;AACZ,QAAI,KAAK,0CAA0C;AAAA,MACjD;AAAA,MACA,WAAW,SAAS;AAAA,MACpB,IAAI,SAAS,OAAO,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,MACjC,aAAa,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,SAAS,qCAA2C;AACzD,8BAA4B;AAC5B,2BAAyB;AACzB,2BAAyB;AACzB,2BAAyB;AACzB,6BAA2B;AAC7B;","names":[]}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Returns a multiplier in [1.0, 2.0] based on how recent the date is.
 * Continuous decay: 1 + e^(-ageDays / 30). Calibrated to ~2.0 today,
 * ~1.79 at 7d, ~1.37 at 30d, ~1.0 at 180d+.
 *
 * A missing or unparseable `publishedDate` yields 1.0 (no boost). Dates
 * later than `now` are treated as age 0 and therefore yield 2.0.
 *
 * @param publishedDate Date string parsed with the `Date` constructor.
 * @param now Reference time; defaults to the current time.
 */
|
|
6
|
+
export declare function recencyMultiplier(publishedDate: string | undefined, now?: Date): number;
|
|
7
|
+
/**
 * Cheap regex check for temporal intent keywords.
 *
 * Returns true when the query contains a recency word (e.g. "latest",
 * "today", "news", "recent"), a recent four-digit year, or a phrase of the
 * form "last N days/weeks/months/years" or "past N ...". Returns false for
 * an empty query.
 */
|
|
8
|
+
export declare function hasTemporalIntent(query: string): boolean;
|
|
9
|
+
//# sourceMappingURL=recency-boost.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recency-boost.d.ts","sourceRoot":"","sources":["../../../src/search/v1/recency-boost.ts"],"names":[],"mappings":"AAGA;;;;GAIG;AACH,wBAAgB,iBAAiB,CAC/B,aAAa,EAAE,MAAM,GAAG,SAAS,EACjC,GAAG,GAAE,IAAiB,GACrB,MAAM,CAaR;AAQD,sDAAsD;AACtD,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAaxD"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// Decay constant, in days, of the exponential recency boost.
const TAU_DAYS = 30;
// Milliseconds in one day (864e5 = 86,400,000).
const MS_PER_DAY = 864e5;
/**
 * Computes a recency boost of 1 + e^(-ageDays / TAU_DAYS).
 *
 * The result is 2 for a date at or after `now` (age is clamped to zero) and
 * decays towards 1 as the date gets older. Because age is never negative the
 * formula cannot leave [1, 2], so no extra clamping is required.
 *
 * @param publishedDate Date string parsed with the `Date` constructor.
 * @param now Reference time; defaults to the current time.
 * @returns A multiplier in [1, 2]; exactly 1 when `publishedDate` is missing
 *   or unparseable, or when `now` is an invalid date (previously NaN, which
 *   would have propagated into every score it multiplied).
 */
function recencyMultiplier(publishedDate, now = /* @__PURE__ */ new Date()) {
  if (!publishedDate) return 1;
  const publishedMs = new Date(publishedDate).getTime();
  const nowMs = now.getTime();
  if (Number.isNaN(publishedMs) || Number.isNaN(nowMs)) return 1;
  const ageDays = Math.max(0, nowMs - publishedMs) / MS_PER_DAY;
  return 1 + Math.exp(-ageDays / TAU_DAYS);
}
|
|
15
|
+
// Recency keywords and phrases, matched case-insensitively on word boundaries.
const TEMPORAL_WORD_RE = /\b(latest|today|yesterday|this week|this month|this year|news|breaking|recent|update|now|current)\b/i;
// A standalone year from 2020 to 2030 inclusive. The range lives in the
// pattern itself so that an out-of-range year earlier in the query
// (e.g. 2035) cannot hide a valid one that follows it.
const TEMPORAL_YEAR_RE = /\b(202[0-9]|2030)\b/;
// "last 3 days", "last 2 weeks", ...
const TEMPORAL_LAST_RE = /\blast\s+\d+\s+(day|days|week|weeks|month|months|year|years)\b/i;
// "past 3 days", "past 2 weeks", ...
const TEMPORAL_PAST_RE = /\bpast\s+\d+\s+(day|days|week|weeks|month|months|year|years)\b/i;
/**
 * Cheap regex check for temporal intent in a query.
 *
 * @param query Raw query text.
 * @returns true when the query contains a recency keyword, any year in
 *   2020–2030, or a "last N <unit>" / "past N <unit>" phrase; false
 *   otherwise, including for an empty query.
 */
function hasTemporalIntent(query) {
  if (!query) return false;
  const patterns = [TEMPORAL_WORD_RE, TEMPORAL_YEAR_RE, TEMPORAL_LAST_RE, TEMPORAL_PAST_RE];
  return patterns.some((pattern) => pattern.test(query));
}
|
|
33
|
+
export {
|
|
34
|
+
hasTemporalIntent,
|
|
35
|
+
recencyMultiplier
|
|
36
|
+
};
|
|
37
|
+
//# sourceMappingURL=recency-boost.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/search/v1/recency-boost.ts"],"sourcesContent":["const TAU_DAYS = 30;\nconst MS_PER_DAY = 86_400_000;\n\n/**\n * Returns a multiplier in [1.0, 2.0] based on how recent the date is.\n * Continuous decay: 1 + e^(-ageDays / 30). Calibrated to ~2.0 today,\n * ~1.79 at 7d, ~1.37 at 30d, ~1.0 at 180d+.\n */\nexport function recencyMultiplier(\n publishedDate: string | undefined,\n now: Date = new Date(),\n): number {\n if (!publishedDate) return 1.0;\n const parsed = new Date(publishedDate);\n const t = parsed.getTime();\n if (Number.isNaN(t)) return 1.0;\n\n const ageMs = Math.max(0, now.getTime() - t);\n const ageDays = ageMs / MS_PER_DAY;\n const raw = 1 + Math.exp(-ageDays / TAU_DAYS);\n\n if (raw < 1.0) return 1.0;\n if (raw > 2.0) return 2.0;\n return raw;\n}\n\nconst TEMPORAL_WORD_RE =\n /\\b(latest|today|yesterday|this week|this month|this year|news|breaking|recent|update|now|current)\\b/i;\nconst TEMPORAL_YEAR_RE = /\\b(20[2-3][0-9])\\b/;\nconst TEMPORAL_LAST_RE = /\\blast\\s+\\d+\\s+(day|days|week|weeks|month|months|year|years)\\b/i;\nconst TEMPORAL_PAST_RE = /\\bpast\\s+\\d+\\s+(day|days|week|weeks|month|months|year|years)\\b/i;\n\n/** Cheap regex check for temporal intent keywords. 
*/\nexport function hasTemporalIntent(query: string): boolean {\n if (!query) return false;\n if (TEMPORAL_WORD_RE.test(query)) return true;\n if (TEMPORAL_YEAR_RE.test(query)) {\n const m = query.match(TEMPORAL_YEAR_RE);\n if (m) {\n const y = Number(m[1]);\n if (y >= 2020 && y <= 2030) return true;\n }\n }\n if (TEMPORAL_LAST_RE.test(query)) return true;\n if (TEMPORAL_PAST_RE.test(query)) return true;\n return false;\n}\n"],"mappings":"AAAA,MAAM,WAAW;AACjB,MAAM,aAAa;AAOZ,SAAS,kBACd,eACA,MAAY,oBAAI,KAAK,GACb;AACR,MAAI,CAAC,cAAe,QAAO;AAC3B,QAAM,SAAS,IAAI,KAAK,aAAa;AACrC,QAAM,IAAI,OAAO,QAAQ;AACzB,MAAI,OAAO,MAAM,CAAC,EAAG,QAAO;AAE5B,QAAM,QAAQ,KAAK,IAAI,GAAG,IAAI,QAAQ,IAAI,CAAC;AAC3C,QAAM,UAAU,QAAQ;AACxB,QAAM,MAAM,IAAI,KAAK,IAAI,CAAC,UAAU,QAAQ;AAE5C,MAAI,MAAM,EAAK,QAAO;AACtB,MAAI,MAAM,EAAK,QAAO;AACtB,SAAO;AACT;AAEA,MAAM,mBACJ;AACF,MAAM,mBAAmB;AACzB,MAAM,mBAAmB;AACzB,MAAM,mBAAmB;AAGlB,SAAS,kBAAkB,OAAwB;AACxD,MAAI,CAAC,MAAO,QAAO;AACnB,MAAI,iBAAiB,KAAK,KAAK,EAAG,QAAO;AACzC,MAAI,iBAAiB,KAAK,KAAK,GAAG;AAChC,UAAM,IAAI,MAAM,MAAM,gBAAgB;AACtC,QAAI,GAAG;AACL,YAAM,IAAI,OAAO,EAAE,CAAC,CAAC;AACrB,UAAI,KAAK,QAAQ,KAAK,KAAM,QAAO;AAAA,IACrC;AAAA,EACF;AACA,MAAI,iBAAiB,KAAK,KAAK,EAAG,QAAO;AACzC,MAAI,iBAAiB,KAAK,KAAK,EAAG,QAAO;AACzC,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { RawSearchResult } from '../../types.js';
|
|
2
|
+
/**
 * Reports whether URL paths on `hostname` should be lower-cased for dedup.
 * True when the host equals, or is a subdomain of, an entry in the built-in
 * case-insensitive host list or in the comma-separated
 * `WIGOLO_DEDUP_CASE_INSENSITIVE_HOSTS` environment variable.
 */
export declare function shouldLowercasePathForHost(hostname: string): boolean;
|
|
3
|
+
/**
 * Normalize a URL for dedup comparison. Throws on malformed input.
 *
 * Lower-cases the host, drops the fragment and default ports, removes
 * tracking parameters (`utm_*`, `gclid`, `fbclid`), sorts the remaining
 * query parameters by name, and strips a trailing slash when there is no
 * query string.
 */
|
|
4
|
+
export declare function normalizeUrlForDedup(url: string): string;
|
|
5
|
+
/**
 * Drops results whose normalized URL equals the normalized form of any
 * entry in `recentUrls`. Returns `results` unchanged when `recentUrls` is
 * missing, empty, or contains no parseable URL. Results whose own URL cannot
 * be parsed are kept.
 */
export declare function dedupAgainstRecentUrls(results: RawSearchResult[], recentUrls: string[] | undefined): RawSearchResult[];
|
|
6
|
+
//# sourceMappingURL=recent-cache-dedup.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recent-cache-dedup.d.ts","sourceRoot":"","sources":["../../../src/search/v1/recent-cache-dedup.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAiCtD,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAQpE;AAED,uEAAuE;AACvE,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA6BxD;AAUD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,eAAe,EAAE,EAC1B,UAAU,EAAE,MAAM,EAAE,GAAG,SAAS,GAC/B,eAAe,EAAE,CAcnB"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// Query-parameter name prefixes treated as tracking noise (matched with startsWith).
const TRACKING_PARAM_PREFIXES = ["utm_"];
|
|
2
|
+
// Query-parameter names treated as tracking noise (exact, case-sensitive match).
const TRACKING_PARAM_EXACT = /* @__PURE__ */ new Set(["gclid", "fbclid"]);
|
|
3
|
+
// Hosts whose URL paths are lower-cased before comparison; subdomains of
// these entries are covered as well by shouldLowercasePathForHost.
const DEFAULT_CASE_INSENSITIVE_HOSTS = /* @__PURE__ */ new Set([
|
|
4
|
+
"microsoft.com",
|
|
5
|
+
"learn.microsoft.com",
|
|
6
|
+
"docs.microsoft.com",
|
|
7
|
+
"msdn.microsoft.com",
|
|
8
|
+
"support.microsoft.com",
|
|
9
|
+
"archive.org",
|
|
10
|
+
"web.archive.org"
|
|
11
|
+
]);
|
|
12
|
+
/**
 * Decides whether a query-parameter name is a known tracking parameter.
 *
 * @param name Parameter name, compared case-sensitively.
 * @returns true for an exact match in TRACKING_PARAM_EXACT or a name that
 *   starts with one of TRACKING_PARAM_PREFIXES.
 */
function isTrackingParam(name) {
  return TRACKING_PARAM_EXACT.has(name) || TRACKING_PARAM_PREFIXES.some((prefix) => name.startsWith(prefix));
}
|
|
17
|
+
/**
 * Builds the set of hosts whose paths are compared case-insensitively.
 *
 * @returns A fresh Set with the built-in defaults plus any non-empty,
 *   trimmed and lower-cased entries from the comma-separated
 *   WIGOLO_DEDUP_CASE_INSENSITIVE_HOSTS environment variable, read on
 *   every call.
 */
function caseInsensitiveHosts() {
  const hosts = new Set(DEFAULT_CASE_INSENSITIVE_HOSTS);
  const configured = process.env.WIGOLO_DEDUP_CASE_INSENSITIVE_HOSTS;
  if (!configured) return hosts;
  const extras = configured.split(",").map((entry) => entry.trim().toLowerCase()).filter((entry) => entry.length > 0);
  for (const host of extras) {
    hosts.add(host);
  }
  return hosts;
}
|
|
28
|
+
/**
 * Reports whether paths on `hostname` should be lower-cased for dedup.
 *
 * @param hostname Host to test; compared case-insensitively.
 * @returns true when the host equals a case-insensitive host entry or is a
 *   subdomain of one.
 */
function shouldLowercasePathForHost(hostname) {
  const host = hostname.toLowerCase();
  return [...caseInsensitiveHosts()].some(
    (known) => host === known || host.endsWith("." + known)
  );
}
|
|
37
|
+
/**
 * Normalizes a URL so equivalent URLs compare equal during dedup.
 *
 * Lower-cases the host, drops the fragment and default ports, lower-cases
 * the path for hosts known to be case-insensitive, removes tracking
 * parameters, sorts the remaining parameters by name, and removes one
 * trailing slash from the path. The trailing slash is now removed whether or
 * not a query string follows; before, "/a/?x=1" and "/a?x=1" normalized to
 * different strings while "/a/" and "/a" did not.
 *
 * @param url Absolute URL string.
 * @returns The normalized URL string.
 * @throws {TypeError} When `url` cannot be parsed by the URL constructor.
 */
function normalizeUrlForDedup(url) {
  const u = new URL(url);
  u.hostname = u.hostname.toLowerCase();
  u.hash = "";
  if (u.protocol === "http:" && u.port === "80" || u.protocol === "https:" && u.port === "443") {
    u.port = "";
  }
  if (shouldLowercasePathForHost(u.hostname)) {
    u.pathname = u.pathname.toLowerCase();
  }
  const params = [...u.searchParams.entries()].filter(([k]) => !isTrackingParam(k));
  // Sort by name only, so repeated names keep their original value order.
  params.sort(([a], [b]) => a < b ? -1 : a > b ? 1 : 0);
  u.search = "";
  for (const [k, v] of params) u.searchParams.append(k, v);
  let out = u.toString();
  if (u.pathname !== "/" && out.endsWith("/") && !out.includes("?")) {
    out = out.slice(0, -1);
  } else if (u.pathname === "/" && !u.search) {
    // The serializer keeps "https://x.com/"; the canonical form is "https://x.com".
    out = out.replace(/\/$/, "");
  } else if (u.search && u.pathname.length > 1 && u.pathname.endsWith("/")) {
    // Same trailing-slash rule when a query string follows the path.
    const queryStart = out.indexOf("?");
    if (queryStart > 0 && out[queryStart - 1] === "/") {
      out = out.slice(0, queryStart - 1) + out.slice(queryStart);
    }
  }
  return out;
}
|
|
59
|
+
/**
 * Non-throwing wrapper around normalizeUrlForDedup.
 *
 * @param url URL string to normalize.
 * @returns The normalized URL, or null when normalization throws.
 */
function tryNormalize(url) {
  let normalized = null;
  try {
    normalized = normalizeUrlForDedup(url);
  } catch {
    normalized = null;
  }
  return normalized;
}
|
|
66
|
+
/**
 * Removes results that were already seen, comparing normalized URLs.
 *
 * @param results Candidate results; each is read through its `url`.
 * @param recentUrls URLs seen recently; may be undefined or empty.
 * @returns `results` itself when there is nothing usable to compare
 *   against, otherwise a new array without the already-seen URLs. Results
 *   whose URL cannot be normalized are kept.
 */
function dedupAgainstRecentUrls(results, recentUrls) {
  if (!recentUrls || recentUrls.length === 0) return results;
  const alreadySeen = new Set(
    recentUrls.map((recent) => tryNormalize(recent)).filter((key) => key !== null)
  );
  if (alreadySeen.size === 0) return results;
  return results.filter((result) => {
    const key = tryNormalize(result.url);
    return key === null || !alreadySeen.has(key);
  });
}
|
|
80
|
+
export {
|
|
81
|
+
dedupAgainstRecentUrls,
|
|
82
|
+
normalizeUrlForDedup,
|
|
83
|
+
shouldLowercasePathForHost
|
|
84
|
+
};
|
|
85
|
+
//# sourceMappingURL=recent-cache-dedup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/search/v1/recent-cache-dedup.ts"],"sourcesContent":["// Phase 9 — drop results the agent has already seen.\n//\n// Normalizes URLs for comparison: lowercased host, drops default ports,\n// strips trailing slash + fragment, removes tracking params (utm_*, gclid,\n// fbclid), sorts remaining params. Malformed URLs are kept (we don't drop\n// on parse error).\n//\n// For hosts known to serve paths case-insensitively (IIS, docs.microsoft.com,\n// archive.org, etc.) the path is also lowercased so /A and /a normalize the\n// same way. Callers can extend the allowlist via\n// WIGOLO_DEDUP_CASE_INSENSITIVE_HOSTS=a.com,b.org. Paths on github.com,\n// pypi.org, etc. stay case-sensitive (RFC 3986 default).\n\nimport type { RawSearchResult } from '../../types.js';\n\nconst TRACKING_PARAM_PREFIXES = ['utm_'];\nconst TRACKING_PARAM_EXACT = new Set(['gclid', 'fbclid']);\n\nconst DEFAULT_CASE_INSENSITIVE_HOSTS: ReadonlySet<string> = new Set([\n 'microsoft.com',\n 'learn.microsoft.com',\n 'docs.microsoft.com',\n 'msdn.microsoft.com',\n 'support.microsoft.com',\n 'archive.org',\n 'web.archive.org',\n]);\n\nfunction isTrackingParam(name: string): boolean {\n if (TRACKING_PARAM_EXACT.has(name)) return true;\n for (const p of TRACKING_PARAM_PREFIXES) if (name.startsWith(p)) return true;\n return false;\n}\n\nfunction caseInsensitiveHosts(): Set<string> {\n const set = new Set(DEFAULT_CASE_INSENSITIVE_HOSTS);\n const extra = process.env.WIGOLO_DEDUP_CASE_INSENSITIVE_HOSTS;\n if (extra) {\n for (const raw of extra.split(',')) {\n const h = raw.trim().toLowerCase();\n if (h) set.add(h);\n }\n }\n return set;\n}\n\nexport function shouldLowercasePathForHost(hostname: string): boolean {\n const h = hostname.toLowerCase();\n const hosts = caseInsensitiveHosts();\n if (hosts.has(h)) return true;\n for (const known of hosts) {\n if (h.endsWith('.' + known)) return true;\n }\n return false;\n}\n\n/** Normalize a URL for dedup comparison. 
Throws on malformed input. */\nexport function normalizeUrlForDedup(url: string): string {\n const u = new URL(url);\n u.hostname = u.hostname.toLowerCase();\n u.hash = '';\n if (\n (u.protocol === 'http:' && u.port === '80') ||\n (u.protocol === 'https:' && u.port === '443')\n ) {\n u.port = '';\n }\n\n if (shouldLowercasePathForHost(u.hostname)) {\n u.pathname = u.pathname.toLowerCase();\n }\n\n const params = [...u.searchParams.entries()].filter(([k]) => !isTrackingParam(k));\n params.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));\n u.search = '';\n for (const [k, v] of params) u.searchParams.append(k, v);\n\n let out = u.toString();\n // Drop trailing slash on the path when there are no params (URL serializer\n // keeps \"https://x.com/\" — we want \"https://x.com\").\n if (u.pathname !== '/' && out.endsWith('/') && !out.includes('?')) {\n out = out.slice(0, -1);\n } else if (u.pathname === '/' && !u.search) {\n out = out.replace(/\\/$/, '');\n }\n return out;\n}\n\nfunction tryNormalize(url: string): string | null {\n try {\n return normalizeUrlForDedup(url);\n } catch {\n return null;\n }\n}\n\nexport function dedupAgainstRecentUrls(\n results: RawSearchResult[],\n recentUrls: string[] | undefined,\n): RawSearchResult[] {\n if (!recentUrls || recentUrls.length === 0) return results;\n const seen = new Set<string>();\n for (const u of recentUrls) {\n const n = tryNormalize(u);\n if (n !== null) seen.add(n);\n }\n if (seen.size === 0) return results;\n\n return results.filter((r) => {\n const n = tryNormalize(r.url);\n if (n === null) return true; // keep on parse error\n return !seen.has(n);\n 
});\n}\n"],"mappings":"AAeA,MAAM,0BAA0B,CAAC,MAAM;AACvC,MAAM,uBAAuB,oBAAI,IAAI,CAAC,SAAS,QAAQ,CAAC;AAExD,MAAM,iCAAsD,oBAAI,IAAI;AAAA,EAClE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,gBAAgB,MAAuB;AAC9C,MAAI,qBAAqB,IAAI,IAAI,EAAG,QAAO;AAC3C,aAAW,KAAK,wBAAyB,KAAI,KAAK,WAAW,CAAC,EAAG,QAAO;AACxE,SAAO;AACT;AAEA,SAAS,uBAAoC;AAC3C,QAAM,MAAM,IAAI,IAAI,8BAA8B;AAClD,QAAM,QAAQ,QAAQ,IAAI;AAC1B,MAAI,OAAO;AACT,eAAW,OAAO,MAAM,MAAM,GAAG,GAAG;AAClC,YAAM,IAAI,IAAI,KAAK,EAAE,YAAY;AACjC,UAAI,EAAG,KAAI,IAAI,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,2BAA2B,UAA2B;AACpE,QAAM,IAAI,SAAS,YAAY;AAC/B,QAAM,QAAQ,qBAAqB;AACnC,MAAI,MAAM,IAAI,CAAC,EAAG,QAAO;AACzB,aAAW,SAAS,OAAO;AACzB,QAAI,EAAE,SAAS,MAAM,KAAK,EAAG,QAAO;AAAA,EACtC;AACA,SAAO;AACT;AAGO,SAAS,qBAAqB,KAAqB;AACxD,QAAM,IAAI,IAAI,IAAI,GAAG;AACrB,IAAE,WAAW,EAAE,SAAS,YAAY;AACpC,IAAE,OAAO;AACT,MACG,EAAE,aAAa,WAAW,EAAE,SAAS,QACrC,EAAE,aAAa,YAAY,EAAE,SAAS,OACvC;AACA,MAAE,OAAO;AAAA,EACX;AAEA,MAAI,2BAA2B,EAAE,QAAQ,GAAG;AAC1C,MAAE,WAAW,EAAE,SAAS,YAAY;AAAA,EACtC;AAEA,QAAM,SAAS,CAAC,GAAG,EAAE,aAAa,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;AAChF,SAAO,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAO,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAE;AACtD,IAAE,SAAS;AACX,aAAW,CAAC,GAAG,CAAC,KAAK,OAAQ,GAAE,aAAa,OAAO,GAAG,CAAC;AAEvD,MAAI,MAAM,EAAE,SAAS;AAGrB,MAAI,EAAE,aAAa,OAAO,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,SAAS,GAAG,GAAG;AACjE,UAAM,IAAI,MAAM,GAAG,EAAE;AAAA,EACvB,WAAW,EAAE,aAAa,OAAO,CAAC,EAAE,QAAQ;AAC1C,UAAM,IAAI,QAAQ,OAAO,EAAE;AAAA,EAC7B;AACA,SAAO;AACT;AAEA,SAAS,aAAa,KAA4B;AAChD,MAAI;AACF,WAAO,qBAAqB,GAAG;AAAA,EACjC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,uBACd,SACA,YACmB;AACnB,MAAI,CAAC,cAAc,WAAW,WAAW,EAAG,QAAO;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,KAAK,YAAY;AAC1B,UAAM,IAAI,aAAa,CAAC;AACxB,QAAI,MAAM,KAAM,MAAK,IAAI,CAAC;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,EAAG,QAAO;AAE5B,SAAO,QAAQ,OAAO,CAAC,MAAM;AAC3B,UAAM,IAAI,aAAa,EAAE,GAAG;AAC5B,QAAI,MAAM,KAAM,QAAO;AACvB,WAAO,CAAC,KAAK,IAAI,CAAC;AAAA,EACpB,CAAC;AACH;","names":[]}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * One configured feed, as listed in `LoadFeedConfigResult.feeds`.
 * `url` identifies the feed; `loadFeedConfig` deduplicates entries by it.
 */
export interface FeedConfig {
|
|
2
|
+
url: string;
|
|
3
|
+
/** Optional category override; defaults to 'news'. */
|
|
4
|
+
category?: string;
|
|
5
|
+
/** Override poll interval per feed in seconds. */
|
|
6
|
+
intervalSec?: number;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Return value of `loadFeedConfig`: the feed list plus which origins
 * ('env' and/or 'file') it was read from.
 */
export interface LoadFeedConfigResult {
|
|
9
|
+
feeds: FeedConfig[];
|
|
10
|
+
/** Where feeds came from. */
|
|
11
|
+
sources: Array<'env' | 'file'>;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Load feeds from env (`WIGOLO_RSS_FEEDS` — comma-separated URLs) and
 * JSON file (`<dataDir>/rss-feeds.json`). Env entries win on URL collision.
 * Dedup by URL.
 *
 * @param opts Optional settings; `dataDir` is the directory that holds
 *   `rss-feeds.json`. NOTE(review): the directory used when `dataDir` is
 *   omitted is not visible from this declaration — confirm in the
 *   implementation.
 * @returns The feed list and the origins it was read from.
 */
|
|
18
|
+
export declare function loadFeedConfig(opts?: {
|
|
19
|
+
dataDir?: string;
|
|
20
|
+
}): LoadFeedConfigResult;
|
|
21
|
+
//# sourceMappingURL=feed-config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feed-config.d.ts","sourceRoot":"","sources":["../../../../src/search/v1/rss/feed-config.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kDAAkD;IAClD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,6BAA6B;IAC7B,OAAO,EAAE,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC;CAChC;AAmFD;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,IAAI,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,oBAAoB,CAchF"}
|