@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getVectorStore
|
|
3
|
+
} from "../providers/vector-store.js";
|
|
4
|
+
import {
|
|
5
|
+
updateCacheEmbedding,
|
|
6
|
+
getAllEmbeddings,
|
|
7
|
+
normalizeUrl
|
|
8
|
+
} from "../cache/store.js";
|
|
9
|
+
import { FastembedEmbedProvider } from "./fastembed-provider.js";
|
|
10
|
+
import { createLogger } from "../logger.js";
|
|
11
|
+
// Module-level logger tagged "embedding"; every log call in this file goes through it.
const log = createLogger("embedding");
|
|
12
|
+
/**
 * Embedding service that couples an embed provider with the VectorStore
 * returned by `getVectorStore()`.
 *
 * Typical lifecycle: construct -> `init()` -> `embedAndStore()` /
 * `embedAsync()` / `findSimilar()` -> `shutdown()`. The public methods are
 * best-effort: failures are logged and surfaced as "nothing happened" /
 * empty results instead of being thrown to the caller.
 */
class EmbeddingService {
  /** Embed provider; this class uses its `embed(texts)`, `modelId` and `dim` members. */
  provider;
  /** VectorStore handle obtained in `init()`; null before init and after `shutdown()`. */
  store = null;
  /**
   * Normalized URLs this instance knows are in the store (migrated or
   * embedded by this instance). Not a full listing of the store.
   */
  knownUrls = new Set();
  /** True once `init()` obtained a store (or a caller forced it via `setAvailable`). */
  available = false;
  /** True once the provider answered the probe embed issued by `init()`. */
  providerVerified = false;
  /**
   * Last store size observed. `store.size()` is async, while
   * `getIndex().size()` must answer synchronously, so the value is cached
   * here and refreshed whenever this instance touches the store.
   */
  cachedSize = 0;

  /**
   * @param provider Optional embed provider. Defaults to a new
   *   `FastembedEmbedProvider` when omitted or nullish.
   */
  constructor(provider) {
    this.provider = provider ?? new FastembedEmbedProvider();
  }

  /**
   * Acquire the vector store, migrate legacy embeddings when the store is
   * empty, and probe the provider with a throwaway embed call.
   *
   * Never throws: a failure to obtain the store leaves the service
   * unavailable; migration and probe failures are logged and tolerated.
   *
   * NOTE(review): when the probe fails the log line says "embeddings
   * disabled", yet `available` is still set to true and only
   * `isSubprocessReady()` reports the failure. Kept as-is; confirm intent.
   */
  async init() {
    try {
      this.store = await getVectorStore();

      try {
        const existingSize = await this.store.size();
        if (existingSize === 0) {
          await this.migrateLegacyEmbeddings();
        } else {
          // Seed the synchronous counter so getIndex().size() is correct
          // right away instead of reporting 0 for a populated store.
          this.cachedSize = existingSize;
        }
      } catch (err) {
        log.warn("embedding migration check failed", {
          error: err instanceof Error ? err.message : String(err)
        });
      }

      try {
        await this.provider.embed(["embedding service probe"]);
        this.providerVerified = true;
        log.info("embedding provider verified", {
          modelId: this.provider.modelId,
          dim: this.provider.dim
        });
      } catch (err) {
        log.warn("embedding provider probe failed \u2014 embeddings disabled", {
          error: err instanceof Error ? err.message : String(err)
        });
        this.providerVerified = false;
      }

      this.available = true;
    } catch (err) {
      log.error("EmbeddingService init failed", { error: String(err) });
      this.available = false;
    }
  }

  /** @returns Whether the service currently accepts embed/search requests. */
  isAvailable() {
    return this.available;
  }

  /** Force the availability flag. @param value New availability. */
  setAvailable(value) {
    this.available = value;
  }

  /** Backwards-compat alias preserved for callers that gated on subprocess readiness. */
  isSubprocessReady() {
    return this.providerVerified;
  }

  /**
   * Lightweight, synchronous index view.
   *
   * - `size()` returns the cached store size while a store is attached,
   *   otherwise the number of locally known URLs. It reads the live state
   *   of the service, so a view obtained before `init()` stays accurate.
   * - `has(url)` answers from the local known-URL set. Because that set
   *   holds normalized URLs, the argument is checked both verbatim and in
   *   normalized form.
   *
   * Callers that need richer access should use `getVectorStore()` directly.
   *
   * @returns An object with `size(): number` and `has(url): boolean`.
   */
  getIndex() {
    const knownUrls = this.knownUrls;
    return {
      size: () => (this.store ? this.cachedSize : knownUrls.size),
      has: (url) => {
        if (knownUrls.has(url)) return true;
        try {
          return knownUrls.has(normalizeUrl(url));
        } catch {
          // Same tolerance as embedAndStore: an un-normalizable URL is
          // simply treated as its raw form, which was already checked.
          return false;
        }
      }
    };
  }

  /**
   * Embed `markdown` and persist the vector for `url`, both through
   * `updateCacheEmbedding` and, when a store is attached, in the store.
   *
   * No-op when the service is unavailable or the provider returns an empty
   * vector. Never throws; failures are logged.
   *
   * @param url Page URL; normalized before being used as the record id.
   * @param markdown Text to embed.
   */
  async embedAndStore(url, markdown) {
    if (!this.available) {
      log.debug("embedding skipped: service not available", { url });
      return;
    }
    try {
      const [vector] = await this.provider.embed([markdown]);
      if (!vector || vector.length === 0) {
        log.warn("embedding returned empty vector", { url });
        return;
      }

      // View over the same bytes as the typed array (no copy).
      const buffer = Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);
      const model = this.provider.modelId;
      const dims = vector.length;

      let normalizedUrl;
      try {
        normalizedUrl = normalizeUrl(url);
      } catch {
        normalizedUrl = url;
      }

      updateCacheEmbedding(normalizedUrl, buffer, model, dims);

      // Capture the handle so a concurrent shutdown() cannot null it mid-flight.
      const store = this.store;
      if (store) {
        const record = {
          id: normalizedUrl,
          vector,
          metadata: { url: normalizedUrl, contentHash: "", modelId: model }
        };
        await store.upsert([record]);

        const firstSeenHere = !this.knownUrls.has(normalizedUrl);
        this.knownUrls.add(normalizedUrl);
        try {
          // The store is the source of truth: knownUrls starts empty after a
          // restart, so counting locally would miscount re-embedded URLs.
          this.cachedSize = await store.size();
        } catch {
          if (firstSeenHere) this.cachedSize += 1;
        }
      }

      log.debug("embedded and stored", { url: normalizedUrl, dims });
    } catch (err) {
      log.warn("embedAndStore failed", { url, error: String(err) });
    }
  }

  /**
   * Fire-and-forget variant of `embedAndStore`; returns immediately.
   *
   * @param url Page URL.
   * @param markdown Text to embed.
   */
  embedAsync(url, markdown) {
    if (!this.available) return;
    this.embedAndStore(url, markdown).catch((err) => {
      log.warn("async embedding failed", { url, error: String(err) });
    });
  }

  /**
   * Embed `queryText` and return up to `topK` nearest stored entries.
   *
   * @param queryText Text to search with.
   * @param topK Maximum number of results; values that are not > 0 yield `[]`.
   * @param excludeUrls Optional set of record ids to drop from the results.
   * @returns Array of `{ url, score }` in the order the store returned them;
   *   empty when unavailable, when the store is empty, or on any failure.
   */
  async findSimilar(queryText, topK, excludeUrls) {
    if (!this.available || !this.store) {
      return [];
    }
    // Without this guard a non-positive topK could still yield one hit,
    // because the loop below pushes before it checks the limit.
    if (!(topK > 0)) {
      return [];
    }
    if (this.cachedSize === 0) {
      // Refresh once before giving up so a store populated elsewhere is seen.
      try {
        this.cachedSize = await this.store.size();
      } catch {
        this.cachedSize = 0;
      }
      if (this.cachedSize === 0) return [];
    }
    try {
      const [queryVector] = await this.provider.embed([queryText]);
      if (!queryVector || queryVector.length === 0) {
        log.warn("query embedding failed: empty vector");
        return [];
      }

      // Over-fetch when exclusions are present so filtering can still fill topK.
      const overscan =
        excludeUrls && excludeUrls.size > 0
          ? Math.max(topK + excludeUrls.size, topK * 2)
          : topK;
      const hits = await this.store.search(queryVector, overscan);

      const results = [];
      for (const hit of hits) {
        if (excludeUrls?.has(hit.id)) continue;
        results.push({ url: hit.id, score: hit.score });
        if (results.length >= topK) break;
      }
      return results;
    } catch (err) {
      log.warn("findSimilar failed", { error: String(err) });
      return [];
    }
  }

  /** Drop the store handle and local state and mark the service unavailable. */
  shutdown() {
    try {
      this.knownUrls.clear();
      this.cachedSize = 0;
      this.store = null;
      this.available = false;
      this.providerVerified = false;
      log.info("EmbeddingService shut down");
    } catch (err) {
      log.error("EmbeddingService shutdown error", { error: String(err) });
    }
  }

  /**
   * Copy embeddings returned by `getAllEmbeddings(modelId)` into the store.
   *
   * Rows without an embedding, with a non-positive `dims`, or whose blob is
   * shorter than `dims` float32 values are skipped. URLs are recorded as
   * known only after the batch upsert succeeded, so `getIndex().has()`
   * never reports entries the store does not hold.
   *
   * May reject if the store's `upsert`/`size` rejects; `init()` catches that.
   */
  async migrateLegacyEmbeddings() {
    const store = this.store;
    if (!store) return;

    const legacy = getAllEmbeddings(this.provider.modelId);
    const records = [];
    for (const row of legacy) {
      if (!row.embedding || !(row.dims > 0)) continue;

      const expectedBytes = row.dims * Float32Array.BYTES_PER_ELEMENT;
      if (row.embedding.byteLength < expectedBytes) {
        // ArrayBuffer.slice clamps silently; without this check a truncated
        // blob would decode into a vector with fewer dims than recorded.
        log.warn("legacy embedding migration: failed to decode vector", {
          url: row.normalizedUrl,
          error: `expected ${expectedBytes} bytes, got ${row.embedding.byteLength}`
        });
        continue;
      }

      try {
        // slice() copies the bytes, giving an aligned, independent buffer.
        const start = row.embedding.byteOffset;
        const vector = new Float32Array(
          row.embedding.buffer.slice(start, start + expectedBytes)
        );
        records.push({
          id: row.normalizedUrl,
          vector,
          metadata: {
            url: row.normalizedUrl,
            contentHash: "",
            modelId: row.model
          }
        });
      } catch (err) {
        log.warn("legacy embedding migration: failed to decode vector", {
          url: row.normalizedUrl,
          error: err instanceof Error ? err.message : String(err)
        });
      }
    }

    if (records.length === 0) {
      this.cachedSize = 0;
      return;
    }

    log.info("migrating embeddings into sqlite-vec store", { count: records.length });
    await store.upsert(records);
    for (const record of records) {
      this.knownUrls.add(record.id);
    }
    this.cachedSize = await store.size();
  }
}
|
|
215
|
+
/** Shared EmbeddingService instance; null until getEmbeddingService() creates it. */
let globalInstance = null;

/**
 * Return the shared EmbeddingService, constructing it with the default
 * provider on first use. Later calls return the same instance until
 * resetEmbeddingService() clears it.
 */
function getEmbeddingService() {
  return globalInstance ?? (globalInstance = new EmbeddingService());
}
|
|
222
|
+
/**
 * Shut down and forget the shared EmbeddingService, if one exists.
 * Does nothing when no instance has been created.
 */
function resetEmbeddingService() {
  const current = globalInstance;
  if (!current) return;
  current.shutdown();
  globalInstance = null;
}
|
|
228
|
+
export {
|
|
229
|
+
EmbeddingService,
|
|
230
|
+
getEmbeddingService,
|
|
231
|
+
resetEmbeddingService
|
|
232
|
+
};
|
|
233
|
+
//# sourceMappingURL=embed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/embedding/embed.ts"],"sourcesContent":["import type { EmbedProvider } from '../providers/embed-provider.js';\nimport {\n getVectorStore,\n type VectorStore,\n type VectorRecord,\n} from '../providers/vector-store.js';\nimport {\n updateCacheEmbedding,\n getAllEmbeddings,\n normalizeUrl,\n} from '../cache/store.js';\nimport { FastembedEmbedProvider } from './fastembed-provider.js';\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('embedding');\n\nexport interface SimilarResult {\n url: string;\n score: number;\n}\n\n/**\n * Index shim exposed by `getIndex()` for callers that still need\n * lightweight size/membership checks. Kept narrow so future stores can\n * implement it without dragging in extra surface area.\n */\nexport interface IndexView {\n size(): number;\n has(url: string): boolean;\n}\n\n/**\n * Embedding service backed by the native fastembed (ONNX) provider and\n * the sqlite-vec VectorStore.\n *\n * Phase 5 replaced the in-memory VectorIndex with the sqlite-vec backed\n * store accessed via getVectorStore(). The public surface (init /\n * embedAndStore / embedAsync / findSimilar / getIndex / isAvailable /\n * shutdown) is unchanged so callers in server.ts, tools/fetch.ts,\n * research/pipeline.ts, search/find-similar.ts, and the legacy SearXNG\n * orchestrator continue to work without modification.\n */\nexport class EmbeddingService {\n private provider: EmbedProvider;\n private store: VectorStore | null = null;\n private knownUrls = new Set<string>();\n private available = false;\n private providerVerified = false;\n\n constructor(provider?: EmbedProvider) {\n this.provider = provider ?? new FastembedEmbedProvider();\n }\n\n async init(): Promise<void> {\n try {\n this.store = await getVectorStore();\n\n // Migrate any embeddings persisted in url_cache (pre-Phase-5 layout)\n // into the sqlite-vec backed store on first use. 
Skips on hit so\n // re-init is cheap.\n try {\n const existingSize = await this.store.size();\n if (existingSize === 0) {\n await this.migrateLegacyEmbeddings();\n } else {\n // Seed knownUrls from the store so embedAndStore can avoid\n // unnecessary re-upserts when content has not changed.\n // The current store has no list API, so we leave knownUrls empty\n // and rely on upsert idempotency.\n }\n } catch (err) {\n log.warn('embedding migration check failed', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n\n // Probe the provider so we know up front whether ONNX init works.\n try {\n await this.provider.embed(['embedding service probe']);\n this.providerVerified = true;\n log.info('embedding provider verified', {\n modelId: this.provider.modelId,\n dim: this.provider.dim,\n });\n } catch (err) {\n log.warn('embedding provider probe failed — embeddings disabled', {\n error: err instanceof Error ? err.message : String(err),\n });\n this.providerVerified = false;\n }\n\n this.available = true;\n } catch (err) {\n log.error('EmbeddingService init failed', { error: String(err) });\n this.available = false;\n }\n }\n\n isAvailable(): boolean {\n return this.available;\n }\n\n setAvailable(value: boolean): void {\n this.available = value;\n }\n\n /** Backwards-compat alias preserved for callers that gated on subprocess readiness. */\n isSubprocessReady(): boolean {\n return this.providerVerified;\n }\n\n /**\n * Lightweight index view. Returns `size` from the backing VectorStore and\n * `has` from a local URL-cache populated by embedAndStore. Callers that\n * need richer access should consume the VectorStore directly via\n * `getVectorStore()`.\n */\n getIndex(): IndexView {\n const knownUrls = this.knownUrls;\n const store = this.store;\n return {\n size: () => (store ? this.cachedSize : knownUrls.size),\n has: (url: string) => knownUrls.has(url),\n };\n }\n\n /**\n * Cached size from the store, refreshed after upserts. 
Reads from a\n * VectorStore would be async; getIndex().size() callers expect a\n * synchronous return so we maintain this counter.\n */\n private cachedSize = 0;\n\n async embedAndStore(url: string, markdown: string): Promise<void> {\n if (!this.available) {\n log.debug('embedding skipped: service not available', { url });\n return;\n }\n\n try {\n const [vector] = await this.provider.embed([markdown]);\n if (!vector || vector.length === 0) {\n log.warn('embedding returned empty vector', { url });\n return;\n }\n\n const buffer = Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);\n const model = this.provider.modelId;\n const dims = vector.length;\n\n let normalizedUrl: string;\n try {\n normalizedUrl = normalizeUrl(url);\n } catch {\n normalizedUrl = url;\n }\n\n updateCacheEmbedding(normalizedUrl, buffer, model, dims);\n\n if (this.store) {\n const record: VectorRecord = {\n id: normalizedUrl,\n vector,\n metadata: { url: normalizedUrl, contentHash: '', modelId: model },\n };\n await this.store.upsert([record]);\n if (!this.knownUrls.has(normalizedUrl)) {\n this.knownUrls.add(normalizedUrl);\n this.cachedSize += 1;\n }\n }\n\n log.debug('embedded and stored', { url: normalizedUrl, dims });\n } catch (err) {\n log.warn('embedAndStore failed', { url, error: String(err) });\n }\n }\n\n embedAsync(url: string, markdown: string): void {\n if (!this.available) return;\n\n this.embedAndStore(url, markdown).catch(err => {\n log.warn('async embedding failed', { url, error: String(err) });\n });\n }\n\n async findSimilar(\n queryText: string,\n topK: number,\n excludeUrls?: Set<string>,\n ): Promise<SimilarResult[]> {\n if (!this.available || !this.store) {\n return [];\n }\n if (this.cachedSize === 0) {\n // Refresh once before returning empty so newly-populated stores\n // (e.g. 
legacy migration just finished) are visible to callers.\n try {\n this.cachedSize = await this.store.size();\n } catch {\n this.cachedSize = 0;\n }\n if (this.cachedSize === 0) return [];\n }\n\n try {\n const [queryVector] = await this.provider.embed([queryText]);\n if (!queryVector || queryVector.length === 0) {\n log.warn('query embedding failed: empty vector');\n return [];\n }\n\n const overscan = excludeUrls && excludeUrls.size > 0\n ? Math.max(topK + excludeUrls.size, topK * 2)\n : topK;\n const hits = await this.store.search(queryVector, overscan);\n\n const results: SimilarResult[] = [];\n for (const hit of hits) {\n if (excludeUrls?.has(hit.id)) continue;\n results.push({ url: hit.id, score: hit.score });\n if (results.length >= topK) break;\n }\n return results;\n } catch (err) {\n log.warn('findSimilar failed', { error: String(err) });\n return [];\n }\n }\n\n shutdown(): void {\n try {\n this.knownUrls.clear();\n this.cachedSize = 0;\n this.store = null;\n this.available = false;\n this.providerVerified = false;\n log.info('EmbeddingService shut down');\n } catch (err) {\n log.error('EmbeddingService shutdown error', { error: String(err) });\n }\n }\n\n private async migrateLegacyEmbeddings(): Promise<void> {\n if (!this.store) return;\n const legacy = getAllEmbeddings(this.provider.modelId);\n if (legacy.length === 0) {\n this.cachedSize = 0;\n return;\n }\n\n const records: VectorRecord[] = [];\n for (const row of legacy) {\n if (!row.embedding || row.dims <= 0) continue;\n try {\n const vector = new Float32Array(\n row.embedding.buffer.slice(\n row.embedding.byteOffset,\n row.embedding.byteOffset + row.dims * Float32Array.BYTES_PER_ELEMENT,\n ),\n );\n records.push({\n id: row.normalizedUrl,\n vector,\n metadata: {\n url: row.normalizedUrl,\n contentHash: '',\n modelId: row.model,\n },\n });\n this.knownUrls.add(row.normalizedUrl);\n } catch (err) {\n log.warn('legacy embedding migration: failed to decode vector', {\n url: row.normalizedUrl,\n 
error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n if (records.length === 0) {\n this.cachedSize = 0;\n return;\n }\n\n log.info('migrating embeddings into sqlite-vec store', { count: records.length });\n await this.store.upsert(records);\n this.cachedSize = await this.store.size();\n }\n}\n\nlet globalInstance: EmbeddingService | null = null;\n\nexport function getEmbeddingService(): EmbeddingService {\n if (!globalInstance) {\n globalInstance = new EmbeddingService();\n }\n return globalInstance;\n}\n\nexport function resetEmbeddingService(): void {\n if (globalInstance) {\n globalInstance.shutdown();\n globalInstance = null;\n }\n}\n"],"mappings":"AACA;AAAA,EACE;AAAA,OAGK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,8BAA8B;AACvC,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,WAAW;AA4B7B,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA,QAA4B;AAAA,EAC5B,YAAY,oBAAI,IAAY;AAAA,EAC5B,YAAY;AAAA,EACZ,mBAAmB;AAAA,EAE3B,YAAY,UAA0B;AACpC,SAAK,WAAW,YAAY,IAAI,uBAAuB;AAAA,EACzD;AAAA,EAEA,MAAM,OAAsB;AAC1B,QAAI;AACF,WAAK,QAAQ,MAAM,eAAe;AAKlC,UAAI;AACF,cAAM,eAAe,MAAM,KAAK,MAAM,KAAK;AAC3C,YAAI,iBAAiB,GAAG;AACtB,gBAAM,KAAK,wBAAwB;AAAA,QACrC,OAAO;AAAA,QAKP;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,KAAK,oCAAoC;AAAA,UAC3C,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAGA,UAAI;AACF,cAAM,KAAK,SAAS,MAAM,CAAC,yBAAyB,CAAC;AACrD,aAAK,mBAAmB;AACxB,YAAI,KAAK,+BAA+B;AAAA,UACtC,SAAS,KAAK,SAAS;AAAA,UACvB,KAAK,KAAK,SAAS;AAAA,QACrB,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,YAAI,KAAK,8DAAyD;AAAA,UAChE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AACD,aAAK,mBAAmB;AAAA,MAC1B;AAEA,WAAK,YAAY;AAAA,IACnB,SAAS,KAAK;AACZ,UAAI,MAAM,gCAAgC,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAChE,WAAK,YAAY;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,aAAa,OAAsB;AACjC,SAAK,YAAY;AAAA,EACnB;AAAA;AAAA,EAGA,oBAA6B;AAC3B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,WAAsB;AACpB,UAAM,YAAY,KAAK;AACvB,UAAM,QAAQ,KAAK;AACnB,WAAO;AAAA,MACL,MAAM,MAAO,QAAQ,KAAK,aAAa,UAAU;AAAA,MACjD,KAAK,CAAC,QAAgB,UA
AU,IAAI,GAAG;AAAA,IACzC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,aAAa;AAAA,EAErB,MAAM,cAAc,KAAa,UAAiC;AAChE,QAAI,CAAC,KAAK,WAAW;AACnB,UAAI,MAAM,4CAA4C,EAAE,IAAI,CAAC;AAC7D;AAAA,IACF;AAEA,QAAI;AACF,YAAM,CAAC,MAAM,IAAI,MAAM,KAAK,SAAS,MAAM,CAAC,QAAQ,CAAC;AACrD,UAAI,CAAC,UAAU,OAAO,WAAW,GAAG;AAClC,YAAI,KAAK,mCAAmC,EAAE,IAAI,CAAC;AACnD;AAAA,MACF;AAEA,YAAM,SAAS,OAAO,KAAK,OAAO,QAAQ,OAAO,YAAY,OAAO,UAAU;AAC9E,YAAM,QAAQ,KAAK,SAAS;AAC5B,YAAM,OAAO,OAAO;AAEpB,UAAI;AACJ,UAAI;AACF,wBAAgB,aAAa,GAAG;AAAA,MAClC,QAAQ;AACN,wBAAgB;AAAA,MAClB;AAEA,2BAAqB,eAAe,QAAQ,OAAO,IAAI;AAEvD,UAAI,KAAK,OAAO;AACd,cAAM,SAAuB;AAAA,UAC3B,IAAI;AAAA,UACJ;AAAA,UACA,UAAU,EAAE,KAAK,eAAe,aAAa,IAAI,SAAS,MAAM;AAAA,QAClE;AACA,cAAM,KAAK,MAAM,OAAO,CAAC,MAAM,CAAC;AAChC,YAAI,CAAC,KAAK,UAAU,IAAI,aAAa,GAAG;AACtC,eAAK,UAAU,IAAI,aAAa;AAChC,eAAK,cAAc;AAAA,QACrB;AAAA,MACF;AAEA,UAAI,MAAM,uBAAuB,EAAE,KAAK,eAAe,KAAK,CAAC;AAAA,IAC/D,SAAS,KAAK;AACZ,UAAI,KAAK,wBAAwB,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,WAAW,KAAa,UAAwB;AAC9C,QAAI,CAAC,KAAK,UAAW;AAErB,SAAK,cAAc,KAAK,QAAQ,EAAE,MAAM,SAAO;AAC7C,UAAI,KAAK,0BAA0B,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,IAChE,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,YACJ,WACA,MACA,aAC0B;AAC1B,QAAI,CAAC,KAAK,aAAa,CAAC,KAAK,OAAO;AAClC,aAAO,CAAC;AAAA,IACV;AACA,QAAI,KAAK,eAAe,GAAG;AAGzB,UAAI;AACF,aAAK,aAAa,MAAM,KAAK,MAAM,KAAK;AAAA,MAC1C,QAAQ;AACN,aAAK,aAAa;AAAA,MACpB;AACA,UAAI,KAAK,eAAe,EAAG,QAAO,CAAC;AAAA,IACrC;AAEA,QAAI;AACF,YAAM,CAAC,WAAW,IAAI,MAAM,KAAK,SAAS,MAAM,CAAC,SAAS,CAAC;AAC3D,UAAI,CAAC,eAAe,YAAY,WAAW,GAAG;AAC5C,YAAI,KAAK,sCAAsC;AAC/C,eAAO,CAAC;AAAA,MACV;AAEA,YAAM,WAAW,eAAe,YAAY,OAAO,IAC/C,KAAK,IAAI,OAAO,YAAY,MAAM,OAAO,CAAC,IAC1C;AACJ,YAAM,OAAO,MAAM,KAAK,MAAM,OAAO,aAAa,QAAQ;AAE1D,YAAM,UAA2B,CAAC;AAClC,iBAAW,OAAO,MAAM;AACtB,YAAI,aAAa,IAAI,IAAI,EAAE,EAAG;AAC9B,gBAAQ,KAAK,EAAE,KAAK,IAAI,IAAI,OAAO,IAAI,MAAM,CAAC;AAC9C,YAAI,QAAQ,UAAU,KAAM;AAAA,MAC9B;AACA,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,UAAI,KAAK,sBAAsB,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AACrD,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AAAA,EAEA,WAAiB;
AACf,QAAI;AACF,WAAK,UAAU,MAAM;AACrB,WAAK,aAAa;AAClB,WAAK,QAAQ;AACb,WAAK,YAAY;AACjB,WAAK,mBAAmB;AACxB,UAAI,KAAK,4BAA4B;AAAA,IACvC,SAAS,KAAK;AACZ,UAAI,MAAM,mCAAmC,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,IACrE;AAAA,EACF;AAAA,EAEA,MAAc,0BAAyC;AACrD,QAAI,CAAC,KAAK,MAAO;AACjB,UAAM,SAAS,iBAAiB,KAAK,SAAS,OAAO;AACrD,QAAI,OAAO,WAAW,GAAG;AACvB,WAAK,aAAa;AAClB;AAAA,IACF;AAEA,UAAM,UAA0B,CAAC;AACjC,eAAW,OAAO,QAAQ;AACxB,UAAI,CAAC,IAAI,aAAa,IAAI,QAAQ,EAAG;AACrC,UAAI;AACF,cAAM,SAAS,IAAI;AAAA,UACjB,IAAI,UAAU,OAAO;AAAA,YACnB,IAAI,UAAU;AAAA,YACd,IAAI,UAAU,aAAa,IAAI,OAAO,aAAa;AAAA,UACrD;AAAA,QACF;AACA,gBAAQ,KAAK;AAAA,UACX,IAAI,IAAI;AAAA,UACR;AAAA,UACA,UAAU;AAAA,YACR,KAAK,IAAI;AAAA,YACT,aAAa;AAAA,YACb,SAAS,IAAI;AAAA,UACf;AAAA,QACF,CAAC;AACD,aAAK,UAAU,IAAI,IAAI,aAAa;AAAA,MACtC,SAAS,KAAK;AACZ,YAAI,KAAK,uDAAuD;AAAA,UAC9D,KAAK,IAAI;AAAA,UACT,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,QAAQ,WAAW,GAAG;AACxB,WAAK,aAAa;AAClB;AAAA,IACF;AAEA,QAAI,KAAK,8CAA8C,EAAE,OAAO,QAAQ,OAAO,CAAC;AAChF,UAAM,KAAK,MAAM,OAAO,OAAO;AAC/B,SAAK,aAAa,MAAM,KAAK,MAAM,KAAK;AAAA,EAC1C;AACF;AAEA,IAAI,iBAA0C;AAEvC,SAAS,sBAAwC;AACtD,MAAI,CAAC,gBAAgB;AACnB,qBAAiB,IAAI,iBAAiB;AAAA,EACxC;AACA,SAAO;AACT;AAEO,SAAS,wBAA8B;AAC5C,MAAI,gBAAgB;AAClB,mBAAe,SAAS;AACxB,qBAAiB;AAAA,EACnB;AACF;","names":[]}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { EmbedProvider } from '../providers/embed-provider.js';
|
|
2
|
+
/**
|
|
3
|
+
* Native ONNX embedding provider using fastembed-rs Node bindings.
|
|
4
|
+
*
|
|
5
|
+
* Model: BGE-small-en-v1.5 (384-dim). First call to `warmup()` or `embed()` downloads
|
|
6
|
+
* the ONNX model to `${dataDir}/fastembed`. Subsequent runs reuse the cache.
|
|
7
|
+
* Replaces the legacy sentence-transformers Python subprocess.
|
|
8
|
+
*/
|
|
9
|
+
export declare class FastembedEmbedProvider implements EmbedProvider {
|
|
10
|
+
private model;
|
|
11
|
+
private modelPromise;
|
|
12
|
+
readonly modelId: string;
|
|
13
|
+
readonly dim: number;
|
|
14
|
+
constructor();
|
|
15
|
+
warmup(): Promise<void>;
|
|
16
|
+
private getModel;
|
|
17
|
+
embed(texts: string[]): Promise<Float32Array[]>;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=fastembed-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fastembed-provider.d.ts","sourceRoot":"","sources":["../../src/embedding/fastembed-provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAMpE;;;;;;GAMG;AACH,qBAAa,sBAAuB,YAAW,aAAa;IAC1D,OAAO,CAAC,KAAK,CAA8B;IAC3C,OAAO,CAAC,YAAY,CAAuC;IAC3D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;;IAOf,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,QAAQ;IAmBV,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAWtD"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { FlagEmbedding, EmbeddingModel } from "fastembed";
|
|
3
|
+
import { getConfig } from "../config.js";
|
|
4
|
+
import { createLogger } from "../logger.js";
|
|
5
|
+
const log = createLogger("embedding");
|
|
6
|
+
/**
 * Embedding provider backed by the `fastembed` package's `FlagEmbedding`
 * model (BGE-small-en-v1.5, advertised here as 384-dimensional).
 *
 * The model is initialised lazily on the first `warmup()` / `embed()` call
 * and its files are cached under `<dataDir>/fastembed`.
 */
class FastembedEmbedProvider {
  /** Ready model instance; stays null until initialisation has succeeded. */
  model = null;
  /** Pending initialisation, shared by concurrent callers; reset to null on failure. */
  modelPromise = null;
  /** Identifier reported for vectors produced by this provider. */
  modelId = "BGE-small-en-v1.5";
  /** Vector dimensionality reported by this provider. */
  dim = 384;

  /** Force model initialisation now instead of on the first `embed()` call. */
  async warmup() {
    await this.getModel();
  }

  /**
   * Resolve the model, starting initialisation at most once at a time.
   * @returns a promise for the initialised model; it rejects if initialisation fails
   */
  getModel() {
    if (this.model) return Promise.resolve(this.model);
    if (this.modelPromise) return this.modelPromise;

    log.info("Loading embedding model", { modelId: this.modelId });

    const onReady = (loaded) => {
      this.model = loaded;
      log.info("Embedding model ready", { modelId: this.modelId, dim: this.dim });
      return loaded;
    };
    const onFailure = (err) => {
      // Forget the failed attempt so a later call can retry from scratch.
      this.modelPromise = null;
      throw err;
    };

    this.modelPromise = FlagEmbedding.init({
      model: EmbeddingModel.BGESmallENV15,
      cacheDir: join(getConfig().dataDir, "fastembed")
    }).then(onReady).catch(onFailure);

    return this.modelPromise;
  }

  /**
   * Embed every text in a single batch.
   * @param texts strings to embed
   * @returns one Float32Array per vector yielded by the model; `[]` for empty input
   */
  async embed(texts) {
    if (texts.length === 0) return [];
    const model = await this.getModel();
    const vectors = [];
    // Batch size equals the input length, so the whole input is requested as one batch.
    for await (const batch of model.embed(texts, texts.length)) {
      for (const raw of batch) vectors.push(Float32Array.from(raw));
    }
    return vectors;
  }
}
|
|
48
|
+
export {
|
|
49
|
+
FastembedEmbedProvider
|
|
50
|
+
};
|
|
51
|
+
//# sourceMappingURL=fastembed-provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/embedding/fastembed-provider.ts"],"sourcesContent":["import { join } from 'node:path';\nimport { FlagEmbedding, EmbeddingModel } from 'fastembed';\nimport type { EmbedProvider } from '../providers/embed-provider.js';\nimport { getConfig } from '../config.js';\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('embedding');\n\n/**\n * Native ONNX embedding provider using fastembed-rs Node bindings.\n *\n * Model: BGE-small-en-v1.5 (384-dim). First call to `warmup()` downloads\n * the ONNX model to `${dataDir}/fastembed`. Subsequent runs reuse the cache.\n * Replaces the legacy sentence-transformers Python subprocess.\n */\nexport class FastembedEmbedProvider implements EmbedProvider {\n private model: FlagEmbedding | null = null;\n private modelPromise: Promise<FlagEmbedding> | null = null;\n readonly modelId: string;\n readonly dim: number;\n\n constructor() {\n this.modelId = 'BGE-small-en-v1.5';\n this.dim = 384;\n }\n\n async warmup(): Promise<void> {\n await this.getModel();\n }\n\n private getModel(): Promise<FlagEmbedding> {\n if (this.model) return Promise.resolve(this.model);\n if (this.modelPromise) return this.modelPromise;\n log.info('Loading embedding model', { modelId: this.modelId });\n const cacheDir = join(getConfig().dataDir, 'fastembed');\n this.modelPromise = FlagEmbedding.init({\n model: EmbeddingModel.BGESmallENV15,\n cacheDir,\n }).then(m => {\n this.model = m;\n log.info('Embedding model ready', { modelId: this.modelId, dim: this.dim });\n return m;\n }).catch(err => {\n this.modelPromise = null;\n throw err;\n });\n return this.modelPromise;\n }\n\n async embed(texts: string[]): Promise<Float32Array[]> {\n if (texts.length === 0) return [];\n const model = await this.getModel();\n const out: Float32Array[] = [];\n for await (const batch of model.embed(texts, texts.length)) {\n for (const vec of batch) {\n out.push(Float32Array.from(vec));\n }\n }\n return out;\n 
}\n}\n"],"mappings":"AAAA,SAAS,YAAY;AACrB,SAAS,eAAe,sBAAsB;AAE9C,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,WAAW;AAS7B,MAAM,uBAAgD;AAAA,EACnD,QAA8B;AAAA,EAC9B,eAA8C;AAAA,EAC7C;AAAA,EACA;AAAA,EAET,cAAc;AACZ,SAAK,UAAU;AACf,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,SAAwB;AAC5B,UAAM,KAAK,SAAS;AAAA,EACtB;AAAA,EAEQ,WAAmC;AACzC,QAAI,KAAK,MAAO,QAAO,QAAQ,QAAQ,KAAK,KAAK;AACjD,QAAI,KAAK,aAAc,QAAO,KAAK;AACnC,QAAI,KAAK,2BAA2B,EAAE,SAAS,KAAK,QAAQ,CAAC;AAC7D,UAAM,WAAW,KAAK,UAAU,EAAE,SAAS,WAAW;AACtD,SAAK,eAAe,cAAc,KAAK;AAAA,MACrC,OAAO,eAAe;AAAA,MACtB;AAAA,IACF,CAAC,EAAE,KAAK,OAAK;AACX,WAAK,QAAQ;AACb,UAAI,KAAK,yBAAyB,EAAE,SAAS,KAAK,SAAS,KAAK,KAAK,IAAI,CAAC;AAC1E,aAAO;AAAA,IACT,CAAC,EAAE,MAAM,SAAO;AACd,WAAK,eAAe;AACpB,YAAM;AAAA,IACR,CAAC;AACD,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,OAA0C;AACpD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAChC,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,MAAsB,CAAC;AAC7B,qBAAiB,SAAS,MAAM,MAAM,OAAO,MAAM,MAAM,GAAG;AAC1D,iBAAW,OAAO,OAAO;AACvB,YAAI,KAAK,aAAa,KAAK,GAAG,CAAC;AAAA,MACjC;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract key terms from page content and title for FTS5 query building.
|
|
3
|
+
* Prioritizes: title words > headings > bold text > first paragraph.
|
|
4
|
+
* Returns up to 20 deduplicated, lowercased, stopword-free terms.
|
|
5
|
+
*/
|
|
6
|
+
export declare function extractKeyTerms(content: string, title: string): string[];
|
|
7
|
+
export declare function removeStopwords(words: string[]): string[];
|
|
8
|
+
export declare function extractHeadings(content: string): string[];
|
|
9
|
+
export declare function extractBoldText(content: string): string[];
|
|
10
|
+
export declare function extractFirstParagraph(content: string): string;
|
|
11
|
+
export declare function buildFTS5Query(terms: string[]): string;
|
|
12
|
+
//# sourceMappingURL=key-terms.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"key-terms.d.ts","sourceRoot":"","sources":["../../src/embedding/key-terms.ts"],"names":[],"mappings":"AAwBA;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAkDxE;AAiBD,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAEzD;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAqB7D;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAKtD"}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import { createLogger } from "../logger.js";
|
|
2
|
+
const log = createLogger("embedding");
|
|
3
|
+
/**
 * Lowercase English function words that are never useful as key terms.
 * `removeStopwords` lowercases each candidate before looking it up here.
 */
const STOPWORDS = /* @__PURE__ */ new Set([
  "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
  "of", "with", "by", "from", "is", "it", "as", "be", "was", "were",
  "been", "are", "am", "has", "have", "had", "do", "does", "did", "will",
  "would", "could", "should", "may", "might", "can", "shall", "not", "no",
  "nor", "so", "yet", "both", "each", "few", "more", "most", "other",
  "some", "such", "than", "too", "very", "just", "about", "above", "after",
  "again", "all", "also", "any", "because", "before", "below", "between",
  "during", "further", "here", "how", "into", "its", "itself", "me", "my",
  "myself", "once", "only", "our", "ours", "ourselves", "out", "over",
  "own", "same", "she", "he", "her", "him", "his", "hers", "that", "their",
  "theirs", "them", "themselves", "then", "there", "these", "they", "this",
  "those", "through", "under", "until", "up", "we", "what", "when", "where",
  "which", "while", "who", "whom", "why", "you", "your", "yours", "yourself",
  "i", "if"
]);

/** Maximum number of terms `extractKeyTerms` returns. */
const MAX_TERMS = 20;

/** Character cap applied to the text returned by `extractFirstParagraph`. */
const FIRST_PARAGRAPH_MAX_CHARS = 200;
|
|
128
|
+
/**
 * Pick the most informative terms of a page for building an FTS5 query.
 *
 * Candidates are gathered in priority order (title, headings, bold text,
 * first paragraph), stopwords are dropped, duplicates are removed keeping the
 * first occurrence, and the list is capped at MAX_TERMS.
 *
 * @param content page body as markdown
 * @param title page title
 * @returns ordered, de-duplicated terms; `[]` when both inputs are blank or
 *          when extraction throws (the error is logged, not propagated)
 */
function extractKeyTerms(content, title) {
  try {
    const nothingToScan = !content.trim() && !title.trim();
    if (nothingToScan) return [];

    const titleWords = tokenize(title);
    const headings = extractHeadings(content);
    const boldPhrases = extractBoldText(content);
    const firstPara = extractFirstParagraph(content);

    // Order matters: earlier sources win when the cap truncates the list.
    const candidates = [
      ...titleWords,
      ...headings.flatMap((heading) => tokenize(heading)),
      ...boldPhrases.flatMap((phrase) => tokenize(phrase)),
      ...(firstPara ? tokenize(firstPara) : [])
    ];

    // A Set iterates in insertion order, so first occurrences keep their rank.
    const unique = [...new Set(removeStopwords(candidates))].slice(0, MAX_TERMS);

    log.debug("extracted key terms", {
      titleTerms: titleWords.length,
      headingTerms: headings.length,
      boldTerms: boldPhrases.length,
      uniqueTerms: unique.length
    });
    return unique;
  } catch (err) {
    log.error("key term extraction failed", { error: String(err) });
    return [];
  }
}
|
|
170
|
+
/**
 * Split markdown-flavoured text into lowercase ASCII word tokens.
 *
 * URLs, fenced and inline code, heading markers and emphasis markers are
 * removed first. Hyphens inside a word are kept (`state-of-the-art`), but
 * leading/trailing hyphens are trimmed so that list bullets, `--`, `---`
 * rules and negative numbers do not leak through as bogus terms.
 *
 * @param text raw text; falsy input yields `[]`
 * @returns tokens longer than one character that are not purely numeric
 */
function tokenize(text) {
  if (!text) return [];
  const stripped = text
    .replace(/https?:\/\/[^\s)]+/g, "") // URLs
    .replace(/```[\s\S]*?```/g, "") // fenced code
    .replace(/`[^`]+`/g, "") // inline code
    .replace(/^#{1,6}\s+/gm, "") // heading markers
    .replace(/\*{1,3}|_{1,3}/g, ""); // emphasis markers
  return stripped
    .toLowerCase()
    .split(/[^a-z0-9-]+/)
    // Hyphens are only meaningful between word characters.
    .map((word) => word.replace(/^-+|-+$/g, ""))
    .filter((word) => word.length > 1 && !/^\d+$/.test(word));
}
|
|
179
|
+
/**
 * Drop stopwords and one-character strings from a word list.
 * The stopword lookup is case-insensitive; surviving words keep their
 * original casing and order.
 *
 * @param words candidate words
 * @returns a new array containing only the words worth keeping
 */
function removeStopwords(words) {
  return words.filter((word) => {
    if (word.length <= 1) return false;
    return !STOPWORDS.has(word.toLowerCase());
  });
}
|
|
182
|
+
/**
 * Collect the text of level 1-3 ATX headings (`#`, `##`, `###`) from markdown.
 *
 * The content is scanned line by line so that:
 *  - a heading must have its text on the same line as the marker (an empty
 *    `#` line no longer swallows the paragraph that follows it), and
 *  - lines inside fenced code blocks (``` or ~~~) are ignored, so shell
 *    comments such as `# install deps` are not mistaken for headings.
 * An unclosed fence hides everything after it.
 *
 * @param content markdown text; falsy input yields `[]`
 * @returns trimmed heading texts in document order
 */
function extractHeadings(content) {
  if (!content) return [];
  const FENCE_MARKER = /^\s*(```|~~~)/;
  const HEADING = /^#{1,3}\s+(.+)$/;
  const headings = [];
  let openFence = ""; // marker of the fence we are inside, "" when outside
  for (const line of content.split(/\r\n|[\n\r\u2028\u2029]/)) {
    const marker = FENCE_MARKER.exec(line)?.[1];
    if (openFence) {
      // Only the same marker type closes the fence.
      if (marker === openFence) openFence = "";
      continue;
    }
    if (marker) {
      openFence = marker;
      continue;
    }
    const text = HEADING.exec(line)?.[1].trim();
    if (text) headings.push(text);
  }
  return headings;
}
|
|
193
|
+
/**
 * Collect the text of every strong-emphasis span (`**text**` or `__text__`).
 *
 * @param content markdown text; falsy input yields `[]`
 * @returns trimmed, non-empty span contents in document order
 */
function extractBoldText(content) {
  if (!content) return [];
  const phrases = [];
  for (const [, starred, underscored] of content.matchAll(/\*\*(.+?)\*\*|__(.+?)__/g)) {
    // Exactly one capture group is set, depending on which delimiter matched.
    const phrase = (starred || underscored).trim();
    if (phrase) phrases.push(phrase);
  }
  return phrases;
}
|
|
204
|
+
/**
 * Return the first line of body text in a markdown document, truncated to
 * FIRST_PARAGRAPH_MAX_CHARS characters.
 *
 * Blank lines, heading lines (anything starting with `#`) and fenced code
 * blocks (``` or ~~~) are skipped, including the code between the fence
 * delimiters, so a document that opens with a snippet does not report a line
 * of code as its first paragraph. An unclosed fence hides everything after it.
 *
 * @param content markdown text; falsy input yields `""`
 * @returns the trimmed first prose line, or `""` when there is none
 */
function extractFirstParagraph(content) {
  if (!content) return "";
  const FENCE_MARKER = /^(```|~~~)/;
  let openFence = ""; // marker of the fence we are inside, "" when outside
  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    const marker = FENCE_MARKER.exec(trimmed)?.[1];
    if (openFence) {
      // Only the same marker type closes the fence.
      if (marker === openFence) openFence = "";
      continue;
    }
    if (marker) {
      openFence = marker;
      continue;
    }
    if (!trimmed || trimmed.startsWith("#")) continue;
    return trimmed.slice(0, FIRST_PARAGRAPH_MAX_CHARS);
  }
  return "";
}
|
|
221
|
+
/**
 * Join terms into an FTS5 `MATCH` expression of the form `a OR b OR c`.
 *
 * Quotes and parentheses are stripped from each term, as before. A term is
 * emitted bare only when it is a valid FTS5 bareword (ASCII letters, digits,
 * underscore, the substitute character, or non-ASCII characters) and is not
 * one of the query keywords; anything else, such as a hyphenated term like
 * `well-known`, is wrapped in double quotes so FTS5 parses it as a string
 * instead of raising a syntax error. Terms that are empty after cleaning are
 * dropped.
 *
 * @param terms search terms
 * @returns the query string, or `""` when no usable term remains
 */
function buildFTS5Query(terms) {
  if (terms.length === 0) return "";
  const BAREWORD = /^[A-Za-z0-9_\u001a\u0080-\u{10ffff}]+$/u;
  const RESERVED = new Set(["AND", "OR", "NOT", "NEAR"]);
  const parts = [];
  for (const term of terms) {
    const cleaned = term.replace(/['"()]/g, "").trim();
    if (!cleaned) continue;
    const isSafeBareword = BAREWORD.test(cleaned) && !RESERVED.has(cleaned);
    // Double quotes were stripped above, so no escaping is needed inside the string.
    parts.push(isSafeBareword ? cleaned : `"${cleaned}"`);
  }
  return parts.join(" OR ");
}
|
|
226
|
+
export {
|
|
227
|
+
buildFTS5Query,
|
|
228
|
+
extractBoldText,
|
|
229
|
+
extractFirstParagraph,
|
|
230
|
+
extractHeadings,
|
|
231
|
+
extractKeyTerms,
|
|
232
|
+
removeStopwords
|
|
233
|
+
};
|
|
234
|
+
//# sourceMappingURL=key-terms.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/embedding/key-terms.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\n\nconst log = createLogger('embedding');\n\nconst STOPWORDS = new Set([\n 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',\n 'of', 'with', 'by', 'from', 'is', 'it', 'as', 'be', 'was', 'were',\n 'been', 'are', 'am', 'has', 'have', 'had', 'do', 'does', 'did', 'will',\n 'would', 'could', 'should', 'may', 'might', 'can', 'shall', 'not', 'no',\n 'nor', 'so', 'yet', 'both', 'each', 'few', 'more', 'most', 'other',\n 'some', 'such', 'than', 'too', 'very', 'just', 'about', 'above', 'after',\n 'again', 'all', 'also', 'any', 'because', 'before', 'below', 'between',\n 'during', 'further', 'here', 'how', 'into', 'its', 'itself', 'me', 'my',\n 'myself', 'once', 'only', 'our', 'ours', 'ourselves', 'out', 'over',\n 'own', 'same', 'she', 'he', 'her', 'him', 'his', 'hers', 'that', 'their',\n 'theirs', 'them', 'themselves', 'then', 'there', 'these', 'they', 'this',\n 'those', 'through', 'under', 'until', 'up', 'we', 'what', 'when', 'where',\n 'which', 'while', 'who', 'whom', 'why', 'you', 'your', 'yours', 'yourself',\n 'i', 'if',\n]);\n\nconst MAX_TERMS = 20;\nconst FIRST_PARAGRAPH_MAX_CHARS = 200;\n\n/**\n * Extract key terms from page content and title for FTS5 query building.\n * Prioritizes: title words > headings > bold text > first paragraph.\n * Returns up to 20 deduplicated, lowercased, stopword-free terms.\n */\nexport function extractKeyTerms(content: string, title: string): string[] {\n try {\n if (!content.trim() && !title.trim()) {\n return [];\n }\n\n const allTerms: string[] = [];\n\n const titleWords = tokenize(title);\n allTerms.push(...titleWords);\n\n const headings = extractHeadings(content);\n for (const heading of headings) {\n allTerms.push(...tokenize(heading));\n }\n\n const boldPhrases = extractBoldText(content);\n for (const phrase of boldPhrases) {\n allTerms.push(...tokenize(phrase));\n }\n\n const firstPara = 
extractFirstParagraph(content);\n if (firstPara) {\n allTerms.push(...tokenize(firstPara));\n }\n\n const cleaned = removeStopwords(allTerms);\n const seen = new Set<string>();\n const unique: string[] = [];\n\n for (const term of cleaned) {\n if (!seen.has(term)) {\n seen.add(term);\n unique.push(term);\n }\n if (unique.length >= MAX_TERMS) break;\n }\n\n log.debug('extracted key terms', {\n titleTerms: titleWords.length,\n headingTerms: headings.length,\n boldTerms: boldPhrases.length,\n uniqueTerms: unique.length,\n });\n\n return unique;\n } catch (err) {\n log.error('key term extraction failed', { error: String(err) });\n return [];\n }\n}\n\nfunction tokenize(text: string): string[] {\n if (!text) return [];\n\n let cleaned = text.replace(/https?:\\/\\/[^\\s)]+/g, '');\n cleaned = cleaned.replace(/```[\\s\\S]*?```/g, '');\n cleaned = cleaned.replace(/`[^`]+`/g, '');\n cleaned = cleaned.replace(/^#{1,6}\\s+/gm, '');\n cleaned = cleaned.replace(/\\*{1,3}|_{1,3}/g, '');\n\n return cleaned\n .toLowerCase()\n .split(/[^a-z0-9-]+/)\n .filter(w => w.length > 1 && !/^\\d+$/.test(w));\n}\n\nexport function removeStopwords(words: string[]): string[] {\n return words.filter(w => w.length > 1 && !STOPWORDS.has(w.toLowerCase()));\n}\n\nexport function extractHeadings(content: string): string[] {\n if (!content) return [];\n\n const headings: string[] = [];\n const regex = /^#{1,3}\\s+(.+)$/gm;\n let match: RegExpExecArray | null;\n\n while ((match = regex.exec(content)) !== null) {\n const text = match[1].trim();\n if (text) headings.push(text);\n }\n\n return headings;\n}\n\nexport function extractBoldText(content: string): string[] {\n if (!content) return [];\n\n const bold: string[] = [];\n const regex = /\\*\\*(.+?)\\*\\*|__(.+?)__/g;\n let match: RegExpExecArray | null;\n\n while ((match = regex.exec(content)) !== null) {\n const text = (match[1] || match[2]).trim();\n if (text) bold.push(text);\n }\n\n return bold;\n}\n\nexport function 
extractFirstParagraph(content: string): string {\n if (!content) return '';\n\n const lines = content.split('\\n');\n const paragraphLines: string[] = [];\n\n for (const line of lines) {\n const trimmed = line.trim();\n if (!trimmed || trimmed.startsWith('#')) continue;\n if (trimmed.startsWith('```')) continue;\n\n paragraphLines.push(trimmed);\n const joined = paragraphLines.join(' ');\n if (joined.length >= FIRST_PARAGRAPH_MAX_CHARS) {\n return joined.slice(0, FIRST_PARAGRAPH_MAX_CHARS);\n }\n\n break;\n }\n\n return paragraphLines.join(' ').slice(0, FIRST_PARAGRAPH_MAX_CHARS);\n}\n\nexport function buildFTS5Query(terms: string[]): string {\n if (terms.length === 0) return '';\n\n const escaped = terms.map(t => t.replace(/['\"()]/g, ''));\n return escaped.filter(Boolean).join(' OR ');\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,WAAW;AAEpC,MAAM,YAAY,oBAAI,IAAI;AAAA,EACxB;AAAA,EAAK;AAAA,EAAM;AAAA,EAAO;AAAA,EAAO;AAAA,EAAM;AAAA,EAAO;AAAA,EAAM;AAAA,EAAM;AAAA,EAAM;AAAA,EAAM;AAAA,EAC9D;AAAA,EAAM;AAAA,EAAQ;AAAA,EAAM;AAAA,EAAQ;AAAA,EAAM;AAAA,EAAM;AAAA,EAAM;AAAA,EAAM;AAAA,EAAO;AAAA,EAC3D;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAM;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAM;AAAA,EAAQ;AAAA,EAAO;AAAA,EAChE;AAAA,EAAS;AAAA,EAAS;AAAA,EAAU;AAAA,EAAO;AAAA,EAAS;AAAA,EAAO;AAAA,EAAS;AAAA,EAAO;AAAA,EACnE;AAAA,EAAO;AAAA,EAAM;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAC3D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAS;AAAA,EACjE;AAAA,EAAS;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAW;AAAA,EAAU;AAAA,EAAS;AAAA,EAC7D;AAAA,EAAU;AAAA,EAAW;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAU;AAAA,EAAM;AAAA,EACnE;AAAA,EAAU;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAa;AAAA,EAAO;AAAA,EAC7D;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAM;AAAA,EAAO;AAAA,EAAO;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAQ;AAAA,EACjE;AAAA,EAAU;AAAA,EAAQ;AAAA,EAAc;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAS;AAAA,EAAQ;AAAA,EAClE;AAAA,EAAS;AAAA,EAAW;AAAA,EAAS;AAAA,EAAS;AAAA,EAAM;AAAA,EAAM;AAAA,EAAQ;AAAA,EAAQ;
AAAA,EAClE;AAAA,EAAS;AAAA,EAAS;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAO;AAAA,EAAQ;AAAA,EAAS;AAAA,EAChE;AAAA,EAAK;AACP,CAAC;AAED,MAAM,YAAY;AAClB,MAAM,4BAA4B;AAO3B,SAAS,gBAAgB,SAAiB,OAAyB;AACxE,MAAI;AACF,QAAI,CAAC,QAAQ,KAAK,KAAK,CAAC,MAAM,KAAK,GAAG;AACpC,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,WAAqB,CAAC;AAE5B,UAAM,aAAa,SAAS,KAAK;AACjC,aAAS,KAAK,GAAG,UAAU;AAE3B,UAAM,WAAW,gBAAgB,OAAO;AACxC,eAAW,WAAW,UAAU;AAC9B,eAAS,KAAK,GAAG,SAAS,OAAO,CAAC;AAAA,IACpC;AAEA,UAAM,cAAc,gBAAgB,OAAO;AAC3C,eAAW,UAAU,aAAa;AAChC,eAAS,KAAK,GAAG,SAAS,MAAM,CAAC;AAAA,IACnC;AAEA,UAAM,YAAY,sBAAsB,OAAO;AAC/C,QAAI,WAAW;AACb,eAAS,KAAK,GAAG,SAAS,SAAS,CAAC;AAAA,IACtC;AAEA,UAAM,UAAU,gBAAgB,QAAQ;AACxC,UAAM,OAAO,oBAAI,IAAY;AAC7B,UAAM,SAAmB,CAAC;AAE1B,eAAW,QAAQ,SAAS;AAC1B,UAAI,CAAC,KAAK,IAAI,IAAI,GAAG;AACnB,aAAK,IAAI,IAAI;AACb,eAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI,OAAO,UAAU,UAAW;AAAA,IAClC;AAEA,QAAI,MAAM,uBAAuB;AAAA,MAC/B,YAAY,WAAW;AAAA,MACvB,cAAc,SAAS;AAAA,MACvB,WAAW,YAAY;AAAA,MACvB,aAAa,OAAO;AAAA,IACtB,CAAC;AAED,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,8BAA8B,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAC9D,WAAO,CAAC;AAAA,EACV;AACF;AAEA,SAAS,SAAS,MAAwB;AACxC,MAAI,CAAC,KAAM,QAAO,CAAC;AAEnB,MAAI,UAAU,KAAK,QAAQ,uBAAuB,EAAE;AACpD,YAAU,QAAQ,QAAQ,mBAAmB,EAAE;AAC/C,YAAU,QAAQ,QAAQ,YAAY,EAAE;AACxC,YAAU,QAAQ,QAAQ,gBAAgB,EAAE;AAC5C,YAAU,QAAQ,QAAQ,mBAAmB,EAAE;AAE/C,SAAO,QACJ,YAAY,EACZ,MAAM,aAAa,EACnB,OAAO,OAAK,EAAE,SAAS,KAAK,CAAC,QAAQ,KAAK,CAAC,CAAC;AACjD;AAEO,SAAS,gBAAgB,OAA2B;AACzD,SAAO,MAAM,OAAO,OAAK,EAAE,SAAS,KAAK,CAAC,UAAU,IAAI,EAAE,YAAY,CAAC,CAAC;AAC1E;AAEO,SAAS,gBAAgB,SAA2B;AACzD,MAAI,CAAC,QAAS,QAAO,CAAC;AAEtB,QAAM,WAAqB,CAAC;AAC5B,QAAM,QAAQ;AACd,MAAI;AAEJ,UAAQ,QAAQ,MAAM,KAAK,OAAO,OAAO,MAAM;AAC7C,UAAM,OAAO,MAAM,CAAC,EAAE,KAAK;AAC3B,QAAI,KAAM,UAAS,KAAK,IAAI;AAAA,EAC9B;AAEA,SAAO;AACT;AAEO,SAAS,gBAAgB,SAA2B;AACzD,MAAI,CAAC,QAAS,QAAO,CAAC;AAEtB,QAAM,OAAiB,CAAC;AACxB,QAAM,QAAQ;AACd,MAAI;AAEJ,UAAQ,QAAQ,MAAM,KAAK,OAAO,OAAO,MAAM;AAC7C,UAAM,QAAQ,MAAM,CAAC,KAAK,MAAM,CAAC,GAAG,KAAK;AACzC,QAAI,KAAM,MAAK,KAAK,IAAI;AAAA,EAC1B;AAEA,SAAO;AACT;AAEO,SAAS,s
BAAsB,SAAyB;AAC7D,MAAI,CAAC,QAAS,QAAO;AAErB,QAAM,QAAQ,QAAQ,MAAM,IAAI;AAChC,QAAM,iBAA2B,CAAC;AAElC,aAAW,QAAQ,OAAO;AACxB,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,WAAW,QAAQ,WAAW,GAAG,EAAG;AACzC,QAAI,QAAQ,WAAW,KAAK,EAAG;AAE/B,mBAAe,KAAK,OAAO;AAC3B,UAAM,SAAS,eAAe,KAAK,GAAG;AACtC,QAAI,OAAO,UAAU,2BAA2B;AAC9C,aAAO,OAAO,MAAM,GAAG,yBAAyB;AAAA,IAClD;AAEA;AAAA,EACF;AAEA,SAAO,eAAe,KAAK,GAAG,EAAE,MAAM,GAAG,yBAAyB;AACpE;AAEO,SAAS,eAAe,OAAyB;AACtD,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,QAAM,UAAU,MAAM,IAAI,OAAK,EAAE,QAAQ,WAAW,EAAE,CAAC;AACvD,SAAO,QAAQ,OAAO,OAAO,EAAE,KAAK,MAAM;AAC5C;","names":[]}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export declare const BOILERPLATE_TEXT_EQUALITY: ReadonlyArray<string>;
|
|
2
|
+
export declare const BOILERPLATE_TEXT_PATTERNS: ReadonlyArray<RegExp>;
|
|
3
|
+
export declare const BOILERPLATE_SELECTORS: ReadonlyArray<string>;
|
|
4
|
+
export interface BoilerplateDocument {
|
|
5
|
+
querySelectorAll(selector: string): ArrayLike<BoilerplateElement>;
|
|
6
|
+
}
|
|
7
|
+
interface BoilerplateElement {
|
|
8
|
+
parentNode: {
|
|
9
|
+
removeChild(child: BoilerplateElement): void;
|
|
10
|
+
} | null;
|
|
11
|
+
}
|
|
12
|
+
export declare function stripBoilerplateMarkdown(md: string): string;
|
|
13
|
+
export declare function stripBoilerplateDom(document: BoilerplateDocument): void;
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=boilerplate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"boilerplate.d.ts","sourceRoot":"","sources":["../../src/extraction/boilerplate.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,yBAAyB,EAAE,aAAa,CAAC,MAAM,CAQ3D,CAAC;AAEF,eAAO,MAAM,yBAAyB,EAAE,aAAa,CAAC,MAAM,CAE3D,CAAC;AAEF,eAAO,MAAM,qBAAqB,EAAE,aAAa,CAAC,MAAM,CAUvD,CAAC;AAEF,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAC;CACnE;AAED,UAAU,kBAAkB;IAC1B,UAAU,EAAE;QAAE,WAAW,CAAC,KAAK,EAAE,kBAAkB,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI,CAAC;CACrE;AAED,wBAAgB,wBAAwB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAU3D;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CASvE"}
|