@staticn0va/wigolo 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +146 -227
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +238 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +47 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +364 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +344 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/shutdown.d.ts +2 -0
- package/dist/cli/shutdown.d.ts.map +1 -0
- package/dist/cli/shutdown.js +26 -0
- package/dist/cli/shutdown.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +34 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/error-describe.d.ts +7 -0
- package/dist/fetch/error-describe.d.ts.map +1 -0
- package/dist/fetch/error-describe.js +37 -0
- package/dist/fetch/error-describe.js.map +1 -0
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +187 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +102 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
- package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/model-select.js +32 -0
- package/dist/integrations/cloud/llm/model-select.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/run.d.ts +27 -0
- package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/run.js +99 -0
- package/dist/integrations/cloud/llm/run.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.js +103 -0
- package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +17 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +41 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +6 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +246 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +19 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +62 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.d.ts.map +1 -1
- package/dist/search/filters.js +58 -54
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +94 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +177 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +174 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { getConfig } from "../config.js";
|
|
2
|
+
import { callAnthropic } from "../integrations/cloud/llm/anthropic.js";
|
|
3
|
+
import { callOpenAI } from "../integrations/cloud/llm/openai.js";
|
|
4
|
+
import { callGemini } from "../integrations/cloud/llm/gemini.js";
|
|
5
|
+
import { callGroq } from "../integrations/cloud/llm/groq.js";
|
|
6
|
+
import {
|
|
7
|
+
ensureLLMCacheTable,
|
|
8
|
+
insertLLMCache,
|
|
9
|
+
lookupLLMCache
|
|
10
|
+
} from "../integrations/cloud/llm/cache.js";
|
|
11
|
+
import { hashPrompt, hashSchema } from "../integrations/cloud/llm/hash.js";
|
|
12
|
+
import { allProviders, providerEnvVar, selectProvider } from "../integrations/cloud/llm/select.js";
|
|
13
|
+
import { validateAgainstSchema } from "../integrations/cloud/llm/validate.js";
|
|
14
|
+
// Cap applied to the HTML embedded in the LLM prompt (see buildPrompt).
// NOTE: despite the name, truncate() compares this against String.length,
// i.e. UTF-16 code units rather than encoded bytes.
const MAX_HTML_BYTES = 50000;
|
|
15
|
+
/**
 * Provider id -> adapter function.
 *
 * extractWithLLM indexes this table with the provider returned by
 * selectProvider() and invokes the adapter as
 * `adapter({ prompt, jsonSchema, signal }, apiKey)`.
 */
const ADAPTERS = {
  anthropic: callAnthropic,
  openai: callOpenAI,
  gemini: callGemini,
  groq: callGroq,
};
|
|
21
|
+
/**
 * Ask a cloud LLM to fill in the fields listed in `input.missing`.
 *
 * Flow:
 *   1. Return immediately when nothing is missing, when the call budget is
 *      spent, or when `selectProvider` finds no configured provider.
 *   2. Look the (model, prompt hash, schema hash) triple up in the LLM cache.
 *   3. On a miss, call the provider adapter, validate the response against
 *      `input.jsonSchema`, and store it in the cache.
 *   4. Merge ONLY the keys named in `input.missing` into a copy of
 *      `input.partial`.
 *
 * Adapter failures and schema-validation failures are reported through
 * `warnings` on a result whose `values` is just a copy of `input.partial`.
 * An unreadable cache entry is treated as a cache miss (with a warning)
 * instead of throwing.
 *
 * Side effect: `budget.remaining` is decremented once for every adapter call
 * attempt, whether it succeeds or throws. Cache hits do not consume budget.
 *
 * @param {object} input
 * @param {string} input.html - Page HTML embedded (truncated) in the prompt.
 * @param {object} input.jsonSchema - Schema handed to the adapter and used for validation.
 * @param {object} input.partial - Values already extracted; never overwritten.
 * @param {string[]} input.missing - Keys the LLM is asked to supply.
 * @param {AbortSignal} [input.signal] - Forwarded to the adapter.
 * @param {{remaining: number}} [input.budget] - Shared per-request call budget;
 *   defaults to a fresh budget of `cfg.llmMaxCallsPerRequest`.
 * @returns {Promise<object>} `{ values, provider, model, cached, latencyMs, warnings }`.
 */
async function extractWithLLM(input) {
  if (input.missing.length === 0) {
    return emptyResult(input.partial, []);
  }

  const cfg = getConfig();
  const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };
  if (budget.remaining <= 0) {
    return emptyResult(input.partial, [
      `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`
    ]);
  }

  const provider = selectProvider(process.env);
  if (!provider) {
    const envList = allProviders().map((p) => providerEnvVar(p)).join(", ");
    return emptyResult(input.partial, [
      `LLM fallback skipped: no provider key set (${envList}). ${input.missing.length} required field(s) still missing: ${input.missing.join(", ")}.`
    ]);
  }

  const apiKey = process.env[providerEnvVar(provider)];
  const prompt = buildPrompt(input);
  const promptHash = hashPrompt(prompt);
  const schemaHash = hashSchema(input.jsonSchema);
  const modelId = `${provider}:default`;

  ensureLLMCacheTable();
  // Warnings gathered before the live call; carried into every later result.
  const cacheWarnings = [];
  const cached = lookupLLMCache(modelId, promptHash, schemaHash);
  if (cached) {
    let cachedValues;
    try {
      cachedValues = JSON.parse(cached);
    } catch {
      // Corrupt cache row: fall through to a live call rather than throwing.
      cachedValues = void 0;
    }
    if (cachedValues !== null && typeof cachedValues === "object") {
      return {
        values: mergeOnlyMissing(input.partial, cachedValues, input.missing),
        provider,
        model: modelId,
        cached: true,
        latencyMs: 0,
        warnings: []
      };
    }
    cacheWarnings.push(
      `LLM cache entry for ${modelId} was unreadable and has been ignored.`
    );
  }

  let result;
  try {
    result = await ADAPTERS[provider](
      { prompt, jsonSchema: input.jsonSchema, signal: input.signal },
      apiKey
    );
  } catch (e) {
    // Anything can be thrown in JS; avoid printing "undefined" for non-Errors.
    const reason = e?.message ?? String(e);
    return emptyResult(input.partial, [
      ...cacheWarnings,
      `LLM fallback (${provider}) failed: ${reason}`
    ]);
  } finally {
    // Every attempt costs budget, including failed ones.
    budget.remaining = Math.max(0, budget.remaining - 1);
  }

  const errors = validateAgainstSchema(result.values, input.jsonSchema);
  if (errors.length > 0) {
    return emptyResult(input.partial, [
      ...cacheWarnings,
      `LLM fallback (${provider}) response failed schema validation: ${errors.map((e) => `${e.path} ${e.message}`).join("; ")}`
    ]);
  }

  const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1e3;
  const now = Date.now();
  insertLLMCache({
    modelId,
    promptHash,
    schemaHash,
    response: JSON.stringify(result.values),
    createdAt: now,
    expiresAt: now + ttlMs
  });

  return {
    values: mergeOnlyMissing(input.partial, result.values, input.missing),
    provider,
    model: result.model,
    cached: false,
    latencyMs: result.latencyMs,
    warnings: [...cacheWarnings, ...(result.warnings ?? [])]
  };
}
|
|
95
|
+
/**
 * Build the "nothing was filled in" result.
 *
 * `values` is a shallow copy of `partial`; the remaining fields are fixed
 * placeholders (`provider` is always "anthropic", `model` is empty) and the
 * caller-supplied `warnings` array is attached as-is.
 *
 * @param {object} partial - Values already extracted.
 * @param {string[]} warnings - Messages explaining why no LLM data was added.
 * @returns {object} `{ values, provider, model, cached, latencyMs, warnings }`.
 */
function emptyResult(partial, warnings) {
  const untouched = { ...partial };
  const result = {
    values: untouched,
    provider: "anthropic",
    model: "",
    cached: false,
    latencyMs: 0,
  };
  result.warnings = warnings;
  return result;
}
|
|
105
|
+
/**
 * Copy `partial` and add values from `filled`, but only for keys listed in
 * `missing`. Keys that `filled` does not define itself, or defines as
 * `undefined`, are skipped.
 *
 * Only OWN properties of `filled` are considered. A plain property read would
 * also see inherited members, so a missing field named e.g. "toString" or
 * "constructor" would otherwise pull an Object.prototype function into the
 * result.
 *
 * @param {object} partial - Values already extracted; not mutated.
 * @param {object} filled - Candidate values (e.g. parsed LLM output).
 * @param {string[]} missing - Keys allowed to be taken from `filled`.
 * @returns {object} New object with the merged values.
 */
function mergeOnlyMissing(partial, filled, missing) {
  const out = { ...partial };
  for (const key of missing) {
    if (!Object.prototype.hasOwnProperty.call(filled, key)) continue;
    if (filled[key] !== void 0) out[key] = filled[key];
  }
  return out;
}
|
|
112
|
+
/**
 * Assemble the extraction prompt: a fixed instruction header, the list of
 * missing field names, and the page HTML truncated to MAX_HTML_BYTES.
 * Sections are separated by single newlines, with one blank line before the
 * "HTML:" marker.
 *
 * @param {object} input - Reads `input.html` and `input.missing`.
 * @returns {string} The prompt text.
 */
function buildPrompt(input) {
  const clippedHtml = truncate(input.html, MAX_HTML_BYTES);
  const sections = [];
  sections.push("Extract the following missing fields from the HTML below.");
  sections.push(`Missing fields: ${input.missing.join(", ")}.`);
  sections.push(
    "Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it."
  );
  sections.push("");
  sections.push("HTML:");
  sections.push(clippedHtml);
  return sections.join("\n");
}
|
|
123
|
+
/**
 * Shorten `s` to at most `maxBytes` UTF-16 code units.
 *
 * NOTE: despite the parameter name, the limit is measured with String.length
 * (UTF-16 code units), not encoded bytes.
 *
 * If the cut would land between the two halves of a surrogate pair, the cut
 * is moved back by one unit so the result never ends in a lone high
 * surrogate, which would be an ill-formed string once encoded for the API
 * request.
 *
 * @param {string} s - Text to shorten.
 * @param {number} maxBytes - Maximum length in UTF-16 code units.
 * @returns {string} `s` unchanged when short enough, otherwise a prefix of it.
 */
function truncate(s, maxBytes) {
  if (s.length <= maxBytes) return s;
  let end = maxBytes;
  const last = s.charCodeAt(end - 1);
  const next = s.charCodeAt(end);
  const splitsPair =
    last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
  if (splitsPair) end -= 1;
  return s.slice(0, end);
}
|
|
127
|
+
export {
|
|
128
|
+
extractWithLLM
|
|
129
|
+
};
|
|
130
|
+
//# sourceMappingURL=llm-fallback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/llm-fallback.ts"],"sourcesContent":["import { getConfig } from '../config.js';\nimport { callAnthropic } from '../integrations/cloud/llm/anthropic.js';\nimport { callOpenAI } from '../integrations/cloud/llm/openai.js';\nimport { callGemini } from '../integrations/cloud/llm/gemini.js';\nimport { callGroq } from '../integrations/cloud/llm/groq.js';\nimport {\n ensureLLMCacheTable,\n insertLLMCache,\n lookupLLMCache,\n} from '../integrations/cloud/llm/cache.js';\nimport { hashPrompt, hashSchema } from '../integrations/cloud/llm/hash.js';\nimport { allProviders, providerEnvVar, selectProvider } from '../integrations/cloud/llm/select.js';\nimport type { LLMExtractResult, LLMProvider } from '../integrations/cloud/llm/types.js';\nimport { validateAgainstSchema } from '../integrations/cloud/llm/validate.js';\n\nconst MAX_HTML_BYTES = 50_000;\n\nexport interface LLMFallbackBudget {\n remaining: number;\n}\n\nexport interface LLMFallbackInput {\n html: string;\n jsonSchema: Record<string, unknown>;\n partial: Record<string, unknown>;\n missing: string[];\n signal?: AbortSignal;\n budget?: LLMFallbackBudget;\n}\n\nexport interface LLMFallbackResult extends LLMExtractResult {\n warnings: string[];\n}\n\nconst ADAPTERS: Record<\n LLMProvider,\n (\n opts: { prompt: string; jsonSchema: Record<string, unknown>; signal?: AbortSignal },\n apiKey: string,\n ) => Promise<LLMExtractResult>\n> = {\n anthropic: callAnthropic,\n openai: callOpenAI,\n gemini: callGemini,\n groq: callGroq,\n};\n\nexport async function extractWithLLM(\n input: LLMFallbackInput,\n): Promise<LLMFallbackResult> {\n if (input.missing.length === 0) {\n return emptyResult(input.partial, []);\n }\n\n const cfg = getConfig();\n const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };\n if (budget.remaining <= 0) {\n return emptyResult(input.partial, [\n `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). 
Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`,\n ]);\n }\n\n const provider = selectProvider(process.env);\n if (!provider) {\n const envList = allProviders()\n .map((p) => providerEnvVar(p))\n .join(', ');\n return emptyResult(input.partial, [\n `LLM fallback skipped: no provider key set (${envList}). ` +\n `${input.missing.length} required field(s) still missing: ${input.missing.join(', ')}.`,\n ]);\n }\n\n const apiKey = process.env[providerEnvVar(provider)] as string;\n const prompt = buildPrompt(input);\n const promptHash = hashPrompt(prompt);\n const schemaHash = hashSchema(input.jsonSchema);\n const modelId = `${provider}:default`;\n\n ensureLLMCacheTable();\n const cached = lookupLLMCache(modelId, promptHash, schemaHash);\n if (cached) {\n const values = JSON.parse(cached) as Record<string, unknown>;\n return {\n values: mergeOnlyMissing(input.partial, values, input.missing),\n provider,\n model: modelId,\n cached: true,\n latencyMs: 0,\n warnings: [],\n };\n }\n\n let result: LLMExtractResult;\n try {\n result = await ADAPTERS[provider](\n { prompt, jsonSchema: input.jsonSchema, signal: input.signal },\n apiKey,\n );\n } catch (e) {\n return emptyResult(input.partial, [\n `LLM fallback (${provider}) failed: ${(e as Error).message}`,\n ]);\n } finally {\n budget.remaining = Math.max(0, budget.remaining - 1);\n }\n\n const errors = validateAgainstSchema(result.values, input.jsonSchema);\n if (errors.length > 0) {\n return emptyResult(input.partial, [\n `LLM fallback (${provider}) response failed schema validation: ${errors\n .map((e) => `${e.path} ${e.message}`)\n .join('; ')}`,\n ]);\n }\n\n const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1000;\n const now = Date.now();\n insertLLMCache({\n modelId,\n promptHash,\n schemaHash,\n response: JSON.stringify(result.values),\n createdAt: now,\n expiresAt: now + ttlMs,\n });\n\n return {\n values: mergeOnlyMissing(input.partial, result.values, input.missing),\n provider,\n model: result.model,\n cached: 
false,\n latencyMs: result.latencyMs,\n warnings: result.warnings ?? [],\n };\n}\n\nfunction emptyResult(\n partial: Record<string, unknown>,\n warnings: string[],\n): LLMFallbackResult {\n return {\n values: { ...partial },\n provider: 'anthropic',\n model: '',\n cached: false,\n latencyMs: 0,\n warnings,\n };\n}\n\nfunction mergeOnlyMissing(\n partial: Record<string, unknown>,\n filled: Record<string, unknown>,\n missing: string[],\n): Record<string, unknown> {\n const out = { ...partial };\n for (const key of missing) {\n if (filled[key] !== undefined) out[key] = filled[key];\n }\n return out;\n}\n\nfunction buildPrompt(input: LLMFallbackInput): string {\n const html = truncate(input.html, MAX_HTML_BYTES);\n return [\n 'Extract the following missing fields from the HTML below.',\n `Missing fields: ${input.missing.join(', ')}.`,\n 'Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it.',\n '',\n 'HTML:',\n html,\n ].join('\\n');\n}\n\nfunction truncate(s: string, maxBytes: number): string {\n if (s.length <= maxBytes) return s;\n return s.slice(0, 
maxBytes);\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,qBAAqB;AAC9B,SAAS,kBAAkB;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,gBAAgB;AACzB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,kBAAkB;AACvC,SAAS,cAAc,gBAAgB,sBAAsB;AAE7D,SAAS,6BAA6B;AAEtC,MAAM,iBAAiB;AAmBvB,MAAM,WAMF;AAAA,EACF,WAAW;AAAA,EACX,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,MAAM;AACR;AAEA,eAAsB,eACpB,OAC4B;AAC5B,MAAI,MAAM,QAAQ,WAAW,GAAG;AAC9B,WAAO,YAAY,MAAM,SAAS,CAAC,CAAC;AAAA,EACtC;AAEA,QAAM,MAAM,UAAU;AACtB,QAAM,SAAS,MAAM,UAAU,EAAE,WAAW,IAAI,sBAAsB;AACtE,MAAI,OAAO,aAAa,GAAG;AACzB,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,2DAA2D,IAAI,qBAAqB;AAAA,IACtF,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,eAAe,QAAQ,GAAG;AAC3C,MAAI,CAAC,UAAU;AACb,UAAM,UAAU,aAAa,EAC1B,IAAI,CAAC,MAAM,eAAe,CAAC,CAAC,EAC5B,KAAK,IAAI;AACZ,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,8CAA8C,OAAO,MAChD,MAAM,QAAQ,MAAM,qCAAqC,MAAM,QAAQ,KAAK,IAAI,CAAC;AAAA,IACxF,CAAC;AAAA,EACH;AAEA,QAAM,SAAS,QAAQ,IAAI,eAAe,QAAQ,CAAC;AACnD,QAAM,SAAS,YAAY,KAAK;AAChC,QAAM,aAAa,WAAW,MAAM;AACpC,QAAM,aAAa,WAAW,MAAM,UAAU;AAC9C,QAAM,UAAU,GAAG,QAAQ;AAE3B,sBAAoB;AACpB,QAAM,SAAS,eAAe,SAAS,YAAY,UAAU;AAC7D,MAAI,QAAQ;AACV,UAAM,SAAS,KAAK,MAAM,MAAM;AAChC,WAAO;AAAA,MACL,QAAQ,iBAAiB,MAAM,SAAS,QAAQ,MAAM,OAAO;AAAA,MAC7D;AAAA,MACA,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,WAAW;AAAA,MACX,UAAU,CAAC;AAAA,IACb;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACF,aAAS,MAAM,SAAS,QAAQ;AAAA,MAC9B,EAAE,QAAQ,YAAY,MAAM,YAAY,QAAQ,MAAM,OAAO;AAAA,MAC7D;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,iBAAiB,QAAQ,aAAc,EAAY,OAAO;AAAA,IAC5D,CAAC;AAAA,EACH,UAAE;AACA,WAAO,YAAY,KAAK,IAAI,GAAG,OAAO,YAAY,CAAC;AAAA,EACrD;AAEA,QAAM,SAAS,sBAAsB,OAAO,QAAQ,MAAM,UAAU;AACpE,MAAI,OAAO,SAAS,GAAG;AACrB,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,iBAAiB,QAAQ,wCAAwC,OAC9D,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,EAAE,OAAO,EAAE,EACnC,KAAK,IAAI,CAAC;AAAA,IACf,CAAC;AAAA,EACH;AAEA,QAAM,QAAQ,IAAI,kBAAkB,KAAK,KAAK,KAAK;AACnD,QAAM,MAAM,KAAK,IAAI;AACrB,iBAAe;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,KAAK,UAAU,OAAO,MAAM;AAAA,IACtC,WAAW;AAAA,IACX,WAAW,MAAM;AAAA,EACnB,CAAC;AAED,SAAO;AAAA,IACL,QAAQ,iBAAi
B,MAAM,SAAS,OAAO,QAAQ,MAAM,OAAO;AAAA,IACpE;AAAA,IACA,OAAO,OAAO;AAAA,IACd,QAAQ;AAAA,IACR,WAAW,OAAO;AAAA,IAClB,UAAU,OAAO,YAAY,CAAC;AAAA,EAChC;AACF;AAEA,SAAS,YACP,SACA,UACmB;AACnB,SAAO;AAAA,IACL,QAAQ,EAAE,GAAG,QAAQ;AAAA,IACrB,UAAU;AAAA,IACV,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX;AAAA,EACF;AACF;AAEA,SAAS,iBACP,SACA,QACA,SACyB;AACzB,QAAM,MAAM,EAAE,GAAG,QAAQ;AACzB,aAAW,OAAO,SAAS;AACzB,QAAI,OAAO,GAAG,MAAM,OAAW,KAAI,GAAG,IAAI,OAAO,GAAG;AAAA,EACtD;AACA,SAAO;AACT;AAEA,SAAS,YAAY,OAAiC;AACpD,QAAM,OAAO,SAAS,MAAM,MAAM,cAAc;AAChD,SAAO;AAAA,IACL;AAAA,IACA,mBAAmB,MAAM,QAAQ,KAAK,IAAI,CAAC;AAAA,IAC3C;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,SAAS,GAAW,UAA0B;AACrD,MAAI,EAAE,UAAU,SAAU,QAAO;AACjC,SAAO,EAAE,MAAM,GAAG,QAAQ;AAC5B;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-sanitize.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown-sanitize.ts"],"names":[],"mappings":"AAkFA,wBAAgB,yBAAyB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CA6D5D"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// Lower-case language names that can show up as a bare label line.
// NOTE(review): presumably consulted by sanitizeExtractedMarkdown to drop
// stray language labels — the use site is not visible in this chunk; confirm.
const STRAY_LABELS = /* @__PURE__ */ new Set([
  // JavaScript family
  "javascript", "typescript", "mjs", "cjs",
  // data / markup
  "json", "html", "css",
  // shells
  "bash", "sh", "shell",
  // Python
  "python", "py",
  // other compiled / JVM / native languages
  "go", "rust", "java", "kotlin", "swift",
  "cpp", "c++", "csharp",
  // scripting
  "ruby", "php",
]);
|
|
25
|
+
// Short language tag found glued to the start of a code line -> language name
// reported by unglueLangPrefix. Key order is significant: unglueLangPrefix
// walks the entries in insertion order and returns on the first usable match.
const GLUED_LANG_PREFIXES = {
  // JavaScript / TypeScript
  ts: "ts", js: "js", tsx: "tsx", jsx: "jsx",
  // scripting and systems languages
  py: "python", rb: "ruby", go: "go", rs: "rust", sh: "bash",
  // data and markup formats
  json: "json", html: "html", css: "css",
  yaml: "yaml", yml: "yaml", toml: "toml", md: "markdown",
};
|
|
43
|
+
const POST_PREFIX_TOKENS = [
|
|
44
|
+
"function",
|
|
45
|
+
"const",
|
|
46
|
+
"let",
|
|
47
|
+
"var",
|
|
48
|
+
"class",
|
|
49
|
+
"interface",
|
|
50
|
+
"type",
|
|
51
|
+
"enum",
|
|
52
|
+
"import",
|
|
53
|
+
"export",
|
|
54
|
+
"async",
|
|
55
|
+
"await",
|
|
56
|
+
"return",
|
|
57
|
+
"if",
|
|
58
|
+
"for",
|
|
59
|
+
"while",
|
|
60
|
+
"def",
|
|
61
|
+
"print",
|
|
62
|
+
"echo",
|
|
63
|
+
"package",
|
|
64
|
+
"public",
|
|
65
|
+
"private",
|
|
66
|
+
"protected",
|
|
67
|
+
"struct",
|
|
68
|
+
"fn",
|
|
69
|
+
"pub",
|
|
70
|
+
"use",
|
|
71
|
+
"mod"
|
|
72
|
+
];
|
|
73
|
+
function unglueLangPrefix(line) {
|
|
74
|
+
for (const [prefix, lang] of Object.entries(GLUED_LANG_PREFIXES)) {
|
|
75
|
+
if (!line.startsWith(prefix)) continue;
|
|
76
|
+
const rest = line.slice(prefix.length);
|
|
77
|
+
for (const tok of POST_PREFIX_TOKENS) {
|
|
78
|
+
if (rest.startsWith(tok)) {
|
|
79
|
+
const next = rest.charAt(tok.length);
|
|
80
|
+
if (next === "" || /[\s({<\[]/.test(next)) {
|
|
81
|
+
return { lang, line: rest };
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
return null;
|
|
87
|
+
}
|
|
88
|
+
/**
 * Classifies a line as a code-fence delimiter.
 *
 * A fence with a language tag is reported as an opener; a bare fence is
 * reported as a closer. Anything else is neither.
 *
 * @param {string} line - A single markdown line.
 * @returns {{open: boolean, close: boolean, lang?: string}}
 */
function isFenceLine(line) {
  const fence = /^(```+|~~~+)([a-zA-Z0-9_+-]*)\s*$/.exec(line);
  if (fence === null) {
    return { open: false, close: false };
  }
  const tag = fence[2].trim();
  if (tag) {
    return { open: true, close: false, lang: tag };
  }
  return { open: false, close: true, lang: undefined };
}
|
|
94
|
+
/**
 * Reports whether `line` closes a fence that was opened with `marker`.
 *
 * Following CommonMark, the closing run must use the same character as the
 * opener and be at least as long; only whitespace may follow it.
 *
 * @param {string} line - Candidate closing line.
 * @param {string} marker - The backtick or tilde run that opened the fence.
 * @returns {boolean}
 */
function isClosingFence(line, marker) {
  const fenceChar = marker[0];
  let run = 0;
  while (line[run] === fenceChar) run++;
  return run >= marker.length && line.slice(run).trim() === "";
}

/**
 * Cleans up fenced code blocks in extracted markdown.
 *
 * Inside fences it (1) replaces a `markdown` fence tag with a bare fence,
 * (2) recovers a language alias fused to the first content line and writes it
 * onto the opening fence, and (3) drops lines that consist solely of a known
 * language label. Text outside fences is passed through untouched.
 *
 * @param {string} md - Markdown to sanitize.
 * @returns {string} Sanitized markdown; `md` itself when it has no fences.
 */
function sanitizeExtractedMarkdown(md) {
  if (!md.includes("```") && !md.includes("~~~")) return md;
  const out = [];
  // Opening marker of the fence we are inside, or null when outside a fence.
  let fenceMarker = null;
  let pendingFirstContentLine = false;
  for (const line of md.split("\n")) {
    if (fenceMarker === null) {
      const f = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\s*$/);
      if (f) {
        fenceMarker = f[1];
        pendingFirstContentLine = true;
        // A "markdown" tag is discarded; the real language may be recovered
        // from the first content line below.
        out.push(f[2] === "markdown" ? fenceMarker : line);
        continue;
      }
      out.push(line);
      continue;
    }
    // Inside a fence. A longer closing run (e.g. ```` closing ```) also ends
    // the block; previously it was missed and the rest of the document was
    // treated as code.
    if (isClosingFence(line, fenceMarker)) {
      fenceMarker = null;
      pendingFirstContentLine = false;
      out.push(line);
      continue;
    }
    if (pendingFirstContentLine) {
      pendingFirstContentLine = false;
      const unglued = unglueLangPrefix(line);
      if (unglued) {
        // Rewrite the fence-open line that was pushed last so it carries the
        // recovered language tag.
        const lastIdx = out.length - 1;
        const prev = out[lastIdx];
        if (prev.startsWith("```") || prev.startsWith("~~~")) {
          out[lastIdx] = `${prev.match(/^(```+|~~~+)/)[1]}${unglued.lang ?? ""}`;
        }
        out.push(unglued.line);
        continue;
      }
    }
    const trimmed = line.trim();
    if (trimmed && STRAY_LABELS.has(trimmed.toLowerCase()) && !line.includes(" ")) {
      // Stray language label line: drop it.
      continue;
    }
    out.push(line);
  }
  return out.join("\n");
}
|
|
148
|
+
export {
|
|
149
|
+
sanitizeExtractedMarkdown
|
|
150
|
+
};
|
|
151
|
+
//# sourceMappingURL=markdown-sanitize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/markdown-sanitize.ts"],"sourcesContent":["// Post-extraction sanitizer for markdown returned by ensemble extractors.\n// Currently targets a Node-docs pattern where tab labels (\"javascript\",\n// \"typescript\", etc.) leak into a fenced code block as a bare line.\n\nconst STRAY_LABELS = new Set([\n 'javascript',\n 'typescript',\n 'mjs',\n 'cjs',\n 'json',\n 'html',\n 'css',\n 'bash',\n 'sh',\n 'shell',\n 'python',\n 'py',\n 'go',\n 'rust',\n 'java',\n 'kotlin',\n 'swift',\n 'cpp',\n 'c++',\n 'csharp',\n 'ruby',\n 'php',\n]);\n\n// Short aliases that can appear *glued* to the first identifier on the first\n// line of a fenced block (e.g. the TypeScript docs render `<span>ts</span>` and\n// the next token concatenates without a separator → `tsfunction`, `jsconst`).\nconst GLUED_LANG_PREFIXES: Record<string, string> = {\n ts: 'ts',\n js: 'js',\n tsx: 'tsx',\n jsx: 'jsx',\n py: 'python',\n rb: 'ruby',\n go: 'go',\n rs: 'rust',\n sh: 'bash',\n json: 'json',\n html: 'html',\n css: 'css',\n yaml: 'yaml',\n yml: 'yaml',\n toml: 'toml',\n md: 'markdown',\n};\n\n// Identifier keywords that commonly follow a stuck language prefix.\nconst POST_PREFIX_TOKENS = [\n 'function', 'const', 'let', 'var', 'class', 'interface', 'type', 'enum',\n 'import', 'export', 'async', 'await', 'return', 'if', 'for', 'while',\n 'def', 'print', 'echo', 'package', 'public', 'private', 'protected',\n 'struct', 'fn', 'pub', 'use', 'mod',\n];\n\nfunction unglueLangPrefix(line: string): { lang?: string; line: string } | null {\n for (const [prefix, lang] of Object.entries(GLUED_LANG_PREFIXES)) {\n if (!line.startsWith(prefix)) continue;\n const rest = line.slice(prefix.length);\n for (const tok of POST_PREFIX_TOKENS) {\n if (rest.startsWith(tok)) {\n const next = rest.charAt(tok.length);\n if (next === '' || /[\\s({<\\[]/.test(next)) {\n return { lang, line: rest };\n }\n }\n }\n }\n return null;\n}\n\nfunction isFenceLine(line: string): { open: boolean; 
close: boolean; lang?: string } {\n const m = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\\s*$/);\n if (!m) return { open: false, close: false };\n const lang = m[2]?.trim() || undefined;\n return { open: !!lang, close: !lang, lang };\n}\n\nexport function sanitizeExtractedMarkdown(md: string): string {\n if (!md.includes('```') && !md.includes('~~~')) return md;\n const lines = md.split('\\n');\n const out: string[] = [];\n let inFence = false;\n let fenceMarker: string | null = null;\n let pendingFirstContentLine = false;\n\n for (let i = 0; i < lines.length; i++) {\n const line = lines[i];\n if (!inFence) {\n const f = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\\s*$/);\n if (f) {\n inFence = true;\n fenceMarker = f[1];\n pendingFirstContentLine = true;\n const declaredLang = f[2];\n // 'markdown' is a sentinel many extractors emit when the source code\n // tag didn't carry a language class. Reset it; we'll try to recover\n // the real lang from the first content line below.\n if (declaredLang === 'markdown') {\n out.push(fenceMarker);\n } else {\n out.push(line);\n }\n continue;\n }\n out.push(line);\n continue;\n }\n // inside a fence\n if (fenceMarker && line.startsWith(fenceMarker) && line.replace(fenceMarker, '').trim() === '') {\n inFence = false;\n fenceMarker = null;\n pendingFirstContentLine = false;\n out.push(line);\n continue;\n }\n if (pendingFirstContentLine) {\n pendingFirstContentLine = false;\n const unglued = unglueLangPrefix(line);\n if (unglued) {\n // Replace the most recently pushed fence-open line with one that\n // carries the recovered language tag.\n const lastIdx = out.length - 1;\n const prev = out[lastIdx];\n if (prev.startsWith('```') || prev.startsWith('~~~')) {\n out[lastIdx] = `${prev.match(/^(```+|~~~+)/)![1]}${unglued.lang ?? 
''}`;\n }\n out.push(unglued.line);\n continue;\n }\n }\n const trimmed = line.trim();\n if (trimmed && STRAY_LABELS.has(trimmed.toLowerCase()) && !line.includes(' ')) {\n // Drop the stray language label line.\n continue;\n }\n out.push(line);\n }\n return out.join('\\n');\n}\n"],"mappings":"AAIA,MAAM,eAAe,oBAAI,IAAI;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKD,MAAM,sBAA8C;AAAA,EAClD,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,KAAK;AAAA,EACL,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,IAAI;AACN;AAGA,MAAM,qBAAqB;AAAA,EACzB;AAAA,EAAY;AAAA,EAAS;AAAA,EAAO;AAAA,EAAO;AAAA,EAAS;AAAA,EAAa;AAAA,EAAQ;AAAA,EACjE;AAAA,EAAU;AAAA,EAAU;AAAA,EAAS;AAAA,EAAS;AAAA,EAAU;AAAA,EAAM;AAAA,EAAO;AAAA,EAC7D;AAAA,EAAO;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAW;AAAA,EAAU;AAAA,EAAW;AAAA,EACxD;AAAA,EAAU;AAAA,EAAM;AAAA,EAAO;AAAA,EAAO;AAChC;AAEA,SAAS,iBAAiB,MAAsD;AAC9E,aAAW,CAAC,QAAQ,IAAI,KAAK,OAAO,QAAQ,mBAAmB,GAAG;AAChE,QAAI,CAAC,KAAK,WAAW,MAAM,EAAG;AAC9B,UAAM,OAAO,KAAK,MAAM,OAAO,MAAM;AACrC,eAAW,OAAO,oBAAoB;AACpC,UAAI,KAAK,WAAW,GAAG,GAAG;AACxB,cAAM,OAAO,KAAK,OAAO,IAAI,MAAM;AACnC,YAAI,SAAS,MAAM,YAAY,KAAK,IAAI,GAAG;AACzC,iBAAO,EAAE,MAAM,MAAM,KAAK;AAAA,QAC5B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,YAAY,MAAgE;AACnF,QAAM,IAAI,KAAK,MAAM,mCAAmC;AACxD,MAAI,CAAC,EAAG,QAAO,EAAE,MAAM,OAAO,OAAO,MAAM;AAC3C,QAAM,OAAO,EAAE,CAAC,GAAG,KAAK,KAAK;AAC7B,SAAO,EAAE,MAAM,CAAC,CAAC,MAAM,OAAO,CAAC,MAAM,KAAK;AAC5C;AAEO,SAAS,0BAA0B,IAAoB;AAC5D,MAAI,CAAC,GAAG,SAAS,KAAK,KAAK,CAAC,GAAG,SAAS,KAAK,EAAG,QAAO;AACvD,QAAM,QAAQ,GAAG,MAAM,IAAI;AAC3B,QAAM,MAAgB,CAAC;AACvB,MAAI,UAAU;AACd,MAAI,cAA6B;AACjC,MAAI,0BAA0B;AAE9B,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,OAAO,MAAM,CAAC;AACpB,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,KAAK,MAAM
,mCAAmC;AACxD,UAAI,GAAG;AACL,kBAAU;AACV,sBAAc,EAAE,CAAC;AACjB,kCAA0B;AAC1B,cAAM,eAAe,EAAE,CAAC;AAIxB,YAAI,iBAAiB,YAAY;AAC/B,cAAI,KAAK,WAAW;AAAA,QACtB,OAAO;AACL,cAAI,KAAK,IAAI;AAAA,QACf;AACA;AAAA,MACF;AACA,UAAI,KAAK,IAAI;AACb;AAAA,IACF;AAEA,QAAI,eAAe,KAAK,WAAW,WAAW,KAAK,KAAK,QAAQ,aAAa,EAAE,EAAE,KAAK,MAAM,IAAI;AAC9F,gBAAU;AACV,oBAAc;AACd,gCAA0B;AAC1B,UAAI,KAAK,IAAI;AACb;AAAA,IACF;AACA,QAAI,yBAAyB;AAC3B,gCAA0B;AAC1B,YAAM,UAAU,iBAAiB,IAAI;AACrC,UAAI,SAAS;AAGX,cAAM,UAAU,IAAI,SAAS;AAC7B,cAAM,OAAO,IAAI,OAAO;AACxB,YAAI,KAAK,WAAW,KAAK,KAAK,KAAK,WAAW,KAAK,GAAG;AACpD,cAAI,OAAO,IAAI,GAAG,KAAK,MAAM,cAAc,EAAG,CAAC,CAAC,GAAG,QAAQ,QAAQ,EAAE;AAAA,QACvE;AACA,YAAI,KAAK,QAAQ,IAAI;AACrB;AAAA,MACF;AAAA,IACF;AACA,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,WAAW,aAAa,IAAI,QAAQ,YAAY,CAAC,KAAK,CAAC,KAAK,SAAS,GAAG,GAAG;AAE7E;AAAA,IACF;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO,IAAI,KAAK,IAAI;AACtB;","names":[]}
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
export declare function buildTurndown(): TurndownService;
|
|
1
3
|
export declare function htmlToMarkdown(html: string): string;
|
|
4
|
+
export interface Heading {
|
|
5
|
+
level: number;
|
|
6
|
+
text: string;
|
|
7
|
+
lineIndex: number;
|
|
8
|
+
}
|
|
9
|
+
export declare function parseHeadings(lines: string[]): Heading[];
|
|
10
|
+
export declare function lineStartCharOffsets(lines: string[]): number[];
|
|
2
11
|
export declare function extractSection(markdown: string, section: string, sectionIndex?: number): {
|
|
3
12
|
content: string;
|
|
4
13
|
matched: boolean;
|
|
@@ -7,4 +16,6 @@ export declare function extractLinksAndImages(markdown: string): {
|
|
|
7
16
|
links: string[];
|
|
8
17
|
images: string[];
|
|
9
18
|
};
|
|
19
|
+
export declare function filterDecorativeImages(markdown: string): string;
|
|
20
|
+
export declare function resolveRelativeUrls(markdown: string, baseUrl: string): string;
|
|
10
21
|
//# sourceMappingURL=markdown.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAiBvC,wBAAgB,aAAa,IAAI,eAAe,CA2D/C;AAID,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,EAAE,CASxD;AAID,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQ9D;AAkBD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAyC7E"}
|
|
@@ -1,107 +1,211 @@
|
|
|
1
|
-
import TurndownService from
|
|
1
|
+
import TurndownService from "turndown";
|
|
2
|
+
import { detectCodeLanguage } from "./lang-hints.js";
|
|
3
|
+
/**
 * Returns the length of the longest run of consecutive backticks in `s`.
 *
 * @param {string} s - Text to scan.
 * @returns {number} Longest run length, or 0 when `s` has no backticks.
 */
function longestBacktickRun(s) {
  const runs = s.match(/`+/g) ?? [];
  return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
|
|
2
16
|
/**
 * Builds a TurndownService configured with ATX headings and fenced code
 * blocks, plus custom rules for tables and `<pre><code>` blocks.
 *
 * @returns {TurndownService} A new, fully configured converter.
 */
function buildTurndown() {
  const td = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  td.remove(["script", "style"]);

  // Render tables as pipe tables. The first row always becomes the header row
  // (whether or not it uses <th>), followed by a `---` separator.
  td.addRule("table", {
    filter: "table",
    replacement(_content, node) {
      const rows = Array.from(node.querySelectorAll("tr"));
      if (rows.length === 0) return "";
      const cellsOf = (row) => Array.from(row.querySelectorAll("th, td"));
      // Newlines would break the row and a literal `|` would be read as a
      // column boundary, so flatten the former and escape the latter.
      const cellText = (cell) =>
        (cell.textContent?.replace(/\n/g, " ").trim() ?? "").replace(/\|/g, "\\|");
      const renderRow = (row) => "| " + cellsOf(row).map(cellText).join(" | ") + " |";
      const [headerRow, ...bodyRows] = rows;
      const separator = "| " + cellsOf(headerRow).map(() => "---").join(" | ") + " |";
      const lines = [renderRow(headerRow), separator, ...bodyRows.map(renderRow)];
      return "\n\n" + lines.join("\n") + "\n\n";
    }
  });

  // Table sub-elements pass their content through unchanged; the "table" rule
  // above builds its output from the DOM rather than from this content.
  td.addRule("tableCell", {
    filter: ["thead", "tbody", "tfoot", "tr", "th", "td"],
    replacement(content) {
      return content;
    }
  });

  // Emit <pre><code> as a fenced block with a language tag derived from the
  // element's class attribute.
  td.addRule("codeBlockLang", {
    filter(node) {
      return node.nodeName === "PRE" && node.querySelector("code") !== null;
    },
    replacement(_content, node) {
      const code = node.querySelector("code");
      const cls = code?.getAttribute("class") ?? node.getAttribute("class") ?? "";
      const lang = detectCodeLanguage(cls);
      const body = code?.textContent ?? node.textContent ?? "";
      // The fence must be longer than any backtick run inside the body.
      const fence = "`".repeat(Math.max(3, longestBacktickRun(body) + 1));
      return `\n\n${fence}${lang ?? ""}\n${body.replace(/\n+$/, "")}\n${fence}\n\n`;
    }
  });
  return td;
}
|
|
40
70
|
// Module-level converter instance, built once at load time and used by htmlToMarkdown.
const turndown = buildTurndown();
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
return turndown.turndown(html);
|
|
71
|
+
/**
 * Converts an HTML string to markdown using the module-level converter.
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown, or "" when `html` is empty or otherwise falsy.
 */
function htmlToMarkdown(html) {
  return html ? turndown.turndown(html) : "";
}
|
|
46
75
|
/**
 * Collects ATX headings (`#` through `######`) from markdown lines.
 *
 * Lines inside fenced code blocks are skipped, so `# comment` lines in shell
 * or Python snippets are not reported as headings.
 *
 * @param {string[]} lines - Markdown split into lines.
 * @returns {{level: number, text: string, lineIndex: number}[]} Headings in
 *   document order; `lineIndex` indexes into `lines`.
 */
function parseHeadings(lines) {
  const headings = [];
  // Marker run of the fence currently open, or null when outside a fence.
  let openFence = null;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fence = /^ {0,3}(`{3,}|~{3,})/.exec(line);
    if (openFence !== null) {
      // A fence closes on a run of the same character, at least as long as
      // the opener, followed only by whitespace.
      const closes =
        fence !== null &&
        fence[1][0] === openFence[0] &&
        fence[1].length >= openFence.length &&
        line.slice(fence[0].length).trim() === "";
      if (closes) openFence = null;
      continue;
    }
    if (fence !== null) {
      // The info string of a backtick fence cannot contain a backtick; such a
      // line is inline code, not a fence opener.
      const info = line.slice(fence[0].length);
      if (!(fence[1][0] === "`" && info.includes("`"))) {
        openFence = fence[1];
        continue;
      }
    }
    const match = line.match(/^(#{1,6})\s+(.+)/);
    if (match) {
      headings.push({ level: match[1].length, text: match[2].trim(), lineIndex: i });
    }
  }
  return headings;
}
|
|
85
|
+
/**
 * Computes the starting character offset of every line, counting one
 * separator character after each line.
 *
 * @param {string[]} lines - Lines of a document.
 * @returns {number[]} `offsets[i]` is the offset at which `lines[i]` starts.
 */
function lineStartCharOffsets(lines) {
  const offsets = [];
  let cursor = 0;
  for (const line of lines) {
    offsets.push(cursor);
    cursor += line.length + 1; // +1 for the separator after the line
  }
  return offsets;
}
|
|
56
94
|
/**
 * Returns the text of one section: from its heading line up to (but not
 * including) the next heading of the same or a higher level, or to the end
 * of the document.
 *
 * @param {string[]} lines - All lines of the document.
 * @param {{level: number, lineIndex: number}[]} headings - Parsed headings.
 * @param {number} headingIdx - Index into `headings` of the section to cut.
 * @returns {string} The section's lines joined with "\n".
 */
function extractFromHeading(lines, headings, headingIdx) {
  const { level, lineIndex: start } = headings[headingIdx];
  const boundary = headings
    .slice(headingIdx + 1)
    .find((candidate) => candidate.level <= level);
  const end = boundary ? boundary.lineIndex : lines.length;
  return lines.slice(start, end).join("\n");
}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
const
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
106
|
+
/**
 * Extracts the section whose heading matches `section` (case-insensitive).
 *
 * Exact heading matches are preferred; when `sectionIndex` is not covered by
 * the exact matches, headings that merely contain `section` are used instead.
 *
 * @param {string} markdown - Full markdown document.
 * @param {string} section - Heading text to look for.
 * @param {number} [sectionIndex=0] - Which match to return when several
 *   headings match.
 * @returns {{content: string, matched: boolean}} The section text with
 *   `matched: true`, or the whole document with `matched: false` when nothing
 *   matches or `sectionIndex` does not select a match.
 */
function extractSection(markdown, section, sectionIndex = 0) {
  const unmatched = { content: markdown, matched: false };
  const lines = markdown.split("\n");
  const headings = parseHeadings(lines);
  if (headings.length === 0) return unmatched;

  const wanted = section.toLowerCase();
  const indexed = headings.map((h, i) => ({ text: h.text.toLowerCase(), i }));
  const exactMatches = indexed.filter(({ text }) => text === wanted);
  const candidates =
    sectionIndex < exactMatches.length
      ? exactMatches
      : indexed.filter(({ text }) => text.includes(wanted));

  // An index that selects nothing (too large, negative, fractional) reports
  // "no match" instead of throwing on a missing array element.
  const hit = candidates[sectionIndex];
  if (hit === undefined) return unmatched;
  return { content: extractFromHeading(lines, headings, hit.i), matched: true };
}
|
|
124
|
+
/**
 * Lists the distinct targets of inline links and images in markdown.
 *
 * A target is everything between the parentheses of `[text](...)` or
 * `![alt](...)`. Targets are deduplicated and kept in first-seen order.
 *
 * @param {string} markdown - Markdown to scan.
 * @returns {{links: string[], images: string[]}}
 */
function extractLinksAndImages(markdown) {
  // Gathers capture group 1 of every match of `pattern`, without duplicates.
  const collectTargets = (pattern) => {
    const seen = new Set();
    for (let hit = pattern.exec(markdown); hit !== null; hit = pattern.exec(markdown)) {
      seen.add(hit[1]);
    }
    return Array.from(seen);
  };
  const images = collectTargets(/!\[[^\]]*\]\(([^)]+)\)/g);
  // The lookbehind keeps image syntax out of the link list.
  const links = collectTargets(/(?<!!)\[[^\]]*\]\(([^)]+)\)/g);
  return { links, images };
}
|
|
138
|
+
// Substrings that mark an image URL as decorative (matched case-insensitively
// anywhere in the URL).
const DECORATIVE_URL_MARKERS = [
  "avatar", "icon", "logo", "badge", "shield",
  "tracking", "pixel", "sprite", "emoji", "favicon",
];

/**
 * Removes decorative images from markdown.
 *
 * An image is removed when its source is a base64 GIF data URI, an SVG data
 * URI shorter than 200 characters, contains one of DECORATIVE_URL_MARKERS, or
 * when its alt text is blank. Other images are left untouched.
 *
 * @param {string} markdown - Markdown to filter.
 * @returns {string} Markdown without the decorative images; a falsy input is
 *   returned as-is.
 */
function filterDecorativeImages(markdown) {
  if (!markdown) return markdown;
  const isDecorative = (alt, src) => {
    const lowerSrc = src.toLowerCase();
    return (
      lowerSrc.startsWith("data:image/gif;base64,") ||
      (lowerSrc.startsWith("data:image/svg+xml") && src.length < 200) ||
      DECORATIVE_URL_MARKERS.some((marker) => lowerSrc.includes(marker)) ||
      alt.trim() === ""
    );
  };
  return markdown.replace(
    /!\[([^\]]*)\]\(([^)]+)\)/g,
    (image, alt, src) => (isDecorative(alt, src) ? "" : image)
  );
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
164
|
+
/**
 * Rewrites relative link and image targets in markdown to absolute URLs.
 *
 * Targets that already start with `http:`, `https:`, `mailto:`, `tel:`,
 * `javascript:` or `data:` are left alone. Protocol-relative targets (`//host`)
 * receive the base URL's protocol; everything else, including `#fragment`
 * targets, is resolved against `baseUrl`. A target that cannot be resolved is
 * kept unchanged.
 *
 * @param {string} markdown - Markdown to rewrite.
 * @param {string} baseUrl - Absolute URL the document was fetched from.
 * @returns {string} Markdown with resolved targets; `markdown` itself when
 *   either argument is empty.
 */
function resolveRelativeUrls(markdown, baseUrl) {
  if (!markdown || !baseUrl) return markdown;

  const rewrite = (path) => {
    const trimmed = path.trim();
    if (!trimmed) return path;
    if (/^(?:https?:|mailto:|tel:|javascript:|data:)/i.test(trimmed)) return path;
    try {
      if (trimmed.startsWith("//")) {
        return `${new URL(baseUrl).protocol}${trimmed}`;
      }
      return new URL(trimmed, baseUrl).href;
    } catch {
      // Unparseable base or target: keep the original text.
      return path;
    }
  };

  // Images first, then links. The link pattern uses a lookbehind rather than
  // consuming the preceding character, so a link that directly follows
  // another link (`[a](/x)[b](/y)`) is rewritten as well.
  const withImages = markdown.replace(
    /(!\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g,
    (_m, open, path, close) => `${open}${rewrite(path)}${close}`
  );
  return withImages.replace(
    /(?<!!)(\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g,
    (_m, open, path, close) => `${open}${rewrite(path)}${close}`
  );
}
|
|
201
|
+
export {
|
|
202
|
+
buildTurndown,
|
|
203
|
+
extractLinksAndImages,
|
|
204
|
+
extractSection,
|
|
205
|
+
filterDecorativeImages,
|
|
206
|
+
htmlToMarkdown,
|
|
207
|
+
lineStartCharOffsets,
|
|
208
|
+
parseHeadings,
|
|
209
|
+
resolveRelativeUrls
|
|
210
|
+
};
|
|
107
211
|
//# sourceMappingURL=markdown.js.map
|