@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
 * Whole-line boilerplate phrases, stored lower-cased.
 * stripBoilerplateMarkdown drops a line when its trimmed, lower-cased text
 * equals one of these entries exactly.
 */
const BOILERPLATE_TEXT_EQUALITY = [
  // feedback widget
  "was this helpful?", "send",
  // edit / contribute links
  "edit this page", "edit on github", "suggest changes",
  // in-page navigation chrome
  "skip to main content", "on this page"
];

/**
 * Regular expressions for boilerplate lines whose text varies.
 * stripBoilerplateMarkdown drops a line when any of these matches it.
 */
const BOILERPLATE_TEXT_PATTERNS = [/^\s*last updated on .+$/i];

/**
 * CSS selectors handed to `querySelectorAll` by stripBoilerplateDom;
 * every element they match is detached from its parent.
 */
const BOILERPLATE_SELECTORS = [
  // feedback and edit affordances
  '[class*="feedback"]', '[class*="edit-page"]', '[aria-label*="Edit"]',
  // footers, call-to-action bars and banners
  'footer[class*="docs"]', '[class*="sticky-cta"]', 'main [role="banner"]',
  // navigation and side panels
  '[role="navigation"]', '[class*="sidebar"]', '[data-collection="docs"]'
];
|
|
24
|
+
function stripBoilerplateMarkdown(md) {
|
|
25
|
+
if (!md) return md;
|
|
26
|
+
const lines = md.split("\n");
|
|
27
|
+
const kept = lines.filter((line) => {
|
|
28
|
+
const t = line.trim().toLowerCase();
|
|
29
|
+
if (!t) return true;
|
|
30
|
+
if (BOILERPLATE_TEXT_EQUALITY.includes(t)) return false;
|
|
31
|
+
return !BOILERPLATE_TEXT_PATTERNS.some((re) => re.test(line));
|
|
32
|
+
});
|
|
33
|
+
return kept.join("\n").replace(/\n{3,}/g, "\n\n");
|
|
34
|
+
}
|
|
35
|
+
function stripBoilerplateDom(document) {
|
|
36
|
+
for (const sel of BOILERPLATE_SELECTORS) {
|
|
37
|
+
const nodes = document.querySelectorAll(sel);
|
|
38
|
+
const list = [];
|
|
39
|
+
for (let i = 0; i < nodes.length; i++) list.push(nodes[i]);
|
|
40
|
+
for (const el of list) {
|
|
41
|
+
el.parentNode?.removeChild(el);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
export {
|
|
46
|
+
BOILERPLATE_SELECTORS,
|
|
47
|
+
BOILERPLATE_TEXT_EQUALITY,
|
|
48
|
+
BOILERPLATE_TEXT_PATTERNS,
|
|
49
|
+
stripBoilerplateDom,
|
|
50
|
+
stripBoilerplateMarkdown
|
|
51
|
+
};
|
|
52
|
+
//# sourceMappingURL=boilerplate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/boilerplate.ts"],"sourcesContent":["export const BOILERPLATE_TEXT_EQUALITY: ReadonlyArray<string> = [\n 'was this helpful?',\n 'send',\n 'edit this page',\n 'edit on github',\n 'suggest changes',\n 'skip to main content',\n 'on this page',\n];\n\nexport const BOILERPLATE_TEXT_PATTERNS: ReadonlyArray<RegExp> = [\n /^\\s*last updated on .+$/i,\n];\n\nexport const BOILERPLATE_SELECTORS: ReadonlyArray<string> = [\n '[class*=\"feedback\"]',\n '[class*=\"edit-page\"]',\n '[aria-label*=\"Edit\"]',\n 'footer[class*=\"docs\"]',\n '[class*=\"sticky-cta\"]',\n 'main [role=\"banner\"]',\n '[role=\"navigation\"]',\n '[class*=\"sidebar\"]',\n '[data-collection=\"docs\"]',\n];\n\nexport interface BoilerplateDocument {\n querySelectorAll(selector: string): ArrayLike<BoilerplateElement>;\n}\n\ninterface BoilerplateElement {\n parentNode: { removeChild(child: BoilerplateElement): void } | null;\n}\n\nexport function stripBoilerplateMarkdown(md: string): string {\n if (!md) return md;\n const lines = md.split('\\n');\n const kept = lines.filter((line) => {\n const t = line.trim().toLowerCase();\n if (!t) return true;\n if (BOILERPLATE_TEXT_EQUALITY.includes(t)) return false;\n return !BOILERPLATE_TEXT_PATTERNS.some((re) => re.test(line));\n });\n return kept.join('\\n').replace(/\\n{3,}/g, '\\n\\n');\n}\n\nexport function stripBoilerplateDom(document: BoilerplateDocument): void {\n for (const sel of BOILERPLATE_SELECTORS) {\n const nodes = document.querySelectorAll(sel);\n const list: BoilerplateElement[] = [];\n for (let i = 0; i < nodes.length; i++) list.push(nodes[i]);\n for (const el of list) {\n el.parentNode?.removeChild(el);\n }\n 
}\n}\n"],"mappings":"AAAO,MAAM,4BAAmD;AAAA,EAC9D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,MAAM,4BAAmD;AAAA,EAC9D;AACF;AAEO,MAAM,wBAA+C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAUO,SAAS,yBAAyB,IAAoB;AAC3D,MAAI,CAAC,GAAI,QAAO;AAChB,QAAM,QAAQ,GAAG,MAAM,IAAI;AAC3B,QAAM,OAAO,MAAM,OAAO,CAAC,SAAS;AAClC,UAAM,IAAI,KAAK,KAAK,EAAE,YAAY;AAClC,QAAI,CAAC,EAAG,QAAO;AACf,QAAI,0BAA0B,SAAS,CAAC,EAAG,QAAO;AAClD,WAAO,CAAC,0BAA0B,KAAK,CAAC,OAAO,GAAG,KAAK,IAAI,CAAC;AAAA,EAC9D,CAAC;AACD,SAAO,KAAK,KAAK,IAAI,EAAE,QAAQ,WAAW,MAAM;AAClD;AAEO,SAAS,oBAAoB,UAAqC;AACvE,aAAW,OAAO,uBAAuB;AACvC,UAAM,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,UAAM,OAA6B,CAAC;AACpC,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,MAAK,KAAK,MAAM,CAAC,CAAC;AACzD,eAAW,MAAM,MAAM;AACrB,SAAG,YAAY,YAAY,EAAE;AAAA,IAC/B;AAAA,EACF;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"defuddle.d.ts","sourceRoot":"","sources":["../../src/extraction/defuddle.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"defuddle.d.ts","sourceRoot":"","sources":["../../src/extraction/defuddle.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAKpD,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAsBjG"}
|
|
@@ -1,26 +1,30 @@
|
|
|
1
|
-
import { Defuddle } from
|
|
1
|
+
import { Defuddle } from "defuddle/node";
|
|
2
|
+
import { htmlToMarkdown } from "./markdown.js";
|
|
2
3
|
const MIN_CONTENT_THRESHOLD = 100;
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
4
|
+
/**
 * Extract a page with Defuddle and convert the extracted HTML to Markdown.
 *
 * Resolves to `null` when Defuddle reports no content, when the Markdown is
 * shorter than MIN_CONTENT_THRESHOLD characters, or when any step throws.
 *
 * @param html Page HTML, passed to Defuddle unchanged.
 * @param url Page URL, passed to Defuddle unchanged.
 * @returns An extraction result tagged `extractor: "defuddle"`, or `null`.
 */
async function defuddleExtract(html, url) {
  try {
    const {
      content,
      title,
      description,
      author,
      published,
      language
    } = await Defuddle(html, url);
    if (!content) return null;

    const markdown = htmlToMarkdown(content);
    if (markdown.length < MIN_CONTENT_THRESHOLD) return null;

    // Empty metadata strings are normalized to undefined.
    const metadata = {
      description: description || undefined,
      author: author || undefined,
      date: published || undefined,
      language: language || undefined
    };
    return {
      title: title ?? "",
      markdown,
      metadata,
      links: [],
      images: [],
      extractor: "defuddle"
    };
  } catch {
    // Best-effort extractor: any failure is reported as "no result".
    return null;
  }
}
|
|
27
|
+
export {
|
|
28
|
+
defuddleExtract
|
|
29
|
+
};
|
|
26
30
|
//# sourceMappingURL=defuddle.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/defuddle.ts"],"sourcesContent":["import { Defuddle } from 'defuddle/node';\nimport type { ExtractionResult } from '../types.js';\nimport { htmlToMarkdown } from './markdown.js';\n\nconst MIN_CONTENT_THRESHOLD = 100;\n\nexport async function defuddleExtract(html: string, url: string): Promise<ExtractionResult | null> {\n try {\n const result = await Defuddle(html, url);\n if (!result.content) return null;\n const markdown = htmlToMarkdown(result.content);\n if (markdown.length < MIN_CONTENT_THRESHOLD) return null;\n return {\n title: result.title ?? '',\n markdown,\n metadata: {\n description: result.description || undefined,\n author: result.author || undefined,\n date: result.published || undefined,\n language: result.language || undefined,\n },\n links: [],\n images: [],\n extractor: 'defuddle',\n };\n } catch {\n return null;\n }\n}\n"],"mappings":"AAAA,SAAS,gBAAgB;AAEzB,SAAS,sBAAsB;AAE/B,MAAM,wBAAwB;AAE9B,eAAsB,gBAAgB,MAAc,KAA+C;AACjG,MAAI;AACF,UAAM,SAAS,MAAM,SAAS,MAAM,GAAG;AACvC,QAAI,CAAC,OAAO,QAAS,QAAO;AAC5B,UAAM,WAAW,eAAe,OAAO,OAAO;AAC9C,QAAI,SAAS,SAAS,sBAAuB,QAAO;AACpD,WAAO;AAAA,MACL,OAAO,OAAO,SAAS;AAAA,MACvB;AAAA,MACA,UAAU;AAAA,QACR,aAAa,OAAO,eAAe;AAAA,QACnC,QAAQ,OAAO,UAAU;AAAA,QACzB,MAAM,OAAO,aAAa;AAAA,QAC1B,UAAU,OAAO,YAAY;AAAA,MAC/B;AAAA,MACA,OAAO,CAAC;AAAA,MACR,QAAQ,CAAC;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,
|
|
1
|
+
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAiC1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
|
|
@@ -1,83 +1,83 @@
|
|
|
1
|
-
import { parseHTML } from
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
2
|
function getMetaContent(doc, nameOrProperty) {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
return el?.getAttribute('content') ?? undefined;
|
|
3
|
+
const el = doc.querySelector(`meta[name="${nameOrProperty}"]`) ?? doc.querySelector(`meta[property="${nameOrProperty}"]`);
|
|
4
|
+
return el?.getAttribute("content") ?? void 0;
|
|
6
5
|
}
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
return result;
|
|
6
|
+
/**
 * Collect page-level metadata from an HTML string.
 *
 * Reads `<title>`, a set of `<meta>` tags (via getMetaContent) and the
 * canonical `<link>`. A field is set on the result only when its value is
 * truthy, so absent or empty tags leave the field out.
 *
 * @param html HTML source, parsed with linkedom's `parseHTML`.
 * @returns Object with any of: title, description, author, date, keywords,
 *   og_image, og_type, canonical_url.
 */
function extractMetadata(html) {
  const { document: doc } = parseHTML(html);
  const result = {};
  const meta = (key) => getMetaContent(doc, key);
  const setIfPresent = (field, value) => {
    if (value) result[field] = value;
  };

  setIfPresent("title", doc.querySelector("title")?.textContent?.trim());
  setIfPresent("description", meta("description") ?? meta("og:description"));
  setIfPresent("author", meta("author"));
  setIfPresent("date", meta("date") ?? meta("article:published_time"));

  // Keywords are a comma-separated list; blank entries are discarded.
  const rawKeywords = meta("keywords");
  if (rawKeywords) {
    result.keywords = rawKeywords
      .split(",")
      .map((keyword) => keyword.trim())
      .filter(Boolean);
  }

  setIfPresent("og_image", meta("og:image"));
  setIfPresent("og_type", meta("og:type"));
  setIfPresent(
    "canonical_url",
    doc.querySelector('link[rel="canonical"]')?.getAttribute("href")
  );
  return result;
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
29
|
+
/**
 * Return the trimmed text content of the element(s) matching a CSS selector.
 *
 * @param html HTML source, parsed with linkedom's `parseHTML`.
 * @param selector CSS selector to evaluate against the parsed document.
 * @param multiple When truthy, return one string per match; otherwise only
 *   the first match is used.
 * @returns An array of strings when `multiple` is truthy; otherwise a single
 *   string, which is "" when nothing matches.
 */
function extractSelector(html, selector, multiple) {
  const { document: doc } = parseHTML(html);
  const textOf = (node) => (node.textContent ?? "").trim();

  if (!multiple) {
    const first = doc.querySelector(selector);
    return first ? textOf(first) : "";
  }
  return Array.from(doc.querySelectorAll(selector), (node) => textOf(node));
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
obj[header] = (cells[i]?.textContent ?? '').trim();
|
|
77
|
-
});
|
|
78
|
-
return obj;
|
|
79
|
-
});
|
|
80
|
-
return { caption, headers, rows };
|
|
38
|
+
/**
 * Convert every `<table>` in an HTML string into `{ caption, headers, rows }`.
 *
 * Header detection, in order:
 *  1. `thead th` cells, with data rows from `tbody tr` (or, when there are
 *     none, every `tr` after the first);
 *  2. `th` cells of the first `tr`, with the remaining rows as data;
 *  3. synthetic names `col_1..col_N`, N being the number of `td` cells in the
 *     first row, with every row treated as data.
 * Each data row becomes an object keyed by header; only `td` cells are read
 * and a missing cell yields "".
 *
 * @param html HTML source, parsed with linkedom's `parseHTML`.
 * @returns One entry per table, or [] when the document has no tables.
 */
function extractTables(html) {
  const { document: doc } = parseHTML(html);
  const tables = doc.querySelectorAll("table");
  if (tables.length === 0) return [];

  const cellText = (node) => (node?.textContent ?? "").trim();
  const rowsOf = (table) => Array.from(table.querySelectorAll("tr"));

  return Array.from(tables).map((table) => {
    const caption = table.querySelector("caption")?.textContent?.trim() || void 0;
    const theadCells = table.querySelectorAll("thead th");
    let headers;
    let bodyRows;

    if (theadCells.length > 0) {
      headers = Array.from(theadCells).map((th) => cellText(th));
      const tbodyRows = Array.from(table.querySelectorAll("tbody tr"));
      // Without tbody rows, assume the first <tr> is the header row and skip it.
      bodyRows = tbodyRows.length > 0 ? tbodyRows : rowsOf(table).slice(1);
    } else {
      const allRows = rowsOf(table);
      const [firstRow] = allRows;
      const leadingThs = firstRow ? Array.from(firstRow.querySelectorAll("th")) : [];

      if (leadingThs.length > 0) {
        headers = leadingThs.map((th) => cellText(th));
        bodyRows = allRows.slice(1);
      } else {
        // No header cells anywhere: name columns by position.
        const width = firstRow ? firstRow.querySelectorAll("td").length : 0;
        headers = Array.from({ length: width }, (_, i) => `col_${i + 1}`);
        bodyRows = allRows;
      }
    }

    const rows = bodyRows.map((row) => {
      const cells = Array.from(row.querySelectorAll("td"));
      const record = {};
      for (let i = 0; i < headers.length; i++) {
        record[headers[i]] = cellText(cells[i]);
      }
      return record;
    });

    return { caption, headers, rows };
  });
}
|
|
78
|
+
export {
|
|
79
|
+
extractMetadata,
|
|
80
|
+
extractSelector,
|
|
81
|
+
extractTables
|
|
82
|
+
};
|
|
83
83
|
//# sourceMappingURL=extract.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/extract.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport type { MetadataData, TableData } from '../types.js';\n\nfunction getMetaContent(doc: Document, nameOrProperty: string): string | undefined {\n const el =\n doc.querySelector(`meta[name=\"${nameOrProperty}\"]`) ??\n doc.querySelector(`meta[property=\"${nameOrProperty}\"]`);\n return el?.getAttribute('content') ?? undefined;\n}\n\nexport function extractMetadata(html: string): MetadataData {\n const { document: doc } = parseHTML(html);\n const result: MetadataData = {};\n\n const title = doc.querySelector('title')?.textContent?.trim();\n if (title) result.title = title;\n\n const description =\n getMetaContent(doc, 'description') ?? getMetaContent(doc, 'og:description');\n if (description) result.description = description;\n\n const author = getMetaContent(doc, 'author');\n if (author) result.author = author;\n\n const date =\n getMetaContent(doc, 'date') ?? getMetaContent(doc, 'article:published_time');\n if (date) result.date = date;\n\n const keywords = getMetaContent(doc, 'keywords');\n if (keywords) {\n result.keywords = keywords.split(',').map((k) => k.trim()).filter(Boolean);\n }\n\n const ogImage = getMetaContent(doc, 'og:image');\n if (ogImage) result.og_image = ogImage;\n\n const ogType = getMetaContent(doc, 'og:type');\n if (ogType) result.og_type = ogType;\n\n const canonical = doc.querySelector('link[rel=\"canonical\"]')?.getAttribute('href');\n if (canonical) result.canonical_url = canonical;\n\n return result;\n}\n\nexport function extractSelector(\n html: string,\n selector: string,\n multiple: boolean,\n): string | string[] {\n const { document: doc } = parseHTML(html);\n\n if (multiple) {\n const elements = doc.querySelectorAll(selector);\n return Array.from(elements).map((el) => (el.textContent ?? '').trim());\n }\n\n const el = doc.querySelector(selector);\n return el ? (el.textContent ?? 
'').trim() : '';\n}\n\nexport function extractTables(html: string): TableData[] {\n const { document: doc } = parseHTML(html);\n const tables = doc.querySelectorAll('table');\n if (tables.length === 0) return [];\n\n return Array.from(tables).map((table) => {\n const caption = table.querySelector('caption')?.textContent?.trim() || undefined;\n\n const thElements = table.querySelectorAll('thead th');\n let headers: string[];\n let bodyRows: Element[];\n\n if (thElements.length > 0) {\n headers = Array.from(thElements).map((th) => (th.textContent ?? '').trim());\n bodyRows = Array.from(table.querySelectorAll('tbody tr'));\n if (bodyRows.length === 0) {\n const allRows = Array.from(table.querySelectorAll('tr'));\n bodyRows = allRows.slice(1);\n }\n } else {\n const allRows = Array.from(table.querySelectorAll('tr'));\n const firstRow = allRows[0];\n const firstRowThs = firstRow ? Array.from(firstRow.querySelectorAll('th')) : [];\n\n if (firstRowThs.length > 0) {\n headers = firstRowThs.map((th) => (th.textContent ?? '').trim());\n bodyRows = allRows.slice(1);\n } else {\n const cellCount = firstRow ? firstRow.querySelectorAll('td').length : 0;\n headers = Array.from({ length: cellCount }, (_, i) => `col_${i + 1}`);\n bodyRows = allRows;\n }\n }\n\n const rows = bodyRows.map((row) => {\n const cells = Array.from(row.querySelectorAll('td'));\n const obj: Record<string, string> = {};\n headers.forEach((header, i) => {\n obj[header] = (cells[i]?.textContent ?? 
'').trim();\n });\n return obj;\n });\n\n return { caption, headers, rows };\n });\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAG1B,SAAS,eAAe,KAAe,gBAA4C;AACjF,QAAM,KACJ,IAAI,cAAc,cAAc,cAAc,IAAI,KAClD,IAAI,cAAc,kBAAkB,cAAc,IAAI;AACxD,SAAO,IAAI,aAAa,SAAS,KAAK;AACxC;AAEO,SAAS,gBAAgB,MAA4B;AAC1D,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,SAAuB,CAAC;AAE9B,QAAM,QAAQ,IAAI,cAAc,OAAO,GAAG,aAAa,KAAK;AAC5D,MAAI,MAAO,QAAO,QAAQ;AAE1B,QAAM,cACJ,eAAe,KAAK,aAAa,KAAK,eAAe,KAAK,gBAAgB;AAC5E,MAAI,YAAa,QAAO,cAAc;AAEtC,QAAM,SAAS,eAAe,KAAK,QAAQ;AAC3C,MAAI,OAAQ,QAAO,SAAS;AAE5B,QAAM,OACJ,eAAe,KAAK,MAAM,KAAK,eAAe,KAAK,wBAAwB;AAC7E,MAAI,KAAM,QAAO,OAAO;AAExB,QAAM,WAAW,eAAe,KAAK,UAAU;AAC/C,MAAI,UAAU;AACZ,WAAO,WAAW,SAAS,MAAM,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO,OAAO;AAAA,EAC3E;AAEA,QAAM,UAAU,eAAe,KAAK,UAAU;AAC9C,MAAI,QAAS,QAAO,WAAW;AAE/B,QAAM,SAAS,eAAe,KAAK,SAAS;AAC5C,MAAI,OAAQ,QAAO,UAAU;AAE7B,QAAM,YAAY,IAAI,cAAc,uBAAuB,GAAG,aAAa,MAAM;AACjF,MAAI,UAAW,QAAO,gBAAgB;AAEtC,SAAO;AACT;AAEO,SAAS,gBACd,MACA,UACA,UACmB;AACnB,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AAExC,MAAI,UAAU;AACZ,UAAM,WAAW,IAAI,iBAAiB,QAAQ;AAC9C,WAAO,MAAM,KAAK,QAAQ,EAAE,IAAI,CAACA,SAAQA,IAAG,eAAe,IAAI,KAAK,CAAC;AAAA,EACvE;AAEA,QAAM,KAAK,IAAI,cAAc,QAAQ;AACrC,SAAO,MAAM,GAAG,eAAe,IAAI,KAAK,IAAI;AAC9C;AAEO,SAAS,cAAc,MAA2B;AACvD,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,SAAS,IAAI,iBAAiB,OAAO;AAC3C,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,SAAO,MAAM,KAAK,MAAM,EAAE,IAAI,CAAC,UAAU;AACvC,UAAM,UAAU,MAAM,cAAc,SAAS,GAAG,aAAa,KAAK,KAAK;AAEvE,UAAM,aAAa,MAAM,iBAAiB,UAAU;AACpD,QAAI;AACJ,QAAI;AAEJ,QAAI,WAAW,SAAS,GAAG;AACzB,gBAAU,MAAM,KAAK,UAAU,EAAE,IAAI,CAAC,QAAQ,GAAG,eAAe,IAAI,KAAK,CAAC;AAC1E,iBAAW,MAAM,KAAK,MAAM,iBAAiB,UAAU,CAAC;AACxD,UAAI,SAAS,WAAW,GAAG;AACzB,cAAM,UAAU,MAAM,KAAK,MAAM,iBAAiB,IAAI,CAAC;AACvD,mBAAW,QAAQ,MAAM,CAAC;AAAA,MAC5B;AAAA,IACF,OAAO;AACL,YAAM,UAAU,MAAM,KAAK,MAAM,iBAAiB,IAAI,CAAC;AACvD,YAAM,WAAW,QAAQ,CAAC;AAC1B,YAAM,cAAc,WAAW,MAAM,KAAK,SAAS,iBAAiB,IAAI,CAAC,IAAI,CAAC;AAE9E,UAAI,YAAY,SAAS,GAAG;AAC1B,kBAAU,YAAY,IAAI,CAAC,QAAQ,
GAAG,eAAe,IAAI,KAAK,CAAC;AAC/D,mBAAW,QAAQ,MAAM,CAAC;AAAA,MAC5B,OAAO;AACL,cAAM,YAAY,WAAW,SAAS,iBAAiB,IAAI,EAAE,SAAS;AACtE,kBAAU,MAAM,KAAK,EAAE,QAAQ,UAAU,GAAG,CAAC,GAAG,MAAM,OAAO,IAAI,CAAC,EAAE;AACpE,mBAAW;AAAA,MACb;AAAA,IACF;AAEA,UAAM,OAAO,SAAS,IAAI,CAAC,QAAQ;AACjC,YAAM,QAAQ,MAAM,KAAK,IAAI,iBAAiB,IAAI,CAAC;AACnD,YAAM,MAA8B,CAAC;AACrC,cAAQ,QAAQ,CAAC,QAAQ,MAAM;AAC7B,YAAI,MAAM,KAAK,MAAM,CAAC,GAAG,eAAe,IAAI,KAAK;AAAA,MACnD,CAAC;AACD,aAAO;AAAA,IACT,CAAC;AAED,WAAO,EAAE,SAAS,SAAS,KAAK;AAAA,EAClC,CAAC;AACH;","names":["el"]}
|
|
@@ -1,64 +1,60 @@
|
|
|
1
|
-
import { parseHTML } from
|
|
2
|
-
import { createLogger } from
|
|
3
|
-
const log = createLogger(
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
catch (err) {
|
|
25
|
-
log.debug('Failed to parse JSON-LD block', { error: String(err) });
|
|
26
|
-
}
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { createLogger } from "../logger.js";
|
|
3
|
+
const log = createLogger("jsonld");
|
|
4
|
+
function extractJsonLd(html) {
|
|
5
|
+
const { document: doc } = parseHTML(html);
|
|
6
|
+
const scripts = doc.querySelectorAll('script[type="application/ld+json"]');
|
|
7
|
+
const results = [];
|
|
8
|
+
for (const script of scripts) {
|
|
9
|
+
try {
|
|
10
|
+
const text = script.textContent?.trim();
|
|
11
|
+
if (!text) continue;
|
|
12
|
+
const parsed = JSON.parse(text);
|
|
13
|
+
if (Array.isArray(parsed)) {
|
|
14
|
+
results.push(...parsed);
|
|
15
|
+
} else if (parsed["@graph"] && Array.isArray(parsed["@graph"])) {
|
|
16
|
+
results.push(...parsed["@graph"]);
|
|
17
|
+
} else {
|
|
18
|
+
results.push(parsed);
|
|
19
|
+
}
|
|
20
|
+
} catch (err) {
|
|
21
|
+
log.warn("Failed to parse JSON-LD block", { error: String(err) });
|
|
27
22
|
}
|
|
28
|
-
|
|
23
|
+
}
|
|
24
|
+
return results;
|
|
29
25
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
result[fieldName] = flattened[fieldName];
|
|
38
|
-
}
|
|
26
|
+
function matchJsonLdToSchema(jsonLdBlocks, schema) {
|
|
27
|
+
if (!schema.properties || jsonLdBlocks.length === 0) return {};
|
|
28
|
+
const result = {};
|
|
29
|
+
const flattened = flattenJsonLd(jsonLdBlocks);
|
|
30
|
+
for (const fieldName of Object.keys(schema.properties)) {
|
|
31
|
+
if (flattened[fieldName] !== void 0) {
|
|
32
|
+
result[fieldName] = flattened[fieldName];
|
|
39
33
|
}
|
|
40
|
-
|
|
34
|
+
}
|
|
35
|
+
return result;
|
|
41
36
|
}
|
|
42
37
|
function flattenJsonLd(blocks) {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
38
|
+
const flat = {};
|
|
39
|
+
for (const block of blocks) {
|
|
40
|
+
flattenObject(block, flat);
|
|
41
|
+
}
|
|
42
|
+
return flat;
|
|
48
43
|
}
|
|
49
44
|
function flattenObject(obj, target) {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
else {
|
|
59
|
-
target[key] = value;
|
|
60
|
-
}
|
|
61
|
-
}
|
|
45
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
46
|
+
if (key.startsWith("@")) continue;
|
|
47
|
+
if (!(key in target)) {
|
|
48
|
+
if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
49
|
+
flattenObject(value, target);
|
|
50
|
+
} else {
|
|
51
|
+
target[key] = value;
|
|
52
|
+
}
|
|
62
53
|
}
|
|
54
|
+
}
|
|
63
55
|
}
|
|
56
|
+
export {
|
|
57
|
+
extractJsonLd,
|
|
58
|
+
matchJsonLdToSchema
|
|
59
|
+
};
|
|
64
60
|
//# sourceMappingURL=jsonld.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/jsonld.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { createLogger } from '../logger.js';\nimport type { JsonSchema } from './schema.js';\n\nconst log = createLogger('jsonld');\n\nexport function extractJsonLd(html: string): Record<string, unknown>[] {\n const { document: doc } = parseHTML(html);\n const scripts = doc.querySelectorAll('script[type=\"application/ld+json\"]');\n const results: Record<string, unknown>[] = [];\n\n for (const script of scripts) {\n try {\n const text = script.textContent?.trim();\n if (!text) continue;\n\n const parsed = JSON.parse(text);\n\n if (Array.isArray(parsed)) {\n results.push(...parsed);\n } else if (parsed['@graph'] && Array.isArray(parsed['@graph'])) {\n results.push(...parsed['@graph']);\n } else {\n results.push(parsed);\n }\n } catch (err) {\n log.warn('Failed to parse JSON-LD block', { error: String(err) });\n }\n }\n\n return results;\n}\n\nexport function matchJsonLdToSchema(\n jsonLdBlocks: Record<string, unknown>[],\n schema: JsonSchema,\n): Record<string, unknown> {\n if (!schema.properties || jsonLdBlocks.length === 0) return {};\n\n const result: Record<string, unknown> = {};\n const flattened = flattenJsonLd(jsonLdBlocks);\n\n for (const fieldName of Object.keys(schema.properties)) {\n if (flattened[fieldName] !== undefined) {\n result[fieldName] = flattened[fieldName];\n }\n }\n\n return result;\n}\n\nfunction flattenJsonLd(\n blocks: Record<string, unknown>[],\n): Record<string, unknown> {\n const flat: Record<string, unknown> = {};\n\n for (const block of blocks) {\n flattenObject(block, flat);\n }\n\n return flat;\n}\n\nfunction flattenObject(\n obj: Record<string, unknown>,\n target: Record<string, unknown>,\n): void {\n for (const [key, value] of Object.entries(obj)) {\n if (key.startsWith('@')) continue;\n\n // First-wins: earlier blocks and shallower keys take priority\n if (!(key in target)) {\n if (typeof value === 'object' && value 
!== null && !Array.isArray(value)) {\n flattenObject(value as Record<string, unknown>, target);\n } else {\n target[key] = value;\n }\n }\n }\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,QAAQ;AAE1B,SAAS,cAAc,MAAyC;AACrE,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,UAAU,IAAI,iBAAiB,oCAAoC;AACzE,QAAM,UAAqC,CAAC;AAE5C,aAAW,UAAU,SAAS;AAC5B,QAAI;AACF,YAAM,OAAO,OAAO,aAAa,KAAK;AACtC,UAAI,CAAC,KAAM;AAEX,YAAM,SAAS,KAAK,MAAM,IAAI;AAE9B,UAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,gBAAQ,KAAK,GAAG,MAAM;AAAA,MACxB,WAAW,OAAO,QAAQ,KAAK,MAAM,QAAQ,OAAO,QAAQ,CAAC,GAAG;AAC9D,gBAAQ,KAAK,GAAG,OAAO,QAAQ,CAAC;AAAA,MAClC,OAAO;AACL,gBAAQ,KAAK,MAAM;AAAA,MACrB;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,KAAK,iCAAiC,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,IAClE;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,oBACd,cACA,QACyB;AACzB,MAAI,CAAC,OAAO,cAAc,aAAa,WAAW,EAAG,QAAO,CAAC;AAE7D,QAAM,SAAkC,CAAC;AACzC,QAAM,YAAY,cAAc,YAAY;AAE5C,aAAW,aAAa,OAAO,KAAK,OAAO,UAAU,GAAG;AACtD,QAAI,UAAU,SAAS,MAAM,QAAW;AACtC,aAAO,SAAS,IAAI,UAAU,SAAS;AAAA,IACzC;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,cACP,QACyB;AACzB,QAAM,OAAgC,CAAC;AAEvC,aAAW,SAAS,QAAQ;AAC1B,kBAAc,OAAO,IAAI;AAAA,EAC3B;AAEA,SAAO;AACT;AAEA,SAAS,cACP,KACA,QACM;AACN,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,QAAI,IAAI,WAAW,GAAG,EAAG;AAGzB,QAAI,EAAE,OAAO,SAAS;AACpB,UAAI,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK,GAAG;AACxE,sBAAc,OAAkC,MAAM;AAAA,MACxD,OAAO;AACL,eAAO,GAAG,IAAI;AAAA,MAChB;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lang-hints.d.ts","sourceRoot":"","sources":["../../src/extraction/lang-hints.ts"],"names":[],"mappings":"AAiBA,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,GAAG,IAAI,CAUtF"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
const ALIASES = {
|
|
2
|
+
typescript: "ts",
|
|
3
|
+
javascript: "js",
|
|
4
|
+
python: "py",
|
|
5
|
+
rust: "rs",
|
|
6
|
+
golang: "go",
|
|
7
|
+
shell: "sh"
|
|
8
|
+
};
|
|
9
|
+
const PATTERNS = [
|
|
10
|
+
/(?:^|\s)language-([a-z0-9+#-]+)/i,
|
|
11
|
+
/(?:^|\s)lang-([a-z0-9+#-]+)/i,
|
|
12
|
+
/(?:^|\s)hljs-([a-z0-9+#-]+)/i,
|
|
13
|
+
/(?:^|\s)prism-language-([a-z0-9+#-]+)/i,
|
|
14
|
+
/(?:^|\s)highlight-source-([a-z0-9+#-]+)/i
|
|
15
|
+
];
|
|
16
|
+
function detectCodeLanguage(classAttr) {
|
|
17
|
+
if (!classAttr) return null;
|
|
18
|
+
for (const re of PATTERNS) {
|
|
19
|
+
const m = classAttr.match(re);
|
|
20
|
+
if (m) {
|
|
21
|
+
const raw = m[1].toLowerCase();
|
|
22
|
+
return ALIASES[raw] ?? raw;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
27
|
+
export {
|
|
28
|
+
detectCodeLanguage
|
|
29
|
+
};
|
|
30
|
+
//# sourceMappingURL=lang-hints.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/lang-hints.ts"],"sourcesContent":["const ALIASES: Record<string, string> = {\n typescript: 'ts',\n javascript: 'js',\n python: 'py',\n rust: 'rs',\n golang: 'go',\n shell: 'sh',\n};\n\nconst PATTERNS = [\n /(?:^|\\s)language-([a-z0-9+#-]+)/i,\n /(?:^|\\s)lang-([a-z0-9+#-]+)/i,\n /(?:^|\\s)hljs-([a-z0-9+#-]+)/i,\n /(?:^|\\s)prism-language-([a-z0-9+#-]+)/i,\n /(?:^|\\s)highlight-source-([a-z0-9+#-]+)/i,\n];\n\nexport function detectCodeLanguage(classAttr: string | null | undefined): string | null {\n if (!classAttr) return null;\n for (const re of PATTERNS) {\n const m = classAttr.match(re);\n if (m) {\n const raw = m[1].toLowerCase();\n return ALIASES[raw] ?? raw;\n }\n }\n return null;\n}\n"],"mappings":"AAAA,MAAM,UAAkC;AAAA,EACtC,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,OAAO;AACT;AAEA,MAAM,WAAW;AAAA,EACf;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,mBAAmB,WAAqD;AACtF,MAAI,CAAC,UAAW,QAAO;AACvB,aAAW,MAAM,UAAU;AACzB,UAAM,IAAI,UAAU,MAAM,EAAE;AAC5B,QAAI,GAAG;AACL,YAAM,MAAM,EAAE,CAAC,EAAE,YAAY;AAC7B,aAAO,QAAQ,GAAG,KAAK;AAAA,IACzB;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { LLMExtractResult } from '../integrations/cloud/llm/types.js';
|
|
2
|
+
export interface LLMFallbackBudget {
|
|
3
|
+
remaining: number;
|
|
4
|
+
}
|
|
5
|
+
export interface LLMFallbackInput {
|
|
6
|
+
html: string;
|
|
7
|
+
jsonSchema: Record<string, unknown>;
|
|
8
|
+
partial: Record<string, unknown>;
|
|
9
|
+
missing: string[];
|
|
10
|
+
signal?: AbortSignal;
|
|
11
|
+
budget?: LLMFallbackBudget;
|
|
12
|
+
}
|
|
13
|
+
export interface LLMFallbackResult extends LLMExtractResult {
|
|
14
|
+
warnings: string[];
|
|
15
|
+
}
|
|
16
|
+
export declare function extractWithLLM(input: LLMFallbackInput): Promise<LLMFallbackResult>;
|
|
17
|
+
//# sourceMappingURL=llm-fallback.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-fallback.d.ts","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,gBAAgB,EAAe,MAAM,oCAAoC,CAAC;AAKxF,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAED,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAeD,wBAAsB,cAAc,CAClC,KAAK,EAAE,gBAAgB,GACtB,OAAO,CAAC,iBAAiB,CAAC,CAsF5B"}
|