@staticn0va/wigolo 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +146 -227
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +238 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +47 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +364 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +344 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/shutdown.d.ts +2 -0
- package/dist/cli/shutdown.d.ts.map +1 -0
- package/dist/cli/shutdown.js +26 -0
- package/dist/cli/shutdown.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +34 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/error-describe.d.ts +7 -0
- package/dist/fetch/error-describe.d.ts.map +1 -0
- package/dist/fetch/error-describe.js +37 -0
- package/dist/fetch/error-describe.js.map +1 -0
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +187 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +102 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
- package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/model-select.js +32 -0
- package/dist/integrations/cloud/llm/model-select.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/run.d.ts +27 -0
- package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/run.js +99 -0
- package/dist/integrations/cloud/llm/run.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
- package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/text-adapters.js +103 -0
- package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +17 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +41 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +6 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +246 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +19 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +62 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.d.ts.map +1 -1
- package/dist/search/filters.js +58 -54
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +94 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +177 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +174 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/markdown.ts"],"sourcesContent":["import TurndownService from 'turndown';\nimport { detectCodeLanguage } from './lang-hints.js';\n\nfunction longestBacktickRun(s: string): number {\n let max = 0;\n let cur = 0;\n for (let i = 0; i < s.length; i++) {\n if (s.charCodeAt(i) === 96) {\n cur++;\n if (cur > max) max = cur;\n } else {\n cur = 0;\n }\n }\n return max;\n}\n\nexport function buildTurndown(): TurndownService {\n const td = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });\n\n // Remove script and style tags entirely\n td.remove(['script', 'style']);\n\n // Custom rule: convert <table> to markdown table\n td.addRule('table', {\n filter: 'table',\n replacement(_content, node) {\n const el = node as Element;\n const rows: Element[] = Array.from(el.querySelectorAll('tr'));\n if (rows.length === 0) return '';\n\n const renderRow = (row: Element): string => {\n const cells = Array.from(row.querySelectorAll('th, td'));\n return '| ' + cells.map(c => c.textContent?.replace(/\\n/g, ' ').trim() ?? '').join(' | ') + ' |';\n };\n\n const headerRow = rows[0];\n const isHeaderRow = headerRow.querySelectorAll('th').length > 0;\n const headerCells = Array.from(headerRow.querySelectorAll('th, td'));\n const separator = '| ' + headerCells.map(() => '---').join(' | ') + ' |';\n\n if (isHeaderRow) {\n const bodyRows = rows.slice(1);\n const lines = [renderRow(headerRow), separator, ...bodyRows.map(renderRow)];\n return '\\n\\n' + lines.join('\\n') + '\\n\\n';\n }\n\n const lines = [renderRow(headerRow), separator, ...rows.slice(1).map(renderRow)];\n return '\\n\\n' + lines.join('\\n') + '\\n\\n';\n },\n });\n\n // Suppress thead/tbody/tr/th/td individually since table rule handles the whole node\n td.addRule('tableCell', {\n filter: ['thead', 'tbody', 'tfoot', 'tr', 'th', 'td'],\n replacement(content) {\n return content;\n },\n });\n\n td.addRule('codeBlockLang', {\n filter(node) {\n return node.nodeName === 'PRE' && (node as Element).querySelector('code') !== null;\n },\n replacement(_content, node) {\n const pre = node as Element;\n const code = pre.querySelector('code');\n const cls = code?.getAttribute('class') ?? pre.getAttribute('class') ?? '';\n const lang = detectCodeLanguage(cls);\n const body = code?.textContent ?? pre.textContent ?? '';\n const fence = '`'.repeat(Math.max(3, longestBacktickRun(body) + 1));\n return `\\n\\n${fence}${lang ?? ''}\\n${body.replace(/\\n+$/, '')}\\n${fence}\\n\\n`;\n },\n });\n\n return td;\n}\n\nconst turndown = buildTurndown();\n\nexport function htmlToMarkdown(html: string): string {\n if (!html) return '';\n return turndown.turndown(html);\n}\n\nexport interface Heading {\n level: number;\n text: string;\n lineIndex: number;\n}\n\nexport function parseHeadings(lines: string[]): Heading[] {\n const headings: Heading[] = [];\n for (let i = 0; i < lines.length; i++) {\n const match = lines[i].match(/^(#{1,6})\\s+(.+)/);\n if (match) {\n headings.push({ level: match[1].length, text: match[2].trim(), lineIndex: i });\n }\n }\n return headings;\n}\n\n// Prefix-sum array of char offsets: offsets[i] is the index in\n// `lines.join('\\n')` at which lines[i] begins.\nexport function lineStartCharOffsets(lines: string[]): number[] {\n const offsets = new Array<number>(lines.length);\n let acc = 0;\n for (let i = 0; i < lines.length; i++) {\n offsets[i] = acc;\n acc += lines[i].length + 1; // +1 for the '\\n' separator\n }\n return offsets;\n}\n\nfunction extractFromHeading(lines: string[], headings: Heading[], headingIdx: number): string {\n const heading = headings[headingIdx];\n const start = heading.lineIndex;\n\n // Find the next heading of equal or higher level (lower or equal # count)\n let end = lines.length;\n for (let i = headingIdx + 1; i < headings.length; i++) {\n if (headings[i].level <= heading.level) {\n end = headings[i].lineIndex;\n break;\n }\n }\n\n return lines.slice(start, end).join('\\n');\n}\n\nexport function extractSection(\n markdown: string,\n section: string,\n sectionIndex = 0,\n): { content: string; matched: boolean } {\n const lines = markdown.split('\\n');\n const headings = parseHeadings(lines);\n\n if (headings.length === 0) return { content: markdown, matched: false };\n\n const lower = section.toLowerCase();\n const indexed = headings.map((h, i) => ({ h, i }));\n\n // Collect exact matches first\n const exactMatches = indexed.filter(({ h }) => h.text.toLowerCase() === lower);\n\n // If exact matches satisfy the requested index, use them\n if (exactMatches.length > 0 && sectionIndex < exactMatches.length) {\n const { i } = exactMatches[sectionIndex];\n return { content: extractFromHeading(lines, headings, i), matched: true };\n }\n\n // Fall back to substring matches (includes exact headings and partial ones)\n const substringMatches = indexed.filter(({ h }) => h.text.toLowerCase().includes(lower));\n\n if (substringMatches.length === 0 || sectionIndex >= substringMatches.length) {\n return { content: markdown, matched: false };\n }\n\n const { i } = substringMatches[sectionIndex];\n return { content: extractFromHeading(lines, headings, i), matched: true };\n}\n\nexport function extractLinksAndImages(markdown: string): { links: string[]; images: string[] } {\n const imagePattern = /!\\[[^\\]]*\\]\\(([^)]+)\\)/g;\n const linkPattern = /(?<!!)\\[[^\\]]*\\]\\(([^)]+)\\)/g;\n\n const images = new Set<string>();\n const links = new Set<string>();\n\n let match: RegExpExecArray | null;\n\n // Extract images first\n while ((match = imagePattern.exec(markdown)) !== null) {\n images.add(match[1]);\n }\n\n // Extract links (non-image)\n while ((match = linkPattern.exec(markdown)) !== null) {\n links.add(match[1]);\n }\n\n return { links: Array.from(links), images: Array.from(images) };\n}\n\nconst DECORATIVE_URL_MARKERS = [\n 'avatar',\n 'icon',\n 'logo',\n 'badge',\n 'shield',\n 'tracking',\n 'pixel',\n 'sprite',\n 'emoji',\n 'favicon',\n];\n\n// Drop `` tokens that look decorative. Heuristic only -- keep\n// images that have alt text unless the URL clearly marks them decorative.\n// Tracking pixels (tiny data-URI gifs) and empty-alt icons are removed.\nexport function filterDecorativeImages(markdown: string): string {\n if (!markdown) return markdown;\n return markdown.replace(/!\\[([^\\]]*)\\]\\(([^)]+)\\)/g, (match, alt: string, src: string) => {\n const trimmedAlt = alt.trim();\n const lowerSrc = src.toLowerCase();\n\n // Tiny animated-GIF tracking pixel / 1x1 beacons\n if (lowerSrc.startsWith('data:image/gif;base64,')) return '';\n\n // Inline SVG icon data URIs (short = tiny, likely decorative glyph)\n if (lowerSrc.startsWith('data:image/svg+xml') && src.length < 200) return '';\n\n // URL marks it as decorative regardless of alt\n for (const marker of DECORATIVE_URL_MARKERS) {\n if (lowerSrc.includes(marker)) return '';\n }\n\n // No alt text + no title = decorative\n if (!trimmedAlt) return '';\n\n return match;\n });\n}\n\n// Resolve relative `[text](path)` and `` targets against baseUrl.\n// Leaves absolute URLs, mailto:, tel:, javascript:, and #fragments untouched.\nexport function resolveRelativeUrls(markdown: string, baseUrl: string): string {\n if (!markdown || !baseUrl) return markdown;\n\n const rewrite = (path: string): string => {\n const trimmed = path.trim();\n if (!trimmed) return path;\n if (/^(?:https?:|mailto:|tel:|javascript:|data:)/i.test(trimmed)) return path;\n if (trimmed.startsWith('#')) {\n try {\n return new URL(trimmed, baseUrl).href;\n } catch {\n return path;\n }\n }\n if (trimmed.startsWith('//')) {\n try {\n const base = new URL(baseUrl);\n return `${base.protocol}${trimmed}`;\n } catch {\n return path;\n }\n }\n try {\n return new URL(trimmed, baseUrl).href;\n } catch {\n return path;\n }\n };\n\n // Image links first so the shared link regex does not rewrite them twice.\n let result = markdown.replace(\n /(!\\[[^\\]]*\\]\\()([^)\\s]+)(\\s*(?:\"[^\"]*\")?\\))/g,\n (_m, open, path, close) => `${open}${rewrite(path)}${close}`,\n );\n\n result = result.replace(\n /(^|[^!])(\\[[^\\]]*\\]\\()([^)\\s]+)(\\s*(?:\"[^\"]*\")?\\))/g,\n (_m, pre, open, path, close) => `${pre}${open}${rewrite(path)}${close}`,\n );\n\n return result;\n}\n"],"mappings":"AAAA,OAAO,qBAAqB;AAC5B,SAAS,0BAA0B;AAEnC,SAAS,mBAAmB,GAAmB;AAC7C,MAAI,MAAM;AACV,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,QAAI,EAAE,WAAW,CAAC,MAAM,IAAI;AAC1B;AACA,UAAI,MAAM,IAAK,OAAM;AAAA,IACvB,OAAO;AACL,YAAM;AAAA,IACR;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gBAAiC;AAC/C,QAAM,KAAK,IAAI,gBAAgB,EAAE,cAAc,OAAO,gBAAgB,SAAS,CAAC;AAGhF,KAAG,OAAO,CAAC,UAAU,OAAO,CAAC;AAG7B,KAAG,QAAQ,SAAS;AAAA,IAClB,QAAQ;AAAA,IACR,YAAY,UAAU,MAAM;AAC1B,YAAM,KAAK;AACX,YAAM,OAAkB,MAAM,KAAK,GAAG,iBAAiB,IAAI,CAAC;AAC5D,UAAI,KAAK,WAAW,EAAG,QAAO;AAE9B,YAAM,YAAY,CAAC,QAAyB;AAC1C,cAAM,QAAQ,MAAM,KAAK,IAAI,iBAAiB,QAAQ,CAAC;AACvD,eAAO,OAAO,MAAM,IAAI,OAAK,EAAE,aAAa,QAAQ,OAAO,GAAG,EAAE,KAAK,KAAK,EAAE,EAAE,KAAK,KAAK,IAAI;AAAA,MAC9F;AAEA,YAAM,YAAY,KAAK,CAAC;AACxB,YAAM,cAAc,UAAU,iBAAiB,IAAI,EAAE,SAAS;AAC9D,YAAM,cAAc,MAAM,KAAK,UAAU,iBAAiB,QAAQ,CAAC;AACnE,YAAM,YAAY,OAAO,YAAY,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,IAAI;AAEpE,UAAI,aAAa;AACf,cAAM,WAAW,KAAK,MAAM,CAAC;AAC7B,cAAMA,SAAQ,CAAC,UAAU,SAAS,GAAG,WAAW,GAAG,SAAS,IAAI,SAAS,CAAC;AAC1E,eAAO,SAASA,OAAM,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,QAAQ,CAAC,UAAU,SAAS,GAAG,WAAW,GAAG,KAAK,MAAM,CAAC,EAAE,IAAI,SAAS,CAAC;AAC/E,aAAO,SAAS,MAAM,KAAK,IAAI,IAAI;AAAA,IACrC;AAAA,EACF,CAAC;AAGD,KAAG,QAAQ,aAAa;AAAA,IACtB,QAAQ,CAAC,SAAS,SAAS,SAAS,MAAM,MAAM,IAAI;AAAA,IACpD,YAAY,SAAS;AACnB,aAAO;AAAA,IACT;AAAA,EACF,CAAC;AAED,KAAG,QAAQ,iBAAiB;AAAA,IAC1B,OAAO,MAAM;AACX,aAAO,KAAK,aAAa,SAAU,KAAiB,cAAc,MAAM,MAAM;AAAA,IAChF;AAAA,IACA,YAAY,UAAU,MAAM;AAC1B,YAAM,MAAM;AACZ,YAAM,OAAO,IAAI,cAAc,MAAM;AACrC,YAAM,MAAM,MAAM,aAAa,OAAO,KAAK,IAAI,aAAa,OAAO,KAAK;AACxE,YAAM,OAAO,mBAAmB,GAAG;AACnC,YAAM,OAAO,MAAM,eAAe,IAAI,eAAe;AACrD,YAAM,QAAQ,IAAI,OAAO,KAAK,IAAI,GAAG,mBAAmB,IAAI,IAAI,CAAC,CAAC;AAClE,aAAO;AAAA;AAAA,EAAO,KAAK,GAAG,QAAQ,EAAE;AAAA,EAAK,KAAK,QAAQ,QAAQ,EAAE,CAAC;AAAA,EAAK,KAAK;AAAA;AAAA;AAAA,IACzE;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,MAAM,WAAW,cAAc;AAExB,SAAS,eAAe,MAAsB;AACnD,MAAI,CAAC,KAAM,QAAO;AAClB,SAAO,SAAS,SAAS,IAAI;AAC/B;AAQO,SAAS,cAAc,OAA4B;AACxD,QAAM,WAAsB,CAAC;AAC7B,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,QAAQ,MAAM,CAAC,EAAE,MAAM,kBAAkB;AAC/C,QAAI,OAAO;AACT,eAAS,KAAK,EAAE,OAAO,MAAM,CAAC,EAAE,QAAQ,MAAM,MAAM,CAAC,EAAE,KAAK,GAAG,WAAW,EAAE,CAAC;AAAA,IAC/E;AAAA,EACF;AACA,SAAO;AACT;AAIO,SAAS,qBAAqB,OAA2B;AAC9D,QAAM,UAAU,IAAI,MAAc,MAAM,MAAM;AAC9C,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAQ,CAAC,IAAI;AACb,WAAO,MAAM,CAAC,EAAE,SAAS;AAAA,EAC3B;AACA,SAAO;AACT;AAEA,SAAS,mBAAmB,OAAiB,UAAqB,YAA4B;AAC5F,QAAM,UAAU,SAAS,UAAU;AACnC,QAAM,QAAQ,QAAQ;AAGtB,MAAI,MAAM,MAAM;AAChB,WAAS,IAAI,aAAa,GAAG,IAAI,SAAS,QAAQ,KAAK;AACrD,QAAI,SAAS,CAAC,EAAE,SAAS,QAAQ,OAAO;AACtC,YAAM,SAAS,CAAC,EAAE;AAClB;AAAA,IACF;AAAA,EACF;AAEA,SAAO,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,IAAI;AAC1C;AAEO,SAAS,eACd,UACA,SACA,eAAe,GACwB;AACvC,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,QAAM,WAAW,cAAc,KAAK;AAEpC,MAAI,SAAS,WAAW,EAAG,QAAO,EAAE,SAAS,UAAU,SAAS,MAAM;AAEtE,QAAM,QAAQ,QAAQ,YAAY;AAClC,QAAM,UAAU,SAAS,IAAI,CAAC,GAAGC,QAAO,EAAE,GAAG,GAAAA,GAAE,EAAE;AAGjD,QAAM,eAAe,QAAQ,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE,KAAK,YAAY,MAAM,KAAK;AAG7E,MAAI,aAAa,SAAS,KAAK,eAAe,aAAa,QAAQ;AACjE,UAAM,EAAE,GAAAA,GAAE,IAAI,aAAa,YAAY;AACvC,WAAO,EAAE,SAAS,mBAAmB,OAAO,UAAUA,EAAC,GAAG,SAAS,KAAK;AAAA,EAC1E;AAGA,QAAM,mBAAmB,QAAQ,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE,KAAK,YAAY,EAAE,SAAS,KAAK,CAAC;AAEvF,MAAI,iBAAiB,WAAW,KAAK,gBAAgB,iBAAiB,QAAQ;AAC5E,WAAO,EAAE,SAAS,UAAU,SAAS,MAAM;AAAA,EAC7C;AAEA,QAAM,EAAE,EAAE,IAAI,iBAAiB,YAAY;AAC3C,SAAO,EAAE,SAAS,mBAAmB,OAAO,UAAU,CAAC,GAAG,SAAS,KAAK;AAC1E;AAEO,SAAS,sBAAsB,UAAyD;AAC7F,QAAM,eAAe;AACrB,QAAM,cAAc;AAEpB,QAAM,SAAS,oBAAI,IAAY;AAC/B,QAAM,QAAQ,oBAAI,IAAY;AAE9B,MAAI;AAGJ,UAAQ,QAAQ,aAAa,KAAK,QAAQ,OAAO,MAAM;AACrD,WAAO,IAAI,MAAM,CAAC,CAAC;AAAA,EACrB;AAGA,UAAQ,QAAQ,YAAY,KAAK,QAAQ,OAAO,MAAM;AACpD,UAAM,IAAI,MAAM,CAAC,CAAC;AAAA,EACpB;AAEA,SAAO,EAAE,OAAO,MAAM,KAAK,KAAK,GAAG,QAAQ,MAAM,KAAK,MAAM,EAAE;AAChE;AAEA,MAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAKO,SAAS,uBAAuB,UAA0B;AAC/D,MAAI,CAAC,SAAU,QAAO;AACtB,SAAO,SAAS,QAAQ,6BAA6B,CAAC,OAAO,KAAa,QAAgB;AACxF,UAAM,aAAa,IAAI,KAAK;AAC5B,UAAM,WAAW,IAAI,YAAY;AAGjC,QAAI,SAAS,WAAW,wBAAwB,EAAG,QAAO;AAG1D,QAAI,SAAS,WAAW,oBAAoB,KAAK,IAAI,SAAS,IAAK,QAAO;AAG1E,eAAW,UAAU,wBAAwB;AAC3C,UAAI,SAAS,SAAS,MAAM,EAAG,QAAO;AAAA,IACxC;AAGA,QAAI,CAAC,WAAY,QAAO;AAExB,WAAO;AAAA,EACT,CAAC;AACH;AAIO,SAAS,oBAAoB,UAAkB,SAAyB;AAC7E,MAAI,CAAC,YAAY,CAAC,QAAS,QAAO;AAElC,QAAM,UAAU,CAAC,SAAyB;AACxC,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,QAAS,QAAO;AACrB,QAAI,+CAA+C,KAAK,OAAO,EAAG,QAAO;AACzE,QAAI,QAAQ,WAAW,GAAG,GAAG;AAC3B,UAAI;AACF,eAAO,IAAI,IAAI,SAAS,OAAO,EAAE;AAAA,MACnC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AACA,QAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAI;AACF,cAAM,OAAO,IAAI,IAAI,OAAO;AAC5B,eAAO,GAAG,KAAK,QAAQ,GAAG,OAAO;AAAA,MACnC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AACA,QAAI;AACF,aAAO,IAAI,IAAI,SAAS,OAAO,EAAE;AAAA,IACnC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,SAAS,SAAS;AAAA,IACpB;AAAA,IACA,CAAC,IAAI,MAAM,MAAM,UAAU,GAAG,IAAI,GAAG,QAAQ,IAAI,CAAC,GAAG,KAAK;AAAA,EAC5D;AAEA,WAAS,OAAO;AAAA,IACd;AAAA,IACA,CAAC,IAAI,KAAK,MAAM,MAAM,UAAU,GAAG,GAAG,GAAG,IAAI,GAAG,QAAQ,IAAI,CAAC,GAAG,KAAK;AAAA,EACvE;AAEA,SAAO;AACT;","names":["lines","i"]}
|
|
@@ -7,5 +7,13 @@ export interface ExtractionOptions {
|
|
|
7
7
|
pdfBuffer?: Buffer;
|
|
8
8
|
}
|
|
9
9
|
export declare function registerExtractor(extractor: Extractor): void;
|
|
10
|
+
/**
|
|
11
|
+
* @deprecated Use `getExtractProvider().extract(...)` from
|
|
12
|
+
* `src/providers/extract-provider.ts`. This facade remains for backwards
|
|
13
|
+
* compatibility with existing test mocks and benchmark runners that import
|
|
14
|
+
* `extractContent` directly. Will be removed after the test-mock migration.
|
|
15
|
+
*/
|
|
10
16
|
export declare function extractContent(html: string, url: string, options?: ExtractionOptions): Promise<ExtractionResult>;
|
|
17
|
+
export declare function mergeMetadata(base: ExtractionResult['metadata'], html: string): ExtractionResult['metadata'];
|
|
18
|
+
export declare function applyPostProcessing(result: ExtractionResult, url: string, html: string, options: ExtractionOptions): ExtractionResult;
|
|
11
19
|
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAKD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAG3B;AAED,wBAAgB,aAAa,CAC3B,IAAI,EAAE,gBAAgB,CAAC,UAAU,CAAC,EAClC,IAAI,EAAE,MAAM,GACX,gBAAgB,CAAC,UAAU,CAAC,CAkB9B;AAED,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,gBAAgB,EACxB,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,iBAAiB,GACzB,gBAAgB,CAsBlB"}
|
|
@@ -1,95 +1,61 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
stackoverflowExtractor,
|
|
15
|
-
mdnExtractor,
|
|
16
|
-
docsGenericExtractor,
|
|
17
|
-
];
|
|
18
|
-
export function registerExtractor(extractor) {
|
|
19
|
-
siteExtractors.push(extractor);
|
|
1
|
+
import {
|
|
2
|
+
extractSection,
|
|
3
|
+
extractLinksAndImages,
|
|
4
|
+
filterDecorativeImages,
|
|
5
|
+
resolveRelativeUrls
|
|
6
|
+
} from "./markdown.js";
|
|
7
|
+
import { extractMetadata } from "./extract.js";
|
|
8
|
+
import { stripBoilerplateMarkdown } from "./boilerplate.js";
|
|
9
|
+
import { sanitizeExtractedMarkdown } from "./markdown-sanitize.js";
|
|
10
|
+
import { registerSiteExtractor } from "./v1/site-extractors.js";
|
|
11
|
+
import { getExtractProvider } from "../providers/extract-provider.js";
|
|
12
|
+
function registerExtractor(extractor) {
|
|
13
|
+
registerSiteExtractor(extractor);
|
|
20
14
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
let pdfText = '';
|
|
25
|
-
if (options.pdfBuffer) {
|
|
26
|
-
try {
|
|
27
|
-
const pdfParse = (await import('pdf-parse')).default;
|
|
28
|
-
const parsed = await pdfParse(options.pdfBuffer);
|
|
29
|
-
pdfText = parsed.text ?? '';
|
|
30
|
-
}
|
|
31
|
-
catch (err) {
|
|
32
|
-
log.warn('pdf-parse failed', { url, error: String(err) });
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
result = {
|
|
36
|
-
title: '',
|
|
37
|
-
markdown: pdfText,
|
|
38
|
-
metadata: {},
|
|
39
|
-
links: [],
|
|
40
|
-
images: [],
|
|
41
|
-
extractor: 'turndown',
|
|
42
|
-
};
|
|
43
|
-
return applyPostProcessing(result, options);
|
|
44
|
-
}
|
|
45
|
-
const siteExtractor = siteExtractors.find((e) => e.canHandle(url, html));
|
|
46
|
-
if (siteExtractor) {
|
|
47
|
-
const extracted = siteExtractor.extract(html, url);
|
|
48
|
-
if (extracted) {
|
|
49
|
-
result = extracted;
|
|
50
|
-
return applyPostProcessing(result, options);
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
result = await defuddleExtract(html, url);
|
|
54
|
-
if (!result) {
|
|
55
|
-
const config = getConfig();
|
|
56
|
-
if (config.trafilatura !== 'never') {
|
|
57
|
-
const trafAvailable = await isTrafilaturaAvailable();
|
|
58
|
-
if (trafAvailable) {
|
|
59
|
-
result = await trafilaturaExtract(html, url);
|
|
60
|
-
if (result) {
|
|
61
|
-
log.info('Trafilatura extraction succeeded', { url, chars: result.markdown.length });
|
|
62
|
-
return applyPostProcessing(result, options);
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
if (!result) {
|
|
68
|
-
result = readabilityExtract(html, url);
|
|
69
|
-
}
|
|
70
|
-
if (!result) {
|
|
71
|
-
const markdown = htmlToMarkdown(html);
|
|
72
|
-
result = {
|
|
73
|
-
title: '',
|
|
74
|
-
markdown,
|
|
75
|
-
metadata: {},
|
|
76
|
-
links: [],
|
|
77
|
-
images: [],
|
|
78
|
-
extractor: 'turndown',
|
|
79
|
-
};
|
|
80
|
-
}
|
|
81
|
-
return applyPostProcessing(result, options);
|
|
15
|
+
async function extractContent(html, url, options = {}) {
|
|
16
|
+
const provider = await getExtractProvider();
|
|
17
|
+
return provider.extract(html, url, options);
|
|
82
18
|
}
|
|
83
|
-
function
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
19
|
+
function mergeMetadata(base, html) {
|
|
20
|
+
try {
|
|
21
|
+
const meta = extractMetadata(html);
|
|
22
|
+
return {
|
|
23
|
+
...meta,
|
|
24
|
+
// Extractor-provided fields win when set (they already inspected the article body).
|
|
25
|
+
description: base.description || meta.description,
|
|
26
|
+
author: base.author || meta.author,
|
|
27
|
+
date: base.date || meta.date,
|
|
28
|
+
language: base.language,
|
|
29
|
+
og_image: base.og_image ?? meta.og_image,
|
|
30
|
+
og_type: base.og_type ?? meta.og_type,
|
|
31
|
+
canonical_url: base.canonical_url ?? meta.canonical_url,
|
|
32
|
+
keywords: base.keywords ?? meta.keywords
|
|
33
|
+
};
|
|
34
|
+
} catch {
|
|
35
|
+
return base;
|
|
36
|
+
}
|
|
94
37
|
}
|
|
38
|
+
function applyPostProcessing(result, url, html, options) {
|
|
39
|
+
let markdown = result.markdown;
|
|
40
|
+
markdown = resolveRelativeUrls(markdown, url);
|
|
41
|
+
markdown = stripBoilerplateMarkdown(markdown);
|
|
42
|
+
markdown = filterDecorativeImages(markdown);
|
|
43
|
+
markdown = sanitizeExtractedMarkdown(markdown);
|
|
44
|
+
if (options.section) {
|
|
45
|
+
const { content } = extractSection(markdown, options.section, options.sectionIndex ?? 0);
|
|
46
|
+
markdown = content;
|
|
47
|
+
}
|
|
48
|
+
const { links, images } = extractLinksAndImages(markdown);
|
|
49
|
+
const metadata = mergeMetadata(result.metadata, html);
|
|
50
|
+
if (options.maxChars && markdown.length > options.maxChars) {
|
|
51
|
+
markdown = markdown.slice(0, options.maxChars);
|
|
52
|
+
}
|
|
53
|
+
return { ...result, markdown, links, images, metadata };
|
|
54
|
+
}
|
|
55
|
+
export {
|
|
56
|
+
applyPostProcessing,
|
|
57
|
+
extractContent,
|
|
58
|
+
mergeMetadata,
|
|
59
|
+
registerExtractor
|
|
60
|
+
};
|
|
95
61
|
//# sourceMappingURL=pipeline.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/pipeline.ts"],"sourcesContent":["import {\n extractSection,\n extractLinksAndImages,\n filterDecorativeImages,\n resolveRelativeUrls,\n} from './markdown.js';\nimport { extractMetadata } from './extract.js';\nimport { stripBoilerplateMarkdown } from './boilerplate.js';\nimport { sanitizeExtractedMarkdown } from './markdown-sanitize.js';\nimport type { ExtractionResult, Extractor } from '../types.js';\nimport { registerSiteExtractor } from './v1/site-extractors.js';\nimport { getExtractProvider } from '../providers/extract-provider.js';\n\nexport interface ExtractionOptions {\n maxChars?: number;\n section?: string;\n sectionIndex?: number;\n contentType?: string;\n pdfBuffer?: Buffer;\n}\n\n// Plugin entry point — back-compat alias. `src/server.ts` imports\n// `registerExtractor` from here. The registry lives in v1/site-extractors.ts\n// so both the facade and the v1 router see the same plugin-registered extractors.\nexport function registerExtractor(extractor: Extractor): void {\n registerSiteExtractor(extractor);\n}\n\n/**\n * @deprecated Use `getExtractProvider().extract(...)` from\n * `src/providers/extract-provider.ts`. This facade remains for backwards\n * compatibility with existing test mocks and benchmark runners that import\n * `extractContent` directly. Will be removed after the test-mock migration.\n */\nexport async function extractContent(\n html: string,\n url: string,\n options: ExtractionOptions = {},\n): Promise<ExtractionResult> {\n const provider = await getExtractProvider();\n return provider.extract(html, url, options);\n}\n\nexport function mergeMetadata(\n base: ExtractionResult['metadata'],\n html: string,\n): ExtractionResult['metadata'] {\n try {\n const meta = extractMetadata(html);\n return {\n ...meta,\n // Extractor-provided fields win when set (they already inspected the article body).\n description: base.description || meta.description,\n author: base.author || meta.author,\n date: base.date || meta.date,\n language: base.language,\n og_image: base.og_image ?? meta.og_image,\n og_type: base.og_type ?? meta.og_type,\n canonical_url: base.canonical_url ?? meta.canonical_url,\n keywords: base.keywords ?? meta.keywords,\n };\n } catch {\n return base;\n }\n}\n\nexport function applyPostProcessing(\n result: ExtractionResult,\n url: string,\n html: string,\n options: ExtractionOptions,\n): ExtractionResult {\n let markdown = result.markdown;\n\n // Resolve relative links/images before slicing so downstream consumers get absolute URLs.\n markdown = resolveRelativeUrls(markdown, url);\n markdown = stripBoilerplateMarkdown(markdown);\n markdown = filterDecorativeImages(markdown);\n markdown = sanitizeExtractedMarkdown(markdown);\n\n if (options.section) {\n const { content } = extractSection(markdown, options.section, options.sectionIndex ?? 0);\n markdown = content;\n }\n\n const { links, images } = extractLinksAndImages(markdown);\n const metadata = mergeMetadata(result.metadata, html);\n\n if (options.maxChars && markdown.length > options.maxChars) {\n markdown = markdown.slice(0, options.maxChars);\n }\n\n return { ...result, markdown, links, images, metadata };\n}\n"],"mappings":"AAAA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,uBAAuB;AAChC,SAAS,gCAAgC;AACzC,SAAS,iCAAiC;AAE1C,SAAS,6BAA6B;AACtC,SAAS,0BAA0B;AAa5B,SAAS,kBAAkB,WAA4B;AAC5D,wBAAsB,SAAS;AACjC;AAQA,eAAsB,eACpB,MACA,KACA,UAA6B,CAAC,GACH;AAC3B,QAAM,WAAW,MAAM,mBAAmB;AAC1C,SAAO,SAAS,QAAQ,MAAM,KAAK,OAAO;AAC5C;AAEO,SAAS,cACd,MACA,MAC8B;AAC9B,MAAI;AACF,UAAM,OAAO,gBAAgB,IAAI;AACjC,WAAO;AAAA,MACL,GAAG;AAAA;AAAA,MAEH,aAAa,KAAK,eAAe,KAAK;AAAA,MACtC,QAAQ,KAAK,UAAU,KAAK;AAAA,MAC5B,MAAM,KAAK,QAAQ,KAAK;AAAA,MACxB,UAAU,KAAK;AAAA,MACf,UAAU,KAAK,YAAY,KAAK;AAAA,MAChC,SAAS,KAAK,WAAW,KAAK;AAAA,MAC9B,eAAe,KAAK,iBAAiB,KAAK;AAAA,MAC1C,UAAU,KAAK,YAAY,KAAK;AAAA,IAClC;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBACd,QACA,KACA,MACA,SACkB;AAClB,MAAI,WAAW,OAAO;AAGtB,aAAW,oBAAoB,UAAU,GAAG;AAC5C,aAAW,yBAAyB,QAAQ;AAC5C,aAAW,uBAAuB,QAAQ;AAC1C,aAAW,0BAA0B,QAAQ;AAE7C,MAAI,QAAQ,SAAS;AACnB,UAAM,EAAE,QAAQ,IAAI,eAAe,UAAU,QAAQ,SAAS,QAAQ,gBAAgB,CAAC;AACvF,eAAW;AAAA,EACb;AAEA,QAAM,EAAE,OAAO,OAAO,IAAI,sBAAsB,QAAQ;AACxD,QAAM,WAAW,cAAc,OAAO,UAAU,IAAI;AAEpD,MAAI,QAAQ,YAAY,SAAS,SAAS,QAAQ,UAAU;AAC1D,eAAW,SAAS,MAAM,GAAG,QAAQ,QAAQ;AAAA,EAC/C;AAEA,SAAO,EAAE,GAAG,QAAQ,UAAU,OAAO,QAAQ,SAAS;AACxD;","names":[]}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { ExtractionResult } from '../types.js';
|
|
2
|
-
export declare function readabilityExtract(html: string,
|
|
2
|
+
export declare function readabilityExtract(html: string, _url: string): ExtractionResult | null;
|
|
3
3
|
//# sourceMappingURL=readability.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAyBtF"}
|
|
@@ -1,32 +1,31 @@
|
|
|
1
|
-
import { Readability } from
|
|
2
|
-
import { parseHTML } from
|
|
3
|
-
import
|
|
1
|
+
import { Readability } from "@mozilla/readability";
|
|
2
|
+
import { parseHTML } from "linkedom";
|
|
3
|
+
import { htmlToMarkdown } from "./markdown.js";
|
|
4
4
|
const MIN_CONTENT_THRESHOLD = 100;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
catch {
|
|
29
|
-
return null;
|
|
30
|
-
}
|
|
5
|
+
function readabilityExtract(html, _url) {
|
|
6
|
+
try {
|
|
7
|
+
const { document } = parseHTML(html);
|
|
8
|
+
const reader = new Readability(document);
|
|
9
|
+
const article = reader.parse();
|
|
10
|
+
if (!article || !article.content) return null;
|
|
11
|
+
const markdown = htmlToMarkdown(article.content);
|
|
12
|
+
if (markdown.length < MIN_CONTENT_THRESHOLD) return null;
|
|
13
|
+
return {
|
|
14
|
+
title: article.title ?? "",
|
|
15
|
+
markdown,
|
|
16
|
+
metadata: {
|
|
17
|
+
author: article.byline || void 0,
|
|
18
|
+
language: article.lang || void 0
|
|
19
|
+
},
|
|
20
|
+
links: [],
|
|
21
|
+
images: [],
|
|
22
|
+
extractor: "readability"
|
|
23
|
+
};
|
|
24
|
+
} catch {
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
31
27
|
}
|
|
28
|
+
export {
|
|
29
|
+
readabilityExtract
|
|
30
|
+
};
|
|
32
31
|
//# sourceMappingURL=readability.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/readability.ts"],"sourcesContent":["import { Readability } from '@mozilla/readability';\nimport { parseHTML } from 'linkedom';\nimport { htmlToMarkdown } from './markdown.js';\nimport type { ExtractionResult } from '../types.js';\n\nconst MIN_CONTENT_THRESHOLD = 100;\n\nexport function readabilityExtract(html: string, _url: string): ExtractionResult | null {\n try {\n const { document } = parseHTML(html);\n const reader = new Readability(document as any);\n const article = reader.parse();\n if (!article || !article.content) return null;\n\n const markdown = htmlToMarkdown(article.content);\n\n if (markdown.length < MIN_CONTENT_THRESHOLD) return null;\n\n return {\n title: article.title ?? '',\n markdown,\n metadata: {\n author: article.byline || undefined,\n language: article.lang || undefined,\n },\n links: [],\n images: [],\n extractor: 'readability',\n };\n } catch {\n return null;\n }\n}\n"],"mappings":"AAAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AAC1B,SAAS,sBAAsB;AAG/B,MAAM,wBAAwB;AAEvB,SAAS,mBAAmB,MAAc,MAAuC;AACtF,MAAI;AACF,UAAM,EAAE,SAAS,IAAI,UAAU,IAAI;AACnC,UAAM,SAAS,IAAI,YAAY,QAAe;AAC9C,UAAM,UAAU,OAAO,MAAM;AAC7B,QAAI,CAAC,WAAW,CAAC,QAAQ,QAAS,QAAO;AAEzC,UAAM,WAAW,eAAe,QAAQ,OAAO;AAE/C,QAAI,SAAS,SAAS,sBAAuB,QAAO;AAEpD,WAAO;AAAA,MACL,OAAO,QAAQ,SAAS;AAAA,MACxB;AAAA,MACA,UAAU;AAAA,QACR,QAAQ,QAAQ,UAAU;AAAA,QAC1B,UAAU,QAAQ,QAAQ;AAAA,MAC5B;AAAA,MACA,OAAO,CAAC;AAAA,MACR,QAAQ,CAAC;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
@@ -1,7 +1,19 @@
|
|
|
1
|
+
import { type LLMFallbackBudget } from './llm-fallback.js';
|
|
2
|
+
import type { SchemaExtractionResult } from '../types.js';
|
|
1
3
|
export interface JsonSchema {
|
|
2
4
|
type?: string;
|
|
3
5
|
properties?: Record<string, JsonSchema>;
|
|
4
6
|
items?: JsonSchema;
|
|
7
|
+
required?: string[];
|
|
8
|
+
}
|
|
9
|
+
export interface SchemaExtractionOpts {
|
|
10
|
+
signal?: AbortSignal;
|
|
11
|
+
budget?: LLMFallbackBudget;
|
|
5
12
|
}
|
|
6
13
|
export declare function extractWithSchema(html: string, schema: JsonSchema): Record<string, unknown>;
|
|
14
|
+
export declare function extractWithSchemaDetailed(html: string, schema: JsonSchema): SchemaExtractionResult;
|
|
15
|
+
export interface SchemaExtractionAsyncResult extends SchemaExtractionResult {
|
|
16
|
+
warnings: string[];
|
|
17
|
+
}
|
|
18
|
+
export declare function extractWithSchemaDetailedAsync(html: string, schema: JsonSchema, opts?: SchemaExtractionOpts): Promise<SchemaExtractionAsyncResult>;
|
|
7
19
|
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAEA,OAAO,EAAkB,KAAK,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,KAAK,EAEV,sBAAsB,EAEvB,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACxC,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,oBAAoB;IACnC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAQD,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,GACjB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAEzB;AAED,wBAAgB,yBAAyB,CACvC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,GACjB,sBAAsB,CAsCxB;AAED,MAAM,WAAW,2BAA4B,SAAQ,sBAAsB;IACzE,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAsB,8BAA8B,CAClD,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,EAClB,IAAI,GAAE,oBAAyB,GAC9B,OAAO,CAAC,2BAA2B,CAAC,CA+BtC"}
|
|
@@ -1,86 +1,149 @@
|
|
|
1
|
-
import { parseHTML } from
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { extractStructuredData } from "./structured-data.js";
|
|
3
|
+
import { extractWithLLM } from "./llm-fallback.js";
|
|
4
|
+
const PROVENANCE_PRIORITY = [
|
|
5
|
+
"json-ld",
|
|
6
|
+
"microdata",
|
|
7
|
+
"rdfa"
|
|
8
|
+
];
|
|
9
|
+
function extractWithSchema(html, schema) {
|
|
10
|
+
return extractWithSchemaDetailed(html, schema).values;
|
|
11
|
+
}
|
|
12
|
+
function extractWithSchemaDetailed(html, schema) {
|
|
13
|
+
const values = {};
|
|
14
|
+
const provenance = {};
|
|
15
|
+
if (!html || !schema.properties) return { values, provenance };
|
|
16
|
+
const blocks = extractStructuredData(html);
|
|
17
|
+
for (const source of PROVENANCE_PRIORITY) {
|
|
18
|
+
for (const block of blocks) {
|
|
19
|
+
if (block.provenance !== source) continue;
|
|
20
|
+
for (const fieldName of Object.keys(schema.properties)) {
|
|
21
|
+
if (values[fieldName] !== void 0) continue;
|
|
22
|
+
const v = pickField(block.fields, fieldName);
|
|
23
|
+
if (v !== void 0) {
|
|
24
|
+
values[fieldName] = v;
|
|
25
|
+
provenance[fieldName] = source;
|
|
16
26
|
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
const allCovered = Object.keys(schema.properties).every(
|
|
31
|
+
(k) => values[k] !== void 0
|
|
32
|
+
);
|
|
33
|
+
if (allCovered) return { values, provenance };
|
|
34
|
+
const { document: doc } = parseHTML(html);
|
|
35
|
+
for (const [fieldName, fieldSchema] of Object.entries(schema.properties)) {
|
|
36
|
+
if (values[fieldName] !== void 0) continue;
|
|
37
|
+
const v = findFieldValue(doc, fieldName, fieldSchema);
|
|
38
|
+
if (v !== void 0) {
|
|
39
|
+
values[fieldName] = v;
|
|
40
|
+
provenance[fieldName] = "heuristic";
|
|
17
41
|
}
|
|
18
|
-
|
|
42
|
+
}
|
|
43
|
+
return { values, provenance };
|
|
19
44
|
}
|
|
20
|
-
function
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
45
|
+
async function extractWithSchemaDetailedAsync(html, schema, opts = {}) {
|
|
46
|
+
const det = extractWithSchemaDetailed(html, schema);
|
|
47
|
+
const warnings = [];
|
|
48
|
+
if (!schema.required || schema.required.length === 0) {
|
|
49
|
+
return { ...det, warnings };
|
|
50
|
+
}
|
|
51
|
+
const missing = schema.required.filter((k) => det.values[k] === void 0);
|
|
52
|
+
if (missing.length === 0) {
|
|
53
|
+
return { ...det, warnings };
|
|
54
|
+
}
|
|
55
|
+
const llm = await extractWithLLM({
|
|
56
|
+
html,
|
|
57
|
+
jsonSchema: schema,
|
|
58
|
+
partial: det.values,
|
|
59
|
+
missing,
|
|
60
|
+
signal: opts.signal,
|
|
61
|
+
budget: opts.budget
|
|
62
|
+
});
|
|
63
|
+
const values = { ...det.values };
|
|
64
|
+
const provenance = { ...det.provenance };
|
|
65
|
+
for (const key of missing) {
|
|
66
|
+
if (llm.values[key] !== void 0 && values[key] === void 0) {
|
|
67
|
+
values[key] = llm.values[key];
|
|
68
|
+
provenance[key] = "llm";
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return { values, provenance, warnings: llm.warnings };
|
|
72
|
+
}
|
|
73
|
+
function pickField(fields, name) {
|
|
74
|
+
if (fields[name] !== void 0) return fields[name];
|
|
75
|
+
for (const v of Object.values(fields)) {
|
|
76
|
+
if (v && typeof v === "object" && !Array.isArray(v)) {
|
|
77
|
+
const nested = v[name];
|
|
78
|
+
if (nested !== void 0) return nested;
|
|
26
79
|
}
|
|
27
|
-
|
|
80
|
+
}
|
|
81
|
+
return void 0;
|
|
82
|
+
}
|
|
83
|
+
function findFieldValue(doc, fieldName, schema) {
|
|
84
|
+
const normalizedName = fieldName.toLowerCase().replace(/_/g, "-");
|
|
85
|
+
const compactName = fieldName.replace(/_/g, "").toLowerCase();
|
|
86
|
+
const variants = [fieldName, normalizedName, compactName];
|
|
87
|
+
if (schema.type === "array") {
|
|
88
|
+
return findArrayValues(doc, variants);
|
|
89
|
+
}
|
|
90
|
+
return findSingleValue(doc, variants);
|
|
28
91
|
}
|
|
29
92
|
function cssEscape(value) {
|
|
30
|
-
|
|
93
|
+
return value.replace(/([^\w-])/g, "\\$1");
|
|
31
94
|
}
|
|
32
95
|
function findSingleValue(doc, variants) {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const byId = doc.querySelector(`#${cssEscape(name)}`);
|
|
57
|
-
if (byId) {
|
|
58
|
-
const text = byId.textContent?.trim();
|
|
59
|
-
if (text)
|
|
60
|
-
return text;
|
|
61
|
-
}
|
|
62
|
-
const byData = doc.querySelector(`[data-${name}]`);
|
|
63
|
-
if (byData) {
|
|
64
|
-
return byData.getAttribute(`data-${name}`) ?? byData.textContent?.trim() ?? undefined;
|
|
65
|
-
}
|
|
96
|
+
for (const name of variants) {
|
|
97
|
+
const byItemprop = doc.querySelector(`[itemprop="${name}"]`);
|
|
98
|
+
if (byItemprop) {
|
|
99
|
+
const text = byItemprop.getAttribute("content") ?? byItemprop.textContent?.trim();
|
|
100
|
+
if (text) return text;
|
|
101
|
+
}
|
|
102
|
+
const byClass = doc.querySelector(`[class*="${name}"]`);
|
|
103
|
+
if (byClass) {
|
|
104
|
+
const text = byClass.textContent?.trim();
|
|
105
|
+
if (text) return text;
|
|
106
|
+
}
|
|
107
|
+
const allWithAria = doc.querySelectorAll("[aria-label]");
|
|
108
|
+
for (const el of allWithAria) {
|
|
109
|
+
const label = el.getAttribute("aria-label")?.toLowerCase().replace(/\s+/g, "-") ?? "";
|
|
110
|
+
if (label === name.toLowerCase()) {
|
|
111
|
+
const text = el.textContent?.trim();
|
|
112
|
+
if (text) return text;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
const byId = doc.querySelector(`#${cssEscape(name)}`);
|
|
116
|
+
if (byId) {
|
|
117
|
+
const text = byId.textContent?.trim();
|
|
118
|
+
if (text) return text;
|
|
66
119
|
}
|
|
67
|
-
|
|
120
|
+
const byData = doc.querySelector(`[data-${name}]`);
|
|
121
|
+
if (byData) {
|
|
122
|
+
return byData.getAttribute(`data-${name}`) ?? byData.textContent?.trim() ?? void 0;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
return void 0;
|
|
68
126
|
}
|
|
69
127
|
function findArrayValues(doc, variants) {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
}
|
|
128
|
+
for (const name of variants) {
|
|
129
|
+
const container = doc.querySelector(`[class*="${name}"]`);
|
|
130
|
+
if (container) {
|
|
131
|
+
const items = container.querySelectorAll('li, [class*="item"]');
|
|
132
|
+
if (items.length > 0) {
|
|
133
|
+
return Array.from(items).map((el) => (el.textContent ?? "").trim()).filter(Boolean);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
const singular = name.replace(/s$/, "");
|
|
137
|
+
const elements = doc.querySelectorAll(`[class*="${singular}"]`);
|
|
138
|
+
if (elements.length > 1) {
|
|
139
|
+
return Array.from(elements).map((el) => (el.textContent ?? "").trim()).filter(Boolean);
|
|
83
140
|
}
|
|
84
|
-
|
|
141
|
+
}
|
|
142
|
+
return void 0;
|
|
85
143
|
}
|
|
144
|
+
export {
|
|
145
|
+
extractWithSchema,
|
|
146
|
+
extractWithSchemaDetailed,
|
|
147
|
+
extractWithSchemaDetailedAsync
|
|
148
|
+
};
|
|
86
149
|
//# sourceMappingURL=schema.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAQjE,MAAM,UAAU,iBAAiB,CAC/B,IAAY,EACZ,MAAkB;IAElB,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU;QAAE,OAAO,EAAE,CAAC;IAE3C,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,YAAY,GAAG,mBAAmB,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAE/D,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,eAAe,GAA4B,EAAE,CAAC;IAEpD,KAAK,MAAM,CAAC,SAAS,EAAE,WAAW,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QACzE,IAAI,YAAY,CAAC,SAAS,CAAC,KAAK,SAAS;YAAE,SAAS;QAEpD,MAAM,KAAK,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;QAC1D,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,eAAe,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC;QACrC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,GAAG,YAAY,EAAE,GAAG,eAAe,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,cAAc,CACrB,GAAa,EACb,SAAiB,EACjB,MAAkB;IAElB,MAAM,cAAc,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAClE,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9D,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,cAAc,EAAE,WAAW,CAAC,CAAC;IAE1D,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC5B,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,cAAc,IAAI,IAAI,CAAC,CAAC;QAC7D,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,UAAU,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,UAAU,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YAClF,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,iFAAiF;QACjF,MAAM,OAAO,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QACxD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACzC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,WAAW,GAAG,GAAG,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACtF,IAAI,KAAK,KAAK,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gBACjC,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;gBACpC,IAAI,IAAI;oBAAE,OAAO,IAAI,CAAC;YACxB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,SAAS,IAAI,GAAG,CAAC,CAAC;QACnD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QACxF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAAC;YAChE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACtF,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACxC,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,YAAY,QAAQ,IAAI,CAAC,CAAC;QAChE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../../src/extraction/schema.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { extractStructuredData } from './structured-data.js';\nimport { extractWithLLM, type LLMFallbackBudget } from './llm-fallback.js';\nimport type {\n FieldProvenance,\n SchemaExtractionResult,\n StructuredDataResult,\n} from '../types.js';\n\nexport interface JsonSchema {\n type?: string;\n properties?: Record<string, JsonSchema>;\n items?: JsonSchema;\n required?: string[];\n}\n\nexport interface SchemaExtractionOpts {\n signal?: AbortSignal;\n budget?: LLMFallbackBudget;\n}\n\nconst PROVENANCE_PRIORITY: StructuredDataResult['provenance'][] = [\n 'json-ld',\n 'microdata',\n 'rdfa',\n];\n\nexport function extractWithSchema(\n html: string,\n schema: JsonSchema,\n): Record<string, unknown> {\n return extractWithSchemaDetailed(html, schema).values;\n}\n\nexport function extractWithSchemaDetailed(\n html: string,\n schema: JsonSchema,\n): SchemaExtractionResult {\n const values: Record<string, unknown> = {};\n const provenance: Record<string, FieldProvenance> = {};\n if (!html || !schema.properties) return { values, provenance };\n\n const blocks = extractStructuredData(html);\n\n for (const source of PROVENANCE_PRIORITY) {\n for (const block of blocks) {\n if (block.provenance !== source) continue;\n for (const fieldName of Object.keys(schema.properties)) {\n if (values[fieldName] !== undefined) continue;\n const v = pickField(block.fields, fieldName);\n if (v !== undefined) {\n values[fieldName] = v;\n provenance[fieldName] = source;\n }\n }\n }\n }\n\n const allCovered = Object.keys(schema.properties).every(\n (k) => values[k] !== undefined,\n );\n if (allCovered) return { values, provenance };\n\n // Heuristic fallback only for fields still missing\n const { document: doc } = parseHTML(html);\n for (const [fieldName, fieldSchema] of Object.entries(schema.properties)) {\n if (values[fieldName] !== undefined) continue;\n const v = findFieldValue(doc, fieldName, fieldSchema);\n if (v !== undefined) {\n values[fieldName] = v;\n provenance[fieldName] = 'heuristic';\n }\n }\n\n return { values, provenance };\n}\n\nexport interface SchemaExtractionAsyncResult extends SchemaExtractionResult {\n warnings: string[];\n}\n\nexport async function extractWithSchemaDetailedAsync(\n html: string,\n schema: JsonSchema,\n opts: SchemaExtractionOpts = {},\n): Promise<SchemaExtractionAsyncResult> {\n const det = extractWithSchemaDetailed(html, schema);\n const warnings: string[] = [];\n\n if (!schema.required || schema.required.length === 0) {\n return { ...det, warnings };\n }\n\n const missing = schema.required.filter((k) => det.values[k] === undefined);\n if (missing.length === 0) {\n return { ...det, warnings };\n }\n\n const llm = await extractWithLLM({\n html,\n jsonSchema: schema as unknown as Record<string, unknown>,\n partial: det.values,\n missing,\n signal: opts.signal,\n budget: opts.budget,\n });\n\n const values = { ...det.values };\n const provenance: Record<string, FieldProvenance> = { ...det.provenance };\n for (const key of missing) {\n if (llm.values[key] !== undefined && values[key] === undefined) {\n values[key] = llm.values[key];\n provenance[key] = 'llm';\n }\n }\n return { values, provenance, warnings: llm.warnings };\n}\n\nfunction pickField(fields: Record<string, unknown>, name: string): unknown {\n if (fields[name] !== undefined) return fields[name];\n // Shallow nested — e.g. JSON-LD Product.offers.price\n for (const v of Object.values(fields)) {\n if (v && typeof v === 'object' && !Array.isArray(v)) {\n const nested = (v as Record<string, unknown>)[name];\n if (nested !== undefined) return nested;\n }\n }\n return undefined;\n}\n\n// ---------- heuristic helpers (preserved from prior schema.ts) ----------\n\nfunction findFieldValue(\n doc: Document,\n fieldName: string,\n schema: JsonSchema,\n): unknown {\n const normalizedName = fieldName.toLowerCase().replace(/_/g, '-');\n const compactName = fieldName.replace(/_/g, '').toLowerCase();\n const variants = [fieldName, normalizedName, compactName];\n\n if (schema.type === 'array') {\n return findArrayValues(doc, variants);\n }\n\n return findSingleValue(doc, variants);\n}\n\nfunction cssEscape(value: string): string {\n return value.replace(/([^\\w-])/g, '\\\\$1');\n}\n\nfunction findSingleValue(doc: Document, variants: string[]): string | undefined {\n for (const name of variants) {\n const byItemprop = doc.querySelector(`[itemprop=\"${name}\"]`);\n if (byItemprop) {\n const text = byItemprop.getAttribute('content') ?? byItemprop.textContent?.trim();\n if (text) return text;\n }\n\n const byClass = doc.querySelector(`[class*=\"${name}\"]`);\n if (byClass) {\n const text = byClass.textContent?.trim();\n if (text) return text;\n }\n\n const allWithAria = doc.querySelectorAll('[aria-label]');\n for (const el of allWithAria) {\n const label = el.getAttribute('aria-label')?.toLowerCase().replace(/\\s+/g, '-') ?? '';\n if (label === name.toLowerCase()) {\n const text = el.textContent?.trim();\n if (text) return text;\n }\n }\n\n const byId = doc.querySelector(`#${cssEscape(name)}`);\n if (byId) {\n const text = byId.textContent?.trim();\n if (text) return text;\n }\n\n const byData = doc.querySelector(`[data-${name}]`);\n if (byData) {\n return byData.getAttribute(`data-${name}`) ?? byData.textContent?.trim() ?? undefined;\n }\n }\n\n return undefined;\n}\n\nfunction findArrayValues(doc: Document, variants: string[]): string[] | undefined {\n for (const name of variants) {\n const container = doc.querySelector(`[class*=\"${name}\"]`);\n if (container) {\n const items = container.querySelectorAll('li, [class*=\"item\"]');\n if (items.length > 0) {\n return Array.from(items).map((el) => (el.textContent ?? '').trim()).filter(Boolean);\n }\n }\n\n const singular = name.replace(/s$/, '');\n const elements = doc.querySelectorAll(`[class*=\"${singular}\"]`);\n if (elements.length > 1) {\n return Array.from(elements).map((el) => (el.textContent ?? '').trim()).filter(Boolean);\n }\n }\n\n return undefined;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,6BAA6B;AACtC,SAAS,sBAA8C;AAmBvD,MAAM,sBAA4D;AAAA,EAChE;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,kBACd,MACA,QACyB;AACzB,SAAO,0BAA0B,MAAM,MAAM,EAAE;AACjD;AAEO,SAAS,0BACd,MACA,QACwB;AACxB,QAAM,SAAkC,CAAC;AACzC,QAAM,aAA8C,CAAC;AACrD,MAAI,CAAC,QAAQ,CAAC,OAAO,WAAY,QAAO,EAAE,QAAQ,WAAW;AAE7D,QAAM,SAAS,sBAAsB,IAAI;AAEzC,aAAW,UAAU,qBAAqB;AACxC,eAAW,SAAS,QAAQ;AAC1B,UAAI,MAAM,eAAe,OAAQ;AACjC,iBAAW,aAAa,OAAO,KAAK,OAAO,UAAU,GAAG;AACtD,YAAI,OAAO,SAAS,MAAM,OAAW;AACrC,cAAM,IAAI,UAAU,MAAM,QAAQ,SAAS;AAC3C,YAAI,MAAM,QAAW;AACnB,iBAAO,SAAS,IAAI;AACpB,qBAAW,SAAS,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,aAAa,OAAO,KAAK,OAAO,UAAU,EAAE;AAAA,IAChD,CAAC,MAAM,OAAO,CAAC,MAAM;AAAA,EACvB;AACA,MAAI,WAAY,QAAO,EAAE,QAAQ,WAAW;AAG5C,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,aAAW,CAAC,WAAW,WAAW,KAAK,OAAO,QAAQ,OAAO,UAAU,GAAG;AACxE,QAAI,OAAO,SAAS,MAAM,OAAW;AACrC,UAAM,IAAI,eAAe,KAAK,WAAW,WAAW;AACpD,QAAI,MAAM,QAAW;AACnB,aAAO,SAAS,IAAI;AACpB,iBAAW,SAAS,IAAI;AAAA,IAC1B;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,WAAW;AAC9B;AAMA,eAAsB,+BACpB,MACA,QACA,OAA6B,CAAC,GACQ;AACtC,QAAM,MAAM,0BAA0B,MAAM,MAAM;AAClD,QAAM,WAAqB,CAAC;AAE5B,MAAI,CAAC,OAAO,YAAY,OAAO,SAAS,WAAW,GAAG;AACpD,WAAO,EAAE,GAAG,KAAK,SAAS;AAAA,EAC5B;AAEA,QAAM,UAAU,OAAO,SAAS,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,MAAS;AACzE,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO,EAAE,GAAG,KAAK,SAAS;AAAA,EAC5B;AAEA,QAAM,MAAM,MAAM,eAAe;AAAA,IAC/B;AAAA,IACA,YAAY;AAAA,IACZ,SAAS,IAAI;AAAA,IACb;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,QAAQ,KAAK;AAAA,EACf,CAAC;AAED,QAAM,SAAS,EAAE,GAAG,IAAI,OAAO;AAC/B,QAAM,aAA8C,EAAE,GAAG,IAAI,WAAW;AACxE,aAAW,OAAO,SAAS;AACzB,QAAI,IAAI,OAAO,GAAG,MAAM,UAAa,OAAO,GAAG,MAAM,QAAW;AAC9D,aAAO,GAAG,IAAI,IAAI,OAAO,GAAG;AAC5B,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,YAAY,UAAU,IAAI,SAAS;AACtD;AAEA,SAAS,UAAU,QAAiC,MAAuB;AACzE,MAAI,OAAO,IAAI,MAAM,OAAW,QAAO,OAAO,IAAI;AAElD,aAAW,KAAK,OAAO,OAAO,MAAM,GAAG;AACrC,QAAI,KAAK,OAAO,MAAM,YAAY,CAAC,MAAM,QAAQ,CAAC,GAAG;AACnD,YAAM,SAAU,EAA8B,IAAI;AAClD,UAAI,WAAW,OAAW,QAAO;AAAA,IACnC;AAAA,EACF;AACA,SAAO;AACT;AAIA,SAAS,eACP,KACA,WACA,QACS;AACT,QAAM,iBAAiB,UAAU,YAAY,EAAE,QAAQ,MAAM,GAAG;AAChE,QAAM,cAAc,UAAU,QAAQ,MAAM,EAAE,EAAE,YAAY;AAC5D,QAAM,WAAW,CAAC,WAAW,gBAAgB,WAAW;AAExD,MAAI,OAAO,SAAS,SAAS;AAC3B,WAAO,gBAAgB,KAAK,QAAQ;AAAA,EACtC;AAEA,SAAO,gBAAgB,KAAK,QAAQ;AACtC;AAEA,SAAS,UAAU,OAAuB;AACxC,SAAO,MAAM,QAAQ,aAAa,MAAM;AAC1C;AAEA,SAAS,gBAAgB,KAAe,UAAwC;AAC9E,aAAW,QAAQ,UAAU;AAC3B,UAAM,aAAa,IAAI,cAAc,cAAc,IAAI,IAAI;AAC3D,QAAI,YAAY;AACd,YAAM,OAAO,WAAW,aAAa,SAAS,KAAK,WAAW,aAAa,KAAK;AAChF,UAAI,KAAM,QAAO;AAAA,IACnB;AAEA,UAAM,UAAU,IAAI,cAAc,YAAY,IAAI,IAAI;AACtD,QAAI,SAAS;AACX,YAAM,OAAO,QAAQ,aAAa,KAAK;AACvC,UAAI,KAAM,QAAO;AAAA,IACnB;AAEA,UAAM,cAAc,IAAI,iBAAiB,cAAc;AACvD,eAAW,MAAM,aAAa;AAC5B,YAAM,QAAQ,GAAG,aAAa,YAAY,GAAG,YAAY,EAAE,QAAQ,QAAQ,GAAG,KAAK;AACnF,UAAI,UAAU,KAAK,YAAY,GAAG;AAChC,cAAM,OAAO,GAAG,aAAa,KAAK;AAClC,YAAI,KAAM,QAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,OAAO,IAAI,cAAc,IAAI,UAAU,IAAI,CAAC,EAAE;AACpD,QAAI,MAAM;AACR,YAAM,OAAO,KAAK,aAAa,KAAK;AACpC,UAAI,KAAM,QAAO;AAAA,IACnB;AAEA,UAAM,SAAS,IAAI,cAAc,SAAS,IAAI,GAAG;AACjD,QAAI,QAAQ;AACV,aAAO,OAAO,aAAa,QAAQ,IAAI,EAAE,KAAK,OAAO,aAAa,KAAK,KAAK;AAAA,IAC9E;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,gBAAgB,KAAe,UAA0C;AAChF,aAAW,QAAQ,UAAU;AAC3B,UAAM,YAAY,IAAI,cAAc,YAAY,IAAI,IAAI;AACxD,QAAI,WAAW;AACb,YAAM,QAAQ,UAAU,iBAAiB,qBAAqB;AAC9D,UAAI,MAAM,SAAS,GAAG;AACpB,eAAO,MAAM,KAAK,KAAK,EAAE,IAAI,CAAC,QAAQ,GAAG,eAAe,IAAI,KAAK,CAAC,EAAE,OAAO,OAAO;AAAA,MACpF;AAAA,IACF;AAEA,UAAM,WAAW,KAAK,QAAQ,MAAM,EAAE;AACtC,UAAM,WAAW,IAAI,iBAAiB,YAAY,QAAQ,IAAI;AAC9D,QAAI,SAAS,SAAS,GAAG;AACvB,aAAO,MAAM,KAAK,QAAQ,EAAE,IAAI,CAAC,QAAQ,GAAG,eAAe,IAAI,KAAK,CAAC,EAAE,OAAO,OAAO;AAAA,IACvF;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
|