@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { extractJsonLd } from "../jsonld.js";
|
|
2
|
+
// Minimum length (string length) of the rendered recipe markdown; extractRecipe
// returns null for anything shorter than this.
const MIN_CONTENT_THRESHOLD = 100;
|
|
3
|
+
/**
 * Renders the first schema.org Recipe found in the page's JSON-LD as markdown.
 *
 * The output contains a title, optional description, a metadata section
 * (times, yield, cuisine), an ingredient list and numbered instructions.
 *
 * @param html Page HTML that is scanned for JSON-LD blocks.
 * @param _url Not used by this function.
 * @returns An extraction result tagged `extractor: "site-specific"`, or `null`
 *   when the HTML is empty, JSON-LD parsing throws, no Recipe block with a
 *   name exists, or the rendered markdown is shorter than
 *   MIN_CONTENT_THRESHOLD.
 */
async function extractRecipe(html, _url) {
  if (!html) return null;

  let blocks;
  try {
    blocks = extractJsonLd(html);
  } catch {
    // Malformed JSON-LD is not fatal: report "no recipe" instead of throwing.
    return null;
  }
  if (!Array.isArray(blocks)) return null;

  // Guard against non-object entries so one stray value cannot throw here.
  const recipe = blocks.find(
    (block) =>
      block !== null &&
      typeof block === "object" &&
      typeIncludes(block["@type"], "recipe")
  );
  if (!recipe) return null;

  const name = stringField(recipe["name"]);
  if (!name) return null;

  const lines = [`# ${name}`];

  const description = stringField(recipe["description"]);
  if (description) {
    lines.push("", description);
  }

  const metaLines = [];
  const totalTime = stringField(recipe["totalTime"]);
  if (totalTime) metaLines.push(`**Total time:** ${totalTime}`);
  const prepTime = stringField(recipe["prepTime"]);
  if (prepTime) metaLines.push(`**Prep time:** ${prepTime}`);
  const cookTime = stringField(recipe["cookTime"]);
  if (cookTime) metaLines.push(`**Cook time:** ${cookTime}`);
  // Yield and cuisine are frequently published as numbers or arrays
  // (e.g. `4` or `["4", "4 servings"]`), so they need the lenient reader.
  const recipeYield = recipeTextField(recipe["recipeYield"]);
  if (recipeYield) metaLines.push(`**Yield:** ${recipeYield}`);
  const cuisine = recipeTextField(recipe["recipeCuisine"]);
  if (cuisine) metaLines.push(`**Cuisine:** ${cuisine}`);
  if (metaLines.length > 0) {
    lines.push("", ...metaLines);
  }

  const ingredients = stringArray(recipe["recipeIngredient"]);
  if (ingredients.length > 0) {
    lines.push("", "## Ingredients");
    for (const ingredient of ingredients) {
      lines.push(`- ${ingredient}`);
    }
  }

  const instructions = readInstructions(recipe["recipeInstructions"]);
  if (instructions.length > 0) {
    lines.push("", "## Instructions");
    instructions.forEach((step, idx) => {
      lines.push(`${idx + 1}. ${step}`);
    });
  }

  const markdown = lines.join("\n").trim();
  if (markdown.length < MIN_CONTENT_THRESHOLD) return null;

  const author = readAuthor(recipe["author"]);
  const datePublished = stringField(recipe["datePublished"]);

  return {
    title: name,
    markdown,
    // Only include metadata keys that actually have a value.
    metadata: {
      ...(description ? { description } : {}),
      ...(author ? { author } : {}),
      ...(datePublished ? { date: datePublished } : {})
    },
    links: [],
    images: [],
    extractor: "site-specific"
  };
}

/**
 * Reads a JSON-LD property that may be a string, a finite number, or an array
 * of those, and returns a single display string.
 *
 * Strings are trimmed, duplicates are removed, and multiple values are joined
 * with ", ". A plain string input yields exactly its trimmed value.
 *
 * @returns The display text, or `undefined` when no usable value is present.
 */
function recipeTextField(value) {
  const entries = Array.isArray(value) ? value : [value];
  const unique = new Set();
  for (const entry of entries) {
    let text;
    if (typeof entry === "string") {
      text = entry.trim();
    } else if (typeof entry === "number" && Number.isFinite(entry)) {
      text = String(entry);
    }
    if (text) unique.add(text);
  }
  return unique.size > 0 ? [...unique].join(", ") : undefined;
}
|
|
65
|
+
/**
 * Reports whether a JSON-LD `@type` value names the wanted type.
 *
 * `raw` may be a single string or an array; non-string array entries are
 * ignored. Comparison is case-insensitive and is done on the normalized
 * (last-segment) form of each candidate.
 *
 * @param raw  The `@type` value as found in the JSON-LD block.
 * @param want The type name to look for.
 * @returns `true` when any candidate matches, otherwise `false`.
 */
function typeIncludes(raw, want) {
  const wanted = want.toLowerCase();
  let candidates = [];
  if (typeof raw === "string") {
    candidates = [raw];
  } else if (Array.isArray(raw)) {
    candidates = raw;
  }
  for (const candidate of candidates) {
    if (typeof candidate === "string" && normalizeType(candidate) === wanted) {
      return true;
    }
  }
  return false;
}
|
|
73
|
+
/**
 * Reduces a type identifier to its lower-cased final segment.
 *
 * The input is split on `/`, `#` and `:`, so "https://schema.org/Recipe" and
 * "schema:Recipe" both become "recipe".
 *
 * @param raw Type identifier string.
 * @returns The lower-cased text after the last separator.
 */
function normalizeType(raw) {
  const segments = raw.split(/[/#:]/);
  const lastSegment = segments[segments.length - 1];
  return (lastSegment ?? raw).toLowerCase();
}
|
|
77
|
+
/**
 * Returns the trimmed text of a string value.
 *
 * @param value Any value read from a JSON-LD block.
 * @returns The trimmed string, or `undefined` when `value` is not a string or
 *   is empty after trimming.
 */
function stringField(value) {
  if (typeof value === "string") {
    const text = value.trim();
    if (text !== "") return text;
  }
  return undefined;
}
|
|
82
|
+
/**
 * Collects the non-empty, trimmed strings from a JSON-LD list property.
 *
 * JSON-LD allows a one-element list to be written as a bare scalar, so a lone
 * string is treated as a single-entry list instead of being discarded.
 * Non-string entries are skipped.
 *
 * @param value A string, an array, or anything else.
 * @returns The trimmed strings in their original order; an empty array when
 *   `value` is neither a string nor an array.
 */
function stringArray(value) {
  let entries = [];
  if (typeof value === "string") {
    entries = [value];
  } else if (Array.isArray(value)) {
    entries = value;
  }
  const out = [];
  for (const entry of entries) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed) out.push(trimmed);
  }
  return out;
}
|
|
93
|
+
/**
 * Flattens a schema.org `recipeInstructions` value into an ordered list of
 * step texts.
 *
 * Accepted shapes:
 * - a plain string (one step),
 * - a step object with a string `text`,
 * - a section/list object whose steps live under `itemListElement`
 *   (e.g. HowToSection), which is walked recursively,
 * - an array mixing any of the above.
 *
 * Anything else is ignored. All texts are trimmed and empty ones dropped.
 *
 * @param value The raw `recipeInstructions` value.
 * @returns Step texts in document order; empty when nothing usable is found.
 */
function readInstructions(value) {
  const steps = [];

  // Depth-first walk so steps nested in sections keep their order.
  function visit(node) {
    if (typeof node === "string") {
      const trimmed = node.trim();
      if (trimmed) steps.push(trimmed);
      return;
    }
    if (Array.isArray(node)) {
      for (const child of node) visit(child);
      return;
    }
    if (node === null || typeof node !== "object") return;

    const text = typeof node["text"] === "string" ? node["text"].trim() : "";
    if (text) {
      steps.push(text);
      return;
    }
    // No own text: treat the node as a container of nested steps.
    visit(node["itemListElement"]);
  }

  visit(value);
  return steps;
}
|
|
118
|
+
/**
 * Extracts an author name from a JSON-LD `author` value.
 *
 * - string: the trimmed string (or `undefined` if blank),
 * - array: the first entry that yields a name, searched recursively,
 * - object: its `name` property when that is a non-blank string.
 *
 * @param value The raw `author` value.
 * @returns The author name, or `undefined` when none can be read.
 */
function readAuthor(value) {
  if (Array.isArray(value)) {
    for (const candidate of value) {
      const found = readAuthor(candidate);
      if (found) return found;
    }
    return undefined;
  }
  if (typeof value === "string") return stringField(value);
  if (!value || typeof value !== "object") return undefined;

  const authorName = value["name"];
  return typeof authorName === "string" ? stringField(authorName) : undefined;
}
|
|
133
|
+
export {
|
|
134
|
+
extractRecipe
|
|
135
|
+
};
|
|
136
|
+
//# sourceMappingURL=recipe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/extraction/v1/recipe.ts"],"sourcesContent":["import { extractJsonLd } from '../jsonld.js';\nimport type { ExtractionResult } from '../../types.js';\n\nconst MIN_CONTENT_THRESHOLD = 100;\n\nexport async function extractRecipe(html: string, _url: string): Promise<ExtractionResult | null> {\n if (!html) return null;\n\n let blocks: Record<string, unknown>[];\n try {\n blocks = extractJsonLd(html);\n } catch {\n return null;\n }\n\n const recipe = blocks.find((block) => typeIncludes(block['@type'], 'recipe'));\n if (!recipe) return null;\n\n const name = stringField(recipe['name']);\n if (!name) return null;\n\n const lines: string[] = [`# ${name}`];\n\n const description = stringField(recipe['description']);\n if (description) {\n lines.push('', description);\n }\n\n const metaLines: string[] = [];\n const totalTime = stringField(recipe['totalTime']);\n if (totalTime) metaLines.push(`**Total time:** ${totalTime}`);\n const prepTime = stringField(recipe['prepTime']);\n if (prepTime) metaLines.push(`**Prep time:** ${prepTime}`);\n const cookTime = stringField(recipe['cookTime']);\n if (cookTime) metaLines.push(`**Cook time:** ${cookTime}`);\n const recipeYield = stringField(recipe['recipeYield']);\n if (recipeYield) metaLines.push(`**Yield:** ${recipeYield}`);\n const cuisine = stringField(recipe['recipeCuisine']);\n if (cuisine) metaLines.push(`**Cuisine:** ${cuisine}`);\n if (metaLines.length > 0) {\n lines.push('', ...metaLines);\n }\n\n const ingredients = stringArray(recipe['recipeIngredient']);\n if (ingredients.length > 0) {\n lines.push('', '## Ingredients');\n for (const ingredient of ingredients) {\n lines.push(`- ${ingredient}`);\n }\n }\n\n const instructions = readInstructions(recipe['recipeInstructions']);\n if (instructions.length > 0) {\n lines.push('', '## Instructions');\n instructions.forEach((step, idx) => {\n lines.push(`${idx + 1}. 
${step}`);\n });\n }\n\n const markdown = lines.join('\\n').trim();\n if (markdown.length < MIN_CONTENT_THRESHOLD) return null;\n\n const author = readAuthor(recipe['author']);\n const datePublished = stringField(recipe['datePublished']);\n\n return {\n title: name,\n markdown,\n metadata: {\n ...(description ? { description } : {}),\n ...(author ? { author } : {}),\n ...(datePublished ? { date: datePublished } : {}),\n },\n links: [],\n images: [],\n extractor: 'site-specific',\n };\n}\n\nfunction typeIncludes(raw: unknown, want: string): boolean {\n const target = want.toLowerCase();\n if (typeof raw === 'string') return normalizeType(raw) === target;\n if (Array.isArray(raw)) {\n return raw.some((entry) => typeof entry === 'string' && normalizeType(entry) === target);\n }\n return false;\n}\n\nfunction normalizeType(raw: string): string {\n const tail = raw.split(/[/#:]/).pop() ?? raw;\n return tail.toLowerCase();\n}\n\nfunction stringField(value: unknown): string | undefined {\n if (typeof value !== 'string') return undefined;\n const trimmed = value.trim();\n return trimmed.length > 0 ? trimmed : undefined;\n}\n\nfunction stringArray(value: unknown): string[] {\n if (!Array.isArray(value)) return [];\n const out: string[] = [];\n for (const entry of value) {\n if (typeof entry === 'string') {\n const trimmed = entry.trim();\n if (trimmed) out.push(trimmed);\n }\n }\n return out;\n}\n\nfunction readInstructions(value: unknown): string[] {\n if (!Array.isArray(value)) {\n if (typeof value === 'string') {\n const trimmed = value.trim();\n return trimmed ? 
[trimmed] : [];\n }\n return [];\n }\n const out: string[] = [];\n for (const entry of value) {\n if (typeof entry === 'string') {\n const trimmed = entry.trim();\n if (trimmed) out.push(trimmed);\n continue;\n }\n if (entry && typeof entry === 'object') {\n const text = (entry as Record<string, unknown>)['text'];\n if (typeof text === 'string') {\n const trimmed = text.trim();\n if (trimmed) out.push(trimmed);\n }\n }\n }\n return out;\n}\n\nfunction readAuthor(value: unknown): string | undefined {\n if (typeof value === 'string') return stringField(value);\n if (Array.isArray(value)) {\n for (const entry of value) {\n const name = readAuthor(entry);\n if (name) return name;\n }\n return undefined;\n }\n if (value && typeof value === 'object') {\n const name = (value as Record<string, unknown>)['name'];\n if (typeof name === 'string') return stringField(name);\n }\n return undefined;\n}\n"],"mappings":"AAAA,SAAS,qBAAqB;AAG9B,MAAM,wBAAwB;AAE9B,eAAsB,cAAc,MAAc,MAAgD;AAChG,MAAI,CAAC,KAAM,QAAO;AAElB,MAAI;AACJ,MAAI;AACF,aAAS,cAAc,IAAI;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO,KAAK,CAAC,UAAU,aAAa,MAAM,OAAO,GAAG,QAAQ,CAAC;AAC5E,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,OAAO,YAAY,OAAO,MAAM,CAAC;AACvC,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,QAAkB,CAAC,KAAK,IAAI,EAAE;AAEpC,QAAM,cAAc,YAAY,OAAO,aAAa,CAAC;AACrD,MAAI,aAAa;AACf,UAAM,KAAK,IAAI,WAAW;AAAA,EAC5B;AAEA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAY,YAAY,OAAO,WAAW,CAAC;AACjD,MAAI,UAAW,WAAU,KAAK,mBAAmB,SAAS,EAAE;AAC5D,QAAM,WAAW,YAAY,OAAO,UAAU,CAAC;AAC/C,MAAI,SAAU,WAAU,KAAK,kBAAkB,QAAQ,EAAE;AACzD,QAAM,WAAW,YAAY,OAAO,UAAU,CAAC;AAC/C,MAAI,SAAU,WAAU,KAAK,kBAAkB,QAAQ,EAAE;AACzD,QAAM,cAAc,YAAY,OAAO,aAAa,CAAC;AACrD,MAAI,YAAa,WAAU,KAAK,cAAc,WAAW,EAAE;AAC3D,QAAM,UAAU,YAAY,OAAO,eAAe,CAAC;AACnD,MAAI,QAAS,WAAU,KAAK,gBAAgB,OAAO,EAAE;AACrD,MAAI,UAAU,SAAS,GAAG;AACxB,UAAM,KAAK,IAAI,GAAG,SAAS;AAAA,EAC7B;AAEA,QAAM,cAAc,YAAY,OAAO,kBAAkB,CAAC;AAC1D,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,KAAK,IAAI,gBAAgB;AAC/B,eAAW,cAAc,aAAa;AACpC,YAAM,KAAK,KAAK,UAAU,EAAE;AAAA,IAC9B;AAA
A,EACF;AAEA,QAAM,eAAe,iBAAiB,OAAO,oBAAoB,CAAC;AAClE,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,KAAK,IAAI,iBAAiB;AAChC,iBAAa,QAAQ,CAAC,MAAM,QAAQ;AAClC,YAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE;AAAA,IAClC,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,MAAM,KAAK,IAAI,EAAE,KAAK;AACvC,MAAI,SAAS,SAAS,sBAAuB,QAAO;AAEpD,QAAM,SAAS,WAAW,OAAO,QAAQ,CAAC;AAC1C,QAAM,gBAAgB,YAAY,OAAO,eAAe,CAAC;AAEzD,SAAO;AAAA,IACL,OAAO;AAAA,IACP;AAAA,IACA,UAAU;AAAA,MACR,GAAI,cAAc,EAAE,YAAY,IAAI,CAAC;AAAA,MACrC,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,MAC3B,GAAI,gBAAgB,EAAE,MAAM,cAAc,IAAI,CAAC;AAAA,IACjD;AAAA,IACA,OAAO,CAAC;AAAA,IACR,QAAQ,CAAC;AAAA,IACT,WAAW;AAAA,EACb;AACF;AAEA,SAAS,aAAa,KAAc,MAAuB;AACzD,QAAM,SAAS,KAAK,YAAY;AAChC,MAAI,OAAO,QAAQ,SAAU,QAAO,cAAc,GAAG,MAAM;AAC3D,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,KAAK,CAAC,UAAU,OAAO,UAAU,YAAY,cAAc,KAAK,MAAM,MAAM;AAAA,EACzF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,OAAO,IAAI,MAAM,OAAO,EAAE,IAAI,KAAK;AACzC,SAAO,KAAK,YAAY;AAC1B;AAEA,SAAS,YAAY,OAAoC;AACvD,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,QAAM,UAAU,MAAM,KAAK;AAC3B,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,YAAY,OAA0B;AAC7C,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,OAAO;AACzB,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,UAAU,MAAM,KAAK;AAC3B,UAAI,QAAS,KAAI,KAAK,OAAO;AAAA,IAC/B;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,iBAAiB,OAA0B;AAClD,MAAI,CAAC,MAAM,QAAQ,KAAK,GAAG;AACzB,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,UAAU,MAAM,KAAK;AAC3B,aAAO,UAAU,CAAC,OAAO,IAAI,CAAC;AAAA,IAChC;AACA,WAAO,CAAC;AAAA,EACV;AACA,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,OAAO;AACzB,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,UAAU,MAAM,KAAK;AAC3B,UAAI,QAAS,KAAI,KAAK,OAAO;AAC7B;AAAA,IACF;AACA,QAAI,SAAS,OAAO,UAAU,UAAU;AACtC,YAAM,OAAQ,MAAkC,MAAM;AACtD,UAAI,OAAO,SAAS,UAAU;AAC5B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,QAAS,KAAI,KAAK,OAAO;AAAA,MAC/B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WAAW,OAAoC;AACtD,MAAI,OAAO,UAAU,SAAU,QAAO,YAAY,KAAK;AACvD,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,OAAO,WAAW,KAAK;AAC7B,UAAI,KAAM,QAAO;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,UA
AU;AACtC,UAAM,OAAQ,MAAkC,MAAM;AACtD,QAAI,OAAO,SAAS,SAAU,QAAO,YAAY,IAAI;AAAA,EACvD;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { ExtractionResult } from '../../types.js';
|
|
2
|
+
/** Input accepted by `routedExtract`. */
export interface RoutedExtractInput {
|
|
3
|
+
/** Original page HTML. */
html: string;
|
|
4
|
+
/** URL of the page the HTML came from. */
url: string;
|
|
5
|
+
/** Optional pre-cleaned HTML. NOTE(review): presumably lets callers skip the cleaning pass — confirm against the implementation. */
cleanedHtml?: string;
|
|
6
|
+
/** Optional content type. NOTE(review): its effect is not visible from this declaration — confirm intended use. */
contentType?: string;
|
|
7
|
+
}
|
|
8
|
+
/**
 * V1 routed extractor — picks a category-specific extractor based on the
 * classifier output, with defuddle → readability → turndown fallbacks. Site
 * extractors (github/stackoverflow/mdn/docs-generic + plugins) run first and
 * short-circuit on match, matching legacy behavior.
 *
 * PDF handling lives in V1Extractor — this router assumes HTML.
 *
 * @param input The page HTML and URL, plus optional pre-cleaned HTML and
 *   content type.
 * @returns A promise for the extraction result.
 */
|
|
16
|
+
export declare function routedExtract(input: RoutedExtractInput): Promise<ExtractionResult>;
|
|
17
|
+
//# sourceMappingURL=routed.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routed.d.ts","sourceRoot":"","sources":["../../../src/extraction/v1/routed.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAcvD,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;GAOG;AACH,wBAAsB,aAAa,CAAC,KAAK,EAAE,kBAAkB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA2BxF"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { defuddleExtract } from "../defuddle.js";
|
|
3
|
+
import { readabilityExtract } from "../readability.js";
|
|
4
|
+
import { htmlToMarkdown } from "../markdown.js";
|
|
5
|
+
import { stripBoilerplateDom } from "../boilerplate.js";
|
|
6
|
+
import { createLogger } from "../../logger.js";
|
|
7
|
+
import { classifyContent } from "./classifier.js";
|
|
8
|
+
import { extractRecipe } from "./recipe.js";
|
|
9
|
+
import { extractProduct } from "./product.js";
|
|
10
|
+
import { extractNews } from "./news.js";
|
|
11
|
+
import { getSiteExtractors } from "./site-extractors.js";
|
|
12
|
+
// Module-level logger, created with the name "extract".
const log = createLogger("extract");
|
|
13
|
+
/**
 * Routes a page to the most specific extractor available.
 *
 * Order of attempts:
 * 1. site extractors (a truthy result is returned immediately),
 * 2. the category extractor chosen from the classifier result
 *    ("recipe", "product" or "news"),
 * 3. the generic fallback chain.
 *
 * @param input Object with `html`, `url` and an optional `cleanedHtml`; when
 *   `cleanedHtml` is null/undefined it is produced by `cleanHtml(html, url)`.
 * @returns The extraction result from the first stage that yields one.
 */
async function routedExtract(input) {
  const html = input.html;
  const url = input.url;
  const cleanedHtml = input.cleanedHtml ?? cleanHtml(html, url);

  const siteResult = trySiteExtractors(cleanedHtml, url, html);
  if (siteResult) return siteResult;

  // Classification runs on the original HTML, not the cleaned copy.
  const type = classifyContent(url, html);
  log.debug("classified content", { url, type });

  let categoryExtractor;
  if (type === "recipe") {
    categoryExtractor = extractRecipe;
  } else if (type === "product") {
    categoryExtractor = extractProduct;
  } else if (type === "news") {
    categoryExtractor = extractNews;
  }

  // "code", "docs", "generic" and any unknown type go straight to the fallbacks.
  if (categoryExtractor === undefined) {
    return fallbackChain(cleanedHtml, url, type);
  }

  const specialized = await categoryExtractor(cleanedHtml, url);
  if (specialized !== null && specialized !== undefined) {
    return specialized;
  }
  return fallbackChain(cleanedHtml, url);
}
|
|
34
|
+
/**
 * Parses `html`, runs the boilerplate-stripping DOM pass over it and
 * serialises the document back to a string.
 *
 * Best-effort: if parsing or stripping throws, a warning is logged and the
 * untouched input is returned instead.
 *
 * @param html Raw page HTML.
 * @param url Page URL; only used as context in the warning log entry.
 * @returns The serialised cleaned document, or `html` unchanged on failure.
 */
function cleanHtml(html, url) {
  let output = html;
  try {
    const dom = parseHTML(html).document;
    stripBoilerplateDom(dom);
    output = dom.toString();
  } catch (failure) {
    log.warn("boilerplate DOM pre-pass failed", { url, error: String(failure) });
    output = html;
  }
  return output;
}
|
|
44
|
+
/**
 * Runs the first registered site extractor whose `canHandle(url, originalHtml)`
 * is truthy and returns its result.
 *
 * Only that first matching extractor is consulted: if its `extract()` yields
 * null/undefined, later extractors are not tried and null is returned.
 *
 * @param cleanedHtml HTML handed to the extractor's `extract()`.
 * @param url Page URL.
 * @param originalHtml Unmodified HTML used for the `canHandle()` check.
 * @returns The extractor output, or null when nothing matched or produced a result.
 */
function trySiteExtractors(cleanedHtml, url, originalHtml) {
  for (const candidate of getSiteExtractors()) {
    if (candidate.canHandle(url, originalHtml)) {
      return candidate.extract(cleanedHtml, url) ?? null;
    }
  }
  return null;
}
|
|
51
|
+
/**
 * Generic extraction chain: defuddle, then readability, then a bare
 * HTML-to-markdown conversion. The first truthy result wins.
 *
 * @param cleanedHtml HTML to extract from.
 * @param url Page URL passed to the extractors.
 * @param _type Content-type label; accepted but not used by this function.
 * @returns Promise resolving to an extraction result. The last-resort result
 *   has an empty title, empty metadata/links/images and extractor "turndown".
 */
async function fallbackChain(cleanedHtml, url, _type) {
  return (
    (await defuddleExtract(cleanedHtml, url)) ||
    readabilityExtract(cleanedHtml, url) || {
      title: "",
      markdown: htmlToMarkdown(cleanedHtml),
      metadata: {},
      links: [],
      images: [],
      extractor: "turndown"
    }
  );
}
|
|
65
|
+
export {
|
|
66
|
+
routedExtract
|
|
67
|
+
};
|
|
68
|
+
//# sourceMappingURL=routed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/extraction/v1/routed.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport type { ExtractionResult } from '../../types.js';\nimport { defuddleExtract } from '../defuddle.js';\nimport { readabilityExtract } from '../readability.js';\nimport { htmlToMarkdown } from '../markdown.js';\nimport { stripBoilerplateDom } from '../boilerplate.js';\nimport { createLogger } from '../../logger.js';\nimport { classifyContent, type ContentType } from './classifier.js';\nimport { extractRecipe } from './recipe.js';\nimport { extractProduct } from './product.js';\nimport { extractNews } from './news.js';\nimport { getSiteExtractors } from './site-extractors.js';\n\nconst log = createLogger('extract');\n\nexport interface RoutedExtractInput {\n html: string;\n url: string;\n cleanedHtml?: string;\n contentType?: string;\n}\n\n/**\n * V1 routed extractor — picks a category-specific extractor based on the\n * classifier output, with defuddle → readability → turndown fallbacks. Site\n * extractors (github/stackoverflow/mdn/docs-generic + plugins) run first and\n * short-circuit on match, matching legacy behavior.\n *\n * PDF handling lives in V1Extractor — this router assumes HTML.\n */\nexport async function routedExtract(input: RoutedExtractInput): Promise<ExtractionResult> {\n const { html, url } = input;\n const cleanedHtml = input.cleanedHtml ?? cleanHtml(html, url);\n\n const siteHit = trySiteExtractors(cleanedHtml, url, html);\n if (siteHit) return siteHit;\n\n const type = classifyContent(url, html);\n log.debug('classified content', { url, type });\n\n switch (type) {\n case 'recipe':\n return (\n (await extractRecipe(cleanedHtml, url)) ?? (await fallbackChain(cleanedHtml, url))\n );\n case 'product':\n return (\n (await extractProduct(cleanedHtml, url)) ?? (await fallbackChain(cleanedHtml, url))\n );\n case 'news':\n return (await extractNews(cleanedHtml, url)) ?? 
(await fallbackChain(cleanedHtml, url));\n case 'code':\n case 'docs':\n case 'generic':\n default:\n return fallbackChain(cleanedHtml, url, type);\n }\n}\n\nfunction cleanHtml(html: string, url: string): string {\n try {\n const { document } = parseHTML(html);\n stripBoilerplateDom(document);\n return document.toString();\n } catch (err) {\n log.warn('boilerplate DOM pre-pass failed', { url, error: String(err) });\n return html;\n }\n}\n\nfunction trySiteExtractors(\n cleanedHtml: string,\n url: string,\n originalHtml: string,\n): ExtractionResult | null {\n const extractors = getSiteExtractors();\n const match = extractors.find((e) => e.canHandle(url, originalHtml));\n if (!match) return null;\n const out = match.extract(cleanedHtml, url);\n return out ?? null;\n}\n\nasync function fallbackChain(\n cleanedHtml: string,\n url: string,\n _type?: ContentType,\n): Promise<ExtractionResult> {\n const fromDefuddle = await defuddleExtract(cleanedHtml, url);\n if (fromDefuddle) return fromDefuddle;\n\n const fromReadability = readabilityExtract(cleanedHtml, url);\n if (fromReadability) return fromReadability;\n\n return {\n title: '',\n markdown: htmlToMarkdown(cleanedHtml),\n metadata: {},\n links: [],\n images: [],\n extractor: 'turndown',\n 
};\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAE1B,SAAS,uBAAuB;AAChC,SAAS,0BAA0B;AACnC,SAAS,sBAAsB;AAC/B,SAAS,2BAA2B;AACpC,SAAS,oBAAoB;AAC7B,SAAS,uBAAyC;AAClD,SAAS,qBAAqB;AAC9B,SAAS,sBAAsB;AAC/B,SAAS,mBAAmB;AAC5B,SAAS,yBAAyB;AAElC,MAAM,MAAM,aAAa,SAAS;AAiBlC,eAAsB,cAAc,OAAsD;AACxF,QAAM,EAAE,MAAM,IAAI,IAAI;AACtB,QAAM,cAAc,MAAM,eAAe,UAAU,MAAM,GAAG;AAE5D,QAAM,UAAU,kBAAkB,aAAa,KAAK,IAAI;AACxD,MAAI,QAAS,QAAO;AAEpB,QAAM,OAAO,gBAAgB,KAAK,IAAI;AACtC,MAAI,MAAM,sBAAsB,EAAE,KAAK,KAAK,CAAC;AAE7C,UAAQ,MAAM;AAAA,IACZ,KAAK;AACH,aACG,MAAM,cAAc,aAAa,GAAG,KAAO,MAAM,cAAc,aAAa,GAAG;AAAA,IAEpF,KAAK;AACH,aACG,MAAM,eAAe,aAAa,GAAG,KAAO,MAAM,cAAc,aAAa,GAAG;AAAA,IAErF,KAAK;AACH,aAAQ,MAAM,YAAY,aAAa,GAAG,KAAO,MAAM,cAAc,aAAa,GAAG;AAAA,IACvF,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL;AACE,aAAO,cAAc,aAAa,KAAK,IAAI;AAAA,EAC/C;AACF;AAEA,SAAS,UAAU,MAAc,KAAqB;AACpD,MAAI;AACF,UAAM,EAAE,SAAS,IAAI,UAAU,IAAI;AACnC,wBAAoB,QAAQ;AAC5B,WAAO,SAAS,SAAS;AAAA,EAC3B,SAAS,KAAK;AACZ,QAAI,KAAK,mCAAmC,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AACvE,WAAO;AAAA,EACT;AACF;AAEA,SAAS,kBACP,aACA,KACA,cACyB;AACzB,QAAM,aAAa,kBAAkB;AACrC,QAAM,QAAQ,WAAW,KAAK,CAAC,MAAM,EAAE,UAAU,KAAK,YAAY,CAAC;AACnE,MAAI,CAAC,MAAO,QAAO;AACnB,QAAM,MAAM,MAAM,QAAQ,aAAa,GAAG;AAC1C,SAAO,OAAO;AAChB;AAEA,eAAe,cACb,aACA,KACA,OAC2B;AAC3B,QAAM,eAAe,MAAM,gBAAgB,aAAa,GAAG;AAC3D,MAAI,aAAc,QAAO;AAEzB,QAAM,kBAAkB,mBAAmB,aAAa,GAAG;AAC3D,MAAI,gBAAiB,QAAO;AAE5B,SAAO;AAAA,IACL,OAAO;AAAA,IACP,UAAU,eAAe,WAAW;AAAA,IACpC,UAAU,CAAC;AAAA,IACX,OAAO,CAAC;AAAA,IACR,QAAQ,CAAC;AAAA,IACT,WAAW;AAAA,EACb;AACF;","names":[]}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Structured article record resolved by `extractArticle`.
 * `title`, `body` and `url` are always present; the remaining fields are optional.
 */
export interface ArticleData {
|
|
2
|
+
title: string;
|
|
3
|
+
body: string;
|
|
4
|
+
url: string;
|
|
5
|
+
author?: string;
|
|
6
|
+
date?: string;
|
|
7
|
+
description?: string;
|
|
8
|
+
language?: string;
|
|
9
|
+
}
|
|
10
|
+
/** Extracts an `ArticleData` record from a page's HTML and URL; resolves to `null` when no article could be produced. */
export declare function extractArticle(html: string, url: string): Promise<ArticleData | null>;
|
|
11
|
+
//# sourceMappingURL=Article.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Article.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/Article.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAoB3F"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { extractNews } from "../news.js";
|
|
2
|
+
/**
 * Builds an article record on top of the news extractor.
 *
 * The title and body are the trimmed `title` and `markdown` of the
 * extractNews() result. `author`, `date`, `description` and `language` are
 * copied from its `metadata` only when truthy.
 *
 * @param html Page HTML handed to extractNews().
 * @param url Page URL; echoed back in the returned record.
 * @returns Promise resolving to `{ title, body, url, ...optional fields }`,
 *   or null when extractNews() yields nothing or both title and body are empty.
 */
async function extractArticle(html, url) {
  const news = await extractNews(html, url);
  if (!news) return null;

  const heading = (news.title ?? "").trim();
  const text = (news.markdown ?? "").trim();
  if (!(heading || text)) return null;

  const article = { title: heading, body: text, url };

  // Copy over only the optional metadata fields that carry a value.
  const details = news.metadata ?? {};
  for (const key of ["author", "date", "description", "language"]) {
    const value = details[key];
    if (value) article[key] = value;
  }
  return article;
}
|
|
20
|
+
export {
|
|
21
|
+
extractArticle
|
|
22
|
+
};
|
|
23
|
+
//# sourceMappingURL=Article.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/Article.ts"],"sourcesContent":["import { extractNews } from '../news.js';\n\nexport interface ArticleData {\n title: string;\n body: string;\n url: string;\n author?: string;\n date?: string;\n description?: string;\n language?: string;\n}\n\nexport async function extractArticle(html: string, url: string): Promise<ArticleData | null> {\n const result = await extractNews(html, url);\n if (!result) return null;\n\n const title = (result.title ?? '').trim();\n const body = (result.markdown ?? '').trim();\n if (!title && !body) return null;\n\n const meta = result.metadata ?? {};\n const data: ArticleData = {\n title,\n body,\n url,\n };\n if (meta.author) data.author = meta.author;\n if (meta.date) data.date = meta.date;\n if (meta.description) data.description = meta.description;\n if (meta.language) data.language = meta.language;\n\n return data;\n}\n"],"mappings":"AAAA,SAAS,mBAAmB;AAY5B,eAAsB,eAAe,MAAc,KAA0C;AAC3F,QAAM,SAAS,MAAM,YAAY,MAAM,GAAG;AAC1C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,SAAS,OAAO,SAAS,IAAI,KAAK;AACxC,QAAM,QAAQ,OAAO,YAAY,IAAI,KAAK;AAC1C,MAAI,CAAC,SAAS,CAAC,KAAM,QAAO;AAE5B,QAAM,OAAO,OAAO,YAAY,CAAC;AACjC,QAAM,OAAoB;AAAA,IACxB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,MAAI,KAAK,OAAQ,MAAK,SAAS,KAAK;AACpC,MAAI,KAAK,KAAM,MAAK,OAAO,KAAK;AAChC,MAAI,KAAK,YAAa,MAAK,cAAc,KAAK;AAC9C,MAAI,KAAK,SAAU,MAAK,WAAW,KAAK;AAExC,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Structured code-snippet record resolved by `extractCodeSnippet`.
 * `code` and `url` are always present; `language`, `filename` and `description` are optional.
 */
export interface CodeSnippetData {
|
|
2
|
+
language?: string;
|
|
3
|
+
code: string;
|
|
4
|
+
filename?: string;
|
|
5
|
+
description?: string;
|
|
6
|
+
url: string;
|
|
7
|
+
}
|
|
8
|
+
/** Extracts a `CodeSnippetData` record from a page's HTML and URL; resolves to `null` when no snippet could be produced. */
export declare function extractCodeSnippet(html: string, url: string): Promise<CodeSnippetData | null>;
|
|
9
|
+
//# sourceMappingURL=CodeSnippet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CodeSnippet.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/CodeSnippet.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,MAAM,CAAC;CACb;AAID,wBAAsB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC,CAwCnG"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
// Minimum text length, in characters, used as the acceptance threshold for a code block.
const MIN_CODE_LENGTH = 30;
|
|
3
|
+
/**
 * Extracts the largest `<pre>`-wrapped `<code>` block of a page.
 *
 * "Largest" is decided on the raw (untrimmed) text length, and the first
 * block wins on ties. The block is rejected when that raw length is below
 * MIN_CODE_LENGTH or when its trimmed text is empty.
 * NOTE(review): the threshold is applied before trimming, so a block padded
 * with whitespace can pass it - confirm this is intended.
 *
 * @param html Page HTML; an empty/falsy value resolves to null.
 * @param url Page URL; echoed back in the returned record.
 * @returns Promise resolving to `{ code, url }` plus `language`, `filename`
 *   and `description` when they could be detected, or null when the HTML
 *   cannot be parsed or holds no suitable code block.
 */
async function extractCodeSnippet(html, url) {
  if (!html) return null;

  let doc;
  try {
    doc = parseHTML(html).document;
  } catch {
    return null;
  }

  // Strict ">" keeps the earliest block when several share the maximum length.
  const best = Array.from(doc.querySelectorAll("pre > code, pre code")).reduce(
    (leader, node) => {
      const size = (node.textContent ?? "").length;
      return size > leader.size ? { node, size } : leader;
    },
    { node: null, size: 0 }
  );
  if (best.node === null || best.size < MIN_CODE_LENGTH) return null;

  const code = (best.node.textContent ?? "").trim();
  if (!code) return null;

  const language = detectLanguage(best.node);
  const enclosingPre = closestPre(best.node);
  const filename = detectFilename(enclosingPre);
  const description = detectDescription(enclosingPre);

  return {
    code,
    url,
    ...(language ? { language } : {}),
    ...(filename ? { filename } : {}),
    ...(description ? { description } : {})
  };
}
|
|
35
|
+
/**
 * Determines the language of a code element from CSS class names.
 *
 * The element's own `class` attribute is checked first, then the `class` of
 * the nearest enclosing `<pre>` (if any).
 *
 * @param codeEl The code element to inspect.
 * @returns The language name, or undefined when neither class list names one.
 */
function detectLanguage(codeEl) {
  const own = languageFromClass(codeEl.getAttribute("class"));
  if (own) return own;

  const wrapper = closestPre(codeEl);
  const inherited = wrapper ? languageFromClass(wrapper.getAttribute("class")) : undefined;
  return inherited || undefined;
}
|
|
45
|
+
/**
 * Pulls a language name out of a `class` attribute value.
 *
 * The value is split on whitespace; the first token of the form
 * `language-X`, `lang-X`, `hljs-X` or `highlight-X` decides, and `X`
 * (everything after the first matching prefix and dash) is returned.
 *
 * @param cls Raw class attribute value; may be null, undefined or empty.
 * @returns The captured language name, or undefined when no token matches.
 */
function languageFromClass(cls) {
  if (!cls) return undefined;

  const prefixed = /^(?:language|lang|hljs|highlight)-(.+)$/;
  const hit = cls
    .split(/\s+/)
    .map((token) => prefixed.exec(token))
    .find((match) => match !== null);
  return hit ? hit[1] : undefined;
}
|
|
53
|
+
/**
 * Walks from `el` up through `parentElement` links and returns the first
 * node whose `tagName` is "PRE". The starting element itself is included.
 *
 * @param el Element to start from.
 * @returns The nearest PRE element, or null when the chain ends without one.
 */
function closestPre(el) {
  for (let node = el; node; node = node.parentElement) {
    if (node.tagName === "PRE") return node;
  }
  return null;
}
|
|
61
|
+
/**
 * Looks for a filename label near a `<pre>` element.
 *
 * Lookup order:
 *   1. the first `figcaption` found under the pre's parent, or - when the
 *      parent has none - the pre's previous sibling; it is used only if its
 *      tagName is "FIGCAPTION" and its trimmed text is non-empty;
 *   2. the first `header` / `.filename` element found under the pre's parent.
 *
 * NOTE(review): both parent lookups search all descendants of the parent, so
 * a pre sitting directly in a large container may pick up an unrelated
 * header or figcaption - confirm against real pages.
 *
 * @param pre The enclosing `<pre>` element, or null.
 * @returns The trimmed label text, or undefined when nothing usable is found.
 */
function detectFilename(pre) {
  if (!pre) return undefined;

  const trimmedText = (node) => (node.textContent ?? "").trim();
  const container = pre.parentElement;

  const caption = container?.querySelector("figcaption") ?? pre.previousElementSibling;
  if (caption && caption.tagName === "FIGCAPTION") {
    const label = trimmedText(caption);
    if (label) return label;
  }

  const labelled = container?.querySelector("header, .filename, div.filename");
  const fallback = labelled ? trimmedText(labelled) : "";
  return fallback || undefined;
}
|
|
75
|
+
/**
 * Uses the paragraph directly preceding a `<pre>` as its description.
 *
 * FIGCAPTION and HEADER siblings immediately before the pre are stepped
 * over first. The next sibling must be a `P`; its trimmed text, cut to at
 * most 300 characters, is the description.
 *
 * @param pre The enclosing `<pre>` element, or null.
 * @returns The description text, or undefined when there is no such
 *   paragraph or it is empty.
 */
function detectDescription(pre) {
  if (!pre) return undefined;

  let node = pre.previousElementSibling;
  while (node && ["FIGCAPTION", "HEADER"].includes(node.tagName)) {
    node = node.previousElementSibling;
  }
  if (!node || node.tagName !== "P") return undefined;

  const text = (node.textContent ?? "").trim();
  return text ? text.slice(0, 300) : undefined;
}
|
|
87
|
+
export {
|
|
88
|
+
extractCodeSnippet
|
|
89
|
+
};
|
|
90
|
+
//# sourceMappingURL=CodeSnippet.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/CodeSnippet.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\n\nexport interface CodeSnippetData {\n language?: string;\n code: string;\n filename?: string;\n description?: string;\n url: string;\n}\n\nconst MIN_CODE_LENGTH = 30;\n\nexport async function extractCodeSnippet(html: string, url: string): Promise<CodeSnippetData | null> {\n if (!html) return null;\n\n let document: Document;\n try {\n ({ document } = parseHTML(html));\n } catch {\n return null;\n }\n\n const blocks = Array.from(document.querySelectorAll('pre > code, pre code'));\n if (blocks.length === 0) return null;\n\n let largest: Element | null = null;\n let largestLen = 0;\n for (const block of blocks) {\n const text = block.textContent ?? '';\n if (text.length > largestLen) {\n largestLen = text.length;\n largest = block;\n }\n }\n if (!largest || largestLen < MIN_CODE_LENGTH) return null;\n\n const code = (largest.textContent ?? '').trim();\n if (!code) return null;\n\n const data: CodeSnippetData = { code, url };\n\n const language = detectLanguage(largest);\n if (language) data.language = language;\n\n const pre = closestPre(largest);\n const filename = detectFilename(pre);\n if (filename) data.filename = filename;\n\n const description = detectDescription(pre);\n if (description) data.description = description;\n\n return data;\n}\n\nfunction detectLanguage(codeEl: Element): string | undefined {\n const fromCode = languageFromClass(codeEl.getAttribute('class'));\n if (fromCode) return fromCode;\n const pre = closestPre(codeEl);\n if (pre) {\n const fromPre = languageFromClass(pre.getAttribute('class'));\n if (fromPre) return fromPre;\n }\n return undefined;\n}\n\nfunction languageFromClass(cls: string | null | undefined): string | undefined {\n if (!cls) return undefined;\n for (const token of cls.split(/\\s+/)) {\n const m = /^(?:language|lang|hljs|highlight)-(.+)$/.exec(token);\n if (m) return m[1];\n }\n return 
undefined;\n}\n\nfunction closestPre(el: Element): Element | null {\n let current: Element | null = el;\n while (current) {\n if (current.tagName === 'PRE') return current;\n current = current.parentElement;\n }\n return null;\n}\n\nfunction detectFilename(pre: Element | null): string | undefined {\n if (!pre) return undefined;\n const figcaption =\n pre.parentElement?.querySelector('figcaption') ?? pre.previousElementSibling;\n if (figcaption && figcaption.tagName === 'FIGCAPTION') {\n const text = (figcaption.textContent ?? '').trim();\n if (text) return text;\n }\n const header = pre.parentElement?.querySelector('header, .filename, div.filename');\n if (header) {\n const text = (header.textContent ?? '').trim();\n if (text) return text;\n }\n return undefined;\n}\n\nfunction detectDescription(pre: Element | null): string | undefined {\n if (!pre) return undefined;\n let sib = pre.previousElementSibling;\n // skip over figcaption/header used for filename\n while (sib && (sib.tagName === 'FIGCAPTION' || sib.tagName === 'HEADER')) {\n sib = sib.previousElementSibling;\n }\n if (sib && sib.tagName === 'P') {\n const text = (sib.textContent ?? 
'').trim();\n if (text) return text.slice(0, 300);\n }\n return undefined;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAU1B,MAAM,kBAAkB;AAExB,eAAsB,mBAAmB,MAAc,KAA8C;AACnG,MAAI,CAAC,KAAM,QAAO;AAElB,MAAI;AACJ,MAAI;AACF,KAAC,EAAE,SAAS,IAAI,UAAU,IAAI;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,MAAM,KAAK,SAAS,iBAAiB,sBAAsB,CAAC;AAC3E,MAAI,OAAO,WAAW,EAAG,QAAO;AAEhC,MAAI,UAA0B;AAC9B,MAAI,aAAa;AACjB,aAAW,SAAS,QAAQ;AAC1B,UAAM,OAAO,MAAM,eAAe;AAClC,QAAI,KAAK,SAAS,YAAY;AAC5B,mBAAa,KAAK;AAClB,gBAAU;AAAA,IACZ;AAAA,EACF;AACA,MAAI,CAAC,WAAW,aAAa,gBAAiB,QAAO;AAErD,QAAM,QAAQ,QAAQ,eAAe,IAAI,KAAK;AAC9C,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,OAAwB,EAAE,MAAM,IAAI;AAE1C,QAAM,WAAW,eAAe,OAAO;AACvC,MAAI,SAAU,MAAK,WAAW;AAE9B,QAAM,MAAM,WAAW,OAAO;AAC9B,QAAM,WAAW,eAAe,GAAG;AACnC,MAAI,SAAU,MAAK,WAAW;AAE9B,QAAM,cAAc,kBAAkB,GAAG;AACzC,MAAI,YAAa,MAAK,cAAc;AAEpC,SAAO;AACT;AAEA,SAAS,eAAe,QAAqC;AAC3D,QAAM,WAAW,kBAAkB,OAAO,aAAa,OAAO,CAAC;AAC/D,MAAI,SAAU,QAAO;AACrB,QAAM,MAAM,WAAW,MAAM;AAC7B,MAAI,KAAK;AACP,UAAM,UAAU,kBAAkB,IAAI,aAAa,OAAO,CAAC;AAC3D,QAAI,QAAS,QAAO;AAAA,EACtB;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,KAAoD;AAC7E,MAAI,CAAC,IAAK,QAAO;AACjB,aAAW,SAAS,IAAI,MAAM,KAAK,GAAG;AACpC,UAAM,IAAI,0CAA0C,KAAK,KAAK;AAC9D,QAAI,EAAG,QAAO,EAAE,CAAC;AAAA,EACnB;AACA,SAAO;AACT;AAEA,SAAS,WAAW,IAA6B;AAC/C,MAAI,UAA0B;AAC9B,SAAO,SAAS;AACd,QAAI,QAAQ,YAAY,MAAO,QAAO;AACtC,cAAU,QAAQ;AAAA,EACpB;AACA,SAAO;AACT;AAEA,SAAS,eAAe,KAAyC;AAC/D,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,aACJ,IAAI,eAAe,cAAc,YAAY,KAAK,IAAI;AACxD,MAAI,cAAc,WAAW,YAAY,cAAc;AACrD,UAAM,QAAQ,WAAW,eAAe,IAAI,KAAK;AACjD,QAAI,KAAM,QAAO;AAAA,EACnB;AACA,QAAM,SAAS,IAAI,eAAe,cAAc,iCAAiC;AACjF,MAAI,QAAQ;AACV,UAAM,QAAQ,OAAO,eAAe,IAAI,KAAK;AAC7C,QAAI,KAAM,QAAO;AAAA,EACnB;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,KAAyC;AAClE,MAAI,CAAC,IAAK,QAAO;AACjB,MAAI,MAAM,IAAI;AAEd,SAAO,QAAQ,IAAI,YAAY,gBAAgB,IAAI,YAAY,WAAW;AACxE,UAAM,IAAI;AAAA,EACZ;AACA,MAAI,OAAO,IAAI,YAAY,KAAK;AAC9B,UAAM,QAAQ,IAAI,eAAe,IAAI,KAAK;AAC1C,QAAI,KAAM,QAAO,KAAK,MAAM,GAAG,GAAG;AAAA,EACpC;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Structured event record resolved by `extractEventListing`.
 * `name`, `startDate` and `url` are always present; `endDate`, `location` and `description` are optional.
 */
export interface EventListingData {
|
|
2
|
+
name: string;
|
|
3
|
+
startDate: string;
|
|
4
|
+
endDate?: string;
|
|
5
|
+
location?: string;
|
|
6
|
+
description?: string;
|
|
7
|
+
url: string;
|
|
8
|
+
}
|
|
9
|
+
/** Extracts an `EventListingData` record from a page's HTML and URL; resolves to `null` when no event could be produced. */
export declare function extractEventListing(html: string, url: string): Promise<EventListingData | null>;
|
|
10
|
+
//# sourceMappingURL=EventListing.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EventListing.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/EventListing.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAOrG"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { extractJsonLd } from "../../jsonld.js";
|
|
3
|
+
/**
 * Extracts event data from a page, preferring JSON-LD and falling back to
 * meta tags / DOM heuristics.
 *
 * @param html Page HTML; an empty/falsy value resolves to null.
 * @param url Page URL; echoed back in the returned record.
 * @returns Promise resolving to the event record, or null when neither
 *   strategy finds one.
 */
async function extractEventListing(html, url) {
  if (!html) return null;
  return tryJsonLd(html, url) || tryMetaFallback(html, url);
}
|
|
9
|
+
/**
 * Builds an event record from the first JSON-LD block whose `@type`
 * matches "event".
 *
 * At least one of `name` / `startDate` must be a non-empty string. The
 * missing one, if any, is emitted as an empty string. `endDate`,
 * `description` and `location` are added only when present.
 *
 * @param html Page HTML handed to extractJsonLd().
 * @param url Page URL; echoed back in the returned record.
 * @returns The event record, or null when extractJsonLd() throws, no event
 *   block exists, or the block has neither name nor start date.
 */
function tryJsonLd(html, url) {
  let parsedBlocks;
  try {
    parsedBlocks = extractJsonLd(html);
  } catch {
    return null;
  }

  const eventBlock = parsedBlocks.find((candidate) => typeIncludes(candidate["@type"], "event"));
  if (!eventBlock) return null;

  const name = stringField(eventBlock["name"]);
  const startDate = stringField(eventBlock["startDate"]);
  if (!(name || startDate)) return null;

  const endDate = stringField(eventBlock["endDate"]);
  const description = stringField(eventBlock["description"]);
  const location = readLocation(eventBlock["location"]);

  return {
    name: name ?? "",
    startDate: startDate ?? "",
    url,
    ...(endDate ? { endDate } : {}),
    ...(description ? { description } : {}),
    ...(location ? { location } : {})
  };
}
|
|
34
|
+
/**
 * Builds an event record from meta tags and simple DOM heuristics.
 *
 * - start date: `event:start_time` meta, else the `datetime` attribute of
 *   `time[itemprop="startDate"]`;
 * - name: text of the first `h1`, else the `og:title` meta;
 * - end date / location / description: `event:end_time`, `event:location`
 *   and `og:description` metas, added only when present.
 *
 * At least one of name / start date must be found. The missing one, if any,
 * is emitted as an empty string.
 *
 * @param html Page HTML.
 * @param url Page URL; echoed back in the returned record.
 * @returns The event record, or null when parsing throws or neither a name
 *   nor a start date is found.
 */
function tryMetaFallback(html, url) {
  let doc;
  try {
    doc = parseHTML(html).document;
  } catch {
    return null;
  }

  const endDate = metaContent(doc, 'meta[property="event:end_time"]');
  const location = metaContent(doc, 'meta[property="event:location"]');
  const description = metaContent(doc, 'meta[property="og:description"]');

  const start =
    metaContent(doc, 'meta[property="event:start_time"]') ||
    doc.querySelector('time[itemprop="startDate"]')?.getAttribute("datetime")?.trim() ||
    undefined;

  const name =
    doc.querySelector("h1")?.textContent?.trim() ||
    metaContent(doc, 'meta[property="og:title"]');

  if (!(name || start)) return null;

  return {
    name: name ?? "",
    startDate: start ?? "",
    url,
    ...(endDate ? { endDate } : {}),
    ...(location ? { location } : {}),
    ...(description ? { description } : {})
  };
}
|
|
69
|
+
/**
 * Reads the trimmed `content` attribute of the first element matching
 * `selector`.
 *
 * @param document Document (or any object with `querySelector`) to search.
 * @param selector CSS selector of the element to read.
 * @returns The trimmed attribute value, or undefined when the element or
 *   attribute is missing or the value is empty after trimming.
 */
function metaContent(document, selector) {
  const raw = document.querySelector(selector)?.getAttribute("content");
  const value = raw?.trim();
  return value ? value : undefined;
}
|
|
74
|
+
/**
 * Checks whether a JSON-LD `@type` value names the wanted type.
 *
 * `raw` may be a single string or an array; non-string array entries are
 * ignored and any other kind of value never matches. Each candidate is
 * passed through normalizeType() and compared with the lower-cased `want`.
 *
 * @param raw The `@type` value as found in the JSON-LD block.
 * @param want Type name to look for (case-insensitive).
 * @returns true when at least one candidate normalises to `want`.
 */
function typeIncludes(raw, want) {
  const wanted = want.toLowerCase();
  let candidates = [];
  if (typeof raw === "string") {
    candidates = [raw];
  } else if (Array.isArray(raw)) {
    candidates = raw;
  }
  return candidates.some(
    (entry) => typeof entry === "string" && normalizeType(entry) === wanted
  );
}
|
|
82
|
+
/**
 * Reduces a JSON-LD type identifier to its lower-cased local name, e.g.
 * "https://schema.org/Event" and "schema:Event" both become "event".
 *
 * The identifier is split on "/", "#" and ":"; the last non-empty,
 * whitespace-trimmed segment is the local name. Skipping empty segments
 * means a trailing delimiter ("https://schema.org/Event/") still yields
 * "event" rather than an empty string.
 *
 * @param raw Type identifier string.
 * @returns The lower-cased local name, or "" when `raw` holds no name at all.
 */
function normalizeType(raw) {
  const segments = raw
    .split(/[/#:]/)
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0);
  return (segments.pop() ?? "").toLowerCase();
}
|
|
86
|
+
/**
 * Normalises an arbitrary value to a non-empty trimmed string.
 *
 * @param value Any value, typically a field read from parsed JSON.
 * @returns The trimmed string, or undefined when `value` is not a string or
 *   is empty after trimming.
 */
function stringField(value) {
  const text = typeof value === "string" ? value.trim() : "";
  return text === "" ? undefined : text;
}
|
|
91
|
+
/**
 * Turns a JSON-LD `location` value into a display string.
 *
 * Accepted shapes:
 *   - string: returned trimmed (undefined when blank);
 *   - array: the first entry that yields a location wins;
 *   - object: its `name` if present; otherwise its `address`, which may be
 *     a string or an object whose streetAddress / addressLocality /
 *     addressRegion / addressCountry strings are joined with ", ".
 *
 * @param value The raw `location` value.
 * @returns The location text, or undefined when nothing usable is found.
 */
function readLocation(value) {
  if (typeof value === "string") return stringField(value);

  if (Array.isArray(value)) {
    return value
      .map((entry) => readLocation(entry))
      .find((resolved) => Boolean(resolved));
  }

  if (!value || typeof value !== "object") return undefined;

  const placeName = stringField(value["name"]);
  if (placeName) return placeName;

  const address = value["address"];
  if (typeof address === "string") return stringField(address);
  if (!address || typeof address !== "object") return undefined;

  const parts = ["streetAddress", "addressLocality", "addressRegion", "addressCountry"]
    .map((key) => stringField(address[key]))
    .filter((part) => Boolean(part));
  return parts.length > 0 ? parts.join(", ") : undefined;
}
|
|
119
|
+
export {
|
|
120
|
+
extractEventListing
|
|
121
|
+
};
|
|
122
|
+
//# sourceMappingURL=EventListing.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/EventListing.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { extractJsonLd } from '../../jsonld.js';\n\nexport interface EventListingData {\n name: string;\n startDate: string;\n endDate?: string;\n location?: string;\n description?: string;\n url: string;\n}\n\nexport async function extractEventListing(html: string, url: string): Promise<EventListingData | null> {\n if (!html) return null;\n\n const fromJsonLd = tryJsonLd(html, url);\n if (fromJsonLd) return fromJsonLd;\n\n return tryMetaFallback(html, url);\n}\n\nfunction tryJsonLd(html: string, url: string): EventListingData | null {\n let blocks: Record<string, unknown>[];\n try {\n blocks = extractJsonLd(html);\n } catch {\n return null;\n }\n\n const ev = blocks.find((block) => typeIncludes(block['@type'], 'event'));\n if (!ev) return null;\n\n const name = stringField(ev['name']);\n const startDate = stringField(ev['startDate']);\n if (!name && !startDate) return null;\n\n const data: EventListingData = {\n name: name ?? '',\n startDate: startDate ?? 
'',\n url,\n };\n const endDate = stringField(ev['endDate']);\n if (endDate) data.endDate = endDate;\n const description = stringField(ev['description']);\n if (description) data.description = description;\n const location = readLocation(ev['location']);\n if (location) data.location = location;\n return data;\n}\n\nfunction tryMetaFallback(html: string, url: string): EventListingData | null {\n let document: Document;\n try {\n ({ document } = parseHTML(html));\n } catch {\n return null;\n }\n\n const startMeta = metaContent(document, 'meta[property=\"event:start_time\"]');\n const endMeta = metaContent(document, 'meta[property=\"event:end_time\"]');\n const locationMeta = metaContent(document, 'meta[property=\"event:location\"]');\n const description = metaContent(document, 'meta[property=\"og:description\"]');\n\n let start = startMeta;\n if (!start) {\n const timeEl = document.querySelector('time[itemprop=\"startDate\"]');\n const dt = timeEl?.getAttribute('datetime')?.trim();\n if (dt) start = dt;\n }\n\n let name: string | undefined;\n const h1 = document.querySelector('h1');\n const h1Text = h1?.textContent?.trim();\n if (h1Text) name = h1Text;\n if (!name) {\n name = metaContent(document, 'meta[property=\"og:title\"]');\n }\n\n if (!name && !start) return null;\n\n const data: EventListingData = {\n name: name ?? '',\n startDate: start ?? '',\n url,\n };\n if (endMeta) data.endDate = endMeta;\n if (locationMeta) data.location = locationMeta;\n if (description) data.description = description;\n return data;\n}\n\nfunction metaContent(document: Document, selector: string): string | undefined {\n const el = document.querySelector(selector);\n const content = el?.getAttribute('content')?.trim();\n return content && content.length > 0 ? 
content : undefined;\n}\n\nfunction typeIncludes(raw: unknown, want: string): boolean {\n const target = want.toLowerCase();\n if (typeof raw === 'string') return normalizeType(raw) === target;\n if (Array.isArray(raw)) {\n return raw.some((entry) => typeof entry === 'string' && normalizeType(entry) === target);\n }\n return false;\n}\n\nfunction normalizeType(raw: string): string {\n const tail = raw.split(/[/#:]/).pop() ?? raw;\n return tail.toLowerCase();\n}\n\nfunction stringField(value: unknown): string | undefined {\n if (typeof value !== 'string') return undefined;\n const trimmed = value.trim();\n return trimmed.length > 0 ? trimmed : undefined;\n}\n\nfunction readLocation(value: unknown): string | undefined {\n if (typeof value === 'string') return stringField(value);\n if (Array.isArray(value)) {\n for (const entry of value) {\n const loc = readLocation(entry);\n if (loc) return loc;\n }\n return undefined;\n }\n if (value && typeof value === 'object') {\n const obj = value as Record<string, unknown>;\n const name = stringField(obj['name']);\n if (name) return name;\n const address = obj['address'];\n if (typeof address === 'string') return stringField(address);\n if (address && typeof address === 'object') {\n const aobj = address as Record<string, unknown>;\n const parts = [\n stringField(aobj['streetAddress']),\n stringField(aobj['addressLocality']),\n stringField(aobj['addressRegion']),\n stringField(aobj['addressCountry']),\n ].filter((s): s is string => Boolean(s));\n if (parts.length > 0) return parts.join(', ');\n }\n }\n return 
undefined;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,qBAAqB;AAW9B,eAAsB,oBAAoB,MAAc,KAA+C;AACrG,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,aAAa,UAAU,MAAM,GAAG;AACtC,MAAI,WAAY,QAAO;AAEvB,SAAO,gBAAgB,MAAM,GAAG;AAClC;AAEA,SAAS,UAAU,MAAc,KAAsC;AACrE,MAAI;AACJ,MAAI;AACF,aAAS,cAAc,IAAI;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,KAAK,OAAO,KAAK,CAAC,UAAU,aAAa,MAAM,OAAO,GAAG,OAAO,CAAC;AACvE,MAAI,CAAC,GAAI,QAAO;AAEhB,QAAM,OAAO,YAAY,GAAG,MAAM,CAAC;AACnC,QAAM,YAAY,YAAY,GAAG,WAAW,CAAC;AAC7C,MAAI,CAAC,QAAQ,CAAC,UAAW,QAAO;AAEhC,QAAM,OAAyB;AAAA,IAC7B,MAAM,QAAQ;AAAA,IACd,WAAW,aAAa;AAAA,IACxB;AAAA,EACF;AACA,QAAM,UAAU,YAAY,GAAG,SAAS,CAAC;AACzC,MAAI,QAAS,MAAK,UAAU;AAC5B,QAAM,cAAc,YAAY,GAAG,aAAa,CAAC;AACjD,MAAI,YAAa,MAAK,cAAc;AACpC,QAAM,WAAW,aAAa,GAAG,UAAU,CAAC;AAC5C,MAAI,SAAU,MAAK,WAAW;AAC9B,SAAO;AACT;AAEA,SAAS,gBAAgB,MAAc,KAAsC;AAC3E,MAAI;AACJ,MAAI;AACF,KAAC,EAAE,SAAS,IAAI,UAAU,IAAI;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,YAAY,YAAY,UAAU,mCAAmC;AAC3E,QAAM,UAAU,YAAY,UAAU,iCAAiC;AACvE,QAAM,eAAe,YAAY,UAAU,iCAAiC;AAC5E,QAAM,cAAc,YAAY,UAAU,iCAAiC;AAE3E,MAAI,QAAQ;AACZ,MAAI,CAAC,OAAO;AACV,UAAM,SAAS,SAAS,cAAc,4BAA4B;AAClE,UAAM,KAAK,QAAQ,aAAa,UAAU,GAAG,KAAK;AAClD,QAAI,GAAI,SAAQ;AAAA,EAClB;AAEA,MAAI;AACJ,QAAM,KAAK,SAAS,cAAc,IAAI;AACtC,QAAM,SAAS,IAAI,aAAa,KAAK;AACrC,MAAI,OAAQ,QAAO;AACnB,MAAI,CAAC,MAAM;AACT,WAAO,YAAY,UAAU,2BAA2B;AAAA,EAC1D;AAEA,MAAI,CAAC,QAAQ,CAAC,MAAO,QAAO;AAE5B,QAAM,OAAyB;AAAA,IAC7B,MAAM,QAAQ;AAAA,IACd,WAAW,SAAS;AAAA,IACpB;AAAA,EACF;AACA,MAAI,QAAS,MAAK,UAAU;AAC5B,MAAI,aAAc,MAAK,WAAW;AAClC,MAAI,YAAa,MAAK,cAAc;AACpC,SAAO;AACT;AAEA,SAAS,YAAY,UAAoB,UAAsC;AAC7E,QAAM,KAAK,SAAS,cAAc,QAAQ;AAC1C,QAAM,UAAU,IAAI,aAAa,SAAS,GAAG,KAAK;AAClD,SAAO,WAAW,QAAQ,SAAS,IAAI,UAAU;AACnD;AAEA,SAAS,aAAa,KAAc,MAAuB;AACzD,QAAM,SAAS,KAAK,YAAY;AAChC,MAAI,OAAO,QAAQ,SAAU,QAAO,cAAc,GAAG,MAAM;AAC3D,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,KAAK,CAAC,UAAU,OAAO,UAAU,YAAY,cAAc,KAAK,MAAM,MAAM;AAAA,EACzF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,OAAO,IAAI,MAAM,OAAO,EAAE,IAAI,KAAK;AACzC,SAAO,KAAK,YAAY;AAC1B;AAEA,
SAAS,YAAY,OAAoC;AACvD,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,QAAM,UAAU,MAAM,KAAK;AAC3B,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,aAAa,OAAoC;AACxD,MAAI,OAAO,UAAU,SAAU,QAAO,YAAY,KAAK;AACvD,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,MAAM,aAAa,KAAK;AAC9B,UAAI,IAAK,QAAO;AAAA,IAClB;AACA,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,MAAM;AACZ,UAAM,OAAO,YAAY,IAAI,MAAM,CAAC;AACpC,QAAI,KAAM,QAAO;AACjB,UAAM,UAAU,IAAI,SAAS;AAC7B,QAAI,OAAO,YAAY,SAAU,QAAO,YAAY,OAAO;AAC3D,QAAI,WAAW,OAAO,YAAY,UAAU;AAC1C,YAAM,OAAO;AACb,YAAM,QAAQ;AAAA,QACZ,YAAY,KAAK,eAAe,CAAC;AAAA,QACjC,YAAY,KAAK,iBAAiB,CAAC;AAAA,QACnC,YAAY,KAAK,eAAe,CAAC;AAAA,QACjC,YAAY,KAAK,gBAAgB,CAAC;AAAA,MACpC,EAAE,OAAO,CAAC,MAAmB,QAAQ,CAAC,CAAC;AACvC,UAAI,MAAM,SAAS,EAAG,QAAO,MAAM,KAAK,IAAI;AAAA,IAC9C;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Structured paper record resolved by `extractPaper`.
 * `title`, `authors` and `abstract` are always present; `publishedDate`, `doi` and `arxivId` are optional.
 */
export interface PaperData {
|
|
2
|
+
title: string;
|
|
3
|
+
authors: string[];
|
|
4
|
+
abstract: string;
|
|
5
|
+
publishedDate?: string;
|
|
6
|
+
doi?: string;
|
|
7
|
+
arxivId?: string;
|
|
8
|
+
}
|
|
9
|
+
/** Extracts a `PaperData` record from a page's HTML and URL; resolves to `null` when no paper could be produced. */
export declare function extractPaper(html: string, url: string): Promise<PaperData | null>;
|
|
10
|
+
//# sourceMappingURL=Paper.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Paper.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/Paper.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAID,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAOvF"}
|