@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { extractJsonLd } from "../../jsonld.js";
|
|
3
|
+
// Lower-cased JSON-LD "@type" names accepted as a paper-like block; compared against the output of normalizeType().
const SCHOLARLY_TYPES = ["scholarlyarticle", "article", "creativework"];
|
|
4
|
+
/**
 * Extract paper metadata from an HTML page.
 *
 * JSON-LD is tried first; when it yields nothing, the citation meta tags /
 * DOM fallback is used instead.
 *
 * @param html Raw HTML of the page; a falsy value short-circuits to `null`.
 * @param url  Page URL, forwarded to both strategies.
 * @returns The extracted data object, or `null` when neither strategy succeeds.
 */
async function extractPaper(html, url) {
  if (html) {
    return tryJsonLd(html, url) || tryMetaFallback(html, url);
  }
  return null;
}
|
|
10
|
+
/**
 * Build paper data from the first JSON-LD block whose "@type" (a string or an
 * array of strings) normalizes to one of SCHOLARLY_TYPES.
 *
 * @param html Raw HTML handed to extractJsonLd().
 * @param url  Page URL, used only to derive an arXiv id.
 * @returns The paper data, or `null` when extraction throws, no block
 *          matches, or the matching block has neither a title nor an abstract.
 */
function tryJsonLd(html, url) {
  let candidates;
  try {
    candidates = extractJsonLd(html);
  } catch {
    return null;
  }

  const isScholarly = (name) =>
    typeof name === "string" && SCHOLARLY_TYPES.includes(normalizeType(name));
  const match = candidates.find((node) => {
    const declared = node["@type"];
    return Array.isArray(declared) ? declared.some((entry) => isScholarly(entry)) : isScholarly(declared);
  });
  if (!match) return null;

  // "headline"/"abstract" win over the more generic "name"/"description".
  const heading = stringField(match["headline"]) ?? stringField(match["name"]);
  const summary = stringField(match["abstract"]) ?? stringField(match["description"]);
  if (!(heading || summary)) return null;

  const paper = {
    title: heading ?? "",
    authors: readAuthors(match["author"]),
    abstract: summary ?? ""
  };

  // Optional fields are attached only when present, in this fixed order.
  const extras = [
    ["publishedDate", stringField(match["datePublished"])],
    ["doi", readDoi(match["identifier"]) ?? readDoi(match["sameAs"])],
    ["arxivId", extractArxivId(url)]
  ];
  for (const [key, found] of extras) {
    if (found) paper[key] = found;
  }
  return paper;
}
|
|
43
|
+
/**
 * Build paper data from `citation_*` meta tags, falling back to the first
 * <h1> for the title and to an "abstract"-classed element for the abstract.
 *
 * @param html Raw HTML parsed with linkedom's parseHTML().
 * @param url  Page URL, used to derive an arXiv id when no meta tag supplies one.
 * @returns The paper data, or `null` when parsing throws or neither a title
 *          nor an abstract can be found.
 */
function tryMetaFallback(html, url) {
  let document;
  try {
    document = parseHTML(html).document;
  } catch {
    return null;
  }

  // Trimmed text of the first element matching `selector`; empty text counts as missing.
  const firstText = (selector) => document.querySelector(selector)?.textContent?.trim() || void 0;

  const metaTitle = metaContent(document, 'meta[name="citation_title"]');
  const authors = allMetaContent(document, 'meta[name="citation_author"]');
  const metaAbstract =
    metaContent(document, 'meta[name="citation_abstract"]') ??
    metaContent(document, 'meta[name="description"]');
  const publishedDate = metaContent(document, 'meta[name="citation_publication_date"]');
  const doi = metaContent(document, 'meta[name="citation_doi"]');
  const metaArxivId = metaContent(document, 'meta[name="citation_arxiv_id"]');
  const urlArxivId = extractArxivId(url);

  const title = metaTitle ?? firstText("h1");
  const abstract =
    metaAbstract ??
    firstText('[class*="abstract" i] p, p[class*="abstract" i], blockquote.abstract');
  if (title === void 0 && abstract === void 0) return null;

  const paper = {
    title: title ?? "",
    authors,
    abstract: abstract ?? ""
  };

  // Optional fields are attached only when present; the meta tag beats the URL for the arXiv id.
  const extras = [
    ["publishedDate", publishedDate],
    ["doi", doi],
    ["arxivId", metaArxivId ?? urlArxivId]
  ];
  for (const [key, found] of extras) {
    if (found) paper[key] = found;
  }
  return paper;
}
|
|
81
|
+
function metaContent(document, selector) {
|
|
82
|
+
const el = document.querySelector(selector);
|
|
83
|
+
const content = el?.getAttribute("content")?.trim();
|
|
84
|
+
return content && content.length > 0 ? content : void 0;
|
|
85
|
+
}
|
|
86
|
+
function allMetaContent(document, selector) {
|
|
87
|
+
const out = [];
|
|
88
|
+
for (const el of document.querySelectorAll(selector)) {
|
|
89
|
+
const content = el.getAttribute("content")?.trim();
|
|
90
|
+
if (content) out.push(content);
|
|
91
|
+
}
|
|
92
|
+
return out;
|
|
93
|
+
}
|
|
94
|
+
/**
 * Pull the arXiv identifier out of an `arxiv.org/abs/...` or `arxiv.org/pdf/...` URL.
 *
 * Both identifier schemes are recognised:
 *  - old style `archive[.SC]/YYMMNNN[vN]`, e.g. `hep-th/9901001` or `math.GT/0309136v1`
 *    (these contain a slash, so a "first path segment" capture truncates them);
 *  - anything else: the first path segment after `abs/` or `pdf/`, e.g. `2301.12345v2`.
 *
 * A trailing `.pdf` is stripped from the result.
 *
 * @param url Page URL; a falsy value yields `undefined`.
 * @returns The identifier, or `undefined` when the URL is not an arXiv abs/pdf link.
 */
function extractArxivId(url) {
  if (!url) return void 0;
  // First alternative: old-style id (must not be followed by more alphanumerics, so
  // an unrelated "foo/1234567890" path is not mistaken for one). Second: generic segment.
  const m = /arxiv\.org\/(?:abs|pdf)\/([a-z-]+(?:\.[a-z]{2})?\/\d{7}(?:v\d+)?(?![0-9a-z])|[^/?#]+)/i.exec(url);
  if (!m) return void 0;
  return m[1].replace(/\.pdf$/i, "");
}
|
|
100
|
+
/**
 * Reduce a JSON-LD type name to its lower-cased final segment, i.e. the text
 * after the last `/`, `#` or `:` (the whole string when none is present).
 *
 * @param raw Type name, e.g. "https://schema.org/ScholarlyArticle" or "schema:Article".
 * @returns The lower-cased tail, e.g. "scholarlyarticle".
 */
function normalizeType(raw) {
  let cut = -1;
  for (const separator of "/#:") {
    cut = Math.max(cut, raw.lastIndexOf(separator));
  }
  return raw.slice(cut + 1).toLowerCase();
}
|
|
104
|
+
/**
 * Return `value` trimmed when it is a non-blank string.
 *
 * @param value Any value.
 * @returns The trimmed string, or `undefined` for non-strings and blank strings.
 */
function stringField(value) {
  if (typeof value === "string") {
    const text = value.trim();
    if (text !== "") return text;
  }
  return undefined;
}
|
|
109
|
+
/**
 * Normalize a JSON-LD `author` value into a list of names.
 *
 * Accepts a single entry or an array of entries. Each entry is either a
 * string or an object with a string `name`. Names are trimmed; blank names
 * and entries of any other shape are skipped.
 *
 * @param value Raw `author` value; falsy input yields an empty list.
 * @returns Author names in input order.
 */
function readAuthors(value) {
  if (!value) return [];
  const entries = Array.isArray(value) ? value : [value];
  const names = [];
  for (const entry of entries) {
    let candidate;
    if (typeof entry === "string") {
      candidate = entry;
    } else if (entry && typeof entry === "object") {
      candidate = entry["name"];
    }
    if (typeof candidate !== "string") continue;
    const cleaned = candidate.trim();
    if (cleaned) names.push(cleaned);
  }
  return names;
}
|
|
133
|
+
/**
 * Find a DOI inside a JSON-LD `identifier` / `sameAs` value.
 *
 * Handled shapes:
 *  - string: must mention "doi" (e.g. "https://doi.org/10.1000/x", "doi:10.1000/x"),
 *    which guards against arbitrary URLs that merely contain "10.NNNN/";
 *  - array: the first entry that yields a DOI wins;
 *  - object with a string `value`: when its `propertyID` mentions "doi"
 *    (the schema.org PropertyValue form), a bare DOI in `value` is accepted;
 *    otherwise `value` is treated like a plain string.
 *
 * @param value Raw field value of any shape.
 * @returns The DOI (from "10." up to the next whitespace), or `undefined`.
 */
function readDoi(value) {
  const doiPattern = /10\.\d{4,9}\/[^\s]+/;
  if (typeof value === "string") {
    if (!value.toLowerCase().includes("doi")) return void 0;
    return doiPattern.exec(value)?.[0];
  }
  if (Array.isArray(value)) {
    for (const entry of value) {
      const doi = readDoi(entry);
      if (doi) return doi;
    }
    return void 0;
  }
  if (value && typeof value === "object") {
    const raw = value["value"];
    if (typeof raw === "string") {
      const propertyId = value["propertyID"];
      // A PropertyValue labelled as a DOI carries the bare identifier, which
      // would fail the "mentions doi" check applied to plain strings.
      const labelledAsDoi = typeof propertyId === "string" && propertyId.toLowerCase().includes("doi");
      return labelledAsDoi ? doiPattern.exec(raw)?.[0] : readDoi(raw);
    }
  }
  return void 0;
}
|
|
153
|
+
export {
|
|
154
|
+
extractPaper
|
|
155
|
+
};
|
|
156
|
+
//# sourceMappingURL=Paper.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/Paper.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { extractJsonLd } from '../../jsonld.js';\n\nexport interface PaperData {\n title: string;\n authors: string[];\n abstract: string;\n publishedDate?: string;\n doi?: string;\n arxivId?: string;\n}\n\nconst SCHOLARLY_TYPES = ['scholarlyarticle', 'article', 'creativework'];\n\nexport async function extractPaper(html: string, url: string): Promise<PaperData | null> {\n if (!html) return null;\n\n const fromJsonLd = tryJsonLd(html, url);\n if (fromJsonLd) return fromJsonLd;\n\n return tryMetaFallback(html, url);\n}\n\nfunction tryJsonLd(html: string, url: string): PaperData | null {\n let blocks: Record<string, unknown>[];\n try {\n blocks = extractJsonLd(html);\n } catch {\n return null;\n }\n\n const article = blocks.find((block) => {\n const t = block['@type'];\n if (typeof t === 'string') return SCHOLARLY_TYPES.includes(normalizeType(t));\n if (Array.isArray(t)) {\n return t.some((entry) => typeof entry === 'string' && SCHOLARLY_TYPES.includes(normalizeType(entry)));\n }\n return false;\n });\n if (!article) return null;\n\n const title = stringField(article['headline']) ?? stringField(article['name']);\n const abstract = stringField(article['abstract']) ?? stringField(article['description']);\n if (!title && !abstract) return null;\n\n const authors = readAuthors(article['author']);\n const data: PaperData = {\n title: title ?? '',\n authors,\n abstract: abstract ?? '',\n };\n const published = stringField(article['datePublished']);\n if (published) data.publishedDate = published;\n\n const doi = readDoi(article['identifier']) ?? 
readDoi(article['sameAs']);\n if (doi) data.doi = doi;\n\n const arxivId = extractArxivId(url);\n if (arxivId) data.arxivId = arxivId;\n\n return data;\n}\n\nfunction tryMetaFallback(html: string, url: string): PaperData | null {\n let document: Document;\n try {\n ({ document } = parseHTML(html));\n } catch {\n return null;\n }\n\n const title = metaContent(document, 'meta[name=\"citation_title\"]');\n const authors = allMetaContent(document, 'meta[name=\"citation_author\"]');\n const abstract =\n metaContent(document, 'meta[name=\"citation_abstract\"]') ??\n metaContent(document, 'meta[name=\"description\"]');\n const publishedDate = metaContent(document, 'meta[name=\"citation_publication_date\"]');\n const doi = metaContent(document, 'meta[name=\"citation_doi\"]');\n const arxivIdFromMeta = metaContent(document, 'meta[name=\"citation_arxiv_id\"]');\n const arxivIdFromUrl = extractArxivId(url);\n\n let resolvedAbstract = abstract;\n let resolvedTitle = title;\n\n if (!resolvedTitle) {\n const h1 = document.querySelector('h1');\n const t = h1?.textContent?.trim();\n if (t) resolvedTitle = t;\n }\n if (!resolvedAbstract) {\n const absEl = document.querySelector('[class*=\"abstract\" i] p, p[class*=\"abstract\" i], blockquote.abstract');\n const t = absEl?.textContent?.trim();\n if (t) resolvedAbstract = t;\n }\n\n if (!resolvedTitle && !resolvedAbstract) return null;\n\n const data: PaperData = {\n title: resolvedTitle ?? '',\n authors,\n abstract: resolvedAbstract ?? '',\n };\n if (publishedDate) data.publishedDate = publishedDate;\n if (doi) data.doi = doi;\n const arxivId = arxivIdFromMeta ?? arxivIdFromUrl;\n if (arxivId) data.arxivId = arxivId;\n\n return data;\n}\n\nfunction metaContent(document: Document, selector: string): string | undefined {\n const el = document.querySelector(selector);\n const content = el?.getAttribute('content')?.trim();\n return content && content.length > 0 ? 
content : undefined;\n}\n\nfunction allMetaContent(document: Document, selector: string): string[] {\n const out: string[] = [];\n for (const el of document.querySelectorAll(selector)) {\n const content = el.getAttribute('content')?.trim();\n if (content) out.push(content);\n }\n return out;\n}\n\nfunction extractArxivId(url: string): string | undefined {\n if (!url) return undefined;\n const m = /arxiv\\.org\\/(?:abs|pdf)\\/([^/?#]+)/i.exec(url);\n if (!m) return undefined;\n return m[1].replace(/\\.pdf$/i, '');\n}\n\nfunction normalizeType(raw: string): string {\n const tail = raw.split(/[/#:]/).pop() ?? raw;\n return tail.toLowerCase();\n}\n\nfunction stringField(value: unknown): string | undefined {\n if (typeof value !== 'string') return undefined;\n const trimmed = value.trim();\n return trimmed.length > 0 ? trimmed : undefined;\n}\n\nfunction readAuthors(value: unknown): string[] {\n if (!value) return [];\n const out: string[] = [];\n const push = (entry: unknown): void => {\n if (typeof entry === 'string') {\n const t = entry.trim();\n if (t) out.push(t);\n return;\n }\n if (entry && typeof entry === 'object') {\n const name = (entry as Record<string, unknown>)['name'];\n if (typeof name === 'string') {\n const t = name.trim();\n if (t) out.push(t);\n }\n }\n };\n if (Array.isArray(value)) {\n for (const entry of value) push(entry);\n } else {\n push(value);\n }\n return out;\n}\n\nfunction readDoi(value: unknown): string | undefined {\n if (typeof value === 'string' && value.toLowerCase().includes('doi')) {\n const m = /10\\.\\d{4,9}\\/[^\\s]+/.exec(value);\n if (m) return m[0];\n }\n if (Array.isArray(value)) {\n for (const entry of value) {\n const doi = readDoi(entry);\n if (doi) return doi;\n }\n }\n if (value && typeof value === 'object') {\n const obj = value as Record<string, unknown>;\n if (typeof obj['value'] === 'string') {\n const doi = readDoi(obj['value']);\n if (doi) return doi;\n }\n }\n return 
undefined;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,qBAAqB;AAW9B,MAAM,kBAAkB,CAAC,oBAAoB,WAAW,cAAc;AAEtE,eAAsB,aAAa,MAAc,KAAwC;AACvF,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,aAAa,UAAU,MAAM,GAAG;AACtC,MAAI,WAAY,QAAO;AAEvB,SAAO,gBAAgB,MAAM,GAAG;AAClC;AAEA,SAAS,UAAU,MAAc,KAA+B;AAC9D,MAAI;AACJ,MAAI;AACF,aAAS,cAAc,IAAI;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,UAAU,OAAO,KAAK,CAAC,UAAU;AACrC,UAAM,IAAI,MAAM,OAAO;AACvB,QAAI,OAAO,MAAM,SAAU,QAAO,gBAAgB,SAAS,cAAc,CAAC,CAAC;AAC3E,QAAI,MAAM,QAAQ,CAAC,GAAG;AACpB,aAAO,EAAE,KAAK,CAAC,UAAU,OAAO,UAAU,YAAY,gBAAgB,SAAS,cAAc,KAAK,CAAC,CAAC;AAAA,IACtG;AACA,WAAO;AAAA,EACT,CAAC;AACD,MAAI,CAAC,QAAS,QAAO;AAErB,QAAM,QAAQ,YAAY,QAAQ,UAAU,CAAC,KAAK,YAAY,QAAQ,MAAM,CAAC;AAC7E,QAAM,WAAW,YAAY,QAAQ,UAAU,CAAC,KAAK,YAAY,QAAQ,aAAa,CAAC;AACvF,MAAI,CAAC,SAAS,CAAC,SAAU,QAAO;AAEhC,QAAM,UAAU,YAAY,QAAQ,QAAQ,CAAC;AAC7C,QAAM,OAAkB;AAAA,IACtB,OAAO,SAAS;AAAA,IAChB;AAAA,IACA,UAAU,YAAY;AAAA,EACxB;AACA,QAAM,YAAY,YAAY,QAAQ,eAAe,CAAC;AACtD,MAAI,UAAW,MAAK,gBAAgB;AAEpC,QAAM,MAAM,QAAQ,QAAQ,YAAY,CAAC,KAAK,QAAQ,QAAQ,QAAQ,CAAC;AACvE,MAAI,IAAK,MAAK,MAAM;AAEpB,QAAM,UAAU,eAAe,GAAG;AAClC,MAAI,QAAS,MAAK,UAAU;AAE5B,SAAO;AACT;AAEA,SAAS,gBAAgB,MAAc,KAA+B;AACpE,MAAI;AACJ,MAAI;AACF,KAAC,EAAE,SAAS,IAAI,UAAU,IAAI;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,YAAY,UAAU,6BAA6B;AACjE,QAAM,UAAU,eAAe,UAAU,8BAA8B;AACvE,QAAM,WACJ,YAAY,UAAU,gCAAgC,KACtD,YAAY,UAAU,0BAA0B;AAClD,QAAM,gBAAgB,YAAY,UAAU,wCAAwC;AACpF,QAAM,MAAM,YAAY,UAAU,2BAA2B;AAC7D,QAAM,kBAAkB,YAAY,UAAU,gCAAgC;AAC9E,QAAM,iBAAiB,eAAe,GAAG;AAEzC,MAAI,mBAAmB;AACvB,MAAI,gBAAgB;AAEpB,MAAI,CAAC,eAAe;AAClB,UAAM,KAAK,SAAS,cAAc,IAAI;AACtC,UAAM,IAAI,IAAI,aAAa,KAAK;AAChC,QAAI,EAAG,iBAAgB;AAAA,EACzB;AACA,MAAI,CAAC,kBAAkB;AACrB,UAAM,QAAQ,SAAS,cAAc,sEAAsE;AAC3G,UAAM,IAAI,OAAO,aAAa,KAAK;AACnC,QAAI,EAAG,oBAAmB;AAAA,EAC5B;AAEA,MAAI,CAAC,iBAAiB,CAAC,iBAAkB,QAAO;AAEhD,QAAM,OAAkB;AAAA,IACtB,OAAO,iBAAiB;AAAA,IACxB;AAAA,IACA,UAAU,oBAAoB;AAAA,EAChC;AACA,MAAI,cAAe,MAAK,gBAAgB;AACxC,MAAI,IAAK,MAAK,MAAM;AACpB,QAAM,UAAU,mBAAmB;AACnC,MAAI,QAAS,MAAK,UAAU;A
AE5B,SAAO;AACT;AAEA,SAAS,YAAY,UAAoB,UAAsC;AAC7E,QAAM,KAAK,SAAS,cAAc,QAAQ;AAC1C,QAAM,UAAU,IAAI,aAAa,SAAS,GAAG,KAAK;AAClD,SAAO,WAAW,QAAQ,SAAS,IAAI,UAAU;AACnD;AAEA,SAAS,eAAe,UAAoB,UAA4B;AACtE,QAAM,MAAgB,CAAC;AACvB,aAAW,MAAM,SAAS,iBAAiB,QAAQ,GAAG;AACpD,UAAM,UAAU,GAAG,aAAa,SAAS,GAAG,KAAK;AACjD,QAAI,QAAS,KAAI,KAAK,OAAO;AAAA,EAC/B;AACA,SAAO;AACT;AAEA,SAAS,eAAe,KAAiC;AACvD,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,IAAI,sCAAsC,KAAK,GAAG;AACxD,MAAI,CAAC,EAAG,QAAO;AACf,SAAO,EAAE,CAAC,EAAE,QAAQ,WAAW,EAAE;AACnC;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,OAAO,IAAI,MAAM,OAAO,EAAE,IAAI,KAAK;AACzC,SAAO,KAAK,YAAY;AAC1B;AAEA,SAAS,YAAY,OAAoC;AACvD,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,QAAM,UAAU,MAAM,KAAK;AAC3B,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,YAAY,OAA0B;AAC7C,MAAI,CAAC,MAAO,QAAO,CAAC;AACpB,QAAM,MAAgB,CAAC;AACvB,QAAM,OAAO,CAAC,UAAyB;AACrC,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,IAAI,MAAM,KAAK;AACrB,UAAI,EAAG,KAAI,KAAK,CAAC;AACjB;AAAA,IACF;AACA,QAAI,SAAS,OAAO,UAAU,UAAU;AACtC,YAAM,OAAQ,MAAkC,MAAM;AACtD,UAAI,OAAO,SAAS,UAAU;AAC5B,cAAM,IAAI,KAAK,KAAK;AACpB,YAAI,EAAG,KAAI,KAAK,CAAC;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AACA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,MAAO,MAAK,KAAK;AAAA,EACvC,OAAO;AACL,SAAK,KAAK;AAAA,EACZ;AACA,SAAO;AACT;AAEA,SAAS,QAAQ,OAAoC;AACnD,MAAI,OAAO,UAAU,YAAY,MAAM,YAAY,EAAE,SAAS,KAAK,GAAG;AACpE,UAAM,IAAI,sBAAsB,KAAK,KAAK;AAC1C,QAAI,EAAG,QAAO,EAAE,CAAC;AAAA,EACnB;AACA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,MAAM,QAAQ,KAAK;AACzB,UAAI,IAAK,QAAO;AAAA,IAClB;AAAA,EACF;AACA,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,MAAM;AACZ,QAAI,OAAO,IAAI,OAAO,MAAM,UAAU;AACpC,YAAM,MAAM,QAAQ,IAAI,OAAO,CAAC;AAChC,UAAI,IAAK,QAAO;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Structured product information; only `name` is required. */
export interface ProductData {
  /** Product name. */
  name: string;
  /** Free-text product description. */
  description?: string;
  /** Brand name. */
  brand?: string;
  /** Price as a numeric amount paired with a currency string. */
  price?: { amount: number; currency: string };
  /** Stock keeping unit. */
  sku?: string;
  /** Rating value paired with a count. */
  rating?: { value: number; count: number };
  /** Product image, as a string. */
  image?: string;
}
|
|
16
|
+
/**
 * Extracts product data from a page's HTML.
 *
 * The implementation tries JSON-LD first and falls back to Open Graph
 * meta tags when JSON-LD yields no product.
 *
 * @param html Raw HTML of the page.
 * @param _url Page URL; not used by the current implementation.
 * @returns The extracted product, or `null` when `html` is empty or no
 *   named product could be found.
 */
export declare function extractProductSchema(html: string, _url: string): Promise<ProductData | null>;
|
|
17
|
+
//# sourceMappingURL=Product.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Product.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/Product.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1C,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAsB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAOlG"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
import { extractJsonLd } from "../../jsonld.js";
|
|
3
|
+
/**
 * Extract product data from an HTML document.
 *
 * JSON-LD is consulted first; Open Graph meta tags are used only when
 * JSON-LD produces nothing.
 *
 * @param html Raw HTML of the page.
 * @param _url Page URL (unused).
 * @returns The product data, or `null` when `html` is empty or neither
 *   source yields a named product.
 */
async function extractProductSchema(html, _url) {
  if (!html) return null;
  // Both helpers return an object or null, so `??` is the whole fallback chain.
  return tryJsonLd(html) ?? tryOpenGraph(html);
}
|
|
9
|
+
/**
 * Build product data from the first JSON-LD block whose `@type` is Product.
 *
 * @param html Raw HTML of the page.
 * @returns The product data, or `null` when JSON-LD extraction throws, no
 *   Product block exists, or the Product block has no usable `name`.
 */
function tryJsonLd(html) {
  let blocks;
  try {
    blocks = extractJsonLd(html);
  } catch {
    // Best effort: a failure while reading JSON-LD is treated as "no JSON-LD".
    return null;
  }
  const product = blocks.find((block) => typeIncludes(block["@type"], "product"));
  if (!product) return null;
  const name = stringField(product["name"]);
  if (!name) return null;
  const data = { name };
  const description = stringField(product["description"]);
  if (description) data.description = description;
  const brand = readBrand(product["brand"]);
  if (brand) data.brand = brand;
  const offer = pickOffer(product["offers"]);
  if (offer) {
    // An AggregateOffer carries `lowPrice`/`highPrice` instead of `price`;
    // fall back to `lowPrice` so such products still report a price.
    const amount = readNumber(offer["price"]) ?? readNumber(offer["lowPrice"]);
    const currency = stringField(offer["priceCurrency"]);
    // A price is only meaningful with both an amount and a currency.
    if (amount !== void 0 && currency) {
      data.price = { amount, currency };
    }
  }
  const sku = stringField(product["sku"]);
  if (sku) data.sku = sku;
  const rating = readRating(product["aggregateRating"]);
  if (rating) data.rating = rating;
  const image = firstImage(product["image"]);
  if (image) data.image = image;
  return data;
}
|
|
41
|
+
/**
 * Build product data from Open Graph / product meta tags.
 *
 * @param html Raw HTML of the page.
 * @returns The product data, or `null` when the HTML cannot be parsed or
 *   the page has no non-empty `og:title`.
 */
function tryOpenGraph(html) {
  let document;
  try {
    document = parseHTML(html).document;
  } catch {
    // Unparseable markup simply means there is nothing to read.
    return null;
  }

  const title = metaContent(document, 'meta[property="og:title"]');
  if (!title) return null;

  const product = { name: title };

  const summary = metaContent(document, 'meta[property="og:description"]');
  if (summary) product.description = summary;

  const picture = metaContent(document, 'meta[property="og:image"]');
  if (picture) product.image = picture;

  const rawAmount = metaContent(document, 'meta[property="product:price:amount"]');
  const currency = metaContent(document, 'meta[property="product:price:currency"]');
  // Without both halves of the price there is nothing more to add.
  if (!rawAmount || !currency) return product;

  const amount = Number.parseFloat(rawAmount);
  if (Number.isFinite(amount)) product.price = { amount, currency };
  return product;
}
|
|
65
|
+
/**
 * Read the trimmed `content` attribute of the first element matching
 * `selector`.
 *
 * @param document Object exposing `querySelector`.
 * @param selector CSS selector for the meta element.
 * @returns The trimmed content, or `undefined` when the element or the
 *   attribute is missing or the content is blank.
 */
function metaContent(document, selector) {
  const raw = document.querySelector(selector)?.getAttribute("content");
  if (raw == null) return void 0;
  const text = raw.trim();
  return text.length > 0 ? text : void 0;
}
|
|
70
|
+
/**
 * Check whether a JSON-LD `@type` value names the wanted type.
 *
 * @param raw The `@type` value: a string, an array, or anything else.
 * @param want Type name to look for (compared case-insensitively).
 * @returns `true` when `raw`, or any string entry of `raw`, normalizes to `want`.
 */
function typeIncludes(raw, want) {
  const target = want.toLowerCase();
  // Treat a lone value as a one-element list so both shapes share one check.
  const candidates = Array.isArray(raw) ? raw : [raw];
  return candidates.some((entry) => typeof entry === "string" && normalizeType(entry) === target);
}
|
|
78
|
+
/**
 * Reduce a type identifier to its lower-cased final segment.
 *
 * Segments are delimited by `/`, `#` or `:`, so both
 * `https://schema.org/Product` and `schema:Product` become `product`.
 *
 * @param raw Type identifier string.
 * @returns The lower-cased text after the last delimiter.
 */
function normalizeType(raw) {
  // split() always yields at least one element, so the last index is safe.
  const segments = raw.split(/[/#:]/);
  return segments[segments.length - 1].toLowerCase();
}
|
|
82
|
+
/**
 * Coerce a JSON value to a non-empty string.
 *
 * @param value Any JSON value.
 * @returns The trimmed string, the decimal form of a finite number, or
 *   `undefined` for blank strings, non-finite numbers and all other types.
 */
function stringField(value) {
  switch (typeof value) {
    case "string": {
      const text = value.trim();
      return text.length > 0 ? text : void 0;
    }
    case "number":
      return Number.isFinite(value) ? String(value) : void 0;
    default:
      return void 0;
  }
}
|
|
92
|
+
/**
 * Read a finite number from a JSON value.
 *
 * @param value A number, a numeric string, or anything else.
 * @returns The number itself, the `Number.parseFloat` result for a string,
 *   or `undefined` when the value is neither or the result is not finite.
 */
function readNumber(value) {
  let candidate;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string") {
    candidate = Number.parseFloat(value);
  } else {
    return void 0;
  }
  return Number.isFinite(candidate) ? candidate : void 0;
}
|
|
100
|
+
/**
 * Resolve a brand name from a JSON-LD `brand` value.
 *
 * @param value A string, an object with a `name`, or an array of either.
 * @returns The first usable brand name, or `undefined` when none is found.
 */
function readBrand(value) {
  if (Array.isArray(value)) {
    // First entry that resolves to a name wins.
    for (const entry of value) {
      const found = readBrand(entry);
      if (found) return found;
    }
    return void 0;
  }
  if (typeof value === "string") return stringField(value);
  return value && typeof value === "object" ? stringField(value["name"]) : void 0;
}
|
|
115
|
+
/**
 * Select the offer object to read pricing from.
 *
 * @param value A JSON-LD `offers` value: an object, an array, or anything else.
 * @returns The object itself, the first object-typed entry of an array, or
 *   `undefined` when there is none.
 */
function pickOffer(value) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
  if (!Array.isArray(value)) {
    return isObjectLike(value) ? value : void 0;
  }
  for (const candidate of value) {
    if (isObjectLike(candidate)) return candidate;
  }
  return void 0;
}
|
|
123
|
+
/**
 * Read an aggregate rating from a JSON-LD `aggregateRating` value.
 *
 * @param value The `aggregateRating` value.
 * @returns `{ value, count }`, or `undefined` when `value` is not an object
 *   or has no numeric `ratingValue`. `count` is taken from `reviewCount`,
 *   then `ratingCount`, and defaults to 0.
 */
function readRating(value) {
  if (!value || typeof value !== "object") return void 0;
  const score = readNumber(value["ratingValue"]);
  if (score === void 0) return void 0;
  const votes = readNumber(value["reviewCount"]) ?? readNumber(value["ratingCount"]) ?? 0;
  return { value: score, count: votes };
}
|
|
131
|
+
/**
 * Resolve an image URL from a JSON-LD `image` value.
 *
 * @param value A URL string, an image object, or an array of either.
 * @returns The first usable URL, or `undefined` when none is found.
 */
function firstImage(value) {
  if (typeof value === "string") return stringField(value);
  if (Array.isArray(value)) {
    // First entry that resolves to a URL wins.
    for (const entry of value) {
      const img = firstImage(entry);
      if (img) return img;
    }
    return void 0;
  }
  if (value && typeof value === "object") {
    // An ImageObject may carry its address in `contentUrl` instead of `url`;
    // accept either so such images are not dropped.
    return stringField(value["url"]) ?? stringField(value["contentUrl"]);
  }
  return void 0;
}
|
|
146
|
+
export {
|
|
147
|
+
extractProductSchema
|
|
148
|
+
};
|
|
149
|
+
//# sourceMappingURL=Product.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/Product.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { extractJsonLd } from '../../jsonld.js';\n\nexport interface ProductData {\n name: string;\n description?: string;\n brand?: string;\n price?: { amount: number; currency: string };\n sku?: string;\n rating?: { value: number; count: number };\n image?: string;\n}\n\nexport async function extractProductSchema(html: string, _url: string): Promise<ProductData | null> {\n if (!html) return null;\n\n const fromJsonLd = tryJsonLd(html);\n if (fromJsonLd) return fromJsonLd;\n\n return tryOpenGraph(html);\n}\n\nfunction tryJsonLd(html: string): ProductData | null {\n let blocks: Record<string, unknown>[];\n try {\n blocks = extractJsonLd(html);\n } catch {\n return null;\n }\n\n const product = blocks.find((block) => typeIncludes(block['@type'], 'product'));\n if (!product) return null;\n\n const name = stringField(product['name']);\n if (!name) return null;\n\n const data: ProductData = { name };\n\n const description = stringField(product['description']);\n if (description) data.description = description;\n\n const brand = readBrand(product['brand']);\n if (brand) data.brand = brand;\n\n const offer = pickOffer(product['offers']);\n if (offer) {\n const amount = readNumber(offer['price']);\n const currency = stringField(offer['priceCurrency']);\n if (amount !== undefined && currency) {\n data.price = { amount, currency };\n }\n }\n\n const sku = stringField(product['sku']);\n if (sku) data.sku = sku;\n\n const rating = readRating(product['aggregateRating']);\n if (rating) data.rating = rating;\n\n const image = firstImage(product['image']);\n if (image) data.image = image;\n\n return data;\n}\n\nfunction tryOpenGraph(html: string): ProductData | null {\n let document: Document;\n try {\n ({ document } = parseHTML(html));\n } catch {\n return null;\n }\n\n const name = metaContent(document, 'meta[property=\"og:title\"]');\n if 
(!name) return null;\n\n const data: ProductData = { name };\n const description = metaContent(document, 'meta[property=\"og:description\"]');\n if (description) data.description = description;\n\n const image = metaContent(document, 'meta[property=\"og:image\"]');\n if (image) data.image = image;\n\n const priceAmt = metaContent(document, 'meta[property=\"product:price:amount\"]');\n const priceCur = metaContent(document, 'meta[property=\"product:price:currency\"]');\n if (priceAmt && priceCur) {\n const amount = Number.parseFloat(priceAmt);\n if (Number.isFinite(amount)) {\n data.price = { amount, currency: priceCur };\n }\n }\n\n return data;\n}\n\nfunction metaContent(document: Document, selector: string): string | undefined {\n const el = document.querySelector(selector);\n const content = el?.getAttribute('content')?.trim();\n return content && content.length > 0 ? content : undefined;\n}\n\nfunction typeIncludes(raw: unknown, want: string): boolean {\n const target = want.toLowerCase();\n if (typeof raw === 'string') return normalizeType(raw) === target;\n if (Array.isArray(raw)) {\n return raw.some((entry) => typeof entry === 'string' && normalizeType(entry) === target);\n }\n return false;\n}\n\nfunction normalizeType(raw: string): string {\n const tail = raw.split(/[/#:]/).pop() ?? raw;\n return tail.toLowerCase();\n}\n\nfunction stringField(value: unknown): string | undefined {\n if (typeof value === 'string') {\n const trimmed = value.trim();\n return trimmed.length > 0 ? 
trimmed : undefined;\n }\n if (typeof value === 'number' && Number.isFinite(value)) {\n return String(value);\n }\n return undefined;\n}\n\nfunction readNumber(value: unknown): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value)) return value;\n if (typeof value === 'string') {\n const parsed = Number.parseFloat(value);\n if (Number.isFinite(parsed)) return parsed;\n }\n return undefined;\n}\n\nfunction readBrand(value: unknown): string | undefined {\n if (typeof value === 'string') return stringField(value);\n if (Array.isArray(value)) {\n for (const entry of value) {\n const name = readBrand(entry);\n if (name) return name;\n }\n return undefined;\n }\n if (value && typeof value === 'object') {\n const name = (value as Record<string, unknown>)['name'];\n return stringField(name);\n }\n return undefined;\n}\n\nfunction pickOffer(value: unknown): Record<string, unknown> | undefined {\n if (Array.isArray(value)) {\n const first = value.find((entry) => entry && typeof entry === 'object');\n return first as Record<string, unknown> | undefined;\n }\n if (value && typeof value === 'object') return value as Record<string, unknown>;\n return undefined;\n}\n\nfunction readRating(value: unknown): { value: number; count: number } | undefined {\n if (!value || typeof value !== 'object') return undefined;\n const obj = value as Record<string, unknown>;\n const ratingValue = readNumber(obj['ratingValue']);\n const reviewCount = readNumber(obj['reviewCount']) ?? readNumber(obj['ratingCount']);\n if (ratingValue === undefined) return undefined;\n return { value: ratingValue, count: reviewCount ?? 
0 };\n}\n\nfunction firstImage(value: unknown): string | undefined {\n if (typeof value === 'string') return stringField(value);\n if (Array.isArray(value)) {\n for (const entry of value) {\n const img = firstImage(entry);\n if (img) return img;\n }\n return undefined;\n }\n if (value && typeof value === 'object') {\n const url = (value as Record<string, unknown>)['url'];\n return stringField(url);\n }\n return undefined;\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,qBAAqB;AAY9B,eAAsB,qBAAqB,MAAc,MAA2C;AAClG,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,aAAa,UAAU,IAAI;AACjC,MAAI,WAAY,QAAO;AAEvB,SAAO,aAAa,IAAI;AAC1B;AAEA,SAAS,UAAU,MAAkC;AACnD,MAAI;AACJ,MAAI;AACF,aAAS,cAAc,IAAI;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,UAAU,OAAO,KAAK,CAAC,UAAU,aAAa,MAAM,OAAO,GAAG,SAAS,CAAC;AAC9E,MAAI,CAAC,QAAS,QAAO;AAErB,QAAM,OAAO,YAAY,QAAQ,MAAM,CAAC;AACxC,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,OAAoB,EAAE,KAAK;AAEjC,QAAM,cAAc,YAAY,QAAQ,aAAa,CAAC;AACtD,MAAI,YAAa,MAAK,cAAc;AAEpC,QAAM,QAAQ,UAAU,QAAQ,OAAO,CAAC;AACxC,MAAI,MAAO,MAAK,QAAQ;AAExB,QAAM,QAAQ,UAAU,QAAQ,QAAQ,CAAC;AACzC,MAAI,OAAO;AACT,UAAM,SAAS,WAAW,MAAM,OAAO,CAAC;AACxC,UAAM,WAAW,YAAY,MAAM,eAAe,CAAC;AACnD,QAAI,WAAW,UAAa,UAAU;AACpC,WAAK,QAAQ,EAAE,QAAQ,SAAS;AAAA,IAClC;AAAA,EACF;AAEA,QAAM,MAAM,YAAY,QAAQ,KAAK,CAAC;AACtC,MAAI,IAAK,MAAK,MAAM;AAEpB,QAAM,SAAS,WAAW,QAAQ,iBAAiB,CAAC;AACpD,MAAI,OAAQ,MAAK,SAAS;AAE1B,QAAM,QAAQ,WAAW,QAAQ,OAAO,CAAC;AACzC,MAAI,MAAO,MAAK,QAAQ;AAExB,SAAO;AACT;AAEA,SAAS,aAAa,MAAkC;AACtD,MAAI;AACJ,MAAI;AACF,KAAC,EAAE,SAAS,IAAI,UAAU,IAAI;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,OAAO,YAAY,UAAU,2BAA2B;AAC9D,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,OAAoB,EAAE,KAAK;AACjC,QAAM,cAAc,YAAY,UAAU,iCAAiC;AAC3E,MAAI,YAAa,MAAK,cAAc;AAEpC,QAAM,QAAQ,YAAY,UAAU,2BAA2B;AAC/D,MAAI,MAAO,MAAK,QAAQ;AAExB,QAAM,WAAW,YAAY,UAAU,uCAAuC;AAC9E,QAAM,WAAW,YAAY,UAAU,yCAAyC;AAChF,MAAI,YAAY,UAAU;AACxB,UAAM,SAAS,OAAO,WAAW,QAAQ;AACzC,QAAI,OAAO,SAAS,MAAM,GAAG;AAC3B,WAAK,QAAQ,EAAE,QAAQ,UAAU,SAAS;AAAA,IAC5C;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,YAAY,UAAoB,UAAsC;AAC7E,QAAM,KAAK,SAAS
,cAAc,QAAQ;AAC1C,QAAM,UAAU,IAAI,aAAa,SAAS,GAAG,KAAK;AAClD,SAAO,WAAW,QAAQ,SAAS,IAAI,UAAU;AACnD;AAEA,SAAS,aAAa,KAAc,MAAuB;AACzD,QAAM,SAAS,KAAK,YAAY;AAChC,MAAI,OAAO,QAAQ,SAAU,QAAO,cAAc,GAAG,MAAM;AAC3D,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,KAAK,CAAC,UAAU,OAAO,UAAU,YAAY,cAAc,KAAK,MAAM,MAAM;AAAA,EACzF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,OAAO,IAAI,MAAM,OAAO,EAAE,IAAI,KAAK;AACzC,SAAO,KAAK,YAAY;AAC1B;AAEA,SAAS,YAAY,OAAoC;AACvD,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,UAAU,MAAM,KAAK;AAC3B,WAAO,QAAQ,SAAS,IAAI,UAAU;AAAA,EACxC;AACA,MAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,GAAG;AACvD,WAAO,OAAO,KAAK;AAAA,EACrB;AACA,SAAO;AACT;AAEA,SAAS,WAAW,OAAoC;AACtD,MAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,EAAG,QAAO;AAChE,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,SAAS,OAAO,WAAW,KAAK;AACtC,QAAI,OAAO,SAAS,MAAM,EAAG,QAAO;AAAA,EACtC;AACA,SAAO;AACT;AAEA,SAAS,UAAU,OAAoC;AACrD,MAAI,OAAO,UAAU,SAAU,QAAO,YAAY,KAAK;AACvD,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,OAAO,UAAU,KAAK;AAC5B,UAAI,KAAM,QAAO;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,OAAQ,MAAkC,MAAM;AACtD,WAAO,YAAY,IAAI;AAAA,EACzB;AACA,SAAO;AACT;AAEA,SAAS,UAAU,OAAqD;AACtE,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,UAAM,QAAQ,MAAM,KAAK,CAAC,UAAU,SAAS,OAAO,UAAU,QAAQ;AACtE,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,SAAU,QAAO;AAC/C,SAAO;AACT;AAEA,SAAS,WAAW,OAA8D;AAChF,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,QAAM,MAAM;AACZ,QAAM,cAAc,WAAW,IAAI,aAAa,CAAC;AACjD,QAAM,cAAc,WAAW,IAAI,aAAa,CAAC,KAAK,WAAW,IAAI,aAAa,CAAC;AACnF,MAAI,gBAAgB,OAAW,QAAO;AACtC,SAAO,EAAE,OAAO,aAAa,OAAO,eAAe,EAAE;AACvD;AAEA,SAAS,WAAW,OAAoC;AACtD,MAAI,OAAO,UAAU,SAAU,QAAO,YAAY,KAAK;AACvD,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,MAAM,WAAW,KAAK;AAC5B,UAAI,IAAK,QAAO;AAAA,IAClB;AACA,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,MAAO,MAAkC,KAAK;AACpD,WAAO,YAAY,GAAG;AAAA,EACxB;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Structured recipe information returned by `extractRecipeSchema`.
 *
 * `name`, `ingredients` and `instructions` are always present (the lists
 * may be empty); every other field is set only when a value was found.
 */
export interface RecipeData {
    /** Recipe name (trimmed, never empty). */
    name: string;
    /** Recipe description, when available. */
    description?: string;
    /** Ingredient lines, one entry per ingredient. */
    ingredients: string[];
    /** Instruction steps, in document order. */
    instructions: string[];
    /** Total time as given by the page's `totalTime` value. */
    totalTime?: string;
    /** Preparation time as given by the page's `prepTime` value. */
    prepTime?: string;
    /** Cooking time as given by the page's `cookTime` value. */
    cookTime?: string;
    /** Yield as given by the page's `recipeYield` value. */
    recipeYield?: string;
    /** Author name, when available. */
    author?: string;
    /** Publication date as given by the page. */
    date?: string;
}
|
|
13
|
+
/**
 * Extracts recipe data from a page's HTML.
 *
 * The implementation tries JSON-LD first and falls back to parsing the
 * markdown produced by the recipe extractor when JSON-LD yields no recipe.
 *
 * @param html Raw HTML of the page.
 * @param url Page URL, forwarded to the markdown fallback.
 * @returns The extracted recipe, or `null` when `html` is empty or no
 *   named recipe could be found.
 */
export declare function extractRecipeSchema(html: string, url: string): Promise<RecipeData | null>;
|
|
14
|
+
//# sourceMappingURL=Recipe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Recipe.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/Recipe.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,wBAAsB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAO/F"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { extractJsonLd } from "../../jsonld.js";
|
|
2
|
+
import { extractRecipe } from "../recipe.js";
|
|
3
|
+
/**
 * Extract recipe data from an HTML document.
 *
 * JSON-LD is consulted first; the markdown-based fallback runs only when
 * JSON-LD produces nothing.
 *
 * @param html Raw HTML of the page.
 * @param url Page URL, passed through to the fallback extractor.
 * @returns The recipe data, or `null` when `html` is empty or neither
 *   source yields a named recipe.
 */
async function extractRecipeSchema(html, url) {
  if (!html) return null;
  const structured = tryJsonLd(html);
  return structured ? structured : tryMarkdownFallback(html, url);
}
|
|
9
|
+
/**
 * Build recipe data from the first JSON-LD block whose `@type` is Recipe.
 *
 * @param html Raw HTML of the page.
 * @returns The recipe data, or `null` when JSON-LD extraction throws, no
 *   Recipe block exists, or the Recipe block has no usable `name`.
 */
function tryJsonLd(html) {
  let blocks;
  try {
    blocks = extractJsonLd(html);
  } catch {
    // Best effort: a failure while reading JSON-LD is treated as "no JSON-LD".
    return null;
  }

  const recipe = blocks.find((candidate) => typeIncludes(candidate["@type"], "recipe"));
  const name = recipe ? stringField(recipe["name"]) : void 0;
  if (!name) return null;

  const data = {
    name,
    ingredients: stringArray(recipe["recipeIngredient"]),
    instructions: readInstructions(recipe["recipeInstructions"])
  };

  // Plain string fields are copied under the same key, in this order.
  for (const key of ["description", "totalTime", "prepTime", "cookTime", "recipeYield"]) {
    const text = stringField(recipe[key]);
    if (text) data[key] = text;
  }

  const author = readAuthor(recipe["author"]);
  if (author) data.author = author;

  const published = stringField(recipe["datePublished"]);
  if (published) data.date = published;

  return data;
}
|
|
43
|
+
/**
 * Build recipe data from the markdown produced by `extractRecipe`.
 *
 * Ingredients are read from the bullet list under `## Ingredients` and
 * instructions from the numbered list under `## Instructions`.
 *
 * @param html Raw HTML of the page.
 * @param url Page URL, passed to `extractRecipe`.
 * @returns The recipe data, or `null` when `extractRecipe` returns nothing
 *   or its title is blank.
 */
async function tryMarkdownFallback(html, url) {
  const result = await extractRecipe(html, url);
  if (!result) return null;

  const name = (result.title ?? "").trim();
  if (!name) return null;

  const data = {
    name,
    ingredients: parseSection(result.markdown, "## Ingredients"),
    instructions: parseNumberedSection(result.markdown, "## Instructions")
  };

  const { description, author, date } = result.metadata;
  if (description) data.description = description;
  if (author) data.author = author;
  if (date) data.date = date;
  return data;
}
|
|
60
|
+
/**
 * Collect the `- ` bullet items of one markdown section.
 *
 * The section starts right after the first occurrence of `header` and ends
 * at the next line starting with `## ` (or at the end of the text).
 *
 * @param markdown Markdown text to search.
 * @param header Exact header text that opens the section.
 * @returns The trimmed bullet texts, or `[]` when the markdown is empty or
 *   the header is absent.
 */
function parseSection(markdown, header) {
  if (!markdown) return [];
  const start = markdown.indexOf(header);
  if (start < 0) return [];

  let body = markdown.slice(start + header.length);
  const nextHeading = body.indexOf("\n## ");
  if (nextHeading >= 0) body = body.slice(0, nextHeading);

  return body
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.startsWith("- "))
    .map((line) => line.slice(2).trim());
}
|
|
76
|
+
/**
 * Collect the numbered-list items (`1. text`) of one markdown section.
 *
 * The section starts right after the first occurrence of `header` and ends
 * at the next line starting with `## ` (or at the end of the text).
 *
 * @param markdown Markdown text to search.
 * @param header Exact header text that opens the section.
 * @returns The trimmed item texts, or `[]` when the markdown is empty or
 *   the header is absent.
 */
function parseNumberedSection(markdown, header) {
  if (!markdown) return [];
  const start = markdown.indexOf(header);
  if (start < 0) return [];

  const afterHeader = markdown.slice(start + header.length);
  const end = afterHeader.indexOf("\n## ");
  const section = end < 0 ? afterHeader : afterHeader.slice(0, end);

  // Digits, a dot, at least one space, then the step text.
  const numbered = /^\d+\.\s+(.*)$/;
  return section.split("\n").flatMap((line) => {
    const hit = numbered.exec(line.trim());
    return hit ? [hit[1].trim()] : [];
  });
}
|
|
91
|
+
/**
 * Check whether a JSON-LD `@type` value names the wanted type.
 *
 * @param raw The `@type` value: a string, an array, or anything else.
 * @param want Type name to look for (compared case-insensitively).
 * @returns `true` when `raw`, or any string entry of `raw`, normalizes to `want`.
 */
function typeIncludes(raw, want) {
  const wanted = want.toLowerCase();
  const matches = (entry) => typeof entry === "string" && normalizeType(entry) === wanted;
  return Array.isArray(raw) ? raw.some(matches) : matches(raw);
}
|
|
99
|
+
/**
 * Reduce a type identifier to its lower-cased final segment.
 *
 * Segments are delimited by `/`, `#` or `:`, so both
 * `https://schema.org/Recipe` and `schema:Recipe` become `recipe`.
 *
 * @param raw Type identifier string.
 * @returns The lower-cased text after the last delimiter.
 */
function normalizeType(raw) {
  // split() always yields at least one element, so the last index is safe.
  const pieces = raw.split(/[/#:]/);
  const last = pieces[pieces.length - 1];
  return last.toLowerCase();
}
|
|
103
|
+
/**
 * Coerce a JSON value to a non-empty string.
 *
 * Finite numbers are accepted as well as strings, so numeric JSON-LD
 * values such as `"recipeYield": 4` are kept instead of being dropped.
 *
 * @param value Any JSON value.
 * @returns The trimmed string, the decimal form of a finite number, or
 *   `undefined` for blank strings, non-finite numbers and all other types.
 */
function stringField(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : void 0;
  }
  if (typeof value === "number" && Number.isFinite(value)) {
    return String(value);
  }
  return void 0;
}
|
|
108
|
+
/**
 * Read a list of non-empty strings from a JSON value.
 *
 * JSON-LD allows a single value where a list is expected, so a lone string
 * is treated as a one-element list instead of being discarded.
 *
 * @param value An array, a single string, or anything else.
 * @returns The trimmed, non-blank string entries in order; `[]` when there
 *   are none. Non-string entries are skipped.
 */
function stringArray(value) {
  const entries = Array.isArray(value) ? value : [value];
  const out = [];
  for (const entry of entries) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed) out.push(trimmed);
  }
  return out;
}
|
|
119
|
+
/**
 * Flatten a JSON-LD `recipeInstructions` value into a list of step texts.
 *
 * Accepted shapes:
 * - a plain string (one step),
 * - an object with a `text` string (e.g. a HowToStep),
 * - an object with `itemListElement` (e.g. a HowToSection), whose nested
 *   steps are flattened in order,
 * - an array of any of the above.
 *
 * @param value The `recipeInstructions` value.
 * @returns The trimmed, non-blank step texts in document order; `[]` when
 *   nothing usable is found.
 */
function readInstructions(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed ? [trimmed] : [];
  }
  if (Array.isArray(value)) {
    return value.flatMap((entry) => readInstructions(entry));
  }
  if (value && typeof value === "object") {
    const text = value["text"];
    if (typeof text === "string") {
      const trimmed = text.trim();
      if (trimmed) return [trimmed];
    }
    // No direct text: the entry may be a section grouping nested steps.
    return readInstructions(value["itemListElement"]);
  }
  return [];
}
|
|
142
|
+
/**
 * Resolve an author name from a JSON-LD `author` value.
 *
 * @param value A string, an object with a string `name`, or an array of either.
 * @returns The first usable author name, or `undefined` when none is found.
 */
function readAuthor(value) {
  if (Array.isArray(value)) {
    // First entry that resolves to a name wins.
    for (const entry of value) {
      const found = readAuthor(entry);
      if (found) return found;
    }
    return void 0;
  }
  if (typeof value === "string") return stringField(value);
  if (value && typeof value === "object" && typeof value["name"] === "string") {
    return stringField(value["name"]);
  }
  return void 0;
}
|
|
157
|
+
export {
|
|
158
|
+
extractRecipeSchema
|
|
159
|
+
};
|
|
160
|
+
//# sourceMappingURL=Recipe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/Recipe.ts"],"sourcesContent":["import { extractJsonLd } from '../../jsonld.js';\nimport { extractRecipe } from '../recipe.js';\n\nexport interface RecipeData {\n name: string;\n description?: string;\n ingredients: string[];\n instructions: string[];\n totalTime?: string;\n prepTime?: string;\n cookTime?: string;\n recipeYield?: string;\n author?: string;\n date?: string;\n}\n\nexport async function extractRecipeSchema(html: string, url: string): Promise<RecipeData | null> {\n if (!html) return null;\n\n const fromJsonLd = tryJsonLd(html);\n if (fromJsonLd) return fromJsonLd;\n\n return tryMarkdownFallback(html, url);\n}\n\nfunction tryJsonLd(html: string): RecipeData | null {\n let blocks: Record<string, unknown>[];\n try {\n blocks = extractJsonLd(html);\n } catch {\n return null;\n }\n\n const recipe = blocks.find((block) => typeIncludes(block['@type'], 'recipe'));\n if (!recipe) return null;\n\n const name = stringField(recipe['name']);\n if (!name) return null;\n\n const ingredients = stringArray(recipe['recipeIngredient']);\n const instructions = readInstructions(recipe['recipeInstructions']);\n\n const data: RecipeData = {\n name,\n ingredients,\n instructions,\n };\n const description = stringField(recipe['description']);\n if (description) data.description = description;\n const totalTime = stringField(recipe['totalTime']);\n if (totalTime) data.totalTime = totalTime;\n const prepTime = stringField(recipe['prepTime']);\n if (prepTime) data.prepTime = prepTime;\n const cookTime = stringField(recipe['cookTime']);\n if (cookTime) data.cookTime = cookTime;\n const recipeYield = stringField(recipe['recipeYield']);\n if (recipeYield) data.recipeYield = recipeYield;\n const author = readAuthor(recipe['author']);\n if (author) data.author = author;\n const date = stringField(recipe['datePublished']);\n if (date) data.date = date;\n\n return data;\n}\n\nasync function tryMarkdownFallback(html: string, 
url: string): Promise<RecipeData | null> {\n const result = await extractRecipe(html, url);\n if (!result) return null;\n\n const name = (result.title ?? '').trim();\n if (!name) return null;\n\n const ingredients = parseSection(result.markdown, '## Ingredients');\n const instructions = parseNumberedSection(result.markdown, '## Instructions');\n\n const data: RecipeData = {\n name,\n ingredients,\n instructions,\n };\n if (result.metadata.description) data.description = result.metadata.description;\n if (result.metadata.author) data.author = result.metadata.author;\n if (result.metadata.date) data.date = result.metadata.date;\n\n return data;\n}\n\nfunction parseSection(markdown: string, header: string): string[] {\n if (!markdown) return [];\n const idx = markdown.indexOf(header);\n if (idx < 0) return [];\n const rest = markdown.slice(idx + header.length);\n const stop = rest.indexOf('\\n## ');\n const slice = stop >= 0 ? rest.slice(0, stop) : rest;\n const out: string[] = [];\n for (const line of slice.split('\\n')) {\n const trimmed = line.trim();\n if (trimmed.startsWith('- ')) {\n out.push(trimmed.slice(2).trim());\n }\n }\n return out;\n}\n\nfunction parseNumberedSection(markdown: string, header: string): string[] {\n if (!markdown) return [];\n const idx = markdown.indexOf(header);\n if (idx < 0) return [];\n const rest = markdown.slice(idx + header.length);\n const stop = rest.indexOf('\\n## ');\n const slice = stop >= 0 ? 
rest.slice(0, stop) : rest;\n const out: string[] = [];\n for (const line of slice.split('\\n')) {\n const trimmed = line.trim();\n const m = /^\\d+\\.\\s+(.*)$/.exec(trimmed);\n if (m) out.push(m[1].trim());\n }\n return out;\n}\n\nfunction typeIncludes(raw: unknown, want: string): boolean {\n const target = want.toLowerCase();\n if (typeof raw === 'string') return normalizeType(raw) === target;\n if (Array.isArray(raw)) {\n return raw.some((entry) => typeof entry === 'string' && normalizeType(entry) === target);\n }\n return false;\n}\n\nfunction normalizeType(raw: string): string {\n const tail = raw.split(/[/#:]/).pop() ?? raw;\n return tail.toLowerCase();\n}\n\nfunction stringField(value: unknown): string | undefined {\n if (typeof value !== 'string') return undefined;\n const trimmed = value.trim();\n return trimmed.length > 0 ? trimmed : undefined;\n}\n\nfunction stringArray(value: unknown): string[] {\n if (!Array.isArray(value)) return [];\n const out: string[] = [];\n for (const entry of value) {\n if (typeof entry === 'string') {\n const trimmed = entry.trim();\n if (trimmed) out.push(trimmed);\n }\n }\n return out;\n}\n\nfunction readInstructions(value: unknown): string[] {\n if (typeof value === 'string') {\n const trimmed = value.trim();\n return trimmed ? 
[trimmed] : [];\n }\n if (!Array.isArray(value)) return [];\n const out: string[] = [];\n for (const entry of value) {\n if (typeof entry === 'string') {\n const trimmed = entry.trim();\n if (trimmed) out.push(trimmed);\n continue;\n }\n if (entry && typeof entry === 'object') {\n const text = (entry as Record<string, unknown>)['text'];\n if (typeof text === 'string') {\n const trimmed = text.trim();\n if (trimmed) out.push(trimmed);\n }\n }\n }\n return out;\n}\n\nfunction readAuthor(value: unknown): string | undefined {\n if (typeof value === 'string') return stringField(value);\n if (Array.isArray(value)) {\n for (const entry of value) {\n const name = readAuthor(entry);\n if (name) return name;\n }\n return undefined;\n }\n if (value && typeof value === 'object') {\n const name = (value as Record<string, unknown>)['name'];\n if (typeof name === 'string') return stringField(name);\n }\n return undefined;\n}\n"],"mappings":"AAAA,SAAS,qBAAqB;AAC9B,SAAS,qBAAqB;AAe9B,eAAsB,oBAAoB,MAAc,KAAyC;AAC/F,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,aAAa,UAAU,IAAI;AACjC,MAAI,WAAY,QAAO;AAEvB,SAAO,oBAAoB,MAAM,GAAG;AACtC;AAEA,SAAS,UAAU,MAAiC;AAClD,MAAI;AACJ,MAAI;AACF,aAAS,cAAc,IAAI;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO,KAAK,CAAC,UAAU,aAAa,MAAM,OAAO,GAAG,QAAQ,CAAC;AAC5E,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,OAAO,YAAY,OAAO,MAAM,CAAC;AACvC,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,cAAc,YAAY,OAAO,kBAAkB,CAAC;AAC1D,QAAM,eAAe,iBAAiB,OAAO,oBAAoB,CAAC;AAElE,QAAM,OAAmB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,cAAc,YAAY,OAAO,aAAa,CAAC;AACrD,MAAI,YAAa,MAAK,cAAc;AACpC,QAAM,YAAY,YAAY,OAAO,WAAW,CAAC;AACjD,MAAI,UAAW,MAAK,YAAY;AAChC,QAAM,WAAW,YAAY,OAAO,UAAU,CAAC;AAC/C,MAAI,SAAU,MAAK,WAAW;AAC9B,QAAM,WAAW,YAAY,OAAO,UAAU,CAAC;AAC/C,MAAI,SAAU,MAAK,WAAW;AAC9B,QAAM,cAAc,YAAY,OAAO,aAAa,CAAC;AACrD,MAAI,YAAa,MAAK,cAAc;AACpC,QAAM,SAAS,WAAW,OAAO,QAAQ,CAAC;AAC1C,MAAI,OAAQ,MAAK,SAAS;AAC1B,QAAM,OAAO,YAAY,OAAO,eAAe,CAAC;AAChD,MAAI,KAAM,MAAK,OAAO;AAEtB,SAAO;AACT;AAEA,eAAe,oBAAoB,MAAc,KAAyC;AACxF,QAAM,SAAS,MAAM,cAA
c,MAAM,GAAG;AAC5C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,OAAO,SAAS,IAAI,KAAK;AACvC,MAAI,CAAC,KAAM,QAAO;AAElB,QAAM,cAAc,aAAa,OAAO,UAAU,gBAAgB;AAClE,QAAM,eAAe,qBAAqB,OAAO,UAAU,iBAAiB;AAE5E,QAAM,OAAmB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,MAAI,OAAO,SAAS,YAAa,MAAK,cAAc,OAAO,SAAS;AACpE,MAAI,OAAO,SAAS,OAAQ,MAAK,SAAS,OAAO,SAAS;AAC1D,MAAI,OAAO,SAAS,KAAM,MAAK,OAAO,OAAO,SAAS;AAEtD,SAAO;AACT;AAEA,SAAS,aAAa,UAAkB,QAA0B;AAChE,MAAI,CAAC,SAAU,QAAO,CAAC;AACvB,QAAM,MAAM,SAAS,QAAQ,MAAM;AACnC,MAAI,MAAM,EAAG,QAAO,CAAC;AACrB,QAAM,OAAO,SAAS,MAAM,MAAM,OAAO,MAAM;AAC/C,QAAM,OAAO,KAAK,QAAQ,OAAO;AACjC,QAAM,QAAQ,QAAQ,IAAI,KAAK,MAAM,GAAG,IAAI,IAAI;AAChD,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,MAAM,MAAM,IAAI,GAAG;AACpC,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,QAAQ,WAAW,IAAI,GAAG;AAC5B,UAAI,KAAK,QAAQ,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAClC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,UAAkB,QAA0B;AACxE,MAAI,CAAC,SAAU,QAAO,CAAC;AACvB,QAAM,MAAM,SAAS,QAAQ,MAAM;AACnC,MAAI,MAAM,EAAG,QAAO,CAAC;AACrB,QAAM,OAAO,SAAS,MAAM,MAAM,OAAO,MAAM;AAC/C,QAAM,OAAO,KAAK,QAAQ,OAAO;AACjC,QAAM,QAAQ,QAAQ,IAAI,KAAK,MAAM,GAAG,IAAI,IAAI;AAChD,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,MAAM,MAAM,IAAI,GAAG;AACpC,UAAM,UAAU,KAAK,KAAK;AAC1B,UAAM,IAAI,iBAAiB,KAAK,OAAO;AACvC,QAAI,EAAG,KAAI,KAAK,EAAE,CAAC,EAAE,KAAK,CAAC;AAAA,EAC7B;AACA,SAAO;AACT;AAEA,SAAS,aAAa,KAAc,MAAuB;AACzD,QAAM,SAAS,KAAK,YAAY;AAChC,MAAI,OAAO,QAAQ,SAAU,QAAO,cAAc,GAAG,MAAM;AAC3D,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,KAAK,CAAC,UAAU,OAAO,UAAU,YAAY,cAAc,KAAK,MAAM,MAAM;AAAA,EACzF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,OAAO,IAAI,MAAM,OAAO,EAAE,IAAI,KAAK;AACzC,SAAO,KAAK,YAAY;AAC1B;AAEA,SAAS,YAAY,OAAoC;AACvD,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,QAAM,UAAU,MAAM,KAAK;AAC3B,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,YAAY,OAA0B;AAC7C,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,OAAO;AACzB,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,UAAU,MAAM,KAAK;AAC3B,UAAI,QAAS,KAAI,KAAK,OAAO;AAAA,IAC/B;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,iBAAiB,OAA0B;AAClD,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,UAAU,MAAM,KAA
K;AAC3B,WAAO,UAAU,CAAC,OAAO,IAAI,CAAC;AAAA,EAChC;AACA,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,OAAO;AACzB,QAAI,OAAO,UAAU,UAAU;AAC7B,YAAM,UAAU,MAAM,KAAK;AAC3B,UAAI,QAAS,KAAI,KAAK,OAAO;AAC7B;AAAA,IACF;AACA,QAAI,SAAS,OAAO,UAAU,UAAU;AACtC,YAAM,OAAQ,MAAkC,MAAM;AACtD,UAAI,OAAO,SAAS,UAAU;AAC5B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,QAAS,KAAI,KAAK,OAAO;AAAA,MAC/B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WAAW,OAAoC;AACtD,MAAI,OAAO,UAAU,SAAU,QAAO,YAAY,KAAK;AACvD,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,eAAW,SAAS,OAAO;AACzB,YAAM,OAAO,WAAW,KAAK;AAC7B,UAAI,KAAM,QAAO;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AACA,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,OAAQ,MAAkC,MAAM;AACtD,QAAI,OAAO,SAAS,SAAU,QAAO,YAAY,IAAI;AAAA,EACvD;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { NamedSchemaType } from '../../../types.js';
|
|
2
|
+
import { type ArticleData } from './Article.js';
|
|
3
|
+
import { type RecipeData } from './Recipe.js';
|
|
4
|
+
import { type ProductData } from './Product.js';
|
|
5
|
+
import { type CodeSnippetData } from './CodeSnippet.js';
|
|
6
|
+
import { type PaperData } from './Paper.js';
|
|
7
|
+
import { type EventListingData } from './EventListing.js';
|
|
8
|
+
export type { NamedSchemaType } from '../../../types.js';
|
|
9
|
+
/** Union of the data shapes exported by the six schema modules imported in this file (Article, Recipe, Product, CodeSnippet, Paper, EventListing). */
export type NamedSchemaData = ArticleData | RecipeData | ProductData | CodeSnippetData | PaperData | EventListingData;
|
|
10
|
+
/** Read-only list of schema names; the compiled module fills it with "Article", "Recipe", "Product", "CodeSnippet", "Paper" and "EventListing". */
export declare const NAMED_SCHEMAS: readonly NamedSchemaType[];
|
|
11
|
+
/**
 * Type guard that narrows `s` to `NamedSchemaType`.
 *
 * @param s - Candidate schema name.
 * @returns `true` when `s` is one of the entries of `NAMED_SCHEMAS`.
 */
export declare function isNamedSchemaType(s: string): s is NamedSchemaType;
|
|
12
|
+
/**
 * Runs the extractor that corresponds to `schema`.
 *
 * @param schema - Name of the schema to extract.
 * @param html - HTML string forwarded to the selected extractor.
 * @param url - URL string forwarded to the selected extractor.
 * @returns A promise for the selected extractor's result, or for `null` when
 *   `schema` matches none of the names the implementation handles.
 */
export declare function extractNamedSchema(schema: NamedSchemaType, html: string, url: string): Promise<NamedSchemaData | null>;
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/extraction/v1/schemas/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAkB,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAChE,OAAO,EAAuB,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,EAAwB,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AACtE,OAAO,EAAsB,KAAK,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAC5E,OAAO,EAAgB,KAAK,SAAS,EAAE,MAAM,YAAY,CAAC;AAC1D,OAAO,EAAuB,KAAK,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAE/E,YAAY,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEzD,MAAM,MAAM,eAAe,GACvB,WAAW,GACX,UAAU,GACV,WAAW,GACX,eAAe,GACf,SAAS,GACT,gBAAgB,CAAC;AAErB,eAAO,MAAM,aAAa,EAAE,SAAS,eAAe,EAO1C,CAAC;AAEX,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,IAAI,eAAe,CAEjE;AAED,wBAAsB,kBAAkB,CACtC,MAAM,EAAE,eAAe,EACvB,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC,CAoBjC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { extractArticle } from "./Article.js";
|
|
2
|
+
import { extractRecipeSchema } from "./Recipe.js";
|
|
3
|
+
import { extractProductSchema } from "./Product.js";
|
|
4
|
+
import { extractCodeSnippet } from "./CodeSnippet.js";
|
|
5
|
+
import { extractPaper } from "./Paper.js";
|
|
6
|
+
import { extractEventListing } from "./EventListing.js";
|
|
7
|
+
// Names of the supported named schemas; each entry has a matching case in extractNamedSchema.
const NAMED_SCHEMAS = [
|
|
8
|
+
"Article",
|
|
9
|
+
"Recipe",
|
|
10
|
+
"Product",
|
|
11
|
+
"CodeSnippet",
|
|
12
|
+
"Paper",
|
|
13
|
+
"EventListing"
|
|
14
|
+
];
|
|
15
|
+
/**
 * Reports whether `s` is one of the names listed in NAMED_SCHEMAS.
 *
 * @param s - Candidate schema name.
 * @returns `true` when `s` equals an entry of NAMED_SCHEMAS, otherwise `false`.
 */
function isNamedSchemaType(s) {
  const isKnown = NAMED_SCHEMAS.some((name) => name === s);
  return isKnown;
}
|
|
18
|
+
/**
 * Dispatches to the extractor that corresponds to `schema`.
 *
 * @param schema - Schema name; compared by strict equality against the known names.
 * @param html - Forwarded unchanged as the extractor's first argument.
 * @param url - Forwarded unchanged as the extractor's second argument.
 * @returns A promise for the selected extractor's result, or for `null` when
 *   `schema` matches none of the known names.
 */
async function extractNamedSchema(schema, html, url) {
  if (schema === "Article") return extractArticle(html, url);
  if (schema === "Recipe") return extractRecipeSchema(html, url);
  if (schema === "Product") return extractProductSchema(html, url);
  if (schema === "CodeSnippet") return extractCodeSnippet(html, url);
  if (schema === "Paper") return extractPaper(html, url);
  if (schema === "EventListing") return extractEventListing(html, url);

  // Unrecognised schema name: there is no extractor to run.
  return null;
}
|
|
39
|
+
export {
|
|
40
|
+
NAMED_SCHEMAS,
|
|
41
|
+
extractNamedSchema,
|
|
42
|
+
isNamedSchemaType
|
|
43
|
+
};
|
|
44
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/extraction/v1/schemas/index.ts"],"sourcesContent":["import type { NamedSchemaType } from '../../../types.js';\nimport { extractArticle, type ArticleData } from './Article.js';\nimport { extractRecipeSchema, type RecipeData } from './Recipe.js';\nimport { extractProductSchema, type ProductData } from './Product.js';\nimport { extractCodeSnippet, type CodeSnippetData } from './CodeSnippet.js';\nimport { extractPaper, type PaperData } from './Paper.js';\nimport { extractEventListing, type EventListingData } from './EventListing.js';\n\nexport type { NamedSchemaType } from '../../../types.js';\n\nexport type NamedSchemaData =\n | ArticleData\n | RecipeData\n | ProductData\n | CodeSnippetData\n | PaperData\n | EventListingData;\n\nexport const NAMED_SCHEMAS: readonly NamedSchemaType[] = [\n 'Article',\n 'Recipe',\n 'Product',\n 'CodeSnippet',\n 'Paper',\n 'EventListing',\n] as const;\n\nexport function isNamedSchemaType(s: string): s is NamedSchemaType {\n return (NAMED_SCHEMAS as readonly string[]).includes(s);\n}\n\nexport async function extractNamedSchema(\n schema: NamedSchemaType,\n html: string,\n url: string,\n): Promise<NamedSchemaData | null> {\n switch (schema) {\n case 'Article':\n return extractArticle(html, url);\n case 'Recipe':\n return extractRecipeSchema(html, url);\n case 'Product':\n return extractProductSchema(html, url);\n case 'CodeSnippet':\n return extractCodeSnippet(html, url);\n case 'Paper':\n return extractPaper(html, url);\n case 'EventListing':\n return extractEventListing(html, url);\n default: {\n const _exhaustive: never = schema;\n void _exhaustive;\n return null;\n }\n 
}\n}\n"],"mappings":"AACA,SAAS,sBAAwC;AACjD,SAAS,2BAA4C;AACrD,SAAS,4BAA8C;AACvD,SAAS,0BAAgD;AACzD,SAAS,oBAAoC;AAC7C,SAAS,2BAAkD;AAYpD,MAAM,gBAA4C;AAAA,EACvD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,kBAAkB,GAAiC;AACjE,SAAQ,cAAoC,SAAS,CAAC;AACxD;AAEA,eAAsB,mBACpB,QACA,MACA,KACiC;AACjC,UAAQ,QAAQ;AAAA,IACd,KAAK;AACH,aAAO,eAAe,MAAM,GAAG;AAAA,IACjC,KAAK;AACH,aAAO,oBAAoB,MAAM,GAAG;AAAA,IACtC,KAAK;AACH,aAAO,qBAAqB,MAAM,GAAG;AAAA,IACvC,KAAK;AACH,aAAO,mBAAmB,MAAM,GAAG;AAAA,IACrC,KAAK;AACH,aAAO,aAAa,MAAM,GAAG;AAAA,IAC/B,KAAK;AACH,aAAO,oBAAoB,MAAM,GAAG;AAAA,IACtC,SAAS;AACP,YAAM,cAAqB;AAC3B,WAAK;AACL,aAAO;AAAA,IACT;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Extractor } from '../../types.js';
|
|
2
|
+
/**
 * Registers a site extractor.
 *
 * NOTE(review): the implementation is not visible from this declaration file;
 * presumably `extractor` becomes part of the list returned by
 * `getSiteExtractors` — confirm against site-extractors.js.
 *
 * @param extractor - The extractor to register.
 */
export declare function registerSiteExtractor(extractor: Extractor): void;
|
|
3
|
+
/**
 * Returns the site extractors as a read-only array.
 *
 * NOTE(review): ordering and whether the array is a copy or the live registry
 * cannot be determined from this declaration — confirm against site-extractors.js.
 */
export declare function getSiteExtractors(): readonly Extractor[];
|
|
4
|
+
/**
 * Test-only helper, per its name.
 *
 * NOTE(review): presumably restores the site-extractor registry to its initial
 * state between tests — the implementation is not visible here; confirm against
 * site-extractors.js.
 */
export declare function _resetSiteExtractorsForTest(): void;
|
|
5
|
+
//# sourceMappingURL=site-extractors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"site-extractors.d.ts","sourceRoot":"","sources":["../../../src/extraction/v1/site-extractors.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAiBhD,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAEhE;AAED,wBAAgB,iBAAiB,IAAI,SAAS,SAAS,EAAE,CAExD;AAED,wBAAgB,2BAA2B,IAAI,IAAI,CAQlD"}
|