mixdog 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +31 -0
- package/.claude-plugin/plugin.json +20 -0
- package/.gitattributes +34 -0
- package/.mcp.json +14 -0
- package/ARCHITECTURE.md +77 -0
- package/CHANGELOG.md +7 -0
- package/CONTRIBUTING.md +45 -0
- package/DATA-FLOW.md +79 -0
- package/LICENSE +21 -0
- package/README.md +389 -0
- package/SECURITY.md +138 -0
- package/UNINSTALL.md +112 -0
- package/agents/maintenance.md +5 -0
- package/agents/memory-classification.md +30 -0
- package/agents/scheduler-task.md +18 -0
- package/agents/webhook-handler.md +27 -0
- package/agents/worker.md +24 -0
- package/bin/bridge +133 -0
- package/bin/statusline-launcher.mjs +78 -0
- package/bin/statusline-lib.mjs +550 -0
- package/bin/statusline.mjs +607 -0
- package/bun.lock +802 -0
- package/commands/config.md +16 -0
- package/commands/doctor.md +13 -0
- package/commands/setup.md +17 -0
- package/defaults/cycle3-review-prompt.md +90 -0
- package/defaults/hidden-roles.json +65 -0
- package/defaults/memory-chunk-prompt.md +63 -0
- package/defaults/memory-promote-prompt.md +135 -0
- package/defaults/mixdog-config.template.json +27 -0
- package/defaults/user-workflow.json +8 -0
- package/defaults/user-workflow.md +12 -0
- package/hooks/hooks.json +73 -0
- package/hooks/lib/active-instance.cjs +77 -0
- package/hooks/lib/permission-evaluator.cjs +411 -0
- package/hooks/lib/permission-route.cjs +63 -0
- package/hooks/lib/permission-rules.cjs +170 -0
- package/hooks/lib/settings-loader.cjs +116 -0
- package/hooks/post-tool-use.cjs +84 -0
- package/hooks/pre-mcp-sandbox.cjs +158 -0
- package/hooks/pre-tool-subagent.cjs +253 -0
- package/hooks/session-start.cjs +1372 -0
- package/hooks/turn-timer.cjs +82 -0
- package/lib/claude-md-writer.cjs +386 -0
- package/lib/config-cjs.cjs +61 -0
- package/lib/hook-pipe-path.cjs +10 -0
- package/lib/keychain-cjs.cjs +263 -0
- package/lib/plugin-paths.cjs +61 -0
- package/lib/rules-builder.cjs +241 -0
- package/lib/text-utils.cjs +61 -0
- package/native/README.md +117 -0
- package/native/prebuilt/linux-aarch64/mixdog-shim +0 -0
- package/native/prebuilt/linux-x86_64/mixdog-shim +0 -0
- package/native/prebuilt/macos-aarch64/mixdog-shim +0 -0
- package/native/prebuilt/macos-x86_64/mixdog-shim +0 -0
- package/native/prebuilt/windows-x86_64/mixdog-shim.exe +0 -0
- package/package.json +107 -0
- package/prompts/code-review.txt +16 -0
- package/prompts/security-audit.txt +17 -0
- package/rules/bridge/00-common.md +39 -0
- package/rules/bridge/20-skip-protocol.md +18 -0
- package/rules/bridge/30-explorer.md +33 -0
- package/rules/bridge/40-cycle1-agent.md +52 -0
- package/rules/bridge/41-cycle2-agent.md +62 -0
- package/rules/bridge/42-cycle3-agent.md +44 -0
- package/rules/lead/00-tool-lead.md +61 -0
- package/rules/lead/01-general.md +23 -0
- package/rules/lead/02-channels.md +49 -0
- package/rules/lead/03-team.md +27 -0
- package/rules/lead/04-workflow.md +20 -0
- package/rules/shared/00-language.md +14 -0
- package/rules/shared/01-tool.md +138 -0
- package/scripts/bootstrap.mjs +184 -0
- package/scripts/bridge-unify-smoke.mjs +308 -0
- package/scripts/build-runtime-linux.sh +348 -0
- package/scripts/build-runtime-macos.sh +217 -0
- package/scripts/build-runtime-windows.ps1 +242 -0
- package/scripts/builtin-utils-smoke.mjs +392 -0
- package/scripts/check-json.mjs +45 -0
- package/scripts/check-syntax-changed.mjs +102 -0
- package/scripts/check-syntax.mjs +58 -0
- package/scripts/code-graph-batch.test.mjs +33 -0
- package/scripts/config-preserve-smoke.mjs +180 -0
- package/scripts/doctor.mjs +484 -0
- package/scripts/edit-normalize-fuzz.mjs +130 -0
- package/scripts/edit-normalize-smoke.mjs +401 -0
- package/scripts/edit-operation-smoke.mjs +369 -0
- package/scripts/edit2-smoke.mjs +63 -0
- package/scripts/fuzzy-e2e.mjs +28 -0
- package/scripts/fuzzy-smoke.mjs +26 -0
- package/scripts/generate-runtime-manifest.mjs +166 -0
- package/scripts/guard-smoke.mjs +66 -0
- package/scripts/hidden-role-schema-smoke.mjs +162 -0
- package/scripts/hook-routing-smoke.mjs +29 -0
- package/scripts/inject-input.ps1 +204 -0
- package/scripts/io-complex-smoke.mjs +667 -0
- package/scripts/io-explore-bench.mjs +424 -0
- package/scripts/io-guardrails-smoke.mjs +205 -0
- package/scripts/io-mini-bench-baseline.json +11 -0
- package/scripts/io-mini-bench.mjs +216 -0
- package/scripts/io-route-harness.mjs +933 -0
- package/scripts/io-telemetry-report.mjs +691 -0
- package/scripts/mutation-bench.mjs +564 -0
- package/scripts/mutation-io-smoke.mjs +1081 -0
- package/scripts/native-patch-bridge-smoke.mjs +288 -0
- package/scripts/native-patch-smoke.mjs +304 -0
- package/scripts/patch-interior-context-smoke.mjs +49 -0
- package/scripts/patch-newline-utf8-smoke.mjs +157 -0
- package/scripts/perf-hook-smoke.mjs +71 -0
- package/scripts/permission-eval-smoke.mjs +426 -0
- package/scripts/prep-patch.mjs +53 -0
- package/scripts/prep-shim.mjs +96 -0
- package/scripts/provider-cache-smoke.mjs +687 -0
- package/scripts/report-runtime-health.mjs +132 -0
- package/scripts/run-mcp.mjs +1547 -0
- package/scripts/salvage-v4a-shatter.test.mjs +58 -0
- package/scripts/scoped-cache-io-smoke.mjs +103 -0
- package/scripts/shell-policy-round3-smoke.mjs +46 -0
- package/scripts/smoke-runtime-negative.ps1 +100 -0
- package/scripts/smoke-runtime-negative.sh +95 -0
- package/scripts/stall-policy-smoke.mjs +50 -0
- package/scripts/start-memory-worker.mjs +23 -0
- package/scripts/statusline-launcher-smoke.mjs +82 -0
- package/scripts/stress-atomic-write.mjs +1028 -0
- package/scripts/test-config-rmw-restore.mjs +122 -0
- package/scripts/test-fault-inject.mjs +164 -0
- package/scripts/test-large-file.mjs +174 -0
- package/scripts/tool-edge-smoke.mjs +209 -0
- package/scripts/uninstall.mjs +201 -0
- package/scripts/webhook-selfheal-smoke.mjs +29 -0
- package/scripts/write-overwrite-guard-smoke.mjs +56 -0
- package/server-main.mjs +3055 -0
- package/server.mjs +468 -0
- package/setup/config-merge.mjs +254 -0
- package/setup/install.mjs +120 -0
- package/setup/launch-core.mjs +507 -0
- package/setup/launch.mjs +101 -0
- package/setup/setup-server.mjs +3206 -0
- package/setup/setup.html +3693 -0
- package/skills/retro-skill-proposer/SKILL.md +92 -0
- package/skills/schedule-add/SKILL.md +77 -0
- package/skills/setup/SKILL.md +346 -0
- package/skills/webhook-add/SKILL.md +81 -0
- package/src/agent/bridge-stall-watchdog.mjs +337 -0
- package/src/agent/index.mjs +2138 -0
- package/src/agent/orchestrator/activity-bus.mjs +38 -0
- package/src/agent/orchestrator/ai-wrapped-dispatch.mjs +1010 -0
- package/src/agent/orchestrator/bridge-retry.mjs +220 -0
- package/src/agent/orchestrator/bridge-trace.mjs +583 -0
- package/src/agent/orchestrator/cache-mtime.mjs +58 -0
- package/src/agent/orchestrator/config.mjs +358 -0
- package/src/agent/orchestrator/context/collect.mjs +651 -0
- package/src/agent/orchestrator/dispatch-persist.mjs +549 -0
- package/src/agent/orchestrator/drain-registry.mjs +50 -0
- package/src/agent/orchestrator/explore-validator.mjs +8 -0
- package/src/agent/orchestrator/internal-roles.mjs +118 -0
- package/src/agent/orchestrator/internal-tools.mjs +88 -0
- package/src/agent/orchestrator/jobs.mjs +116 -0
- package/src/agent/orchestrator/mcp/client.mjs +364 -0
- package/src/agent/orchestrator/providers/anthropic-betas.mjs +21 -0
- package/src/agent/orchestrator/providers/anthropic-oauth.mjs +1745 -0
- package/src/agent/orchestrator/providers/anthropic.mjs +437 -0
- package/src/agent/orchestrator/providers/gemini.mjs +1175 -0
- package/src/agent/orchestrator/providers/grok-oauth.mjs +782 -0
- package/src/agent/orchestrator/providers/model-catalog.mjs +241 -0
- package/src/agent/orchestrator/providers/openai-compat.mjs +1467 -0
- package/src/agent/orchestrator/providers/openai-oauth-ws.mjs +1890 -0
- package/src/agent/orchestrator/providers/openai-oauth.mjs +1307 -0
- package/src/agent/orchestrator/providers/openai-ws.mjs +104 -0
- package/src/agent/orchestrator/providers/registry.mjs +192 -0
- package/src/agent/orchestrator/providers/retry-classifier.mjs +325 -0
- package/src/agent/orchestrator/session/abort-lookup.mjs +13 -0
- package/src/agent/orchestrator/session/cache/post-edit-marks.mjs +42 -0
- package/src/agent/orchestrator/session/cache/prefetch-cache.mjs +142 -0
- package/src/agent/orchestrator/session/cache/read-cache.mjs +319 -0
- package/src/agent/orchestrator/session/cache/scoped-cache-outcome.mjs +11 -0
- package/src/agent/orchestrator/session/cache/scoped-cache.mjs +361 -0
- package/src/agent/orchestrator/session/cache/util.mjs +49 -0
- package/src/agent/orchestrator/session/loop.mjs +1478 -0
- package/src/agent/orchestrator/session/manager.mjs +1975 -0
- package/src/agent/orchestrator/session/read-dedup.mjs +6 -0
- package/src/agent/orchestrator/session/result-classification.mjs +65 -0
- package/src/agent/orchestrator/session/save-session-worker.mjs +18 -0
- package/src/agent/orchestrator/session/store.mjs +624 -0
- package/src/agent/orchestrator/session/stream-watchdog.mjs +130 -0
- package/src/agent/orchestrator/session/tool-result-offload.mjs +166 -0
- package/src/agent/orchestrator/session/trim.mjs +491 -0
- package/src/agent/orchestrator/smart-bridge/CACHE-SHARD.md +115 -0
- package/src/agent/orchestrator/smart-bridge/bridge-llm.mjs +327 -0
- package/src/agent/orchestrator/smart-bridge/cache-obs.mjs +150 -0
- package/src/agent/orchestrator/smart-bridge/cache-strategy.mjs +228 -0
- package/src/agent/orchestrator/smart-bridge/index.mjs +215 -0
- package/src/agent/orchestrator/smart-bridge/profiles.mjs +37 -0
- package/src/agent/orchestrator/smart-bridge/registry.mjs +348 -0
- package/src/agent/orchestrator/smart-bridge/session-builder.mjs +116 -0
- package/src/agent/orchestrator/stall-policy.mjs +195 -0
- package/src/agent/orchestrator/tool-loop-guard.mjs +75 -0
- package/src/agent/orchestrator/tools/bash-policy-scan.mjs +77 -0
- package/src/agent/orchestrator/tools/bash-session.mjs +721 -0
- package/src/agent/orchestrator/tools/builtin/advisory-lock.mjs +171 -0
- package/src/agent/orchestrator/tools/builtin/arg-guard.mjs +455 -0
- package/src/agent/orchestrator/tools/builtin/atomic-write.mjs +236 -0
- package/src/agent/orchestrator/tools/builtin/bash-tool.mjs +480 -0
- package/src/agent/orchestrator/tools/builtin/binary-file.mjs +76 -0
- package/src/agent/orchestrator/tools/builtin/builtin-tools.mjs +256 -0
- package/src/agent/orchestrator/tools/builtin/cache-layers.mjs +386 -0
- package/src/agent/orchestrator/tools/builtin/cwd-utils.mjs +37 -0
- package/src/agent/orchestrator/tools/builtin/device-paths.mjs +154 -0
- package/src/agent/orchestrator/tools/builtin/diagnostics-tool.mjs +292 -0
- package/src/agent/orchestrator/tools/builtin/diff-utils.mjs +109 -0
- package/src/agent/orchestrator/tools/builtin/edit-base-guard.mjs +58 -0
- package/src/agent/orchestrator/tools/builtin/edit-byte-plan.mjs +240 -0
- package/src/agent/orchestrator/tools/builtin/edit-byte-utils.mjs +113 -0
- package/src/agent/orchestrator/tools/builtin/edit-commit.mjs +74 -0
- package/src/agent/orchestrator/tools/builtin/edit-context-utils.mjs +242 -0
- package/src/agent/orchestrator/tools/builtin/edit-diagnostics.mjs +211 -0
- package/src/agent/orchestrator/tools/builtin/edit-engine.mjs +1364 -0
- package/src/agent/orchestrator/tools/builtin/edit-failure-context.mjs +126 -0
- package/src/agent/orchestrator/tools/builtin/edit-hint.mjs +141 -0
- package/src/agent/orchestrator/tools/builtin/edit-match-utils.mjs +194 -0
- package/src/agent/orchestrator/tools/builtin/edit-partial-write.mjs +60 -0
- package/src/agent/orchestrator/tools/builtin/edit-stale-refresh.mjs +168 -0
- package/src/agent/orchestrator/tools/builtin/edit-tool.mjs +173 -0
- package/src/agent/orchestrator/tools/builtin/edit-utf8-guard.mjs +48 -0
- package/src/agent/orchestrator/tools/builtin/fs-reachability.mjs +48 -0
- package/src/agent/orchestrator/tools/builtin/fuzzy-match.mjs +99 -0
- package/src/agent/orchestrator/tools/builtin/glob-walk.mjs +170 -0
- package/src/agent/orchestrator/tools/builtin/grep-formatting.mjs +113 -0
- package/src/agent/orchestrator/tools/builtin/hash-utils.mjs +6 -0
- package/src/agent/orchestrator/tools/builtin/list-formatting.mjs +7 -0
- package/src/agent/orchestrator/tools/builtin/list-tool.mjs +593 -0
- package/src/agent/orchestrator/tools/builtin/native-edit-runner.mjs +89 -0
- package/src/agent/orchestrator/tools/builtin/notebook-edit-tool.mjs +300 -0
- package/src/agent/orchestrator/tools/builtin/open-config-tool.mjs +26 -0
- package/src/agent/orchestrator/tools/builtin/path-diagnostics.mjs +152 -0
- package/src/agent/orchestrator/tools/builtin/path-locks.mjs +35 -0
- package/src/agent/orchestrator/tools/builtin/path-utils.mjs +201 -0
- package/src/agent/orchestrator/tools/builtin/read-args.mjs +103 -0
- package/src/agent/orchestrator/tools/builtin/read-batch.mjs +172 -0
- package/src/agent/orchestrator/tools/builtin/read-constants.mjs +40 -0
- package/src/agent/orchestrator/tools/builtin/read-formatting.mjs +118 -0
- package/src/agent/orchestrator/tools/builtin/read-image-resize.mjs +189 -0
- package/src/agent/orchestrator/tools/builtin/read-image.mjs +88 -0
- package/src/agent/orchestrator/tools/builtin/read-lines.mjs +12 -0
- package/src/agent/orchestrator/tools/builtin/read-mode-tool.mjs +455 -0
- package/src/agent/orchestrator/tools/builtin/read-open.mjs +190 -0
- package/src/agent/orchestrator/tools/builtin/read-range-index.mjs +271 -0
- package/src/agent/orchestrator/tools/builtin/read-ranges.mjs +26 -0
- package/src/agent/orchestrator/tools/builtin/read-single-tool.mjs +728 -0
- package/src/agent/orchestrator/tools/builtin/read-snapshot-runtime.mjs +173 -0
- package/src/agent/orchestrator/tools/builtin/read-special-files.mjs +268 -0
- package/src/agent/orchestrator/tools/builtin/read-streaming.mjs +602 -0
- package/src/agent/orchestrator/tools/builtin/read-tool.mjs +530 -0
- package/src/agent/orchestrator/tools/builtin/read-windows.mjs +107 -0
- package/src/agent/orchestrator/tools/builtin/rename-tool.mjs +196 -0
- package/src/agent/orchestrator/tools/builtin/rg-runner.mjs +422 -0
- package/src/agent/orchestrator/tools/builtin/search-builders.mjs +158 -0
- package/src/agent/orchestrator/tools/builtin/search-tool.mjs +869 -0
- package/src/agent/orchestrator/tools/builtin/shell-analysis.mjs +653 -0
- package/src/agent/orchestrator/tools/builtin/shell-jobs.mjs +936 -0
- package/src/agent/orchestrator/tools/builtin/shell-output.mjs +36 -0
- package/src/agent/orchestrator/tools/builtin/shell-runtime.mjs +214 -0
- package/src/agent/orchestrator/tools/builtin/snapshot-helpers.mjs +143 -0
- package/src/agent/orchestrator/tools/builtin/snapshot-store.mjs +206 -0
- package/src/agent/orchestrator/tools/builtin/snapshot-validation.mjs +98 -0
- package/src/agent/orchestrator/tools/builtin/text-stats.mjs +69 -0
- package/src/agent/orchestrator/tools/builtin/windows-roots.mjs +23 -0
- package/src/agent/orchestrator/tools/builtin/write-tool.mjs +401 -0
- package/src/agent/orchestrator/tools/builtin.mjs +500 -0
- package/src/agent/orchestrator/tools/code-graph-prewarm-worker.mjs +39 -0
- package/src/agent/orchestrator/tools/code-graph-tool-defs.mjs +24 -0
- package/src/agent/orchestrator/tools/code-graph.mjs +4095 -0
- package/src/agent/orchestrator/tools/cwd-tool.mjs +298 -0
- package/src/agent/orchestrator/tools/destructive-warning.mjs +323 -0
- package/src/agent/orchestrator/tools/edit-normalize.mjs +603 -0
- package/src/agent/orchestrator/tools/env-scrub.mjs +100 -0
- package/src/agent/orchestrator/tools/graph-binary-fetcher.mjs +144 -0
- package/src/agent/orchestrator/tools/graph-manifest.json +26 -0
- package/src/agent/orchestrator/tools/host-input.mjs +204 -0
- package/src/agent/orchestrator/tools/mutation-content-cache.mjs +67 -0
- package/src/agent/orchestrator/tools/mutation-planner.mjs +75 -0
- package/src/agent/orchestrator/tools/next-call-utils.mjs +48 -0
- package/src/agent/orchestrator/tools/patch-binary-fetcher.mjs +133 -0
- package/src/agent/orchestrator/tools/patch-manifest.json +26 -0
- package/src/agent/orchestrator/tools/patch-tool-defs.mjs +20 -0
- package/src/agent/orchestrator/tools/patch.mjs +2754 -0
- package/src/agent/orchestrator/tools/progress-message.mjs +118 -0
- package/src/agent/orchestrator/tools/result-compression.mjs +279 -0
- package/src/agent/orchestrator/tools/shell-command.mjs +865 -0
- package/src/agent/orchestrator/tools/shell-exec-policy.mjs +89 -0
- package/src/agent/orchestrator/tools/shell-policy-danger-target.mjs +27 -0
- package/src/agent/orchestrator/tools/shell-policy-imports.mjs +7 -0
- package/src/agent/orchestrator/tools/shell-policy.mjs +345 -0
- package/src/agent/orchestrator/tools/shell-snapshot.mjs +313 -0
- package/src/agent/orchestrator/workflow-store.mjs +93 -0
- package/src/agent/tool-defs.mjs +103 -0
- package/src/channels/backends/discord.mjs +784 -0
- package/src/channels/data/voice-runtime-manifest.json +138 -0
- package/src/channels/index.mjs +3229 -0
- package/src/channels/lib/cli-worker-host.mjs +12 -0
- package/src/channels/lib/config-lock.mjs +13 -0
- package/src/channels/lib/config.mjs +292 -0
- package/src/channels/lib/drop-trace.mjs +71 -0
- package/src/channels/lib/event-pipeline.mjs +81 -0
- package/src/channels/lib/event-queue.mjs +345 -0
- package/src/channels/lib/executor.mjs +168 -0
- package/src/channels/lib/format.mjs +188 -0
- package/src/channels/lib/holidays.mjs +138 -0
- package/src/channels/lib/hook-pipe-server.mjs +802 -0
- package/src/channels/lib/interaction-workflows.mjs +184 -0
- package/src/channels/lib/memory-client.mjs +149 -0
- package/src/channels/lib/output-forwarder.mjs +765 -0
- package/src/channels/lib/runtime-paths.mjs +479 -0
- package/src/channels/lib/scheduler.mjs +723 -0
- package/src/channels/lib/session-control.mjs +36 -0
- package/src/channels/lib/session-discovery.mjs +103 -0
- package/src/channels/lib/settings.mjs +11 -0
- package/src/channels/lib/state-file.mjs +68 -0
- package/src/channels/lib/status-snapshot.mjs +219 -0
- package/src/channels/lib/tool-format.mjs +140 -0
- package/src/channels/lib/transcript-discovery.mjs +195 -0
- package/src/channels/lib/voice-runtime-fetcher.mjs +734 -0
- package/src/channels/lib/webhook.mjs +1179 -0
- package/src/channels/lib/whisper-server.mjs +477 -0
- package/src/channels/tool-defs.mjs +170 -0
- package/src/daemon/host.mjs +118 -0
- package/src/daemon/mcp-transport.mjs +47 -0
- package/src/daemon/session.mjs +100 -0
- package/src/daemon/thin-client.mjs +71 -0
- package/src/daemon/transport.mjs +163 -0
- package/src/memory/data/runtime-manifest.json +40 -0
- package/src/memory/index.mjs +3305 -0
- package/src/memory/lib/agent-ipc.mjs +93 -0
- package/src/memory/lib/bridge-trace-queries.mjs +120 -0
- package/src/memory/lib/core-memory-store.mjs +330 -0
- package/src/memory/lib/embedding-provider.mjs +269 -0
- package/src/memory/lib/embedding-worker.mjs +323 -0
- package/src/memory/lib/llm-worker-host.mjs +17 -0
- package/src/memory/lib/memory-cycle.mjs +11 -0
- package/src/memory/lib/memory-cycle1.mjs +641 -0
- package/src/memory/lib/memory-cycle2.mjs +1284 -0
- package/src/memory/lib/memory-cycle3.mjs +540 -0
- package/src/memory/lib/memory-embed.mjs +299 -0
- package/src/memory/lib/memory-extraction.mjs +5 -0
- package/src/memory/lib/memory-maintenance-store.mjs +32 -0
- package/src/memory/lib/memory-ops-policy.mjs +190 -0
- package/src/memory/lib/memory-recall-id-patch.mjs +15 -0
- package/src/memory/lib/memory-recall-read-query.mjs +7 -0
- package/src/memory/lib/memory-recall-scope-filter.mjs +63 -0
- package/src/memory/lib/memory-recall-store.mjs +621 -0
- package/src/memory/lib/memory-retrievers.mjs +112 -0
- package/src/memory/lib/memory-score.mjs +71 -0
- package/src/memory/lib/memory-text-utils.mjs +58 -0
- package/src/memory/lib/memory.mjs +412 -0
- package/src/memory/lib/model-profile.mjs +85 -0
- package/src/memory/lib/pg/adapter.mjs +308 -0
- package/src/memory/lib/pg/process.mjs +360 -0
- package/src/memory/lib/pg/supervisor.mjs +396 -0
- package/src/memory/lib/project-id-resolver.mjs +86 -0
- package/src/memory/lib/runtime-fetcher.mjs +442 -0
- package/src/memory/lib/trace-store.mjs +728 -0
- package/src/memory/tool-defs.mjs +79 -0
- package/src/search/index.mjs +1173 -0
- package/src/search/lib/backends/anthropic-oauth.mjs +98 -0
- package/src/search/lib/backends/exa.mjs +50 -0
- package/src/search/lib/backends/firecrawl.mjs +61 -0
- package/src/search/lib/backends/gemini-api.mjs +83 -0
- package/src/search/lib/backends/grok-oauth.mjs +86 -0
- package/src/search/lib/backends/index.mjs +150 -0
- package/src/search/lib/backends/openai-api.mjs +144 -0
- package/src/search/lib/backends/openai-oauth.mjs +98 -0
- package/src/search/lib/backends/openai-web-search.mjs +76 -0
- package/src/search/lib/backends/tavily.mjs +55 -0
- package/src/search/lib/backends/xai-api.mjs +113 -0
- package/src/search/lib/cache.mjs +131 -0
- package/src/search/lib/config.mjs +192 -0
- package/src/search/lib/formatter.mjs +115 -0
- package/src/search/lib/provider-usage.mjs +67 -0
- package/src/search/lib/providers.mjs +47 -0
- package/src/search/lib/search-intent.mjs +109 -0
- package/src/search/lib/setup-handler.mjs +261 -0
- package/src/search/lib/state.mjs +201 -0
- package/src/search/lib/web-tools.mjs +1207 -0
- package/src/search/tool-defs.mjs +83 -0
- package/src/setup/defender-exclusion.mjs +183 -0
- package/src/shared/abort-controller.mjs +15 -0
- package/src/shared/atomic-file.mjs +420 -0
- package/src/shared/config.mjs +350 -0
- package/src/shared/daemon-recycle.mjs +108 -0
- package/src/shared/disable-claude-builtins.mjs +88 -0
- package/src/shared/err-text.mjs +12 -0
- package/src/shared/llm/cost.mjs +66 -0
- package/src/shared/llm/http-agent.mjs +123 -0
- package/src/shared/llm/index.mjs +41 -0
- package/src/shared/llm/pid-cleanup.mjs +27 -0
- package/src/shared/llm/usage-log.mjs +47 -0
- package/src/shared/plugin-paths.mjs +58 -0
- package/src/shared/schedules-store.mjs +70 -0
- package/src/shared/seed.mjs +119 -0
- package/src/shared/user-cwd.mjs +213 -0
- package/src/shared/user-data-guard.mjs +238 -0
- package/src/status/aggregator.mjs +584 -0
- package/src/status/server.mjs +413 -0
- package/tools.json +1653 -0
|
@@ -0,0 +1,1173 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js'
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'
|
|
5
|
+
import { z } from 'zod'
|
|
6
|
+
import { zodToJsonSchema } from 'zod-to-json-schema'
|
|
7
|
+
import fs from 'fs'
|
|
8
|
+
import path from 'path'
|
|
9
|
+
import {
|
|
10
|
+
ensureDataDir,
|
|
11
|
+
getFirecrawlApiKey,
|
|
12
|
+
getRequestTimeoutMs,
|
|
13
|
+
getRawSearchMaxResults,
|
|
14
|
+
getRawProviderCredentialSource,
|
|
15
|
+
getRawProviderApiKey,
|
|
16
|
+
loadConfig,
|
|
17
|
+
PLUGIN_ROOT,
|
|
18
|
+
} from './lib/config.mjs'
|
|
19
|
+
import { normalizeErrorMessage } from '../agent/orchestrator/tools/builtin/path-diagnostics.mjs'
|
|
20
|
+
import { getAgentApiKey } from '../shared/config.mjs'
|
|
21
|
+
|
|
22
|
+
/**
 * Read the plugin version from `.claude-plugin/plugin.json` under PLUGIN_ROOT.
 *
 * Best-effort by design: any failure while reading or parsing the manifest,
 * as well as a missing/falsy `version` field, yields the fallback '0.0.1'.
 *
 * @returns {*} The manifest's `version` value when truthy, otherwise '0.0.1'.
 */
function readPluginVersion() {
  const fallbackVersion = '0.0.1'
  try {
    const manifestFile = path.join(PLUGIN_ROOT, '.claude-plugin', 'plugin.json')
    const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf8'))
    return manifest.version || fallbackVersion
  } catch {
    // Unreadable or malformed manifest: fall back rather than fail startup.
    return fallbackVersion
  }
}
|
|
28
|
+
const PLUGIN_VERSION = readPluginVersion()
|
|
29
|
+
import {
|
|
30
|
+
buildCacheKey,
|
|
31
|
+
buildCacheMeta,
|
|
32
|
+
flushCacheState,
|
|
33
|
+
getCachedEntry,
|
|
34
|
+
loadCacheState,
|
|
35
|
+
setCachedEntry,
|
|
36
|
+
} from './lib/cache.mjs'
|
|
37
|
+
import { fetchProviderUsageSnapshot } from './lib/provider-usage.mjs'
|
|
38
|
+
import {
|
|
39
|
+
flushUsageState,
|
|
40
|
+
loadUsageState,
|
|
41
|
+
noteProviderFailure,
|
|
42
|
+
classifyProviderError,
|
|
43
|
+
noteProviderSuccess,
|
|
44
|
+
saveUsageState,
|
|
45
|
+
updateProviderState,
|
|
46
|
+
} from './lib/state.mjs'
|
|
47
|
+
import {
|
|
48
|
+
getProvidersWithApiKeys,
|
|
49
|
+
RAW_PROVIDER_CAPABILITIES,
|
|
50
|
+
} from './lib/providers.mjs'
|
|
51
|
+
import { dispatchSearchBackend, PROVIDER_CAPS } from './lib/backends/index.mjs'
|
|
52
|
+
import { normalizeSearchIntent } from './lib/search-intent.mjs'
|
|
53
|
+
import { assertPublicUrl, crawlSite, getScrapeCapabilities, pinnedFetch, scrapeUrls } from './lib/web-tools.mjs'
|
|
54
|
+
import { formatResponse } from './lib/formatter.mjs'
|
|
55
|
+
import { handleSetup } from './lib/setup-handler.mjs'
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
ensureDataDir()
|
|
59
|
+
|
|
60
|
+
// Zod schema for the search arguments. Only `keywords` is required; every
// other field is optional.
const searchArgsSchema = z.object({
  // A single non-empty query, or a non-empty list of non-empty queries.
  keywords: z
    .union([z.string().min(1), z.array(z.string().min(1)).min(1)])
    .describe('Search query string or array of queries.'),
  site: z.string().optional().describe('Restrict results to a specific domain.'),
  type: z.enum(['web', 'news', 'images']).optional().describe('Search type. Default: web.'),
  maxResults: z
    .number()
    .int()
    .min(1)
    .max(20)
    .optional()
    .describe('Maximum number of results to return (1-20).'),
  // Locale is accepted either as a plain string or as a structured object
  // whose fields are all optional.
  locale: z
    .union([
      z.string(),
      z.object({
        country: z.string().optional(),
        language: z.string().optional(),
        region: z.string().optional(),
        city: z.string().optional(),
        timezone: z.string().optional(),
      }),
    ])
    .optional()
    .describe('Explicit search locale. String such as "ko-KR" or object with country/language/city/region/timezone.'),
  contextSize: z
    .enum(['low', 'medium', 'high'])
    .optional()
    .describe('Search context size for providers that support it. Default: low.'),
})
|
|
77
|
+
|
|
78
|
+
// Zod schema for URL-fetch arguments: one URL or a non-empty list of URLs,
// plus optional slicing controls for chunked reads.
const searchUrlArgsSchema = z.object({
  url: z
    .union([z.string().url(), z.array(z.string().url()).min(1)])
    .describe('Single URL or array of URLs to fetch.'),
  startIndex: z
    .number()
    .int()
    .min(0)
    .optional()
    .describe('Character offset to start the slice from (default 0). For chunked reading of large pages, pass the previous response\'s nextStartIndex.'),
  maxLength: z
    .number()
    .int()
    .min(0)
    .optional()
    .describe('Max characters to return per call (default 50000). Pass 0 for unlimited.'),
  // NOTE(review): `cwd` carries no description here; its purpose is not
  // visible in this part of the file.
  cwd: z.string().optional(),
})
|
|
84
|
+
|
|
85
|
+
const SEARCH_EMPTY_STRING_FIELDS = ['keywords', 'site', 'type', 'locale', 'contextSize']
|
|
86
|
+
|
|
87
|
+
/**
 * Tidy raw search arguments before schema validation.
 *
 * - Non-object input (including arrays and falsy values) is returned as-is.
 * - For each field in SEARCH_EMPTY_STRING_FIELDS, string values are trimmed;
 *   strings that are empty after trimming are removed from the result.
 * - When `keywords` is an array, string entries are trimmed and empty ones
 *   dropped; falsy non-string entries are dropped too. An array that ends up
 *   empty removes `keywords` entirely.
 *
 * The input object is never mutated; a shallow copy is returned.
 *
 * @param {*} rawArgs Caller-supplied arguments.
 * @returns {*} Shallow-copied, normalized arguments, or `rawArgs` unchanged.
 */
function normalizeSearchArgs(rawArgs) {
  const isPlainArgs = Boolean(rawArgs) && typeof rawArgs === 'object' && !Array.isArray(rawArgs)
  if (!isPlainArgs) return rawArgs

  const normalized = { ...rawArgs }

  SEARCH_EMPTY_STRING_FIELDS.forEach((field) => {
    const current = normalized[field]
    if (typeof current !== 'string') return
    const stripped = current.trim()
    if (stripped) normalized[field] = stripped
    else delete normalized[field]
  })

  if (Array.isArray(normalized.keywords)) {
    const kept = []
    for (const entry of normalized.keywords) {
      if (typeof entry === 'string') {
        const stripped = entry.trim()
        if (stripped.length > 0) kept.push(stripped)
      } else if (entry) {
        // Truthy non-strings are kept so that validation can report them.
        kept.push(entry)
      }
    }
    if (kept.length === 0) delete normalized.keywords
    else normalized.keywords = kept
  }

  return normalized
}
|
|
107
|
+
|
|
108
|
+
/**
 * Tidy raw URL-fetch arguments before schema validation.
 *
 * - Non-object input (including arrays and falsy values) is returned as-is.
 * - A string `url` is trimmed (an empty result is kept, not removed).
 * - An array `url` has its string entries trimmed and empty ones dropped;
 *   falsy non-string entries are dropped too. An array that ends up empty
 *   removes `url` entirely.
 *
 * The input object is never mutated; a shallow copy is returned.
 *
 * @param {*} rawArgs Caller-supplied arguments.
 * @returns {*} Shallow-copied, normalized arguments, or `rawArgs` unchanged.
 */
function normalizeSearchUrlArgs(rawArgs) {
  const isPlainArgs = Boolean(rawArgs) && typeof rawArgs === 'object' && !Array.isArray(rawArgs)
  if (!isPlainArgs) return rawArgs

  const normalized = { ...rawArgs }
  const { url: target } = normalized

  if (typeof target === 'string') {
    normalized.url = target.trim()
  } else if (Array.isArray(target)) {
    const kept = []
    for (const entry of target) {
      if (typeof entry === 'string') {
        const stripped = entry.trim()
        if (stripped.length > 0) kept.push(stripped)
      } else if (entry) {
        // Truthy non-strings are kept so that validation can report them.
        kept.push(entry)
      }
    }
    if (kept.length === 0) delete normalized.url
    else normalized.url = kept
  }

  return normalized
}
|
|
121
|
+
|
|
122
|
+
// Zod schema for crawl arguments: a required starting URL plus optional,
// bounded limits on page count and link depth.
const crawlArgsSchema = z.object({
  url: z.string().url().describe('Starting URL to begin crawling from.'),
  maxPages: z
    .number()
    .int()
    .min(1)
    .max(200)
    .optional()
    .describe('Maximum number of pages to visit (1-200).'),
  maxDepth: z
    .number()
    .int()
    .min(0)
    .max(5)
    .optional()
    .describe('Maximum link depth to follow (0-5).'),
  sameDomainOnly: z
    .boolean()
    .optional()
    .describe('If true, only follow links on the same domain.'),
})
|
|
128
|
+
|
|
129
|
+
/**
 * Wrap a payload as a single text content item holding its pretty-printed
 * (2-space indented) JSON serialization.
 *
 * @param {*} payload Value to serialize.
 * @returns {{content: Array<{type: string, text: string}>}} Result envelope.
 */
function jsonText(payload) {
  const text = JSON.stringify(payload, null, 2)
  return { content: [{ type: 'text', text }] }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Render a payload through `formatResponse` and wrap the resulting text as a
 * single text content item. For the 'search' tool the payload is first passed
 * through `dropInvalidSearchResults`.
 *
 * @param {string} tool Tool name forwarded to `formatResponse`.
 * @param {*} payload Payload to format.
 * @returns {{content: Array<{type: string, text: *}>}} Result envelope.
 */
function formattedText(tool, payload) {
  let formatterInput = payload
  if (tool === 'search') {
    formatterInput = dropInvalidSearchResults(payload)
  }
  const rendered = formatResponse(tool, formatterInput)
  return { content: [{ type: 'text', text: rendered }] }
}
|
|
146
|
+
|
|
147
|
+
/**
 * Decide whether a search result looks like an error page, based only on its
 * title: either the phrase "page not found", or "404" followed later by
 * "not found" (both case-insensitive, on word boundaries).
 *
 * @param {{title?: *}|null|undefined} result Search result entry.
 * @returns {boolean} True when the title matches a not-found pattern.
 */
function isInvalidSearchResult(result) {
  const normalizedTitle = String(result?.title || '').trim()
  const notFoundPattern = /\bpage not found\b|\b404\b.*\bnot found\b/i
  return notFoundPattern.test(normalizedTitle)
}
|
|
151
|
+
|
|
152
|
+
/**
 * Remove entries flagged by `isInvalidSearchResult` from
 * `payload.response.results`.
 *
 * The payload is returned untouched (same reference) when it is not an
 * object, has no object `response`, has no `results` array, or when the
 * filtered array has the same length as the original. Otherwise a shallow copy
 * is returned whose `response` carries the filtered `results` and a
 * `droppedInvalidResults` counter increased by the number of removed entries.
 *
 * @param {*} payload Search payload.
 * @returns {*} The original payload or a filtered shallow copy.
 */
function dropInvalidSearchResults(payload) {
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object'

  if (!isObjectLike(payload)) return payload
  const { response } = payload
  if (!isObjectLike(response)) return payload
  const allResults = response.results
  if (!Array.isArray(allResults)) return payload

  const kept = allResults.filter((entry) => !isInvalidSearchResult(entry))
  const removedCount = allResults.length - kept.length
  if (removedCount === 0) return payload

  const previouslyDropped = response.droppedInvalidResults || 0
  return {
    ...payload,
    response: {
      ...response,
      results: kept,
      droppedInvalidResults: previouslyDropped + removedCount,
    },
  }
}
|
|
167
|
+
|
|
168
|
+
/**
 * Convert a Zod schema into a JSON schema object using the 'openApi3' target,
 * with the top-level `$schema` property removed.
 *
 * @param {*} zodSchema Schema handed to `zodToJsonSchema`.
 * @returns {object} The converted schema object, without `$schema`.
 */
function buildInputSchema(zodSchema) {
  const converted = zodToJsonSchema(zodSchema, { target: 'openApi3' })
  // Strip the dialect marker in place; the same object is returned.
  delete converted.$schema
  return converted
}
|
|
173
|
+
|
|
174
|
+
/**
 * Cache lifetime for search responses, in milliseconds, by search type:
 * 20 minutes for 'news', 60 minutes for 'images', and 30 minutes for 'web'
 * or any other value.
 *
 * @param {string} [type='web'] Search type.
 * @returns {number} Time-to-live in milliseconds.
 */
function getSearchCacheTtlMs(type = 'web') {
  const MINUTE_MS = 60 * 1000
  if (type === 'news') return 20 * MINUTE_MS
  if (type === 'images') return 60 * MINUTE_MS
  return 30 * MINUTE_MS
}
|
|
185
|
+
|
|
186
|
+
/**
 * Cache lifetime for scrape responses, in milliseconds: 10 minutes when
 * `isXRoute` is truthy, 60 minutes otherwise.
 *
 * @param {boolean} [isXRoute=false] Selects the shorter lifetime when truthy.
 * @returns {number} Time-to-live in milliseconds.
 */
function getScrapeCacheTtlMs(isXRoute = false) {
  const MINUTE_MS = 60 * 1000
  const minutes = isXRoute ? 10 : 60
  return minutes * MINUTE_MS
}
|
|
189
|
+
|
|
190
|
+
/**
 * Build an environment map for search runtime use: a copy of `process.env`
 * with provider credentials resolved from configuration layered on top.
 *
 * Each credential is looked up exactly once (previously every getter ran
 * twice: once to test for presence and once to read the value).
 *
 * @param {*} config Loaded configuration passed to the credential getters.
 * @returns {Object<string, string>} New environment object; `process.env`
 *   itself is not modified.
 */
function buildRuntimeEnv(config) {
  const runtimeEnv = { ...process.env }

  const firecrawlKey = getFirecrawlApiKey(config)
  if (firecrawlKey) runtimeEnv.FIRECRAWL_API_KEY = firecrawlKey

  const tavilyKey = getRawProviderApiKey(config, 'tavily')
  if (tavilyKey) runtimeEnv.TAVILY_API_KEY = tavilyKey

  const exaKey = getRawProviderApiKey(config, 'exa')
  if (exaKey) runtimeEnv.EXA_API_KEY = exaKey

  // xAI search runs through the xai-api backend, which reads the Agent xAI
  // credential (getAgentApiKey('xai')) — not a separate raw search key. Mirror
  // that source here so the startup snapshot discovers 'xai-api' iff the agent
  // key is present. An XAI_API_KEY already set in the process environment
  // takes precedence over the agent credential's value.
  const xaiKey = getAgentApiKey('xai')
  if (xaiKey) runtimeEnv.XAI_API_KEY = process.env.XAI_API_KEY || xaiKey

  return runtimeEnv
}
|
|
211
|
+
|
|
212
|
+
/**
 * Canonicalize a URL for use in cache keys.
 *
 * Parseable input is returned in the WHATWG `URL` serialized form; anything
 * the `URL` constructor rejects is returned as its plain string form.
 *
 * @param {*} url
 * @returns {string}
 */
function normalizeCacheUrl(url) {
  let parsed
  try {
    parsed = new URL(url)
  } catch {
    return String(url)
  }
  return parsed.toString()
}
|
|
219
|
+
|
|
220
|
+
// Upper bound (2 MiB) on how much of a single docs index body is kept.
const DOC_INDEX_MAX_BYTES = 2 * 1024 ** 2
// Upper bound on the number of index URLs visited per discovery run.
const DOC_INDEX_MAX_FETCHES = 8
// Path prefixes probed for an llms.txt when a bare-host site gets a docs-style query.
const DOC_INDEX_COMMON_PATHS = ['docs', 'api', 'reference', 'api/reference']
// Query words that carry no ranking signal and are dropped from the token list.
const DOC_INDEX_STOPWORDS = new Set(`
  about after again also and are can com doc docs
  documentation for from how http https into official
  page pages site the this title url use using what
  when where which with www
`.trim().split(/\s+/))
|
|
229
|
+
|
|
230
|
+
/**
 * Flatten a keywords argument into one string: arrays are joined with single
 * spaces, anything else is stringified (falsy values become '').
 *
 * @param {string|string[]|*} keywords
 * @returns {string}
 */
function keywordsText(keywords) {
  if (Array.isArray(keywords)) return keywords.join(' ')
  return String(keywords || '')
}
|
|
233
|
+
|
|
234
|
+
/**
 * Split a keywords argument into unique, lower-cased ranking tokens.
 *
 * A token starts with a letter or digit and may contain letters, digits,
 * `.`, `_` and `-` (so `node.js` and `react-router` stay whole). Trailing
 * separators are trimmed: sentence punctuation such as "react-router." or
 * "docs." would otherwise produce tokens that can never equal a path segment
 * and that slip past the stopword set. Tokens shorter than three characters
 * and stopwords are discarded.
 *
 * @param {string|string[]} keywords
 * @returns {string[]} tokens in first-seen order, without duplicates.
 */
function queryTokens(keywords) {
  const rawTokens = keywordsText(keywords)
    .toLowerCase()
    .match(/[\p{L}\p{N}][\p{L}\p{N}._-]{1,}/gu) || []
  const unique = new Set()
  for (const rawToken of rawTokens) {
    // Drop trailing '.', '_' or '-' picked up from surrounding punctuation.
    const token = rawToken.replace(/[._-]+$/, '')
    if (token.length >= 3 && !DOC_INDEX_STOPWORDS.has(token)) unique.add(token)
  }
  return [...unique]
}
|
|
241
|
+
|
|
242
|
+
// Relevance score for one docs-index link against the query tokens.
//
// Per token, every matching signal adds its weight:
//   title contains token ............ 8  (strongest: llms.txt titles are hand-curated)
//   a path segment equals token ..... 5
//   last path segment equals token .. 3 + max(0, 7 - segmentCount), i.e. 3..9,
//                                     so shallow /api/foo beats deep /blog/.../foo
//   url contains token .............. 2
//   snippet contains token .......... 1
// Url and snippet act as tiebreakers when the title misses. A pathname ending
// in .md costs 2 points so raw markdown sources lose to rendered docs when
// both are listed. Returns 0 when there are no tokens.
function docLinkScore(link, tokens) {
  if (!tokens.length) return 0

  const TITLE_WEIGHT = 8
  const SEGMENT_WEIGHT = 5
  const URL_WEIGHT = 2
  const SNIPPET_WEIGHT = 1
  const MARKDOWN_PENALTY = 2

  const lowerTitle = String(link.title || '').toLowerCase()
  const lowerUrl = String(link.url || '').toLowerCase()
  const lowerSnippet = String(link.snippet || '').toLowerCase()

  // An unparseable url simply contributes no path-based signals.
  let pathname
  try {
    pathname = new URL(link.url).pathname.toLowerCase()
  } catch {
    pathname = ''
  }
  const segments = pathname.split('/').filter(Boolean)
  const lastSegment = segments.at(-1)
  const lastSegmentWeight = 3 + Math.max(0, 7 - segments.length)

  let total = 0
  for (const token of tokens) {
    if (lowerTitle.includes(token)) total += TITLE_WEIGHT
    if (segments.includes(token)) total += SEGMENT_WEIGHT
    if (lastSegment === token) total += lastSegmentWeight
    if (lowerUrl.includes(token)) total += URL_WEIGHT
    if (lowerSnippet.includes(token)) total += SNIPPET_WEIGHT
  }
  return /\.md$/i.test(pathname) ? total - MARKDOWN_PENALTY : total
}
|
|
268
|
+
|
|
269
|
+
/**
 * List the llms.txt URLs worth probing for a site, most specific first.
 *
 * For `host/a/b` the candidates are `/a/b/llms.txt`, `/a/llms.txt`, then
 * `/llms.txt`. When the site has no path and the keywords mention
 * api/doc(s)/documentation/reference, the common documentation prefixes in
 * DOC_INDEX_COMMON_PATHS are appended as well.
 *
 * @param {string} site - host or URL; `https://` is assumed when no http(s)
 *   scheme is present.
 * @param {string|string[]} keywords - query used only for the docs-intent check.
 * @returns {string[]} normalized, de-duplicated URLs ([] for an empty or
 *   unparseable site).
 */
function docIndexUrlCandidates(site, keywords) {
  if (!site) return []

  let base
  try {
    base = new URL(/^https?:\/\//i.test(site) ? site : `https://${site}`)
  } catch {
    return []
  }

  // A Set keeps first-seen order and drops repeats.
  const found = new Set()
  const remember = (candidate) => {
    try {
      found.add(new URL(candidate).toString())
    } catch {}
  }

  // Walk from the full path up to the origin root.
  const parts = base.pathname.split('/').filter(Boolean)
  for (let depth = parts.length; depth >= 0; depth -= 1) {
    const dir = parts.slice(0, depth).map(part => `/${part}`).join('')
    remember(`${base.origin}${dir}/llms.txt`)
  }

  // When the user asks an api/docs question on a bare-host site, also probe
  // the common doc-prefix llms.txt locations the host might publish under.
  const docsIntent = /\b(?:api|docs?|documentation|reference)\b/i.test(keywordsText(keywords))
  if (docsIntent && parts.length === 0) {
    DOC_INDEX_COMMON_PATHS.forEach((prefix) => {
      remember(`${base.origin}/${prefix}/llms.txt`)
    })
  }

  return [...found]
}
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
/**
 * Create the abort signal for one docs-index fetch.
 *
 * The signal aborts after `timeoutMs` clamped to 1000..10000 ms (10000 when
 * the value is missing or not a usable number), or as soon as `parentSignal`
 * aborts, whichever comes first. Uses `AbortSignal.any` when the runtime has
 * it; otherwise an AbortController is wired up by hand.
 *
 * @param {number} [timeoutMs]
 * @param {AbortSignal} [parentSignal]
 * @returns {AbortSignal}
 */
function docIndexAbortSignal(timeoutMs, parentSignal) {
  const requestedMs = Number(timeoutMs) || 10_000
  const ms = Math.min(Math.max(requestedMs, 1000), 10_000)

  if (typeof AbortSignal.any === 'function') {
    const sources = [AbortSignal.timeout(ms)]
    if (parentSignal) sources.push(parentSignal)
    return AbortSignal.any(sources)
  }

  // Manual fallback: one controller fed by a timer and the parent's abort event.
  const controller = new AbortController()
  let timer
  let parentListener

  // Tear down both triggers, then abort once with the given reason.
  const finish = (reason) => {
    if (timer !== undefined) {
      clearTimeout(timer)
      timer = undefined
    }
    if (parentSignal && parentListener) {
      parentSignal.removeEventListener('abort', parentListener)
      parentListener = undefined
    }
    if (!controller.signal.aborted) controller.abort(reason)
  }

  timer = setTimeout(
    () => finish(new DOMException('The operation was aborted due to timeout', 'TimeoutError')),
    ms,
  )

  if (!parentSignal) return controller.signal

  if (parentSignal.aborted) {
    // Parent already cancelled: propagate immediately, no listener needed.
    finish(parentSignal.reason)
    return controller.signal
  }

  parentListener = () => finish(parentSignal.reason)
  parentSignal.addEventListener('abort', parentListener, { once: true })
  return controller.signal
}
|
|
336
|
+
|
|
337
|
+
/**
 * Reduce search arguments to the normalized fields that identify a cacheable
 * search.
 *
 * Array keywords are stringified, trimmed and stripped of empty entries (a
 * single survivor collapses to a plain string); the arguments then go through
 * `normalizeSearchIntent` with the provider's capabilities and the configured
 * default result count.
 *
 * @param {object} args - search arguments (keywords, site, type, maxResults,
 *   locale, contextSize).
 * @param {object} config - configuration; `config.provider` selects the
 *   capability entry.
 * @returns {{keywords: *, site: *, type: *, locale: *, contextSize: *, maxResults: *}}
 *   `site` is null when the normalized intent has none.
 */
function searchArgsForCacheKey(args, config) {
  // Unknown providers are treated as web-only with tool-level locale handling.
  const fallbackCaps = { searchTypes: ['web'], localeMode: 'tool' }
  const caps = PROVIDER_CAPS[config.provider] || fallbackCaps

  let keywords = args.keywords
  if (Array.isArray(keywords)) {
    const cleaned = []
    for (const entry of keywords) {
      const text = String(entry || '').trim()
      if (text) cleaned.push(text)
    }
    keywords = cleaned.length === 1 ? cleaned[0] : cleaned
  }

  const rawIntent = {
    keywords,
    site: args.site,
    type: args.type,
    maxResults: args.maxResults,
    locale: args.locale,
    contextSize: args.contextSize,
  }
  const normalizeOptions = { caps, defaultMaxResults: getRawSearchMaxResults(config) }
  const intent = normalizeSearchIntent(rawIntent, normalizeOptions)

  return {
    keywords: intent.rawQuery,
    site: intent.site || null,
    type: intent.type,
    locale: intent.locale,
    contextSize: intent.contextSize,
    maxResults: intent.maxResults,
  }
}
|
|
364
|
+
|
|
365
|
+
/**
 * Download one docs index (llms.txt) with SSRF protection and a hard size cap.
 *
 * @param {string} url - index URL to fetch.
 * @param {number} timeoutMs - request budget, clamped by docIndexAbortSignal.
 * @param {AbortSignal} [parentSignal] - caller cancellation.
 * @returns {Promise<{text: string, url: string}>} decoded body (at most
 *   DOC_INDEX_MAX_BYTES bytes of it) and the URL the body was served from.
 * @throws {Error} on a non-public URL, more than 5 redirects, a redirect
 *   without Location, a non-2xx status, or a declared Content-Length above
 *   the cap.
 */
async function fetchDocIndex(url, timeoutMs, parentSignal) {
  // SSRF: reuse the guarded web_fetch path's public-URL/private-IP check so
  // docs-index discovery cannot be steered into localhost / link-local /
  // cloud-metadata addresses by a hostile site override. Follow redirects
  // manually and re-validate each hop so a 30x Location can't steer us to
  // a private/loopback address after the initial check.
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308])
  const MAX_REDIRECTS = 5
  const signal = docIndexAbortSignal(timeoutMs, parentSignal)
  let currentUrl = url
  let response
  for (let hops = 0; ; hops++) {
    assertPublicUrl(currentUrl)
    // pinnedFetch resolves+validates the host once and pins the connection
    // to the validated IP, closing the validate-then-fetch DNS-rebinding /
    // TOCTOU window that bare `fetch` left open.
    response = await pinnedFetch(currentUrl, {
      headers: { Accept: 'text/markdown,text/plain,text/*,*/*' },
      signal,
      redirect: 'manual',
    })
    if (!REDIRECT_STATUSES.has(response.status)) break
    try { await response.body?.cancel() } catch {}
    if (hops >= MAX_REDIRECTS) {
      throw new Error(`docs index too many redirects (max ${MAX_REDIRECTS})`)
    }
    const location = response.headers.get('location')
    if (!location) {
      throw new Error(`docs index redirect ${response.status} without Location header`)
    }
    // Location may be relative; resolve it against the hop that issued it.
    currentUrl = new URL(location, currentUrl).toString()
  }
  if (!response.ok) {
    try { await response.body?.cancel() } catch {}
    throw new Error(`docs index fetch failed: ${response.status}`)
  }
  // Cheap early exit when the server declares an oversized body.
  const contentLength = Number(response.headers.get('content-length') || 0)
  if (contentLength > DOC_INDEX_MAX_BYTES) {
    try { await response.body?.cancel() } catch {}
    throw new Error(`docs index too large: ${contentLength}`)
  }
  // Enforce DOC_INDEX_MAX_BYTES while streaming so chunked / missing-length
  // responses can't blow past the 2MB cap by deferring the check until after
  // the whole body is buffered.
  const reader = response.body?.getReader?.()
  let text
  if (!reader) {
    // Buffer as bytes and cap by byte length — string.length counts UTF-16
    // code units, which under-counts multi-byte characters and lets the
    // body blow past DOC_INDEX_MAX_BYTES.
    const buf = new Uint8Array(await response.arrayBuffer())
    const capped = buf.byteLength > DOC_INDEX_MAX_BYTES ? buf.subarray(0, DOC_INDEX_MAX_BYTES) : buf
    text = new TextDecoder('utf-8', { fatal: false }).decode(capped)
  } else {
    const chunks = []
    let total = 0
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        const remaining = DOC_INDEX_MAX_BYTES - total
        if (value.byteLength >= remaining) {
          // Keep only the bytes that still fit. Capping the decoded string
          // by .length instead (UTF-16 units) would let a multi-byte body
          // exceed the byte budget, unlike the non-streaming branch above.
          chunks.push(value.subarray(0, remaining))
          total += remaining
          try { await reader.cancel() } catch {}
          break
        }
        chunks.push(value)
        total += value.byteLength
      }
    } finally {
      try { reader.releaseLock() } catch {}
    }
    // Streaming decode so characters split across chunk boundaries survive.
    const decoder = new TextDecoder('utf-8', { fatal: false })
    let buf = ''
    for (const chunk of chunks) buf += decoder.decode(chunk, { stream: true })
    buf += decoder.decode()
    text = buf
  }
  return {
    text,
    // Redirects are followed by hand, so when the response carries no url of
    // its own the final hop (not the originally requested URL) is the base
    // that relative links in the index must resolve against.
    url: response.url || currentUrl,
  }
}
|
|
448
|
+
|
|
449
|
+
/**
 * Extract markdown links from a docs index body.
 *
 * Two passes run over the text: bullet items (`- [title](url): snippet`,
 * where the url may be relative and the rest of the line becomes the
 * snippet), then every inline `[title](http…)` link anywhere in the text.
 * URLs are resolved against `sourceUrl`; only http(s) results are kept and
 * the first occurrence of a URL wins.
 *
 * @param {string} text - index body.
 * @param {string} sourceUrl - URL the body came from (base for relative links).
 * @returns {Array<{title: string, url: string, snippet: string, sourceUrl: string}>}
 */
function parseDocIndexLinks(text, sourceUrl) {
  const body = String(text || '')
  // Map preserves insertion order, giving both de-duplication and ordering.
  const byUrl = new Map()

  const record = (title, rawUrl, snippet = '') => {
    if (!title || !rawUrl) return
    let resolved
    try {
      resolved = new URL(rawUrl, sourceUrl).toString()
    } catch {
      return
    }
    if (!/^https?:\/\//i.test(resolved)) return
    if (byUrl.has(resolved)) return
    byUrl.set(resolved, {
      title: String(title).trim(),
      url: resolved,
      snippet: String(snippet || '').trim(),
      sourceUrl,
    })
  }

  // Pass 1: list items, which may carry a trailing description.
  const bulletRe = /^\s*[-*]\s+\[([^\]]{1,180})\]\(([^)\s]+)\)\s*:?\s*(.*)$/
  for (const line of body.split(/\r?\n/)) {
    const bullet = bulletRe.exec(line)
    if (bullet) record(bullet[1], bullet[2], bullet[3])
  }

  // Pass 2: absolute links embedded anywhere in the text.
  for (const inline of body.matchAll(/\[([^\]]{1,180})\]\((https?:\/\/[^)\s]+)\)/g)) {
    record(inline[1], inline[2])
  }

  return [...byUrl.values()]
}
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
/**
 * Whether a URL points at another docs index, i.e. its pathname ends in
 * `/llms.txt` or `/llms-full.txt` (case-insensitive). Unparseable URLs are
 * not index links.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isDocIndexLink(url) {
  let pathname
  try {
    pathname = new URL(url).pathname
  } catch {
    return false
  }
  return /\/llms(?:-full)?\.txt$/i.test(pathname)
}
|
|
490
|
+
|
|
491
|
+
/**
 * Lower-cased hostname of a URL or bare host string.
 *
 * Input without an http(s) scheme is parsed as if prefixed with `https://`.
 *
 * @param {string} url
 * @returns {string} hostname, or '' when the value cannot be parsed.
 */
function hostFromUrl(url) {
  try {
    const absolute = /^https?:\/\//i.test(url) ? url : `https://${url}`
    const { hostname } = new URL(absolute)
    return hostname.toLowerCase()
  } catch {
    return ''
  }
}
|
|
498
|
+
|
|
499
|
+
/**
 * Whether a hostname has at most two non-empty dot-separated labels
 * (`example.com`, `localhost`).
 *
 * NOTE(review): this is a label count, not a public-suffix lookup, so
 * `example.co.uk` is not treated as a base host while `co.uk` is.
 *
 * @param {string} host
 * @returns {boolean}
 */
function isBaseHost(host) {
  const labels = host.split('.').filter(label => label !== '')
  return labels.length <= 2
}
|
|
502
|
+
|
|
503
|
+
/**
 * Whether `host` falls inside the scope named by `scopedHost`.
 *
 * An exact match always counts. Subdomains count only when the scope is a
 * base host (see isBaseHost); a scope that is itself a subdomain matches
 * nothing but itself.
 *
 * @param {string} host
 * @param {string} scopedHost
 * @returns {boolean} false when either argument is empty.
 */
function hostMatchesScope(host, scopedHost) {
  if (!host || !scopedHost) return false
  if (host === scopedHost) return true
  if (!isBaseHost(scopedHost)) return false
  return host.endsWith(`.${scopedHost}`)
}
|
|
508
|
+
|
|
509
|
+
/**
 * Decide whether a link found in a docs index may be returned for a search
 * scoped to `site`.
 *
 * When `site` yields a host, the link must match it. Otherwise the host of
 * the requested index URL serves as the scope.
 *
 * @param {string} url - candidate link.
 * @param {string} site - site the caller asked to search.
 * @param {string} requestedIndexUrl - index URL that was requested.
 * @returns {boolean}
 */
function sameDocIndexScope(url, site, requestedIndexUrl) {
  const linkHost = hostFromUrl(url)
  if (!linkHost) return false

  // Always require the link to match the original requested site host.
  const siteHost = hostFromUrl(site)
  if (siteHost && !hostMatchesScope(linkHost, siteHost)) return false

  for (const scope of [siteHost, hostFromUrl(requestedIndexUrl)]) {
    if (scope && hostMatchesScope(linkHost, scope)) return true
  }
  return false
}
|
|
521
|
+
|
|
522
|
+
/**
 * Look for documentation pages matching a site-scoped web search by reading
 * the site's llms.txt indexes.
 *
 * Candidate index URLs are fetched one at a time (at most
 * DOC_INDEX_MAX_FETCHES visits). Links that are themselves llms.txt indexes
 * are queued while budget remains; other links must pass the site scope check
 * and score above zero against the query tokens. The best links (highest
 * score first, one entry per URL, at most min(maxResults, 5), 5 by default)
 * are returned in search-result shape.
 *
 * @param {object} args - search arguments (site, type, keywords, maxResults).
 * @param {number} timeoutMs - per-index fetch budget.
 * @param {AbortSignal} [parentSignal] - caller cancellation; an aborted
 *   signal ends discovery with [].
 * @returns {Promise<object[]>} [] when there is no site, the type is not
 *   web, no usable tokens exist, or nothing matched.
 */
async function discoverDocsIndexResults(args, timeoutMs, parentSignal) {
  if (!args?.site) return []
  if (args.type && args.type !== 'web') return []
  const tokens = queryTokens(args.keywords)
  if (!tokens.length) return []

  const pending = docIndexUrlCandidates(args.site, args.keywords)
  const visited = new Set()
  const scored = []

  while (pending.length > 0 && visited.size < DOC_INDEX_MAX_FETCHES) {
    if (parentSignal?.aborted) return []
    const indexUrl = pending.shift()
    if (!indexUrl || visited.has(indexUrl)) continue
    visited.add(indexUrl)

    let index = null
    try {
      index = await fetchDocIndex(indexUrl, timeoutMs, parentSignal)
    } catch {
      // A missing or unreachable index is expected; try the next candidate.
      continue
    }

    const sourceUrl = index.url || indexUrl
    for (const link of parseDocIndexLinks(index.text, sourceUrl)) {
      if (isDocIndexLink(link.url)) {
        // Nested index: follow it only while the visit budget allows.
        const withinBudget = pending.length + visited.size < DOC_INDEX_MAX_FETCHES
        if (!visited.has(link.url) && withinBudget) pending.push(link.url)
        continue
      }
      if (!sameDocIndexScope(link.url, args.site, indexUrl)) continue
      const score = docLinkScore(link, tokens)
      if (score <= 0) continue
      scored.push({ ...link, score })
    }
  }

  // Rank, then keep the best-scoring entry for each URL.
  scored.sort((a, b) => b.score - a.score)
  const emittedUrls = new Set()
  const ranked = []
  for (const item of scored) {
    if (emittedUrls.has(item.url)) continue
    emittedUrls.add(item.url)
    ranked.push(item)
  }

  const limit = Math.min(Number(args.maxResults) || 5, 5)
  return ranked.slice(0, limit).map(item => ({
    title: item.title,
    url: item.url,
    snippet: item.snippet || `Matched docs index: ${item.sourceUrl}`,
    source: 'docs-index',
    provider: 'docs-index',
    publishedDate: null,
    meta: { score: item.score, sourceUrl: item.sourceUrl },
  }))
}
|
|
578
|
+
|
|
579
|
+
/**
 * Merge docs-index discoveries into a search payload.
 *
 * Docs-index results go first, followed by the provider's results;
 * duplicates (same url, or same title + snippet when there is no url) keep
 * their first occurrence. The merged list is cut to `args.maxResults` (never
 * below the number of docs-index results) and the response gains a
 * `docsIndexAugmented` summary. Payloads without a `response.results` array,
 * or searches with no docs-index hits, are returned unchanged.
 *
 * @param {object} payload - `{ ..., response: { results: [...] } }`.
 * @param {object} args - search arguments forwarded to discovery.
 * @param {number} timeoutMs
 * @param {AbortSignal} [parentSignal]
 * @returns {Promise<object>} the original payload or an augmented copy.
 */
async function augmentSearchPayloadWithDocsIndex(payload, args, timeoutMs, parentSignal) {
  if (!payload || typeof payload !== 'object') return payload
  const { response } = payload
  const hasResultList = Boolean(response)
    && typeof response === 'object'
    && Array.isArray(response.results)
  if (!hasResultList) return payload

  const indexResults = await discoverDocsIndexResults(args, timeoutMs, parentSignal)
  if (!indexResults.length) return payload

  // First occurrence wins, so docs-index entries shadow provider duplicates.
  const merged = new Map()
  for (const result of [...indexResults, ...response.results]) {
    const url = String(result?.url || '')
    const key = url || `${result?.title || ''}\n${result?.snippet || ''}`
    if (!merged.has(key)) merged.set(key, result)
  }
  const results = [...merged.values()]

  const limit = Math.max(Number(args.maxResults) || results.length, indexResults.length)
  const sources = new Set(indexResults.map(item => item.meta?.sourceUrl).filter(Boolean))

  return {
    ...payload,
    response: {
      ...response,
      results: results.slice(0, limit),
      docsIndexAugmented: {
        added: indexResults.length,
        sources: [...sources],
      },
    },
  }
}
|
|
606
|
+
|
|
607
|
+
/**
 * Record the startup view of every search/scrape backend in the usage state.
 *
 * Each raw provider that has an API key in the runtime environment is marked
 * available, together with whatever usage snapshot the provider returns (a
 * failed snapshot request is ignored). The local scrapers (readability,
 * puppeteer) are recorded from `getScrapeCapabilities()`, followed by a
 * firecrawl entry carrying those same capability flags. Updates go through
 * `updateProviderState`; this function does not flush the state itself.
 *
 * @returns {Promise<void>}
 */
async function writeStartupSnapshot() {
  const config = loadConfig()
  const usageState = loadUsageState()
  const runtimeEnv = buildRuntimeEnv(config)
  const rawProviders = getProvidersWithApiKeys(runtimeEnv)
  const scrapeCapabilities = getScrapeCapabilities()

  // Best-effort: a provider whose usage endpoint fails still gets recorded.
  const usageSnapshotOrNull = async (provider) => {
    try {
      return await fetchProviderUsageSnapshot(provider, runtimeEnv)
    } catch {
      return null
    }
  }
  const credentialSource = provider => getRawProviderCredentialSource(config, provider, process.env) || 'env'

  // Providers are processed one after another, in discovery order.
  for (const provider of rawProviders) {
    const usagePatch = await usageSnapshotOrNull(provider)
    updateProviderState(usageState, provider, {
      available: true,
      connection: 'api',
      source: credentialSource(provider),
      usageSupport: RAW_PROVIDER_CAPABILITIES[provider]?.usageSupport || null,
      ...(usagePatch || {}),
    })
  }

  const localScrapers = [
    ['readability', 'builtin'],
    ['puppeteer', 'local-browser'],
  ]
  for (const [name, connection] of localScrapers) {
    updateProviderState(usageState, name, {
      available: scrapeCapabilities[name],
      connection,
      source: 'local',
    })
  }

  updateProviderState(usageState, 'firecrawl', {
    readability: scrapeCapabilities.readability,
    puppeteer: scrapeCapabilities.puppeteer,
    connection: 'api',
    source: credentialSource('firecrawl'),
  })
}
|
|
650
|
+
|
|
651
|
+
// ── Core action implementations (shared by individual and batch handlers) ──
|
|
652
|
+
|
|
653
|
+
// Searches currently being executed, keyed by search cache key. _searchCore
// stores one shared promise per key so identical concurrent searches reuse the
// first caller's result instead of dispatching another backend request.
const _searchInFlight = new Map()
|
|
654
|
+
|
|
655
|
+
/**
 * Convert a search backend result into the tool's search response shape.
 *
 * Citations become `results`, limited to `result.maxResults` clamped to
 * 1..20 (10 when absent or not a usable number). Missing fields fall back to
 * empty strings, null, 0 or empty arrays, so an empty or undefined result
 * still produces a complete response object.
 *
 * @param {object} [result] - backend result (backend, citations, rawQuery, ...).
 * @returns {object} normalized search response.
 */
function backendResultToSearchResponse(result) {
  const backend = result?.backend || ''
  const rawQuery = result?.rawQuery || result?.query || ''
  const limit = Math.max(1, Math.min(20, Number(result?.maxResults) || 10))
  const citations = Array.isArray(result?.citations) ? result.citations : []

  const toResult = (item) => {
    // Both camelCase and snake_case date fields are accepted.
    const publishedDate = item?.publishedDate || item?.published_date || null
    return {
      title: item?.title || '',
      url: item?.url || '',
      snippet: item?.snippet || '',
      source: item?.source || backend,
      provider: backend,
      publishedDate,
    }
  }

  return {
    usedProvider: backend,
    query: rawQuery,
    rawQuery,
    answer: result?.answer || '',
    model: result?.model || null,
    durationMs: result?.durationMs || 0,
    usage: result?.usage || null,
    results: citations.slice(0, limit).map(item => toResult(item)),
    warnings: Array.isArray(result?.warnings) ? result.warnings : [],
    type: result?.type || 'web',
    site: result?.site || null,
    locale: result?.locale || null,
    webSearchCalls: result?.webSearchCalls || [],
  }
}
|
|
682
|
+
|
|
683
|
+
/**
 * Run one search: serve it from cache, join an identical in-flight search,
 * or dispatch it to the configured backend.
 *
 * On a fresh search the backend response is recorded as a provider success,
 * augmented with docs-index results, cached, and both cache and usage state
 * are flushed. A failure is recorded against the provider and rethrown with
 * `details = { tool: 'search', provider }` attached.
 *
 * @param {object} args - validated search arguments.
 * @param {object} ctx
 * @param {object} ctx.config - configuration; `config.provider` is required.
 * @param {object} ctx.usageState
 * @param {object} ctx.cacheState
 * @param {number} ctx.timeoutMs - budget for docs-index discovery.
 * @param {AbortSignal} [ctx.signal]
 * @returns {Promise<object>} search payload with a `cache` metadata field.
 * @throws {Error} when no provider is configured or the search fails.
 */
async function _searchCore(args, { config, usageState, cacheState, timeoutMs, signal }) {
  // Declared out here so the outer finally can see it even on an early throw.
  let searchCacheKey
  // True only for the caller that registered the in-flight entry. Callers
  // that merely join an existing entry must never remove it, otherwise a
  // third identical caller would miss the still-running search.
  let ownsInFlight = false
  try {
    const { provider } = config
    if (!provider) {
      throw new Error('No search provider configured. Set search.provider in mixdog-config.json.')
    }

    const cacheArgs = searchArgsForCacheKey(args, config)
    const docsIndexMarker = cacheArgs.site && cacheArgs.type === 'web' ? 4 : null
    searchCacheKey = buildCacheKey('search', {
      keywords: cacheArgs.keywords,
      provider,
      site: cacheArgs.site,
      type: cacheArgs.type,
      locale: cacheArgs.locale,
      contextSize: cacheArgs.contextSize,
      docs_index: docsIndexMarker,
      maxResults: cacheArgs.maxResults,
    })

    const cacheHit = getCachedEntry(cacheState, searchCacheKey)
    if (cacheHit) {
      // The stored payload already contains whatever docs-index augmentation
      // was captured when it was inserted, so no network probe is repeated
      // for cached searches.
      return { ...cacheHit.payload, cache: buildCacheMeta(cacheHit, true) }
    }

    // Join an identical search that is already running.
    const running = _searchInFlight.get(searchCacheKey)
    if (running) return running

    let settleShared
    let failShared
    const sharedPromise = new Promise((resolve, reject) => {
      settleShared = resolve
      failShared = reject
    })
    // The owner awaits the real work below; joiners await sharedPromise. The
    // no-op handler keeps an owner-side failure from surfacing as an
    // unhandled rejection when nobody joined.
    sharedPromise.catch(() => {})
    _searchInFlight.set(searchCacheKey, sharedPromise)
    ownsInFlight = true

    try {
      const backendResult = await dispatchSearchBackend({
        provider,
        query: args.keywords,
        site: args.site,
        type: args.type,
        locale: args.locale,
        contextSize: args.contextSize,
        maxResults: args.maxResults || getRawSearchMaxResults(config),
        config,
        signal,
      })
      const response = backendResultToSearchResponse(backendResult)

      noteProviderSuccess(usageState, response.usedProvider, {
        lastCostUsdTicks: response.usage?.cost_in_usd_ticks || null,
      })

      const basePayload = { tool: 'search', provider, response }
      const docsIndexArgs = { ...args, ...cacheArgs, keywords: cacheArgs.keywords }
      const payload = await augmentSearchPayloadWithDocsIndex(basePayload, docsIndexArgs, timeoutMs, signal)

      const ttlMs = getSearchCacheTtlMs(args.type || 'web')
      const cachedEntry = setCachedEntry(cacheState, searchCacheKey, payload, ttlMs)
      flushCacheState()
      flushUsageState()

      const result = { ...payload, cache: buildCacheMeta(cachedEntry, false) }
      if (ownsInFlight) _searchInFlight.delete(searchCacheKey)
      settleShared(result)
      return result
    } catch (error) {
      if (ownsInFlight) _searchInFlight.delete(searchCacheKey)
      failShared(error)
      noteProviderFailure(
        usageState,
        provider,
        error instanceof Error ? error.message : String(error),
        classifyProviderError(error),
      )

      const err = error instanceof Error ? error : new Error(String(error))
      err.details = { tool: 'search', provider }
      throw err
    }
  } finally {
    // Safety net: if this caller registered the in-flight entry and it is
    // somehow still present, remove it. Joiners (ownsInFlight === false)
    // leave the owner's entry alone.
    if (ownsInFlight && _searchInFlight.has(searchCacheKey)) {
      _searchInFlight.delete(searchCacheKey)
    }
  }
}
|
|
790
|
+
|
|
791
|
+
// Page size, in UTF-16 code units, used when the caller gives no maxLength.
const DEFAULT_FETCH_MAX_LENGTH = 50000

/**
 * Apply character-level pagination to a cached or fresh fetch payload.
 * Mirrors the mcp-server-fetch reference: the caller passes
 * startIndex/maxLength and receives a slice plus pointers (nextStartIndex,
 * hasMore) for the next chunk. totalLength is preserved so the caller can
 * decide whether to keep paging.
 *
 * Indexes count UTF-16 code units of `payload.content`. A page never ends
 * between the two halves of a surrogate pair: the boundary is pulled back by
 * one unit, or pushed forward by one when pulling back would leave the page
 * empty. Following `nextStartIndex` therefore never splits an emoji or other
 * astral character.
 *
 * @param {object} payload - fetch payload; `content` is stringified.
 * @param {object} [args]
 * @param {number} [args.startIndex=0] - offset of the first unit to return.
 * @param {number} [args.maxLength] - page size; 0 means "everything from
 *   startIndex", null/undefined means DEFAULT_FETCH_MAX_LENGTH.
 * @returns {object} payload copy with content, bytes (UTF-8 size of the
 *   slice), totalLength, range, hasMore, nextStartIndex and truncated.
 */
function applyFetchPagination(payload, args) {
  const fullContent = String(payload?.content ?? '')
  const totalLength = fullContent.length
  const startIndex = Math.max(0, Number.isFinite(args?.startIndex) ? args.startIndex : 0)
  const rawLimit = args?.maxLength
  const limit = rawLimit === 0
    ? Infinity
    : (rawLimit == null ? DEFAULT_FETCH_MAX_LENGTH : Math.max(0, Number(rawLimit)))

  if (startIndex >= totalLength) {
    // Past the end: an empty terminal page.
    return {
      ...payload,
      content: '',
      bytes: 0,
      totalLength,
      range: { startIndex, endIndex: startIndex },
      hasMore: false,
      nextStartIndex: null,
      truncated: false,
    }
  }

  let endIndex = Math.min(totalLength, startIndex + (Number.isFinite(limit) ? limit : totalLength - startIndex))

  // Keep surrogate pairs whole at the page boundary.
  const isHighSurrogate = code => code >= 0xd800 && code <= 0xdbff
  const isLowSurrogate = code => code >= 0xdc00 && code <= 0xdfff
  const splitsPair = endIndex > startIndex
    && endIndex < totalLength
    && isHighSurrogate(fullContent.charCodeAt(endIndex - 1))
    && isLowSurrogate(fullContent.charCodeAt(endIndex))
  if (splitsPair) {
    // Prefer staying within maxLength; only overshoot by one unit when the
    // page would otherwise be empty and paging could not make progress.
    endIndex = endIndex - 1 > startIndex ? endIndex - 1 : endIndex + 1
  }

  const slice = fullContent.slice(startIndex, endIndex)
  const hasMore = endIndex < totalLength
  return {
    ...payload,
    content: slice,
    bytes: Buffer.byteLength(slice, 'utf-8'),
    totalLength,
    range: { startIndex, endIndex },
    hasMore,
    nextStartIndex: hasMore ? endIndex : null,
    truncated: hasMore || startIndex > 0,
  }
}
|
|
831
|
+
|
|
832
|
+
/**
 * Fetch one or more URLs for the web_fetch tool.
 *
 * Only the first FETCH_URL_CAP URLs (env, default 10) are processed, with at
 * most FETCH_CONCURRENCY (env, default 3) scrapes running at once. Each URL
 * is answered from the fetch cache when possible, otherwise scraped and
 * cached; pagination from `args` is applied to the page either way.
 * Per-URL failures become `status: 'error'` entries rather than rejections.
 *
 * @param {object} args - `url` (string or array) plus pagination options.
 * @param {object} ctx - `{ usageState, cacheState, timeoutMs, signal }`.
 * @returns {Promise<{tool: 'web_fetch', results: object[], urlsTruncated: number}>}
 *   results keep input order with 1-based `index`; `urlsTruncated` is the
 *   number of URLs originally supplied when some were dropped, else 0.
 */
async function _fetchCore(args, { usageState, cacheState, timeoutMs, signal }) {
  const FETCH_URL_CAP = Math.max(1, Number(process.env.FETCH_URL_CAP) || 10)
  // Limit parallel scrapes. Each non-cached URL can launch a Puppeteer
  // browser; running all FETCH_URL_CAP (default 10) at once can spawn up to
  // 10 Chromium processes simultaneously and exhaust memory/file handles.
  const FETCH_CONCURRENCY = Math.max(1, Number(process.env.FETCH_CONCURRENCY) || 3)
  const allUrls = Array.isArray(args.url) ? args.url : [args.url]
  const urls = allUrls.slice(0, FETCH_URL_CAP)

  // Result builders shared by the cached, fresh and failed paths.
  const successEntry = (position, payload, entry, fromCache) => ({
    index: position,
    status: 'success',
    ...applyFetchPagination(payload, args),
    cache: buildCacheMeta(entry, fromCache),
  })
  const errorEntry = (position, url, error, extra = {}) => ({
    index: position,
    status: 'error',
    tool: 'web_fetch',
    url,
    error,
    ...extra,
  })

  const runOne = async (url, index) => {
    const position = index + 1
    const fetchCacheKey = buildCacheKey('fetch:url', { url: normalizeCacheUrl(url) })
    const cached = getCachedEntry(cacheState, fetchCacheKey)
    if (cached) return successEntry(position, cached.payload, cached, true)

    try {
      const [page] = await scrapeUrls([url], timeoutMs, usageState, signal)
      if (page?.error) return errorEntry(position, url, page.error)

      const payload = { tool: 'web_fetch', ...page }
      const cachedEntry = setCachedEntry(cacheState, fetchCacheKey, payload, getScrapeCacheTtlMs(false))
      return successEntry(position, payload, cachedEntry, false)
    } catch (error) {
      // Failures raised before the extractor runs (e.g. assertPublicUrl in
      // web-tools) arrive as exceptions instead of a page-shaped error.
      // Report the raw message so the caller sees the actual cause.
      const message = error instanceof Error
        ? (error.message || error.name || 'fetch failed')
        : String(error)
      const code = error?.code || error?.name || null
      return errorEntry(position, url, message, code ? { errorCode: code } : {})
    }
  }

  // Bounded worker pool: each worker claims the next unclaimed index until
  // none are left, so at most FETCH_CONCURRENCY runOne() calls are in flight.
  const results = new Array(urls.length)
  let cursor = 0
  const drain = async () => {
    for (let i = cursor++; i < urls.length; i = cursor++) {
      results[i] = await runOne(urls[i], i)
    }
  }
  const poolSize = Math.min(FETCH_CONCURRENCY, urls.length)
  await Promise.all(Array.from({ length: poolSize }, () => drain()))

  const urlsTruncated = allUrls.length > urls.length ? allUrls.length : 0
  return { tool: 'web_fetch', results, urlsTruncated }
}
|
|
908
|
+
|
|
909
|
+
// `search` and `web_fetch` are the public surface. `crawl` / `setup`
|
|
910
|
+
// remain `public: false`: still reachable via the module's
|
|
911
|
+
// handleToolCall and advertised when this module runs as a standalone
|
|
912
|
+
// MCP server, but excluded from the unified build-tools-manifest output
|
|
913
|
+
// so the Lead only sees the high-level entry points.
|
|
914
|
+
import { TOOL_DEFS as toolDefinitions } from './tool-defs.mjs'
|
|
915
|
+
|
|
916
|
+
// Instructions string handed to the MCP server; intentionally empty.
const SEARCH_INSTRUCTIONS = ''

// MCP server instance for this module. It advertises only the `tools`
// capability.
const server = new Server(
  { name: 'mixdog-search', version: PLUGIN_VERSION },
  {
    capabilities: { tools: {} },
    instructions: SEARCH_INSTRUCTIONS,
  },
)

// tools/list: expose every tool definition except those explicitly marked
// `public: false`.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const tools = toolDefinitions.filter(tool => tool.public !== false)
  return { tools }
})
|
|
934
|
+
|
|
935
|
+
/**
 * Dispatch a tool call by name and return its result object.
 *
 * Supported names: `web_fetch`, `search`, `crawl`, `setup`.
 * Arguments are validated per tool with the corresponding zod schema; a
 * validation failure is returned as an `isError: true` result rather than
 * thrown. Runtime failures inside a tool are likewise converted into an
 * `isError: true` result carrying a normalized error message.
 *
 * For `search`, an array of more than one keyword is fanned out into
 * parallel single-keyword calls (recursing into this function) with a
 * breadth cap, a concurrency limit and an overall deadline, all
 * overridable through environment variables.
 *
 * @param {string} name - Tool name.
 * @param {object} [rawArgs] - Unvalidated tool arguments.
 * @param {{ signal?: AbortSignal }} [options] - Optional abort signal that is
 *   forwarded to the underlying fetch/search/crawl implementation.
 * @returns {Promise<object>} Tool result; `isError: true` is set on failure.
 * @throws {Error} When `name` is not a known tool, or when argument parsing
 *   fails with something other than a `z.ZodError`.
 */
async function handleToolCall(name, rawArgs, { signal } = {}) {
  const config = loadConfig()
  const usageState = loadUsageState()
  const cacheState = loadCacheState()
  const timeoutMs = getRequestTimeoutMs(config)

  // Shared error payload for zod validation failures (same shape for every tool).
  const invalidArgumentsResult = (zodError) => ({
    content: [{ type: 'text', text: JSON.stringify({ error: 'Invalid arguments', details: zodError.errors }) }],
    isError: true,
  })

  switch (name) {
    case 'web_fetch': {
      let urlArgs
      try {
        urlArgs = searchUrlArgsSchema.parse(normalizeSearchUrlArgs(rawArgs || {}))
      } catch (e) {
        if (e instanceof z.ZodError) {
          return invalidArgumentsResult(e)
        }
        throw e
      }
      try {
        const result = await _fetchCore(urlArgs, { config, usageState, cacheState, timeoutMs, signal })
        flushCacheState()
        flushUsageState()
        return {
          ...formattedText('fetch', result),
          // The call only counts as successful if at least one URL succeeded.
          ...(result.results.some(item => item.status === 'success') ? {} : { isError: true }),
        }
      } catch (error) {
        flushUsageState()
        const _rawErr = error instanceof Error ? error.message : String(error)
        return { ...jsonText({ tool: 'web_fetch', url: urlArgs.url, error: normalizeErrorMessage(_rawErr) }), isError: true }
      }
    }
    case 'search': {
      let args
      // A bare `pattern` argument looks like a file-glob request sent to the wrong tool.
      if (rawArgs && rawArgs.pattern !== undefined && rawArgs.query === undefined && rawArgs.keywords === undefined) {
        return { content: [{ type: 'text', text: 'Error: web search requires query; use glob(pattern=...) for file paths.' }], isError: true }
      }
      // The public aiWrapped schema uses `query` (to match recall/explore style).
      // The direct zod schema expects `keywords`. Normalize so standalone callers
      // using the advertised schema don't get a validation error. Work on a
      // local copy so the caller's object and the parameter stay untouched.
      let searchRaw = rawArgs
      if (searchRaw && searchRaw.query !== undefined && searchRaw.keywords === undefined) {
        const { query, ...rest } = searchRaw
        searchRaw = { ...rest, keywords: query }
      }
      try {
        args = searchArgsSchema.parse(normalizeSearchArgs(searchRaw || {}))
      } catch (e) {
        if (e instanceof z.ZodError) {
          return invalidArgumentsResult(e)
        }
        throw e
      }
      // Fan-out: array `keywords` -> N parallel single-keyword calls,
      // grouped per-query with `### Query:` headers (mirrors recall fan-out).
      if (Array.isArray(args.keywords) && args.keywords.length > 1) {
        // Cap fan-out breadth: bounds both the parallel provider calls and the
        // aggregate result size. Env-overridable; extras dropped with a note.
        const SEARCH_FANOUT_CAP = Math.max(1, Number(process.env.SEARCH_FANOUT_CAP) || 10)
        const allKeywords = [...new Set(args.keywords.map(kw => String(kw || '').trim()).filter(Boolean))]
        // Every keyword was blank: fail with an explicit message instead of
        // running an empty fan-out that yields an error with no text.
        if (allKeywords.length === 0) {
          return { content: [{ type: 'text', text: 'Error: web search requires at least one non-empty keyword.' }], isError: true }
        }
        const dedupedKeywords = allKeywords.slice(0, SEARCH_FANOUT_CAP)
        const FANOUT_CONCURRENCY = Math.max(1, Number(process.env.SEARCH_FANOUT_CONCURRENCY) || 10)
        // One controller aborts every sub-search, whether triggered by the
        // caller's signal or by the fan-out deadline.
        const fanOutAbort = new AbortController()
        const deadlineSec = Math.max(1, Number(process.env.SEARCH_FANOUT_DEADLINE_S) || 180)
        const deadlineMs = deadlineSec * 1000
        let deadlineTimer
        let onToolCallAbort
        if (signal) {
          const abortFanoutFromToolCall = () => {
            fanOutAbort.abort(signal.reason ?? new Error('search aborted'))
          }
          if (signal.aborted) {
            abortFanoutFromToolCall()
          } else {
            // Remembered so the listener can be detached in the `finally` below.
            onToolCallAbort = abortFanoutFromToolCall
            signal.addEventListener('abort', onToolCallAbort, { once: true })
          }
        }
        // Never resolves; rejects with a `_deadline`-tagged error when the timer fires.
        const deadlineRace = new Promise((_res, rej) => {
          deadlineTimer = setTimeout(() => {
            fanOutAbort.abort(new Error(`fan-out deadline exceeded (${deadlineSec}s)`))
            rej(Object.assign(new Error(`fan-out deadline exceeded (${deadlineSec}s)`), { _deadline: true }))
          }, deadlineMs)
        })
        // Track per-query results as they settle so a deadline hit preserves
        // anything that already completed (Promise.allSettled would otherwise
        // only assign `settled` after the whole batch finishes).
        const partial = new Array(dedupedKeywords.length)
        // Counting semaphore: `fanoutActive` slots in use, `fanoutPending`
        // FIFO queue of waiters ({ resolve, reject, onAbort }).
        let fanoutActive = 0
        const fanoutPending = []
        const acquireFanoutSlot = () => {
          if (fanOutAbort.signal.aborted) return Promise.reject(fanOutAbort.signal.reason)
          if (fanoutActive < FANOUT_CONCURRENCY) {
            fanoutActive++
            return Promise.resolve()
          }
          return new Promise((resolve, reject) => {
            const waiter = { resolve, reject }
            // On abort, leave the queue and fail the waiter immediately.
            const onAbort = () => {
              const idx = fanoutPending.indexOf(waiter)
              if (idx !== -1) fanoutPending.splice(idx, 1)
              reject(fanOutAbort.signal.reason)
            }
            waiter.onAbort = onAbort
            fanoutPending.push(waiter)
            fanOutAbort.signal.addEventListener('abort', onAbort, { once: true })
          })
        }
        const releaseFanoutSlot = () => {
          while (fanoutPending.length > 0) {
            const waiter = fanoutPending.shift()
            if (fanOutAbort.signal.aborted) {
              // Aborted: drain remaining waiters with the abort reason.
              if (waiter.onAbort) fanOutAbort.signal.removeEventListener('abort', waiter.onAbort)
              waiter.reject(fanOutAbort.signal.reason)
              continue
            }
            if (waiter.onAbort) fanOutAbort.signal.removeEventListener('abort', waiter.onAbort)
            waiter.resolve() // slot transferred; do NOT change fanoutActive
            return
          }
          // Nobody took over the slot, so it is actually freed.
          fanoutActive--
        }
        const queryPromises = dedupedKeywords.map((kw, i) => (async () => {
          await acquireFanoutSlot()
          try {
            // Recurse with a single keyword; that call takes the non-fan-out path.
            const sub = await handleToolCall('search', { ...searchRaw, keywords: kw }, { signal: fanOutAbort.signal })
            if (fanOutAbort.signal.aborted) throw fanOutAbort.signal.reason
            const text = (sub.content || []).filter(p => p.type === 'text').map(p => p.text).join('\n')
            if (sub.isError) {
              throw Object.assign(new Error(text || 'sub-search failed'), { _subError: true })
            }
            return `### Query: ${kw}\n\n${text}`
          } finally {
            releaseFanoutSlot()
          }
        })().then(
          (value) => { partial[i] = { status: 'fulfilled', value }; return value },
          (reason) => { partial[i] = { status: 'rejected', reason }; throw reason },
        ))
        let settled
        try {
          settled = await Promise.race([
            Promise.allSettled(queryPromises),
            deadlineRace,
          ])
        } catch (err) {
          if (!err._deadline) throw err
          // Deadline hit — preserve any completed partial results; mark the
          // rest as rejected with the abort reason.
          settled = dedupedKeywords.map((_kw, i) =>
            partial[i] ?? { status: 'rejected', reason: fanOutAbort.signal.reason }
          )
        } finally {
          clearTimeout(deadlineTimer)
          if (signal && onToolCallAbort) {
            signal.removeEventListener('abort', onToolCallAbort)
          }
        }
        const anyFulfilled = settled.some(r => r.status === 'fulfilled')
        const sections = settled.map((r, i) =>
          r.status === 'fulfilled'
            ? r.value
            : `### Query: ${dedupedKeywords[i]}\n\n[error] ${normalizeErrorMessage(String(r.reason?.message || r.reason))}`
        )
        const fanoutNote = allKeywords.length > dedupedKeywords.length
          ? `[fan-out capped at ${SEARCH_FANOUT_CAP} of ${allKeywords.length} keywords; raise SEARCH_FANOUT_CAP for more]\n\n`
          : ''
        return {
          content: [{ type: 'text', text: fanoutNote + sections.join('\n\n---\n\n') }],
          // Only an error when every sub-query failed.
          ...(anyFulfilled ? {} : { isError: true }),
        }
      }
      try {
        const result = await _searchCore(args, { config, usageState, cacheState, timeoutMs, signal })
        flushUsageState()
        return formattedText('search', result)
      } catch (error) {
        flushUsageState()
        // Optional chaining: a thrown `null`/`undefined` must not crash the
        // handler (the message extraction below already tolerates non-Errors).
        const details = error?.details || { tool: 'search' }
        const _rawErr = error instanceof Error ? error.message : String(error)
        return { ...jsonText({ ...details, error: normalizeErrorMessage(_rawErr) }), isError: true }
      }
    }

    case 'crawl': {
      let args
      try {
        args = crawlArgsSchema.parse(rawArgs || {})
      } catch (e) {
        if (e instanceof z.ZodError) {
          return invalidArgumentsResult(e)
        }
        throw e
      }
      try {
        // Per-call arguments win over config, which wins over built-in defaults.
        const pages = await crawlSite(
          args.url,
          {
            maxPages: args.maxPages || config.crawl?.maxPages || 10,
            maxDepth: args.maxDepth ?? config.crawl?.maxDepth ?? 1,
            sameDomainOnly: args.sameDomainOnly ?? config.crawl?.sameDomainOnly ?? true,
          },
          timeoutMs,
          usageState,
          signal,
        )
        saveUsageState(usageState)
        return formattedText('crawl', {
          tool: 'crawl',
          pages,
        })
      } catch (error) {
        saveUsageState(usageState)
        const _rawErr = error instanceof Error ? error.message : String(error)
        return { ...jsonText({
          tool: 'crawl',
          url: args.url,
          error: normalizeErrorMessage(_rawErr),
        }), isError: true }
      }
    }

    case 'setup': {
      return await handleSetup(server)
    }
    default:
      throw new Error(`Unknown tool: ${name}`)
  }
}
|
|
1162
|
+
|
|
1163
|
+
// Tool invocation: unpack the request and delegate to handleToolCall,
// forwarding the request's abort signal when one is supplied.
server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name: toolName, arguments: toolArgs } = request.params
  return handleToolCall(toolName, toolArgs, { signal: extra?.signal })
})
|
|
1166
|
+
|
|
1167
|
+
/* ── Module exports (used when imported by mixdog-unified) ── */
|
|
1168
|
+
export { toolDefinitions as TOOL_DEFS }
|
|
1169
|
+
export { SEARCH_INSTRUCTIONS as instructions }
|
|
1170
|
+
|
|
1171
|
+
export { handleToolCall }
|
|
1172
|
+
export async function start() { await writeStartupSnapshot() }
|
|
1173
|
+
export function stop() { flushUsageState(); flushCacheState() }
|