mixdog 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. package/.claude-plugin/marketplace.json +31 -0
  2. package/.claude-plugin/plugin.json +20 -0
  3. package/.gitattributes +34 -0
  4. package/.mcp.json +14 -0
  5. package/ARCHITECTURE.md +77 -0
  6. package/CHANGELOG.md +7 -0
  7. package/CONTRIBUTING.md +45 -0
  8. package/DATA-FLOW.md +79 -0
  9. package/LICENSE +21 -0
  10. package/README.md +389 -0
  11. package/SECURITY.md +138 -0
  12. package/UNINSTALL.md +112 -0
  13. package/agents/maintenance.md +5 -0
  14. package/agents/memory-classification.md +30 -0
  15. package/agents/scheduler-task.md +18 -0
  16. package/agents/webhook-handler.md +27 -0
  17. package/agents/worker.md +24 -0
  18. package/bin/bridge +133 -0
  19. package/bin/statusline-launcher.mjs +78 -0
  20. package/bin/statusline-lib.mjs +550 -0
  21. package/bin/statusline.mjs +607 -0
  22. package/bun.lock +802 -0
  23. package/commands/config.md +16 -0
  24. package/commands/doctor.md +13 -0
  25. package/commands/setup.md +17 -0
  26. package/defaults/cycle3-review-prompt.md +90 -0
  27. package/defaults/hidden-roles.json +65 -0
  28. package/defaults/memory-chunk-prompt.md +63 -0
  29. package/defaults/memory-promote-prompt.md +135 -0
  30. package/defaults/mixdog-config.template.json +27 -0
  31. package/defaults/user-workflow.json +8 -0
  32. package/defaults/user-workflow.md +12 -0
  33. package/hooks/hooks.json +73 -0
  34. package/hooks/lib/active-instance.cjs +77 -0
  35. package/hooks/lib/permission-evaluator.cjs +411 -0
  36. package/hooks/lib/permission-route.cjs +63 -0
  37. package/hooks/lib/permission-rules.cjs +170 -0
  38. package/hooks/lib/settings-loader.cjs +116 -0
  39. package/hooks/post-tool-use.cjs +84 -0
  40. package/hooks/pre-mcp-sandbox.cjs +158 -0
  41. package/hooks/pre-tool-subagent.cjs +253 -0
  42. package/hooks/session-start.cjs +1372 -0
  43. package/hooks/turn-timer.cjs +82 -0
  44. package/lib/claude-md-writer.cjs +386 -0
  45. package/lib/config-cjs.cjs +61 -0
  46. package/lib/hook-pipe-path.cjs +10 -0
  47. package/lib/keychain-cjs.cjs +263 -0
  48. package/lib/plugin-paths.cjs +61 -0
  49. package/lib/rules-builder.cjs +241 -0
  50. package/lib/text-utils.cjs +61 -0
  51. package/native/README.md +117 -0
  52. package/native/prebuilt/linux-aarch64/mixdog-shim +0 -0
  53. package/native/prebuilt/linux-x86_64/mixdog-shim +0 -0
  54. package/native/prebuilt/macos-aarch64/mixdog-shim +0 -0
  55. package/native/prebuilt/macos-x86_64/mixdog-shim +0 -0
  56. package/native/prebuilt/windows-x86_64/mixdog-shim.exe +0 -0
  57. package/package.json +107 -0
  58. package/prompts/code-review.txt +16 -0
  59. package/prompts/security-audit.txt +17 -0
  60. package/rules/bridge/00-common.md +39 -0
  61. package/rules/bridge/20-skip-protocol.md +18 -0
  62. package/rules/bridge/30-explorer.md +33 -0
  63. package/rules/bridge/40-cycle1-agent.md +52 -0
  64. package/rules/bridge/41-cycle2-agent.md +62 -0
  65. package/rules/bridge/42-cycle3-agent.md +44 -0
  66. package/rules/lead/00-tool-lead.md +61 -0
  67. package/rules/lead/01-general.md +23 -0
  68. package/rules/lead/02-channels.md +49 -0
  69. package/rules/lead/03-team.md +27 -0
  70. package/rules/lead/04-workflow.md +20 -0
  71. package/rules/shared/00-language.md +14 -0
  72. package/rules/shared/01-tool.md +138 -0
  73. package/scripts/bootstrap.mjs +184 -0
  74. package/scripts/bridge-unify-smoke.mjs +308 -0
  75. package/scripts/build-runtime-linux.sh +348 -0
  76. package/scripts/build-runtime-macos.sh +217 -0
  77. package/scripts/build-runtime-windows.ps1 +242 -0
  78. package/scripts/builtin-utils-smoke.mjs +392 -0
  79. package/scripts/check-json.mjs +45 -0
  80. package/scripts/check-syntax-changed.mjs +102 -0
  81. package/scripts/check-syntax.mjs +58 -0
  82. package/scripts/code-graph-batch.test.mjs +33 -0
  83. package/scripts/config-preserve-smoke.mjs +180 -0
  84. package/scripts/doctor.mjs +484 -0
  85. package/scripts/edit-normalize-fuzz.mjs +130 -0
  86. package/scripts/edit-normalize-smoke.mjs +401 -0
  87. package/scripts/edit-operation-smoke.mjs +369 -0
  88. package/scripts/edit2-smoke.mjs +63 -0
  89. package/scripts/fuzzy-e2e.mjs +28 -0
  90. package/scripts/fuzzy-smoke.mjs +26 -0
  91. package/scripts/generate-runtime-manifest.mjs +166 -0
  92. package/scripts/guard-smoke.mjs +66 -0
  93. package/scripts/hidden-role-schema-smoke.mjs +162 -0
  94. package/scripts/hook-routing-smoke.mjs +29 -0
  95. package/scripts/inject-input.ps1 +204 -0
  96. package/scripts/io-complex-smoke.mjs +667 -0
  97. package/scripts/io-explore-bench.mjs +424 -0
  98. package/scripts/io-guardrails-smoke.mjs +205 -0
  99. package/scripts/io-mini-bench-baseline.json +11 -0
  100. package/scripts/io-mini-bench.mjs +216 -0
  101. package/scripts/io-route-harness.mjs +933 -0
  102. package/scripts/io-telemetry-report.mjs +691 -0
  103. package/scripts/mutation-bench.mjs +564 -0
  104. package/scripts/mutation-io-smoke.mjs +1081 -0
  105. package/scripts/native-patch-bridge-smoke.mjs +288 -0
  106. package/scripts/native-patch-smoke.mjs +304 -0
  107. package/scripts/patch-interior-context-smoke.mjs +49 -0
  108. package/scripts/patch-newline-utf8-smoke.mjs +157 -0
  109. package/scripts/perf-hook-smoke.mjs +71 -0
  110. package/scripts/permission-eval-smoke.mjs +426 -0
  111. package/scripts/prep-patch.mjs +53 -0
  112. package/scripts/prep-shim.mjs +96 -0
  113. package/scripts/provider-cache-smoke.mjs +687 -0
  114. package/scripts/report-runtime-health.mjs +132 -0
  115. package/scripts/run-mcp.mjs +1547 -0
  116. package/scripts/salvage-v4a-shatter.test.mjs +58 -0
  117. package/scripts/scoped-cache-io-smoke.mjs +103 -0
  118. package/scripts/shell-policy-round3-smoke.mjs +46 -0
  119. package/scripts/smoke-runtime-negative.ps1 +100 -0
  120. package/scripts/smoke-runtime-negative.sh +95 -0
  121. package/scripts/stall-policy-smoke.mjs +50 -0
  122. package/scripts/start-memory-worker.mjs +23 -0
  123. package/scripts/statusline-launcher-smoke.mjs +82 -0
  124. package/scripts/stress-atomic-write.mjs +1028 -0
  125. package/scripts/test-config-rmw-restore.mjs +122 -0
  126. package/scripts/test-fault-inject.mjs +164 -0
  127. package/scripts/test-large-file.mjs +174 -0
  128. package/scripts/tool-edge-smoke.mjs +209 -0
  129. package/scripts/uninstall.mjs +201 -0
  130. package/scripts/webhook-selfheal-smoke.mjs +29 -0
  131. package/scripts/write-overwrite-guard-smoke.mjs +56 -0
  132. package/server-main.mjs +3055 -0
  133. package/server.mjs +468 -0
  134. package/setup/config-merge.mjs +254 -0
  135. package/setup/install.mjs +120 -0
  136. package/setup/launch-core.mjs +507 -0
  137. package/setup/launch.mjs +101 -0
  138. package/setup/setup-server.mjs +3206 -0
  139. package/setup/setup.html +3693 -0
  140. package/skills/retro-skill-proposer/SKILL.md +92 -0
  141. package/skills/schedule-add/SKILL.md +77 -0
  142. package/skills/setup/SKILL.md +346 -0
  143. package/skills/webhook-add/SKILL.md +81 -0
  144. package/src/agent/bridge-stall-watchdog.mjs +337 -0
  145. package/src/agent/index.mjs +2138 -0
  146. package/src/agent/orchestrator/activity-bus.mjs +38 -0
  147. package/src/agent/orchestrator/ai-wrapped-dispatch.mjs +1010 -0
  148. package/src/agent/orchestrator/bridge-retry.mjs +220 -0
  149. package/src/agent/orchestrator/bridge-trace.mjs +583 -0
  150. package/src/agent/orchestrator/cache-mtime.mjs +58 -0
  151. package/src/agent/orchestrator/config.mjs +358 -0
  152. package/src/agent/orchestrator/context/collect.mjs +651 -0
  153. package/src/agent/orchestrator/dispatch-persist.mjs +549 -0
  154. package/src/agent/orchestrator/drain-registry.mjs +50 -0
  155. package/src/agent/orchestrator/explore-validator.mjs +8 -0
  156. package/src/agent/orchestrator/internal-roles.mjs +118 -0
  157. package/src/agent/orchestrator/internal-tools.mjs +88 -0
  158. package/src/agent/orchestrator/jobs.mjs +116 -0
  159. package/src/agent/orchestrator/mcp/client.mjs +364 -0
  160. package/src/agent/orchestrator/providers/anthropic-betas.mjs +21 -0
  161. package/src/agent/orchestrator/providers/anthropic-oauth.mjs +1745 -0
  162. package/src/agent/orchestrator/providers/anthropic.mjs +437 -0
  163. package/src/agent/orchestrator/providers/gemini.mjs +1175 -0
  164. package/src/agent/orchestrator/providers/grok-oauth.mjs +782 -0
  165. package/src/agent/orchestrator/providers/model-catalog.mjs +241 -0
  166. package/src/agent/orchestrator/providers/openai-compat.mjs +1467 -0
  167. package/src/agent/orchestrator/providers/openai-oauth-ws.mjs +1890 -0
  168. package/src/agent/orchestrator/providers/openai-oauth.mjs +1307 -0
  169. package/src/agent/orchestrator/providers/openai-ws.mjs +104 -0
  170. package/src/agent/orchestrator/providers/registry.mjs +192 -0
  171. package/src/agent/orchestrator/providers/retry-classifier.mjs +325 -0
  172. package/src/agent/orchestrator/session/abort-lookup.mjs +13 -0
  173. package/src/agent/orchestrator/session/cache/post-edit-marks.mjs +42 -0
  174. package/src/agent/orchestrator/session/cache/prefetch-cache.mjs +142 -0
  175. package/src/agent/orchestrator/session/cache/read-cache.mjs +319 -0
  176. package/src/agent/orchestrator/session/cache/scoped-cache-outcome.mjs +11 -0
  177. package/src/agent/orchestrator/session/cache/scoped-cache.mjs +361 -0
  178. package/src/agent/orchestrator/session/cache/util.mjs +49 -0
  179. package/src/agent/orchestrator/session/loop.mjs +1478 -0
  180. package/src/agent/orchestrator/session/manager.mjs +1975 -0
  181. package/src/agent/orchestrator/session/read-dedup.mjs +6 -0
  182. package/src/agent/orchestrator/session/result-classification.mjs +65 -0
  183. package/src/agent/orchestrator/session/save-session-worker.mjs +18 -0
  184. package/src/agent/orchestrator/session/store.mjs +624 -0
  185. package/src/agent/orchestrator/session/stream-watchdog.mjs +130 -0
  186. package/src/agent/orchestrator/session/tool-result-offload.mjs +166 -0
  187. package/src/agent/orchestrator/session/trim.mjs +491 -0
  188. package/src/agent/orchestrator/smart-bridge/CACHE-SHARD.md +115 -0
  189. package/src/agent/orchestrator/smart-bridge/bridge-llm.mjs +327 -0
  190. package/src/agent/orchestrator/smart-bridge/cache-obs.mjs +150 -0
  191. package/src/agent/orchestrator/smart-bridge/cache-strategy.mjs +228 -0
  192. package/src/agent/orchestrator/smart-bridge/index.mjs +215 -0
  193. package/src/agent/orchestrator/smart-bridge/profiles.mjs +37 -0
  194. package/src/agent/orchestrator/smart-bridge/registry.mjs +348 -0
  195. package/src/agent/orchestrator/smart-bridge/session-builder.mjs +116 -0
  196. package/src/agent/orchestrator/stall-policy.mjs +195 -0
  197. package/src/agent/orchestrator/tool-loop-guard.mjs +75 -0
  198. package/src/agent/orchestrator/tools/bash-policy-scan.mjs +77 -0
  199. package/src/agent/orchestrator/tools/bash-session.mjs +721 -0
  200. package/src/agent/orchestrator/tools/builtin/advisory-lock.mjs +171 -0
  201. package/src/agent/orchestrator/tools/builtin/arg-guard.mjs +455 -0
  202. package/src/agent/orchestrator/tools/builtin/atomic-write.mjs +236 -0
  203. package/src/agent/orchestrator/tools/builtin/bash-tool.mjs +480 -0
  204. package/src/agent/orchestrator/tools/builtin/binary-file.mjs +76 -0
  205. package/src/agent/orchestrator/tools/builtin/builtin-tools.mjs +256 -0
  206. package/src/agent/orchestrator/tools/builtin/cache-layers.mjs +386 -0
  207. package/src/agent/orchestrator/tools/builtin/cwd-utils.mjs +37 -0
  208. package/src/agent/orchestrator/tools/builtin/device-paths.mjs +154 -0
  209. package/src/agent/orchestrator/tools/builtin/diagnostics-tool.mjs +292 -0
  210. package/src/agent/orchestrator/tools/builtin/diff-utils.mjs +109 -0
  211. package/src/agent/orchestrator/tools/builtin/edit-base-guard.mjs +58 -0
  212. package/src/agent/orchestrator/tools/builtin/edit-byte-plan.mjs +240 -0
  213. package/src/agent/orchestrator/tools/builtin/edit-byte-utils.mjs +113 -0
  214. package/src/agent/orchestrator/tools/builtin/edit-commit.mjs +74 -0
  215. package/src/agent/orchestrator/tools/builtin/edit-context-utils.mjs +242 -0
  216. package/src/agent/orchestrator/tools/builtin/edit-diagnostics.mjs +211 -0
  217. package/src/agent/orchestrator/tools/builtin/edit-engine.mjs +1364 -0
  218. package/src/agent/orchestrator/tools/builtin/edit-failure-context.mjs +126 -0
  219. package/src/agent/orchestrator/tools/builtin/edit-hint.mjs +141 -0
  220. package/src/agent/orchestrator/tools/builtin/edit-match-utils.mjs +194 -0
  221. package/src/agent/orchestrator/tools/builtin/edit-partial-write.mjs +60 -0
  222. package/src/agent/orchestrator/tools/builtin/edit-stale-refresh.mjs +168 -0
  223. package/src/agent/orchestrator/tools/builtin/edit-tool.mjs +173 -0
  224. package/src/agent/orchestrator/tools/builtin/edit-utf8-guard.mjs +48 -0
  225. package/src/agent/orchestrator/tools/builtin/fs-reachability.mjs +48 -0
  226. package/src/agent/orchestrator/tools/builtin/fuzzy-match.mjs +99 -0
  227. package/src/agent/orchestrator/tools/builtin/glob-walk.mjs +170 -0
  228. package/src/agent/orchestrator/tools/builtin/grep-formatting.mjs +113 -0
  229. package/src/agent/orchestrator/tools/builtin/hash-utils.mjs +6 -0
  230. package/src/agent/orchestrator/tools/builtin/list-formatting.mjs +7 -0
  231. package/src/agent/orchestrator/tools/builtin/list-tool.mjs +593 -0
  232. package/src/agent/orchestrator/tools/builtin/native-edit-runner.mjs +89 -0
  233. package/src/agent/orchestrator/tools/builtin/notebook-edit-tool.mjs +300 -0
  234. package/src/agent/orchestrator/tools/builtin/open-config-tool.mjs +26 -0
  235. package/src/agent/orchestrator/tools/builtin/path-diagnostics.mjs +152 -0
  236. package/src/agent/orchestrator/tools/builtin/path-locks.mjs +35 -0
  237. package/src/agent/orchestrator/tools/builtin/path-utils.mjs +201 -0
  238. package/src/agent/orchestrator/tools/builtin/read-args.mjs +103 -0
  239. package/src/agent/orchestrator/tools/builtin/read-batch.mjs +172 -0
  240. package/src/agent/orchestrator/tools/builtin/read-constants.mjs +40 -0
  241. package/src/agent/orchestrator/tools/builtin/read-formatting.mjs +118 -0
  242. package/src/agent/orchestrator/tools/builtin/read-image-resize.mjs +189 -0
  243. package/src/agent/orchestrator/tools/builtin/read-image.mjs +88 -0
  244. package/src/agent/orchestrator/tools/builtin/read-lines.mjs +12 -0
  245. package/src/agent/orchestrator/tools/builtin/read-mode-tool.mjs +455 -0
  246. package/src/agent/orchestrator/tools/builtin/read-open.mjs +190 -0
  247. package/src/agent/orchestrator/tools/builtin/read-range-index.mjs +271 -0
  248. package/src/agent/orchestrator/tools/builtin/read-ranges.mjs +26 -0
  249. package/src/agent/orchestrator/tools/builtin/read-single-tool.mjs +728 -0
  250. package/src/agent/orchestrator/tools/builtin/read-snapshot-runtime.mjs +173 -0
  251. package/src/agent/orchestrator/tools/builtin/read-special-files.mjs +268 -0
  252. package/src/agent/orchestrator/tools/builtin/read-streaming.mjs +602 -0
  253. package/src/agent/orchestrator/tools/builtin/read-tool.mjs +530 -0
  254. package/src/agent/orchestrator/tools/builtin/read-windows.mjs +107 -0
  255. package/src/agent/orchestrator/tools/builtin/rename-tool.mjs +196 -0
  256. package/src/agent/orchestrator/tools/builtin/rg-runner.mjs +422 -0
  257. package/src/agent/orchestrator/tools/builtin/search-builders.mjs +158 -0
  258. package/src/agent/orchestrator/tools/builtin/search-tool.mjs +869 -0
  259. package/src/agent/orchestrator/tools/builtin/shell-analysis.mjs +653 -0
  260. package/src/agent/orchestrator/tools/builtin/shell-jobs.mjs +936 -0
  261. package/src/agent/orchestrator/tools/builtin/shell-output.mjs +36 -0
  262. package/src/agent/orchestrator/tools/builtin/shell-runtime.mjs +214 -0
  263. package/src/agent/orchestrator/tools/builtin/snapshot-helpers.mjs +143 -0
  264. package/src/agent/orchestrator/tools/builtin/snapshot-store.mjs +206 -0
  265. package/src/agent/orchestrator/tools/builtin/snapshot-validation.mjs +98 -0
  266. package/src/agent/orchestrator/tools/builtin/text-stats.mjs +69 -0
  267. package/src/agent/orchestrator/tools/builtin/windows-roots.mjs +23 -0
  268. package/src/agent/orchestrator/tools/builtin/write-tool.mjs +401 -0
  269. package/src/agent/orchestrator/tools/builtin.mjs +500 -0
  270. package/src/agent/orchestrator/tools/code-graph-prewarm-worker.mjs +39 -0
  271. package/src/agent/orchestrator/tools/code-graph-tool-defs.mjs +24 -0
  272. package/src/agent/orchestrator/tools/code-graph.mjs +4095 -0
  273. package/src/agent/orchestrator/tools/cwd-tool.mjs +298 -0
  274. package/src/agent/orchestrator/tools/destructive-warning.mjs +323 -0
  275. package/src/agent/orchestrator/tools/edit-normalize.mjs +603 -0
  276. package/src/agent/orchestrator/tools/env-scrub.mjs +100 -0
  277. package/src/agent/orchestrator/tools/graph-binary-fetcher.mjs +144 -0
  278. package/src/agent/orchestrator/tools/graph-manifest.json +26 -0
  279. package/src/agent/orchestrator/tools/host-input.mjs +204 -0
  280. package/src/agent/orchestrator/tools/mutation-content-cache.mjs +67 -0
  281. package/src/agent/orchestrator/tools/mutation-planner.mjs +75 -0
  282. package/src/agent/orchestrator/tools/next-call-utils.mjs +48 -0
  283. package/src/agent/orchestrator/tools/patch-binary-fetcher.mjs +133 -0
  284. package/src/agent/orchestrator/tools/patch-manifest.json +26 -0
  285. package/src/agent/orchestrator/tools/patch-tool-defs.mjs +20 -0
  286. package/src/agent/orchestrator/tools/patch.mjs +2754 -0
  287. package/src/agent/orchestrator/tools/progress-message.mjs +118 -0
  288. package/src/agent/orchestrator/tools/result-compression.mjs +279 -0
  289. package/src/agent/orchestrator/tools/shell-command.mjs +865 -0
  290. package/src/agent/orchestrator/tools/shell-exec-policy.mjs +89 -0
  291. package/src/agent/orchestrator/tools/shell-policy-danger-target.mjs +27 -0
  292. package/src/agent/orchestrator/tools/shell-policy-imports.mjs +7 -0
  293. package/src/agent/orchestrator/tools/shell-policy.mjs +345 -0
  294. package/src/agent/orchestrator/tools/shell-snapshot.mjs +313 -0
  295. package/src/agent/orchestrator/workflow-store.mjs +93 -0
  296. package/src/agent/tool-defs.mjs +103 -0
  297. package/src/channels/backends/discord.mjs +784 -0
  298. package/src/channels/data/voice-runtime-manifest.json +138 -0
  299. package/src/channels/index.mjs +3229 -0
  300. package/src/channels/lib/cli-worker-host.mjs +12 -0
  301. package/src/channels/lib/config-lock.mjs +13 -0
  302. package/src/channels/lib/config.mjs +292 -0
  303. package/src/channels/lib/drop-trace.mjs +71 -0
  304. package/src/channels/lib/event-pipeline.mjs +81 -0
  305. package/src/channels/lib/event-queue.mjs +345 -0
  306. package/src/channels/lib/executor.mjs +168 -0
  307. package/src/channels/lib/format.mjs +188 -0
  308. package/src/channels/lib/holidays.mjs +138 -0
  309. package/src/channels/lib/hook-pipe-server.mjs +802 -0
  310. package/src/channels/lib/interaction-workflows.mjs +184 -0
  311. package/src/channels/lib/memory-client.mjs +149 -0
  312. package/src/channels/lib/output-forwarder.mjs +765 -0
  313. package/src/channels/lib/runtime-paths.mjs +479 -0
  314. package/src/channels/lib/scheduler.mjs +723 -0
  315. package/src/channels/lib/session-control.mjs +36 -0
  316. package/src/channels/lib/session-discovery.mjs +103 -0
  317. package/src/channels/lib/settings.mjs +11 -0
  318. package/src/channels/lib/state-file.mjs +68 -0
  319. package/src/channels/lib/status-snapshot.mjs +219 -0
  320. package/src/channels/lib/tool-format.mjs +140 -0
  321. package/src/channels/lib/transcript-discovery.mjs +195 -0
  322. package/src/channels/lib/voice-runtime-fetcher.mjs +734 -0
  323. package/src/channels/lib/webhook.mjs +1179 -0
  324. package/src/channels/lib/whisper-server.mjs +477 -0
  325. package/src/channels/tool-defs.mjs +170 -0
  326. package/src/daemon/host.mjs +118 -0
  327. package/src/daemon/mcp-transport.mjs +47 -0
  328. package/src/daemon/session.mjs +100 -0
  329. package/src/daemon/thin-client.mjs +71 -0
  330. package/src/daemon/transport.mjs +163 -0
  331. package/src/memory/data/runtime-manifest.json +40 -0
  332. package/src/memory/index.mjs +3305 -0
  333. package/src/memory/lib/agent-ipc.mjs +93 -0
  334. package/src/memory/lib/bridge-trace-queries.mjs +120 -0
  335. package/src/memory/lib/core-memory-store.mjs +330 -0
  336. package/src/memory/lib/embedding-provider.mjs +269 -0
  337. package/src/memory/lib/embedding-worker.mjs +323 -0
  338. package/src/memory/lib/llm-worker-host.mjs +17 -0
  339. package/src/memory/lib/memory-cycle.mjs +11 -0
  340. package/src/memory/lib/memory-cycle1.mjs +641 -0
  341. package/src/memory/lib/memory-cycle2.mjs +1284 -0
  342. package/src/memory/lib/memory-cycle3.mjs +540 -0
  343. package/src/memory/lib/memory-embed.mjs +299 -0
  344. package/src/memory/lib/memory-extraction.mjs +5 -0
  345. package/src/memory/lib/memory-maintenance-store.mjs +32 -0
  346. package/src/memory/lib/memory-ops-policy.mjs +190 -0
  347. package/src/memory/lib/memory-recall-id-patch.mjs +15 -0
  348. package/src/memory/lib/memory-recall-read-query.mjs +7 -0
  349. package/src/memory/lib/memory-recall-scope-filter.mjs +63 -0
  350. package/src/memory/lib/memory-recall-store.mjs +621 -0
  351. package/src/memory/lib/memory-retrievers.mjs +112 -0
  352. package/src/memory/lib/memory-score.mjs +71 -0
  353. package/src/memory/lib/memory-text-utils.mjs +58 -0
  354. package/src/memory/lib/memory.mjs +412 -0
  355. package/src/memory/lib/model-profile.mjs +85 -0
  356. package/src/memory/lib/pg/adapter.mjs +308 -0
  357. package/src/memory/lib/pg/process.mjs +360 -0
  358. package/src/memory/lib/pg/supervisor.mjs +396 -0
  359. package/src/memory/lib/project-id-resolver.mjs +86 -0
  360. package/src/memory/lib/runtime-fetcher.mjs +442 -0
  361. package/src/memory/lib/trace-store.mjs +728 -0
  362. package/src/memory/tool-defs.mjs +79 -0
  363. package/src/search/index.mjs +1173 -0
  364. package/src/search/lib/backends/anthropic-oauth.mjs +98 -0
  365. package/src/search/lib/backends/exa.mjs +50 -0
  366. package/src/search/lib/backends/firecrawl.mjs +61 -0
  367. package/src/search/lib/backends/gemini-api.mjs +83 -0
  368. package/src/search/lib/backends/grok-oauth.mjs +86 -0
  369. package/src/search/lib/backends/index.mjs +150 -0
  370. package/src/search/lib/backends/openai-api.mjs +144 -0
  371. package/src/search/lib/backends/openai-oauth.mjs +98 -0
  372. package/src/search/lib/backends/openai-web-search.mjs +76 -0
  373. package/src/search/lib/backends/tavily.mjs +55 -0
  374. package/src/search/lib/backends/xai-api.mjs +113 -0
  375. package/src/search/lib/cache.mjs +131 -0
  376. package/src/search/lib/config.mjs +192 -0
  377. package/src/search/lib/formatter.mjs +115 -0
  378. package/src/search/lib/provider-usage.mjs +67 -0
  379. package/src/search/lib/providers.mjs +47 -0
  380. package/src/search/lib/search-intent.mjs +109 -0
  381. package/src/search/lib/setup-handler.mjs +261 -0
  382. package/src/search/lib/state.mjs +201 -0
  383. package/src/search/lib/web-tools.mjs +1207 -0
  384. package/src/search/tool-defs.mjs +83 -0
  385. package/src/setup/defender-exclusion.mjs +183 -0
  386. package/src/shared/abort-controller.mjs +15 -0
  387. package/src/shared/atomic-file.mjs +420 -0
  388. package/src/shared/config.mjs +350 -0
  389. package/src/shared/daemon-recycle.mjs +108 -0
  390. package/src/shared/disable-claude-builtins.mjs +88 -0
  391. package/src/shared/err-text.mjs +12 -0
  392. package/src/shared/llm/cost.mjs +66 -0
  393. package/src/shared/llm/http-agent.mjs +123 -0
  394. package/src/shared/llm/index.mjs +41 -0
  395. package/src/shared/llm/pid-cleanup.mjs +27 -0
  396. package/src/shared/llm/usage-log.mjs +47 -0
  397. package/src/shared/plugin-paths.mjs +58 -0
  398. package/src/shared/schedules-store.mjs +70 -0
  399. package/src/shared/seed.mjs +119 -0
  400. package/src/shared/user-cwd.mjs +213 -0
  401. package/src/shared/user-data-guard.mjs +238 -0
  402. package/src/status/aggregator.mjs +584 -0
  403. package/src/status/server.mjs +413 -0
  404. package/tools.json +1653 -0
@@ -0,0 +1,1547 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * MCP server launcher for mixdog (bun-only) — proxy supervisor.
4
+ *
5
+ * Boot sequence:
6
+ * 1. Resolve the shared data directory via plugin-paths.cjs.
7
+ * 2. Copy package.json + bun.lock there and run `bun install --frozen-lockfile`
8
+ * into <dataDir>/node_modules/ (only when the lockfile / dep-keys change).
9
+ * 3. Symlink pluginRoot/node_modules → dataDir/node_modules so all plugin
10
+ * code resolves deps from the shared install.
11
+ * 4. Spawn server.mjs with bun and proxy MCP stdio between Claude Code and
12
+ * the child. The proxy caches the client's initialize/initialized so a
13
+ * child kill (dev-sync --restart, crash) can be silently re-handshaken
14
+ * against a fresh child without forcing the client to reconnect.
15
+ *
16
+ * Single-runtime path: any failure throws — no node fallback.
17
+ */
18
+ import { fileURLToPath } from 'url';
19
+ import { createRequire } from 'module';
20
+ import { dirname, join } from 'path';
21
+ import * as fs from 'fs';
22
+ import { createHash, randomUUID } from 'crypto';
23
+ import { execSync, spawn, spawnSync } from 'child_process';
24
+ import * as os from 'os';
25
+ import { assertSafeOwnedDir } from '../src/shared/user-data-guard.mjs';
26
+
27
// Session id for this proxy supervisor, minted at most once per process.
//
// The child server.mjs is respawned on crash / dev-sync restart while this
// supervisor keeps running, so the id stays the same across child reconnects.
// thin-client.mjs advertises it on the daemon control frame; keeping it
// constant pins the daemon's bySession map to the LIVE connection instead of
// a fresh bootstrap UUID per reconnect, which used to strand detached worker
// results on stale (dead) connections.
//
// An id already supplied by the environment (MIXDOG_SESSION_ID) wins; a new
// random UUID is generated only when none is present.
const STABLE_TERMINAL_SESSION_ID = (() => {
  const inherited = process.env.MIXDOG_SESSION_ID;
  return inherited ? inherited : randomUUID();
})();
36
+
37
// Error codes from fs.renameSync that renameWithRetrySync treats as transient
// and retries; any other code is rethrown immediately.
const RENAME_RETRY_CODES = new Set([
  'EPERM',
  'EACCES',
  'EBUSY',
  'EEXIST',
]);

// Base delay (ms) before each retry, indexed by attempt number. The number of
// entries is also the maximum number of retries.
const RENAME_BACKOFFS_MS = Object.freeze([
  25, 50, 100, 200,
  400, 800, 1200, 1600,
]);
39
/**
 * Block the current thread for roughly `ms` milliseconds.
 *
 * Implemented with Atomics.wait on a private, never-notified cell, so the
 * wait always ends by timeout.
 *
 * @param {number} ms - Requested delay. Values that are not finite numbers
 *   (NaN, ±Infinity, non-numeric input) and values below 1 are treated as the
 *   1 ms minimum. An infinite timeout would otherwise block forever because
 *   nothing ever notifies the cell.
 * @returns {void}
 */
function sleepSync(ms) {
  try {
    const requested = Number(ms);
    const waitMs = Number.isFinite(requested) ? Math.max(1, requested) : 1;
    const cell = new Int32Array(new SharedArrayBuffer(4));
    Atomics.wait(cell, 0, 0, waitMs);
  } catch {
    // Deliberate best-effort: if the runtime cannot block here the caller
    // simply proceeds without the delay.
  }
}
45
/**
 * Rename `src` to `dst`, retrying while the failure code is listed in
 * RENAME_RETRY_CODES.
 *
 * Each retry waits the RENAME_BACKOFFS_MS entry for that attempt plus up to
 * 49 ms of random jitter. After the backoff table is exhausted, or on any
 * non-retryable code, the error from the last attempt is thrown.
 *
 * @param {string} src - Existing path.
 * @param {string} dst - Target path.
 * @returns {true} Always `true` on success.
 * @throws The error raised by the final fs.renameSync attempt.
 */
function renameWithRetrySync(src, dst) {
  const maxRetries = RENAME_BACKOFFS_MS.length;
  let attempt = 0;
  while (true) {
    try {
      fs.renameSync(src, dst);
      return true;
    } catch (err) {
      const retryable = RENAME_RETRY_CODES.has(err?.code);
      if (!retryable || attempt >= maxRetries) throw err;
      const jitterMs = Math.floor(Math.random() * 50);
      sleepSync(RENAME_BACKOFFS_MS[attempt] + jitterMs);
      attempt += 1;
    }
  }
}
59
+
60
// Directory containing this script, and the plugin root one level above it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const __localRoot = join(__dirname, '..');

/**
 * Pick the plugin root to run from.
 *
 * Reads ~/.claude/plugins/installed_plugins.json on every boot so that a
 * dev-sync --restart picks up new code without forcing the client to
 * reconnect. The first `mixdog@trib-plugin` entry's `installPath` is used
 * (backslashes normalised to forward slashes) when that path exists on disk.
 *
 * Every other outcome falls back to this script's own root and logs the
 * actual reason to stderr: unreadable or unparsable manifest, unexpected
 * shape, no usable entry, or an install path that does not exist.
 *
 * @returns {string} Resolved plugin root directory.
 */
function _resolveLatestPluginRoot() {
  let reason;
  try {
    const manifestPath = join(os.homedir(), '.claude', 'plugins', 'installed_plugins.json');
    const data = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    if (!data || typeof data !== 'object' || !data.plugins) {
      process.stderr.write('[run-mcp] WARN: installed_plugins.json has unexpected shape — using fallback\n');
      return __localRoot;
    }
    const installPath = data.plugins?.['mixdog@trib-plugin']?.[0]?.installPath;
    if (typeof installPath !== 'string' || installPath === '') {
      reason = 'no mixdog@trib-plugin installPath in manifest';
    } else {
      const latest = installPath.replace(/\\/g, '/');
      if (fs.existsSync(latest)) {
        return latest;
      }
      reason = `installPath does not exist: ${latest}`;
    }
  } catch (err) {
    // Keep the cause visible; the fallback itself is expected and harmless.
    reason = `manifest read failed (${err?.code || err?.message || String(err)})`;
  }
  process.stderr.write(`[run-mcp] manifest-lock-fallback: ${reason} — using boot pluginRoot as-is\n`);
  return __localRoot;
}
84
// Root the child server is launched from; may differ from this script's own
// root when the installed-plugins manifest points at another install.
const pluginRoot = _resolveLatestPluginRoot();
if (pluginRoot !== __localRoot) {
  process.stderr.write(`[run-mcp] supervisor proxying to latest cache: ${pluginRoot} (own=${__localRoot})\n`);
}

// Well-known paths inside the resolved plugin root.
const [serverPath, pluginPkg, pluginLock, pluginNm] = [
  'server.mjs',
  'package.json',
  'bun.lock',
  'node_modules',
].map((leaf) => join(pluginRoot, leaf));

// Boot-time marker: wall-clock timestamp at supervisor entry.
process.stderr.write(`[boot-time] tag=run-mcp-entry tMs=${Date.now()}\n`);
94
+
95
// Warn (never fail) when .claude-plugin/plugin.json and package.json carry
// different version strings. The check is skipped when either version is
// missing or either file cannot be read or parsed.
try {
  const readVersion = (jsonPath) => JSON.parse(fs.readFileSync(jsonPath, 'utf8')).version;
  const pluginVer = readVersion(join(pluginRoot, '.claude-plugin', 'plugin.json'));
  const packageVer = readVersion(pluginPkg);
  const bothKnown = Boolean(pluginVer) && Boolean(packageVer);
  if (bothKnown && pluginVer !== packageVer) {
    const headline = `[run-mcp] WARN: version mismatch — plugin.json=${pluginVer} package.json=${packageVer}\n`;
    const advice = ` Update package.json/.claude-plugin/plugin.json so both fields match.\n`;
    process.stderr.write(headline + advice);
  }
} catch {
  // Missing manifest — not run-mcp's concern.
}
106
+ // Note: the supervisor cache-version advert (read by dev-sync) is written
107
+ // by server.mjs at child boot, NOT here. Keeping advert/diagnostic writes
108
+ // out of run-mcp.mjs means future updates to that logic land via
109
+ // child-only restart and never sever the stdio bridge to Claude Code.
110
+ // server.mjs reads MIXDOG_SUPERVISOR_PID + MIXDOG_SUPERVISOR_CACHE_DIR
111
+ // from its env (set in spawnChild below) to identify the supervisor.
112
+
113
// Marker files, as path segments relative to a node_modules directory, that
// must all exist for hasRequiredDeps to report true.
const requiredDepNames = [
  ['@modelcontextprotocol', 'sdk', 'package.json'],
  ['zod', 'package.json'],
  ['zod-to-json-schema', 'package.json'],
  ['openai', 'package.json'],
];

/**
 * Check whether every marker file in requiredDepNames exists under `nmDir`.
 *
 * @param {string} nmDir - Path of a node_modules directory.
 * @returns {boolean} `true` only when all marker files are present.
 */
function hasRequiredDeps(nmDir) {
  for (const segments of requiredDepNames) {
    if (!fs.existsSync(join(nmDir, ...segments))) return false;
  }
  return true;
}
123
+
124
+ // ── Lightweight JSON-RPC line scanner ────────────────────────────────────────
125
+ // Extracts `id` and `method` from a JSON-RPC line without a full JSON.parse.
126
+ // Returns { id, method } (each may be undefined), or null on scan failure.
127
+
128
/**
 * Decide whether a raw line must go through a full JSON.parse.
 *
 * A line qualifies when its text contains an `"id"` key followed by a
 * negative number (internal negative-id traffic) or a `"method"` value that
 * starts with `initializ` (initialize / initialized). The check is a plain
 * text match, not a structural parse.
 *
 * @param {string} line - One raw input line.
 * @returns {boolean} `true` when a full parse is required.
 */
function _lineNeedsFullParse(line) {
  const hasNegativeId = /"id"\s*:\s*-/.test(line);
  return hasNegativeId || /"method"\s*:\s*"initializ/.test(line);
}
134
+
135
// Regex source fragments for JSON tokens, kept as strings so they can be
// embedded in larger patterns.
// A double-quoted string: any run of escape pairs or characters other than
// a quote or backslash.
const _JSON_STRING_RE = '"(?:\\\\.|[^"\\\\])*"';
// A JSON number: optional sign, integer part without leading zeros, optional
// fraction, optional exponent.
const _JSON_NUMBER_RE = '-?(?:0|[1-9]\\d*)(?:\\.\\d+)?(?:[eE][+-]?\\d+)?';
// Matches input that consists of exactly one JSON string literal.
const _JSON_STRING_ONLY_RE = new RegExp(['^', _JSON_STRING_RE, '$'].join(''));
138
+
139
/**
 * Convert the raw text of a scalar JSON value into its JavaScript value.
 *
 * Recognises `null`, JSON-shaped numbers, and double-quoted strings (decoded
 * with JSON.parse). Anything else, including a quoted string that fails to
 * parse, yields `undefined`.
 *
 * @param {string} raw - Raw token text.
 * @returns {null|number|string|undefined} Decoded value, or `undefined` when
 *   the token is not one of the recognised scalar forms.
 */
function _parseJsonRpcScalar(raw) {
  if (raw === 'null') return null;

  const numberShape = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;
  if (numberShape.test(raw)) return Number(raw);

  const looksQuoted = Boolean(raw) && raw[0] === '"';
  if (!looksQuoted) return undefined;

  try {
    return JSON.parse(raw);
  } catch {
    // Malformed string literal — reported as "not a scalar".
    return undefined;
  }
}
147
+
148
/**
 * Advance past whitespace (anything matching `\s`) starting at index `i`.
 *
 * @param {string} s - Text being scanned.
 * @param {number} i - Start index.
 * @returns {number} Index of the first non-whitespace character at or after
 *   `i`, or the position where scanning ran off the end of `s`.
 */
function _skipJsonWs(s, i) {
  let pos = i;
  for (; pos < s.length; pos++) {
    if (!/\s/.test(s[pos])) break;
  }
  return pos;
}
152
+
153
/**
 * Read one double-quoted string literal that starts exactly at index `i`.
 *
 * A backslash always consumes the character after it, so an escaped quote
 * does not terminate the literal. Escape sequences are not validated.
 *
 * @param {string} s - Text being scanned.
 * @param {number} i - Index expected to hold the opening quote.
 * @returns {{raw: string, end: number}|null} The literal including both
 *   quotes plus the index just past the closing quote, or `null` when `s[i]`
 *   is not a quote or the literal is unterminated.
 */
function _readJsonStringLiteral(s, i) {
  if (s[i] !== '"') return null;

  let pos = i + 1;
  while (pos < s.length) {
    const ch = s[pos];
    if (ch === '\\') {
      pos += 2; // skip the backslash and the character it escapes
      continue;
    }
    if (ch === '"') {
      const end = pos + 1;
      return { raw: s.slice(i, end), end };
    }
    pos += 1;
  }
  return null;
}
164
+
165
/**
 * Find where the JSON value beginning at (or after whitespace from) `i` ends.
 *
 * - string: index just past the closing quote, -1 when unterminated
 * - object / array: index just past the bracket that balances the opening
 *   one (only brackets of the opening kind are counted; brackets inside
 *   strings are ignored), -1 when never balanced
 * - anything else: index of the next `,`, `}` or `]` (or end of text)
 *
 * @param {string} s - text being scanned
 * @param {number} i - index at or before the start of the value
 * @returns {number} end index (exclusive) or -1 on failure
 */
function _skipJsonValue(s, i) {
  const start = _skipJsonWs(s, i);
  const first = s[start];

  if (first === '"') {
    const lit = _readJsonStringLiteral(s, start);
    return lit ? lit.end : -1;
  }

  if (first !== '{' && first !== '[') {
    // Bare scalar (number / true / false / null): runs up to the next delimiter.
    let k = start;
    while (k < s.length && s[k] !== ',' && s[k] !== '}' && s[k] !== ']') k++;
    return k;
  }

  const closer = first === '{' ? '}' : ']';
  let depth = 0;
  let k = start;
  while (k < s.length) {
    const c = s[k];
    if (c === '"') {
      // Jump over the whole string so brackets inside it are not counted.
      const lit = _readJsonStringLiteral(s, k);
      if (!lit) return -1;
      k = lit.end;
      continue;
    }
    if (c === first) {
      depth++;
    } else if (c === closer) {
      depth--;
      if (depth === 0) return k + 1;
    }
    k++;
  }
  return -1;
}
195
+
196
+ // Cheap extraction of `id` + `method` for the common single-message JSON-RPC
197
+ // hot path. Batch payloads are rare and still use JSON.parse so per-item
198
+ // errors stay exact. Non-RPC/noise lines return null and are quarantined.
199
/**
 * Cheaply extract `id` and `method` from one JSON-RPC line.
 *
 * @param {string} line - raw line (surrounding whitespace is ignored)
 * @returns {null | {id: *, method: *, _malformed: boolean}
 *   | Array<{id: *, method?: *, _malformed: boolean}>}
 *   - a batch (`[...]`) is fully parsed and mapped per item; items that are
 *     not plain objects become `{ id: null, _malformed: true }`
 *   - a single object yields `{ id, method, _malformed: false }`, where a
 *     missing key is `undefined`
 *   - null when the line is empty, is not a single `{...}` object, fails the
 *     scan, has an `id` that is not null/number/string, a `method` that is
 *     not a string, or carries neither key
 */
function _scanIdMethod(line) {
  try {
    const s = String(line || '').trim();
    if (!s) return null;
    if (s[0] === '[') {
      const obj = JSON.parse(s);
      if (!Array.isArray(obj)) return null;
      return obj.map(item => {
        if (!item || typeof item !== 'object' || Array.isArray(item)) {
          return { id: null, _malformed: true };
        }
        return { id: item.id, method: item.method, _malformed: false };
      });
    }
    const last = s.length - 1;
    if (s[0] !== '{' || s[last] !== '}') return null;
    let id;
    let method;
    let sawId = false;
    let sawMethod = false;
    let i = 1;
    while (i < last) {
      i = _skipJsonWs(s, i);
      if (s[i] === ',') { i++; continue; }
      if (s[i] === '}') break;
      const keyLit = _readJsonStringLiteral(s, i);
      if (!keyLit) return null;
      let key;
      try { key = JSON.parse(keyLit.raw); } catch { return null; }
      i = _skipJsonWs(s, keyLit.end);
      if (s[i] !== ':') return null;
      i = _skipJsonWs(s, i + 1);
      const valueStart = i;
      const valueEnd = _skipJsonValue(s, valueStart);
      if (valueEnd < 0) return null;
      if (key === 'id') {
        const raw = s.slice(valueStart, valueEnd).trim();
        id = _parseJsonRpcScalar(raw);
        if (id === undefined) return null;
        sawId = true;
      } else if (key === 'method') {
        const raw = s.slice(valueStart, valueEnd).trim();
        if (!_JSON_STRING_ONLY_RE.test(raw)) return null;
        try { method = JSON.parse(raw); } catch { return null; }
        sawMethod = true;
      }
      i = valueEnd;
    }
    // The top-level object must close exactly on the final character.
    // i < last: the object closed early and text follows it (for example two
    // frames glued together). i > last: a nested container consumed the final
    // brace, so the outer object never closes. Either way this is not one
    // well-formed frame.
    if (i !== last) return null;
    if (!sawId && !sawMethod) return null;
    return { id: sawId ? id : undefined, method: sawMethod ? method : undefined, _malformed: false };
  } catch {
    // Report scan failure as null rather than a "malformed" sentinel so
    // callers treat the line as unscannable (handleClientLine then falls
    // back to JSON.parse and quarantines real noise) instead of forwarding
    // non-JSON bytes into the JSON-RPC frame stream.
    return null;
  }
}
258
+
259
const LOCK_POLL_MS = 250;
const LOCK_MAX_MS = 15 * 60 * 1000;
const LOCK_XHOST_MS = 10 * 60 * 1000;

// Decide whether an existing install lock can be reclaimed.
// - Same-host owner with a usable pid: stale only when the pid probe reports
//   ESRCH (no such process). Any other probe outcome counts as alive.
// - Foreign-host owner, or a lock whose body is unreadable / corrupt / lacks
//   a usable pid (e.g. the writer died between creating and writing the
//   file): stale once its mtime is older than LOCK_XHOST_MS.
// Returns false when the lock vanished while being inspected.
function _installLockIsStale(lockFile) {
  let st;
  let owner = null;
  try {
    st = fs.statSync(lockFile);
    owner = JSON.parse(fs.readFileSync(lockFile, 'utf8'));
  } catch {
    if (!st) return false; // lock released meanwhile — nothing to reclaim
    owner = null; // unreadable or corrupt body: fall through to mtime check
  }
  const pid = owner?.pid;
  if (owner?.hostname === os.hostname() && Number.isInteger(pid) && pid > 0) {
    try {
      process.kill(pid, 0); // signal 0 = existence probe only
      return false;
    } catch (ke) {
      return ke.code === 'ESRCH';
    }
  }
  return Date.now() - st.mtimeMs > LOCK_XHOST_MS;
}

/**
 * Block until the dependency-install lock file has been created by this
 * process.
 *
 * The lock is created with the 'wx' flag so creation fails atomically when
 * the file already exists. While another owner holds it, the lock is polled
 * every LOCK_POLL_MS and reclaimed when judged stale.
 *
 * @param {string} lockFile - path of the lock file
 * @throws {Error} when the lock could not be acquired within LOCK_MAX_MS, or
 *   when creating it fails for any reason other than EEXIST
 */
function acquireLock(lockFile) {
  const deadline = Date.now() + LOCK_MAX_MS;
  // Scratch cell for Atomics.wait, used purely as a synchronous sleep.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  while (Date.now() < deadline) {
    try {
      const body = JSON.stringify({
        pid: process.pid,
        hostname: os.hostname(),
        startedAt: Date.now(),
      });
      // 'wx' = O_CREAT | O_EXCL — fails atomically if file already exists.
      fs.writeFileSync(lockFile, body, { flag: 'wx' });
      return;
    } catch (e) {
      if (e.code !== 'EEXIST') throw e;
    }
    if (_installLockIsStale(lockFile)) {
      try { fs.unlinkSync(lockFile); } catch { /* someone else reclaimed it first */ }
    }
    Atomics.wait(sleepCell, 0, 0, LOCK_POLL_MS);
  }
  throw new Error(
    `timed out waiting for dependency install lock after ${LOCK_MAX_MS / 60000} minutes`
  );
}
298
+
299
// Remove the install lock file. Best-effort: any failure (for example the
// file is already gone) is deliberately ignored.
function releaseLock(lockFile) {
  try {
    fs.unlinkSync(lockFile);
  } catch {
    // nothing to release
  }
}
302
+
303
/**
 * Make `linkPath` a directory link (junction on win32) to `targetPath`.
 *
 * - nothing at linkPath: create the link
 * - a symlink already pointing at targetPath: leave it alone
 * - any other symlink: unlink it and create the link
 * - a real file/directory: remove it recursively and create the link
 *
 * EBUSY / EPERM failures are treated as transient locks: they are reported
 * on stderr and the function returns without throwing. Every other error
 * propagates.
 *
 * @param {string} linkPath - where the link should live
 * @param {string} targetPath - directory the link should point to
 */
function ensureNmSymlink(linkPath, targetPath) {
  const linkType = process.platform === 'win32' ? 'junction' : 'dir';
  const isTransientLock = (e) => e.code === 'EBUSY' || e.code === 'EPERM';
  const MAX_ATTEMPTS = 5;
  // Scratch cell for Atomics.wait: a synchronous sleep that does not spin the
  // CPU the way a `while (Date.now() < end) {}` loop does.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));

  // EPERM/EBUSY here is almost always a transient AV / indexer lock on the
  // freshly-created junction. Retry with bounded backoff (50+100+150+200 =
  // 500ms in total) before giving up so a healthy boot doesn't have to wait
  // for the next start.
  const trySymlink = () => {
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
      try {
        fs.symlinkSync(targetPath, linkPath, linkType);
        return;
      } catch (e) {
        if (!isTransientLock(e)) throw e;
        if (attempt === MAX_ATTEMPTS - 1) {
          process.stderr.write(`[run-mcp] WARN: symlinkSync ${e.code} (${linkPath}) after retries — next boot retry\n`);
          return;
        }
        Atomics.wait(sleepCell, 0, 0, 50 * (attempt + 1));
      }
    }
  };

  let stat;
  try { stat = fs.lstatSync(linkPath); } catch { stat = null; }
  if (stat === null) { trySymlink(); return; }

  if (stat.isSymbolicLink()) {
    try {
      if (fs.readlinkSync(linkPath) === targetPath) return; // already correct
    } catch { /* unreadable link: fall through and replace it */ }
    try {
      fs.unlinkSync(linkPath);
    } catch (e) {
      if (!isTransientLock(e)) throw e;
      process.stderr.write(`[run-mcp] WARN: unlinkSync ${e.code} (${linkPath}) — next boot retry\n`);
      return;
    }
    trySymlink();
    return;
  }

  // A real file or directory occupies the link path: remove it first.
  try {
    fs.rmSync(linkPath, { recursive: true, force: true });
  } catch (e) {
    if (!isTransientLock(e)) throw e;
    process.stderr.write(`[run-mcp] WARN: cache node_modules locked by live process (${e.code}), skipping symlink replacement — next boot retry\n`);
    return;
  }
  trySymlink();
}
355
+
356
// Hex-encoded SHA-256 digest of a string or Buffer.
function sha256(buf) {
  return createHash('sha256').update(buf).digest('hex');
}

/**
 * Hash identifying the dependency set that should be installed.
 *
 * Primary: the raw bytes of the lock file (bun.lock) when it exists.
 * Fallback: the `dependencies`, `optionalDependencies` and
 * `peerDependencies` objects from package.json with their keys sorted, so the
 * very first install — before bun.lock exists — still hashes
 * deterministically regardless of key order in the file.
 *
 * Keys are ordered by plain code-unit comparison, not `localeCompare`, whose
 * result depends on the runtime's locale / ICU data and could therefore
 * change the hash without any dependency change.
 *
 * @param {string} pkgJsonPath - path to package.json
 * @param {string} pkgLockPath - path to the lock file (may not exist)
 * @returns {string} hex SHA-256 digest
 * @throws when package.json is needed but cannot be read or parsed
 */
function computeDepHash(pkgJsonPath, pkgLockPath) {
  if (fs.existsSync(pkgLockPath)) {
    return sha256(fs.readFileSync(pkgLockPath));
  }
  const pkg = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8'));
  const byCodeUnit = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);
  const depObj = {};
  for (const k of ['dependencies', 'optionalDependencies', 'peerDependencies']) {
    if (pkg[k]) {
      depObj[k] = Object.fromEntries(Object.entries(pkg[k]).sort(byCodeUnit));
    }
  }
  return sha256(Buffer.from(JSON.stringify(depObj)));
}
381
+
382
+ const require = createRequire(import.meta.url);
383
+ const { resolvePluginData } = require('../lib/plugin-paths.cjs');
384
+ const dataDir = resolvePluginData();
385
+
386
+ fs.mkdirSync(dataDir, { recursive: true });
387
+
388
+ // ── Supervisor self-cleanup on stdio loss ──────────────────────────────────
389
+ // Lifecycle invariant: this supervisor is owned by exactly one Claude Code
390
+ // MCP client (the process that spawned us). When that client tears down its
391
+ // end of stdio — IDE quit, mcp server toggle, restart — our stdin closes.
392
+ // Without a handler we'd linger forever (the comment near killChild
393
+ // historically defended this on grounds of "transient stdin events", but
394
+ // stdio close is not transient: it's the OS reporting EOF). Lingering
395
+ // supervisors accumulate as zombies and confuse Claude Code's routing layer
396
+ // on the next reconnect (it spawns a new supervisor; the old one stays
397
+ // alive answering nothing).
398
+ //
399
+ // Multi-session safety: each Claude Code session spawns its own supervisor
400
+ // with its own stdio. EOF on our stdin only signals OUR client going away.
401
+ // Nothing here touches another session's supervisor — they have their own
402
+ // pipe and their own EOF.
403
+ //
404
+ // Light diagnostic lock: record our PID in supervisor.lock for ps-style
405
+ // visibility, but never kill a PID found there. Stale entries are harmless;
406
+ // the stdin-EOF handler is the actual liveness mechanism.
407
+ const SUPERVISOR_LOCK = join(dataDir, 'supervisor.lock');
408
+ try {
409
+ fs.writeFileSync(SUPERVISOR_LOCK, String(process.pid));
410
+ const _releaseSupervisorLock = () => {
411
+ try {
412
+ const recorded = parseInt(fs.readFileSync(SUPERVISOR_LOCK, 'utf8').trim(), 10);
413
+ // Only unlink if the lock still names us — another supervisor may have
414
+ // overwritten it (multi-session, restart). Don't clobber theirs.
415
+ if (recorded === process.pid) fs.unlinkSync(SUPERVISOR_LOCK);
416
+ } catch {}
417
+ };
418
+ process.on('exit', _releaseSupervisorLock);
419
+ // SIGINT/SIGTERM are handled cooperatively by killChild (registered
420
+ // later in this file). killChild gracefully shuts down the child and
421
+ // then calls process.exit(code), which fires the 'exit' listener
422
+ // above to release the lock. Do NOT register a separate signal
423
+ // handler here that calls process.exit(0) — it short-circuits the
424
+ // killChild listener and orphans the MCP child. SIGHUP has no
425
+ // killChild listener, so handle it terminally here.
426
+ try {
427
+ process.on('SIGHUP', () => process.exit(0));
428
+ } catch { /* SIGHUP unsupported on Windows — ignore */ }
429
+ } catch (e) {
430
+ process.stderr.write(`[run-mcp] supervisor lock write failed: ${e?.message || e}\n`);
431
+ }
432
+
433
+ // Install runtime deps into a DEDICATED <dataDir>/.deps/ subdir — NEVER the
434
+ // data root, which holds user data (mixdog-config.json, user-workflow.*,
435
+ // roles/). Running `bun install` with cwd=dataDir would wipe that state.
436
+ const depsDir = join(dataDir, '.deps');
437
+ const sharedPkg = join(depsDir, 'package.json');
438
+ const sharedLock = join(depsDir, 'bun.lock');
439
+ const sharedNm = join(depsDir, 'node_modules');
440
+ const stamp = join(depsDir, '.deps-stamp');
441
+ const stampTmp = join(depsDir, '.deps-stamp.tmp');
442
+ const lockFile = join(depsDir, '.install.lock');
443
+
444
+ const currentHash = computeDepHash(pluginPkg, pluginLock);
445
+ let storedHash = '';
446
+ try { storedHash = fs.readFileSync(stamp, 'utf8').trim(); } catch {}
447
+
448
+ const needsInstall = (currentHash !== storedHash) || !hasRequiredDeps(sharedNm);
449
+
450
if (needsInstall) {
  // Hard guard: refuse to install anywhere that would clobber user data.
  // assertSafeOwnedDir throws unless depsDir is an owned subdir (.deps).
  assertSafeOwnedDir(depsDir, dataDir, 'bun install');
  fs.mkdirSync(depsDir, { recursive: true });
  acquireLock(lockFile);
  try {
    fs.copyFileSync(pluginPkg, sharedPkg);
    if (fs.existsSync(pluginLock)) fs.copyFileSync(pluginLock, sharedLock);

    const args = fs.existsSync(sharedLock)
      ? ['install', '--frozen-lockfile']
      : ['install'];
    process.stderr.write(`[run-mcp] installing shared deps: bun ${args.join(' ')}\n`);

    // First install on a clean machine downloads + extracts all deps, which
    // routinely exceeds 30s; too low a ceiling times out into an empty
    // node_modules and aborts the very first boot. 3 minutes covers a cold
    // network fetch while still bounding a genuinely stuck install.
    const INSTALL_TIMEOUT_MS = 180_000;
    const result = spawnSync(process.env.BUN_EXEC_PATH || process.execPath, args, {
      cwd: depsDir,
      stdio: 'inherit',
      timeout: INSTALL_TIMEOUT_MS,
      windowsHide: true,
    });
    if (result.error?.code === 'ETIMEDOUT' || result.signal === 'SIGTERM') {
      process.stderr.write(
        `[run-mcp] WARN: bun install timed out after ${INSTALL_TIMEOUT_MS}ms — ` +
        `continuing with existing node_modules (stale lock removed)\n`
      );
      // The lock is removed exactly once, by the `finally` below. Unlinking
      // it here as well would let the second unlink delete a lock that
      // another process managed to acquire in between.
    } else if (result.status !== 0) {
      const detail = result.status ?? result.signal ?? 'unknown';
      process.stderr.write(
        `[run-mcp] WARN: bun install exited with status ${detail} — ` +
        `continuing with existing node_modules if available\n`
      );
    } else {
      // Atomic stamp write: tmp + rename so a crash cannot leave it half-written.
      fs.writeFileSync(stampTmp, currentHash);
      renameWithRetrySync(stampTmp, stamp);
    }
  } finally {
    releaseLock(lockFile);
  }
}
497
+
498
+ ensureNmSymlink(pluginNm, sharedNm);
499
+
500
const probe = join(pluginNm, '@modelcontextprotocol', 'sdk', 'package.json');
if (!fs.existsSync(probe)) {
  // The SDK is not reachable through the plugin's node_modules path: the link
  // may be missing/stale or the install may have failed.
  // hasRequiredDeps() demands the COMPLETE required set, so the rule is:
  //   - a complete set exists in the shared cache or under the plugin path
  //     → warn and continue;
  //   - otherwise → abort with guidance.
  const completeSetPresent = hasRequiredDeps(sharedNm) || hasRequiredDeps(pluginNm);
  if (completeSetPresent) {
    process.stderr.write(
      `[run-mcp] WARN: @modelcontextprotocol/sdk not found at expected path after install — ` +
      `continuing with available node_modules\n`
    );
  } else {
    process.stderr.write(
      `[run-mcp] ERROR: node_modules is incomplete and bun install did not succeed.\n` +
      `  Run \`bun install\` manually in ${pluginRoot} and retry.\n`
    );
    process.exit(1);
  }
}
519
+
520
+ const isWin = process.platform === 'win32';
521
+
522
+ // Proxy supervisor: parses NDJSON JSON-RPC, caches initialize so child kills
523
+ // are silent to the client; in-flight requests get a retry-able error on child death.
524
+
525
+ const CRASH_WINDOW_MS = 10_000;
526
+ const CRASH_MAX_RESTARTS = 5;
527
+ const CRASH_BACKOFF_MS = 500;
528
+ // Dev-sync respawn gate. dev-sync writes this lock (pid + ts) BEFORE killing
529
+ // the daemon/child and removes it AFTER the marketplace→cache copy completes.
530
+ // The respawn path below waits while the lock is present so the fresh child
531
+ // loads post-sync code instead of racing the copy and getting SIGTERMed (the
532
+ // "one wasted respawn" race). Hard mtime staleness cutoff so a crashed
533
+ // dev-sync can never deadlock respawns. Poll cadence mirrors the kill-delay
534
+ // granularity used elsewhere.
535
+ const DEV_SYNC_LOCK = join(dataDir, 'dev-sync-cache-write.lock');
536
+ const DEV_SYNC_GATE_POLL_MS = 100;
537
+ const DEV_SYNC_GATE_STALE_MS = 30_000;
538
+ // Hung-dev-sync escape hatch: even a live PID stops gating past this age, so a
539
+ // wedged dev-sync can never deadlock respawns forever.
540
+ const DEV_SYNC_GATE_HARD_CAP_MS = 5 * 60_000;
541
// True while dev-sync is mid cache-copy. runSync() is synchronous (spawnSync,
// bun install) so the lock mtime cannot heartbeat during long work — decide by
// PID LIVENESS: a live lock owner keeps gating (up to the 5min hard cap),
// regardless of mtime age. A dead owner stops gating immediately. Only a lock
// that is unparseable or has no usable pid falls back to the 30s mtime cutoff.
546
// Report whether respawns should keep waiting on the dev-sync lock file.
//   - lock missing                      → false
//   - lock unparseable / no usable pid  → true only while the lock mtime is
//                                         within DEV_SYNC_GATE_STALE_MS
//   - owner pid dead                    → false
//   - owner pid alive                   → true until the lock (its `ts`
//                                         field, else its mtime) is older
//                                         than DEV_SYNC_GATE_HARD_CAP_MS
function devSyncCacheWriteInProgress() {
  let lockStat;
  try {
    lockStat = fs.statSync(DEV_SYNC_LOCK);
  } catch {
    return false; // missing lock → not syncing
  }
  const withinMtimeCutoff = (Date.now() - lockStat.mtimeMs) <= DEV_SYNC_GATE_STALE_MS;

  let ownerPid = null;
  let stampedAt = null;
  try {
    const lock = JSON.parse(fs.readFileSync(DEV_SYNC_LOCK, 'utf8'));
    ownerPid = Number(lock?.pid) || null;
    stampedAt = Number(lock?.ts) || null;
  } catch {
    return withinMtimeCutoff; // unparseable lock → mtime cutoff fallback
  }
  if (!ownerPid) return withinMtimeCutoff;

  // Signal 0 is a pure existence probe. A throw with EPERM still counts as
  // alive; any other failure counts as dead.
  let ownerAlive = true;
  try {
    process.kill(ownerPid, 0);
  } catch (err) {
    ownerAlive = Boolean(err) && err.code === 'EPERM';
  }
  // Dead owner: its copy will never finish, so stop gating right away.
  if (!ownerAlive) return false;

  // Live owner: keep gating unless the lock is older than the hard cap.
  const lockAgeMs = Date.now() - (stampedAt ?? lockStat.mtimeMs);
  return lockAgeMs <= DEV_SYNC_GATE_HARD_CAP_MS;
}
579
+ // child stderr ring-buffer cap. 16 KB carries the last progress lines plus
580
+ // any final throw/abort stack without flooding supervisor.log on a runaway
581
+ // error loop.
582
+ const STDERR_TAIL_BYTES = 16 * 1024;
583
+ // Inbound frame guardrail. JSON-RPC lines are newline-terminated; an
584
+ // unterminated line that grows past this cap signals a runaway producer
585
+ // (corrupted child stdout, hostile client stdin). 4 MB comfortably fits
586
+ // any legitimate tool result while preventing unbounded memory growth.
587
+ const MAX_LINE_BYTES = 4 * 1024 * 1024;
588
+
589
+ let proc = null;
590
+ let shuttingDown = false;
591
+ let respawnTimer = null;
592
+ const recentRestarts = [];
593
+
594
+ // Handshake-readiness gate. A spawned child accepts stdin immediately but may
595
+ // be stuck in module-init (singleton lock contention, malformed cache, etc.)
596
+ // and never emit a response. Without this gate the supervisor forwarded
597
+ // requests into a black hole and the MCP layer either timed out or hung
598
+ // indefinitely. Invariant: forward only initialize-class traffic until the
599
+ // child has produced ≥1 stdout line; reply retry-able to anything else.
600
+ let childHasResponded = false;
601
+ // Respawn-orphan guard. When a child is replaced (crash/dev-sync), the client
602
+ // must re-fetch tools — but ONLY once the NEW child can answer tools/list.
603
+ // Firing notifications/tools/list_changed before the child has responded races
604
+ // the client's follow-up tools/list into the closed handshake gate, where it
605
+ // gets a -32603 "retry" the client may never re-issue — leaving the session
606
+ // with an empty tool list ("connected but no tools"). Deferred until
607
+ // childHasResponded flips true post-respawn (see handleChildLine).
608
+ let announceListChangedOnReady = false;
609
+ let cachedInitRequest = null; // { id, params } from client's first initialize
610
+ let cachedInitDone = false; // initialized notification observed from client
611
+ let internalIdSeq = -1; // negative ids reserved for supervisor-internal requests
612
+ const pendingFromClient = new Map(); // request id (from client) → { method }
613
+ const pendingInternal = new Set(); // internal ids (init replay) — drop responses
614
+ let stdinBuf = '';
615
+ let stdoutBuf = '';
616
+ let childStderrBuf = '';
617
+ let currentChildPluginRoot = pluginRoot;
618
+
619
+ // Supervisor diagnostic log. Distinct from mcp-debug.log (which is the
620
+ // child's own log via server-main.mjs:LOG_FILE). Captures transport-level
621
+ // events that previously had no audit trail: quarantined non-JSON lines,
622
+ // write errors, backpressure drain pauses. First place to inspect when
623
+ // "all tools hang" — supervisor stays alive even when the JSON-RPC stream
624
+ // to the client is wedged.
625
+ const SUPERVISOR_LOG = join(dataDir, 'supervisor.log');
626
+ const SUPERVISOR_LOG_SCOPED = join(dataDir, `supervisor.${process.pid}.log`);
627
+ const SUPERVISOR_CONTEXT = `lead=${process.pid} supervisor=${process.pid}`;
628
// Single-generation rotation: when `file` is larger than 10 MiB it is renamed
// to `<file>.1`. Any failure (including a missing file) is ignored.
function _rotateSupervisorLog(file) {
  const ROTATE_ABOVE_BYTES = 10 * 1024 * 1024;
  try {
    const { size } = fs.statSync(file);
    if (size <= ROTATE_ABOVE_BYTES) return;
    fs.renameSync(file, file + '.1');
  } catch {}
}
634
+ _rotateSupervisorLog(SUPERVISOR_LOG);
635
+ _rotateSupervisorLog(SUPERVISOR_LOG_SCOPED);
636
+ // R14: sanitize a single log field — strip ANSI escapes and escape control
637
+ // chars (CR, lone C0/C1) so attacker-controlled bytes from the child's stderr
638
+ // can't forge new log lines, hide payloads with \r overwrites, or smuggle
639
+ // ANSI sequences into operator terminals tailing supervisor.log. Keep \t and
640
+ // \n: callers either pass single-line msgs or pre-split on \n.
641
// Neutralise one log field: ESC-introduced sequences (CSI, OSC, two-char
// escapes) have their ESC byte rewritten as the visible text "\x1b", CR
// becomes "\r", and every remaining C0/C1 control character except TAB and
// LF becomes a "\xNN" escape. null / undefined yield ''.
function sanitizeLogField(text) {
  if (text == null) return '';
  const showEsc = (seq) => '\\x1b' + seq.slice(1);
  const hexEscape = (ch) => '\\x' + ch.charCodeAt(0).toString(16).padStart(2, '0');
  return String(text)
    .replace(/\x1b\[[0-?]*[ -/]*[@-~]/g, showEsc)
    .replace(/\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g, showEsc)
    .replace(/\x1b[@-_]/g, showEsc)
    .replace(/\r/g, '\\r')
    .replace(/[\x00-\x08\x0B-\x1F\x7F-\x9F]/g, hexEscape);
}
654
// Append one timestamped, sanitised line to both supervisor logs (the shared
// one and the per-pid one). A failed append to either file is ignored.
function supLog(msg) {
  const stamp = new Date().toISOString();
  const childPid = proc?.pid ?? '-';
  const line = `[${stamp}] [${SUPERVISOR_CONTEXT} child=${childPid}] ${sanitizeLogField(msg)}\n`;
  for (const target of [SUPERVISOR_LOG, SUPERVISOR_LOG_SCOPED]) {
    try { fs.appendFileSync(target, line); } catch {}
  }
}
659
+
660
// Read a numeric tuning knob from the environment. Returns Number(value) when
// it is finite and greater than zero; otherwise returns `fallback` (unset,
// empty, non-numeric, zero, negative or infinite values).
function _envPositiveInt(name, fallback) {
  const parsed = Number(process.env[name]);
  if (!Number.isFinite(parsed)) return fallback;
  if (parsed <= 0) return fallback;
  return parsed;
}
664
+
665
+ const CLIENT_QUEUE_MAX_CHARS = _envPositiveInt('MIXDOG_SUPERVISOR_CLIENT_QUEUE_MAX_CHARS', 8 * 1024 * 1024);
666
+ const CHILD_QUEUE_MAX_CHARS = _envPositiveInt('MIXDOG_SUPERVISOR_CHILD_QUEUE_MAX_CHARS', 4 * 1024 * 1024);
667
+ const BACKPRESSURE_STALL_MS = _envPositiveInt('MIXDOG_SUPERVISOR_BACKPRESSURE_STALL_MS', 60_000);
668
+
669
+ // Liveness watchdog. A client request can sit in pendingFromClient forever
670
+ // when the child is ALIVE but the response path is wedged — a half-open daemon
671
+ // pipe, or a dead-but-not-closed socket after an ungraceful multi-terminal
672
+ // teardown. handleChildGone only fires on child PROCESS death and
673
+ // flushPendingClientErrors only on supervisor death; neither covers
674
+ // "alive but mute", so those requests never get answered → Claude Code's
675
+ // silent hang ("tool call, no response"). We probe with an MCP `ping`, which
676
+ // round-trips the SAME path as a tools/call: a healthy child's event loop
677
+ // answers in well under a second even while a genuinely long tool runs async,
678
+ // so this never aborts a slow-but-healthy call — only a dead path misses
679
+ // repeated pings. After STALL_MAX_MISSES consecutive misses we SIGTERM the
680
+ // child (NOT killChild, which tears down the whole supervisor) so
681
+ // handleChildGone flushes pending with a retry error and respawns a fresh
682
+ // thin client that re-attaches to the shared daemon.
683
+ const STALL_PROBE_AFTER_MS = _envPositiveInt('MIXDOG_SUPERVISOR_STALL_PROBE_MS', 30_000);
684
+ const PING_TIMEOUT_MS = _envPositiveInt('MIXDOG_SUPERVISOR_PING_TIMEOUT_MS', 10_000);
685
+ const STALL_MAX_MISSES = _envPositiveInt('MIXDOG_SUPERVISOR_STALL_MAX_MISSES', 2);
686
+ let _livenessPingId = null; // internal id of the in-flight liveness ping
687
+ let _livenessPingSentAt = 0; // when it was written to the child
688
+ let _livenessMisses = 0; // consecutive unanswered pings
689
+ let _livenessQuietUntil = 0; // suppress re-probe until here after a good pong
690
+
691
// Terminal failure path for the supervisor. In order: report the reason on
// stderr and in the supervisor log, answer every outstanding client request
// with an error, SIGTERM the child if there is one, then exit with code 1.
function _fatalSupervisor(reason) {
  const banner = `[supervisor-fatal] ${reason}`;
  try {
    process.stderr.write(`${banner}\n`);
  } catch {}
  supLog(banner);
  flushPendingClientErrors(`fatal: ${reason}`);
  try {
    proc?.kill('SIGTERM');
  } catch {}
  process.exit(1);
}
699
+
700
+ // Backpressure-aware writers. process.stdout.write / proc.stdin.write both
701
+ // return false when the stream's internal buffer crosses its high-water
702
+ // mark. Previously the return value was ignored, so the supervisor kept
703
+ // piling writes onto an already-pressured pipe. On Windows pipes this can
704
+ // stall the event loop when the peer (Claude Code or the child) falls
705
+ // behind reading — manifesting as "every tool hangs for many minutes,
706
+ // then suddenly all responses arrive" because the queue eventually drains
707
+ // in one burst. Track drain state and queue further writes until the
708
+ // 'drain' event fires, so we never push past a known backpressure
709
+ // boundary. Order is preserved because the queue is a single string.
710
+ let _clientQueue = '';
711
+ let _clientDraining = false;
712
+ let _clientDrainTimer = null;
713
// Queue one frame (newline appended) for the client and try to flush. If the
// queue would grow past CLIENT_QUEUE_MAX_CHARS the supervisor is shut down
// through _fatalSupervisor instead.
function writeToClient(line) {
  const projected = _clientQueue.length + line.length + 1;
  if (projected > CLIENT_QUEUE_MAX_CHARS) {
    _fatalSupervisor(`client queue overflow queued=${_clientQueue.length} incoming=${line.length} max=${CLIENT_QUEUE_MAX_CHARS}`);
    return;
  }
  _clientQueue = `${_clientQueue}${line}\n`;
  _flushClient();
}
721
// Write the whole pending client queue to stdout in one chunk. When stdout
// signals backpressure (write() returns false), further flushes are held
// until 'drain'; a watchdog timer turns a drain that never arrives within
// BACKPRESSURE_STALL_MS into a fatal supervisor error.
function _flushClient() {
  if (_clientDraining) return;
  if (!_clientQueue) return;

  const pending = _clientQueue;
  _clientQueue = '';

  let accepted;
  try {
    accepted = process.stdout.write(pending);
  } catch (e) {
    supLog(`[client-write-error] ${e && e.message || e}`);
    return;
  }
  if (accepted !== false) return;

  _clientDraining = true;
  const pausedAt = Date.now();
  const onStall = () => {
    _fatalSupervisor(`client backpressure stuck after ${BACKPRESSURE_STALL_MS}ms queued=${_clientQueue.length}b`);
  };
  _clientDrainTimer = setTimeout(onStall, BACKPRESSURE_STALL_MS);
  _clientDrainTimer.unref?.();

  process.stdout.once('drain', () => {
    _clientDraining = false;
    if (_clientDrainTimer) {
      clearTimeout(_clientDrainTimer);
      _clientDrainTimer = null;
    }
    const dur = Date.now() - pausedAt;
    // Only stalls of 100ms or more are logged; short pause/drain cycles are
    // routine under burst writes and each log line costs a sync file append.
    if (dur >= 100) supLog(`[client-backpressure] paused/drained ${dur}ms queued=${_clientQueue.length}b`);
    _flushClient();
  });
}
747
+
748
+ let _childQueue = '';
749
+ let _childDraining = false;
750
+ let _childDrainTimer = null;
751
// Queue one frame (newline appended) for the child's stdin and try to flush.
// Returns false when there is no writable child, or when the queue would
// exceed CHILD_QUEUE_MAX_CHARS — in that case the queue is discarded and the
// child is sent SIGTERM. Returns true once the frame has been queued.
function writeToChild(line) {
  const childWritable = Boolean(proc && proc.stdin && proc.stdin.writable);
  if (!childWritable) return false;

  const projected = _childQueue.length + line.length + 1;
  if (projected > CHILD_QUEUE_MAX_CHARS) {
    supLog(`[child-queue-overflow] queued=${_childQueue.length} incoming=${line.length} max=${CHILD_QUEUE_MAX_CHARS}; killing child`);
    _childQueue = '';
    try {
      proc.kill('SIGTERM');
    } catch {}
    return false;
  }

  _childQueue = `${_childQueue}${line}\n`;
  _flushChild();
  return true;
}
763
// Write the whole pending child queue to the child's stdin in one chunk.
// With no writable child the queue is dropped (and logged) so stale bytes
// cannot be prepended to a later child's input. On backpressure, flushes are
// held until 'drain'; if that does not arrive within BACKPRESSURE_STALL_MS
// the current child is sent SIGTERM.
function _flushChild() {
  if (_childDraining) return;
  if (!_childQueue) return;

  const childWritable = Boolean(proc && proc.stdin && proc.stdin.writable);
  if (!childWritable) {
    supLog(`[child-write-dropped] proc unavailable, dropped=${_childQueue.length}b`);
    _childQueue = '';
    return;
  }

  const pending = _childQueue;
  _childQueue = '';

  let accepted;
  try {
    accepted = proc.stdin.write(pending);
  } catch (e) {
    supLog(`[child-write-error] ${e && e.message || e}`);
    return;
  }
  if (accepted !== false) return;

  _childDraining = true;
  const pausedAt = Date.now();
  const onStall = () => {
    supLog(`[child-backpressure-stuck] after ${BACKPRESSURE_STALL_MS}ms queued=${_childQueue.length}b; killing child`);
    try {
      proc?.kill('SIGTERM');
    } catch {}
  };
  _childDrainTimer = setTimeout(onStall, BACKPRESSURE_STALL_MS);
  _childDrainTimer.unref?.();

  proc.stdin.once('drain', () => {
    _childDraining = false;
    if (_childDrainTimer) {
      clearTimeout(_childDrainTimer);
      _childDrainTimer = null;
    }
    const dur = Date.now() - pausedAt;
    if (dur >= 100) supLog(`[child-backpressure] paused/drained ${dur}ms queued=${_childQueue.length}b`);
    _flushChild();
  });
}
795
+
796
// Send a JSON-RPC error response to the client. Nothing is sent when `id` is
// undefined (a notification carries no id); `id: null` is still answered
// because JSON-RPC allows it.
function sendErrorToClient(id, code, message) {
  const isNotification = id === undefined;
  if (isNotification) return;
  const response = { jsonrpc: '2.0', id, error: { code, message } };
  writeToClient(JSON.stringify(response));
}
801
+
802
+ // Invariant: if the SUPERVISOR itself goes down (uncaught exception, fatal
803
+ // rejection, fatal backpressure) while client tool calls are still
804
+ // outstanding, every outstanding request MUST receive a terminal JSON-RPC
805
+ // error — never silence. Claude Code does NOT auto-reconnect a stdio MCP
806
+ // server, so without this the client waits on a dead supervisor until a manual
807
+ // /mcp reconnect (the "silent hang"). handleChildGone covers CHILD death; this
808
+ // covers the supervisor's own death paths, which previously exited without
809
+ // answering pending ids. Frames go DIRECT to stdout (bypassing the
810
+ // backpressure queue) because the process is exiting and the async queue may
811
+ // never drain; best-effort under try/catch since a broken pipe can't be
812
+ // helped. Distinct "supervisor ..." tag (vs handleChildGone's "mcp child ...")
813
+ // keeps the two death classes separable in logs.
814
// Answer every request still recorded in pendingFromClient with a -32603
// error tagged `supervisor <tag>; retry`, then clear the map and log how many
// were flushed. Frames are written straight to stdout (not through the
// backpressure queue) and write failures are ignored.
function flushPendingClientErrors(tag) {
  const outstanding = pendingFromClient.size;
  if (outstanding === 0) return;

  for (const id of pendingFromClient.keys()) {
    if (id === undefined) continue;
    const frame = JSON.stringify({
      jsonrpc: '2.0',
      id,
      error: { code: -32603, message: `[run-mcp] supervisor ${tag}; retry` },
    });
    try {
      process.stdout.write(`${frame}\n`);
    } catch {}
  }

  pendingFromClient.clear();
  try {
    supLog(`[supervisor-flush-pending] tag=${tag} flushed=${outstanding}`);
  } catch {}
}
825
+
826
// Re-send the client's cached `initialize` request to the child under a fresh
// supervisor-internal (negative) id, which is recorded in pendingInternal.
// When the client had also sent its initialized notification, that is
// replayed too. Does nothing if no initialize request has been cached.
function replayInitToChild() {
  if (!cachedInitRequest) return;

  const replayId = internalIdSeq;
  internalIdSeq -= 1;
  pendingInternal.add(replayId);

  const initFrame = {
    jsonrpc: '2.0',
    id: replayId,
    method: 'initialize',
    params: cachedInitRequest.params,
  };
  writeToChild(JSON.stringify(initFrame));

  if (!cachedInitDone) return;
  // Notification — no id, no response expected.
  const initializedFrame = { jsonrpc: '2.0', method: 'notifications/initialized' };
  writeToChild(JSON.stringify(initializedFrame));
}
844
+
845
/**
 * Handle one newline-delimited frame read from the client's stdin.
 *
 * Steps, in order:
 *   1. Parse (cheap id/method scan when possible, full JSON.parse otherwise);
 *      non-JSON lines are logged and dropped.
 *   2. Cache initialize / initialized state and record id-bearing requests in
 *      pendingFromClient.
 *   3. While the child has not yet responded, reject any line that contains a
 *      non-init frame (id-bearing frames get a -32603 error).
 *   4. Forward the raw line to the child; when the write is refused, answer
 *      id-bearing frames with an error instead.
 *
 * @param {string} line - one raw line, without its trailing newline.
 */
function handleClientLine(line) {
  if (!line.trim()) return;

  // Fast path: the scan result is enough unless the supervisor needs the full
  // payload; a null scan result falls back to a full parse as well.
  let msg = _lineNeedsFullParse(line) ? null : _scanIdMethod(line);
  if (msg === null) {
    try {
      msg = JSON.parse(line);
    } catch {
      // Forwarding non-JSON would corrupt the child's JSON-RPC parser, so the
      // line is quarantined in the supervisor log and dropped.
      supLog(`[client-stdin-noise] ${line.slice(0, 500)}`);
      return;
    }
  }

  const structured = Boolean(msg) && typeof msg === 'object';
  const batch = Array.isArray(msg);
  const frames = structured ? (batch ? msg : [msg]) : [];
  const isInitFrame = (frame) => Boolean(frame)
    && typeof frame === 'object'
    && (frame.method === 'initialize'
      || frame.method === 'notifications/initialized'
      || frame.method === 'initialized');

  for (const frame of frames) {
    if (!frame || typeof frame !== 'object') continue;
    if (frame.method === 'initialize') {
      cachedInitRequest = { id: frame.id, params: frame.params };
    } else if (frame.method === 'notifications/initialized' || frame.method === 'initialized') {
      cachedInitDone = true;
    }
    if (frame.id !== undefined && frame.method) {
      pendingFromClient.set(frame.id, { method: frame.method, ts: Date.now() });
    }
  }

  // Handshake gate: only init traffic may pass until the child has responded,
  // because init frames are the only payload that can advance the gate.
  if (!childHasResponded && structured && !frames.every(isInitFrame)) {
    for (const frame of frames) {
      if (isInitFrame(frame) || !frame || frame.id === undefined) continue;
      sendErrorToClient(frame.id, -32603, '[run-mcp] mcp child handshake pending; retry');
      pendingFromClient.delete(frame.id);
    }
    return;
  }

  if (writeToChild(line)) return;

  // The child refused the write (e.g. mid-respawn). Requests get a retry-able
  // error; notifications are dropped.
  if (batch) {
    for (const frame of msg) {
      if (!frame || typeof frame !== 'object' || Array.isArray(frame)) {
        // Non-object batch member: reported with id null and code -32600.
        sendErrorToClient(null, -32600, '[run-mcp] Invalid Request: batch item is not an object');
        continue;
      }
      const methodOk = typeof frame.method === 'string' && frame.method.length > 0;
      if (frame.id === undefined && methodOk) continue; // well-formed notification
      const replyId = frame.id !== undefined ? frame.id : null;
      if (methodOk) {
        sendErrorToClient(replyId, -32603, '[run-mcp] mcp child unavailable; retry');
      } else {
        sendErrorToClient(replyId, -32600, '[run-mcp] Invalid Request: missing or invalid method');
      }
      pendingFromClient.delete(frame.id);
    }
    return;
  }
  if (msg && msg.id !== undefined && msg.method) {
    sendErrorToClient(msg.id, -32603, '[run-mcp] mcp child unavailable; retry');
    pendingFromClient.delete(msg.id);
  }
}
923
+
924
/**
 * Handle one newline-delimited frame read from the child's stdout.
 *
 * Non-JSON lines are logged and dropped without opening the handshake gate.
 * The first valid frame marks the child as responsive and, if a respawn
 * deferred it and the client finished initialization, emits
 * `notifications/tools/list_changed`. Replies to supervisor-internal ids are
 * swallowed, replies to unknown ids are dropped, and everything else is
 * forwarded to the client.
 *
 * @param {string} line - one raw line, without its trailing newline.
 */
function handleChildLine(line) {
  if (!line.trim()) return;

  // Only lines flagged by _lineNeedsFullParse get a full parse; the rest are
  // handled from a lightweight id scan.
  let scanned;
  if (_lineNeedsFullParse(line)) {
    try {
      scanned = JSON.parse(line);
    } catch {
      scanned = null;
    }
  } else {
    scanned = _scanIdMethod(line);
  }

  if (scanned === null) {
    // Noise must neither reach the client (it would see a malformed frame) nor
    // flip childHasResponded (that would open the gate before the child has
    // produced a real response).
    supLog(`[child-stdout-noise] ${line.slice(0, 500)}`);
    return;
  }

  if (!childHasResponded) {
    childHasResponded = true;
    // A respawn postpones its list_changed announcement until this point. It
    // is only sent once the client has completed initialization; otherwise it
    // is discarded.
    if (announceListChangedOnReady) {
      announceListChangedOnReady = false;
      if (cachedInitDone) {
        const notice = { jsonrpc: '2.0', method: 'notifications/tools/list_changed' };
        writeToClient(JSON.stringify(notice));
      }
    }
  }

  if (Array.isArray(scanned)) {
    const internalIds = new Set();
    for (const item of scanned) {
      if (!item || item.id === undefined) continue;
      if (pendingInternal.has(item.id)) {
        internalIds.add(item.id);
        pendingInternal.delete(item.id);
        _maybeResolveLivenessPong(item.id);
      } else {
        pendingFromClient.delete(item.id);
      }
    }
    if (internalIds.size) {
      // Internal replies inside a batch must not surface to the client: strip
      // them and forward the remainder, or nothing when none remain.
      const forward = scanned.filter(
        (item) => !(item && item.id !== undefined && internalIds.has(item.id)),
      );
      if (forward.length !== 0) writeToClient(JSON.stringify(forward));
      return;
    }
    writeToClient(line);
    return;
  }

  if (scanned.id !== undefined) {
    if (pendingInternal.has(scanned.id)) {
      // Reply to an id the supervisor issued itself: swallow it.
      pendingInternal.delete(scanned.id);
      _maybeResolveLivenessPong(scanned.id);
      return;
    }
    if (!pendingFromClient.has(scanned.id)) {
      // Neither internal nor an outstanding client request: drop and log.
      supLog(`[child-stdout-unknown-id] ${line.slice(0, 500)}`);
      return;
    }
    pendingFromClient.delete(scanned.id);
  }
  writeToClient(line);
}
1007
+
1008
/**
 * Split off every complete (newline-terminated) line in `buf`, hand each one
 * to `onLine` with a single trailing CR removed, and return the unterminated
 * remainder.
 *
 * @param {string} buf - accumulated text.
 * @param {(line: string) => void} onLine - invoked once per complete line, in order.
 * @returns {string} text after the last newline (all of `buf` when it has none).
 */
function drainBuffer(buf, onLine) {
  const segments = buf.split('\n');
  // The final segment is whatever follows the last newline: not yet a line.
  const remainder = segments.pop();
  for (const segment of segments) {
    onLine(segment.replace(/\r$/, ''));
  }
  return remainder;
}
1018
+
1019
+ // Shared child-gone cleanup. Every path that leaves `proc` non-runnable
1020
+ // (normal exit, crash, spawn-error) must invalidate pending requests,
1021
+ // reset stdoutBuf (stale partial line from the dead child must not
1022
+ // concatenate with the new child's first response), and schedule a
1023
+ // respawn. Invariant: `proc` is never left as an orphaned handle
1024
+ // without a recovery path.
1025
/**
 * Common cleanup for every way the child can become non-runnable.
 *
 * Clears `proc`; during shutdown it simply exits the supervisor. Otherwise it
 * logs the captured stderr tail, fails all outstanding client requests with a
 * retry-able error, resets per-child state (internal ids, liveness probe,
 * stdout buffer, stdin queue) and schedules a respawn, using a longer delay
 * when recent restarts exceed the crash-loop threshold.
 *
 * @param {{tag: string, log: string, exitCode?: number, signal?: string|null}} why
 *   description of the cause, used in log lines and error messages.
 */
function handleChildGone(why) {
  if (proc === null) return;
  proc = null;
  if (shuttingDown) {
    process.exit(why.exitCode ?? 0);
    return;
  }

  // Capture the stderr ring buffer first, then clear it so the next child
  // starts with an empty one.
  const exitCodeLabel = why.exitCode ?? 'n/a';
  const signalLabel = why.signal ?? 'n/a';
  const stderrTail = childStderrBuf;
  childStderrBuf = '';
  if (!stderrTail) {
    supLog(`[child-stderr-tail exitCode=${exitCodeLabel} signal=${signalLabel} bytes=0] (empty)`);
  } else {
    const tail = stderrTail.slice(-STDERR_TAIL_BYTES);
    // Every physical line is prefixed and sanitized individually so embedded
    // newlines in child output cannot masquerade as separate log entries.
    const prefixedLines = tail
      .split(/\r?\n/)
      .map((ln) => '[stderr-tail] ' + sanitizeLogField(ln));
    supLog(`[child-stderr-tail exitCode=${exitCodeLabel} signal=${signalLabel} bytes=${tail.length}]\n${prefixedLines.join('\n')}`);
  }

  const pendingClientCount = pendingFromClient.size;
  const pendingInternalCount = pendingInternal.size;
  for (const id of pendingFromClient.keys()) {
    sendErrorToClient(id, -32603, `[run-mcp] mcp child ${why.tag}; retry`);
  }
  pendingFromClient.clear();
  pendingInternal.clear();

  // Any in-flight liveness probe belonged to the dead child.
  _livenessPingId = null;
  _livenessMisses = 0;
  _livenessQuietUntil = 0;
  stdoutBuf = '';

  // Queued stdin lines are discarded: their requests were just failed above,
  // and the init replay must be the first thing the next child receives.
  if (_childQueue) supLog(`[child-write-dropped] child gone, dropped=${_childQueue.length}b`);
  _childQueue = '';
  _childDraining = false;
  if (_childDrainTimer) {
    clearTimeout(_childDrainTimer);
    _childDrainTimer = null;
  }

  // Sliding-window restart accounting.
  const now = Date.now();
  recentRestarts.push(now);
  while (recentRestarts.length && now - recentRestarts[0] > CRASH_WINDOW_MS) {
    recentRestarts.shift();
  }
  const crashLoop = recentRestarts.length > CRASH_MAX_RESTARTS;
  const delay = crashLoop ? CRASH_BACKOFF_MS * 4 : CRASH_BACKOFF_MS;

  // In a crash loop the supervisor stays up and only backs off.
  const statusMsg = crashLoop
    ? `[run-mcp] child crash loop (${recentRestarts.length} ${why.tag} in ${CRASH_WINDOW_MS}ms) — backing off ${delay}ms; supervisor stays up`
    : `[run-mcp] ${why.log} — respawning (#${recentRestarts.length}); pendingClient=${pendingClientCount} pendingInternal=${pendingInternalCount} shuttingDown=${shuttingDown}`;
  process.stderr.write(statusMsg + '\n');
  supLog(statusMsg);

  // The respawn waits while devSyncCacheWriteInProgress() reports true,
  // re-polling every DEV_SYNC_GATE_POLL_MS instead of spawning.
  const attemptRespawn = () => {
    if (shuttingDown) return;
    if (devSyncCacheWriteInProgress()) {
      respawnTimer = setTimeout(attemptRespawn, DEV_SYNC_GATE_POLL_MS);
      return;
    }
    spawnChild();
    if (cachedInitRequest) {
      replayInitToChild();
    } else if (crashLoop) {
      process.stderr.write('[run-mcp] WARN: crash-loop respawn before initialize landed — skipping init replay\n');
    }
    // handleChildLine emits tools/list_changed once the new child responds.
    announceListChangedOnReady = true;
  };
  respawnTimer = setTimeout(attemptRespawn, delay);
}
1120
+
1121
/**
 * Spawn a fresh MCP child process and wire its stdio into the supervisor.
 *
 * The plugin root is re-resolved on every call, the readiness gate and the
 * stdout parse buffer are reset, and the child is started with piped
 * stdin/stdout/stderr plus a fourth pipe (fd 3) used as the control channel.
 *
 * All listeners are bound to the local `child` handle and ignore events once
 * the module-level `proc` no longer refers to it. Node may deliver stdio
 * `data` after `exit`, and `exit` may or may not follow `error`; without the
 * identity check a late event from a previous child could feed bytes into the
 * new child's buffers, open its handshake gate, or null out the live handle.
 */
function spawnChild() {
  // Resolved per spawn so a child-only restart picks up a new plugin path.
  const childPluginRoot = _resolveLatestPluginRoot();
  currentChildPluginRoot = childPluginRoot;
  const childServerPath = join(childPluginRoot, 'server.mjs');
  if (childPluginRoot !== pluginRoot) {
    process.stderr.write(`[run-mcp] child spawn path refreshed: ${childPluginRoot} (boot=${pluginRoot})\n`);
  }
  // A respawned child must prove on its own that it can respond.
  childHasResponded = false;
  // A partial line left by the previous child must not prefix the new
  // child's first frame.
  stdoutBuf = '';
  process.stderr.write(`[boot-time] tag=run-mcp-spawn-server tMs=${Date.now()}\n`);
  const child = spawn(process.env.BUN_EXEC_PATH || process.execPath, [childServerPath], {
    cwd: childPluginRoot,
    // stderr is piped (not inherited) so its tail can be ring-buffered and
    // reported by handleChildGone; index 3 is the lifecycle control pipe.
    stdio: ['pipe', 'pipe', 'pipe', 'pipe'],
    // Prevents a console window from flashing on Windows respawns.
    windowsHide: true,
    env: {
      ...process.env,
      UV_THREADPOOL_SIZE: '2',
      CLAUDE_PLUGIN_ROOT: childPluginRoot,
      CLAUDE_PLUGIN_DATA: dataDir,
      MIXDOG_SUPERVISOR_CONTROL_FD: '3',
      // Supervisor identity handed to the child.
      MIXDOG_SUPERVISOR_PID: String(process.pid),
      MIXDOG_SUPERVISOR_CACHE_DIR: __localRoot,
      // Stable routing id shared by every child of this supervisor.
      MIXDOG_SESSION_ID: STABLE_TERMINAL_SESSION_ID,
    },
  });
  proc = child;

  if (isWin && child.pid) {
    // Best-effort: lower the child's scheduling priority on Windows.
    try {
      const ps = `$p = Get-Process -Id ${Number(child.pid)} -ErrorAction SilentlyContinue; if ($p) { $p.PriorityClass = 'BelowNormal' }`;
      const encoded = Buffer.from(ps, 'utf16le').toString('base64');
      execSync(`powershell.exe -NoProfile -EncodedCommand ${encoded}`, {
        stdio: 'ignore',
        windowsHide: true,
        timeout: 3000,
      });
    } catch {}
  }

  child.stdout.setEncoding('utf8');
  child.stdout.on('data', (chunk) => {
    if (proc !== child) return; // late output from a child that is already gone
    stdoutBuf += chunk;
    stdoutBuf = drainBuffer(stdoutBuf, handleChildLine);
    // Unbounded-line guard: an unterminated tail beyond the cap means the
    // child is emitting an oversized frame or never a newline. Kill it so the
    // exit path respawns cleanly, and drop the buffer.
    const residualBytes = Buffer.byteLength(stdoutBuf, 'utf8');
    if (residualBytes > MAX_LINE_BYTES) {
      supLog(`[child-stdout-overflow] bytes=${residualBytes} cap=${MAX_LINE_BYTES} — killing child`);
      stdoutBuf = '';
      try { child.kill(); } catch {}
    }
  });

  // stderr ring buffer, capped at STDERR_TAIL_BYTES (oldest data dropped).
  // Every chunk is also mirrored to the supervisor's own stderr.
  childStderrBuf = '';
  child.stderr.setEncoding('utf8');
  child.stderr.on('data', (chunk) => {
    try { process.stderr.write(chunk); } catch {}
    if (proc !== child) return; // mirror only; the buffer belongs to the live child
    childStderrBuf += chunk;
    if (childStderrBuf.length > STDERR_TAIL_BYTES) {
      childStderrBuf = childStderrBuf.slice(-STDERR_TAIL_BYTES);
    }
  });

  child.on('exit', (code, signal) => {
    if (proc !== child) return; // already handled, or a newer child is live
    handleChildGone({
      tag: `exit code=${code}`,
      log: `child exit code=${code} signal=${signal}`,
      exitCode: code || 0,
      signal,
    });
  });

  child.on('error', (err) => {
    if (proc !== child) return; // already handled, or a newer child is live
    handleChildGone({
      tag: 'spawn failed',
      log: `child spawn error: ${err && err.message}`,
      exitCode: 1,
      signal: null,
    });
  });

  // Asynchronous write failures on a dying child's stdin arrive as a stream
  // 'error' event; without a listener that would be an uncaught exception.
  // Respawn is owned by the exit/error handlers, so this only logs.
  child.stdin.on('error', (err) => {
    supLog(`[child-stdin-error] ${err && err.message || err}`);
  });
}
1239
+
1240
/**
 * Begin supervisor shutdown: ask the child to stop gracefully, force-kill it
 * if it has not exited within the timeout, and exit the supervisor once the
 * child is gone. Idempotent — repeated calls after the first are no-ops.
 *
 * @param {boolean} [fast=false] - pass exactly `true` to use the short
 *   (2000 ms) graceful timeout instead of the full 10000 ms. Any other value
 *   selects the full timeout. This matters because the function is also
 *   registered directly as a signal handler, and Node passes the signal name
 *   (e.g. 'SIGTERM') as the first argument; a truthiness test would silently
 *   turn every signal into a fast shutdown.
 */
function killChild(fast = false) {
  const fastShutdown = fast === true;
  supLog(`[supervisor-killChild] entered shuttingDown=${shuttingDown} fast=${fastShutdown}`);
  if (shuttingDown) return;
  shuttingDown = true;
  clearTimeout(respawnTimer);
  respawnTimer = null;
  // Pin the handle: the timer below must act on this child even if the
  // module-level `proc` is reassigned or cleared in the meantime.
  const child = proc;
  if (!child) {
    process.exit(0);
    return;
  }
  // Graceful shutdown is signalled by writing "shutdown\n" to the dedicated
  // control pipe (child stdio index 3), which is independent of the MCP stdio
  // transport. The fast path shortens the wait; signals keep the full timeout.
  const GRACEFUL_TIMEOUT_MS = fastShutdown ? 2000 : 10000;
  const pid = child.pid;
  try {
    const ctrlFd = child.stdio && child.stdio[3];
    if (ctrlFd && typeof ctrlFd.end === 'function') {
      ctrlFd.end('shutdown\n');
      process.stderr.write(`[run-mcp] sent shutdown to control fd (pid=${pid}) — signalling graceful shutdown\n`);
    } else {
      process.stderr.write(`[run-mcp] WARN: control fd unavailable (pid=${pid}) — falling back to SIGTERM\n`);
      try { child.kill('SIGTERM'); } catch {}
    }
  } catch (e) {
    process.stderr.write(`[run-mcp] control fd write failed (pid=${pid}): ${e && e.message}\n`);
  }
  // Additionally send SIGINT on non-Windows platforms; skipped on Windows.
  if (!isWin) {
    try { child.kill('SIGINT'); } catch {}
  }
  // Wait up to GRACEFUL_TIMEOUT_MS for a clean exit; force-kill afterwards.
  let exited = false;
  const forceTimer = setTimeout(() => {
    if (exited) return;
    process.stderr.write(`[run-mcp] child did not exit within ${GRACEFUL_TIMEOUT_MS}ms — forcing kill (pid=${pid}) path=force\n`);
    try {
      if (isWin && pid) {
        execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore', windowsHide: true, timeout: 5000 });
      } else {
        child.kill('SIGKILL');
      }
    } catch {}
  }, GRACEFUL_TIMEOUT_MS);
  child.once('exit', (code, signal) => {
    exited = true;
    clearTimeout(forceTimer);
    process.stderr.write(`[run-mcp] child exited cleanly (pid=${pid} code=${code} signal=${signal}) path=graceful\n`);
    process.exit(code || 0);
  });
  // The supervisor exits from a child 'exit' handler once the child terminates.
}
1294
+
1295
// Signal listeners receive the signal name as their first argument. Wrap the
// call so that name is never forwarded as killChild's `fast` flag (a truthy
// string would select the short graceful timeout).
process.on('SIGTERM', () => killChild());
process.on('SIGINT', () => killChild());
// stdin EOF means the MCP client closed its end of the pipe. Letting the
// supervisor linger past EOF leaves a supervisor with no client to serve, so
// EOF is routed into the graceful-shutdown path and the child receives the
// shutdown signal too. Both 'end' and 'close' are hooked; killChild ignores
// the second call.
process.stdin.once('end', () => {
  process.stderr.write('[run-mcp] stdin EOF — client disconnected; initiating graceful shutdown\n');
  try { killChild(true); } catch { process.exit(0); }
});
process.stdin.once('close', () => {
  process.stderr.write('[run-mcp] stdin closed — initiating graceful shutdown\n');
  try { killChild(true); } catch { process.exit(0); }
});
// Assigned later, once the heartbeat paths are computed; declared here so the
// exit handler below can remove this supervisor's heartbeat file.
let _HEARTBEAT_FILE = null;
process.on('exit', (code) => {
  try { supLog(`[supervisor-exit] code=${code} shuttingDown=${shuttingDown}`); } catch {}
  try { if (_HEARTBEAT_FILE) fs.unlinkSync(_HEARTBEAT_FILE); } catch {}
});
process.on('uncaughtException', (err) => {
  try { supLog(`[supervisor-uncaught] ${err?.stack || err?.message || err}`); } catch {}
  // Every step is best-effort: a failure while flushing must not prevent the
  // child from being signalled or the supervisor from exiting.
  try { flushPendingClientErrors('uncaught exception'); } catch {}
  try { killChild(); } catch {}
  process.exit(1);
});
1324
/**
 * Report whether an error carries one of the codes the supervisor treats as
 * fatal: EPIPE, EADDRINUSE or ENOMEM.
 *
 * @param {unknown} err - error-like value; null/undefined are accepted.
 * @returns {boolean} true when `err.code` is one of the fatal codes.
 */
function _isSupervisorFatal(err) {
  switch (err?.code) {
    case 'EPIPE':
    case 'EADDRINUSE':
    case 'ENOMEM':
      return true;
    default:
      return false;
  }
}
1328
// Unhandled promise rejections are always logged. Only those whose code
// _isSupervisorFatal recognises bring the supervisor down; in that case
// pending client requests are failed and the child is signalled first.
process.on('unhandledRejection', (reason) => {
  try {
    supLog(`[supervisor-unhandled-rejection] ${reason?.stack || reason?.message || reason}`);
  } catch {}
  if (!_isSupervisorFatal(reason)) return;
  try {
    supLog(`[supervisor-unhandled-rejection-fatal] code=${reason?.code} — exiting code=1`);
  } catch {}
  flushPendingClientErrors('fatal rejection');
  try {
    killChild();
  } catch {}
  process.exit(1);
});
1337
+
1338
+ const _HEARTBEAT_MS = 5000; // interval (ms) of the heartbeat and liveness timers; lock staleness is 3x and entry freshness 6x this value
1338
+ const _HEARTBEAT_DIR = join(os.tmpdir(), 'mixdog'); // directory under the OS temp dir holding all heartbeat files
1339
+ _HEARTBEAT_FILE = join(_HEARTBEAT_DIR, `supervisor-heartbeat.${process.pid}.json`); // this supervisor's own heartbeat file (one per pid); removed by the process 'exit' handler
1340
+ const _HEARTBEAT_INDEX_FILE = join(_HEARTBEAT_DIR, 'supervisor-heartbeats.json'); // aggregated list of live supervisors, rewritten on every heartbeat
1341
+ const _HEARTBEAT_INDEX_LOCK = `${_HEARTBEAT_INDEX_FILE}.lock`; // lock file created exclusively ('wx') while the index is rewritten
1342
+ let _heartbeatWarnedMultiAt = 0; // timestamp (ms) of the last multi-supervisor warning; limits that warning to once per 60000 ms
1344
/**
 * Probe whether a process id refers to a live process.
 *
 * Uses `process.kill(pid, 0)`, which tests for existence without delivering a
 * signal. An EPERM failure still counts as alive: the process exists but this
 * process may not signal it.
 *
 * @param {number} pid - candidate process id.
 * @returns {boolean} false for non-finite or non-positive ids and for
 *   processes that cannot be found; true otherwise.
 */
function _heartbeatPidAlive(pid) {
  const plausible = Number.isFinite(pid) && pid > 0;
  if (!plausible) return false;
  if (pid !== process.pid) {
    try {
      process.kill(pid, 0);
    } catch (err) {
      return err?.code === 'EPERM';
    }
  }
  return true;
}
1354
/**
 * Write `value` as JSON to `file` by first writing a uniquely named temporary
 * sibling and then renaming it over the target via renameWithRetrySync.
 * If the rename fails, the temporary file is removed (best-effort) and the
 * error is rethrown.
 *
 * @param {string} file - destination path.
 * @param {unknown} value - JSON-serialisable value.
 * @returns whatever renameWithRetrySync returns.
 */
function _writeJsonAtomic(file, value) {
  const uniqueParts = [process.pid, Date.now(), Math.random().toString(36).slice(2, 8)];
  const tmp = `${file}.${uniqueParts.join('.')}.tmp`;
  fs.writeFileSync(tmp, JSON.stringify(value));
  try {
    return renameWithRetrySync(tmp, file);
  } catch (err) {
    try {
      fs.unlinkSync(tmp);
    } catch {}
    throw err;
  }
}
1363
/**
 * Read and parse a JSON file, never throwing.
 *
 * @param {string} file - path to read as UTF-8.
 * @returns {*} the parsed value, or null when the file cannot be read or
 *   does not contain valid JSON.
 */
function _readJsonSafe(file) {
  let parsed = null;
  try {
    const text = fs.readFileSync(file, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
1366
/**
 * Run `fn` while holding the heartbeat-index lock file.
 *
 * The lock is taken by exclusively creating _HEARTBEAT_INDEX_LOCK (open flag
 * 'wx', which fails when the path already exists). On contention the call
 * sleeps briefly and retries for up to 8 seconds; a lock file whose mtime is
 * older than three heartbeat periods is treated as abandoned and removed.
 * This blocks synchronously while waiting.
 *
 * An error thrown by `fn` whose code is retryable is handled like contention
 * (the lock is released and the whole attempt is retried); any other error
 * propagates to the caller.
 *
 * @param {() => *} fn - work to perform under the lock.
 * @returns {*} the return value of `fn`, or false when the lock could not be
 *   obtained before the deadline.
 */
function _withHeartbeatIndexLock(fn) {
  const deadline = Date.now() + 8000;
  while (Date.now() < deadline) {
    let fd = null;
    try {
      fd = fs.openSync(_HEARTBEAT_INDEX_LOCK, 'wx');
      // Owner stamp is informational only; failure to write it is ignored.
      try { fs.writeSync(fd, `${process.pid} ${Date.now()}\n`); } catch {}
      try { return fn(); }
      finally {
        try { fs.closeSync(fd); } catch {}
        // Mark as released so the descriptor is never closed a second time.
        fd = null;
        try { fs.unlinkSync(_HEARTBEAT_INDEX_LOCK); } catch {}
      }
    } catch (err) {
      if (fd !== null) {
        try { fs.closeSync(fd); } catch {}
      }
      // EEXIST is what exclusive creation reports when another holder exists,
      // so it always counts as contention regardless of the shared retry set.
      const retryable = err?.code === 'EEXIST' || RENAME_RETRY_CODES.has(err?.code);
      if (!retryable) throw err;
      try {
        const st = fs.statSync(_HEARTBEAT_INDEX_LOCK);
        if (Date.now() - st.mtimeMs > _HEARTBEAT_MS * 3) {
          // Abandoned lock: remove it and retry immediately.
          try { fs.unlinkSync(_HEARTBEAT_INDEX_LOCK); } catch {}
          continue;
        }
      } catch {}
      // Randomised pause so competing supervisors do not retry in lockstep.
      sleepSync(25 + Math.floor(Math.random() * 35));
    }
  }
  return false;
}
1393
/**
 * Publish this supervisor's heartbeat and refresh the shared index.
 *
 * Writes the per-pid heartbeat file, scans the heartbeat directory for all
 * supervisor heartbeat files, deletes those whose owner is not alive or whose
 * timestamp is older than six heartbeat periods, and rewrites the index file
 * under the index lock. When more than one supervisor is live, a warning is
 * logged at most once per 60 seconds. Any failure is logged and swallowed.
 */
function _writeSupervisorHeartbeat() {
  try {
    fs.mkdirSync(_HEARTBEAT_DIR, { recursive: true });
    const now = Date.now();
    const pendingClientMethods = Array
      .from(pendingFromClient.values(), (v) => v?.method || 'unknown')
      .slice(0, 8);
    _writeJsonAtomic(_HEARTBEAT_FILE, {
      pid: process.pid,
      ownerLeadPid: process.pid,
      childPid: proc?.pid ?? null,
      pendingClientCount: pendingFromClient.size,
      pendingInternalCount: pendingInternal.size,
      pendingClientMethods,
      ts: now,
      cacheDir: __localRoot,
      pluginRoot: currentChildPluginRoot,
      dataDir,
      ppid: process.ppid,
    });

    const heartbeatName = /^supervisor-heartbeat\.\d+\.json$/;
    const supervisors = [];
    for (const ent of fs.readdirSync(_HEARTBEAT_DIR, { withFileTypes: true })) {
      if (!ent.isFile() || !heartbeatName.test(ent.name)) continue;
      const file = join(_HEARTBEAT_DIR, ent.name);
      const entry = _readJsonSafe(file);
      const pid = Number(entry?.ownerLeadPid ?? entry?.pid);
      const fresh = Number.isFinite(entry?.ts) && now - Number(entry.ts) <= _HEARTBEAT_MS * 6;
      if (_heartbeatPidAlive(pid) && fresh) {
        supervisors.push({ ...entry, pid, ownerLeadPid: pid });
        continue;
      }
      // Dead owner or stale timestamp: remove the leftover file.
      try {
        fs.unlinkSync(file);
      } catch {}
    }
    supervisors.sort((a, b) => Number(a.pid) - Number(b.pid));
    _withHeartbeatIndexLock(() => _writeJsonAtomic(_HEARTBEAT_INDEX_FILE, { updatedAt: now, supervisors }));

    const warnDue = now - _heartbeatWarnedMultiAt > 60000;
    if (supervisors.length > 1 && warnDue) {
      _heartbeatWarnedMultiAt = now;
      const pids = supervisors.map((s) => s.pid).join(',');
      const msg = `[heartbeat] multi-supervisor active count=${supervisors.length} pids=${pids}`;
      supLog(msg);
      try {
        process.stderr.write(`[run-mcp] ${msg}\n`);
      } catch {}
    }
  } catch (e) {
    supLog(`[heartbeat-error] ${e?.message || e}`);
  }
}
// Periodic heartbeat; the timer is unref'd where supported so it does not by
// itself keep the process alive. One heartbeat is written immediately.
const _heartbeatTimer = setInterval(_writeSupervisorHeartbeat, _HEARTBEAT_MS);
_heartbeatTimer.unref?.();
_writeSupervisorHeartbeat();
1440
+
1441
+ // Liveness pong handler. Returns true when `id` is the in-flight liveness
1442
+ // ping's reply: the response path is proven healthy, so reset the miss
1443
+ // counter and back off re-probing for one STALL_PROBE_AFTER_MS window (a
1444
+ // genuinely long tool keeps the call pending but the path is fine).
1445
/**
 * Treat `id` as a possible reply to the in-flight liveness ping.
 *
 * On a match the probe is cleared, the miss counter is reset, and further
 * probing is suppressed for one STALL_PROBE_AFTER_MS window.
 *
 * @param {*} id - id of a reply received from the child.
 * @returns {boolean} true when `id` matched the outstanding ping.
 */
function _maybeResolveLivenessPong(id) {
  const matchesProbe = _livenessPingId !== null && id === _livenessPingId;
  if (!matchesProbe) return false;
  _livenessPingId = null;
  _livenessMisses = 0;
  _livenessQuietUntil = Date.now() + STALL_PROBE_AFTER_MS;
  return true;
}
1452
+
1453
+ // Record one unanswered/failed liveness probe. On STALL_MAX_MISSES in a row the
1454
+ // response path is dead: SIGTERM the child ONLY (not killChild, which exits the
1455
+ // supervisor and severs the unrecoverable stdio bridge) so proc 'exit' →
1456
+ // handleChildGone flushes pending with a retry error and respawns a fresh thin
1457
+ // client. Both miss sources — an unanswered pong AND an unwritable child stdin
1458
+ // while the process lingers alive — funnel here so neither can hang pending
1459
+ // calls forever.
1460
/**
 * Count one failed liveness probe and, once STALL_MAX_MISSES have
 * accumulated, send SIGTERM to the child (the supervisor itself keeps
 * running) and reset the counter.
 *
 * @param {string} reason - short description of why the probe failed, for the log.
 */
function _recordLivenessMiss(reason) {
  _livenessMisses += 1;
  supLog(`[liveness] ${reason} — miss ${_livenessMisses}/${STALL_MAX_MISSES} (pendingClient=${pendingFromClient.size})`);
  const thresholdReached = _livenessMisses >= STALL_MAX_MISSES;
  if (!thresholdReached) return;
  const blockedCalls = pendingFromClient.size;
  _livenessMisses = 0;
  const notice = `[liveness] response path dead (${STALL_MAX_MISSES} missed pings) — recycling child to unblock ${blockedCalls} pending client call(s)`;
  supLog(notice);
  try {
    process.stderr.write(`[run-mcp] ${notice}\n`);
  } catch {}
  try {
    proc?.kill('SIGTERM');
  } catch {}
}
1471
+
1472
+ // Stall watchdog tick (shares the heartbeat cadence). Invariant: a live child
1473
+ // answers an MCP `ping` promptly. When a client call has been pending past
1474
+ // STALL_PROBE_AFTER_MS, send one ping down the same path; if it goes
1475
+ // unanswered STALL_MAX_MISSES times in a row, the response path is dead —
1476
+ // SIGTERM the child so handleChildGone flushes pending (retry error) and
1477
+ // respawns. Never aborts a healthy call: async tools don't block the child's
1478
+ // event loop, so the pong still round-trips while the tool runs.
1479
/**
 * One tick of the stall watchdog.
 *
 * If a ping is outstanding, the tick either keeps waiting (within
 * PING_TIMEOUT_MS) or records a miss. Otherwise, when the oldest pending
 * client request has waited at least STALL_PROBE_AFTER_MS and the child is
 * in a probeable state, it sends a JSON-RPC `ping` under an internal id. A
 * refused write counts as a miss as well.
 */
function _livenessTick() {
  if (shuttingDown) return;
  const now = Date.now();

  // Settle an outstanding probe before considering a new one.
  if (_livenessPingId !== null) {
    const waited = now - _livenessPingSentAt;
    if (waited < PING_TIMEOUT_MS) return; // still inside the answer window
    pendingInternal.delete(_livenessPingId);
    _livenessPingId = null;
    _recordLivenessMiss(`ping unanswered after ${waited}ms`);
    return;
  }

  // Nothing pending: nothing can be stalled, so the miss streak resets.
  if (pendingFromClient.size === 0) {
    _livenessMisses = 0;
    return;
  }
  const probeable = proc && childHasResponded && !_childDraining && !_clientDraining;
  if (!probeable) return;
  if (now < _livenessQuietUntil) return;

  // Timestamp of the longest-waiting client request.
  let oldest = Infinity;
  for (const pending of pendingFromClient.values()) {
    const ts = Number(pending?.ts);
    if (Number.isFinite(ts) && ts < oldest) oldest = ts;
  }
  if (!Number.isFinite(oldest)) return;
  const oldestAge = now - oldest;
  if (oldestAge < STALL_PROBE_AFTER_MS) return;

  const id = internalIdSeq--;
  pendingInternal.add(id);
  _livenessPingId = id;
  _livenessPingSentAt = now;
  const written = writeToChild(JSON.stringify({ jsonrpc: '2.0', id, method: 'ping' }));
  if (written) {
    supLog(`[liveness] probing child — oldest pending client call ${oldestAge}ms (pendingClient=${pendingFromClient.size})`);
    return;
  }
  // The write path itself is unusable: undo the bookkeeping and count a miss
  // so repeated failures recycle the child.
  pendingInternal.delete(id);
  _livenessPingId = null;
  _recordLivenessMiss('ping write rejected');
}
// The watchdog runs on the heartbeat cadence; unref'd where supported.
const _livenessTimer = setInterval(_livenessTick, _HEARTBEAT_MS);
_livenessTimer.unref?.();
1517
+
1518
// Client → supervisor wiring: accumulate stdin text and dispatch complete
// lines to handleClientLine.
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  stdinBuf += chunk;
  stdinBuf = drainBuffer(stdinBuf, handleClientLine);
  // Unbounded-line guard: when the unterminated remainder exceeds the cap the
  // buffer is dropped (the client cannot be killed) and the event is logged.
  const residualBytes = Buffer.byteLength(stdinBuf, 'utf8');
  if (residualBytes > MAX_LINE_BYTES) {
    supLog(`[client-stdin-overflow] bytes=${residualBytes} cap=${MAX_LINE_BYTES} — dropping buffer`);
    stdinBuf = '';
  }
});
spawnChild();

// Parent death watchdog, in addition to the stdin end/close hooks: every 5 s
// probe the pid that was our parent at startup. process.kill(pid, 0) checks
// for existence without sending a signal.
const initialPpid = process.ppid;
if (initialPpid && initialPpid !== 1) {
  const parentWatch = setInterval(() => {
    try {
      process.kill(initialPpid, 0);
      return; // parent still alive
    } catch (err) {
      // EPERM means the process exists but we may not signal it — that is
      // still "alive" (same rule as _heartbeatPidAlive), not a reason to
      // shut down.
      if (err?.code === 'EPERM') return;
    }
    process.stderr.write(`[run-mcp] parent pid=${initialPpid} no longer alive — initiating graceful shutdown\n`);
    clearInterval(parentWatch);
    killChild();
  }, 5000);
  parentWatch.unref();
}