mixdog 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. package/.claude-plugin/marketplace.json +31 -0
  2. package/.claude-plugin/plugin.json +20 -0
  3. package/.gitattributes +34 -0
  4. package/.mcp.json +14 -0
  5. package/ARCHITECTURE.md +77 -0
  6. package/CHANGELOG.md +7 -0
  7. package/CONTRIBUTING.md +45 -0
  8. package/DATA-FLOW.md +79 -0
  9. package/LICENSE +21 -0
  10. package/README.md +389 -0
  11. package/SECURITY.md +138 -0
  12. package/UNINSTALL.md +112 -0
  13. package/agents/maintenance.md +5 -0
  14. package/agents/memory-classification.md +30 -0
  15. package/agents/scheduler-task.md +18 -0
  16. package/agents/webhook-handler.md +27 -0
  17. package/agents/worker.md +24 -0
  18. package/bin/bridge +133 -0
  19. package/bin/statusline-launcher.mjs +78 -0
  20. package/bin/statusline-lib.mjs +550 -0
  21. package/bin/statusline.mjs +607 -0
  22. package/bun.lock +802 -0
  23. package/commands/config.md +16 -0
  24. package/commands/doctor.md +13 -0
  25. package/commands/setup.md +17 -0
  26. package/defaults/cycle3-review-prompt.md +90 -0
  27. package/defaults/hidden-roles.json +65 -0
  28. package/defaults/memory-chunk-prompt.md +63 -0
  29. package/defaults/memory-promote-prompt.md +135 -0
  30. package/defaults/mixdog-config.template.json +27 -0
  31. package/defaults/user-workflow.json +8 -0
  32. package/defaults/user-workflow.md +12 -0
  33. package/hooks/hooks.json +73 -0
  34. package/hooks/lib/active-instance.cjs +77 -0
  35. package/hooks/lib/permission-evaluator.cjs +411 -0
  36. package/hooks/lib/permission-route.cjs +63 -0
  37. package/hooks/lib/permission-rules.cjs +170 -0
  38. package/hooks/lib/settings-loader.cjs +116 -0
  39. package/hooks/post-tool-use.cjs +84 -0
  40. package/hooks/pre-mcp-sandbox.cjs +158 -0
  41. package/hooks/pre-tool-subagent.cjs +253 -0
  42. package/hooks/session-start.cjs +1372 -0
  43. package/hooks/turn-timer.cjs +82 -0
  44. package/lib/claude-md-writer.cjs +386 -0
  45. package/lib/config-cjs.cjs +61 -0
  46. package/lib/hook-pipe-path.cjs +10 -0
  47. package/lib/keychain-cjs.cjs +263 -0
  48. package/lib/plugin-paths.cjs +61 -0
  49. package/lib/rules-builder.cjs +241 -0
  50. package/lib/text-utils.cjs +61 -0
  51. package/native/README.md +117 -0
  52. package/native/prebuilt/linux-aarch64/mixdog-shim +0 -0
  53. package/native/prebuilt/linux-x86_64/mixdog-shim +0 -0
  54. package/native/prebuilt/macos-aarch64/mixdog-shim +0 -0
  55. package/native/prebuilt/macos-x86_64/mixdog-shim +0 -0
  56. package/native/prebuilt/windows-x86_64/mixdog-shim.exe +0 -0
  57. package/package.json +107 -0
  58. package/prompts/code-review.txt +16 -0
  59. package/prompts/security-audit.txt +17 -0
  60. package/rules/bridge/00-common.md +39 -0
  61. package/rules/bridge/20-skip-protocol.md +18 -0
  62. package/rules/bridge/30-explorer.md +33 -0
  63. package/rules/bridge/40-cycle1-agent.md +52 -0
  64. package/rules/bridge/41-cycle2-agent.md +62 -0
  65. package/rules/bridge/42-cycle3-agent.md +44 -0
  66. package/rules/lead/00-tool-lead.md +61 -0
  67. package/rules/lead/01-general.md +23 -0
  68. package/rules/lead/02-channels.md +49 -0
  69. package/rules/lead/03-team.md +27 -0
  70. package/rules/lead/04-workflow.md +20 -0
  71. package/rules/shared/00-language.md +14 -0
  72. package/rules/shared/01-tool.md +138 -0
  73. package/scripts/bootstrap.mjs +184 -0
  74. package/scripts/bridge-unify-smoke.mjs +308 -0
  75. package/scripts/build-runtime-linux.sh +348 -0
  76. package/scripts/build-runtime-macos.sh +217 -0
  77. package/scripts/build-runtime-windows.ps1 +242 -0
  78. package/scripts/builtin-utils-smoke.mjs +392 -0
  79. package/scripts/check-json.mjs +45 -0
  80. package/scripts/check-syntax-changed.mjs +102 -0
  81. package/scripts/check-syntax.mjs +58 -0
  82. package/scripts/code-graph-batch.test.mjs +33 -0
  83. package/scripts/config-preserve-smoke.mjs +180 -0
  84. package/scripts/doctor.mjs +484 -0
  85. package/scripts/edit-normalize-fuzz.mjs +130 -0
  86. package/scripts/edit-normalize-smoke.mjs +401 -0
  87. package/scripts/edit-operation-smoke.mjs +369 -0
  88. package/scripts/edit2-smoke.mjs +63 -0
  89. package/scripts/fuzzy-e2e.mjs +28 -0
  90. package/scripts/fuzzy-smoke.mjs +26 -0
  91. package/scripts/generate-runtime-manifest.mjs +166 -0
  92. package/scripts/guard-smoke.mjs +66 -0
  93. package/scripts/hidden-role-schema-smoke.mjs +162 -0
  94. package/scripts/hook-routing-smoke.mjs +29 -0
  95. package/scripts/inject-input.ps1 +204 -0
  96. package/scripts/io-complex-smoke.mjs +667 -0
  97. package/scripts/io-explore-bench.mjs +424 -0
  98. package/scripts/io-guardrails-smoke.mjs +205 -0
  99. package/scripts/io-mini-bench-baseline.json +11 -0
  100. package/scripts/io-mini-bench.mjs +216 -0
  101. package/scripts/io-route-harness.mjs +933 -0
  102. package/scripts/io-telemetry-report.mjs +691 -0
  103. package/scripts/mutation-bench.mjs +564 -0
  104. package/scripts/mutation-io-smoke.mjs +1081 -0
  105. package/scripts/native-patch-bridge-smoke.mjs +288 -0
  106. package/scripts/native-patch-smoke.mjs +304 -0
  107. package/scripts/patch-interior-context-smoke.mjs +49 -0
  108. package/scripts/patch-newline-utf8-smoke.mjs +157 -0
  109. package/scripts/perf-hook-smoke.mjs +71 -0
  110. package/scripts/permission-eval-smoke.mjs +426 -0
  111. package/scripts/prep-patch.mjs +53 -0
  112. package/scripts/prep-shim.mjs +96 -0
  113. package/scripts/provider-cache-smoke.mjs +687 -0
  114. package/scripts/report-runtime-health.mjs +132 -0
  115. package/scripts/run-mcp.mjs +1547 -0
  116. package/scripts/salvage-v4a-shatter.test.mjs +58 -0
  117. package/scripts/scoped-cache-io-smoke.mjs +103 -0
  118. package/scripts/shell-policy-round3-smoke.mjs +46 -0
  119. package/scripts/smoke-runtime-negative.ps1 +100 -0
  120. package/scripts/smoke-runtime-negative.sh +95 -0
  121. package/scripts/stall-policy-smoke.mjs +50 -0
  122. package/scripts/start-memory-worker.mjs +23 -0
  123. package/scripts/statusline-launcher-smoke.mjs +82 -0
  124. package/scripts/stress-atomic-write.mjs +1028 -0
  125. package/scripts/test-config-rmw-restore.mjs +122 -0
  126. package/scripts/test-fault-inject.mjs +164 -0
  127. package/scripts/test-large-file.mjs +174 -0
  128. package/scripts/tool-edge-smoke.mjs +209 -0
  129. package/scripts/uninstall.mjs +201 -0
  130. package/scripts/webhook-selfheal-smoke.mjs +29 -0
  131. package/scripts/write-overwrite-guard-smoke.mjs +56 -0
  132. package/server-main.mjs +3055 -0
  133. package/server.mjs +468 -0
  134. package/setup/config-merge.mjs +254 -0
  135. package/setup/install.mjs +120 -0
  136. package/setup/launch-core.mjs +507 -0
  137. package/setup/launch.mjs +101 -0
  138. package/setup/setup-server.mjs +3206 -0
  139. package/setup/setup.html +3693 -0
  140. package/skills/retro-skill-proposer/SKILL.md +92 -0
  141. package/skills/schedule-add/SKILL.md +77 -0
  142. package/skills/setup/SKILL.md +346 -0
  143. package/skills/webhook-add/SKILL.md +81 -0
  144. package/src/agent/bridge-stall-watchdog.mjs +337 -0
  145. package/src/agent/index.mjs +2138 -0
  146. package/src/agent/orchestrator/activity-bus.mjs +38 -0
  147. package/src/agent/orchestrator/ai-wrapped-dispatch.mjs +1010 -0
  148. package/src/agent/orchestrator/bridge-retry.mjs +220 -0
  149. package/src/agent/orchestrator/bridge-trace.mjs +583 -0
  150. package/src/agent/orchestrator/cache-mtime.mjs +58 -0
  151. package/src/agent/orchestrator/config.mjs +358 -0
  152. package/src/agent/orchestrator/context/collect.mjs +651 -0
  153. package/src/agent/orchestrator/dispatch-persist.mjs +549 -0
  154. package/src/agent/orchestrator/drain-registry.mjs +50 -0
  155. package/src/agent/orchestrator/explore-validator.mjs +8 -0
  156. package/src/agent/orchestrator/internal-roles.mjs +118 -0
  157. package/src/agent/orchestrator/internal-tools.mjs +88 -0
  158. package/src/agent/orchestrator/jobs.mjs +116 -0
  159. package/src/agent/orchestrator/mcp/client.mjs +364 -0
  160. package/src/agent/orchestrator/providers/anthropic-betas.mjs +21 -0
  161. package/src/agent/orchestrator/providers/anthropic-oauth.mjs +1745 -0
  162. package/src/agent/orchestrator/providers/anthropic.mjs +437 -0
  163. package/src/agent/orchestrator/providers/gemini.mjs +1175 -0
  164. package/src/agent/orchestrator/providers/grok-oauth.mjs +782 -0
  165. package/src/agent/orchestrator/providers/model-catalog.mjs +241 -0
  166. package/src/agent/orchestrator/providers/openai-compat.mjs +1467 -0
  167. package/src/agent/orchestrator/providers/openai-oauth-ws.mjs +1890 -0
  168. package/src/agent/orchestrator/providers/openai-oauth.mjs +1307 -0
  169. package/src/agent/orchestrator/providers/openai-ws.mjs +104 -0
  170. package/src/agent/orchestrator/providers/registry.mjs +192 -0
  171. package/src/agent/orchestrator/providers/retry-classifier.mjs +325 -0
  172. package/src/agent/orchestrator/session/abort-lookup.mjs +13 -0
  173. package/src/agent/orchestrator/session/cache/post-edit-marks.mjs +42 -0
  174. package/src/agent/orchestrator/session/cache/prefetch-cache.mjs +142 -0
  175. package/src/agent/orchestrator/session/cache/read-cache.mjs +319 -0
  176. package/src/agent/orchestrator/session/cache/scoped-cache-outcome.mjs +11 -0
  177. package/src/agent/orchestrator/session/cache/scoped-cache.mjs +361 -0
  178. package/src/agent/orchestrator/session/cache/util.mjs +49 -0
  179. package/src/agent/orchestrator/session/loop.mjs +1478 -0
  180. package/src/agent/orchestrator/session/manager.mjs +1975 -0
  181. package/src/agent/orchestrator/session/read-dedup.mjs +6 -0
  182. package/src/agent/orchestrator/session/result-classification.mjs +65 -0
  183. package/src/agent/orchestrator/session/save-session-worker.mjs +18 -0
  184. package/src/agent/orchestrator/session/store.mjs +624 -0
  185. package/src/agent/orchestrator/session/stream-watchdog.mjs +130 -0
  186. package/src/agent/orchestrator/session/tool-result-offload.mjs +166 -0
  187. package/src/agent/orchestrator/session/trim.mjs +491 -0
  188. package/src/agent/orchestrator/smart-bridge/CACHE-SHARD.md +115 -0
  189. package/src/agent/orchestrator/smart-bridge/bridge-llm.mjs +327 -0
  190. package/src/agent/orchestrator/smart-bridge/cache-obs.mjs +150 -0
  191. package/src/agent/orchestrator/smart-bridge/cache-strategy.mjs +228 -0
  192. package/src/agent/orchestrator/smart-bridge/index.mjs +215 -0
  193. package/src/agent/orchestrator/smart-bridge/profiles.mjs +37 -0
  194. package/src/agent/orchestrator/smart-bridge/registry.mjs +348 -0
  195. package/src/agent/orchestrator/smart-bridge/session-builder.mjs +116 -0
  196. package/src/agent/orchestrator/stall-policy.mjs +195 -0
  197. package/src/agent/orchestrator/tool-loop-guard.mjs +75 -0
  198. package/src/agent/orchestrator/tools/bash-policy-scan.mjs +77 -0
  199. package/src/agent/orchestrator/tools/bash-session.mjs +721 -0
  200. package/src/agent/orchestrator/tools/builtin/advisory-lock.mjs +171 -0
  201. package/src/agent/orchestrator/tools/builtin/arg-guard.mjs +455 -0
  202. package/src/agent/orchestrator/tools/builtin/atomic-write.mjs +236 -0
  203. package/src/agent/orchestrator/tools/builtin/bash-tool.mjs +480 -0
  204. package/src/agent/orchestrator/tools/builtin/binary-file.mjs +76 -0
  205. package/src/agent/orchestrator/tools/builtin/builtin-tools.mjs +256 -0
  206. package/src/agent/orchestrator/tools/builtin/cache-layers.mjs +386 -0
  207. package/src/agent/orchestrator/tools/builtin/cwd-utils.mjs +37 -0
  208. package/src/agent/orchestrator/tools/builtin/device-paths.mjs +154 -0
  209. package/src/agent/orchestrator/tools/builtin/diagnostics-tool.mjs +292 -0
  210. package/src/agent/orchestrator/tools/builtin/diff-utils.mjs +109 -0
  211. package/src/agent/orchestrator/tools/builtin/edit-base-guard.mjs +58 -0
  212. package/src/agent/orchestrator/tools/builtin/edit-byte-plan.mjs +240 -0
  213. package/src/agent/orchestrator/tools/builtin/edit-byte-utils.mjs +113 -0
  214. package/src/agent/orchestrator/tools/builtin/edit-commit.mjs +74 -0
  215. package/src/agent/orchestrator/tools/builtin/edit-context-utils.mjs +242 -0
  216. package/src/agent/orchestrator/tools/builtin/edit-diagnostics.mjs +211 -0
  217. package/src/agent/orchestrator/tools/builtin/edit-engine.mjs +1364 -0
  218. package/src/agent/orchestrator/tools/builtin/edit-failure-context.mjs +126 -0
  219. package/src/agent/orchestrator/tools/builtin/edit-hint.mjs +141 -0
  220. package/src/agent/orchestrator/tools/builtin/edit-match-utils.mjs +194 -0
  221. package/src/agent/orchestrator/tools/builtin/edit-partial-write.mjs +60 -0
  222. package/src/agent/orchestrator/tools/builtin/edit-stale-refresh.mjs +168 -0
  223. package/src/agent/orchestrator/tools/builtin/edit-tool.mjs +173 -0
  224. package/src/agent/orchestrator/tools/builtin/edit-utf8-guard.mjs +48 -0
  225. package/src/agent/orchestrator/tools/builtin/fs-reachability.mjs +48 -0
  226. package/src/agent/orchestrator/tools/builtin/fuzzy-match.mjs +99 -0
  227. package/src/agent/orchestrator/tools/builtin/glob-walk.mjs +170 -0
  228. package/src/agent/orchestrator/tools/builtin/grep-formatting.mjs +113 -0
  229. package/src/agent/orchestrator/tools/builtin/hash-utils.mjs +6 -0
  230. package/src/agent/orchestrator/tools/builtin/list-formatting.mjs +7 -0
  231. package/src/agent/orchestrator/tools/builtin/list-tool.mjs +593 -0
  232. package/src/agent/orchestrator/tools/builtin/native-edit-runner.mjs +89 -0
  233. package/src/agent/orchestrator/tools/builtin/notebook-edit-tool.mjs +300 -0
  234. package/src/agent/orchestrator/tools/builtin/open-config-tool.mjs +26 -0
  235. package/src/agent/orchestrator/tools/builtin/path-diagnostics.mjs +152 -0
  236. package/src/agent/orchestrator/tools/builtin/path-locks.mjs +35 -0
  237. package/src/agent/orchestrator/tools/builtin/path-utils.mjs +201 -0
  238. package/src/agent/orchestrator/tools/builtin/read-args.mjs +103 -0
  239. package/src/agent/orchestrator/tools/builtin/read-batch.mjs +172 -0
  240. package/src/agent/orchestrator/tools/builtin/read-constants.mjs +40 -0
  241. package/src/agent/orchestrator/tools/builtin/read-formatting.mjs +118 -0
  242. package/src/agent/orchestrator/tools/builtin/read-image-resize.mjs +189 -0
  243. package/src/agent/orchestrator/tools/builtin/read-image.mjs +88 -0
  244. package/src/agent/orchestrator/tools/builtin/read-lines.mjs +12 -0
  245. package/src/agent/orchestrator/tools/builtin/read-mode-tool.mjs +455 -0
  246. package/src/agent/orchestrator/tools/builtin/read-open.mjs +190 -0
  247. package/src/agent/orchestrator/tools/builtin/read-range-index.mjs +271 -0
  248. package/src/agent/orchestrator/tools/builtin/read-ranges.mjs +26 -0
  249. package/src/agent/orchestrator/tools/builtin/read-single-tool.mjs +728 -0
  250. package/src/agent/orchestrator/tools/builtin/read-snapshot-runtime.mjs +173 -0
  251. package/src/agent/orchestrator/tools/builtin/read-special-files.mjs +268 -0
  252. package/src/agent/orchestrator/tools/builtin/read-streaming.mjs +602 -0
  253. package/src/agent/orchestrator/tools/builtin/read-tool.mjs +530 -0
  254. package/src/agent/orchestrator/tools/builtin/read-windows.mjs +107 -0
  255. package/src/agent/orchestrator/tools/builtin/rename-tool.mjs +196 -0
  256. package/src/agent/orchestrator/tools/builtin/rg-runner.mjs +422 -0
  257. package/src/agent/orchestrator/tools/builtin/search-builders.mjs +158 -0
  258. package/src/agent/orchestrator/tools/builtin/search-tool.mjs +869 -0
  259. package/src/agent/orchestrator/tools/builtin/shell-analysis.mjs +653 -0
  260. package/src/agent/orchestrator/tools/builtin/shell-jobs.mjs +936 -0
  261. package/src/agent/orchestrator/tools/builtin/shell-output.mjs +36 -0
  262. package/src/agent/orchestrator/tools/builtin/shell-runtime.mjs +214 -0
  263. package/src/agent/orchestrator/tools/builtin/snapshot-helpers.mjs +143 -0
  264. package/src/agent/orchestrator/tools/builtin/snapshot-store.mjs +206 -0
  265. package/src/agent/orchestrator/tools/builtin/snapshot-validation.mjs +98 -0
  266. package/src/agent/orchestrator/tools/builtin/text-stats.mjs +69 -0
  267. package/src/agent/orchestrator/tools/builtin/windows-roots.mjs +23 -0
  268. package/src/agent/orchestrator/tools/builtin/write-tool.mjs +401 -0
  269. package/src/agent/orchestrator/tools/builtin.mjs +500 -0
  270. package/src/agent/orchestrator/tools/code-graph-prewarm-worker.mjs +39 -0
  271. package/src/agent/orchestrator/tools/code-graph-tool-defs.mjs +24 -0
  272. package/src/agent/orchestrator/tools/code-graph.mjs +4095 -0
  273. package/src/agent/orchestrator/tools/cwd-tool.mjs +298 -0
  274. package/src/agent/orchestrator/tools/destructive-warning.mjs +323 -0
  275. package/src/agent/orchestrator/tools/edit-normalize.mjs +603 -0
  276. package/src/agent/orchestrator/tools/env-scrub.mjs +100 -0
  277. package/src/agent/orchestrator/tools/graph-binary-fetcher.mjs +144 -0
  278. package/src/agent/orchestrator/tools/graph-manifest.json +26 -0
  279. package/src/agent/orchestrator/tools/host-input.mjs +204 -0
  280. package/src/agent/orchestrator/tools/mutation-content-cache.mjs +67 -0
  281. package/src/agent/orchestrator/tools/mutation-planner.mjs +75 -0
  282. package/src/agent/orchestrator/tools/next-call-utils.mjs +48 -0
  283. package/src/agent/orchestrator/tools/patch-binary-fetcher.mjs +133 -0
  284. package/src/agent/orchestrator/tools/patch-manifest.json +26 -0
  285. package/src/agent/orchestrator/tools/patch-tool-defs.mjs +20 -0
  286. package/src/agent/orchestrator/tools/patch.mjs +2754 -0
  287. package/src/agent/orchestrator/tools/progress-message.mjs +118 -0
  288. package/src/agent/orchestrator/tools/result-compression.mjs +279 -0
  289. package/src/agent/orchestrator/tools/shell-command.mjs +865 -0
  290. package/src/agent/orchestrator/tools/shell-exec-policy.mjs +89 -0
  291. package/src/agent/orchestrator/tools/shell-policy-danger-target.mjs +27 -0
  292. package/src/agent/orchestrator/tools/shell-policy-imports.mjs +7 -0
  293. package/src/agent/orchestrator/tools/shell-policy.mjs +345 -0
  294. package/src/agent/orchestrator/tools/shell-snapshot.mjs +313 -0
  295. package/src/agent/orchestrator/workflow-store.mjs +93 -0
  296. package/src/agent/tool-defs.mjs +103 -0
  297. package/src/channels/backends/discord.mjs +784 -0
  298. package/src/channels/data/voice-runtime-manifest.json +138 -0
  299. package/src/channels/index.mjs +3229 -0
  300. package/src/channels/lib/cli-worker-host.mjs +12 -0
  301. package/src/channels/lib/config-lock.mjs +13 -0
  302. package/src/channels/lib/config.mjs +292 -0
  303. package/src/channels/lib/drop-trace.mjs +71 -0
  304. package/src/channels/lib/event-pipeline.mjs +81 -0
  305. package/src/channels/lib/event-queue.mjs +345 -0
  306. package/src/channels/lib/executor.mjs +168 -0
  307. package/src/channels/lib/format.mjs +188 -0
  308. package/src/channels/lib/holidays.mjs +138 -0
  309. package/src/channels/lib/hook-pipe-server.mjs +802 -0
  310. package/src/channels/lib/interaction-workflows.mjs +184 -0
  311. package/src/channels/lib/memory-client.mjs +149 -0
  312. package/src/channels/lib/output-forwarder.mjs +765 -0
  313. package/src/channels/lib/runtime-paths.mjs +479 -0
  314. package/src/channels/lib/scheduler.mjs +723 -0
  315. package/src/channels/lib/session-control.mjs +36 -0
  316. package/src/channels/lib/session-discovery.mjs +103 -0
  317. package/src/channels/lib/settings.mjs +11 -0
  318. package/src/channels/lib/state-file.mjs +68 -0
  319. package/src/channels/lib/status-snapshot.mjs +219 -0
  320. package/src/channels/lib/tool-format.mjs +140 -0
  321. package/src/channels/lib/transcript-discovery.mjs +195 -0
  322. package/src/channels/lib/voice-runtime-fetcher.mjs +734 -0
  323. package/src/channels/lib/webhook.mjs +1179 -0
  324. package/src/channels/lib/whisper-server.mjs +477 -0
  325. package/src/channels/tool-defs.mjs +170 -0
  326. package/src/daemon/host.mjs +118 -0
  327. package/src/daemon/mcp-transport.mjs +47 -0
  328. package/src/daemon/session.mjs +100 -0
  329. package/src/daemon/thin-client.mjs +71 -0
  330. package/src/daemon/transport.mjs +163 -0
  331. package/src/memory/data/runtime-manifest.json +40 -0
  332. package/src/memory/index.mjs +3305 -0
  333. package/src/memory/lib/agent-ipc.mjs +93 -0
  334. package/src/memory/lib/bridge-trace-queries.mjs +120 -0
  335. package/src/memory/lib/core-memory-store.mjs +330 -0
  336. package/src/memory/lib/embedding-provider.mjs +269 -0
  337. package/src/memory/lib/embedding-worker.mjs +323 -0
  338. package/src/memory/lib/llm-worker-host.mjs +17 -0
  339. package/src/memory/lib/memory-cycle.mjs +11 -0
  340. package/src/memory/lib/memory-cycle1.mjs +641 -0
  341. package/src/memory/lib/memory-cycle2.mjs +1284 -0
  342. package/src/memory/lib/memory-cycle3.mjs +540 -0
  343. package/src/memory/lib/memory-embed.mjs +299 -0
  344. package/src/memory/lib/memory-extraction.mjs +5 -0
  345. package/src/memory/lib/memory-maintenance-store.mjs +32 -0
  346. package/src/memory/lib/memory-ops-policy.mjs +190 -0
  347. package/src/memory/lib/memory-recall-id-patch.mjs +15 -0
  348. package/src/memory/lib/memory-recall-read-query.mjs +7 -0
  349. package/src/memory/lib/memory-recall-scope-filter.mjs +63 -0
  350. package/src/memory/lib/memory-recall-store.mjs +621 -0
  351. package/src/memory/lib/memory-retrievers.mjs +112 -0
  352. package/src/memory/lib/memory-score.mjs +71 -0
  353. package/src/memory/lib/memory-text-utils.mjs +58 -0
  354. package/src/memory/lib/memory.mjs +412 -0
  355. package/src/memory/lib/model-profile.mjs +85 -0
  356. package/src/memory/lib/pg/adapter.mjs +308 -0
  357. package/src/memory/lib/pg/process.mjs +360 -0
  358. package/src/memory/lib/pg/supervisor.mjs +396 -0
  359. package/src/memory/lib/project-id-resolver.mjs +86 -0
  360. package/src/memory/lib/runtime-fetcher.mjs +442 -0
  361. package/src/memory/lib/trace-store.mjs +728 -0
  362. package/src/memory/tool-defs.mjs +79 -0
  363. package/src/search/index.mjs +1173 -0
  364. package/src/search/lib/backends/anthropic-oauth.mjs +98 -0
  365. package/src/search/lib/backends/exa.mjs +50 -0
  366. package/src/search/lib/backends/firecrawl.mjs +61 -0
  367. package/src/search/lib/backends/gemini-api.mjs +83 -0
  368. package/src/search/lib/backends/grok-oauth.mjs +86 -0
  369. package/src/search/lib/backends/index.mjs +150 -0
  370. package/src/search/lib/backends/openai-api.mjs +144 -0
  371. package/src/search/lib/backends/openai-oauth.mjs +98 -0
  372. package/src/search/lib/backends/openai-web-search.mjs +76 -0
  373. package/src/search/lib/backends/tavily.mjs +55 -0
  374. package/src/search/lib/backends/xai-api.mjs +113 -0
  375. package/src/search/lib/cache.mjs +131 -0
  376. package/src/search/lib/config.mjs +192 -0
  377. package/src/search/lib/formatter.mjs +115 -0
  378. package/src/search/lib/provider-usage.mjs +67 -0
  379. package/src/search/lib/providers.mjs +47 -0
  380. package/src/search/lib/search-intent.mjs +109 -0
  381. package/src/search/lib/setup-handler.mjs +261 -0
  382. package/src/search/lib/state.mjs +201 -0
  383. package/src/search/lib/web-tools.mjs +1207 -0
  384. package/src/search/tool-defs.mjs +83 -0
  385. package/src/setup/defender-exclusion.mjs +183 -0
  386. package/src/shared/abort-controller.mjs +15 -0
  387. package/src/shared/atomic-file.mjs +420 -0
  388. package/src/shared/config.mjs +350 -0
  389. package/src/shared/daemon-recycle.mjs +108 -0
  390. package/src/shared/disable-claude-builtins.mjs +88 -0
  391. package/src/shared/err-text.mjs +12 -0
  392. package/src/shared/llm/cost.mjs +66 -0
  393. package/src/shared/llm/http-agent.mjs +123 -0
  394. package/src/shared/llm/index.mjs +41 -0
  395. package/src/shared/llm/pid-cleanup.mjs +27 -0
  396. package/src/shared/llm/usage-log.mjs +47 -0
  397. package/src/shared/plugin-paths.mjs +58 -0
  398. package/src/shared/schedules-store.mjs +70 -0
  399. package/src/shared/seed.mjs +119 -0
  400. package/src/shared/user-cwd.mjs +213 -0
  401. package/src/shared/user-data-guard.mjs +238 -0
  402. package/src/status/aggregator.mjs +584 -0
  403. package/src/status/server.mjs +413 -0
  404. package/tools.json +1653 -0
@@ -0,0 +1,424 @@
1
+ #!/usr/bin/env node
2
+ // I/O EXPLORATION bench — multi-turn cost of LOOKUP routes.
3
+ //
4
+ // Why this exists: io-route-harness scores edit ROUTES; io-mini-bench scores
5
+ // per-call LATENCY. Neither measures the cost that dominates real exploration
6
+ // work: round-trips (turns) and the CONTEXT INTEGRAL — every tool result stays
7
+ // in context and is re-processed as prompt on every later turn, so early/large
8
+ // outputs cost ~ (output x remaining turns). This bench runs the REAL lookup
9
+ // tools (code_graph / grep / glob / list / read) on a hermetic fixture, then
10
+ // scores each competing route with that multi-turn model. `explore` (an LLM
11
+ // sub-agent) is MODELED, not executed, and labelled [modeled].
12
+ //
13
+ // Usage:
14
+ // node scripts/io-explore-bench.mjs # table
15
+ // node scripts/io-explore-bench.mjs --json # machine-readable
16
+ // node scripts/io-explore-bench.mjs --check # assert per-task expected winner (exit 1 on regression)
17
+
18
+ import { performance } from 'node:perf_hooks';
19
+ import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises';
20
+ import { tmpdir } from 'node:os';
21
+ import { dirname, join } from 'node:path';
22
+ import { fileURLToPath, pathToFileURL } from 'node:url';
23
+
24
// Package root: the parent of the directory that holds this script.
const ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
// Only fill in the plugin root when the caller has not provided one.
if (!process.env.CLAUDE_PLUGIN_ROOT) {
  process.env.CLAUDE_PLUGIN_ROOT = ROOT;
}
// SAFETY: the data dir is always overwritten with a bench-owned temp path
// (keyed by pid). It must never fall back to a pre-existing, live
// CLAUDE_PLUGIN_DATA, because the bench cleanup removes this directory.
const BENCH_PLUGIN_DATA = join(
  tmpdir(),
  `mixdog-io-explore-bench-data-${process.pid}`,
);
process.env.CLAUDE_PLUGIN_DATA = BENCH_PLUGIN_DATA;

// The tool modules are loaded dynamically, after the env vars above are set.
const { executeBuiltinTool } = await import(
  pathToFileURL(join(ROOT, 'src/agent/orchestrator/tools/builtin.mjs')).href
);
const { executeCodeGraphTool } = await import(
  pathToFileURL(join(ROOT, 'src/agent/orchestrator/tools/code-graph.mjs')).href
);

// CLI flags: --json selects machine-readable output, --check asserts winners.
const ARGV = process.argv.slice(2);
const JSON_OUT = ARGV.includes('--json');
const CHECK = ARGV.includes('--check');
// Fresh scratch directory for this run.
const WORKDIR = await mkdtemp(join(tmpdir(), 'mixdog-io-explore-bench-'));
38
+
39
+ // ---- multi-turn cost model (bytes/4 ~= tokens) -----------------------------
40
// Bytes-to-tokens approximation: four bytes per token.
const B2T = (bytes) => bytes / 4;
// Fixed prefix tokens (system + rules + conversation).
const P0 = 6_000;
// Model thinking tokens charged on every turn.
const REASON_PER_TURN = 180;
// Seconds per round-trip (network + exec + TTFT).
const RT_SECONDS = 1.4;
// Throughput assumptions: prompt ingestion and generation, in tokens/second.
const PROMPT_TOK_PER_S = 9_000;
const GEN_TOK_PER_S = 90;
// The cached prefix is billed at 0.1x.
const CACHE_PREFIX_DISCOUNT = 0.1;
// A lookup is never the final step of a task: whatever it returned stays in
// context and is billed again on the task turns that follow (the edit, the
// verify, the next probe). Leaving this out makes single-turn routes look
// cost-tied and hides the benefit of tighter output. Roughly four follow-on
// turns is a conservative tail.
const TAIL_TURNS = 4;
// Bytes the model writes to invoke each tool (its call arguments), per tool.
const GEN_BYTES = {
  code_graph: 40,
  grep: 55,
  glob: 40,
  list: 50,
  read: 45,
  explore: 120,
};
54
+
55
+ // ---- hermetic fixture ------------------------------------------------------
56
+ // A small but realistic project: one hot symbol with several callers, a module
57
+ // with multiple exports, a config literal, nested dirs, and decoy files.
58
/**
 * Write the hermetic benchmark project into `dir`.
 *
 * The fixture holds a package manifest, a hot symbol (`runEngine`) with
 * several callers, a module with multiple exports, a config literal, nested
 * directories, six decoy utility modules and one large module.
 *
 * Files are written one at a time, in declaration order; parent directories
 * are created on demand.
 *
 * @param {string} dir - Directory that receives the fixture tree.
 * @returns {Promise<void>} Resolves once every file has been written.
 */
async function buildFixture(dir) {
  const text = (...rows) => rows.join('\n');

  // Decoys so glob/list/grep have noise to wade through.
  const decoys = Array.from({ length: 6 }, (_, i) => [
    `src/util/util-${i}.mjs`,
    `export const util${i} = () => ${i};\n`,
  ]);

  // A large module to exercise the read context-budget modes (full vs
  // max_lines vs summary vs count); small files would all fit under budget
  // and show nothing.
  const bigModule = `${Array.from(
    { length: 320 },
    (_, i) => `export const item${i} = { id: ${i}, name: "item-${i}", weight: ${i * 3} };`,
  ).join('\n')}\n`;

  const files = [
    [
      'package.json',
      `${JSON.stringify({ name: 'fx', version: '1.0.0', type: 'module' }, null, 2)}\n`,
    ],
    [
      'src/core/engine.mjs',
      text(
        'import { validate } from "./validate.mjs";',
        'import { CONFIG } from "../config.mjs";',
        'export function runEngine(input) {',
        ' const v = validate(input);',
        ' return v ? input * CONFIG.MAX_RETRIES : 0;',
        '}',
        'export function bootEngine() {',
        ' return runEngine(1) + runEngine(2);',
        '}',
        '',
      ),
    ],
    [
      'src/core/validate.mjs',
      text(
        'export function validate(x) {',
        ' return typeof x === "number" && x >= 0;',
        '}',
        '',
      ),
    ],
    [
      'src/config.mjs',
      text(
        'export const CONFIG = {',
        ' MAX_RETRIES: 5,',
        ' TIMEOUT_MS: 3000,',
        '};',
        '',
      ),
    ],
    [
      'src/api/handler.mjs',
      text(
        'import { runEngine } from "../core/engine.mjs";',
        'export function handle(req) {',
        ' return runEngine(req.value);',
        '}',
        'export function batchHandle(reqs) {',
        ' return reqs.map((r) => runEngine(r.value));',
        '}',
        '',
      ),
    ],
    [
      'src/api/routes.mjs',
      text(
        'import { handle } from "./handler.mjs";',
        'export const routes = { post: handle };',
        '',
      ),
    ],
    ...decoys,
    ['src/big-module.mjs', bigModule],
  ];

  for (const [rel, body] of files) {
    const target = join(dir, rel);
    await mkdir(dirname(target), { recursive: true });
    await writeFile(target, body, 'utf8');
  }
}
113
+
114
+ // ---- real tool execution wrappers ------------------------------------------
115
/**
 * Execute one tool invocation and record its measurements on `state.calls`.
 *
 * The result of `run` is coerced to a string (null/undefined become '').
 * The call counts as failed when `run` throws or rejects, or when the output
 * starts with "Error" followed by whitespace, "[" or ":". A thrown error is
 * converted into an "Error: <message>" string rather than propagated.
 *
 * @param {{ calls: Array<object> }} state - Accumulator; one record is pushed.
 * @param {string} kind - Tool label stored on the record.
 * @param {() => unknown} run - Thunk that performs the tool call.
 * @returns {Promise<string>} The output text (or the synthesized error text).
 */
async function call(state, kind, run) {
  const t0 = performance.now();
  const result = { text: '', ok: false };
  try {
    const raw = await run();
    result.text = String(raw ?? '');
    // Tools report failures in-band as text beginning with "Error".
    result.ok = !/^Error(\s|\[|:)/.test(result.text);
  } catch (err) {
    result.text = `Error: ${err?.message || String(err)}`;
  }
  const elapsed = performance.now() - t0;
  state.calls.push({
    kind,
    outBytes: Buffer.byteLength(result.text, 'utf8'),
    ms: Number(elapsed.toFixed(3)), // millisecond timing, 3 decimal places
    ok: result.ok,
  });
  return result.text;
}
130
+
131
/**
 * Route-step factories. Each entry takes its arguments and returns a step
 * `(state) => Promise<string>` that records what it did on `state.calls`.
 */
const TOOL = (() => {
  // grep / glob / list / read all go through executeBuiltinTool with the
  // state's scope used as both read-state scope and session id.
  const viaBuiltin = (kind) => (dir, args) => (s) =>
    call(s, kind, () =>
      executeBuiltinTool(kind, args, dir, {
        readStateScope: s.scope,
        sessionId: s.scope,
      }),
    );

  const BRIEF_CAP = 30000; // merged brief smart-truncate cap

  return {
    code_graph: (dir, args) => (s) =>
      call(s, 'code_graph', () =>
        executeCodeGraphTool('code_graph', { ...args, cwd: dir }, dir),
      ),
    grep: viaBuiltin('grep'),
    glob: viaBuiltin('glob'),
    list: viaBuiltin('list'),
    read: viaBuiltin('read'),
    // explore is an LLM sub-agent and is MODELED rather than executed. A
    // background explore costs the LEAD exactly two turns whatever N is:
    // (1) dispatch, where the lead writes the N-query array and gets a
    // handle, and (2) integrating the dispatch_result brief. The N explorers
    // run in parallel on a cheap model in throwaway contexts, so their
    // inference is booked as hiddenTok (NOT lead context); the merged brief
    // is capped.
    explore: (n = 1) => (s) => {
      const dispatchTurn = {
        kind: 'explore',
        outBytes: 80,
        gen: 120 * n,
        ms: 0,
        modeled: true,
        ok: true,
      };
      const notifyTurn = {
        kind: 'explore',
        outBytes: Math.min(BRIEF_CAP, 8000 * n), // grows with N, then capped
        gen: 20,
        ms: 0,
        modeled: true,
        hiddenTok: 14000 * n,
        ok: true,
      };
      s.calls.push(dispatchTurn); // turn 1: dispatch
      s.calls.push(notifyTurn); // turn 2: notify
      return Promise.resolve('[modeled explore]');
    },
  };
})();
150
+
151
+ // ---- score a completed route (sequence of recorded calls) ------------------
152
+ // scoreTurns: a route is a list of TURNS; each turn is a list of calls executed
153
+ // in ONE assistant message (parallel tool use = one round-trip, outputs land
154
+ // together). Round-trip + prompt pass are billed PER TURN, not per call — so
155
+ // batching independent calls into one turn cuts both turns (time) and the
156
+ // intermediate context re-processing.
157
/**
 * Scores a route expressed as a list of turns, each turn being the list of
 * call records issued in one assistant message.
 *
 * Per turn the model bills one round-trip, one prompt pass (discounted cached
 * prefix plus everything accumulated since) and the generation for that turn.
 * After the last turn the route's total output is re-billed TAIL_TURNS times.
 *
 * @param {Array<Array<{kind: string, outBytes: number, gen?: number, hiddenTok?: number, ok?: boolean}>>} turns
 * @returns {{turns: number, outBytes: number, genTok: number, ctxIntegral: number,
 *   hiddenTok: number, leadTok: number, totalTok: number, seconds: number, anyError: boolean}}
 */
function scoreTurns(turns) {
  let ctxTok = P0;
  let genTok = 0;
  let promptIntegral = 0;
  let hiddenTok = 0;
  let seconds = 0;
  let turnCount = 0;

  for (const turn of turns) {
    turnCount += 1;

    let genFromCalls = 0;
    let outTokThisTurn = 0;
    for (const record of turn) {
      // Explicit per-call `gen` wins, then the per-kind default, then 40 bytes.
      genFromCalls += B2T(record.gen ?? GEN_BYTES[record.kind] ?? 40);
      outTokThisTurn += B2T(record.outBytes);
      hiddenTok += record.hiddenTok || 0;
    }
    const genThisTurn = genFromCalls + REASON_PER_TURN;

    // Prompt pass: discounted base prefix plus everything added since P0.
    const cached = P0 * CACHE_PREFIX_DISCOUNT;
    const fresh = (ctxTok - P0) + genThisTurn;
    const promptThisTurn = cached + fresh;

    promptIntegral += promptThisTurn;
    genTok += genThisTurn;
    seconds += RT_SECONDS + promptThisTurn / PROMPT_TOK_PER_S + genThisTurn / GEN_TOK_PER_S;

    // This turn's outputs and generation only become context for LATER turns.
    ctxTok += outTokThisTurn + genThisTurn;
  }

  const calls = turns.flat();
  let outTok = 0;
  let totalOutBytes = 0;
  for (const record of calls) {
    outTok += B2T(record.outBytes);
    totalOutBytes += record.outBytes;
  }

  // Tail: the route's accumulated output is re-billed on each of TAIL_TURNS
  // follow-on turns, so tighter output is cheaper here.
  promptIntegral += outTok * TAIL_TURNS;

  // leadTok is what the lead context pays; totalTok adds the hidden sub-agent
  // inference. They are equal whenever no call carries hiddenTok.
  const leadTok = Math.round(promptIntegral + genTok);
  const totalTok = leadTok + Math.round(hiddenTok);

  return {
    turns: turnCount,
    outBytes: totalOutBytes,
    genTok: Math.round(genTok),
    ctxIntegral: Math.round(promptIntegral),
    hiddenTok: Math.round(hiddenTok),
    leadTok,
    totalTok,
    seconds: Number(seconds.toFixed(1)),
    anyError: calls.some((record) => record.ok === false),
  };
}
195
+
196
+ // Serial route: each call is its own turn (one round-trip per call).
197
/**
 * Scores a serial route: every call becomes its own single-call turn, so each
 * one pays a full round-trip.
 *
 * @param {object[]} calls - Recorded call entries in execution order.
 * @returns {object} The score object produced by `scoreTurns`.
 */
const score = (calls) => {
  const oneCallPerTurn = calls.map((entry) => [entry]);
  return scoreTurns(oneCallPerTurn);
};
198
+
199
+ // ---- tasks: each = realistic exploration goal + competing routes -----------
200
+ // expectedWinner names the tool/route that SHOULD be cheapest for that niche;
201
+ // --check fails if a different route wins (route regression / mis-routing).
202
/**
 * Builds the benchmark task list for a fixture directory.
 *
 * Each task pairs an exploration goal with several competing routes; a route
 * is an ordered list of `TOOL` steps. `expectedWinner` names the route that
 * should be cheapest for that niche.
 *
 * @param {string} dir - Fixture root handed to every tool step.
 * @returns {Array<{name: string, expectedWinner: string, routes: Object<string, Function[]>}>}
 */
function tasks(dir) {
  // Content-mode grep over src/*.mjs with line numbers - the most common probe.
  const grepSrcContent = (pattern) =>
    TOOL.grep(dir, { pattern, path: 'src', glob: '*.mjs', output_mode: 'content', '-n': true });
  const graph = (args) => TOOL.code_graph(dir, args);
  const read = (args) => TOOL.read(dir, args);
  const BIG_MODULE = 'src/big-module.mjs';

  // For JUST a definition location, grep's single tight line is expected to
  // beat code_graph, which also returns callees nobody asked for.
  const locateSymbolDef = {
    name: 'locate-symbol-def (where is runEngine defined?)',
    expectedWinner: 'grep',
    routes: {
      code_graph: [graph({ mode: 'find_symbol', symbol: 'runEngine' })],
      grep: [grepSrcContent('function runEngine')],
      'glob+grep': [
        TOOL.glob(dir, { pattern: 'src/**/*.mjs' }),
        TOOL.grep(dir, { pattern: 'function runEngine', path: 'src', output_mode: 'content', '-n': true }),
      ],
    },
  };

  const findCallers = {
    name: 'find-callers (who calls runEngine?)',
    expectedWinner: 'code_graph',
    routes: {
      code_graph: [graph({ mode: 'callers', symbol: 'runEngine' })],
      'grep+reads': [
        grepSrcContent('runEngine'),
        read({ path: 'src/api/handler.mjs', line: 3, context: 2 }),
        read({ path: 'src/core/engine.mjs', line: 8, context: 2 }),
      ],
    },
  };

  const findConfigLiteral = {
    name: 'find-config-literal (where is MAX_RETRIES set?)',
    expectedWinner: 'grep',
    routes: {
      grep: [grepSrcContent('MAX_RETRIES')],
      code_graph: [graph({ mode: 'find_symbol', symbol: 'MAX_RETRIES' })],
      explore: [TOOL.explore()],
    },
  };

  const keywordToSymbols = {
    name: 'keyword-to-symbols (partial name Engine — which symbols?)',
    expectedWinner: 'code_graph',
    routes: {
      code_graph: [graph({ mode: 'search', symbol: 'Engine' })],
      grep: [grepSrcContent('Engine')],
      explore: [TOOL.explore()],
    },
  };

  const locateFileByName = {
    name: 'locate-file-by-name (find the config module)',
    expectedWinner: 'glob',
    routes: {
      glob: [TOOL.glob(dir, { pattern: 'src/**/config.mjs' })],
      list: [TOOL.list(dir, { mode: 'find', path: 'src', name: 'config', type: 'file' })],
      grep: [TOOL.grep(dir, { pattern: 'CONFIG', path: 'src', glob: '*.mjs', output_mode: 'files_with_matches' })],
    },
  };

  // Read context-budget ladder on the 320-line module:
  // count < summary < max_lines < full.
  const glanceLargeFile = {
    name: 'glance-large-file (320-line module)',
    expectedWinner: 'count',
    routes: {
      count: [read({ path: BIG_MODULE, mode: 'count' })],
      summary: [read({ path: BIG_MODULE, mode: 'summary', n: 30 })],
      max_lines: [read({ path: BIG_MODULE, max_lines: 40 })],
      full: [read({ path: BIG_MODULE })],
    },
  };

  const understandSubsystem = {
    name: 'understand-subsystem (entry points of src/core)',
    expectedWinner: 'code_graph',
    routes: {
      code_graph: [graph({ mode: 'overview', file: 'src/core/engine.mjs' })],
      'list+read': [
        TOOL.list(dir, { path: 'src/core' }),
        read({ path: 'src/core/engine.mjs' }),
        read({ path: 'src/core/validate.mjs' }),
      ],
      explore: [TOOL.explore()],
    },
  };

  return [
    locateSymbolDef,
    findCallers,
    findConfigLiteral,
    keywordToSymbols,
    locateFileByName,
    glanceLargeFile,
    understandSubsystem,
  ];
}
285
+
286
/**
 * Benchmark entry point.
 *
 * Builds a fixture under WORKDIR, runs every route of every task against it,
 * ranks the routes by totalTok, computes three further sections (fan-out
 * economics, parallel-in-one-turn demo, output-cap sweep) and prints the
 * result as JSON (JSON_OUT) or as text tables. With CHECK set, a task whose
 * winner differs from `expectedWinner`, or whose winning route had a failed
 * call, is recorded as a failure and the process exit code is set to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await mkdir(BENCH_PLUGIN_DATA, { recursive: true });
  const dir = await mkdtemp(join(WORKDIR, 'fx-'));
  await buildFixture(dir);

  // ---- niche map: execute each route for real and rank by totalTok.
  const results = [];
  for (const { name, expectedWinner, routes } of tasks(dir)) {
    const rows = [];
    for (const routeName of Object.keys(routes)) {
      const state = { scope: `explore-bench-${routeName}-${process.pid}`, calls: [] };
      for (const step of routes[routeName]) await step(state);
      rows.push({ route: routeName, ...score(state.calls) });
    }
    rows.sort((a, b) => a.totalTok - b.totalTok);
    results.push({ task: name, expectedWinner, winner: rows[0].route, rows });
  }

  // Hand-built call record for the modeled sections below (nothing is executed).
  const modeled = (kind, outBytes) => ({ kind, outBytes, ok: true });
  // Records produced by one modeled explore dispatch covering n queries.
  const exploreCalls = (n) => {
    const s = { calls: [] };
    TOOL.explore(n)(s);
    return s.calls;
  };

  // ---- fan-out economics: explore's 2-turn LEAD overhead amortized across N
  // parallel sub-queries, against direct tool use. Modeled throughout.
  const fanout = [1, 3, 6, 12].map((N) => {
    const areas = Array.from({ length: N });
    // direct serial: per area list+read+read, each call its own turn.
    const directSerial = areas.flatMap(() => [modeled('list', 120), modeled('read', 250), modeled('read', 260)]);
    // direct PARALLEL: turn 1 = N discovery calls batched, turn 2 = N reads
    // batched - 2 turns flat, but all raw output lands in lead context.
    const directParallelTurns = [
      areas.map(() => modeled('list', 120)),
      areas.map(() => modeled('read', 510)),
    ];
    const sepExplore = areas.flatMap(() => exploreCalls(1));
    const batchedCalls = exploreCalls(N);
    return {
      N,
      directSerial: score(directSerial),
      directParallel: scoreTurns(directParallelTurns),
      separateExplore: score(sepExplore),
      batchedExplore: score(batchedCalls),
    };
  });

  // ---- parallel-in-1-turn demo: independent calls batched into ONE turn vs
  // serial. A dependent chain (grep -> reads) keeps the grep in its own turn.
  const read260 = () => modeled('read', 260);
  const grep350 = () => modeled('grep', 350);
  const parallelDemo = [
    {
      name: '3 independent file reads',
      serial: score([read260(), read260(), read260()]),
      parallel: scoreTurns([[read260(), read260(), read260()]]),
    },
    {
      name: 'grep then 2 callsite reads (grep is dependency)',
      serial: score([grep350(), read260(), read260()]),
      parallel: scoreTurns([[grep350()], [read260(), read260()]]),
    },
  ];

  // ---- uniform output-cap effect: REAL full read of a generated 320-line
  // module with options.toolOutputMaxBytes swept (0 = cap left unset).
  // Each cap reads its own file so the process-global read cache cannot
  // replace a later read with a "[file unchanged]" stub.
  const capSweep = [];
  for (const cap of [0, 8000, 4000, 2000]) {
    const rel = `src/cap-target-${cap}.mjs`;
    const sourceLines = Array.from({ length: 320 }, (_, i) => `export const c${cap}_item${i} = { id: ${i}, pad: "xxxxxxxxxxxxxxxxxxxx" };`);
    await writeFile(join(dir, rel), `${sourceLines.join('\n')}\n`, 'utf8');
    const scopeId = `cap-${cap}`;
    const opts = { readStateScope: scopeId, sessionId: scopeId };
    if (cap > 0) opts.toolOutputMaxBytes = cap;
    const out = String(await executeBuiltinTool('read', { path: rel, full: true }, dir, opts));
    const outBytes = Buffer.byteLength(out, 'utf8');
    capSweep.push({
      cap,
      outBytes,
      capped: out.includes('tool_output_token_limit'),
      leadTok: scoreTurns([[modeled('read', outBytes)]]).leadTok,
    });
  }

  // ---- --check: collect winner mismatches and errored winners.
  const failures = [];
  if (CHECK) {
    for (const { task, winner, expectedWinner, rows } of results) {
      if (expectedWinner && winner !== expectedWinner) {
        failures.push({ task, winner, expectedWinner });
      }
      if (rows[0].anyError) failures.push({ task, winner, reason: 'winner-errored' });
    }
  }

  const pad = (s, n) => String(s).padEnd(n);
  const padL = (s, n) => String(s).padStart(n);
  // Prints one table line from already-padded cells.
  const emit = (...cells) => console.log(cells.join(''));

  const printTextReport = () => {
    console.log('## Niche map (ranked by totalTok; leadTok = lead-context cost only)');
    for (const r of results) {
      const mismatch = r.expectedWinner && r.winner !== r.expectedWinner ? ` (EXPECTED ${r.expectedWinner})` : '';
      console.log(`\n### ${r.task} winner=${r.winner}${mismatch}`);
      emit(pad('route', 14), padL('turns', 6), padL('outB', 8), padL('leadTok', 9), padL('hidden', 8), padL('totalTok', 10), padL('sec', 7));
      const best = r.rows[0].totalTok;
      for (const row of r.rows) {
        const mark = row.totalTok === best ? ' <=' : ` ${(row.totalTok / best).toFixed(2)}x`;
        emit(pad(row.route, 14), padL(row.turns, 6), padL(row.outBytes, 8), padL(row.leadTok, 9), padL(row.hiddenTok, 8), padL(row.totalTok, 10), padL(row.seconds, 7), mark, row.anyError ? ' ERR' : '');
      }
    }

    console.log('\n## Parallel-in-1-turn (independent calls share ONE round-trip) — modeled');
    emit(pad('task', 46), pad('mode', 10), padL('turns', 6), padL('leadTok', 9), padL('sec', 7));
    for (const d of parallelDemo) {
      for (const m of ['serial', 'parallel']) {
        const c = d[m];
        const mark = m === 'parallel' ? ` ${(c.leadTok / d.serial.leadTok).toFixed(2)}x` : '';
        emit(pad(d.name, 46), pad(m, 10), padL(c.turns, 6), padL(c.leadTok, 9), padL(c.seconds, 7), mark);
      }
    }

    console.log('\n## Fan-out economics (N exploration areas) — modeled');
    console.log('Both directParallel and batchedExplore are ~2 turns; only explore CAPS lead context.');
    emit(pad('N', 4), pad('strategy', 20), padL('turns', 6), padL('leadTok', 9), padL('total', 9), padL('lead/area', 11));
    for (const fo of fanout) {
      const strategies = [
        ['direct serial', fo.directSerial],
        ['direct parallel', fo.directParallel],
        ['N separate explore', fo.separateExplore],
        ['1 batched explore', fo.batchedExplore],
      ];
      for (const [label, c] of strategies) {
        emit(pad(String(fo.N), 4), pad(label, 20), padL(c.turns, 6), padL(c.leadTok, 9), padL(c.totalTok, 9), padL(Math.round(c.leadTok / fo.N), 11));
      }
      console.log('');
    }

    console.log('\n## Uniform output-cap effect (REAL full read of 320-line module, sweep toolOutputMaxBytes)');
    emit(pad('cap', 10), padL('outBytes', 9), padL('leadTok', 9), padL('vs off', 8), ' capped');
    // Ratios are relative to the first sweep entry (cap off); 1 guards a zero or missing baseline.
    const offLead = capSweep[0]?.leadTok || 1;
    for (const c of capSweep) {
      const capLabel = c.cap === 0 ? 'off' : String(c.cap);
      const ratio = `${(c.leadTok / offLead).toFixed(2)}x`;
      emit(pad(capLabel, 10), padL(c.outBytes, 9), padL(c.leadTok, 9), padL(ratio, 8), ' ', c.capped ? 'yes' : 'no');
    }

    if (CHECK) console.log(`\nexplore-bench: ${failures.length === 0 ? 'pass' : 'fail ' + JSON.stringify(failures)}`);
    console.log('note: tokens=bytes/4 proxy. leadTok=lead context (turns+briefs+tail). totalTok=leadTok+hidden sub-agent (offloaded, cheap model). explore [modeled], 2 lead turns.');
  };

  if (JSON_OUT) {
    const payload = { results, fanout, parallelDemo, capSweep };
    if (CHECK) payload.failures = failures;
    console.log(JSON.stringify(payload, null, 2));
  } else {
    printTextReport();
  }

  if (CHECK && failures.length > 0) process.exitCode = 1;
}
418
+
419
// Run the benchmark, then always remove its scratch directories.
try {
  await main();
} finally {
  // Cleanup is best-effort: a removal failure is deliberately ignored so it
  // cannot replace an error thrown by main().
  for (const scratchDir of [WORKDIR, BENCH_PLUGIN_DATA]) {
    await rm(scratchDir, { recursive: true, force: true }).catch(() => {});
  }
}
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from 'node:child_process';
3
+ import { mkdirSync } from 'node:fs';
4
+ import { mkdtemp, rm, writeFile } from 'node:fs/promises';
5
+ import { tmpdir } from 'node:os';
6
+ import { dirname, join } from 'node:path';
7
+ import { fileURLToPath } from 'node:url';
8
+
9
// Package root: the parent of the directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
const ROOT = dirname(scriptDir);

// Two throwaway directories: plugin data for child runs, and trace/log fixtures.
const tempBase = tmpdir();
const DATA_DIR = await mkdtemp(join(tempBase, 'mixdog-io-guardrails-data-'));
const WORKDIR = await mkdtemp(join(tempBase, 'mixdog-io-guardrails-'));

// Only fill in the plugin env vars when the caller has not already set them.
if (!process.env.CLAUDE_PLUGIN_ROOT) process.env.CLAUDE_PLUGIN_ROOT = ROOT;
if (!process.env.CLAUDE_PLUGIN_DATA) process.env.CLAUDE_PLUGIN_DATA = DATA_DIR;

// NOTE(review): imported dynamically after the env setup above, presumably
// because the module (or its imports) reads those variables at load time — confirm.
const nextCallUtils = await import('../src/agent/orchestrator/tools/next-call-utils.mjs');
const { countJsonNextCalls } = nextCallUtils;
15
+
16
/**
 * Minimal assertion helper for this smoke script.
 *
 * @param {unknown} cond - Value that must be truthy.
 * @param {string} message - Message of the Error thrown when `cond` is falsy.
 * @throws {Error} When `cond` is falsy.
 */
function assert(cond, message) {
  if (cond) return;
  throw new Error(message);
}
19
+
20
/**
 * Synchronously runs a package script with the current Node binary.
 *
 * The child runs with ROOT as its working directory and inherits the current
 * environment, with CLAUDE_PLUGIN_ROOT / CLAUDE_PLUGIN_DATA pointed at ROOT /
 * DATA_DIR. Entries in `env` are applied last, so they win over both.
 *
 * @param {string} script - Script path relative to ROOT.
 * @param {string[]} [args] - Command-line arguments for the script.
 * @param {Object<string, string>} [env] - Extra environment variables.
 * @returns {object} The `spawnSync` result; stdout and stderr are utf8 strings.
 */
function runScript(script, args = [], env = {}) {
  const scriptPath = join(ROOT, script);
  const childEnv = {
    ...process.env,
    CLAUDE_PLUGIN_ROOT: ROOT,
    CLAUDE_PLUGIN_DATA: DATA_DIR,
    ...env,
  };
  const spawnOptions = { cwd: ROOT, encoding: 'utf8', env: childEnv };
  return spawnSync(process.execPath, [scriptPath, ...args], spawnOptions);
}
32
+
33
/**
 * Parses JSON produced by a child script, failing with a labelled error.
 *
 * @param {string} label - Prefix naming which output was being parsed.
 * @param {string} text - Raw text expected to contain JSON.
 * @returns {unknown} The parsed value.
 * @throws {Error} When `text` is not valid JSON. The message carries the label,
 *   the parser's message and the raw text; the parser error is kept as `cause`.
 */
function parseJson(label, text) {
  try {
    return JSON.parse(text);
  } catch (err) {
    // Keep the original parse error attached so its stack is not lost.
    throw new Error(`${label}: failed to parse JSON: ${err?.message || String(err)}\n${text}`, { cause: err });
  }
}
40
+
41
/**
 * Serializes one telemetry row as a JSONL line.
 *
 * `ts` defaults to the current time in epoch milliseconds; because the row is
 * spread after it, a `ts` property on `row` overrides that default.
 *
 * @param {object} row - Trace fields to record.
 * @returns {string} The JSON text followed by a single newline.
 */
function traceLine(row) {
  const stamped = { ts: Date.now(), ...row };
  const serialized = JSON.stringify(stamped);
  return `${serialized}\n`;
}
44
+
45
// Smoke test body. Every scenario writes a fixture into WORKDIR (or DATA_DIR),
// runs a report/bench script as a child process and asserts on its exit status
// and JSON output. The first failing assertion throws and aborts the run.
try {
  // ---- next_call counting: only JSON-argument next_call lines count.
  assert(countJsonNextCalls('next_call: read({"path":"a.txt","line":1})') === 1, 'valid next_call should count');
  assert(countJsonNextCalls('next_call: read({path,line})') === 0, 'non-JSON next_call should not count');
  assert(countJsonNextCalls('next_call: read({"path":"a.txt"})\nnext_call: grep({"pattern":"x","path":"."})') === 2, 'multiple next_calls should count');

  // ---- trace within thresholds: report passes and exposes per-tool stats.
  const passTrace = join(WORKDIR, 'telemetry-pass.jsonl');
  await writeFile(passTrace, [
    traceLine({ kind: 'tool', tool_name: 'read', tool_ms: 5, result_kind: 'normal', result_bytes_est: 120, result_lines_est: 5, tool_args: { path: 'a.txt', offset: 1, limit: 5 } }),
    traceLine({ kind: 'tool', tool_name: 'grep', tool_ms: 8, result_kind: 'normal', result_has_next_call: true, result_next_call_count: 1, result_bytes_est: 80, result_lines_est: 2, tool_args: { pattern: 'needle', path: '.', output_mode: 'content' } }),
  ].join(''), 'utf8');
  const pass = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', `--trace=${passTrace}`], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      read: { p90: 100, errorRate: 0.5, wideReadRate: 0.5 },
      grep: { p90: 100, errorRate: 0.5, nextCallRateMin: 0.5 },
    }),
  });
  assert(pass.status === 0, `telemetry pass should exit 0\nstdout=${pass.stdout}\nstderr=${pass.stderr}`);
  const passJson = parseJson('telemetry pass', pass.stdout);
  assert(Array.isArray(passJson.failures) && passJson.failures.length === 0, `telemetry pass failures not empty: ${pass.stdout}`);
  const grepRow = (passJson.tools || []).find((row) => row.tool === 'grep');
  assert(grepRow && grepRow.nextCalls === 1 && grepRow.nextCallRate === 1 && grepRow.nextCallTotal === 1, `telemetry next_call coverage missing: ${pass.stdout}`);
  const readRow = (passJson.tools || []).find((row) => row.tool === 'read');
  assert(readRow?.outputBytes?.p90 === 120 && readRow?.outputLines?.p90 === 5, `telemetry output cost missing: ${pass.stdout}`);

  // ---- route rows: explicit wide_read is counted and routes are summarized.
  const routeTrace = join(WORKDIR, 'telemetry-route.jsonl');
  await writeFile(routeTrace, [
    traceLine({ kind: 'tool', tool_name: 'read', tool_ms: 3, result_kind: 'normal', wide_read: true, result_bytes_est: 6000, tool_args: { path: 'big.txt' } }),
    traceLine({ kind: 'route', route_scenario: 'large-file-target', route_name: 'wide-read-edit', route_success: false, route_tool_calls: 2, route_output_bytes: 6000, route_wide_reads: 1, route_errors: 0, route_score: null }),
    traceLine({ kind: 'route', route_scenario: 'large-file-target', route_name: 'line-read-edit', route_success: true, route_tool_calls: 2, route_output_bytes: 260, route_wide_reads: 0, route_errors: 0, route_score: 2020 }),
  ].join(''), 'utf8');
  const routePass = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', '--require-rows', `--trace=${routeTrace}`], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      read: { p90: 100, errorRate: 0.5, wideReadRate: 1 },
    }),
  });
  assert(routePass.status === 0, `telemetry route pass should exit 0\nstdout=${routePass.stdout}\nstderr=${routePass.stderr}`);
  const routeJson = parseJson('telemetry route pass', routePass.stdout);
  assert(routeJson.tools?.find((row) => row.tool === 'read')?.wideReads === 1, `telemetry explicit wide_read missing: ${routePass.stdout}`);
  const routeRow = routeJson.routes?.find((row) => row.scenario === 'large-file-target');
  assert(routeRow?.winner === 'line-read-edit' && routeRow.failures === 1, `telemetry route summary missing: ${routePass.stdout}`);

  // ---- --require-rows: an empty trace must fail with 'no-trace-rows'.
  const emptyTrace = join(WORKDIR, 'telemetry-empty.jsonl');
  await writeFile(emptyTrace, '', 'utf8');
  const emptyFail = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', '--require-rows', `--trace=${emptyTrace}`]);
  assert(emptyFail.status === 1, `telemetry require rows should fail on empty trace\nstdout=${emptyFail.stdout}\nstderr=${emptyFail.stderr}`);
  const emptyJson = parseJson('telemetry empty fail', emptyFail.stdout);
  assert((emptyJson.failures || []).some((f) => f.reason === 'no-trace-rows'), `telemetry empty failure reason missing: ${emptyFail.stdout}`);

  // ---- --require-rows: route rows without tool rows fail with 'no-io-tool-rows'.
  const routeOnlyTrace = join(WORKDIR, 'telemetry-route-only.jsonl');
  await writeFile(routeOnlyTrace, traceLine({ kind: 'route', route_scenario: 'route-only', route_name: 'winner', route_success: true, route_score: 1 }), 'utf8');
  const routeOnlyFail = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', '--require-rows', `--trace=${routeOnlyTrace}`]);
  assert(routeOnlyFail.status === 1, `telemetry require rows should fail without tool rows\nstdout=${routeOnlyFail.stdout}\nstderr=${routeOnlyFail.stderr}`);
  const routeOnlyJson = parseJson('telemetry route-only fail', routeOnlyFail.stdout);
  assert((routeOnlyJson.failures || []).some((f) => f.reason === 'no-io-tool-rows'), `telemetry route-only failure reason missing: ${routeOnlyFail.stdout}`);

  // ---- tool-events fallback: with no --trace, the report reads
  // DATA_DIR/tool-events.log; two edit misses on one file form a miss loop.
  mkdirSync(DATA_DIR, { recursive: true });
  await writeFile(join(DATA_DIR, 'tool-events.log'), [
    `[${new Date().toISOString()}] [tool=edit] [code=8] old_string not found in C:\\Project\\App\\src\\Loop.cs (no exact/fold/nfc-fold/crlf-fold match). Nearest match at line 40: " void Run()".`,
    `[${new Date().toISOString()}] [tool=edit] [code=8] old_string not found in C:\\Project\\App\\src\\Loop.cs (no exact/fold/nfc-fold/crlf-fold match). Nearest match at line 41: " var x = 1;".`,
    `[${new Date().toISOString()}] [tool=write] [code=10] partial-read snapshot — read full file before overwriting: C:\\Project\\App\\src\\Other.cs`,
  ].join('\n') + '\n', 'utf8');
  const eventsFail = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', '--require-rows', '--hours=1'], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      edit: { p90: 100, errorRate: 1 },
      write: { p90: 100, errorRate: 1 },
    }),
  });
  assert(eventsFail.status === 1, `telemetry tool-events fallback should flag edit miss loop\nstdout=${eventsFail.stdout}\nstderr=${eventsFail.stderr}`);
  const eventsJson = parseJson('telemetry tool-events fail', eventsFail.stdout);
  assert(eventsJson.source === 'tool-events' && eventsJson.toolEventRows === 3, `telemetry did not use tool-events fallback: ${eventsFail.stdout}`);
  assert((eventsJson.failures || []).some((f) => f.reason === 'same-file-edit-miss-loop'), `telemetry event loop failure missing: ${eventsFail.stdout}`);
  assert((eventsJson.eventClusters || []).some((c) => c.tool === 'edit' && c.code === '8' && c.count === 2), `telemetry event clusters missing edit loop: ${eventsFail.stdout}`);

  // ---- mcp-debug fallback: dispatch start/ok pairs become tool rows and
  // three identical reads form a repeat cluster.
  const mcpDebugLog = join(WORKDIR, 'mcp-debug.log');
  const emptyEventsLog = join(WORKDIR, 'empty-tool-events.log');
  const nowIso = new Date().toISOString();
  await writeFile(emptyEventsLog, '', 'utf8');
  await writeFile(mcpDebugLog, [
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] start id=10-1 tool=read`,
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] ok id=10-1 tool=read elapsed=2ms`,
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] start id=10-2 tool=read`,
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] ok id=10-2 tool=read elapsed=3ms`,
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] start id=10-3 tool=read`,
    `[${nowIso}] [lead=1 server=10 session=s] [dispatch] ok id=10-3 tool=read elapsed=4ms`,
  ].join('\n') + '\n', 'utf8');
  const mcpDebugPass = runScript('scripts/io-telemetry-report.mjs', ['--json', '--hours=1', `--mcp-debug=${mcpDebugLog}`, `--events=${emptyEventsLog}`], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      read: { p90: 100, errorRate: 0.5, wideReadRate: 1 },
    }),
  });
  assert(mcpDebugPass.status === 0, `telemetry mcp-debug fallback should pass\nstdout=${mcpDebugPass.stdout}\nstderr=${mcpDebugPass.stderr}`);
  const mcpDebugJson = parseJson('telemetry mcp-debug pass', mcpDebugPass.stdout);
  assert(mcpDebugJson.source === 'mcp-debug' && mcpDebugJson.mcpDebugRows === 3, `telemetry did not use mcp-debug fallback: ${mcpDebugPass.stdout}`);
  assert(mcpDebugJson.tools?.find((row) => row.tool === 'read')?.count === 3, `telemetry mcp-debug read count missing: ${mcpDebugPass.stdout}`);
  assert((mcpDebugJson.repeatClusters || []).some((c) => c.tool === 'read' && c.count === 3), `telemetry repeat cluster missing: ${mcpDebugPass.stdout}`);

  // ---- threshold breach: one slow, errored, wide, huge read trips all four checks.
  const failTrace = join(WORKDIR, 'telemetry-fail.jsonl');
  await writeFile(failTrace, traceLine({
    kind: 'tool',
    tool_name: 'read',
    tool_ms: 300,
    result_kind: 'error',
    result_bytes_est: 20000,
    result_lines_est: 800,
    tool_args: { path: 'big.txt', mode: 'full' },
  }), 'utf8');
  const fail = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', `--trace=${failTrace}`], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      read: { p90: 10, errorRate: 0.01, wideReadRate: 0.01, outputBytesP90: 1000 },
    }),
  });
  assert(fail.status === 1, `telemetry fail should exit 1\nstdout=${fail.stdout}\nstderr=${fail.stderr}`);
  const failJson = parseJson('telemetry fail', fail.stdout);
  const reasons = new Set((failJson.failures || []).map((f) => f.reason));
  for (const reason of ['slow-p90', 'error-rate', 'wide-read-rate', 'output-bytes-p90']) {
    assert(reasons.has(reason), `telemetry fail missing ${reason}: ${fail.stdout}`);
  }

  // ---- next_call coverage: a grep without next_call fails nextCallRateMin=1.
  const nextCallFailTrace = join(WORKDIR, 'telemetry-next-call-fail.jsonl');
  await writeFile(nextCallFailTrace, traceLine({
    kind: 'tool',
    tool_name: 'grep',
    tool_ms: 5,
    result_kind: 'normal',
    result_has_next_call: false,
    tool_args: { pattern: 'needle', path: '.', output_mode: 'content' },
  }), 'utf8');
  const nextCallFail = runScript('scripts/io-telemetry-report.mjs', ['--json', '--check', `--trace=${nextCallFailTrace}`], {
    MIXDOG_IO_TELEMETRY_THRESHOLDS_JSON: JSON.stringify({
      grep: { p90: 100, errorRate: 0.5, nextCallRateMin: 1 },
    }),
  });
  assert(nextCallFail.status === 1, `telemetry next_call fail should exit 1\nstdout=${nextCallFail.stdout}\nstderr=${nextCallFail.stderr}`);
  const nextCallFailJson = parseJson('telemetry next_call fail', nextCallFail.stdout);
  assert((nextCallFailJson.failures || []).some((f) => f.reason === 'next-call-rate'), `telemetry fail missing next-call-rate: ${nextCallFail.stdout}`);
  const nextCallFailGrep = (nextCallFailJson.tools || []).find((row) => row.tool === 'grep');
  assert(nextCallFailGrep?.nextCallMisses?.['grep:content'] === 1, `telemetry fail missing nextCallMisses shape: ${nextCallFail.stdout}`);
  assert((nextCallFailJson.recommendations || []).some((line) => line.includes('top missing: grep:content:1')), `telemetry recommendation missing shape detail: ${nextCallFail.stdout}`);

  // ---- mini bench: a 0.001 ms baseline is unattainable, so the check must fail.
  const tightBaseline = join(WORKDIR, 'bench-tight-baseline.json');
  await writeFile(tightBaseline, JSON.stringify({
    thresholds_ms: {
      'small-edit': 0.001,
      'multi-edit': 0.001,
      'large-dry-run-patch': 0.001,
      'range-read-cache-hit': 0.001,
      'grep-cache-hit': 0.001,
    },
  }), 'utf8');
  const benchFail = runScript('scripts/io-mini-bench.mjs', ['--json', '--check-thresholds', `--baseline=${tightBaseline}`], {
    MIXDOG_IO_MINI_BENCH_ITERS: '1',
  });
  assert(benchFail.status !== 0, `bench with tight baseline should fail\nstdout=${benchFail.stdout}\nstderr=${benchFail.stderr}`);
  const benchJson = parseJson('bench fail', benchFail.stdout);
  assert(Array.isArray(benchJson.failures) && benchJson.failures.length > 0, `bench failures missing: ${benchFail.stdout}`);

  console.log('io-guardrails smoke passed');
} finally {
  // Cleanup is best-effort. An error thrown here would replace a pending
  // assertion failure and skip the second removal, so each directory is
  // removed independently and a failure is only reported as a warning.
  for (const tempDir of [WORKDIR, DATA_DIR]) {
    try {
      await rm(tempDir, { recursive: true, force: true });
    } catch (err) {
      console.warn(`io-guardrails smoke: failed to remove ${tempDir}: ${err?.message || String(err)}`);
    }
  }
}
@@ -0,0 +1,11 @@
1
+ {
2
+ "version": 1,
3
+ "note": "Conservative local I/O regression guard. Thresholds are intentionally loose enough for noisy desktop runs; tighten only after collecting stable trace/bench history.",
4
+ "thresholds_ms": {
5
+ "small-edit": 250,
6
+ "multi-edit": 300,
7
+ "large-dry-run-patch": 1500,
8
+ "range-read-cache-hit": 80,
9
+ "grep-cache-hit": 80
10
+ }
11
+ }