@team-agent/installer 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/Cargo.lock +744 -0
  2. package/Cargo.toml +34 -0
  3. package/crates/team-agent/Cargo.toml +33 -0
  4. package/crates/team-agent/src/cli/adapters.rs +1343 -0
  5. package/crates/team-agent/src/cli/diagnose.rs +554 -0
  6. package/crates/team-agent/src/cli/emit.rs +1077 -0
  7. package/crates/team-agent/src/cli/helpers.rs +88 -0
  8. package/crates/team-agent/src/cli/leader.rs +216 -0
  9. package/crates/team-agent/src/cli/mod.rs +1141 -0
  10. package/crates/team-agent/src/cli/profile.rs +306 -0
  11. package/crates/team-agent/src/cli/send.rs +215 -0
  12. package/crates/team-agent/src/cli/status.rs +179 -0
  13. package/crates/team-agent/src/cli/status_port.rs +502 -0
  14. package/crates/team-agent/src/cli/tests/base.rs +616 -0
  15. package/crates/team-agent/src/cli/tests/compile.rs +96 -0
  16. package/crates/team-agent/src/cli/tests/divergence.rs +509 -0
  17. package/crates/team-agent/src/cli/tests/lane_c.rs +333 -0
  18. package/crates/team-agent/src/cli/tests/leader_watch.rs +395 -0
  19. package/crates/team-agent/src/cli/tests/main_preserved.rs +675 -0
  20. package/crates/team-agent/src/cli/tests/missing_subcommands.rs +390 -0
  21. package/crates/team-agent/src/cli/tests/mod.rs +97 -0
  22. package/crates/team-agent/src/cli/tests/peer_allow.rs +137 -0
  23. package/crates/team-agent/src/cli/tests/repair_state_byte_lock.rs +302 -0
  24. package/crates/team-agent/src/cli/tests/run_delegation.rs +305 -0
  25. package/crates/team-agent/src/cli/tests/status_send.rs +385 -0
  26. package/crates/team-agent/src/cli/tests/verb_profile.rs +182 -0
  27. package/crates/team-agent/src/cli/tests/verb_settle.rs +236 -0
  28. package/crates/team-agent/src/cli/tests/verb_validate.rs +184 -0
  29. package/crates/team-agent/src/cli/types.rs +605 -0
  30. package/crates/team-agent/src/compiler/tests.rs +701 -0
  31. package/crates/team-agent/src/compiler.rs +489 -0
  32. package/crates/team-agent/src/coordinator/backoff.rs +153 -0
  33. package/crates/team-agent/src/coordinator/health.rs +436 -0
  34. package/crates/team-agent/src/coordinator/mod.rs +80 -0
  35. package/crates/team-agent/src/coordinator/orphan.rs +179 -0
  36. package/crates/team-agent/src/coordinator/tests/abnormal.rs +255 -0
  37. package/crates/team-agent/src/coordinator/tests/basics.rs +262 -0
  38. package/crates/team-agent/src/coordinator/tests/daemon.rs +323 -0
  39. package/crates/team-agent/src/coordinator/tests/health_sync.rs +263 -0
  40. package/crates/team-agent/src/coordinator/tests/main_preserved.rs +136 -0
  41. package/crates/team-agent/src/coordinator/tests/mod.rs +310 -0
  42. package/crates/team-agent/src/coordinator/tests/spine.rs +261 -0
  43. package/crates/team-agent/src/coordinator/tests/takeover.rs +227 -0
  44. package/crates/team-agent/src/coordinator/tests/tick_core.rs +256 -0
  45. package/crates/team-agent/src/coordinator/tests/watch.rs +167 -0
  46. package/crates/team-agent/src/coordinator/tick.rs +2032 -0
  47. package/crates/team-agent/src/coordinator/types.rs +584 -0
  48. package/crates/team-agent/src/db/migration.rs +716 -0
  49. package/crates/team-agent/src/db/mod.rs +23 -0
  50. package/crates/team-agent/src/db/schema.rs +378 -0
  51. package/crates/team-agent/src/event_log.rs +375 -0
  52. package/crates/team-agent/src/fake_worker.rs +253 -0
  53. package/crates/team-agent/src/leader/helpers.rs +190 -0
  54. package/crates/team-agent/src/leader/inject.rs +33 -0
  55. package/crates/team-agent/src/leader/lease.rs +1063 -0
  56. package/crates/team-agent/src/leader/mod.rs +99 -0
  57. package/crates/team-agent/src/leader/owner_bind.rs +292 -0
  58. package/crates/team-agent/src/leader/rediscover/tests.rs +525 -0
  59. package/crates/team-agent/src/leader/rediscover.rs +1099 -0
  60. package/crates/team-agent/src/leader/start.rs +273 -0
  61. package/crates/team-agent/src/leader/takeover.rs +235 -0
  62. package/crates/team-agent/src/leader/tests/basics.rs +183 -0
  63. package/crates/team-agent/src/leader/tests/byte_findings.rs +234 -0
  64. package/crates/team-agent/src/leader/tests/identity.rs +206 -0
  65. package/crates/team-agent/src/leader/tests/idle.rs +271 -0
  66. package/crates/team-agent/src/leader/tests/lease_api.rs +225 -0
  67. package/crates/team-agent/src/leader/tests/lease_claim.rs +253 -0
  68. package/crates/team-agent/src/leader/tests/mod.rs +125 -0
  69. package/crates/team-agent/src/leader/tests/rediscover.rs +351 -0
  70. package/crates/team-agent/src/leader/tests/wake_start_owner.rs +204 -0
  71. package/crates/team-agent/src/leader/types.rs +487 -0
  72. package/crates/team-agent/src/lib.rs +85 -0
  73. package/crates/team-agent/src/lifecycle/display.rs +228 -0
  74. package/crates/team-agent/src/lifecycle/helpers.rs +112 -0
  75. package/crates/team-agent/src/lifecycle/launch/plan.rs +227 -0
  76. package/crates/team-agent/src/lifecycle/launch.rs +1833 -0
  77. package/crates/team-agent/src/lifecycle/mod.rs +62 -0
  78. package/crates/team-agent/src/lifecycle/restart/agent.rs +533 -0
  79. package/crates/team-agent/src/lifecycle/restart/common.rs +517 -0
  80. package/crates/team-agent/src/lifecycle/restart/orchestrator.rs +41 -0
  81. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +268 -0
  82. package/crates/team-agent/src/lifecycle/restart/remove.rs +780 -0
  83. package/crates/team-agent/src/lifecycle/restart/selection.rs +208 -0
  84. package/crates/team-agent/src/lifecycle/restart/team_state.rs +242 -0
  85. package/crates/team-agent/src/lifecycle/restart.rs +76 -0
  86. package/crates/team-agent/src/lifecycle/tests/agent_ops.rs +455 -0
  87. package/crates/team-agent/src/lifecycle/tests/core.rs +989 -0
  88. package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +583 -0
  89. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +933 -0
  90. package/crates/team-agent/src/lifecycle/tests/main_preserved.rs +265 -0
  91. package/crates/team-agent/src/lifecycle/tests.rs +27 -0
  92. package/crates/team-agent/src/lifecycle/types.rs +685 -0
  93. package/crates/team-agent/src/main.rs +41 -0
  94. package/crates/team-agent/src/mcp_server/helpers.rs +228 -0
  95. package/crates/team-agent/src/mcp_server/mod.rs +183 -0
  96. package/crates/team-agent/src/mcp_server/normalize.rs +312 -0
  97. package/crates/team-agent/src/mcp_server/tests/golden.rs +283 -0
  98. package/crates/team-agent/src/mcp_server/tests/normalize.rs +244 -0
  99. package/crates/team-agent/src/mcp_server/tests/scoped.rs +189 -0
  100. package/crates/team-agent/src/mcp_server/tests/send.rs +222 -0
  101. package/crates/team-agent/src/mcp_server/tests/tools.rs +158 -0
  102. package/crates/team-agent/src/mcp_server/tests/wire.rs +159 -0
  103. package/crates/team-agent/src/mcp_server/tests.rs +38 -0
  104. package/crates/team-agent/src/mcp_server/tools.rs +603 -0
  105. package/crates/team-agent/src/mcp_server/types.rs +421 -0
  106. package/crates/team-agent/src/mcp_server/wire.rs +388 -0
  107. package/crates/team-agent/src/message_store.rs +767 -0
  108. package/crates/team-agent/src/messaging/activity.rs +433 -0
  109. package/crates/team-agent/src/messaging/delivery.rs +542 -0
  110. package/crates/team-agent/src/messaging/helpers.rs +209 -0
  111. package/crates/team-agent/src/messaging/leader_receiver.rs +340 -0
  112. package/crates/team-agent/src/messaging/mod.rs +147 -0
  113. package/crates/team-agent/src/messaging/peers.rs +32 -0
  114. package/crates/team-agent/src/messaging/results.rs +537 -0
  115. package/crates/team-agent/src/messaging/scheduler.rs +344 -0
  116. package/crates/team-agent/src/messaging/selftest.rs +100 -0
  117. package/crates/team-agent/src/messaging/send.rs +582 -0
  118. package/crates/team-agent/src/messaging/tests/basic.rs +357 -0
  119. package/crates/team-agent/src/messaging/tests/main_preserved.rs +122 -0
  120. package/crates/team-agent/src/messaging/tests/mod.rs +293 -0
  121. package/crates/team-agent/src/messaging/tests/runtime.rs +1422 -0
  122. package/crates/team-agent/src/messaging/tests/spine.rs +437 -0
  123. package/crates/team-agent/src/messaging/trust.rs +192 -0
  124. package/crates/team-agent/src/messaging/types.rs +355 -0
  125. package/crates/team-agent/src/messaging/watchers.rs +591 -0
  126. package/crates/team-agent/src/model/enums.rs +311 -0
  127. package/crates/team-agent/src/model/errors.rs +17 -0
  128. package/crates/team-agent/src/model/ids.rs +155 -0
  129. package/crates/team-agent/src/model/mod.rs +22 -0
  130. package/crates/team-agent/src/model/paths.rs +228 -0
  131. package/crates/team-agent/src/model/permissions.rs +567 -0
  132. package/crates/team-agent/src/model/routing.rs +340 -0
  133. package/crates/team-agent/src/model/spec.rs +680 -0
  134. package/crates/team-agent/src/model/task_graph.rs +380 -0
  135. package/crates/team-agent/src/model/testdata/fuzz.golden.yaml +43 -0
  136. package/crates/team-agent/src/model/testdata/fuzz.yaml +43 -0
  137. package/crates/team-agent/src/model/testdata/spec_invalid_a.yaml +207 -0
  138. package/crates/team-agent/src/model/testdata/team.spec.golden.yaml +206 -0
  139. package/crates/team-agent/src/model/testdata/team.spec.yaml +206 -0
  140. package/crates/team-agent/src/model/yaml/tests.rs +288 -0
  141. package/crates/team-agent/src/model/yaml.rs +800 -0
  142. package/crates/team-agent/src/packaging/install.rs +305 -0
  143. package/crates/team-agent/src/packaging/migrate.rs +30 -0
  144. package/crates/team-agent/src/packaging/mod.rs +82 -0
  145. package/crates/team-agent/src/packaging/repair.rs +24 -0
  146. package/crates/team-agent/src/packaging/tests.rs +829 -0
  147. package/crates/team-agent/src/packaging/types.rs +369 -0
  148. package/crates/team-agent/src/provider/adapter.rs +801 -0
  149. package/crates/team-agent/src/provider/approvals/mod.rs +2 -0
  150. package/crates/team-agent/src/provider/approvals/parsing.rs +452 -0
  151. package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +163 -0
  152. package/crates/team-agent/src/provider/classify.rs +456 -0
  153. package/crates/team-agent/src/provider/faults.rs +136 -0
  154. package/crates/team-agent/src/provider/helpers.rs +41 -0
  155. package/crates/team-agent/src/provider/mod.rs +53 -0
  156. package/crates/team-agent/src/provider/startup_prompt.rs +423 -0
  157. package/crates/team-agent/src/provider/tests/adapter.rs +239 -0
  158. package/crates/team-agent/src/provider/tests/classify.rs +240 -0
  159. package/crates/team-agent/src/provider/tests/faults.rs +120 -0
  160. package/crates/team-agent/src/provider/tests/idle.rs +208 -0
  161. package/crates/team-agent/src/provider/tests/wire.rs +213 -0
  162. package/crates/team-agent/src/provider/tests.rs +31 -0
  163. package/crates/team-agent/src/provider/types.rs +424 -0
  164. package/crates/team-agent/src/state/identity.rs +656 -0
  165. package/crates/team-agent/src/state/mod.rs +58 -0
  166. package/crates/team-agent/src/state/owner_gate.rs +423 -0
  167. package/crates/team-agent/src/state/persist.rs +712 -0
  168. package/crates/team-agent/src/state/projection.rs +657 -0
  169. package/crates/team-agent/src/state/selector.rs +105 -0
  170. package/crates/team-agent/src/state/testdata/state-rich.canonical.json +133 -0
  171. package/crates/team-agent/src/tmux_backend/tests.rs +586 -0
  172. package/crates/team-agent/src/tmux_backend.rs +758 -0
  173. package/crates/team-agent/src/transport/test_support.rs +252 -0
  174. package/crates/team-agent/src/transport/tests/behavior.rs +327 -0
  175. package/crates/team-agent/src/transport/tests/mod.rs +199 -0
  176. package/crates/team-agent/src/transport/tests/wire.rs +527 -0
  177. package/crates/team-agent/src/transport.rs +774 -0
  178. package/npm/install.mjs +90 -106
  179. package/package.json +15 -13
  180. package/crates/team-agent-core/Cargo.toml +0 -12
  181. package/crates/team-agent-core/src/lib.rs +0 -332
  182. package/crates/team-agent-core/src/main.rs +0 -152
  183. package/pyproject.toml +0 -18
  184. package/scripts/install.py +0 -88
  185. package/scripts/run_regression_tests.py +0 -83
  186. package/src/team_agent/__init__.py +0 -3
  187. package/src/team_agent/__main__.py +0 -5
  188. package/src/team_agent/_legacy_pane_discovery.py +0 -186
  189. package/src/team_agent/abnormal_track.py +0 -253
  190. package/src/team_agent/approvals/__init__.py +0 -65
  191. package/src/team_agent/approvals/constants.py +0 -6
  192. package/src/team_agent/approvals/parsing.py +0 -176
  193. package/src/team_agent/approvals/runtime_prompts.py +0 -171
  194. package/src/team_agent/approvals/status.py +0 -176
  195. package/src/team_agent/cli/__init__.py +0 -137
  196. package/src/team_agent/cli/commands.py +0 -481
  197. package/src/team_agent/cli/e2e.py +0 -202
  198. package/src/team_agent/cli/helpers.py +0 -226
  199. package/src/team_agent/cli/parser.py +0 -540
  200. package/src/team_agent/compiler.py +0 -334
  201. package/src/team_agent/coordinator/__init__.py +0 -53
  202. package/src/team_agent/coordinator/__main__.py +0 -119
  203. package/src/team_agent/coordinator/lifecycle.py +0 -411
  204. package/src/team_agent/coordinator/metadata.py +0 -61
  205. package/src/team_agent/coordinator/paths.py +0 -17
  206. package/src/team_agent/diagnose/__init__.py +0 -48
  207. package/src/team_agent/diagnose/checks.py +0 -101
  208. package/src/team_agent/diagnose/comms.py +0 -213
  209. package/src/team_agent/diagnose/health.py +0 -241
  210. package/src/team_agent/diagnose/orphan_cleanup.py +0 -364
  211. package/src/team_agent/diagnose/preflight.py +0 -194
  212. package/src/team_agent/diagnose/quick_start.py +0 -324
  213. package/src/team_agent/display/__init__.py +0 -92
  214. package/src/team_agent/display/adaptive.py +0 -511
  215. package/src/team_agent/display/backend.py +0 -46
  216. package/src/team_agent/display/close.py +0 -154
  217. package/src/team_agent/display/ghostty.py +0 -77
  218. package/src/team_agent/display/rebuild.py +0 -102
  219. package/src/team_agent/display/tiling.py +0 -156
  220. package/src/team_agent/display/worker_window.py +0 -114
  221. package/src/team_agent/display/workspace.py +0 -382
  222. package/src/team_agent/errors.py +0 -10
  223. package/src/team_agent/events.py +0 -84
  224. package/src/team_agent/fake_worker.py +0 -80
  225. package/src/team_agent/idle_predicate.py +0 -218
  226. package/src/team_agent/idle_takeover.py +0 -59
  227. package/src/team_agent/idle_takeover_wiring.py +0 -114
  228. package/src/team_agent/launch/__init__.py +0 -41
  229. package/src/team_agent/launch/bootstrap.py +0 -85
  230. package/src/team_agent/launch/config.py +0 -106
  231. package/src/team_agent/launch/core.py +0 -301
  232. package/src/team_agent/launch/requirements.py +0 -57
  233. package/src/team_agent/leader/__init__.py +0 -926
  234. package/src/team_agent/leader_binding.py +0 -183
  235. package/src/team_agent/lifecycle/__init__.py +0 -5
  236. package/src/team_agent/lifecycle/agents.py +0 -278
  237. package/src/team_agent/lifecycle/operations.py +0 -411
  238. package/src/team_agent/lifecycle/paste_buffer_hygiene.py +0 -39
  239. package/src/team_agent/lifecycle/start.py +0 -363
  240. package/src/team_agent/mcp_server/__init__.py +0 -42
  241. package/src/team_agent/mcp_server/__main__.py +0 -7
  242. package/src/team_agent/mcp_server/contracts.py +0 -148
  243. package/src/team_agent/mcp_server/normalize.py +0 -257
  244. package/src/team_agent/mcp_server/server.py +0 -150
  245. package/src/team_agent/mcp_server/tools.py +0 -352
  246. package/src/team_agent/message_store/__init__.py +0 -23
  247. package/src/team_agent/message_store/agent_health.py +0 -113
  248. package/src/team_agent/message_store/core.py +0 -497
  249. package/src/team_agent/message_store/leader_notification_log.py +0 -198
  250. package/src/team_agent/message_store/result_watchers.py +0 -251
  251. package/src/team_agent/message_store/schema.py +0 -308
  252. package/src/team_agent/message_store/schema_migration.py +0 -448
  253. package/src/team_agent/messaging/__init__.py +0 -1
  254. package/src/team_agent/messaging/activity_detector.py +0 -262
  255. package/src/team_agent/messaging/delivery.py +0 -504
  256. package/src/team_agent/messaging/deps.py +0 -247
  257. package/src/team_agent/messaging/idle_alerts.py +0 -423
  258. package/src/team_agent/messaging/internal_delivery.py +0 -46
  259. package/src/team_agent/messaging/leader.py +0 -497
  260. package/src/team_agent/messaging/leader_api_errors.py +0 -216
  261. package/src/team_agent/messaging/leader_panes.py +0 -673
  262. package/src/team_agent/messaging/owner_bypass.py +0 -29
  263. package/src/team_agent/messaging/result_delivery.py +0 -539
  264. package/src/team_agent/messaging/results.py +0 -447
  265. package/src/team_agent/messaging/scheduler.py +0 -450
  266. package/src/team_agent/messaging/send.py +0 -532
  267. package/src/team_agent/messaging/session_drift.py +0 -94
  268. package/src/team_agent/messaging/tmux_io.py +0 -506
  269. package/src/team_agent/messaging/tmux_prompt.py +0 -338
  270. package/src/team_agent/messaging/trust_auto_answer.py +0 -52
  271. package/src/team_agent/orchestrator/__init__.py +0 -376
  272. package/src/team_agent/orchestrator/plan.py +0 -122
  273. package/src/team_agent/orchestrator/state.py +0 -128
  274. package/src/team_agent/paths.py +0 -45
  275. package/src/team_agent/permissions.py +0 -123
  276. package/src/team_agent/profiles/__init__.py +0 -82
  277. package/src/team_agent/profiles/constants.py +0 -19
  278. package/src/team_agent/profiles/core.py +0 -407
  279. package/src/team_agent/profiles/helpers.py +0 -69
  280. package/src/team_agent/profiles/provider_env.py +0 -188
  281. package/src/team_agent/profiles/smoke.py +0 -201
  282. package/src/team_agent/provider_cli/__init__.py +0 -43
  283. package/src/team_agent/provider_cli/adapter.py +0 -172
  284. package/src/team_agent/provider_cli/base.py +0 -48
  285. package/src/team_agent/provider_cli/claude.py +0 -503
  286. package/src/team_agent/provider_cli/codex.py +0 -336
  287. package/src/team_agent/provider_cli/copilot.py +0 -8
  288. package/src/team_agent/provider_cli/fake.py +0 -39
  289. package/src/team_agent/provider_cli/gemini.py +0 -95
  290. package/src/team_agent/provider_cli/opencode.py +0 -8
  291. package/src/team_agent/provider_cli/prompt.py +0 -62
  292. package/src/team_agent/provider_cli/registry.py +0 -18
  293. package/src/team_agent/provider_cli/unsupported.py +0 -32
  294. package/src/team_agent/provider_state/README.md +0 -78
  295. package/src/team_agent/provider_state/__init__.py +0 -91
  296. package/src/team_agent/provider_state/claude.py +0 -86
  297. package/src/team_agent/provider_state/codex.py +0 -84
  298. package/src/team_agent/provider_state/common.py +0 -207
  299. package/src/team_agent/provider_state/registry.py +0 -118
  300. package/src/team_agent/providers.py +0 -163
  301. package/src/team_agent/quality_gates.py +0 -104
  302. package/src/team_agent/restart/__init__.py +0 -34
  303. package/src/team_agent/restart/orchestration.py +0 -554
  304. package/src/team_agent/restart/selection.py +0 -89
  305. package/src/team_agent/restart/snapshot.py +0 -70
  306. package/src/team_agent/routing.py +0 -84
  307. package/src/team_agent/runtime.py +0 -1243
  308. package/src/team_agent/rust_core.py +0 -327
  309. package/src/team_agent/sessions/__init__.py +0 -25
  310. package/src/team_agent/sessions/capture.py +0 -144
  311. package/src/team_agent/sessions/inventory.py +0 -44
  312. package/src/team_agent/sessions/resume.py +0 -135
  313. package/src/team_agent/simple_yaml.py +0 -236
  314. package/src/team_agent/spec.py +0 -370
  315. package/src/team_agent/state.py +0 -693
  316. package/src/team_agent/status/__init__.py +0 -63
  317. package/src/team_agent/status/approvals.py +0 -52
  318. package/src/team_agent/status/compact.py +0 -158
  319. package/src/team_agent/status/constants.py +0 -18
  320. package/src/team_agent/status/inbox.py +0 -58
  321. package/src/team_agent/status/peek.py +0 -117
  322. package/src/team_agent/status/queries.py +0 -199
  323. package/src/team_agent/task_graph.py +0 -80
  324. package/src/team_agent/terminal.py +0 -57
  325. package/src/team_agent/wake.py +0 -58
  326. package/src/team_agent/watch/__init__.py +0 -145
@@ -0,0 +1,712 @@
1
+ //! state.json 持久化(bug-084 韧性;真相源 `state.py:save_runtime_state` + `_self_heal_runtime_state`
2
+ //! + `runtime.py:_runtime_lock`)。
3
+ //!
4
+ //! 流程(逐条对齐 bug_084_085_contract.md):
5
+ //! 1. 入参 deep-equal `_RUNTIME_STATE_CACHE[path]` → 取锁/os.replace **之前**返回。
6
+ //! 2. `_runtime_lock(workspace, "state-save", timeout=2.0)` 串行化(flock;state-save 不发锁事件)。
7
+ //! 3. 原子 `tmp -> rename(tmp, state.json)`;payload = `json.dumps(indent=2, ensure_ascii=False)`(无尾换行)。
8
+ //! 4. EACCES/EPERM/EBUSY/PermissionError → 有界退避重试 delays=[0.05,0.2,0.5](4 次尝试);
9
+ //! ENOSPC 等不重试,直接 raise。
10
+ //! 5. 重试耗尽且仍 retryable → self-heal:写 heal-tmp → rename(原 state→backup) → rename(heal-tmp→state);
11
+ //! **绝不 in-place truncate**。self-heal 也失败 → 还原 backup(若已建)+ 原 state 仍可见 + raise。
12
+ //! 6. 事件:save_retry(每次重试)/ self_healed(成功)/ save_failed(最终失败)/ self_heal_restore_failed。
13
+ //! 7. MUST-NOT-13:全程零 provider/network 调用。
14
+ //!
15
+ //! **已知字节边界(同 event_log,实测 state 不出现;state-rich.json 3471B 字节对拍 PASS)**:
16
+ //! `<1e-4` 指数浮点 / `>2^53` 大整数经 serde_json 会漂移;state 字段实测为小整数/字符串/bool/嵌套。
17
+ //!
18
+ //! **load 迁移(本 slice 接入)**:`load_runtime_state` 现复刻 Python 全链——
19
+ //! `normalize_agent_session_state`(SESSION_STATE_FIELDS 补 None)→ `_migrate_state_identity`
20
+ //! (`identity::migrate_state_identity`,补 leader_session_uuid)→ `_migrate_active_team_key`
21
+ //! (seed active 指针);任一改动 → `save_runtime_state` 回写。不存在且命中缓存 → 返回缓存 deepcopy。
22
+
23
+ use std::collections::HashMap;
24
+ use std::io;
25
+ use std::path::{Path, PathBuf};
26
+ use std::sync::{LazyLock, Mutex};
27
+ use std::sync::atomic::{AtomicU64, Ordering};
28
+ use std::time::{Duration, Instant};
29
+
30
+ use serde_json::{json, Value};
31
+
32
+ use crate::event_log::EventLog;
33
+ use crate::model::paths::runtime_dir;
34
+ use crate::state::identity::{migrate_state_identity, SystemEnv};
35
+ use crate::state::json_truthy;
36
+ use crate::state::projection::team_state_key;
37
+ use crate::state::StateError;
38
+
39
+ /// `state.py:26-29`: per-agent session-state keys that normalization fills with JSON null when absent.
40
+ const SESSION_STATE_FIELDS: [&str; 6] = [
41
+ "session_id",
42
+ "rollout_path",
43
+ "captured_at",
44
+ "captured_via",
45
+ "attribution_confidence",
46
+ "spawn_cwd",
47
+ ];
48
+
49
+ /// `state.py:_RUNTIME_STATE_CACHE`: process-wide path -> state cache backing the deep-equal early return in save.
50
+ static RUNTIME_STATE_CACHE: LazyLock<Mutex<HashMap<PathBuf, Value>>> =
51
+ LazyLock::new(|| Mutex::new(HashMap::new()));
52
+ static TMP_SEQ: AtomicU64 = AtomicU64::new(0); // counter consumed by `unique_tmp` to build distinct temp names
53
+
54
+ /// `state.py:41`。
55
+ pub fn runtime_state_path(workspace: &Path) -> PathBuf {
56
+ runtime_dir(workspace).join("state.json")
57
+ }
58
+
59
+ fn cache_equals(path: &Path, state: &Value) -> bool {
60
+ RUNTIME_STATE_CACHE.lock().is_ok_and(|c| c.get(path) == Some(state))
61
+ }
62
+ fn cache_set(path: &Path, state: &Value) {
63
+ if let Ok(mut c) = RUNTIME_STATE_CACHE.lock() {
64
+ c.insert(path.to_path_buf(), state.clone());
65
+ }
66
+ }
67
+ /// `_RUNTIME_STATE_CACHE.get(...)` → `copy.deepcopy(cached)`(clone = deepcopy)。
68
+ fn cache_get(path: &Path) -> Option<Value> {
69
+ RUNTIME_STATE_CACHE.lock().ok().and_then(|c| c.get(path).cloned())
70
+ }
71
+
72
+ fn unique_tmp(path: &Path, suffix: &str) -> PathBuf {
73
+ let name = path.file_name().map_or_else(String::new, |n| n.to_string_lossy().into_owned());
74
+ let seq = TMP_SEQ.fetch_add(1, Ordering::Relaxed);
75
+ path.with_file_name(format!("{name}.{}.{seq}.{suffix}", std::process::id()))
76
+ }
77
+
78
+ // Fault injection (tests only): a **per-call-index plan**. FAULT_PLAN[i] is the errno injected into the i-th
79
+ // `atomic_replace` call (0, or an index past the end of the plan, means the real rename runs). This was the
80
+ // critical fix from adversarial review: a decrementing counter cannot express the non-contiguous succeed/fail
81
+ // sequence the self-heal restore branch needs (retry loop fails, path->backup succeeds, heal->path fails,
82
+ // restore succeeds/fails), so the crash-safety invariant "original state is restored from backup" was untestable.
82
+ #[cfg(test)]
83
+ thread_local! {
84
+ static FAULT_PLAN: std::cell::RefCell<Vec<i32>> = const { std::cell::RefCell::new(Vec::new()) };
85
+ static FAULT_IDX: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
86
+ }
87
+
88
/// Renames `from` onto `to` via `std::fs::rename`.
///
/// In test builds every call first consumes one slot of the thread-local fault plan: the call
/// counter `FAULT_IDX` is advanced, and if `FAULT_PLAN` holds a non-zero errno at the previous
/// counter value, that errno is returned as an OS error and no rename is attempted.
fn atomic_replace(from: &Path, to: &Path) -> io::Result<()> {
    #[cfg(test)]
    {
        // Post-increment: read this call's index and bump the counter for the next call.
        let call_index = FAULT_IDX.with(|counter| counter.replace(counter.get() + 1));
        let planned = FAULT_PLAN.with(|plan| plan.borrow().get(call_index).copied());
        if let Some(errno) = planned.filter(|&code| code != 0) {
            return Err(io::Error::from_raw_os_error(errno));
        }
    }
    std::fs::rename(from, to)
}
103
+
104
+ /// `_retryable_replace_error`:PermissionError 或 errno ∈ {EACCES, EPERM, EBUSY}。
105
+ fn retryable_replace_error(e: &io::Error) -> bool {
106
+ if let Some(errno) = e.raw_os_error() {
107
+ return errno == libc::EACCES || errno == libc::EPERM || errno == libc::EBUSY;
108
+ }
109
+ e.kind() == io::ErrorKind::PermissionDenied
110
+ }
111
+
112
+ fn errno_name(errno: Option<i32>) -> Option<&'static str> {
113
+ match errno {
114
+ Some(e) if e == libc::EACCES => Some("EACCES"),
115
+ Some(e) if e == libc::EPERM => Some("EPERM"),
116
+ Some(e) if e == libc::EBUSY => Some("EBUSY"),
117
+ Some(e) if e == libc::ENOSPC => Some("ENOSPC"),
118
+ _ => None,
119
+ }
120
+ }
121
+
122
+ /// `runtime.py:_runtime_lock` 的 flock 版(RAII;Drop 释放)。state-save 不发锁事件。
123
+ /// POSIX flock(unix);Windows 锁(LockFileEx)延平台层(step 9+)。
124
+ struct RuntimeLock {
125
+ #[allow(dead_code)]
126
+ file: std::fs::File,
127
+ }
128
+
129
+ impl RuntimeLock {
130
+ fn acquire(workspace: &Path, name: &str, timeout: f64) -> Result<Self, StateError> {
131
+ let lock_path = runtime_dir(workspace).join(format!("{name}.lock"));
132
+ if let Some(parent) = lock_path.parent() {
133
+ std::fs::create_dir_all(parent)?;
134
+ }
135
+ let file = std::fs::OpenOptions::new().create(true).write(true).truncate(false).open(&lock_path)?;
136
+ #[cfg(unix)]
137
+ {
138
+ use std::os::unix::io::AsRawFd;
139
+ let fd = file.as_raw_fd();
140
+ let start = Instant::now();
141
+ loop {
142
+ // SAFETY: fd 来自打开的 lock_file,LOCK_EX|LOCK_NB 非阻塞。
143
+ let rc = unsafe { libc::flock(fd, libc::LOCK_EX | libc::LOCK_NB) };
144
+ if rc == 0 {
145
+ return Ok(Self { file });
146
+ }
147
+ if start.elapsed().as_secs_f64() >= timeout {
148
+ return Err(StateError::Locked(name.to_string()));
149
+ }
150
+ std::thread::sleep(Duration::from_millis(50));
151
+ }
152
+ }
153
+ #[cfg(not(unix))]
154
+ {
155
+ let _ = timeout;
156
+ Err(StateError::Locked(format!("{name} (runtime lock not yet implemented on non-unix)")))
157
+ }
158
+ }
159
+ }
160
+
161
+ #[cfg(unix)]
162
+ impl Drop for RuntimeLock {
163
+ fn drop(&mut self) {
164
+ use std::os::unix::io::AsRawFd;
165
+ // SAFETY: 释放本进程持有的 flock。
166
+ unsafe { libc::flock(self.file.as_raw_fd(), libc::LOCK_UN) };
167
+ }
168
+ }
169
+
170
+ /// `save_runtime_state`(bug-084)。`state` 是 state.json 的内存 Value(插入序保留)。
171
+ /// 注:Python 在此还调 `_migrate_state_identity`(identity slice 落地后接入;本 slice 不改 state 内容)。
172
+ pub fn save_runtime_state(workspace: &Path, state: &Value) -> Result<(), StateError> {
173
+ let path = runtime_state_path(workspace);
174
+ if cache_equals(&path, state) {
175
+ return Ok(());
176
+ }
177
+ // Python `state.py:497`:先对入参 state 跑 `_migrate_state_identity`(就地填缺失 leader uuid)。
178
+ // 我们 `&Value` 不可变 → 克隆后迁移,后续比较/写入/缓存/self-heal 全走 `migrated`。
179
+ // 该步**不**包 try/except → 错误 propagate(对齐 Python)。
180
+ let mut migrated = state.clone();
181
+ migrate_state_identity(&mut migrated, &SystemEnv, workspace)?;
182
+ if cache_equals(&path, &migrated) {
183
+ return Ok(());
184
+ }
185
+ // 与磁盘已有内容「迁移后」相同 → 更新缓存返回(避免无谓重写)。字节对拍 Python:对 `existing` 先
186
+ // `normalize_agent_session_state` + `_migrate_state_identity` 再比(读/迁移失败 try/except: pass →
187
+ // 落写路径)。**修对抗 P1**:此前比较 raw `existing` 漏了这两步,会把「磁盘已是迁移等价形」的 legacy
188
+ // 文件误判为不同而 spurious 重写,破坏 load+save 字节恒等。
189
+ if path.exists() {
190
+ if let Ok(text) = std::fs::read_to_string(&path) {
191
+ if let Ok(mut existing) = serde_json::from_str::<Value>(&text) {
192
+ normalize_agent_session_state(&mut existing);
193
+ let _ = migrate_state_identity(&mut existing, &SystemEnv, workspace);
194
+ if existing == migrated {
195
+ cache_set(&path, &migrated);
196
+ return Ok(());
197
+ }
198
+ }
199
+ }
200
+ }
201
+
202
+ let _lock = RuntimeLock::acquire(workspace, "state-save", 2.0)?;
203
+ if let Some(parent) = path.parent() {
204
+ std::fs::create_dir_all(parent)?;
205
+ }
206
+ // 字节对拍 Python json.dumps(indent=2, ensure_ascii=False)(无尾换行)。
207
+ let payload = serde_json::to_string_pretty(&migrated)?;
208
+ let delays = [0.05_f64, 0.2, 0.5];
209
+
210
+ for attempt in 0..=delays.len() {
211
+ let tmp = unique_tmp(&path, "tmp");
212
+ let result: io::Result<()> = (|| {
213
+ std::fs::write(&tmp, payload.as_bytes())?;
214
+ atomic_replace(&tmp, &path)
215
+ })();
216
+ let _ = std::fs::remove_file(&tmp); // finally: unlink missing_ok
217
+ match result {
218
+ Ok(()) => {
219
+ cache_set(&path, &migrated);
220
+ return Ok(());
221
+ }
222
+ Err(e) => {
223
+ let retryable = retryable_replace_error(&e);
224
+ if !retryable || attempt >= delays.len() {
225
+ if retryable {
226
+ return self_heal(workspace, &path, &payload, &migrated, attempt + 1, &e);
227
+ }
228
+ return Err(StateError::Io(e));
229
+ }
230
+ let _ = EventLog::new(workspace).write(
231
+ "runtime.state.save_retry",
232
+ json!({
233
+ "attempt": attempt + 1,
234
+ "errno": e.raw_os_error(),
235
+ "errno_name": errno_name(e.raw_os_error()),
236
+ "error": e.to_string(),
237
+ }),
238
+ );
239
+ std::thread::sleep(Duration::from_secs_f64(delays[attempt]));
240
+ }
241
+ }
242
+ }
243
+ Err(StateError::SaveFailed("retry loop exhausted without return".to_string()))
244
+ }
245
+
246
+ /// `_self_heal_runtime_state`:重建 inode(heal-tmp + backup-rename),绝不 in-place truncate。
247
+ fn self_heal(
248
+ workspace: &Path,
249
+ path: &Path,
250
+ payload: &str,
251
+ state: &Value,
252
+ attempts_used: usize,
253
+ original_exc: &io::Error,
254
+ ) -> Result<(), StateError> {
255
+ let event_log = EventLog::new(workspace);
256
+ let heal_tmp = unique_tmp(path, "heal.tmp");
257
+ let name = path.file_name().map_or_else(String::new, |n| n.to_string_lossy().into_owned());
258
+ let backup = path.with_file_name(format!("{name}.bak.{}", std::process::id()));
259
+ let mut backup_created = false;
260
+
261
+ let outcome: io::Result<()> = (|| {
262
+ std::fs::write(&heal_tmp, payload.as_bytes())?;
263
+ match atomic_replace(path, &backup) {
264
+ Ok(()) => backup_created = true,
265
+ Err(e) if e.kind() == io::ErrorKind::NotFound => {} // 原 state 不存在 → 无需备份
266
+ Err(e) => return Err(e),
267
+ }
268
+ atomic_replace(&heal_tmp, path)
269
+ })();
270
+ let _ = std::fs::remove_file(&heal_tmp); // finally
271
+
272
+ match outcome {
273
+ Ok(()) => {
274
+ cache_set(path, state);
275
+ let _ = event_log.write(
276
+ "runtime.state.self_healed",
277
+ json!({
278
+ "inode_rebuilt": true,
279
+ "attempts_used": attempts_used,
280
+ "replace_retries": attempts_used.saturating_sub(1),
281
+ }),
282
+ );
283
+ Ok(())
284
+ }
285
+ Err(e) => {
286
+ if backup_created {
287
+ if let Err(restore) = atomic_replace(&backup, path) {
288
+ let _ = event_log.write(
289
+ "runtime.state.self_heal_restore_failed",
290
+ json!({"error": restore.to_string()}),
291
+ );
292
+ }
293
+ }
294
+ let _ = event_log.write(
295
+ "runtime.state.save_failed",
296
+ json!({
297
+ "phase": "save_runtime_state",
298
+ "final_errno": e.raw_os_error().or_else(|| original_exc.raw_os_error()),
299
+ "error": e.to_string(),
300
+ "retries_used": attempts_used.saturating_sub(1),
301
+ }),
302
+ );
303
+ Err(StateError::SaveFailed(e.to_string()))
304
+ }
305
+ }
306
+ }
307
+
308
+ /// `load_runtime_state`(本 slice 最小:读+parse+缓存;normalize/migration 待 identity slice)。
309
+ /// `normalize_agent_session_state`(`state.py:45`):为每个 agent dict 的 SESSION_STATE_FIELDS
310
+ /// setdefault None(缺则末尾插)。
311
+ pub fn normalize_agent_session_state(state: &mut Value) {
312
+ let Some(agents) = state.get_mut("agents").and_then(Value::as_object_mut) else {
313
+ return;
314
+ };
315
+ for agent_state in agents.values_mut() {
316
+ if let Some(obj) = agent_state.as_object_mut() {
317
+ for field in SESSION_STATE_FIELDS {
318
+ obj.entry(field.to_string()).or_insert(Value::Null);
319
+ }
320
+ }
321
+ }
322
+ }
323
+
324
+ /// `_migrate_active_team_key`(`state.py:73`,0.2.6 Family B C6):legacy state 缺 active_team_key
325
+ /// 时 seed 一次。返回是否有改动。
326
+ ///
327
+ /// 注:Python `seed if seed in teams or not teams else seed` 两支均为 `seed`(死三元),
328
+ /// 此处直接 `= seed`,与可观测行为一致。
329
+ pub fn migrate_active_team_key(state: &mut Value) -> bool {
330
+ if state.as_object().is_some_and(|o| o.contains_key("active_team_key")) {
331
+ return false;
332
+ }
333
+ let teams_is_dict = state.get("teams").is_some_and(Value::is_object);
334
+ let teams_len = state.get("teams").and_then(Value::as_object).map_or(0, serde_json::Map::len);
335
+ if state.get("session_name").is_some_and(json_truthy) {
336
+ let seed = team_state_key(state);
337
+ if let Some(o) = state.as_object_mut() {
338
+ o.insert("active_team_key".to_string(), Value::String(seed));
339
+ }
340
+ return true;
341
+ }
342
+ if teams_is_dict && teams_len == 1 {
343
+ let first = state.get("teams").and_then(Value::as_object).and_then(|t| t.keys().next().cloned());
344
+ if let (Some(k), Some(o)) = (first, state.as_object_mut()) {
345
+ o.insert("active_team_key".to_string(), Value::String(k));
346
+ }
347
+ return true;
348
+ }
349
+ if let Some(o) = state.as_object_mut() {
350
+ o.insert("active_team_key".to_string(), Value::Null);
351
+ }
352
+ true
353
+ }
354
+
355
+ pub fn load_runtime_state(workspace: &Path) -> Result<Value, StateError> {
356
+ let path = runtime_state_path(workspace);
357
+ if !path.exists() {
358
+ if let Some(cached) = cache_get(&path) {
359
+ return Ok(cached);
360
+ }
361
+ return Ok(json!({"agents": {}, "tasks": [], "session_name": null, "active_team_key": null}));
362
+ }
363
+ let text = std::fs::read_to_string(&path)?;
364
+ let mut state: Value = serde_json::from_str(&text)?;
365
+ normalize_agent_session_state(&mut state);
366
+ let mut changed = migrate_state_identity(&mut state, &SystemEnv, workspace)?;
367
+ if migrate_active_team_key(&mut state) {
368
+ changed = true;
369
+ }
370
+ if changed {
371
+ save_runtime_state(workspace, &state)?;
372
+ }
373
+ cache_set(&path, &state);
374
+ Ok(state)
375
+ }
376
+
377
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used, clippy::panic, clippy::expect_used)]
    use super::*;
    use std::sync::atomic::AtomicU32;

    /// Counter that keeps workspace directory names unique within this process.
    static SEQ: AtomicU32 = AtomicU32::new(0);

    /// Creates a fresh, uniquely named workspace directory under the OS temp dir.
    fn temp_ws() -> PathBuf {
        let seq = SEQ.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let dir = std::env::temp_dir().join(format!("ta_rs_state_{pid}_{seq}"));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Last 50 events from the workspace event log.
    fn read_events(ws: &Path) -> Vec<Value> {
        EventLog::new(ws).tail(50).unwrap()
    }

    /// Number of logged events whose `event` field equals `name`.
    fn count_event(ws: &Path, name: &str) -> usize {
        read_events(ws)
            .into_iter()
            .filter(|ev| ev["event"].as_str() == Some(name))
            .count()
    }

    /// Per-call-index fault plan: `plan[i]` is the errno injected into the i-th
    /// `atomic_replace` call (0 = succeed). Also resets the call index.
    fn set_fault_plan(plan: &[i32]) {
        FAULT_PLAN.with(|slot| *slot.borrow_mut() = plan.to_vec());
        FAULT_IDX.with(|idx| idx.set(0));
    }

    /// Removes any injected faults.
    fn clear_fault() {
        set_fault_plan(&[]);
    }

    /// First logged event named `name`, or `Value::Null` when there is none.
    fn get_event(ws: &Path, name: &str) -> Value {
        read_events(ws)
            .into_iter()
            .find(|ev| ev["event"].as_str() == Some(name))
            .unwrap_or(Value::Null)
    }

    /// Parses the on-disk state file of the workspace.
    fn read_state(ws: &Path) -> Value {
        let text = std::fs::read_to_string(runtime_state_path(ws)).unwrap();
        serde_json::from_str(&text).unwrap()
    }

    /// Paths in the runtime dir whose file name contains `.bak.`; empty if the dir is unreadable.
    fn bak_files(ws: &Path) -> Vec<PathBuf> {
        let Ok(entries) = std::fs::read_dir(runtime_dir(ws)) else {
            return Vec::new();
        };
        entries
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.path())
            .filter(|p| p.file_name().is_some_and(|n| n.to_string_lossy().contains(".bak.")))
            .collect()
    }

    // Byte parity: state-rich.json through to_string_pretty must equal, byte for byte,
    // Python's json.dumps(indent=2, ensure_ascii=False).
    #[test]
    fn state_json_byte_parity_with_python_indent2() {
        let fixture = include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../snapshot/fixtures/bug_084_state_resilience/state-rich.json"
        ));
        let canonical = include_str!("testdata/state-rich.canonical.json");
        let parsed: Value = serde_json::from_str(fixture).unwrap();
        let rendered = serde_json::to_string_pretty(&parsed).unwrap();
        assert_eq!(rendered, canonical, "state.json 序列化未字节对齐 Python indent=2");
    }

    #[test]
    fn save_writes_atomically_and_caches() {
        let ws = temp_ws();
        let state = json!({"session_name":"t","agents":{"a":{"agent_id":"a"}},"active_team_key":"t"});
        save_runtime_state(&ws, &state).unwrap();
        let written = std::fs::read_to_string(runtime_state_path(&ws)).unwrap();
        assert_eq!(written, serde_json::to_string_pretty(&state).unwrap());
        assert!(!written.ends_with('\n'), "无尾换行(对齐 json.dumps)");
        // Saving the identical state again goes through the deep-equal cache path; here we only
        // check that it returns Ok.
        save_runtime_state(&ws, &state).unwrap();
    }

    #[test]
    fn deep_equal_cache_early_returns() {
        let ws = temp_ws();
        let state = json!({"x":1});
        save_runtime_state(&ws, &state).unwrap();
        // Delete the file; if the cache early-return works, re-saving the same state must not
        // recreate it.
        let state_file = runtime_state_path(&ws);
        std::fs::remove_file(&state_file).unwrap();
        save_runtime_state(&ws, &state).unwrap();
        assert!(!runtime_state_path(&ws).exists(), "deep-equal 命中缓存 → 未重写(文件仍不存在)");
    }

    // bug-084 core: EACCES is retried 3 times (bounded backoff), then self-heal succeeds and the
    // events carry the exact expected fields.
    #[test]
    fn retryable_eacces_retries_then_self_heals() {
        let ws = temp_ws();
        // Original state, so self-heal has something to back up.
        save_runtime_state(&ws, &json!({"v":1})).unwrap();
        let updated = json!({"v":2});
        // The 4 loop attempts fail; the 2 replaces inside self-heal succeed.
        set_fault_plan(&[libc::EACCES; 4]);
        save_runtime_state(&ws, &updated).unwrap();
        clear_fault();
        assert_eq!(read_state(&ws), updated, "inode 重建,文件为 s2");
        // Event sequence and exact fields.
        let retries: Vec<Value> = read_events(&ws)
            .into_iter()
            .filter(|ev| ev["event"] == json!("runtime.state.save_retry"))
            .collect();
        assert_eq!(retries.len(), 3, "3 次重试");
        assert_eq!(retries[0]["attempt"], json!(1));
        assert_eq!(retries[0]["errno_name"], json!("EACCES"));
        assert_eq!(retries[2]["attempt"], json!(3));
        let healed = get_event(&ws, "runtime.state.self_healed");
        assert_eq!(healed["inode_rebuilt"], json!(true));
        assert_eq!(healed["attempts_used"], json!(4));
        assert_eq!(healed["replace_retries"], json!(3));
    }

    // EPERM and EBUSY are retryable too (not only EACCES).
    #[test]
    fn eperm_and_ebusy_are_retryable() {
        let cases = [(libc::EPERM, "EPERM"), (libc::EBUSY, "EBUSY")];
        for (errno, errno_name) in cases {
            let ws = temp_ws();
            save_runtime_state(&ws, &json!({"v":1})).unwrap();
            set_fault_plan(&[errno; 4]);
            save_runtime_state(&ws, &json!({"v":2})).unwrap();
            clear_fault();
            assert_eq!(read_state(&ws), json!({"v":2}));
            assert_eq!(count_event(&ws, "runtime.state.self_healed"), 1);
            let first_retry = get_event(&ws, "runtime.state.save_retry");
            assert_eq!(first_retry["errno_name"], json!(errno_name));
        }
    }

    // Non-retryable (ENOSPC): no retry, no self-heal, the error is returned directly.
    #[test]
    fn non_retryable_enospc_raises_without_self_heal() {
        let ws = temp_ws();
        set_fault_plan(&[libc::ENOSPC]);
        let result = save_runtime_state(&ws, &json!({"v":9}));
        clear_fault();
        assert!(matches!(result, Err(StateError::Io(_))), "ENOSPC 直接 raise");
        assert_eq!(count_event(&ws, "runtime.state.self_healed"), 0);
        assert_eq!(count_event(&ws, "runtime.state.save_retry"), 0);
    }

    // Retry boundary: exactly 3 retries, the 4th attempt (attempt index 3) succeeds, so
    // self-heal is not triggered. Pins the 0..=delays.len() off-by-one.
    #[test]
    fn retry_boundary_exactly_three_then_succeeds() {
        let ws = temp_ws();
        // First 3 attempts fail, the 4th succeeds.
        set_fault_plan(&[libc::EACCES; 3]);
        save_runtime_state(&ws, &json!({"v":7})).unwrap();
        clear_fault();
        assert_eq!(read_state(&ws), json!({"v":7}));
        assert_eq!(count_event(&ws, "runtime.state.save_retry"), 3);
        assert_eq!(count_event(&ws, "runtime.state.self_healed"), 0, "未触发 self-heal");
    }

    // Crash-safety invariant 1: self-heal fails midway but the restore succeeds, so the original
    // state is back in place, with 0 restore_failed events and 1 save_failed event.
    // (Per-index injection: 4 loop failures, path->backup ok, heal->path fails, restore ok.)
    #[test]
    fn self_heal_restore_success_recovers_original() {
        let ws = temp_ws();
        let original = json!({"keep":"original"});
        save_runtime_state(&ws, &original).unwrap();
        let e = libc::EACCES;
        // idx4 = path->backup ok, idx5 = heal->path fails, idx6 = restore ok
        set_fault_plan(&[e, e, e, e, 0, e, 0]);
        let result = save_runtime_state(&ws, &json!({"keep":"new"}));
        clear_fault();
        assert!(matches!(result, Err(StateError::SaveFailed(_))));
        assert_eq!(read_state(&ws), original, "restore 成功:原 state 复位到 state.json");
        assert_eq!(count_event(&ws, "runtime.state.self_heal_restore_failed"), 0);
        let failed = get_event(&ws, "runtime.state.save_failed");
        assert_eq!(failed["phase"], json!("save_runtime_state"));
        assert_eq!(failed["retries_used"], json!(3));
    }

    // Crash-safety invariant 2: self-heal fails and the restore fails as well, so the original
    // state survives intact in the .bak file and a restore_failed event is logged.
    #[test]
    fn self_heal_restore_failed_leaves_original_in_backup() {
        let ws = temp_ws();
        let original = json!({"keep":"original"});
        save_runtime_state(&ws, &original).unwrap();
        let e = libc::EACCES;
        // idx4 = backup ok, idx5 = heal fails, idx6 = restore fails
        set_fault_plan(&[e, e, e, e, 0, e, e]);
        let result = save_runtime_state(&ws, &json!({"keep":"new"}));
        clear_fault();
        assert!(matches!(result, Err(StateError::SaveFailed(_))));
        assert_eq!(count_event(&ws, "runtime.state.self_heal_restore_failed"), 1);
        assert_eq!(count_event(&ws, "runtime.state.save_failed"), 1);
        // state.json was renamed to the backup (restore failed); the original state is intact
        // inside the .bak file and never lost.
        let backups = bak_files(&ws);
        assert_eq!(backups.len(), 1, "应有一个 .bak 存原 state");
        let backup_text = std::fs::read_to_string(&backups[0]).unwrap();
        let recovered: Value = serde_json::from_str(&backup_text).unwrap();
        assert_eq!(recovered, original, "原 state 经 backup 可恢复");
    }

    // Backup NotFound branch: with no prior state.json, path->backup hits NotFound and is
    // swallowed; the heal still succeeds and no .bak is produced.
    #[test]
    fn self_heal_with_no_prior_state_swallows_backup_notfound() {
        let ws = temp_ws();
        // Only the loop attempts fail; inside the heal, path->backup is a real (non-injected)
        // NotFound and heal->path succeeds.
        set_fault_plan(&[libc::EACCES; 4]);
        save_runtime_state(&ws, &json!({"fresh":true})).unwrap();
        clear_fault();
        assert_eq!(read_state(&ws), json!({"fresh":true}));
        assert_eq!(count_event(&ws, "runtime.state.self_healed"), 1);
        assert!(bak_files(&ws).is_empty(), "原 state 不存在 → 不应产生 .bak");
    }

    // Non-ASCII passes through literally (ensure_ascii=False): CJK and emoji are written as-is,
    // not as \\uXXXX escapes.
    #[test]
    fn non_ascii_values_pass_through_literally() {
        let ws = temp_ws();
        save_runtime_state(&ws, &json!({"objective":"héllo🦀 世界","x":1})).unwrap();
        let written = std::fs::read_to_string(runtime_state_path(&ws)).unwrap();
        assert!(written.contains("héllo🦀 世界"), "非 ASCII 应字面保留");
        assert!(!written.contains("\\u"), "不应 ascii-escape");
    }

    // A deep-equal hit must return BEFORE the lock is taken: while the lock is held, saving the
    // same state again must return Ok immediately instead of timing out on the lock.
    #[test]
    fn deep_equal_save_returns_before_lock() {
        let ws = temp_ws();
        let state = json!({"v":1});
        // Populate the cache.
        save_runtime_state(&ws, &state).unwrap();
        // Hold the lock for the rest of the test.
        let _held = RuntimeLock::acquire(&ws, "state-save", 2.0).unwrap();
        // Without the early return this would contend for the held lock, hit the 2s timeout and
        // yield Locked. Ok proves the early return.
        let result = save_runtime_state(&ws, &state);
        assert!(result.is_ok(), "deep-equal 应在取锁前返回");
    }

    // Concurrent full-flow saves (not just lock acquisition): several threads save different
    // states; all must return Ok, the final file must be valid JSON, and no tmp files may remain.
    #[test]
    fn concurrent_full_flow_saves_serialize_without_corruption() {
        let ws = temp_ws();
        std::fs::create_dir_all(runtime_dir(&ws)).unwrap();
        let mut workers = Vec::new();
        for t in 0..6 {
            let thread_ws = ws.clone();
            workers.push(std::thread::spawn(move || {
                save_runtime_state(&thread_ws, &json!({ "t": t }))
            }));
        }
        for worker in workers {
            // Every thread's save must be Ok.
            worker.join().unwrap().unwrap();
        }
        // The final file is valid JSON (some thread's state).
        let final_state = read_state(&ws);
        assert!(final_state["t"].is_number());
        let residue: Vec<_> = std::fs::read_dir(runtime_dir(&ws))
            .unwrap()
            .filter_map(|entry| entry.ok())
            .filter(|entry| {
                let file_name = entry.file_name().to_string_lossy().into_owned();
                file_name.ends_with(".tmp") || file_name.contains(".heal.")
            })
            .collect();
        assert!(residue.is_empty(), "无 tmp/heal 残留:{residue:?}");
    }

    // Lock serialization: while the lock is held, a second acquire cannot obtain it within its
    // timeout and reports Locked.
    #[test]
    fn runtime_lock_serializes() {
        let ws = temp_ws();
        std::fs::create_dir_all(runtime_dir(&ws)).unwrap();
        let held = RuntimeLock::acquire(&ws, "state-save", 2.0).unwrap();
        // Another thread tries with a short timeout and should get Locked.
        let contender_ws = ws.clone();
        let contender = std::thread::spawn(move || RuntimeLock::acquire(&contender_ws, "state-save", 0.2));
        let result = contender.join().unwrap();
        assert!(matches!(result, Err(StateError::Locked(_))), "持锁时第二者应 Locked");
        drop(held);
    }

    // ---- load migrations (state.py:45/73/55) ----

    #[test]
    fn normalize_agent_session_state_fills_fields_in_order() {
        let mut state = json!({"agents": {"w1": {"session_id": "keep"}}});
        normalize_agent_session_state(&mut state);
        let expected = json!({"agents": {"w1": {
            "session_id": "keep", "rollout_path": null, "captured_at": null,
            "captured_via": null, "attribution_confidence": null, "spawn_cwd": null,
        }}});
        // Compare serialized text so that key order is part of the assertion.
        let actual_text = serde_json::to_string(&state).unwrap();
        let expected_text = serde_json::to_string(&expected).unwrap();
        assert_eq!(actual_text, expected_text);
    }

    #[test]
    fn migrate_active_team_key_branches() {
        // session_name present: seed = team_state_key.
        let mut with_session = json!({"session_name": "s", "team_dir": "/w/.team/tk", "teams": {}});
        assert!(migrate_active_team_key(&mut with_session));
        assert_eq!(with_session["active_team_key"], json!("tk"));
        // Single team: take its only key.
        let mut single_team = json!({"teams": {"only": {}}});
        assert!(migrate_active_team_key(&mut single_team));
        assert_eq!(single_team["active_team_key"], json!("only"));
        // Several teams and no session: null.
        let mut many_teams = json!({"teams": {"a": {}, "b": {}}});
        assert!(migrate_active_team_key(&mut many_teams));
        assert_eq!(many_teams["active_team_key"], json!(null));
        // active_team_key already present: no migration.
        let mut already_set = json!({"active_team_key": "x"});
        assert!(!migrate_active_team_key(&mut already_set));
    }

    #[test]
    fn load_runtime_state_missing_returns_default() {
        let ws = temp_ws();
        let loaded = load_runtime_state(&ws).unwrap();
        assert_eq!(loaded, json!({"agents": {}, "tasks": [], "session_name": null, "active_team_key": null}));
    }

    #[test]
    fn load_runtime_state_migrates_and_persists() {
        // Legacy state: has session_name, lacks active_team_key, the agent lacks session fields,
        // and team_owner lacks a uuid.
        let ws = temp_ws();
        std::fs::create_dir_all(runtime_dir(&ws)).unwrap();
        let legacy = json!({
            "session_name": "sess",
            "team_dir": "/w/.team/tk",
            "agents": {"w1": {"agent_id": "w1"}},
            "team_owner": {"pane_id": "%1", "machine_fingerprint": "fp"},
        });
        let legacy_text = serde_json::to_string(&legacy).unwrap();
        std::fs::write(runtime_state_path(&ws), legacy_text).unwrap();
        let loaded = load_runtime_state(&ws).unwrap();
        // active_team_key seed = team_state_key = "tk".
        assert_eq!(loaded["active_team_key"], json!("tk"));
        // Agent session fields are filled with null.
        assert_eq!(loaded["agents"]["w1"]["spawn_cwd"], json!(null));
        // team_owner gains a leader_session_uuid.
        assert_eq!(loaded["team_owner"]["leader_session_uuid"].as_str().unwrap().len(), 32);
        // The migration was written back to disk (a later load changes nothing further;
        // active_team_key is already there).
        let persisted = read_state(&ws);
        assert_eq!(persisted["active_team_key"], json!("tk"));
        assert_eq!(persisted["team_owner"]["leader_session_uuid"], loaded["team_owner"]["leader_session_uuid"]);
    }

    // Adversarial P1: a legacy file that ALREADY has active_team_key but lacks
    // leader_session_uuid. Load fills the uuid in memory, but save applies the same
    // normalize+migrate to the existing on-disk content, finds it equivalent and does NOT
    // rewrite, so the on-disk bytes stay identical.
    // (Previously save compared against the raw existing content without those two steps, which
    // caused a spurious rewrite into pretty form and broke load+save byte identity.)
    #[test]
    fn load_does_not_rewrite_already_migrated_equivalent_legacy() {
        let ws = temp_ws();
        std::fs::create_dir_all(runtime_dir(&ws)).unwrap();
        let legacy = r#"{"active_team_key": "preset", "session_name": "sess", "team_owner": {"pane_id": "%1"}}"#;
        std::fs::write(runtime_state_path(&ws), legacy).unwrap();
        let before = std::fs::read_to_string(runtime_state_path(&ws)).unwrap();
        let loaded = load_runtime_state(&ws).unwrap();
        // The in-memory state gained a uuid (so a migration was genuinely needed).
        assert_eq!(loaded["team_owner"]["leader_session_uuid"].as_str().unwrap().len(), 32);
        // But the file on disk was not rewritten (bytes identical).
        let after = std::fs::read_to_string(runtime_state_path(&ws)).unwrap();
        assert_eq!(after, before, "已是迁移等价形的 legacy 文件不得 spurious 重写");
    }
}