@team-agent/installer 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/Cargo.lock +34 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/Cargo.toml +1 -1
  4. package/crates/team-agent/src/cli/adapters.rs +196 -19
  5. package/crates/team-agent/src/cli/diagnose.rs +144 -10
  6. package/crates/team-agent/src/cli/emit.rs +286 -52
  7. package/crates/team-agent/src/cli/leader.rs +37 -8
  8. package/crates/team-agent/src/cli/mod.rs +799 -316
  9. package/crates/team-agent/src/cli/status_port.rs +25 -2
  10. package/crates/team-agent/src/cli/tests/divergence.rs +1 -2
  11. package/crates/team-agent/src/cli/tests/lane_c.rs +23 -13
  12. package/crates/team-agent/src/cli/tests/main_preserved.rs +2 -0
  13. package/crates/team-agent/src/cli/tests/run_delegation.rs +57 -3
  14. package/crates/team-agent/src/cli/types.rs +17 -0
  15. package/crates/team-agent/src/compiler.rs +15 -5
  16. package/crates/team-agent/src/coordinator/health.rs +89 -20
  17. package/crates/team-agent/src/coordinator/mod.rs +4 -0
  18. package/crates/team-agent/src/coordinator/runtime_detectors.rs +500 -0
  19. package/crates/team-agent/src/coordinator/runtime_observation.rs +58 -0
  20. package/crates/team-agent/src/coordinator/tick.rs +222 -69
  21. package/crates/team-agent/src/coordinator/types.rs +15 -3
  22. package/crates/team-agent/src/db/schema.rs +37 -2
  23. package/crates/team-agent/src/diagnose/comms.rs +226 -0
  24. package/crates/team-agent/src/diagnose/mod.rs +45 -0
  25. package/crates/team-agent/src/diagnose/orphans.rs +658 -0
  26. package/crates/team-agent/src/fake_worker.rs +146 -3
  27. package/crates/team-agent/src/leader/start.rs +121 -23
  28. package/crates/team-agent/src/leader/types.rs +44 -1
  29. package/crates/team-agent/src/lib.rs +3 -0
  30. package/crates/team-agent/src/lifecycle/display.rs +645 -47
  31. package/crates/team-agent/src/lifecycle/launch.rs +818 -116
  32. package/crates/team-agent/src/lifecycle/mod.rs +2 -0
  33. package/crates/team-agent/src/lifecycle/profile_launch.rs +810 -0
  34. package/crates/team-agent/src/lifecycle/profile_smoke.rs +522 -0
  35. package/crates/team-agent/src/lifecycle/restart/agent.rs +99 -23
  36. package/crates/team-agent/src/lifecycle/restart/common.rs +177 -83
  37. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +443 -9
  38. package/crates/team-agent/src/lifecycle/restart/remove.rs +22 -6
  39. package/crates/team-agent/src/lifecycle/restart/team_state.rs +19 -0
  40. package/crates/team-agent/src/lifecycle/restart.rs +4 -1
  41. package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +5 -5
  42. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +37 -7
  43. package/crates/team-agent/src/lifecycle/types.rs +19 -0
  44. package/crates/team-agent/src/mcp_server/helpers.rs +1 -0
  45. package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +341 -0
  46. package/crates/team-agent/src/mcp_server/lifecycle_tools/mod.rs +10 -0
  47. package/crates/team-agent/src/mcp_server/lifecycle_tools/state_status.rs +158 -0
  48. package/crates/team-agent/src/mcp_server/mod.rs +3 -74
  49. package/crates/team-agent/src/mcp_server/tests/scoped.rs +1 -1
  50. package/crates/team-agent/src/mcp_server/tests/send.rs +6 -5
  51. package/crates/team-agent/src/mcp_server/tools.rs +312 -111
  52. package/crates/team-agent/src/mcp_server/types.rs +6 -4
  53. package/crates/team-agent/src/mcp_server/wire.rs +19 -7
  54. package/crates/team-agent/src/message_store.rs +21 -4
  55. package/crates/team-agent/src/messaging/delivery.rs +87 -37
  56. package/crates/team-agent/src/messaging/mod.rs +9 -6
  57. package/crates/team-agent/src/messaging/results.rs +153 -16
  58. package/crates/team-agent/src/messaging/selftest.rs +199 -12
  59. package/crates/team-agent/src/messaging/send.rs +35 -3
  60. package/crates/team-agent/src/messaging/tests/runtime.rs +19 -4
  61. package/crates/team-agent/src/messaging/types.rs +11 -3
  62. package/crates/team-agent/src/os_probe.rs +119 -0
  63. package/crates/team-agent/src/packaging/migrate.rs +10 -2
  64. package/crates/team-agent/src/packaging/tests.rs +23 -0
  65. package/crates/team-agent/src/provider/adapter.rs +483 -67
  66. package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +1 -7
  67. package/crates/team-agent/src/provider/classify.rs +51 -4
  68. package/crates/team-agent/src/provider/startup_prompt.rs +94 -0
  69. package/crates/team-agent/src/provider/types.rs +47 -0
  70. package/crates/team-agent/src/session_capture.rs +616 -0
  71. package/crates/team-agent/src/state/persist.rs +57 -0
  72. package/crates/team-agent/src/state/projection.rs +32 -23
  73. package/crates/team-agent/src/state/selector.rs +5 -2
  74. package/crates/team-agent/src/tmux_backend.rs +97 -60
  75. package/crates/team-agent/src/transport/test_support.rs +9 -0
  76. package/crates/team-agent/src/transport/tests/wire.rs +4 -0
  77. package/crates/team-agent/src/transport.rs +13 -2
  78. package/package.json +4 -4
@@ -24,7 +24,15 @@
24
24
  //! 所有 fn body = `unimplemented!("step14b port: ...")`。RED 契约据此 NAME 类型 + CALL 真 fn。
25
25
 
26
26
  // ROUND-0 skeleton:fn body 全 unimplemented!() → import/field/param/大 Err 暂未落地;P2 porter 实现时移除。
27
- #![allow(dead_code, unused_imports, unused_variables, clippy::result_large_err, clippy::doc_overindented_list_items, clippy::doc_lazy_continuation, clippy::io_other_error)]
27
+ #![allow(
28
+ dead_code,
29
+ unused_imports,
30
+ unused_variables,
31
+ clippy::result_large_err,
32
+ clippy::doc_overindented_list_items,
33
+ clippy::doc_lazy_continuation,
34
+ clippy::io_other_error
35
+ )]
28
36
  // §10:CLI 命令实现层禁 unwrap/expect/panic(unimplemented!() stub 不被拦);tests 子模块各自 allow。
29
37
  #![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
30
38
 
@@ -36,10 +44,10 @@ use serde_json::{json, Map, Value};
36
44
  use thiserror::Error;
37
45
 
38
46
  // REUSE in-tree(只 import,不 redefine):
39
- use crate::model::ids::{TaskId, TeamKey};
40
47
  use crate::messaging::{self, AlertType, MessageTarget, SendOptions};
48
+ use crate::model::ids::{TaskId, TeamKey};
41
49
 
42
- pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency. Does NOT perform live runtime message round-trip. comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)";
50
+ pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency and zero-token comms contracts. Does NOT perform live runtime message round-trip. (zero token, zero pollution)";
43
51
 
44
52
  pub mod adapters;
45
53
  pub mod diagnose;
@@ -60,6 +68,23 @@ pub use send::*;
60
68
  pub use status::*;
61
69
  pub use types::*;
62
70
 
71
+ /// Public `attach-leader` CLI handler. It consumes the typed pane/provider args and
72
+ /// writes/returns a `leader_receiver` binding via the leader lease port.
73
+ pub fn cmd_attach_leader(args: &AttachLeaderArgs) -> Result<CmdResult, CliError> {
74
+ let mut value = leader_port::attach_leader(
75
+ &args.workspace,
76
+ args.team.as_deref(),
77
+ args.pane.as_ref(),
78
+ args.provider,
79
+ args.confirm,
80
+ )?;
81
+ if let Some(obj) = value.as_object_mut() {
82
+ obj.entry("leader_receiver".to_string())
83
+ .or_insert(Value::Null);
84
+ }
85
+ Ok(CmdResult::from_json(value, args.json))
86
+ }
87
+
63
88
  pub(crate) use helpers::*;
64
89
 
65
90
  #[cfg(test)]
@@ -75,7 +100,6 @@ mod tests;
75
100
  /// `cmd_inbox` 委派的只读投影面。返回 serde `Value`(稳定 JSON 形状由 status lane 拥有)。
76
101
  pub mod status_port;
77
102
 
78
-
79
103
  /// PLACEHOLDER → step13 lifecycle(`runtime.{quick_start,start_agent,add_agent,fork_agent,
80
104
  /// remove_agent,start_agent,stop_agent,reset_agent,restart,shutdown,start_leader,acknowledge_idle}`)。
81
105
  /// `quick_start.py` 物理在本子系统但实现属 step 13(card)。本层只声明委派面。
@@ -92,7 +116,9 @@ pub mod lifecycle_port {
92
116
  yes: bool,
93
117
  fresh: bool,
94
118
  ) -> Result<Value, CliError> {
95
- match crate::lifecycle::quick_start_in_workspace(workspace, agents_dir, name, yes, fresh, team_id) {
119
+ match crate::lifecycle::quick_start_in_workspace(
120
+ workspace, agents_dir, name, yes, fresh, team_id,
121
+ ) {
96
122
  Ok(report) => Ok(quick_start_value(report)),
97
123
  Err(e) => Ok(error_value(e)),
98
124
  }
@@ -104,19 +130,37 @@ pub mod lifecycle_port {
104
130
  cwd: &Path,
105
131
  attach: &LeaderLauncherArgs,
106
132
  ) -> Result<Value, CliError> {
107
- let _ = (provider_args, cwd);
108
- let provider_name = match provider {
109
- Provider::Codex => "codex",
110
- Provider::ClaudeCode | Provider::Claude => "claude_code",
111
- Provider::GeminiCli => "gemini_cli",
112
- Provider::Fake => "fake",
133
+ let attach_session = attach
134
+ .attach_session
135
+ .as_ref()
136
+ .map(|name| crate::transport::SessionName::new(name.clone()));
137
+ let plan = crate::leader::start::leader_start_plan(
138
+ provider,
139
+ provider_args,
140
+ cwd,
141
+ attach.attach_existing,
142
+ attach.confirm_attach,
143
+ attach_session.as_ref(),
144
+ )
145
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
146
+ let outcome = crate::leader::start::execute_leader_plan(&plan, cwd)
147
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
148
+ let ok = match outcome.status {
149
+ crate::leader::LeaderLaunchStatus::Exited => outcome.exit_code == Some(0),
150
+ crate::leader::LeaderLaunchStatus::Detached => true,
151
+ crate::leader::LeaderLaunchStatus::NotStarted => false,
113
152
  };
114
153
  Ok(json!({
115
- "ok": true,
116
- "provider": provider_name,
154
+ "ok": ok,
155
+ "provider": provider,
156
+ "mode": plan.mode,
157
+ "status": outcome.status,
158
+ "exit_code": outcome.exit_code,
159
+ "reason": outcome.reason,
117
160
  "attach_existing": attach.attach_existing,
118
161
  "confirm_attach": attach.confirm_attach,
119
162
  "attach_session": attach.attach_session,
163
+ "session_name": plan.session_name.as_ref().map(|session| session.as_str().to_string()),
120
164
  }))
121
165
  }
122
166
  /// `runtime.shutdown`(`cmd_shutdown`)。
@@ -124,23 +168,13 @@ pub mod lifecycle_port {
124
168
  let run_ws = crate::model::paths::canonical_run_workspace(workspace)
125
169
  .map_err(|e| CliError::Runtime(e.to_string()))?;
126
170
  let state = shutdown_state_for_team(&run_ws, team)?;
127
- let endpoint = stored_tmux_endpoint(&state);
128
- let transport = match endpoint {
129
- Some(endpoint) if Path::new(endpoint).is_absolute() => {
130
- crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
131
- }
132
- Some(endpoint) if !endpoint.is_empty() => {
133
- crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
134
- }
135
- _ => shutdown_workspace_transport(&run_ws),
171
+ let transport = if let Some(endpoint) = legacy_worker_tmux_endpoint(&state) {
172
+ crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
173
+ } else {
174
+ shutdown_workspace_transport(&run_ws)
136
175
  };
137
- let result = shutdown_with_transport_and_state(
138
- workspace,
139
- keep_logs,
140
- team,
141
- &transport,
142
- Some(state),
143
- );
176
+ let result =
177
+ shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
144
178
  if team.is_none() {
145
179
  transport.kill_server();
146
180
  }
@@ -163,47 +197,63 @@ pub mod lifecycle_port {
163
197
  transport: &dyn crate::transport::Transport,
164
198
  state: Option<Value>,
165
199
  ) -> Result<Value, CliError> {
200
+ crate::os_probe::clear_probe_timeout();
201
+ let deadline = ShutdownDeadline::new(std::time::Duration::from_secs(20));
166
202
  let run_workspace = crate::model::paths::canonical_run_workspace(workspace)
167
203
  .map_err(|e| CliError::Runtime(e.to_string()))?;
168
- let stopped = if team.is_none() {
169
- let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
170
- Some(
171
- crate::coordinator::stop_coordinator(&wp)
172
- .map_err(|e| CliError::Runtime(e.to_string()))?,
204
+ let _started_event = crate::event_log::EventLog::new(&run_workspace)
205
+ .write(
206
+ "lifecycle.shutdown.started",
207
+ json!({
208
+ "keep_logs": keep_logs,
209
+ "team": team,
210
+ }),
173
211
  )
174
- } else {
175
- None
176
- };
212
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
177
213
  let mut state = match state {
178
214
  Some(state) => state,
179
215
  None => shutdown_state_for_team(&run_workspace, team)?,
180
216
  };
181
- let stored_transport = stored_tmux_endpoint(&state).map(tmux_transport_for_endpoint);
182
- let transport = stored_transport
183
- .as_ref()
184
- .map(|transport| transport as &dyn crate::transport::Transport)
185
- .unwrap_or(transport);
186
- let captured_missing_sessions = crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
187
- .map_err(|e| CliError::Runtime(e.to_string()))?;
217
+ deadline.check("refresh_provider_sessions")?;
218
+ let captured_missing_sessions =
219
+ crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
220
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
188
221
  let session_name = state
189
222
  .get("session_name")
190
223
  .and_then(Value::as_str)
191
224
  .filter(|s| !s.is_empty())
192
225
  .map(crate::transport::SessionName::new);
193
- let mut root_pids = state_process_roots(&state);
226
+ let protected = shutdown_protection_set();
227
+ let reap_scope = if team.is_some() {
228
+ ShutdownReapScope::ScopedTeam
229
+ } else {
230
+ ShutdownReapScope::Workspace
231
+ };
232
+ deadline.check("process_roots")?;
233
+ let mut root_pids = state_process_roots(&state, reap_scope)
234
+ .into_iter()
235
+ .filter(|pid| !protected.contains_pid(*pid))
236
+ .collect::<Vec<_>>();
194
237
  let pane_pids = session_name
195
238
  .as_ref()
196
- .map(|session| pane_pids_for_session(transport, session))
239
+ .map(|session| {
240
+ pane_pids_for_session(transport, session)
241
+ .into_iter()
242
+ .filter(|pid| !protected.contains_pid(*pid))
243
+ .collect::<Vec<_>>()
244
+ })
197
245
  .unwrap_or_default();
198
246
  root_pids.extend(pane_pids);
199
247
  root_pids.sort_unstable();
200
248
  root_pids.dedup();
201
- let root_pgids = process_pgids(&root_pids);
249
+ let root_pgids = process_pgids(&root_pids, &protected);
250
+ deadline.check("reap_process_tree")?;
202
251
  for pid in &root_pids {
203
- reap_process_tree(*pid);
252
+ reap_process_tree(*pid, &protected);
204
253
  }
205
- reap_process_groups(&root_pgids);
254
+ reap_process_groups(&root_pgids, &protected);
206
255
  let mut kill_error: Option<String> = None;
256
+ deadline.check("kill_session")?;
207
257
  if let Some(session) = session_name.as_ref() {
208
258
  if let Err(error) = transport.kill_session(session) {
209
259
  if !tmux_absent_error(&error.to_string()) {
@@ -211,7 +261,16 @@ pub mod lifecycle_port {
211
261
  }
212
262
  }
213
263
  }
214
- reap_workspace_process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
264
+ deadline.check("reap_workspace_residuals")?;
265
+ reap_workspace_process_residuals(
266
+ &run_workspace,
267
+ &state,
268
+ &root_pids,
269
+ &root_pgids,
270
+ &protected,
271
+ reap_scope,
272
+ );
273
+ deadline.check("session_residuals")?;
215
274
  let session_residuals = if let Some(session) = session_name.as_ref() {
216
275
  let (residuals, error) = session_residuals_after_reap(
217
276
  transport,
@@ -226,33 +285,92 @@ pub mod lifecycle_port {
226
285
  } else {
227
286
  Vec::new()
228
287
  };
229
- let process_residuals = process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
288
+ deadline.check("process_residuals")?;
289
+ let process_residuals = process_residuals(
290
+ &run_workspace,
291
+ &state,
292
+ &root_pids,
293
+ &root_pgids,
294
+ &protected,
295
+ reap_scope,
296
+ );
297
+ deadline.check("stop_coordinator")?;
298
+ let mut coordinator_timeout = false;
299
+ let stopped = if team.is_none() {
300
+ let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
301
+ match stop_coordinator_bounded(wp, std::time::Duration::from_millis(900)) {
302
+ Some(Ok(report)) => Some(report),
303
+ Some(Err(error)) => {
304
+ kill_error.get_or_insert(error);
305
+ None
306
+ }
307
+ None => {
308
+ coordinator_timeout = true;
309
+ None
310
+ }
311
+ }
312
+ } else {
313
+ None
314
+ };
315
+ let probe_timeout = crate::os_probe::probe_timeout();
316
+ let verification_degraded = probe_timeout.is_some();
230
317
  let session_killed = session_name.is_some()
231
318
  && kill_error.is_none()
232
319
  && session_residuals.is_empty()
233
320
  && process_residuals.is_empty();
234
321
  mark_agents_stopped(&mut state);
322
+ deadline.check("save_state")?;
235
323
  if team.is_some() {
236
324
  crate::state::projection::save_team_scoped_state(&run_workspace, &state)?;
325
+ promote_live_sibling_after_scoped_shutdown(&run_workspace, &state)?;
237
326
  } else {
327
+ let _changed_keys =
328
+ mark_matching_session_teams_stopped(&mut state, session_name.as_ref());
238
329
  crate::state::persist::save_runtime_state(&run_workspace, &state)?;
239
330
  }
240
- let coordinator_status = stopped
331
+ let coordinator_status = if coordinator_timeout {
332
+ "timeout"
333
+ } else {
334
+ stopped
335
+ .as_ref()
336
+ .map(|stopped| stop_status_wire(stopped.status))
337
+ .unwrap_or("not_stopped")
338
+ };
339
+ let coordinator_pid = stopped
241
340
  .as_ref()
242
- .map(|stopped| stop_status_wire(stopped.status))
243
- .unwrap_or("not_stopped");
244
- let coordinator_pid = stopped.as_ref().and_then(|stopped| stopped.pid.map(|p| p.get()));
341
+ .and_then(|stopped| stopped.pid.map(|p| p.get()));
245
342
  let ok = stopped.as_ref().map(|stopped| stopped.ok).unwrap_or(true)
246
343
  && kill_error.is_none()
247
344
  && session_residuals.is_empty()
248
- && process_residuals.is_empty();
345
+ && process_residuals.is_empty()
346
+ && !verification_degraded
347
+ && !coordinator_timeout;
249
348
  let status = if ok {
250
349
  "ok"
350
+ } else if coordinator_timeout {
351
+ "timeout"
352
+ } else if verification_degraded {
353
+ "partial"
251
354
  } else if kill_error.is_some() {
252
355
  "failed"
253
356
  } else {
254
357
  "partial"
255
358
  };
359
+ let phase = if coordinator_timeout {
360
+ Some("stop_coordinator")
361
+ } else if verification_degraded {
362
+ Some("os_probe")
363
+ } else {
364
+ None
365
+ };
366
+ let probe_timeout_kind = probe_timeout.as_ref().map(|timeout| timeout.probe);
367
+ let probe_timeout_value = probe_timeout.as_ref().map(|timeout| {
368
+ json!({
369
+ "probe": timeout.probe,
370
+ "pid": timeout.pid,
371
+ "timeout_ms": timeout.timeout_ms,
372
+ })
373
+ });
256
374
  let _event = crate::event_log::EventLog::new(&run_workspace)
257
375
  .write(
258
376
  "lifecycle.shutdown",
@@ -263,12 +381,20 @@ pub mod lifecycle_port {
263
381
  "session_killed": session_killed,
264
382
  "coordinator_status": coordinator_status,
265
383
  "status": status,
384
+ "phase": phase,
385
+ "verification_degraded": verification_degraded,
386
+ "probe_timeout_kind": probe_timeout_kind,
387
+ "probe_timeout": probe_timeout_value,
266
388
  }),
267
389
  )
268
390
  .map_err(|e| CliError::Runtime(e.to_string()))?;
269
391
  Ok(json!({
270
392
  "ok": ok,
271
393
  "status": status,
394
+ "phase": phase,
395
+ "verification_degraded": verification_degraded,
396
+ "probe_timeout_kind": probe_timeout_kind,
397
+ "probe_timeout": probe_timeout_value,
272
398
  "keep_logs": keep_logs,
273
399
  "team": team,
274
400
  "session_name": session_name.map(|s| s.as_str().to_string()),
@@ -285,9 +411,51 @@ pub mod lifecycle_port {
285
411
  }))
286
412
  }
287
413
 
414
+ fn stop_coordinator_bounded(
415
+ workspace: crate::coordinator::WorkspacePath,
416
+ timeout: std::time::Duration,
417
+ ) -> Option<Result<crate::coordinator::types::StopReport, String>> {
418
+ let (tx, rx) = std::sync::mpsc::channel();
419
+ std::thread::spawn(move || {
420
+ let result =
421
+ crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
422
+ let _ = tx.send(result);
423
+ });
424
+ rx.recv_timeout(timeout).ok()
425
+ }
426
+
427
+ struct ShutdownDeadline {
428
+ start: std::time::Instant,
429
+ timeout: std::time::Duration,
430
+ }
431
+
432
+ impl ShutdownDeadline {
433
+ fn new(timeout: std::time::Duration) -> Self {
434
+ Self {
435
+ start: std::time::Instant::now(),
436
+ timeout,
437
+ }
438
+ }
439
+
440
+ fn check(&self, phase: &'static str) -> Result<(), CliError> {
441
+ if self.start.elapsed() >= self.timeout {
442
+ return Err(CliError::Runtime(
443
+ json!({
444
+ "ok": false,
445
+ "status": "timeout",
446
+ "phase": phase,
447
+ })
448
+ .to_string(),
449
+ ));
450
+ }
451
+ Ok(())
452
+ }
453
+ }
454
+
288
455
  fn shutdown_state_for_team(workspace: &Path, team: Option<&str>) -> Result<Value, CliError> {
289
456
  if let Some(team) = team {
290
- crate::state::projection::select_runtime_state(workspace, Some(team)).map_err(CliError::from)
457
+ crate::state::projection::select_runtime_state(workspace, Some(team))
458
+ .map_err(CliError::from)
291
459
  } else {
292
460
  crate::state::persist::load_runtime_state(workspace).map_err(CliError::from)
293
461
  }
@@ -297,46 +465,11 @@ pub mod lifecycle_port {
297
465
  crate::tmux_backend::TmuxBackend::for_workspace(workspace)
298
466
  }
299
467
 
300
- fn tmux_transport_for_endpoint(endpoint: &str) -> crate::tmux_backend::TmuxBackend {
301
- if Path::new(endpoint).is_absolute() {
302
- crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
303
- } else {
304
- crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
305
- }
306
- }
307
-
308
- fn stored_tmux_endpoint(state: &Value) -> Option<&str> {
309
- leader_receiver_tmux_socket(state)
310
- .or_else(|| active_team_entry(state).and_then(leader_receiver_tmux_socket))
311
- .or_else(|| only_team_entry(state).and_then(leader_receiver_tmux_socket))
312
- }
313
-
314
- fn leader_receiver_tmux_socket(state: &Value) -> Option<&str> {
468
+ fn legacy_worker_tmux_endpoint(state: &Value) -> Option<&str> {
315
469
  state
316
- .get("leader_receiver")
317
- .and_then(|receiver| receiver.get("tmux_socket"))
318
- .and_then(Value::as_str)
319
- .filter(|socket| !socket.is_empty())
320
- }
321
-
322
- fn active_team_entry(state: &Value) -> Option<&Value> {
323
- let active = state
324
- .get("active_team_key")
470
+ .get("tmux_endpoint")
325
471
  .and_then(Value::as_str)
326
- .filter(|team| !team.is_empty())?;
327
- state
328
- .get("teams")
329
- .and_then(Value::as_object)
330
- .and_then(|teams| teams.get(active))
331
- }
332
-
333
- fn only_team_entry(state: &Value) -> Option<&Value> {
334
- let teams = state.get("teams").and_then(Value::as_object)?;
335
- if teams.len() == 1 {
336
- teams.values().next()
337
- } else {
338
- None
339
- }
472
+ .filter(|endpoint| !endpoint.is_empty())
340
473
  }
341
474
 
342
475
  fn pane_pids_for_session(
@@ -399,12 +532,20 @@ pub mod lifecycle_port {
399
532
  (sessions, error)
400
533
  }
401
534
 
402
- fn state_process_roots(state: &Value) -> Vec<u32> {
535
+ #[derive(Clone, Copy, Debug, Eq, PartialEq)]
536
+ enum ShutdownReapScope {
537
+ Workspace,
538
+ ScopedTeam,
539
+ }
540
+
541
+ fn state_process_roots(state: &Value, scope: ShutdownReapScope) -> Vec<u32> {
403
542
  let mut out = Vec::new();
404
543
  collect_agent_process_roots(state, &mut out);
405
- if let Some(teams) = state.get("teams").and_then(Value::as_object) {
406
- for team in teams.values() {
407
- collect_agent_process_roots(team, &mut out);
544
+ if scope == ShutdownReapScope::Workspace {
545
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
546
+ for team in teams.values() {
547
+ collect_agent_process_roots(team, &mut out);
548
+ }
408
549
  }
409
550
  }
410
551
  out.sort_unstable();
@@ -433,8 +574,11 @@ pub mod lifecycle_port {
433
574
  .filter(|pid| *pid > 0)
434
575
  }
435
576
 
436
- fn reap_process_tree(root_pid: u32) {
437
- let pids = process_tree_pids(root_pid);
577
+ fn reap_process_tree(root_pid: u32, protected: &ShutdownProtection) {
578
+ let pids = process_tree_pids(root_pid)
579
+ .into_iter()
580
+ .filter(|pid| !protected.contains_pid(*pid))
581
+ .collect::<Vec<_>>();
438
582
  for pid in pids.iter().rev() {
439
583
  send_process_signal(*pid, libc::SIGTERM);
440
584
  }
@@ -445,13 +589,12 @@ pub mod lifecycle_port {
445
589
  wait_for_processes_gone(&pids, std::time::Duration::from_secs(1));
446
590
  }
447
591
 
448
- fn reap_process_groups(pgids: &[u32]) {
449
- let current_pgid = unsafe { libc::getpgrp() };
592
+ fn reap_process_groups(pgids: &[u32], protected: &ShutdownProtection) {
450
593
  for pgid in pgids {
451
594
  let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
452
595
  continue;
453
596
  };
454
- if pgid_t <= 1 || pgid_t == current_pgid {
597
+ if pgid_t <= 1 || protected.contains_pgid(*pgid) {
455
598
  continue;
456
599
  }
457
600
  send_process_signal_group(pgid_t, libc::SIGTERM);
@@ -461,7 +604,7 @@ pub mod lifecycle_port {
461
604
  let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
462
605
  continue;
463
606
  };
464
- if pgid_t <= 1 || pgid_t == current_pgid {
607
+ if pgid_t <= 1 || protected.contains_pgid(*pgid) {
465
608
  continue;
466
609
  }
467
610
  send_process_signal_group(pgid_t, libc::SIGKILL);
@@ -473,20 +616,23 @@ pub mod lifecycle_port {
473
616
  state: &Value,
474
617
  root_pids: &[u32],
475
618
  root_pgids: &[u32],
619
+ protected: &ShutdownProtection,
620
+ scope: ShutdownReapScope,
476
621
  ) {
477
622
  for _ in 0..5 {
478
- let residuals = matched_processes(workspace, state, root_pids, root_pgids);
623
+ let residuals =
624
+ matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
479
625
  if residuals.is_empty() {
480
626
  return;
481
627
  }
482
628
  for process in &residuals {
483
- reap_process_tree(process.pid);
629
+ reap_process_tree(process.pid, protected);
484
630
  }
485
631
  let pgids = residuals
486
632
  .iter()
487
633
  .filter_map(|process| process.pgid)
488
634
  .collect::<Vec<_>>();
489
- reap_process_groups(&pgids);
635
+ reap_process_groups(&pgids, protected);
490
636
  std::thread::sleep(std::time::Duration::from_millis(100));
491
637
  }
492
638
  }
@@ -513,10 +659,11 @@ pub mod lifecycle_port {
513
659
  }
514
660
 
515
661
  fn process_parent_pairs() -> Vec<(u32, u32)> {
516
- let output = match std::process::Command::new("ps")
517
- .args(["-axo", "pid=,ppid="])
518
- .output()
519
- {
662
+ let output = match crate::os_probe::bounded_command_output_with_probe(
663
+ std::process::Command::new("ps").args(["-axo", "pid=,ppid="]),
664
+ "ps_parent",
665
+ None,
666
+ ) {
520
667
  Ok(output) if output.status.success() => output,
521
668
  _ => return Vec::new(),
522
669
  };
@@ -532,10 +679,11 @@ pub mod lifecycle_port {
532
679
  }
533
680
 
534
681
  fn process_table() -> Vec<ProcessInfo> {
535
- let output = match std::process::Command::new("ps")
536
- .args(["-axo", "pid=,ppid=,pgid=,command="])
537
- .output()
538
- {
682
+ let output = match crate::os_probe::bounded_command_output_with_probe(
683
+ std::process::Command::new("ps").args(["-axo", "pid=,ppid=,pgid=,sess=,command="]),
684
+ "ps_table",
685
+ None,
686
+ ) {
539
687
  Ok(output) if output.status.success() => output,
540
688
  _ => return Vec::new(),
541
689
  };
@@ -550,11 +698,13 @@ pub mod lifecycle_port {
550
698
  let pid = parts.next()?.parse::<u32>().ok()?;
551
699
  let ppid = parts.next()?.parse::<u32>().ok()?;
552
700
  let pgid = parts.next().and_then(|raw| raw.parse::<u32>().ok());
701
+ let session = parts.next().and_then(|raw| raw.parse::<u32>().ok());
553
702
  let command = parts.collect::<Vec<_>>().join(" ");
554
703
  Some(ProcessInfo {
555
704
  pid,
556
705
  ppid,
557
706
  pgid,
707
+ session,
558
708
  command,
559
709
  })
560
710
  }
@@ -564,9 +714,57 @@ pub mod lifecycle_port {
564
714
  pid: u32,
565
715
  ppid: u32,
566
716
  pgid: Option<u32>,
717
+ session: Option<u32>,
567
718
  command: String,
568
719
  }
569
720
 
721
+ #[derive(Clone, Debug, Default)]
722
+ struct ShutdownProtection {
723
+ pids: std::collections::BTreeSet<u32>,
724
+ pgids: std::collections::BTreeSet<u32>,
725
+ }
726
+
727
+ impl ShutdownProtection {
728
+ fn contains_pid(&self, pid: u32) -> bool {
729
+ self.pids.contains(&pid)
730
+ }
731
+
732
+ fn contains_pgid(&self, pgid: u32) -> bool {
733
+ self.pgids.contains(&pgid)
734
+ }
735
+
736
+ fn contains_process(&self, process: &ProcessInfo) -> bool {
737
+ self.pids.contains(&process.pid)
738
+ || process.pgid.is_some_and(|pgid| self.pgids.contains(&pgid))
739
+ }
740
+ }
741
+
742
+ fn shutdown_protection_set() -> ShutdownProtection {
743
+ let table = process_table();
744
+ let mut protected = ShutdownProtection::default();
745
+ let current = std::process::id();
746
+ protected.pids.insert(current);
747
+ if let Ok(pgid) = u32::try_from(unsafe { libc::getpgrp() }) {
748
+ protected.pgids.insert(pgid);
749
+ }
750
+ let mut cursor = current;
751
+ let mut seen = std::collections::BTreeSet::new();
752
+ while seen.insert(cursor) {
753
+ let Some(process) = table.iter().find(|process| process.pid == cursor) else {
754
+ break;
755
+ };
756
+ protected.pids.insert(process.pid);
757
+ if let Some(pgid) = process.pgid {
758
+ protected.pgids.insert(pgid);
759
+ }
760
+ if process.ppid == 0 || process.ppid == process.pid {
761
+ break;
762
+ }
763
+ cursor = process.ppid;
764
+ }
765
+ protected
766
+ }
767
+
570
768
  fn send_process_signal(pid: u32, signal: libc::c_int) {
571
769
  let Ok(pid_t) = libc::pid_t::try_from(pid) else {
572
770
  return;
@@ -617,16 +815,15 @@ pub mod lifecycle_port {
617
815
  err.raw_os_error() == Some(libc::EPERM)
618
816
  }
619
817
 
620
- fn process_pgids(pids: &[u32]) -> Vec<u32> {
818
+ fn process_pgids(pids: &[u32], protected: &ShutdownProtection) -> Vec<u32> {
621
819
  let table = process_table();
622
- let current_pgid = unsafe { libc::getpgrp() };
623
820
  let mut pgids = pids
624
821
  .iter()
625
822
  .filter_map(|pid| table.iter().find(|process| process.pid == *pid))
626
823
  .filter_map(|process| process.pgid)
627
824
  .filter(|pgid| {
628
825
  libc::pid_t::try_from(*pgid)
629
- .map(|pgid| pgid > 1 && pgid != current_pgid)
826
+ .map(|pgid_t| pgid_t > 1 && !protected.contains_pgid(*pgid))
630
827
  .unwrap_or(false)
631
828
  })
632
829
  .collect::<Vec<_>>();
@@ -640,15 +837,22 @@ pub mod lifecycle_port {
640
837
  state: &Value,
641
838
  root_pids: &[u32],
642
839
  root_pgids: &[u32],
840
+ protected: &ShutdownProtection,
841
+ scope: ShutdownReapScope,
643
842
  ) -> Vec<Value> {
644
- let mut residuals = matched_processes(workspace, state, root_pids, root_pgids);
645
- let mut seen = residuals.iter().map(|process| process.pid).collect::<std::collections::BTreeSet<_>>();
843
+ let mut residuals =
844
+ matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
845
+ let mut seen = residuals
846
+ .iter()
847
+ .map(|process| process.pid)
848
+ .collect::<std::collections::BTreeSet<_>>();
646
849
  for pid in root_pids {
647
- if process_is_live(*pid) && seen.insert(*pid) {
850
+ if !protected.contains_pid(*pid) && process_is_live(*pid) && seen.insert(*pid) {
648
851
  residuals.push(ProcessInfo {
649
852
  pid: *pid,
650
853
  ppid: 0,
651
854
  pgid: None,
855
+ session: None,
652
856
  command: String::new(),
653
857
  });
654
858
  }
@@ -660,6 +864,7 @@ pub mod lifecycle_port {
660
864
  "pid": process.pid,
661
865
  "ppid": process.ppid,
662
866
  "pgid": process.pgid,
867
+ "session": process.session,
663
868
  "command": process.command,
664
869
  })
665
870
  })
@@ -671,25 +876,42 @@ pub mod lifecycle_port {
671
876
  state: &Value,
672
877
  root_pids: &[u32],
673
878
  root_pgids: &[u32],
879
+ protected: &ShutdownProtection,
880
+ scope: ShutdownReapScope,
674
881
  ) -> Vec<ProcessInfo> {
675
882
  let table = process_table();
676
883
  let root_tree = root_pids
677
884
  .iter()
678
885
  .flat_map(|pid| process_tree_from_table(*pid, &table))
886
+ .filter(|pid| !protected.contains_pid(*pid))
887
+ .collect::<std::collections::BTreeSet<_>>();
888
+ let root_pgids = root_pgids
889
+ .iter()
890
+ .copied()
679
891
  .collect::<std::collections::BTreeSet<_>>();
680
- let root_pgids = root_pgids.iter().copied().collect::<std::collections::BTreeSet<_>>();
681
- let spawn_cwds = state_spawn_cwds(state);
892
+ let spawn_cwds = state_spawn_cwds(state, scope);
682
893
  let workspace_text = workspace.to_string_lossy().to_string();
683
- let current_pid = std::process::id();
684
- table
685
- .into_iter()
686
- .filter(|process| process.pid != current_pid)
687
- .filter(|process| {
688
- process_matches_workspace(process, &workspace_text, &spawn_cwds)
689
- || root_tree.contains(&process.pid)
690
- || process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
691
- })
692
- .collect()
894
+ let mut cwd_probe_budget = 3_usize;
895
+ let mut out = Vec::new();
896
+ for process in table {
897
+ if protected.contains_pid(process.pid) {
898
+ continue;
899
+ }
900
+ let matches_workspace = scope == ShutdownReapScope::Workspace
901
+ && process_matches_workspace(
902
+ &process,
903
+ &workspace_text,
904
+ &spawn_cwds,
905
+ &mut cwd_probe_budget,
906
+ );
907
+ if matches_workspace
908
+ || root_tree.contains(&process.pid)
909
+ || process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
910
+ {
911
+ out.push(process);
912
+ }
913
+ }
914
+ out
693
915
  }
694
916
 
695
917
  fn process_tree_from_table(root_pid: u32, table: &[ProcessInfo]) -> Vec<u32> {
@@ -712,12 +934,14 @@ pub mod lifecycle_port {
712
934
  out
713
935
  }
714
936
 
715
- fn state_spawn_cwds(state: &Value) -> Vec<PathBuf> {
937
+ fn state_spawn_cwds(state: &Value, scope: ShutdownReapScope) -> Vec<PathBuf> {
716
938
  let mut out = Vec::new();
717
939
  collect_spawn_cwds(state, &mut out);
718
- if let Some(teams) = state.get("teams").and_then(Value::as_object) {
719
- for team in teams.values() {
720
- collect_spawn_cwds(team, &mut out);
940
+ if scope == ShutdownReapScope::Workspace {
941
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
942
+ for team in teams.values() {
943
+ collect_spawn_cwds(team, &mut out);
944
+ }
721
945
  }
722
946
  }
723
947
  out
@@ -728,7 +952,11 @@ pub mod lifecycle_port {
728
952
  return;
729
953
  };
730
954
  for agent in agents.values() {
731
- if let Some(spawn_cwd) = agent.get("spawn_cwd").and_then(Value::as_str).filter(|cwd| !cwd.is_empty()) {
955
+ if let Some(spawn_cwd) = agent
956
+ .get("spawn_cwd")
957
+ .and_then(Value::as_str)
958
+ .filter(|cwd| !cwd.is_empty())
959
+ {
732
960
  out.push(PathBuf::from(spawn_cwd));
733
961
  }
734
962
  }
@@ -738,6 +966,7 @@ pub mod lifecycle_port {
738
966
  process: &ProcessInfo,
739
967
  workspace_text: &str,
740
968
  spawn_cwds: &[PathBuf],
969
+ cwd_probe_budget: &mut usize,
741
970
  ) -> bool {
742
971
  let command = process.command.as_str();
743
972
  if command.contains("mcp-server")
@@ -746,22 +975,19 @@ pub mod lifecycle_port {
746
975
  {
747
976
  return true;
748
977
  }
749
- let lower = command.to_ascii_lowercase();
750
- let provider_like = lower.contains("codex")
751
- || lower.contains("claude")
752
- || lower.contains("node")
753
- || lower.contains("mcp-server")
754
- || lower.contains("team-agent");
755
- if !provider_like {
756
- return false;
757
- }
758
978
  if command.contains(workspace_text) {
759
979
  return true;
760
980
  }
981
+ if spawn_cwds.is_empty() || *cwd_probe_budget == 0 {
982
+ return false;
983
+ }
984
+ *cwd_probe_budget -= 1;
761
985
  let Some(cwd) = process_cwd(process.pid) else {
762
986
  return false;
763
987
  };
764
- spawn_cwds.iter().any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
988
+ spawn_cwds
989
+ .iter()
990
+ .any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
765
991
  }
766
992
 
767
993
  fn process_cwd(pid: u32) -> Option<PathBuf> {
@@ -769,10 +995,22 @@ pub mod lifecycle_port {
769
995
  if let Ok(path) = std::fs::read_link(proc_cwd) {
770
996
  return Some(path);
771
997
  }
772
- let output = std::process::Command::new("lsof")
773
- .args(["-a", "-p", &pid.to_string(), "-d", "cwd", "-Fn"])
774
- .output()
775
- .ok()?;
998
+ if crate::os_probe::probe_timed_out() {
999
+ return None;
1000
+ }
1001
+ let output = crate::os_probe::bounded_command_output_with_probe(
1002
+ std::process::Command::new("lsof").args([
1003
+ "-a",
1004
+ "-p",
1005
+ &pid.to_string(),
1006
+ "-d",
1007
+ "cwd",
1008
+ "-Fn",
1009
+ ]),
1010
+ "lsof_cwd",
1011
+ Some(pid),
1012
+ )
1013
+ .ok()?;
776
1014
  if !output.status.success() {
777
1015
  return None;
778
1016
  }
@@ -787,8 +1025,18 @@ pub mod lifecycle_port {
787
1025
  path == root || path.starts_with(root)
788
1026
  }
789
1027
  /// `runtime.restart`(`cmd_restart`)。
790
- pub fn restart(workspace: &Path, allow_fresh: bool, team: Option<&str>) -> Result<Value, CliError> {
791
- match crate::lifecycle::restart(workspace, allow_fresh, team) {
1028
+ pub fn restart(
1029
+ workspace: &Path,
1030
+ allow_fresh: bool,
1031
+ team: Option<&str>,
1032
+ session_converge_deadline_ms: Option<u64>,
1033
+ ) -> Result<Value, CliError> {
1034
+ match crate::lifecycle::restart_with_session_convergence_deadline(
1035
+ workspace,
1036
+ allow_fresh,
1037
+ team,
1038
+ session_converge_deadline_ms,
1039
+ ) {
792
1040
  Ok(report) => Ok(restart_value(report)),
793
1041
  Err(e) => Ok(error_value(e)),
794
1042
  }
@@ -811,12 +1059,18 @@ pub mod lifecycle_port {
811
1059
  allow_fresh,
812
1060
  team,
813
1061
  ) {
814
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1062
+ Ok(report) => {
1063
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1064
+ }
815
1065
  Err(e) => Ok(error_value(e)),
816
1066
  }
817
1067
  }
818
1068
  /// `runtime.stop_agent`(`cmd_stop_agent`)。
819
- pub fn stop_agent(workspace: &Path, agent: &str, team: Option<&str>) -> Result<Value, CliError> {
1069
+ pub fn stop_agent(
1070
+ workspace: &Path,
1071
+ agent: &str,
1072
+ team: Option<&str>,
1073
+ ) -> Result<Value, CliError> {
820
1074
  let agent_id = crate::model::ids::AgentId::new(agent);
821
1075
  match crate::lifecycle::stop_agent(workspace, &agent_id, team) {
822
1076
  Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "stopped": report.stopped})),
@@ -839,7 +1093,9 @@ pub mod lifecycle_port {
839
1093
  open_display,
840
1094
  team,
841
1095
  ) {
842
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1096
+ Ok(report) => {
1097
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1098
+ }
843
1099
  Err(e) => Ok(error_value(e)),
844
1100
  }
845
1101
  }
@@ -898,11 +1154,15 @@ pub mod lifecycle_port {
898
1154
  team: Option<&str>,
899
1155
  ) -> Result<Value, CliError> {
900
1156
  if !confirm {
901
- return Ok(json!({"ok": false, "agent_id": agent, "error": "remove-agent requires --confirm"}));
1157
+ return Ok(
1158
+ json!({"ok": false, "agent_id": agent, "error": "remove-agent requires --confirm"}),
1159
+ );
902
1160
  }
903
1161
  let agent_id = crate::model::ids::AgentId::new(agent);
904
1162
  match crate::lifecycle::remove_agent(workspace, &agent_id, from_spec, force, team) {
905
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1163
+ Ok(report) => {
1164
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1165
+ }
906
1166
  Err(e) => Ok(error_value(e)),
907
1167
  }
908
1168
  }
@@ -912,9 +1172,18 @@ pub mod lifecycle_port {
912
1172
  .map_err(|e| CliError::Runtime(e.to_string()))?;
913
1173
  let team = team
914
1174
  .map(ToString::to_string)
915
- .or_else(|| state.get("active_team_key").and_then(Value::as_str).map(ToString::to_string))
1175
+ .or_else(|| {
1176
+ state
1177
+ .get("active_team_key")
1178
+ .and_then(Value::as_str)
1179
+ .map(ToString::to_string)
1180
+ })
916
1181
  .filter(|s| !s.is_empty())
917
- .or_else(|| workspace.file_name().map(|name| name.to_string_lossy().to_string()))
1182
+ .or_else(|| {
1183
+ workspace
1184
+ .file_name()
1185
+ .map(|name| name.to_string_lossy().to_string())
1186
+ })
918
1187
  .unwrap_or_else(|| "current".to_string());
919
1188
  let now = chrono::Utc::now().to_rfc3339();
920
1189
  let ttl_seconds = 1800;
@@ -930,7 +1199,10 @@ pub mod lifecycle_port {
930
1199
  crate::state::persist::save_runtime_state(workspace, &state)
931
1200
  .map_err(|e| CliError::Runtime(e.to_string()))?;
932
1201
  crate::event_log::EventLog::new(workspace)
933
- .write("coordinator.idle_acknowledged", json!({"team": team, "ttl_seconds": ttl_seconds}))
1202
+ .write(
1203
+ "coordinator.idle_acknowledged",
1204
+ json!({"team": team, "ttl_seconds": ttl_seconds}),
1205
+ )
934
1206
  .map_err(|e| CliError::Runtime(e.to_string()))?;
935
1207
  Ok(json!({
936
1208
  "ok": true,
@@ -1070,6 +1342,15 @@ pub mod lifecycle_port {
1070
1342
  // The summary string + a structured `worker_readiness` block tell the
1071
1343
  // caller exactly which agents are unhealthy (Degraded) or that the
1072
1344
  // tool-set load has not been confirmed yet (PendingToolLoad).
1345
+ let incomplete_session_capture_agents =
1346
+ launch.session_capture_incomplete_agents.clone();
1347
+ let all_spawned = !launch.started.is_empty();
1348
+ let leader_receiver_attached = launch.leader_receiver_attached;
1349
+ let all_resumable_have_session = incomplete_session_capture_agents.is_empty();
1350
+ let all_workers_spawned = all_spawned;
1351
+ let attached_receiver = leader_receiver_attached;
1352
+ let all_attached_receiver = leader_receiver_attached;
1353
+ let all_resumable_agents_have_sessions = all_resumable_have_session;
1073
1354
  let (summary, ok, readiness_json) = match &worker_readiness {
1074
1355
  crate::lifecycle::QuickStartReadiness::Degraded { unhealthy_agents } => (
1075
1356
  format!(
@@ -1079,28 +1360,109 @@ pub mod lifecycle_port {
1079
1360
  ),
1080
1361
  false,
1081
1362
  json!({
1363
+ "all_spawned": all_spawned,
1364
+ "all_workers_spawned": all_workers_spawned,
1365
+ "all_attached_receiver": all_attached_receiver,
1366
+ "attached_receiver": attached_receiver,
1367
+ "leader_receiver_attached": leader_receiver_attached,
1368
+ "all_resumable_have_session": all_resumable_have_session,
1369
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1370
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1082
1371
  "state": "degraded",
1372
+ "session_capture_complete": all_resumable_have_session,
1373
+ "session_capture_incomplete": !all_resumable_have_session,
1374
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1375
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1083
1376
  "unhealthy_agents": unhealthy_agents,
1084
1377
  }),
1085
1378
  ),
1086
- crate::lifecycle::QuickStartReadiness::PendingToolLoad => (
1087
- format!(
1088
- "quick-start launched (worker tool load unverified): {}",
1089
- session_name.as_str()
1090
- ),
1091
- true,
1092
- json!({
1093
- "state": "pending_tool_load",
1094
- "reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
1095
- }),
1096
- ),
1379
+ crate::lifecycle::QuickStartReadiness::PendingToolLoad => {
1380
+ if !all_resumable_have_session {
1381
+ (
1382
+ format!(
1383
+ "quick-start pending: {}; provider session capture incomplete",
1384
+ session_name.as_str()
1385
+ ),
1386
+ false,
1387
+ json!({
1388
+ "all_spawned": all_spawned,
1389
+ "all_workers_spawned": all_workers_spawned,
1390
+ "all_attached_receiver": all_attached_receiver,
1391
+ "attached_receiver": attached_receiver,
1392
+ "leader_receiver_attached": leader_receiver_attached,
1393
+ "all_resumable_have_session": all_resumable_have_session,
1394
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1395
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1396
+ "state": "session_capture_incomplete",
1397
+ "session_capture_complete": all_resumable_have_session,
1398
+ "session_capture_incomplete": !all_resumable_have_session,
1399
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1400
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1401
+ "reason": "provider session capture is incomplete; restart is not yet resume-safe",
1402
+ }),
1403
+ )
1404
+ } else if launch.leader_receiver_attached {
1405
+ (
1406
+ format!(
1407
+ "quick-start launched (worker tool load unverified): {}",
1408
+ session_name.as_str()
1409
+ ),
1410
+ all_spawned && all_attached_receiver && all_resumable_have_session,
1411
+ json!({
1412
+ "all_spawned": all_spawned,
1413
+ "all_workers_spawned": all_workers_spawned,
1414
+ "all_attached_receiver": all_attached_receiver,
1415
+ "attached_receiver": attached_receiver,
1416
+ "leader_receiver_attached": leader_receiver_attached,
1417
+ "all_resumable_have_session": all_resumable_have_session,
1418
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1419
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1420
+ "state": "pending_tool_load",
1421
+ "session_capture_complete": all_resumable_have_session,
1422
+ "session_capture_incomplete": !all_resumable_have_session,
1423
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1424
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1425
+ "reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
1426
+ }),
1427
+ )
1428
+ } else {
1429
+ (
1430
+ format!(
1431
+ "quick-start degraded: {}; leader receiver unbound",
1432
+ session_name.as_str()
1433
+ ),
1434
+ false,
1435
+ json!({
1436
+ "all_spawned": all_spawned,
1437
+ "all_workers_spawned": all_workers_spawned,
1438
+ "all_attached_receiver": all_attached_receiver,
1439
+ "attached_receiver": attached_receiver,
1440
+ "leader_receiver_attached": leader_receiver_attached,
1441
+ "all_resumable_have_session": all_resumable_have_session,
1442
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1443
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1444
+ "state": "leader_receiver_unbound",
1445
+ "session_capture_complete": all_resumable_have_session,
1446
+ "session_capture_incomplete": !all_resumable_have_session,
1447
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1448
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1449
+ "reason": "launched team has no attached leader receiver",
1450
+ "next_action": "claim-leader",
1451
+ }),
1452
+ )
1453
+ }
1454
+ }
1097
1455
  };
1098
1456
  json!({
1099
1457
  "ok": ok,
1100
1458
  "summary": summary,
1459
+ "status": readiness_json.get("state").cloned().unwrap_or(Value::Null),
1460
+ "reason": readiness_json.get("reason").cloned().unwrap_or(Value::Null),
1461
+ "ready": readiness_json.get("ready").cloned().unwrap_or(Value::Bool(false)),
1101
1462
  "session_name": session_name.as_str(),
1102
1463
  "dry_run": launch.dry_run,
1103
1464
  "next_actions": next_actions,
1465
+ "readiness": readiness_json.clone(),
1104
1466
  "worker_readiness": readiness_json,
1105
1467
  })
1106
1468
  }
@@ -1154,6 +1516,30 @@ pub mod lifecycle_port {
1154
1516
  "error": error,
1155
1517
  "unresumable": unresumable.iter().map(|w| w.agent_id.as_str()).collect::<Vec<_>>(),
1156
1518
  }),
1519
+ crate::lifecycle::RestartReport::RefusedResumeNotReady {
1520
+ missing,
1521
+ allow_fresh,
1522
+ deadline,
1523
+ elapsed,
1524
+ error,
1525
+ } => json!({
1526
+ "ok": false,
1527
+ "kind": "resume_not_ready",
1528
+ "reason": "session_capture_incomplete",
1529
+ "status": "resume_not_ready",
1530
+ "allow_fresh": allow_fresh,
1531
+ "error": error,
1532
+ "pending_agents": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1533
+ "missing": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1534
+ "session_convergence": {
1535
+ "complete": false,
1536
+ "deadline_s": deadline.as_secs_f64(),
1537
+ "deadline_ms": deadline.as_millis(),
1538
+ "elapsed_ms": elapsed.as_millis(),
1539
+ "pending_agent_ids": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1540
+ },
1541
+ "next_action": "rerun restart after session capture completes, or pass --allow-fresh to deliberately discard missing context",
1542
+ }),
1157
1543
  crate::lifecycle::RestartReport::RefusedInvalidFirstSendAt {
1158
1544
  invalid,
1159
1545
  allow_fresh,
@@ -1196,6 +1582,75 @@ pub mod lifecycle_port {
1196
1582
  }
1197
1583
  }
1198
1584
  }
1585
+
1586
+ fn mark_matching_session_teams_stopped(
1587
+ state: &mut Value,
1588
+ session_name: Option<&crate::transport::SessionName>,
1589
+ ) -> Vec<String> {
1590
+ let Some(session_name) = session_name.map(crate::transport::SessionName::as_str) else {
1591
+ return Vec::new();
1592
+ };
1593
+ let Some(teams) = state.get_mut("teams").and_then(Value::as_object_mut) else {
1594
+ return Vec::new();
1595
+ };
1596
+ let mut out = Vec::new();
1597
+ for (key, team) in teams.iter_mut() {
1598
+ let matches = team
1599
+ .get("session_name")
1600
+ .and_then(Value::as_str)
1601
+ .is_some_and(|session| session == session_name);
1602
+ if matches {
1603
+ mark_agents_stopped(team);
1604
+ out.push(key.clone());
1605
+ }
1606
+ }
1607
+ out
1608
+ }
1609
+
1610
+ fn promote_live_sibling_after_scoped_shutdown(
1611
+ workspace: &Path,
1612
+ stopped_state: &Value,
1613
+ ) -> Result<(), CliError> {
1614
+ let stopped_key = stopped_state
1615
+ .get("active_team_key")
1616
+ .and_then(Value::as_str)
1617
+ .filter(|key| !key.is_empty());
1618
+ let Some(stopped_key) = stopped_key else {
1619
+ return Ok(());
1620
+ };
1621
+ let raw = crate::state::persist::load_runtime_state(workspace)?;
1622
+ let active = raw
1623
+ .get("active_team_key")
1624
+ .and_then(Value::as_str)
1625
+ .unwrap_or("");
1626
+ if active != stopped_key {
1627
+ return Ok(());
1628
+ }
1629
+ let Some((next_key, _)) = raw
1630
+ .get("teams")
1631
+ .and_then(Value::as_object)
1632
+ .and_then(|teams| {
1633
+ teams
1634
+ .iter()
1635
+ .find(|(key, team)| key.as_str() != stopped_key && team_has_running_agent(team))
1636
+ })
1637
+ else {
1638
+ return Ok(());
1639
+ };
1640
+ let promoted = crate::state::projection::project_top_level_view(&raw, next_key);
1641
+ crate::state::persist::save_runtime_state(workspace, &promoted)?;
1642
+ Ok(())
1643
+ }
1644
+
1645
+ fn team_has_running_agent(team: &Value) -> bool {
1646
+ team.get("agents")
1647
+ .and_then(Value::as_object)
1648
+ .is_some_and(|agents| {
1649
+ agents
1650
+ .values()
1651
+ .any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
1652
+ })
1653
+ }
1199
1654
  }
1200
1655
 
1201
1656
  /// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
@@ -1207,9 +1662,19 @@ pub mod diagnose_port {
1207
1662
 
1208
1663
  /// `runtime.doctor(spec)` + schema 注入(`cmd_doctor` 默认分支)。
1209
1664
  pub fn doctor(workspace: &Path, spec: Option<&Path>) -> Result<Value, CliError> {
1210
- let _ = spec;
1211
1665
  let tmux_path = which_path("tmux");
1212
1666
  let tmux_installed = tmux_path.is_some();
1667
+ let workspace_valid = workspace.is_dir();
1668
+ let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
1669
+ let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
1670
+ let profile_smoke = doctor_team_dir(workspace, spec)
1671
+ .map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(&team))
1672
+ .transpose()?;
1673
+ let profile_smoke_ok = profile_smoke
1674
+ .as_ref()
1675
+ .and_then(|check| check.get("ok").and_then(Value::as_bool))
1676
+ .unwrap_or(true);
1677
+ let ok = workspace_valid && (team_context || workspace_has_entries) && profile_smoke_ok;
1213
1678
  let health = crate::coordinator::coordinator_health(
1214
1679
  &crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
1215
1680
  );
@@ -1226,11 +1691,81 @@ pub mod diagnose_port {
1226
1691
  "local_module": true,
1227
1692
  },
1228
1693
  "secret_scan": secret_scan(workspace),
1694
+ "profile_smoke": profile_smoke.unwrap_or_else(|| json!({
1695
+ "name": "profile_smoke",
1696
+ "ok": true,
1697
+ "status": "not_required",
1698
+ "checks": [],
1699
+ "secret_values_printed": false,
1700
+ })),
1229
1701
  "coordinator": coordinator_health_value(health),
1230
- "ok": true,
1702
+ "ok": ok,
1703
+ "error": if ok {
1704
+ Value::Null
1705
+ } else if !profile_smoke_ok {
1706
+ json!("profile_smoke_failed")
1707
+ } else if workspace_valid {
1708
+ json!("workspace has no Team Agent spec or runtime context")
1709
+ } else {
1710
+ json!("invalid workspace")
1711
+ },
1231
1712
  }))
1232
1713
  }
1233
1714
 
1715
+ fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
1716
+ if let Some(spec) = spec {
1717
+ let candidate = if spec.is_absolute() {
1718
+ spec.to_path_buf()
1719
+ } else {
1720
+ workspace.join(spec)
1721
+ };
1722
+ if candidate.is_file() {
1723
+ return candidate.parent().map(Path::to_path_buf);
1724
+ }
1725
+ if candidate.join("team.spec.yaml").is_file() || candidate.join("TEAM.md").is_file() {
1726
+ return Some(candidate);
1727
+ }
1728
+ }
1729
+ if workspace.join("team.spec.yaml").is_file() || workspace.join("TEAM.md").is_file() {
1730
+ return Some(workspace.to_path_buf());
1731
+ }
1732
+ let current = workspace.join(".team").join("current");
1733
+ if current.join("team.spec.yaml").is_file() || current.join("TEAM.md").is_file() {
1734
+ return Some(current);
1735
+ }
1736
+ None
1737
+ }
1738
+
1739
+ fn has_doctor_team_context(workspace: &Path, spec: Option<&Path>) -> bool {
1740
+ if spec.is_some_and(|path| {
1741
+ let candidate = if path.is_absolute() {
1742
+ path.to_path_buf()
1743
+ } else {
1744
+ workspace.join(path)
1745
+ };
1746
+ candidate.is_file()
1747
+ }) {
1748
+ return true;
1749
+ }
1750
+ [
1751
+ workspace.join("TEAM.md"),
1752
+ workspace.join("team.spec.yaml"),
1753
+ workspace.join(".team/current/TEAM.md"),
1754
+ workspace.join(".team/current/team.spec.yaml"),
1755
+ workspace.join(".team/runtime/state.json"),
1756
+ workspace.join(".team/runtime/team.db"),
1757
+ ]
1758
+ .into_iter()
1759
+ .any(|path| path.exists())
1760
+ }
1761
+
1762
+ fn workspace_has_any_entry(workspace: &Path) -> bool {
1763
+ std::fs::read_dir(workspace)
1764
+ .ok()
1765
+ .and_then(|mut entries| entries.next())
1766
+ .is_some()
1767
+ }
1768
+
1234
1769
  fn secret_scan(workspace: &Path) -> Value {
1235
1770
  let mut findings = Vec::new();
1236
1771
  let mut scanned = 0usize;
@@ -1245,7 +1780,13 @@ pub mod diagnose_port {
1245
1780
  const SECRET_SCAN_MAX_ENTRIES: usize = 512;
1246
1781
  const SECRET_SCAN_MAX_FILE_BYTES: u64 = 128 * 1024;
1247
1782
 
1248
- fn scan_secret_dir(root: &Path, dir: &Path, depth: usize, scanned: &mut usize, findings: &mut Vec<Value>) {
1783
+ fn scan_secret_dir(
1784
+ root: &Path,
1785
+ dir: &Path,
1786
+ depth: usize,
1787
+ scanned: &mut usize,
1788
+ findings: &mut Vec<Value>,
1789
+ ) {
1249
1790
  if depth > SECRET_SCAN_MAX_DEPTH || *scanned >= SECRET_SCAN_MAX_ENTRIES {
1250
1791
  return;
1251
1792
  }
@@ -1305,143 +1846,37 @@ pub mod diagnose_port {
1305
1846
  }
1306
1847
  }
1307
1848
  /// `run_comms_selftest`(`--comms`/`--gate comms`)。**纯 state-read,零 token**(MUST-NOT-13)。
1308
- pub fn comms_selftest(workspace: &Path, team: Option<&str>, gate: Option<&str>) -> Result<Value, CliError> {
1309
- let _ = (team, gate);
1310
- let state = read_runtime_state(workspace);
1311
- let receiver = state
1312
- .get("leader_receiver")
1313
- .and_then(Value::as_object);
1314
- let owner_pane_id = state
1315
- .get("owner")
1316
- .or_else(|| state.get("team_owner"))
1317
- .and_then(|v| v.get("pane_id"))
1318
- .cloned()
1319
- .unwrap_or(Value::Null);
1320
- let caller_pane_id = std::env::var("TMUX_PANE").ok().map(Value::String).unwrap_or(Value::Null);
1321
- let pane_id = receiver
1322
- .and_then(|r| r.get("pane_id"))
1323
- .cloned()
1324
- .unwrap_or(Value::Null);
1325
- let mismatches = receiver_binding_mismatches(&owner_pane_id, &caller_pane_id, &pane_id);
1326
- let receiver_binding = json!({
1327
- "status": if mismatches.is_empty() { "pass" } else { "fail" },
1328
- "verifies": "binding_consistency",
1329
- "proof": "state_read",
1330
- "state_read_observed": true,
1331
- "pane_id": pane_id,
1332
- "owner_pane_id": owner_pane_id,
1333
- "caller_pane_id": caller_pane_id,
1334
- "mismatches": mismatches,
1335
- "configured": receiver.is_some(),
1336
- });
1337
- Ok(json!({
1338
- "ok": true,
1339
- "status": "pass",
1340
- "run_id": run_id(),
1341
- "scope": "binding_consistency",
1342
- "boundary": COMMS_BOUNDARY_TEXT,
1343
- "checks": {
1344
- "receiver_binding": receiver_binding,
1345
- "contract_suite": {
1346
- "status": "deferred",
1347
- "deferred_to": "0.2.9",
1348
- "reason": "contract test files not shipped with package",
1349
- "message": "comms contract verification deferred to 0.2.9; contract test files not shipped with package",
1350
- },
1351
- "provider_sdk_calls": {
1352
- "status": "pass",
1353
- "verifies": "no_provider_sdk_calls",
1354
- "calls": {
1355
- "anthropic": 0,
1356
- "openai": 0,
1357
- "httpx": 0,
1358
- },
1359
- },
1360
- },
1361
- }))
1362
- }
1363
-
1364
- pub(super) fn receiver_binding_mismatches(
1365
- owner_pane_id: &Value,
1366
- caller_pane_id: &Value,
1367
- pane_id: &Value,
1368
- ) -> Vec<Value> {
1369
- let mut mismatches = Vec::new();
1370
- if pane_mismatch(owner_pane_id, pane_id) {
1371
- mismatches.push(json!("owner_receiver_pane_mismatch"));
1372
- }
1373
- if pane_mismatch(caller_pane_id, owner_pane_id) {
1374
- mismatches.push(json!("caller_owner_pane_mismatch"));
1375
- }
1376
- if pane_mismatch(caller_pane_id, pane_id) {
1377
- mismatches.push(json!("caller_receiver_pane_mismatch"));
1378
- }
1379
- mismatches
1380
- }
1381
-
1382
- fn pane_mismatch(left: &Value, right: &Value) -> bool {
1383
- let Some(left) = left.as_str().filter(|s| !s.is_empty()) else {
1384
- return false;
1385
- };
1386
- let Some(right) = right.as_str().filter(|s| !s.is_empty()) else {
1387
- return false;
1388
- };
1389
- left != right
1849
+ pub fn comms_selftest(
1850
+ workspace: &Path,
1851
+ team: Option<&str>,
1852
+ gate: Option<&str>,
1853
+ ) -> Result<Value, CliError> {
1854
+ crate::diagnose::comms::doctor_comms_json(workspace, team, gate)
1390
1855
  }
1391
1856
 
1392
1857
  /// `orphan_gate(fix, confirm)`(`--gate orphans`)。CI gate。
1393
1858
  pub fn orphan_gate(fix: bool, confirm: bool) -> Result<Value, CliError> {
1394
- if fix && !confirm {
1395
- return Ok(json!({
1396
- "ok": false,
1397
- "gate": "orphans",
1398
- "status": "refused",
1399
- "reason": "fix_requires_confirm",
1400
- "action": "re-run with --gate orphans --fix --confirm",
1401
- }));
1402
- }
1403
- Ok(json!({
1404
- "ok": true,
1405
- "gate": "orphans",
1406
- "status": "passed",
1407
- "scanned": 0,
1408
- "dry_run": !fix,
1409
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1410
- "action_required": false,
1411
- "fix": fix,
1412
- }))
1859
+ crate::diagnose::orphans::orphan_gate_json(fix, confirm)
1413
1860
  }
1414
1861
  /// `cleanup_orphan_coordinators(confirm)`(`--cleanup-orphans`;dry-run unless `--confirm`)。
1415
1862
  pub fn cleanup_orphans(confirm: bool) -> Result<Value, CliError> {
1416
- if confirm {
1417
- return Ok(json!({
1418
- "ok": true,
1419
- "scanned": 0,
1420
- "orphans": [],
1421
- "dry_run": false,
1422
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1423
- "killed": [],
1424
- "failed": [],
1425
- }));
1426
- }
1427
- Ok(json!({
1428
- "ok": true,
1429
- "scanned": 0,
1430
- "orphans": [],
1431
- "dry_run": true,
1432
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1433
- "action_required": "re-run with --confirm to send SIGTERM",
1434
- }))
1863
+ crate::diagnose::orphans::cleanup_orphans_json(confirm)
1435
1864
  }
1436
1865
  /// `fix_schema_layout`(`--fix-schema`)/`schema_diagnosis`。
1437
1866
  pub fn fix_schema(workspace: &Path) -> Result<Value, CliError> {
1438
1867
  let db_path = workspace.join(".team").join("runtime").join("team.db");
1439
- let result = crate::db::migration::fix_schema_layout(workspace, crate::db::schema::SCHEMA_VERSION)
1440
- .map_err(|e| CliError::Runtime(e.to_string()))?;
1868
+ let result =
1869
+ crate::db::migration::fix_schema_layout(workspace, crate::db::schema::SCHEMA_VERSION)
1870
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
1441
1871
  match result {
1442
- crate::db::migration::FixResult::Missing(diagnosis) => {
1443
- Ok(fix_schema_value(&db_path, diagnosis, false, Vec::new(), None, None))
1444
- }
1872
+ crate::db::migration::FixResult::Missing(diagnosis) => Ok(fix_schema_value(
1873
+ &db_path,
1874
+ diagnosis,
1875
+ false,
1876
+ Vec::new(),
1877
+ None,
1878
+ None,
1879
+ )),
1445
1880
  crate::db::migration::FixResult::Blocked { reason } => Ok(json!({
1446
1881
  "ok": false,
1447
1882
  "status": "blocked",
@@ -1450,12 +1885,22 @@ pub mod diagnose_port {
1450
1885
  "reason": reason,
1451
1886
  "fixed": false,
1452
1887
  })),
1453
- crate::db::migration::FixResult::Fixed { diagnosis, rebuilds } => {
1888
+ crate::db::migration::FixResult::Fixed {
1889
+ diagnosis,
1890
+ rebuilds,
1891
+ } => {
1454
1892
  let backup = rebuilds
1455
1893
  .first()
1456
1894
  .map(|event| event.backup_path.clone())
1457
1895
  .unwrap_or_else(|| backup_path_preview(&db_path, diagnosis.user_version));
1458
- Ok(fix_schema_value(&db_path, diagnosis, true, rebuild_values(rebuilds), Some(backup), Some("none")))
1896
+ Ok(fix_schema_value(
1897
+ &db_path,
1898
+ diagnosis,
1899
+ true,
1900
+ rebuild_values(rebuilds),
1901
+ Some(backup),
1902
+ Some("none"),
1903
+ ))
1459
1904
  }
1460
1905
  }
1461
1906
  }
@@ -1490,7 +1935,9 @@ pub mod diagnose_port {
1490
1935
  fn backup_path_preview(db_path: &Path, user_version: i64) -> String {
1491
1936
  let stamp = chrono::Utc::now().format("%Y%m%dT%H%M%SZ");
1492
1937
  db_path
1493
- .with_file_name(format!("team.db.pre-migration-{stamp}-from-v{user_version}.bak"))
1938
+ .with_file_name(format!(
1939
+ "team.db.pre-migration-{stamp}-from-v{user_version}.bak"
1940
+ ))
1494
1941
  .to_string_lossy()
1495
1942
  .to_string()
1496
1943
  }
@@ -1555,7 +2002,9 @@ pub mod diagnose_port {
1555
2002
  })
1556
2003
  }
1557
2004
 
1558
- fn coordinator_status_wire(status: crate::coordinator::CoordinatorHealthStatus) -> &'static str {
2005
+ fn coordinator_status_wire(
2006
+ status: crate::coordinator::CoordinatorHealthStatus,
2007
+ ) -> &'static str {
1559
2008
  match status {
1560
2009
  crate::coordinator::CoordinatorHealthStatus::Missing => "missing",
1561
2010
  crate::coordinator::CoordinatorHealthStatus::InvalidPid => "invalid_pid",
@@ -1572,7 +2021,11 @@ pub mod leader_port {
1572
2021
  use super::*;
1573
2022
 
1574
2023
  /// `runtime.takeover(workspace, team, confirm)` 的 CLI `--json` 投影。
1575
- pub fn takeover(workspace: &Path, team: Option<&str>, confirm: bool) -> Result<Value, CliError> {
2024
+ pub fn takeover(
2025
+ workspace: &Path,
2026
+ team: Option<&str>,
2027
+ confirm: bool,
2028
+ ) -> Result<Value, CliError> {
1576
2029
  if !confirm && !positive_caller_pane_env_present() {
1577
2030
  return Ok(json!({
1578
2031
  "ok": false,
@@ -1595,7 +2048,11 @@ pub mod leader_port {
1595
2048
  Ok(lease_value(result))
1596
2049
  }
1597
2050
  /// `runtime.claim_leader(...)` 的 CLI `--json` 投影(`cmd_claim_leader`;含 inbox_hint)。
1598
- pub fn claim_leader(workspace: &Path, team: Option<&str>, confirm: bool) -> Result<Value, CliError> {
2051
+ pub fn claim_leader(
2052
+ workspace: &Path,
2053
+ team: Option<&str>,
2054
+ confirm: bool,
2055
+ ) -> Result<Value, CliError> {
1599
2056
  let state = crate::state::persist::load_runtime_state(workspace)
1600
2057
  .map_err(|e| CliError::Runtime(e.to_string()))?;
1601
2058
  let Some(team_id) = resolve_owner_team_id(&state, team) else {
@@ -1623,13 +2080,23 @@ pub mod leader_port {
1623
2080
  /// `runtime.attach_leader(...)` 的 CLI `--json` 投影。
1624
2081
  pub fn attach_leader(
1625
2082
  workspace: &Path,
2083
+ team: Option<&str>,
1626
2084
  pane: Option<&crate::transport::PaneId>,
1627
2085
  provider: crate::provider::Provider,
2086
+ _confirm: bool,
1628
2087
  ) -> Result<Value, CliError> {
1629
2088
  let result = crate::leader::attach_leader(workspace, pane, provider)
1630
2089
  .map_err(|e| CliError::Runtime(e.to_string()))?;
1631
- let requeued = attach_requeued_exhausted_watchers(workspace, result.bound_pane_id.as_ref())?;
1632
- Ok(attach_lease_value(result, requeued))
2090
+ let requeued =
2091
+ attach_requeued_exhausted_watchers(workspace, result.bound_pane_id.as_ref())?;
2092
+ let mut value = attach_lease_value(result, requeued);
2093
+ if let Some(obj) = value.as_object_mut() {
2094
+ if let Some(team) = team {
2095
+ obj.insert("team".to_string(), json!(team));
2096
+ obj.insert("team_key".to_string(), json!(team));
2097
+ }
2098
+ }
2099
+ Ok(value)
1633
2100
  }
1634
2101
 
1635
2102
  /// `runtime.leader_identity(workspace, team)`(`cmd_identity`)。
@@ -1676,12 +2143,16 @@ pub mod leader_port {
1676
2143
  None
1677
2144
  }
1678
2145
  }
1679
- None => Some(TeamKey::new(crate::state::projection::team_state_key(state))),
2146
+ None => Some(TeamKey::new(crate::state::projection::team_state_key(
2147
+ state,
2148
+ ))),
1680
2149
  }
1681
2150
  }
1682
2151
 
1683
2152
  fn positive_caller_pane_env_present() -> bool {
1684
- std::env::var("TMUX_PANE").ok().is_some_and(|pane| !pane.is_empty())
2153
+ std::env::var("TMUX_PANE")
2154
+ .ok()
2155
+ .is_some_and(|pane| !pane.is_empty())
1685
2156
  || std::env::var("TEAM_AGENT_LEADER_PANE_ID")
1686
2157
  .ok()
1687
2158
  .is_some_and(|pane| !pane.is_empty())
@@ -1719,7 +2190,10 @@ pub mod leader_port {
1719
2190
  fn lease_value(result: crate::leader::LeaseResult) -> Value {
1720
2191
  let mut out = serde_json::Map::new();
1721
2192
  out.insert("ok".to_string(), json!(result.ok));
1722
- out.insert("status".to_string(), json!(lease_status_wire(result.status)));
2193
+ out.insert(
2194
+ "status".to_string(),
2195
+ json!(lease_status_wire(result.status)),
2196
+ );
1723
2197
  if let Some(reason) = result.reason {
1724
2198
  out.insert("reason".to_string(), json!(lease_reason_wire(reason)));
1725
2199
  }
@@ -1733,10 +2207,16 @@ pub mod leader_port {
1733
2207
  out.insert("bound_pane_id".to_string(), json!(pane.as_str()));
1734
2208
  }
1735
2209
  if let Some(receiver) = result.receiver {
1736
- out.insert("leader_receiver".to_string(), serde_json::to_value(receiver).unwrap_or(Value::Null));
2210
+ out.insert(
2211
+ "leader_receiver".to_string(),
2212
+ serde_json::to_value(receiver).unwrap_or(Value::Null),
2213
+ );
1737
2214
  }
1738
2215
  if let Some(owner) = result.owner {
1739
- out.insert("team_owner".to_string(), serde_json::to_value(owner).unwrap_or(Value::Null));
2216
+ out.insert(
2217
+ "team_owner".to_string(),
2218
+ serde_json::to_value(owner).unwrap_or(Value::Null),
2219
+ );
1740
2220
  }
1741
2221
  Value::Object(out)
1742
2222
  }
@@ -1780,7 +2260,10 @@ pub mod leader_port {
1780
2260
  /// STRING list. (Current divergent body — the `requeued` Vec<WatcherNotice> objects — kept until
1781
2261
  /// porter-c ports; pinned RED in cli::tests asserts the golden string list.)
1782
2262
  pub(crate) fn project_requeued_exhausted_watchers(event: &Value) -> Value {
1783
- event.get("watcher_ids").cloned().unwrap_or_else(|| json!([]))
2263
+ event
2264
+ .get("watcher_ids")
2265
+ .cloned()
2266
+ .unwrap_or_else(|| json!([]))
1784
2267
  }
1785
2268
 
1786
2269
  fn lease_status_wire(status: crate::leader::LeaseStatus) -> &'static str {