@team-agent/installer 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/Cargo.lock +34 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/Cargo.toml +1 -1
  4. package/crates/team-agent/src/cli/adapters.rs +196 -19
  5. package/crates/team-agent/src/cli/diagnose.rs +145 -11
  6. package/crates/team-agent/src/cli/emit.rs +287 -53
  7. package/crates/team-agent/src/cli/leader.rs +37 -8
  8. package/crates/team-agent/src/cli/mod.rs +807 -316
  9. package/crates/team-agent/src/cli/status_port.rs +25 -2
  10. package/crates/team-agent/src/cli/tests/divergence.rs +1 -2
  11. package/crates/team-agent/src/cli/tests/lane_c.rs +23 -13
  12. package/crates/team-agent/src/cli/tests/main_preserved.rs +2 -0
  13. package/crates/team-agent/src/cli/tests/run_delegation.rs +57 -3
  14. package/crates/team-agent/src/cli/types.rs +17 -0
  15. package/crates/team-agent/src/compiler/tests.rs +2 -2
  16. package/crates/team-agent/src/compiler.rs +16 -6
  17. package/crates/team-agent/src/coordinator/health.rs +89 -20
  18. package/crates/team-agent/src/coordinator/mod.rs +4 -0
  19. package/crates/team-agent/src/coordinator/runtime_detectors.rs +500 -0
  20. package/crates/team-agent/src/coordinator/runtime_observation.rs +58 -0
  21. package/crates/team-agent/src/coordinator/tests/watch.rs +4 -2
  22. package/crates/team-agent/src/coordinator/tick.rs +222 -69
  23. package/crates/team-agent/src/coordinator/types.rs +15 -3
  24. package/crates/team-agent/src/db/schema.rs +37 -2
  25. package/crates/team-agent/src/diagnose/comms.rs +226 -0
  26. package/crates/team-agent/src/diagnose/mod.rs +45 -0
  27. package/crates/team-agent/src/diagnose/orphans.rs +658 -0
  28. package/crates/team-agent/src/fake_worker.rs +146 -3
  29. package/crates/team-agent/src/leader/start.rs +121 -23
  30. package/crates/team-agent/src/leader/types.rs +44 -1
  31. package/crates/team-agent/src/lib.rs +3 -0
  32. package/crates/team-agent/src/lifecycle/display.rs +648 -50
  33. package/crates/team-agent/src/lifecycle/launch.rs +1048 -264
  34. package/crates/team-agent/src/lifecycle/mod.rs +3 -0
  35. package/crates/team-agent/src/lifecycle/profile_launch.rs +810 -0
  36. package/crates/team-agent/src/lifecycle/profile_smoke.rs +522 -0
  37. package/crates/team-agent/src/lifecycle/restart/agent.rs +113 -26
  38. package/crates/team-agent/src/lifecycle/restart/common.rs +189 -102
  39. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +465 -25
  40. package/crates/team-agent/src/lifecycle/restart/remove.rs +22 -6
  41. package/crates/team-agent/src/lifecycle/restart/team_state.rs +19 -0
  42. package/crates/team-agent/src/lifecycle/restart.rs +4 -1
  43. package/crates/team-agent/src/lifecycle/tests/core.rs +4 -4
  44. package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +5 -5
  45. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +39 -9
  46. package/crates/team-agent/src/lifecycle/types.rs +23 -0
  47. package/crates/team-agent/src/lifecycle/worker_command_context.rs +326 -0
  48. package/crates/team-agent/src/mcp_server/helpers.rs +1 -0
  49. package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +341 -0
  50. package/crates/team-agent/src/mcp_server/lifecycle_tools/mod.rs +10 -0
  51. package/crates/team-agent/src/mcp_server/lifecycle_tools/state_status.rs +158 -0
  52. package/crates/team-agent/src/mcp_server/mod.rs +3 -74
  53. package/crates/team-agent/src/mcp_server/tests/scoped.rs +1 -1
  54. package/crates/team-agent/src/mcp_server/tests/send.rs +6 -5
  55. package/crates/team-agent/src/mcp_server/tools.rs +312 -111
  56. package/crates/team-agent/src/mcp_server/types.rs +6 -4
  57. package/crates/team-agent/src/mcp_server/wire.rs +19 -7
  58. package/crates/team-agent/src/message_store.rs +21 -4
  59. package/crates/team-agent/src/messaging/delivery.rs +87 -37
  60. package/crates/team-agent/src/messaging/mod.rs +9 -6
  61. package/crates/team-agent/src/messaging/results.rs +153 -16
  62. package/crates/team-agent/src/messaging/selftest.rs +199 -12
  63. package/crates/team-agent/src/messaging/send.rs +35 -3
  64. package/crates/team-agent/src/messaging/tests/runtime.rs +19 -4
  65. package/crates/team-agent/src/messaging/types.rs +11 -3
  66. package/crates/team-agent/src/os_probe.rs +119 -0
  67. package/crates/team-agent/src/packaging/migrate.rs +10 -2
  68. package/crates/team-agent/src/packaging/tests.rs +23 -0
  69. package/crates/team-agent/src/provider/adapter.rs +483 -67
  70. package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +1 -7
  71. package/crates/team-agent/src/provider/classify.rs +51 -4
  72. package/crates/team-agent/src/provider/startup_prompt.rs +94 -0
  73. package/crates/team-agent/src/provider/types.rs +47 -0
  74. package/crates/team-agent/src/session_capture.rs +616 -0
  75. package/crates/team-agent/src/state/persist.rs +57 -0
  76. package/crates/team-agent/src/state/projection.rs +32 -23
  77. package/crates/team-agent/src/state/selector.rs +5 -2
  78. package/crates/team-agent/src/tmux_backend.rs +151 -60
  79. package/crates/team-agent/src/transport/test_support.rs +9 -0
  80. package/crates/team-agent/src/transport/tests/wire.rs +4 -0
  81. package/crates/team-agent/src/transport.rs +13 -2
  82. package/package.json +4 -4
@@ -24,7 +24,15 @@
24
24
  //! 所有 fn body = `unimplemented!("step14b port: ...")`。RED 契约据此 NAME 类型 + CALL 真 fn。
25
25
 
26
26
  // ROUND-0 skeleton:fn body 全 unimplemented!() → import/field/param/大 Err 暂未落地;P2 porter 实现时移除。
27
- #![allow(dead_code, unused_imports, unused_variables, clippy::result_large_err, clippy::doc_overindented_list_items, clippy::doc_lazy_continuation, clippy::io_other_error)]
27
+ #![allow(
28
+ dead_code,
29
+ unused_imports,
30
+ unused_variables,
31
+ clippy::result_large_err,
32
+ clippy::doc_overindented_list_items,
33
+ clippy::doc_lazy_continuation,
34
+ clippy::io_other_error
35
+ )]
28
36
  // §10:CLI 命令实现层禁 unwrap/expect/panic(unimplemented!() stub 不被拦);tests 子模块各自 allow。
29
37
  #![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
30
38
 
@@ -36,10 +44,10 @@ use serde_json::{json, Map, Value};
36
44
  use thiserror::Error;
37
45
 
38
46
  // REUSE in-tree(只 import,不 redefine):
39
- use crate::model::ids::{TaskId, TeamKey};
40
47
  use crate::messaging::{self, AlertType, MessageTarget, SendOptions};
48
+ use crate::model::ids::{TaskId, TeamKey};
41
49
 
42
- pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency. Does NOT perform live runtime message round-trip. comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)";
50
+ pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency and zero-token comms contracts. Does NOT perform live runtime message round-trip. (zero token, zero pollution)";
43
51
 
44
52
  pub mod adapters;
45
53
  pub mod diagnose;
@@ -60,6 +68,23 @@ pub use send::*;
60
68
  pub use status::*;
61
69
  pub use types::*;
62
70
 
71
+ /// Public `attach-leader` CLI handler. It consumes the typed pane/provider args and
72
+ /// writes/returns a `leader_receiver` binding via the leader lease port.
73
+ pub fn cmd_attach_leader(args: &AttachLeaderArgs) -> Result<CmdResult, CliError> {
74
+ let mut value = leader_port::attach_leader(
75
+ &args.workspace,
76
+ args.team.as_deref(),
77
+ args.pane.as_ref(),
78
+ args.provider,
79
+ args.confirm,
80
+ )?;
81
+ if let Some(obj) = value.as_object_mut() {
82
+ obj.entry("leader_receiver".to_string())
83
+ .or_insert(Value::Null);
84
+ }
85
+ Ok(CmdResult::from_json(value, args.json))
86
+ }
87
+
63
88
  pub(crate) use helpers::*;
64
89
 
65
90
  #[cfg(test)]
@@ -75,7 +100,6 @@ mod tests;
75
100
  /// `cmd_inbox` 委派的只读投影面。返回 serde `Value`(稳定 JSON 形状由 status lane 拥有)。
76
101
  pub mod status_port;
77
102
 
78
-
79
103
  /// PLACEHOLDER → step13 lifecycle(`runtime.{quick_start,start_agent,add_agent,fork_agent,
80
104
  /// remove_agent,start_agent,stop_agent,reset_agent,restart,shutdown,start_leader,acknowledge_idle}`)。
81
105
  /// `quick_start.py` 物理在本子系统但实现属 step 13(card)。本层只声明委派面。
@@ -92,7 +116,9 @@ pub mod lifecycle_port {
92
116
  yes: bool,
93
117
  fresh: bool,
94
118
  ) -> Result<Value, CliError> {
95
- match crate::lifecycle::quick_start_in_workspace(workspace, agents_dir, name, yes, fresh, team_id) {
119
+ match crate::lifecycle::quick_start_in_workspace(
120
+ workspace, agents_dir, name, yes, fresh, team_id,
121
+ ) {
96
122
  Ok(report) => Ok(quick_start_value(report)),
97
123
  Err(e) => Ok(error_value(e)),
98
124
  }
@@ -104,19 +130,37 @@ pub mod lifecycle_port {
104
130
  cwd: &Path,
105
131
  attach: &LeaderLauncherArgs,
106
132
  ) -> Result<Value, CliError> {
107
- let _ = (provider_args, cwd);
108
- let provider_name = match provider {
109
- Provider::Codex => "codex",
110
- Provider::ClaudeCode | Provider::Claude => "claude_code",
111
- Provider::GeminiCli => "gemini_cli",
112
- Provider::Fake => "fake",
133
+ let attach_session = attach
134
+ .attach_session
135
+ .as_ref()
136
+ .map(|name| crate::transport::SessionName::new(name.clone()));
137
+ let plan = crate::leader::start::leader_start_plan(
138
+ provider,
139
+ provider_args,
140
+ cwd,
141
+ attach.attach_existing,
142
+ attach.confirm_attach,
143
+ attach_session.as_ref(),
144
+ )
145
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
146
+ let outcome = crate::leader::start::execute_leader_plan(&plan, cwd)
147
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
148
+ let ok = match outcome.status {
149
+ crate::leader::LeaderLaunchStatus::Exited => outcome.exit_code == Some(0),
150
+ crate::leader::LeaderLaunchStatus::Detached => true,
151
+ crate::leader::LeaderLaunchStatus::NotStarted => false,
113
152
  };
114
153
  Ok(json!({
115
- "ok": true,
116
- "provider": provider_name,
154
+ "ok": ok,
155
+ "provider": provider,
156
+ "mode": plan.mode,
157
+ "status": outcome.status,
158
+ "exit_code": outcome.exit_code,
159
+ "reason": outcome.reason,
117
160
  "attach_existing": attach.attach_existing,
118
161
  "confirm_attach": attach.confirm_attach,
119
162
  "attach_session": attach.attach_session,
163
+ "session_name": plan.session_name.as_ref().map(|session| session.as_str().to_string()),
120
164
  }))
121
165
  }
122
166
  /// `runtime.shutdown`(`cmd_shutdown`)。
@@ -124,23 +168,13 @@ pub mod lifecycle_port {
124
168
  let run_ws = crate::model::paths::canonical_run_workspace(workspace)
125
169
  .map_err(|e| CliError::Runtime(e.to_string()))?;
126
170
  let state = shutdown_state_for_team(&run_ws, team)?;
127
- let endpoint = stored_tmux_endpoint(&state);
128
- let transport = match endpoint {
129
- Some(endpoint) if Path::new(endpoint).is_absolute() => {
130
- crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
131
- }
132
- Some(endpoint) if !endpoint.is_empty() => {
133
- crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
134
- }
135
- _ => shutdown_workspace_transport(&run_ws),
171
+ let transport = if let Some(endpoint) = legacy_worker_tmux_endpoint(&state) {
172
+ crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
173
+ } else {
174
+ shutdown_workspace_transport(&run_ws)
136
175
  };
137
- let result = shutdown_with_transport_and_state(
138
- workspace,
139
- keep_logs,
140
- team,
141
- &transport,
142
- Some(state),
143
- );
176
+ let result =
177
+ shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
144
178
  if team.is_none() {
145
179
  transport.kill_server();
146
180
  }
@@ -163,47 +197,63 @@ pub mod lifecycle_port {
163
197
  transport: &dyn crate::transport::Transport,
164
198
  state: Option<Value>,
165
199
  ) -> Result<Value, CliError> {
200
+ crate::os_probe::clear_probe_timeout();
201
+ let deadline = ShutdownDeadline::new(std::time::Duration::from_secs(20));
166
202
  let run_workspace = crate::model::paths::canonical_run_workspace(workspace)
167
203
  .map_err(|e| CliError::Runtime(e.to_string()))?;
168
- let stopped = if team.is_none() {
169
- let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
170
- Some(
171
- crate::coordinator::stop_coordinator(&wp)
172
- .map_err(|e| CliError::Runtime(e.to_string()))?,
204
+ let _started_event = crate::event_log::EventLog::new(&run_workspace)
205
+ .write(
206
+ "lifecycle.shutdown.started",
207
+ json!({
208
+ "keep_logs": keep_logs,
209
+ "team": team,
210
+ }),
173
211
  )
174
- } else {
175
- None
176
- };
212
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
177
213
  let mut state = match state {
178
214
  Some(state) => state,
179
215
  None => shutdown_state_for_team(&run_workspace, team)?,
180
216
  };
181
- let stored_transport = stored_tmux_endpoint(&state).map(tmux_transport_for_endpoint);
182
- let transport = stored_transport
183
- .as_ref()
184
- .map(|transport| transport as &dyn crate::transport::Transport)
185
- .unwrap_or(transport);
186
- let captured_missing_sessions = crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
187
- .map_err(|e| CliError::Runtime(e.to_string()))?;
217
+ deadline.check("refresh_provider_sessions")?;
218
+ let captured_missing_sessions =
219
+ crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
220
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
188
221
  let session_name = state
189
222
  .get("session_name")
190
223
  .and_then(Value::as_str)
191
224
  .filter(|s| !s.is_empty())
192
225
  .map(crate::transport::SessionName::new);
193
- let mut root_pids = state_process_roots(&state);
226
+ let protected = shutdown_protection_set();
227
+ let reap_scope = if team.is_some() {
228
+ ShutdownReapScope::ScopedTeam
229
+ } else {
230
+ ShutdownReapScope::Workspace
231
+ };
232
+ deadline.check("process_roots")?;
233
+ let mut root_pids = state_process_roots(&state, reap_scope)
234
+ .into_iter()
235
+ .filter(|pid| !protected.contains_pid(*pid))
236
+ .collect::<Vec<_>>();
194
237
  let pane_pids = session_name
195
238
  .as_ref()
196
- .map(|session| pane_pids_for_session(transport, session))
239
+ .map(|session| {
240
+ pane_pids_for_session(transport, session)
241
+ .into_iter()
242
+ .filter(|pid| !protected.contains_pid(*pid))
243
+ .collect::<Vec<_>>()
244
+ })
197
245
  .unwrap_or_default();
198
246
  root_pids.extend(pane_pids);
199
247
  root_pids.sort_unstable();
200
248
  root_pids.dedup();
201
- let root_pgids = process_pgids(&root_pids);
249
+ let root_pgids = process_pgids(&root_pids, &protected);
250
+ deadline.check("reap_process_tree")?;
202
251
  for pid in &root_pids {
203
- reap_process_tree(*pid);
252
+ reap_process_tree(*pid, &protected);
204
253
  }
205
- reap_process_groups(&root_pgids);
254
+ reap_process_groups(&root_pgids, &protected);
206
255
  let mut kill_error: Option<String> = None;
256
+ deadline.check("kill_session")?;
207
257
  if let Some(session) = session_name.as_ref() {
208
258
  if let Err(error) = transport.kill_session(session) {
209
259
  if !tmux_absent_error(&error.to_string()) {
@@ -211,7 +261,16 @@ pub mod lifecycle_port {
211
261
  }
212
262
  }
213
263
  }
214
- reap_workspace_process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
264
+ deadline.check("reap_workspace_residuals")?;
265
+ reap_workspace_process_residuals(
266
+ &run_workspace,
267
+ &state,
268
+ &root_pids,
269
+ &root_pgids,
270
+ &protected,
271
+ reap_scope,
272
+ );
273
+ deadline.check("session_residuals")?;
215
274
  let session_residuals = if let Some(session) = session_name.as_ref() {
216
275
  let (residuals, error) = session_residuals_after_reap(
217
276
  transport,
@@ -226,33 +285,92 @@ pub mod lifecycle_port {
226
285
  } else {
227
286
  Vec::new()
228
287
  };
229
- let process_residuals = process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
288
+ deadline.check("process_residuals")?;
289
+ let process_residuals = process_residuals(
290
+ &run_workspace,
291
+ &state,
292
+ &root_pids,
293
+ &root_pgids,
294
+ &protected,
295
+ reap_scope,
296
+ );
297
+ deadline.check("stop_coordinator")?;
298
+ let mut coordinator_timeout = false;
299
+ let stopped = if team.is_none() {
300
+ let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
301
+ match stop_coordinator_bounded(wp, std::time::Duration::from_millis(900)) {
302
+ Some(Ok(report)) => Some(report),
303
+ Some(Err(error)) => {
304
+ kill_error.get_or_insert(error);
305
+ None
306
+ }
307
+ None => {
308
+ coordinator_timeout = true;
309
+ None
310
+ }
311
+ }
312
+ } else {
313
+ None
314
+ };
315
+ let probe_timeout = crate::os_probe::probe_timeout();
316
+ let verification_degraded = probe_timeout.is_some();
230
317
  let session_killed = session_name.is_some()
231
318
  && kill_error.is_none()
232
319
  && session_residuals.is_empty()
233
320
  && process_residuals.is_empty();
234
321
  mark_agents_stopped(&mut state);
322
+ deadline.check("save_state")?;
235
323
  if team.is_some() {
236
324
  crate::state::projection::save_team_scoped_state(&run_workspace, &state)?;
325
+ promote_live_sibling_after_scoped_shutdown(&run_workspace, &state)?;
237
326
  } else {
327
+ let _changed_keys =
328
+ mark_matching_session_teams_stopped(&mut state, session_name.as_ref());
238
329
  crate::state::persist::save_runtime_state(&run_workspace, &state)?;
239
330
  }
240
- let coordinator_status = stopped
331
+ let coordinator_status = if coordinator_timeout {
332
+ "timeout"
333
+ } else {
334
+ stopped
335
+ .as_ref()
336
+ .map(|stopped| stop_status_wire(stopped.status))
337
+ .unwrap_or("not_stopped")
338
+ };
339
+ let coordinator_pid = stopped
241
340
  .as_ref()
242
- .map(|stopped| stop_status_wire(stopped.status))
243
- .unwrap_or("not_stopped");
244
- let coordinator_pid = stopped.as_ref().and_then(|stopped| stopped.pid.map(|p| p.get()));
341
+ .and_then(|stopped| stopped.pid.map(|p| p.get()));
245
342
  let ok = stopped.as_ref().map(|stopped| stopped.ok).unwrap_or(true)
246
343
  && kill_error.is_none()
247
344
  && session_residuals.is_empty()
248
- && process_residuals.is_empty();
345
+ && process_residuals.is_empty()
346
+ && !verification_degraded
347
+ && !coordinator_timeout;
249
348
  let status = if ok {
250
349
  "ok"
350
+ } else if coordinator_timeout {
351
+ "timeout"
352
+ } else if verification_degraded {
353
+ "partial"
251
354
  } else if kill_error.is_some() {
252
355
  "failed"
253
356
  } else {
254
357
  "partial"
255
358
  };
359
+ let phase = if coordinator_timeout {
360
+ Some("stop_coordinator")
361
+ } else if verification_degraded {
362
+ Some("os_probe")
363
+ } else {
364
+ None
365
+ };
366
+ let probe_timeout_kind = probe_timeout.as_ref().map(|timeout| timeout.probe);
367
+ let probe_timeout_value = probe_timeout.as_ref().map(|timeout| {
368
+ json!({
369
+ "probe": timeout.probe,
370
+ "pid": timeout.pid,
371
+ "timeout_ms": timeout.timeout_ms,
372
+ })
373
+ });
256
374
  let _event = crate::event_log::EventLog::new(&run_workspace)
257
375
  .write(
258
376
  "lifecycle.shutdown",
@@ -263,12 +381,20 @@ pub mod lifecycle_port {
263
381
  "session_killed": session_killed,
264
382
  "coordinator_status": coordinator_status,
265
383
  "status": status,
384
+ "phase": phase,
385
+ "verification_degraded": verification_degraded,
386
+ "probe_timeout_kind": probe_timeout_kind,
387
+ "probe_timeout": probe_timeout_value,
266
388
  }),
267
389
  )
268
390
  .map_err(|e| CliError::Runtime(e.to_string()))?;
269
391
  Ok(json!({
270
392
  "ok": ok,
271
393
  "status": status,
394
+ "phase": phase,
395
+ "verification_degraded": verification_degraded,
396
+ "probe_timeout_kind": probe_timeout_kind,
397
+ "probe_timeout": probe_timeout_value,
272
398
  "keep_logs": keep_logs,
273
399
  "team": team,
274
400
  "session_name": session_name.map(|s| s.as_str().to_string()),
@@ -285,9 +411,51 @@ pub mod lifecycle_port {
285
411
  }))
286
412
  }
287
413
 
414
+ fn stop_coordinator_bounded(
415
+ workspace: crate::coordinator::WorkspacePath,
416
+ timeout: std::time::Duration,
417
+ ) -> Option<Result<crate::coordinator::types::StopReport, String>> {
418
+ let (tx, rx) = std::sync::mpsc::channel();
419
+ std::thread::spawn(move || {
420
+ let result =
421
+ crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
422
+ let _ = tx.send(result);
423
+ });
424
+ rx.recv_timeout(timeout).ok()
425
+ }
426
+
427
+ struct ShutdownDeadline {
428
+ start: std::time::Instant,
429
+ timeout: std::time::Duration,
430
+ }
431
+
432
+ impl ShutdownDeadline {
433
+ fn new(timeout: std::time::Duration) -> Self {
434
+ Self {
435
+ start: std::time::Instant::now(),
436
+ timeout,
437
+ }
438
+ }
439
+
440
+ fn check(&self, phase: &'static str) -> Result<(), CliError> {
441
+ if self.start.elapsed() >= self.timeout {
442
+ return Err(CliError::Runtime(
443
+ json!({
444
+ "ok": false,
445
+ "status": "timeout",
446
+ "phase": phase,
447
+ })
448
+ .to_string(),
449
+ ));
450
+ }
451
+ Ok(())
452
+ }
453
+ }
454
+
288
455
  fn shutdown_state_for_team(workspace: &Path, team: Option<&str>) -> Result<Value, CliError> {
289
456
  if let Some(team) = team {
290
- crate::state::projection::select_runtime_state(workspace, Some(team)).map_err(CliError::from)
457
+ crate::state::projection::select_runtime_state(workspace, Some(team))
458
+ .map_err(CliError::from)
291
459
  } else {
292
460
  crate::state::persist::load_runtime_state(workspace).map_err(CliError::from)
293
461
  }
@@ -297,46 +465,11 @@ pub mod lifecycle_port {
297
465
  crate::tmux_backend::TmuxBackend::for_workspace(workspace)
298
466
  }
299
467
 
300
- fn tmux_transport_for_endpoint(endpoint: &str) -> crate::tmux_backend::TmuxBackend {
301
- if Path::new(endpoint).is_absolute() {
302
- crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
303
- } else {
304
- crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
305
- }
306
- }
307
-
308
- fn stored_tmux_endpoint(state: &Value) -> Option<&str> {
309
- leader_receiver_tmux_socket(state)
310
- .or_else(|| active_team_entry(state).and_then(leader_receiver_tmux_socket))
311
- .or_else(|| only_team_entry(state).and_then(leader_receiver_tmux_socket))
312
- }
313
-
314
- fn leader_receiver_tmux_socket(state: &Value) -> Option<&str> {
468
+ fn legacy_worker_tmux_endpoint(state: &Value) -> Option<&str> {
315
469
  state
316
- .get("leader_receiver")
317
- .and_then(|receiver| receiver.get("tmux_socket"))
470
+ .get("tmux_endpoint")
318
471
  .and_then(Value::as_str)
319
- .filter(|socket| !socket.is_empty())
320
- }
321
-
322
- fn active_team_entry(state: &Value) -> Option<&Value> {
323
- let active = state
324
- .get("active_team_key")
325
- .and_then(Value::as_str)
326
- .filter(|team| !team.is_empty())?;
327
- state
328
- .get("teams")
329
- .and_then(Value::as_object)
330
- .and_then(|teams| teams.get(active))
331
- }
332
-
333
- fn only_team_entry(state: &Value) -> Option<&Value> {
334
- let teams = state.get("teams").and_then(Value::as_object)?;
335
- if teams.len() == 1 {
336
- teams.values().next()
337
- } else {
338
- None
339
- }
472
+ .filter(|endpoint| !endpoint.is_empty())
340
473
  }
341
474
 
342
475
  fn pane_pids_for_session(
@@ -399,12 +532,20 @@ pub mod lifecycle_port {
399
532
  (sessions, error)
400
533
  }
401
534
 
402
- fn state_process_roots(state: &Value) -> Vec<u32> {
535
+ #[derive(Clone, Copy, Debug, Eq, PartialEq)]
536
+ enum ShutdownReapScope {
537
+ Workspace,
538
+ ScopedTeam,
539
+ }
540
+
541
+ fn state_process_roots(state: &Value, scope: ShutdownReapScope) -> Vec<u32> {
403
542
  let mut out = Vec::new();
404
543
  collect_agent_process_roots(state, &mut out);
405
- if let Some(teams) = state.get("teams").and_then(Value::as_object) {
406
- for team in teams.values() {
407
- collect_agent_process_roots(team, &mut out);
544
+ if scope == ShutdownReapScope::Workspace {
545
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
546
+ for team in teams.values() {
547
+ collect_agent_process_roots(team, &mut out);
548
+ }
408
549
  }
409
550
  }
410
551
  out.sort_unstable();
@@ -433,8 +574,11 @@ pub mod lifecycle_port {
433
574
  .filter(|pid| *pid > 0)
434
575
  }
435
576
 
436
- fn reap_process_tree(root_pid: u32) {
437
- let pids = process_tree_pids(root_pid);
577
+ fn reap_process_tree(root_pid: u32, protected: &ShutdownProtection) {
578
+ let pids = process_tree_pids(root_pid)
579
+ .into_iter()
580
+ .filter(|pid| !protected.contains_pid(*pid))
581
+ .collect::<Vec<_>>();
438
582
  for pid in pids.iter().rev() {
439
583
  send_process_signal(*pid, libc::SIGTERM);
440
584
  }
@@ -445,13 +589,12 @@ pub mod lifecycle_port {
445
589
  wait_for_processes_gone(&pids, std::time::Duration::from_secs(1));
446
590
  }
447
591
 
448
- fn reap_process_groups(pgids: &[u32]) {
449
- let current_pgid = unsafe { libc::getpgrp() };
592
+ fn reap_process_groups(pgids: &[u32], protected: &ShutdownProtection) {
450
593
  for pgid in pgids {
451
594
  let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
452
595
  continue;
453
596
  };
454
- if pgid_t <= 1 || pgid_t == current_pgid {
597
+ if pgid_t <= 1 || protected.contains_pgid(*pgid) {
455
598
  continue;
456
599
  }
457
600
  send_process_signal_group(pgid_t, libc::SIGTERM);
@@ -461,7 +604,7 @@ pub mod lifecycle_port {
461
604
  let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
462
605
  continue;
463
606
  };
464
- if pgid_t <= 1 || pgid_t == current_pgid {
607
+ if pgid_t <= 1 || protected.contains_pgid(*pgid) {
465
608
  continue;
466
609
  }
467
610
  send_process_signal_group(pgid_t, libc::SIGKILL);
@@ -473,20 +616,23 @@ pub mod lifecycle_port {
473
616
  state: &Value,
474
617
  root_pids: &[u32],
475
618
  root_pgids: &[u32],
619
+ protected: &ShutdownProtection,
620
+ scope: ShutdownReapScope,
476
621
  ) {
477
622
  for _ in 0..5 {
478
- let residuals = matched_processes(workspace, state, root_pids, root_pgids);
623
+ let residuals =
624
+ matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
479
625
  if residuals.is_empty() {
480
626
  return;
481
627
  }
482
628
  for process in &residuals {
483
- reap_process_tree(process.pid);
629
+ reap_process_tree(process.pid, protected);
484
630
  }
485
631
  let pgids = residuals
486
632
  .iter()
487
633
  .filter_map(|process| process.pgid)
488
634
  .collect::<Vec<_>>();
489
- reap_process_groups(&pgids);
635
+ reap_process_groups(&pgids, protected);
490
636
  std::thread::sleep(std::time::Duration::from_millis(100));
491
637
  }
492
638
  }
@@ -513,10 +659,11 @@ pub mod lifecycle_port {
513
659
  }
514
660
 
515
661
  fn process_parent_pairs() -> Vec<(u32, u32)> {
516
- let output = match std::process::Command::new("ps")
517
- .args(["-axo", "pid=,ppid="])
518
- .output()
519
- {
662
+ let output = match crate::os_probe::bounded_command_output_with_probe(
663
+ std::process::Command::new("ps").args(["-axo", "pid=,ppid="]),
664
+ "ps_parent",
665
+ None,
666
+ ) {
520
667
  Ok(output) if output.status.success() => output,
521
668
  _ => return Vec::new(),
522
669
  };
@@ -532,10 +679,11 @@ pub mod lifecycle_port {
532
679
  }
533
680
 
534
681
  fn process_table() -> Vec<ProcessInfo> {
535
- let output = match std::process::Command::new("ps")
536
- .args(["-axo", "pid=,ppid=,pgid=,command="])
537
- .output()
538
- {
682
+ let output = match crate::os_probe::bounded_command_output_with_probe(
683
+ std::process::Command::new("ps").args(["-axo", "pid=,ppid=,pgid=,sess=,command="]),
684
+ "ps_table",
685
+ None,
686
+ ) {
539
687
  Ok(output) if output.status.success() => output,
540
688
  _ => return Vec::new(),
541
689
  };
@@ -550,11 +698,13 @@ pub mod lifecycle_port {
550
698
  let pid = parts.next()?.parse::<u32>().ok()?;
551
699
  let ppid = parts.next()?.parse::<u32>().ok()?;
552
700
  let pgid = parts.next().and_then(|raw| raw.parse::<u32>().ok());
701
+ let session = parts.next().and_then(|raw| raw.parse::<u32>().ok());
553
702
  let command = parts.collect::<Vec<_>>().join(" ");
554
703
  Some(ProcessInfo {
555
704
  pid,
556
705
  ppid,
557
706
  pgid,
707
+ session,
558
708
  command,
559
709
  })
560
710
  }
@@ -564,9 +714,57 @@ pub mod lifecycle_port {
564
714
  pid: u32,
565
715
  ppid: u32,
566
716
  pgid: Option<u32>,
717
+ session: Option<u32>,
567
718
  command: String,
568
719
  }
569
720
 
721
+ #[derive(Clone, Debug, Default)]
722
+ struct ShutdownProtection {
723
+ pids: std::collections::BTreeSet<u32>,
724
+ pgids: std::collections::BTreeSet<u32>,
725
+ }
726
+
727
+ impl ShutdownProtection {
728
+ fn contains_pid(&self, pid: u32) -> bool {
729
+ self.pids.contains(&pid)
730
+ }
731
+
732
+ fn contains_pgid(&self, pgid: u32) -> bool {
733
+ self.pgids.contains(&pgid)
734
+ }
735
+
736
+ fn contains_process(&self, process: &ProcessInfo) -> bool {
737
+ self.pids.contains(&process.pid)
738
+ || process.pgid.is_some_and(|pgid| self.pgids.contains(&pgid))
739
+ }
740
+ }
741
+
742
+ fn shutdown_protection_set() -> ShutdownProtection {
743
+ let table = process_table();
744
+ let mut protected = ShutdownProtection::default();
745
+ let current = std::process::id();
746
+ protected.pids.insert(current);
747
+ if let Ok(pgid) = u32::try_from(unsafe { libc::getpgrp() }) {
748
+ protected.pgids.insert(pgid);
749
+ }
750
+ let mut cursor = current;
751
+ let mut seen = std::collections::BTreeSet::new();
752
+ while seen.insert(cursor) {
753
+ let Some(process) = table.iter().find(|process| process.pid == cursor) else {
754
+ break;
755
+ };
756
+ protected.pids.insert(process.pid);
757
+ if let Some(pgid) = process.pgid {
758
+ protected.pgids.insert(pgid);
759
+ }
760
+ if process.ppid == 0 || process.ppid == process.pid {
761
+ break;
762
+ }
763
+ cursor = process.ppid;
764
+ }
765
+ protected
766
+ }
767
+
570
768
  fn send_process_signal(pid: u32, signal: libc::c_int) {
571
769
  let Ok(pid_t) = libc::pid_t::try_from(pid) else {
572
770
  return;
@@ -617,16 +815,15 @@ pub mod lifecycle_port {
617
815
  err.raw_os_error() == Some(libc::EPERM)
618
816
  }
619
817
 
620
- fn process_pgids(pids: &[u32]) -> Vec<u32> {
818
+ fn process_pgids(pids: &[u32], protected: &ShutdownProtection) -> Vec<u32> {
621
819
  let table = process_table();
622
- let current_pgid = unsafe { libc::getpgrp() };
623
820
  let mut pgids = pids
624
821
  .iter()
625
822
  .filter_map(|pid| table.iter().find(|process| process.pid == *pid))
626
823
  .filter_map(|process| process.pgid)
627
824
  .filter(|pgid| {
628
825
  libc::pid_t::try_from(*pgid)
629
- .map(|pgid| pgid > 1 && pgid != current_pgid)
826
+ .map(|pgid_t| pgid_t > 1 && !protected.contains_pgid(*pgid))
630
827
  .unwrap_or(false)
631
828
  })
632
829
  .collect::<Vec<_>>();
@@ -640,15 +837,22 @@ pub mod lifecycle_port {
640
837
  state: &Value,
641
838
  root_pids: &[u32],
642
839
  root_pgids: &[u32],
840
+ protected: &ShutdownProtection,
841
+ scope: ShutdownReapScope,
643
842
  ) -> Vec<Value> {
644
- let mut residuals = matched_processes(workspace, state, root_pids, root_pgids);
645
- let mut seen = residuals.iter().map(|process| process.pid).collect::<std::collections::BTreeSet<_>>();
843
+ let mut residuals =
844
+ matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
845
+ let mut seen = residuals
846
+ .iter()
847
+ .map(|process| process.pid)
848
+ .collect::<std::collections::BTreeSet<_>>();
646
849
  for pid in root_pids {
647
- if process_is_live(*pid) && seen.insert(*pid) {
850
+ if !protected.contains_pid(*pid) && process_is_live(*pid) && seen.insert(*pid) {
648
851
  residuals.push(ProcessInfo {
649
852
  pid: *pid,
650
853
  ppid: 0,
651
854
  pgid: None,
855
+ session: None,
652
856
  command: String::new(),
653
857
  });
654
858
  }
@@ -660,6 +864,7 @@ pub mod lifecycle_port {
660
864
  "pid": process.pid,
661
865
  "ppid": process.ppid,
662
866
  "pgid": process.pgid,
867
+ "session": process.session,
663
868
  "command": process.command,
664
869
  })
665
870
  })
@@ -671,25 +876,42 @@ pub mod lifecycle_port {
671
876
  state: &Value,
672
877
  root_pids: &[u32],
673
878
  root_pgids: &[u32],
879
+ protected: &ShutdownProtection,
880
+ scope: ShutdownReapScope,
674
881
  ) -> Vec<ProcessInfo> {
675
882
  let table = process_table();
676
883
  let root_tree = root_pids
677
884
  .iter()
678
885
  .flat_map(|pid| process_tree_from_table(*pid, &table))
886
+ .filter(|pid| !protected.contains_pid(*pid))
679
887
  .collect::<std::collections::BTreeSet<_>>();
680
- let root_pgids = root_pgids.iter().copied().collect::<std::collections::BTreeSet<_>>();
681
- let spawn_cwds = state_spawn_cwds(state);
888
+ let root_pgids = root_pgids
889
+ .iter()
890
+ .copied()
891
+ .collect::<std::collections::BTreeSet<_>>();
892
+ let spawn_cwds = state_spawn_cwds(state, scope);
682
893
  let workspace_text = workspace.to_string_lossy().to_string();
683
- let current_pid = std::process::id();
684
- table
685
- .into_iter()
686
- .filter(|process| process.pid != current_pid)
687
- .filter(|process| {
688
- process_matches_workspace(process, &workspace_text, &spawn_cwds)
689
- || root_tree.contains(&process.pid)
690
- || process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
691
- })
692
- .collect()
894
+ let mut cwd_probe_budget = 3_usize;
895
+ let mut out = Vec::new();
896
+ for process in table {
897
+ if protected.contains_pid(process.pid) {
898
+ continue;
899
+ }
900
+ let matches_workspace = scope == ShutdownReapScope::Workspace
901
+ && process_matches_workspace(
902
+ &process,
903
+ &workspace_text,
904
+ &spawn_cwds,
905
+ &mut cwd_probe_budget,
906
+ );
907
+ if matches_workspace
908
+ || root_tree.contains(&process.pid)
909
+ || process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
910
+ {
911
+ out.push(process);
912
+ }
913
+ }
914
+ out
693
915
  }
694
916
 
695
917
  fn process_tree_from_table(root_pid: u32, table: &[ProcessInfo]) -> Vec<u32> {
@@ -712,12 +934,14 @@ pub mod lifecycle_port {
712
934
  out
713
935
  }
714
936
 
715
- fn state_spawn_cwds(state: &Value) -> Vec<PathBuf> {
937
+ fn state_spawn_cwds(state: &Value, scope: ShutdownReapScope) -> Vec<PathBuf> {
716
938
  let mut out = Vec::new();
717
939
  collect_spawn_cwds(state, &mut out);
718
- if let Some(teams) = state.get("teams").and_then(Value::as_object) {
719
- for team in teams.values() {
720
- collect_spawn_cwds(team, &mut out);
940
+ if scope == ShutdownReapScope::Workspace {
941
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
942
+ for team in teams.values() {
943
+ collect_spawn_cwds(team, &mut out);
944
+ }
721
945
  }
722
946
  }
723
947
  out
@@ -728,7 +952,11 @@ pub mod lifecycle_port {
728
952
  return;
729
953
  };
730
954
  for agent in agents.values() {
731
- if let Some(spawn_cwd) = agent.get("spawn_cwd").and_then(Value::as_str).filter(|cwd| !cwd.is_empty()) {
955
+ if let Some(spawn_cwd) = agent
956
+ .get("spawn_cwd")
957
+ .and_then(Value::as_str)
958
+ .filter(|cwd| !cwd.is_empty())
959
+ {
732
960
  out.push(PathBuf::from(spawn_cwd));
733
961
  }
734
962
  }
@@ -738,6 +966,7 @@ pub mod lifecycle_port {
738
966
  process: &ProcessInfo,
739
967
  workspace_text: &str,
740
968
  spawn_cwds: &[PathBuf],
969
+ cwd_probe_budget: &mut usize,
741
970
  ) -> bool {
742
971
  let command = process.command.as_str();
743
972
  if command.contains("mcp-server")
@@ -746,22 +975,19 @@ pub mod lifecycle_port {
746
975
  {
747
976
  return true;
748
977
  }
749
- let lower = command.to_ascii_lowercase();
750
- let provider_like = lower.contains("codex")
751
- || lower.contains("claude")
752
- || lower.contains("node")
753
- || lower.contains("mcp-server")
754
- || lower.contains("team-agent");
755
- if !provider_like {
756
- return false;
757
- }
758
978
  if command.contains(workspace_text) {
759
979
  return true;
760
980
  }
981
+ if spawn_cwds.is_empty() || *cwd_probe_budget == 0 {
982
+ return false;
983
+ }
984
+ *cwd_probe_budget -= 1;
761
985
  let Some(cwd) = process_cwd(process.pid) else {
762
986
  return false;
763
987
  };
764
- spawn_cwds.iter().any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
988
+ spawn_cwds
989
+ .iter()
990
+ .any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
765
991
  }
766
992
 
767
993
  fn process_cwd(pid: u32) -> Option<PathBuf> {
@@ -769,10 +995,22 @@ pub mod lifecycle_port {
769
995
  if let Ok(path) = std::fs::read_link(proc_cwd) {
770
996
  return Some(path);
771
997
  }
772
- let output = std::process::Command::new("lsof")
773
- .args(["-a", "-p", &pid.to_string(), "-d", "cwd", "-Fn"])
774
- .output()
775
- .ok()?;
998
+ if crate::os_probe::probe_timed_out() {
999
+ return None;
1000
+ }
1001
+ let output = crate::os_probe::bounded_command_output_with_probe(
1002
+ std::process::Command::new("lsof").args([
1003
+ "-a",
1004
+ "-p",
1005
+ &pid.to_string(),
1006
+ "-d",
1007
+ "cwd",
1008
+ "-Fn",
1009
+ ]),
1010
+ "lsof_cwd",
1011
+ Some(pid),
1012
+ )
1013
+ .ok()?;
776
1014
  if !output.status.success() {
777
1015
  return None;
778
1016
  }
@@ -787,8 +1025,18 @@ pub mod lifecycle_port {
787
1025
  path == root || path.starts_with(root)
788
1026
  }
789
1027
  /// `runtime.restart`(`cmd_restart`)。
790
- pub fn restart(workspace: &Path, allow_fresh: bool, team: Option<&str>) -> Result<Value, CliError> {
791
- match crate::lifecycle::restart(workspace, allow_fresh, team) {
1028
+ pub fn restart(
1029
+ workspace: &Path,
1030
+ allow_fresh: bool,
1031
+ team: Option<&str>,
1032
+ session_converge_deadline_ms: Option<u64>,
1033
+ ) -> Result<Value, CliError> {
1034
+ match crate::lifecycle::restart_with_session_convergence_deadline(
1035
+ workspace,
1036
+ allow_fresh,
1037
+ team,
1038
+ session_converge_deadline_ms,
1039
+ ) {
792
1040
  Ok(report) => Ok(restart_value(report)),
793
1041
  Err(e) => Ok(error_value(e)),
794
1042
  }
@@ -811,12 +1059,18 @@ pub mod lifecycle_port {
811
1059
  allow_fresh,
812
1060
  team,
813
1061
  ) {
814
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1062
+ Ok(report) => {
1063
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1064
+ }
815
1065
  Err(e) => Ok(error_value(e)),
816
1066
  }
817
1067
  }
818
1068
  /// `runtime.stop_agent`(`cmd_stop_agent`)。
819
- pub fn stop_agent(workspace: &Path, agent: &str, team: Option<&str>) -> Result<Value, CliError> {
1069
+ pub fn stop_agent(
1070
+ workspace: &Path,
1071
+ agent: &str,
1072
+ team: Option<&str>,
1073
+ ) -> Result<Value, CliError> {
820
1074
  let agent_id = crate::model::ids::AgentId::new(agent);
821
1075
  match crate::lifecycle::stop_agent(workspace, &agent_id, team) {
822
1076
  Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "stopped": report.stopped})),
@@ -839,7 +1093,9 @@ pub mod lifecycle_port {
839
1093
  open_display,
840
1094
  team,
841
1095
  ) {
842
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1096
+ Ok(report) => {
1097
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1098
+ }
843
1099
  Err(e) => Ok(error_value(e)),
844
1100
  }
845
1101
  }
@@ -898,11 +1154,15 @@ pub mod lifecycle_port {
898
1154
  team: Option<&str>,
899
1155
  ) -> Result<Value, CliError> {
900
1156
  if !confirm {
901
- return Ok(json!({"ok": false, "agent_id": agent, "error": "remove-agent requires --confirm"}));
1157
+ return Ok(
1158
+ json!({"ok": false, "agent_id": agent, "error": "remove-agent requires --confirm"}),
1159
+ );
902
1160
  }
903
1161
  let agent_id = crate::model::ids::AgentId::new(agent);
904
1162
  match crate::lifecycle::remove_agent(workspace, &agent_id, from_spec, force, team) {
905
- Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")})),
1163
+ Ok(report) => {
1164
+ Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
1165
+ }
906
1166
  Err(e) => Ok(error_value(e)),
907
1167
  }
908
1168
  }
@@ -912,9 +1172,18 @@ pub mod lifecycle_port {
912
1172
  .map_err(|e| CliError::Runtime(e.to_string()))?;
913
1173
  let team = team
914
1174
  .map(ToString::to_string)
915
- .or_else(|| state.get("active_team_key").and_then(Value::as_str).map(ToString::to_string))
1175
+ .or_else(|| {
1176
+ state
1177
+ .get("active_team_key")
1178
+ .and_then(Value::as_str)
1179
+ .map(ToString::to_string)
1180
+ })
916
1181
  .filter(|s| !s.is_empty())
917
- .or_else(|| workspace.file_name().map(|name| name.to_string_lossy().to_string()))
1182
+ .or_else(|| {
1183
+ workspace
1184
+ .file_name()
1185
+ .map(|name| name.to_string_lossy().to_string())
1186
+ })
918
1187
  .unwrap_or_else(|| "current".to_string());
919
1188
  let now = chrono::Utc::now().to_rfc3339();
920
1189
  let ttl_seconds = 1800;
@@ -930,7 +1199,10 @@ pub mod lifecycle_port {
930
1199
  crate::state::persist::save_runtime_state(workspace, &state)
931
1200
  .map_err(|e| CliError::Runtime(e.to_string()))?;
932
1201
  crate::event_log::EventLog::new(workspace)
933
- .write("coordinator.idle_acknowledged", json!({"team": team, "ttl_seconds": ttl_seconds}))
1202
+ .write(
1203
+ "coordinator.idle_acknowledged",
1204
+ json!({"team": team, "ttl_seconds": ttl_seconds}),
1205
+ )
934
1206
  .map_err(|e| CliError::Runtime(e.to_string()))?;
935
1207
  Ok(json!({
936
1208
  "ok": true,
@@ -1064,12 +1336,23 @@ pub mod lifecycle_port {
1064
1336
  session_name,
1065
1337
  launch,
1066
1338
  next_actions,
1339
+ attach_commands,
1340
+ display_backend,
1067
1341
  worker_readiness,
1068
1342
  } => {
1069
1343
  // BUG-7: never emit bare "ready" while worker tool-load is unverified.
1070
1344
  // The summary string + a structured `worker_readiness` block tell the
1071
1345
  // caller exactly which agents are unhealthy (Degraded) or that the
1072
1346
  // tool-set load has not been confirmed yet (PendingToolLoad).
1347
+ let incomplete_session_capture_agents =
1348
+ launch.session_capture_incomplete_agents.clone();
1349
+ let all_spawned = !launch.started.is_empty();
1350
+ let leader_receiver_attached = launch.leader_receiver_attached;
1351
+ let all_resumable_have_session = incomplete_session_capture_agents.is_empty();
1352
+ let all_workers_spawned = all_spawned;
1353
+ let attached_receiver = leader_receiver_attached;
1354
+ let all_attached_receiver = leader_receiver_attached;
1355
+ let all_resumable_agents_have_sessions = all_resumable_have_session;
1073
1356
  let (summary, ok, readiness_json) = match &worker_readiness {
1074
1357
  crate::lifecycle::QuickStartReadiness::Degraded { unhealthy_agents } => (
1075
1358
  format!(
@@ -1079,28 +1362,111 @@ pub mod lifecycle_port {
1079
1362
  ),
1080
1363
  false,
1081
1364
  json!({
1365
+ "all_spawned": all_spawned,
1366
+ "all_workers_spawned": all_workers_spawned,
1367
+ "all_attached_receiver": all_attached_receiver,
1368
+ "attached_receiver": attached_receiver,
1369
+ "leader_receiver_attached": leader_receiver_attached,
1370
+ "all_resumable_have_session": all_resumable_have_session,
1371
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1372
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1082
1373
  "state": "degraded",
1374
+ "session_capture_complete": all_resumable_have_session,
1375
+ "session_capture_incomplete": !all_resumable_have_session,
1376
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1377
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1083
1378
  "unhealthy_agents": unhealthy_agents,
1084
1379
  }),
1085
1380
  ),
1086
- crate::lifecycle::QuickStartReadiness::PendingToolLoad => (
1087
- format!(
1088
- "quick-start launched (worker tool load unverified): {}",
1089
- session_name.as_str()
1090
- ),
1091
- true,
1092
- json!({
1093
- "state": "pending_tool_load",
1094
- "reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
1095
- }),
1096
- ),
1381
+ crate::lifecycle::QuickStartReadiness::PendingToolLoad => {
1382
+ if !all_resumable_have_session {
1383
+ (
1384
+ format!(
1385
+ "quick-start pending: {}; provider session capture incomplete",
1386
+ session_name.as_str()
1387
+ ),
1388
+ false,
1389
+ json!({
1390
+ "all_spawned": all_spawned,
1391
+ "all_workers_spawned": all_workers_spawned,
1392
+ "all_attached_receiver": all_attached_receiver,
1393
+ "attached_receiver": attached_receiver,
1394
+ "leader_receiver_attached": leader_receiver_attached,
1395
+ "all_resumable_have_session": all_resumable_have_session,
1396
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1397
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1398
+ "state": "session_capture_incomplete",
1399
+ "session_capture_complete": all_resumable_have_session,
1400
+ "session_capture_incomplete": !all_resumable_have_session,
1401
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1402
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1403
+ "reason": "provider session capture is incomplete; restart is not yet resume-safe",
1404
+ }),
1405
+ )
1406
+ } else if launch.leader_receiver_attached {
1407
+ (
1408
+ format!(
1409
+ "quick-start launched (worker tool load unverified): {}",
1410
+ session_name.as_str()
1411
+ ),
1412
+ all_spawned && all_attached_receiver && all_resumable_have_session,
1413
+ json!({
1414
+ "all_spawned": all_spawned,
1415
+ "all_workers_spawned": all_workers_spawned,
1416
+ "all_attached_receiver": all_attached_receiver,
1417
+ "attached_receiver": attached_receiver,
1418
+ "leader_receiver_attached": leader_receiver_attached,
1419
+ "all_resumable_have_session": all_resumable_have_session,
1420
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1421
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1422
+ "state": "pending_tool_load",
1423
+ "session_capture_complete": all_resumable_have_session,
1424
+ "session_capture_incomplete": !all_resumable_have_session,
1425
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1426
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1427
+ "reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
1428
+ }),
1429
+ )
1430
+ } else {
1431
+ (
1432
+ format!(
1433
+ "quick-start degraded: {}; leader receiver unbound",
1434
+ session_name.as_str()
1435
+ ),
1436
+ false,
1437
+ json!({
1438
+ "all_spawned": all_spawned,
1439
+ "all_workers_spawned": all_workers_spawned,
1440
+ "all_attached_receiver": all_attached_receiver,
1441
+ "attached_receiver": attached_receiver,
1442
+ "leader_receiver_attached": leader_receiver_attached,
1443
+ "all_resumable_have_session": all_resumable_have_session,
1444
+ "all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
1445
+ "ready": all_spawned && all_attached_receiver && all_resumable_have_session,
1446
+ "state": "leader_receiver_unbound",
1447
+ "session_capture_complete": all_resumable_have_session,
1448
+ "session_capture_incomplete": !all_resumable_have_session,
1449
+ "incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
1450
+ "pending_session_agent_ids": incomplete_session_capture_agents,
1451
+ "reason": "launched team has no attached leader receiver",
1452
+ "next_action": "claim-leader",
1453
+ }),
1454
+ )
1455
+ }
1456
+ }
1097
1457
  };
1098
1458
  json!({
1099
1459
  "ok": ok,
1100
1460
  "summary": summary,
1461
+ "status": readiness_json.get("state").cloned().unwrap_or(Value::Null),
1462
+ "reason": readiness_json.get("reason").cloned().unwrap_or(Value::Null),
1463
+ "ready": readiness_json.get("ready").cloned().unwrap_or(Value::Bool(false)),
1101
1464
  "session_name": session_name.as_str(),
1102
1465
  "dry_run": launch.dry_run,
1466
+ "display_backend": display_backend,
1103
1467
  "next_actions": next_actions,
1468
+ "attach_commands": attach_commands,
1469
+ "readiness": readiness_json.clone(),
1104
1470
  "worker_readiness": readiness_json,
1105
1471
  })
1106
1472
  }
@@ -1136,12 +1502,16 @@ pub mod lifecycle_port {
1136
1502
  session_name,
1137
1503
  agents,
1138
1504
  coordinator_started,
1505
+ next_actions,
1506
+ attach_commands,
1139
1507
  } => json!({
1140
1508
  "ok": true,
1141
1509
  "status": "restarted",
1142
1510
  "session_name": session_name.as_str(),
1143
1511
  "agents": agents.iter().map(|a| a.agent_id.as_str()).collect::<Vec<_>>(),
1144
1512
  "coordinator_started": coordinator_started,
1513
+ "next_actions": next_actions,
1514
+ "attach_commands": attach_commands,
1145
1515
  }),
1146
1516
  crate::lifecycle::RestartReport::RefusedResumeAtomicity {
1147
1517
  unresumable,
@@ -1154,6 +1524,30 @@ pub mod lifecycle_port {
1154
1524
  "error": error,
1155
1525
  "unresumable": unresumable.iter().map(|w| w.agent_id.as_str()).collect::<Vec<_>>(),
1156
1526
  }),
1527
+ crate::lifecycle::RestartReport::RefusedResumeNotReady {
1528
+ missing,
1529
+ allow_fresh,
1530
+ deadline,
1531
+ elapsed,
1532
+ error,
1533
+ } => json!({
1534
+ "ok": false,
1535
+ "kind": "resume_not_ready",
1536
+ "reason": "session_capture_incomplete",
1537
+ "status": "resume_not_ready",
1538
+ "allow_fresh": allow_fresh,
1539
+ "error": error,
1540
+ "pending_agents": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1541
+ "missing": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1542
+ "session_convergence": {
1543
+ "complete": false,
1544
+ "deadline_s": deadline.as_secs_f64(),
1545
+ "deadline_ms": deadline.as_millis(),
1546
+ "elapsed_ms": elapsed.as_millis(),
1547
+ "pending_agent_ids": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
1548
+ },
1549
+ "next_action": "rerun restart after session capture completes, or pass --allow-fresh to deliberately discard missing context",
1550
+ }),
1157
1551
  crate::lifecycle::RestartReport::RefusedInvalidFirstSendAt {
1158
1552
  invalid,
1159
1553
  allow_fresh,
@@ -1196,6 +1590,75 @@ pub mod lifecycle_port {
1196
1590
  }
1197
1591
  }
1198
1592
  }
1593
+
1594
+ fn mark_matching_session_teams_stopped(
1595
+ state: &mut Value,
1596
+ session_name: Option<&crate::transport::SessionName>,
1597
+ ) -> Vec<String> {
1598
+ let Some(session_name) = session_name.map(crate::transport::SessionName::as_str) else {
1599
+ return Vec::new();
1600
+ };
1601
+ let Some(teams) = state.get_mut("teams").and_then(Value::as_object_mut) else {
1602
+ return Vec::new();
1603
+ };
1604
+ let mut out = Vec::new();
1605
+ for (key, team) in teams.iter_mut() {
1606
+ let matches = team
1607
+ .get("session_name")
1608
+ .and_then(Value::as_str)
1609
+ .is_some_and(|session| session == session_name);
1610
+ if matches {
1611
+ mark_agents_stopped(team);
1612
+ out.push(key.clone());
1613
+ }
1614
+ }
1615
+ out
1616
+ }
1617
+
1618
+ fn promote_live_sibling_after_scoped_shutdown(
1619
+ workspace: &Path,
1620
+ stopped_state: &Value,
1621
+ ) -> Result<(), CliError> {
1622
+ let stopped_key = stopped_state
1623
+ .get("active_team_key")
1624
+ .and_then(Value::as_str)
1625
+ .filter(|key| !key.is_empty());
1626
+ let Some(stopped_key) = stopped_key else {
1627
+ return Ok(());
1628
+ };
1629
+ let raw = crate::state::persist::load_runtime_state(workspace)?;
1630
+ let active = raw
1631
+ .get("active_team_key")
1632
+ .and_then(Value::as_str)
1633
+ .unwrap_or("");
1634
+ if active != stopped_key {
1635
+ return Ok(());
1636
+ }
1637
+ let Some((next_key, _)) = raw
1638
+ .get("teams")
1639
+ .and_then(Value::as_object)
1640
+ .and_then(|teams| {
1641
+ teams
1642
+ .iter()
1643
+ .find(|(key, team)| key.as_str() != stopped_key && team_has_running_agent(team))
1644
+ })
1645
+ else {
1646
+ return Ok(());
1647
+ };
1648
+ let promoted = crate::state::projection::project_top_level_view(&raw, next_key);
1649
+ crate::state::persist::save_runtime_state(workspace, &promoted)?;
1650
+ Ok(())
1651
+ }
1652
+
1653
+ fn team_has_running_agent(team: &Value) -> bool {
1654
+ team.get("agents")
1655
+ .and_then(Value::as_object)
1656
+ .is_some_and(|agents| {
1657
+ agents
1658
+ .values()
1659
+ .any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
1660
+ })
1661
+ }
1199
1662
  }
1200
1663
 
1201
1664
  /// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
@@ -1207,9 +1670,19 @@ pub mod diagnose_port {
1207
1670
 
1208
1671
  /// `runtime.doctor(spec)` + schema 注入(`cmd_doctor` 默认分支)。
1209
1672
  pub fn doctor(workspace: &Path, spec: Option<&Path>) -> Result<Value, CliError> {
1210
- let _ = spec;
1211
1673
  let tmux_path = which_path("tmux");
1212
1674
  let tmux_installed = tmux_path.is_some();
1675
+ let workspace_valid = workspace.is_dir();
1676
+ let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
1677
+ let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
1678
+ let profile_smoke = doctor_team_dir(workspace, spec)
1679
+ .map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(&team))
1680
+ .transpose()?;
1681
+ let profile_smoke_ok = profile_smoke
1682
+ .as_ref()
1683
+ .and_then(|check| check.get("ok").and_then(Value::as_bool))
1684
+ .unwrap_or(true);
1685
+ let ok = workspace_valid && (team_context || workspace_has_entries) && profile_smoke_ok;
1213
1686
  let health = crate::coordinator::coordinator_health(
1214
1687
  &crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
1215
1688
  );
@@ -1226,11 +1699,81 @@ pub mod diagnose_port {
1226
1699
  "local_module": true,
1227
1700
  },
1228
1701
  "secret_scan": secret_scan(workspace),
1702
+ "profile_smoke": profile_smoke.unwrap_or_else(|| json!({
1703
+ "name": "profile_smoke",
1704
+ "ok": true,
1705
+ "status": "not_required",
1706
+ "checks": [],
1707
+ "secret_values_printed": false,
1708
+ })),
1229
1709
  "coordinator": coordinator_health_value(health),
1230
- "ok": true,
1710
+ "ok": ok,
1711
+ "error": if ok {
1712
+ Value::Null
1713
+ } else if !profile_smoke_ok {
1714
+ json!("profile_smoke_failed")
1715
+ } else if workspace_valid {
1716
+ json!("workspace has no Team Agent spec or runtime context")
1717
+ } else {
1718
+ json!("invalid workspace")
1719
+ },
1231
1720
  }))
1232
1721
  }
1233
1722
 
1723
+ fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
1724
+ if let Some(spec) = spec {
1725
+ let candidate = if spec.is_absolute() {
1726
+ spec.to_path_buf()
1727
+ } else {
1728
+ workspace.join(spec)
1729
+ };
1730
+ if candidate.is_file() {
1731
+ return candidate.parent().map(Path::to_path_buf);
1732
+ }
1733
+ if candidate.join("team.spec.yaml").is_file() || candidate.join("TEAM.md").is_file() {
1734
+ return Some(candidate);
1735
+ }
1736
+ }
1737
+ if workspace.join("team.spec.yaml").is_file() || workspace.join("TEAM.md").is_file() {
1738
+ return Some(workspace.to_path_buf());
1739
+ }
1740
+ let current = workspace.join(".team").join("current");
1741
+ if current.join("team.spec.yaml").is_file() || current.join("TEAM.md").is_file() {
1742
+ return Some(current);
1743
+ }
1744
+ None
1745
+ }
1746
+
1747
+ fn has_doctor_team_context(workspace: &Path, spec: Option<&Path>) -> bool {
1748
+ if spec.is_some_and(|path| {
1749
+ let candidate = if path.is_absolute() {
1750
+ path.to_path_buf()
1751
+ } else {
1752
+ workspace.join(path)
1753
+ };
1754
+ candidate.is_file()
1755
+ }) {
1756
+ return true;
1757
+ }
1758
+ [
1759
+ workspace.join("TEAM.md"),
1760
+ workspace.join("team.spec.yaml"),
1761
+ workspace.join(".team/current/TEAM.md"),
1762
+ workspace.join(".team/current/team.spec.yaml"),
1763
+ workspace.join(".team/runtime/state.json"),
1764
+ workspace.join(".team/runtime/team.db"),
1765
+ ]
1766
+ .into_iter()
1767
+ .any(|path| path.exists())
1768
+ }
1769
+
1770
+ fn workspace_has_any_entry(workspace: &Path) -> bool {
1771
+ std::fs::read_dir(workspace)
1772
+ .ok()
1773
+ .and_then(|mut entries| entries.next())
1774
+ .is_some()
1775
+ }
1776
+
1234
1777
  fn secret_scan(workspace: &Path) -> Value {
1235
1778
  let mut findings = Vec::new();
1236
1779
  let mut scanned = 0usize;
@@ -1245,7 +1788,13 @@ pub mod diagnose_port {
1245
1788
  const SECRET_SCAN_MAX_ENTRIES: usize = 512;
1246
1789
  const SECRET_SCAN_MAX_FILE_BYTES: u64 = 128 * 1024;
1247
1790
 
1248
- fn scan_secret_dir(root: &Path, dir: &Path, depth: usize, scanned: &mut usize, findings: &mut Vec<Value>) {
1791
+ fn scan_secret_dir(
1792
+ root: &Path,
1793
+ dir: &Path,
1794
+ depth: usize,
1795
+ scanned: &mut usize,
1796
+ findings: &mut Vec<Value>,
1797
+ ) {
1249
1798
  if depth > SECRET_SCAN_MAX_DEPTH || *scanned >= SECRET_SCAN_MAX_ENTRIES {
1250
1799
  return;
1251
1800
  }
@@ -1305,143 +1854,37 @@ pub mod diagnose_port {
1305
1854
  }
1306
1855
  }
1307
1856
  /// `run_comms_selftest`(`--comms`/`--gate comms`)。**纯 state-read,零 token**(MUST-NOT-13)。
1308
- pub fn comms_selftest(workspace: &Path, team: Option<&str>, gate: Option<&str>) -> Result<Value, CliError> {
1309
- let _ = (team, gate);
1310
- let state = read_runtime_state(workspace);
1311
- let receiver = state
1312
- .get("leader_receiver")
1313
- .and_then(Value::as_object);
1314
- let owner_pane_id = state
1315
- .get("owner")
1316
- .or_else(|| state.get("team_owner"))
1317
- .and_then(|v| v.get("pane_id"))
1318
- .cloned()
1319
- .unwrap_or(Value::Null);
1320
- let caller_pane_id = std::env::var("TMUX_PANE").ok().map(Value::String).unwrap_or(Value::Null);
1321
- let pane_id = receiver
1322
- .and_then(|r| r.get("pane_id"))
1323
- .cloned()
1324
- .unwrap_or(Value::Null);
1325
- let mismatches = receiver_binding_mismatches(&owner_pane_id, &caller_pane_id, &pane_id);
1326
- let receiver_binding = json!({
1327
- "status": if mismatches.is_empty() { "pass" } else { "fail" },
1328
- "verifies": "binding_consistency",
1329
- "proof": "state_read",
1330
- "state_read_observed": true,
1331
- "pane_id": pane_id,
1332
- "owner_pane_id": owner_pane_id,
1333
- "caller_pane_id": caller_pane_id,
1334
- "mismatches": mismatches,
1335
- "configured": receiver.is_some(),
1336
- });
1337
- Ok(json!({
1338
- "ok": true,
1339
- "status": "pass",
1340
- "run_id": run_id(),
1341
- "scope": "binding_consistency",
1342
- "boundary": COMMS_BOUNDARY_TEXT,
1343
- "checks": {
1344
- "receiver_binding": receiver_binding,
1345
- "contract_suite": {
1346
- "status": "deferred",
1347
- "deferred_to": "0.2.9",
1348
- "reason": "contract test files not shipped with package",
1349
- "message": "comms contract verification deferred to 0.2.9; contract test files not shipped with package",
1350
- },
1351
- "provider_sdk_calls": {
1352
- "status": "pass",
1353
- "verifies": "no_provider_sdk_calls",
1354
- "calls": {
1355
- "anthropic": 0,
1356
- "openai": 0,
1357
- "httpx": 0,
1358
- },
1359
- },
1360
- },
1361
- }))
1362
- }
1363
-
1364
- pub(super) fn receiver_binding_mismatches(
1365
- owner_pane_id: &Value,
1366
- caller_pane_id: &Value,
1367
- pane_id: &Value,
1368
- ) -> Vec<Value> {
1369
- let mut mismatches = Vec::new();
1370
- if pane_mismatch(owner_pane_id, pane_id) {
1371
- mismatches.push(json!("owner_receiver_pane_mismatch"));
1372
- }
1373
- if pane_mismatch(caller_pane_id, owner_pane_id) {
1374
- mismatches.push(json!("caller_owner_pane_mismatch"));
1375
- }
1376
- if pane_mismatch(caller_pane_id, pane_id) {
1377
- mismatches.push(json!("caller_receiver_pane_mismatch"));
1378
- }
1379
- mismatches
1380
- }
1381
-
1382
- fn pane_mismatch(left: &Value, right: &Value) -> bool {
1383
- let Some(left) = left.as_str().filter(|s| !s.is_empty()) else {
1384
- return false;
1385
- };
1386
- let Some(right) = right.as_str().filter(|s| !s.is_empty()) else {
1387
- return false;
1388
- };
1389
- left != right
1857
+ pub fn comms_selftest(
1858
+ workspace: &Path,
1859
+ team: Option<&str>,
1860
+ gate: Option<&str>,
1861
+ ) -> Result<Value, CliError> {
1862
+ crate::diagnose::comms::doctor_comms_json(workspace, team, gate)
1390
1863
  }
1391
1864
 
1392
1865
  /// `orphan_gate(fix, confirm)`(`--gate orphans`)。CI gate。
1393
1866
  pub fn orphan_gate(fix: bool, confirm: bool) -> Result<Value, CliError> {
1394
- if fix && !confirm {
1395
- return Ok(json!({
1396
- "ok": false,
1397
- "gate": "orphans",
1398
- "status": "refused",
1399
- "reason": "fix_requires_confirm",
1400
- "action": "re-run with --gate orphans --fix --confirm",
1401
- }));
1402
- }
1403
- Ok(json!({
1404
- "ok": true,
1405
- "gate": "orphans",
1406
- "status": "passed",
1407
- "scanned": 0,
1408
- "dry_run": !fix,
1409
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1410
- "action_required": false,
1411
- "fix": fix,
1412
- }))
1867
+ crate::diagnose::orphans::orphan_gate_json(fix, confirm)
1413
1868
  }
1414
1869
  /// `cleanup_orphan_coordinators(confirm)`(`--cleanup-orphans`;dry-run unless `--confirm`)。
1415
1870
  pub fn cleanup_orphans(confirm: bool) -> Result<Value, CliError> {
1416
- if confirm {
1417
- return Ok(json!({
1418
- "ok": true,
1419
- "scanned": 0,
1420
- "orphans": [],
1421
- "dry_run": false,
1422
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1423
- "killed": [],
1424
- "failed": [],
1425
- }));
1426
- }
1427
- Ok(json!({
1428
- "ok": true,
1429
- "scanned": 0,
1430
- "orphans": [],
1431
- "dry_run": true,
1432
- "scanned_at": chrono::Utc::now().to_rfc3339(),
1433
- "action_required": "re-run with --confirm to send SIGTERM",
1434
- }))
1871
+ crate::diagnose::orphans::cleanup_orphans_json(confirm)
1435
1872
  }
1436
1873
  /// `fix_schema_layout`(`--fix-schema`)/`schema_diagnosis`。
1437
1874
  pub fn fix_schema(workspace: &Path) -> Result<Value, CliError> {
1438
1875
  let db_path = workspace.join(".team").join("runtime").join("team.db");
1439
- let result = crate::db::migration::fix_schema_layout(workspace, crate::db::schema::SCHEMA_VERSION)
1440
- .map_err(|e| CliError::Runtime(e.to_string()))?;
1876
+ let result =
1877
+ crate::db::migration::fix_schema_layout(workspace, crate::db::schema::SCHEMA_VERSION)
1878
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
1441
1879
  match result {
1442
- crate::db::migration::FixResult::Missing(diagnosis) => {
1443
- Ok(fix_schema_value(&db_path, diagnosis, false, Vec::new(), None, None))
1444
- }
1880
+ crate::db::migration::FixResult::Missing(diagnosis) => Ok(fix_schema_value(
1881
+ &db_path,
1882
+ diagnosis,
1883
+ false,
1884
+ Vec::new(),
1885
+ None,
1886
+ None,
1887
+ )),
1445
1888
  crate::db::migration::FixResult::Blocked { reason } => Ok(json!({
1446
1889
  "ok": false,
1447
1890
  "status": "blocked",
@@ -1450,12 +1893,22 @@ pub mod diagnose_port {
1450
1893
  "reason": reason,
1451
1894
  "fixed": false,
1452
1895
  })),
1453
- crate::db::migration::FixResult::Fixed { diagnosis, rebuilds } => {
1896
+ crate::db::migration::FixResult::Fixed {
1897
+ diagnosis,
1898
+ rebuilds,
1899
+ } => {
1454
1900
  let backup = rebuilds
1455
1901
  .first()
1456
1902
  .map(|event| event.backup_path.clone())
1457
1903
  .unwrap_or_else(|| backup_path_preview(&db_path, diagnosis.user_version));
1458
- Ok(fix_schema_value(&db_path, diagnosis, true, rebuild_values(rebuilds), Some(backup), Some("none")))
1904
+ Ok(fix_schema_value(
1905
+ &db_path,
1906
+ diagnosis,
1907
+ true,
1908
+ rebuild_values(rebuilds),
1909
+ Some(backup),
1910
+ Some("none"),
1911
+ ))
1459
1912
  }
1460
1913
  }
1461
1914
  }
@@ -1490,7 +1943,9 @@ pub mod diagnose_port {
1490
1943
  fn backup_path_preview(db_path: &Path, user_version: i64) -> String {
1491
1944
  let stamp = chrono::Utc::now().format("%Y%m%dT%H%M%SZ");
1492
1945
  db_path
1493
- .with_file_name(format!("team.db.pre-migration-{stamp}-from-v{user_version}.bak"))
1946
+ .with_file_name(format!(
1947
+ "team.db.pre-migration-{stamp}-from-v{user_version}.bak"
1948
+ ))
1494
1949
  .to_string_lossy()
1495
1950
  .to_string()
1496
1951
  }
@@ -1555,7 +2010,9 @@ pub mod diagnose_port {
1555
2010
  })
1556
2011
  }
1557
2012
 
1558
- fn coordinator_status_wire(status: crate::coordinator::CoordinatorHealthStatus) -> &'static str {
2013
+ fn coordinator_status_wire(
2014
+ status: crate::coordinator::CoordinatorHealthStatus,
2015
+ ) -> &'static str {
1559
2016
  match status {
1560
2017
  crate::coordinator::CoordinatorHealthStatus::Missing => "missing",
1561
2018
  crate::coordinator::CoordinatorHealthStatus::InvalidPid => "invalid_pid",
@@ -1572,7 +2029,11 @@ pub mod leader_port {
1572
2029
  use super::*;
1573
2030
 
1574
2031
  /// `runtime.takeover(workspace, team, confirm)` 的 CLI `--json` 投影。
1575
- pub fn takeover(workspace: &Path, team: Option<&str>, confirm: bool) -> Result<Value, CliError> {
2032
+ pub fn takeover(
2033
+ workspace: &Path,
2034
+ team: Option<&str>,
2035
+ confirm: bool,
2036
+ ) -> Result<Value, CliError> {
1576
2037
  if !confirm && !positive_caller_pane_env_present() {
1577
2038
  return Ok(json!({
1578
2039
  "ok": false,
@@ -1595,7 +2056,11 @@ pub mod leader_port {
1595
2056
  Ok(lease_value(result))
1596
2057
  }
1597
2058
  /// `runtime.claim_leader(...)` 的 CLI `--json` 投影(`cmd_claim_leader`;含 inbox_hint)。
1598
- pub fn claim_leader(workspace: &Path, team: Option<&str>, confirm: bool) -> Result<Value, CliError> {
2059
+ pub fn claim_leader(
2060
+ workspace: &Path,
2061
+ team: Option<&str>,
2062
+ confirm: bool,
2063
+ ) -> Result<Value, CliError> {
1599
2064
  let state = crate::state::persist::load_runtime_state(workspace)
1600
2065
  .map_err(|e| CliError::Runtime(e.to_string()))?;
1601
2066
  let Some(team_id) = resolve_owner_team_id(&state, team) else {
@@ -1623,13 +2088,23 @@ pub mod leader_port {
1623
2088
  /// `runtime.attach_leader(...)` 的 CLI `--json` 投影。
1624
2089
  pub fn attach_leader(
1625
2090
  workspace: &Path,
2091
+ team: Option<&str>,
1626
2092
  pane: Option<&crate::transport::PaneId>,
1627
2093
  provider: crate::provider::Provider,
2094
+ _confirm: bool,
1628
2095
  ) -> Result<Value, CliError> {
1629
2096
  let result = crate::leader::attach_leader(workspace, pane, provider)
1630
2097
  .map_err(|e| CliError::Runtime(e.to_string()))?;
1631
- let requeued = attach_requeued_exhausted_watchers(workspace, result.bound_pane_id.as_ref())?;
1632
- Ok(attach_lease_value(result, requeued))
2098
+ let requeued =
2099
+ attach_requeued_exhausted_watchers(workspace, result.bound_pane_id.as_ref())?;
2100
+ let mut value = attach_lease_value(result, requeued);
2101
+ if let Some(obj) = value.as_object_mut() {
2102
+ if let Some(team) = team {
2103
+ obj.insert("team".to_string(), json!(team));
2104
+ obj.insert("team_key".to_string(), json!(team));
2105
+ }
2106
+ }
2107
+ Ok(value)
1633
2108
  }
1634
2109
 
1635
2110
  /// `runtime.leader_identity(workspace, team)`(`cmd_identity`)。
@@ -1676,12 +2151,16 @@ pub mod leader_port {
1676
2151
  None
1677
2152
  }
1678
2153
  }
1679
- None => Some(TeamKey::new(crate::state::projection::team_state_key(state))),
2154
+ None => Some(TeamKey::new(crate::state::projection::team_state_key(
2155
+ state,
2156
+ ))),
1680
2157
  }
1681
2158
  }
1682
2159
 
1683
2160
  fn positive_caller_pane_env_present() -> bool {
1684
- std::env::var("TMUX_PANE").ok().is_some_and(|pane| !pane.is_empty())
2161
+ std::env::var("TMUX_PANE")
2162
+ .ok()
2163
+ .is_some_and(|pane| !pane.is_empty())
1685
2164
  || std::env::var("TEAM_AGENT_LEADER_PANE_ID")
1686
2165
  .ok()
1687
2166
  .is_some_and(|pane| !pane.is_empty())
@@ -1719,7 +2198,10 @@ pub mod leader_port {
1719
2198
  fn lease_value(result: crate::leader::LeaseResult) -> Value {
1720
2199
  let mut out = serde_json::Map::new();
1721
2200
  out.insert("ok".to_string(), json!(result.ok));
1722
- out.insert("status".to_string(), json!(lease_status_wire(result.status)));
2201
+ out.insert(
2202
+ "status".to_string(),
2203
+ json!(lease_status_wire(result.status)),
2204
+ );
1723
2205
  if let Some(reason) = result.reason {
1724
2206
  out.insert("reason".to_string(), json!(lease_reason_wire(reason)));
1725
2207
  }
@@ -1733,10 +2215,16 @@ pub mod leader_port {
1733
2215
  out.insert("bound_pane_id".to_string(), json!(pane.as_str()));
1734
2216
  }
1735
2217
  if let Some(receiver) = result.receiver {
1736
- out.insert("leader_receiver".to_string(), serde_json::to_value(receiver).unwrap_or(Value::Null));
2218
+ out.insert(
2219
+ "leader_receiver".to_string(),
2220
+ serde_json::to_value(receiver).unwrap_or(Value::Null),
2221
+ );
1737
2222
  }
1738
2223
  if let Some(owner) = result.owner {
1739
- out.insert("team_owner".to_string(), serde_json::to_value(owner).unwrap_or(Value::Null));
2224
+ out.insert(
2225
+ "team_owner".to_string(),
2226
+ serde_json::to_value(owner).unwrap_or(Value::Null),
2227
+ );
1740
2228
  }
1741
2229
  Value::Object(out)
1742
2230
  }
@@ -1780,7 +2268,10 @@ pub mod leader_port {
1780
2268
  /// STRING list. (Current divergent body — the `requeued` Vec<WatcherNotice> objects — kept until
1781
2269
  /// porter-c ports; pinned RED in cli::tests asserts the golden string list.)
1782
2270
  pub(crate) fn project_requeued_exhausted_watchers(event: &Value) -> Value {
1783
- event.get("watcher_ids").cloned().unwrap_or_else(|| json!([]))
2271
+ event
2272
+ .get("watcher_ids")
2273
+ .cloned()
2274
+ .unwrap_or_else(|| json!([]))
1784
2275
  }
1785
2276
 
1786
2277
  fn lease_status_wire(status: crate::leader::LeaseStatus) -> &'static str {