@team-agent/installer 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. package/Cargo.lock +1 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/src/cli/adapters.rs +52 -7
  4. package/crates/team-agent/src/cli/diagnose.rs +9 -0
  5. package/crates/team-agent/src/cli/emit.rs +175 -0
  6. package/crates/team-agent/src/cli/mod.rs +455 -63
  7. package/crates/team-agent/src/cli/status_port.rs +62 -0
  8. package/crates/team-agent/src/cli/tests/base.rs +9 -4
  9. package/crates/team-agent/src/cli/tests/missing_subcommands.rs +83 -1
  10. package/crates/team-agent/src/cli/tests/mod.rs +1 -0
  11. package/crates/team-agent/src/cli/tests/run_delegation.rs +10 -2
  12. package/crates/team-agent/src/cli/tests/shutdown_kill_plan.rs +86 -21
  13. package/crates/team-agent/src/cli/tests/verb_install_skill.rs +76 -0
  14. package/crates/team-agent/src/cli/types.rs +3 -2
  15. package/crates/team-agent/src/compiler.rs +73 -50
  16. package/crates/team-agent/src/coordinator/tick.rs +108 -20
  17. package/crates/team-agent/src/db/migration.rs +17 -1
  18. package/crates/team-agent/src/leader/owner_bind.rs +59 -20
  19. package/crates/team-agent/src/lifecycle/launch.rs +378 -56
  20. package/crates/team-agent/src/lifecycle/restart/common.rs +4 -9
  21. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +91 -12
  22. package/crates/team-agent/src/lifecycle/restart/selection.rs +6 -4
  23. package/crates/team-agent/src/lifecycle/tests/core.rs +238 -3
  24. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +257 -7
  25. package/crates/team-agent/src/lifecycle/types.rs +2 -0
  26. package/crates/team-agent/src/mcp_server/normalize.rs +29 -7
  27. package/crates/team-agent/src/mcp_server/tests/golden.rs +7 -5
  28. package/crates/team-agent/src/mcp_server/tests/normalize.rs +5 -2
  29. package/crates/team-agent/src/mcp_server/tools.rs +25 -1
  30. package/crates/team-agent/src/mcp_server/wire.rs +11 -1
  31. package/crates/team-agent/src/model/paths.rs +7 -0
  32. package/crates/team-agent/src/model/spec.rs +23 -1
  33. package/crates/team-agent/src/packaging/install.rs +42 -4
  34. package/crates/team-agent/src/packaging/tests.rs +91 -14
  35. package/crates/team-agent/src/packaging/types.rs +13 -1
  36. package/crates/team-agent/src/provider/adapter.rs +381 -15
  37. package/crates/team-agent/src/state/identity.rs +29 -0
  38. package/crates/team-agent/src/state/selector.rs +48 -14
  39. package/crates/team-agent/src/tmux_backend/tests.rs +44 -0
  40. package/crates/team-agent/src/tmux_backend.rs +104 -9
  41. package/crates/team-agent/src/transport/test_support.rs +57 -4
  42. package/crates/team-agent/src/transport.rs +13 -0
  43. package/npm/install.mjs +31 -35
  44. package/package.json +4 -4
  45. package/skills/team-agent/SKILL.md +82 -5
@@ -168,6 +168,7 @@ pub mod lifecycle_port {
168
168
  let run_ws = crate::model::paths::canonical_run_workspace(workspace)
169
169
  .map_err(|e| CliError::Runtime(e.to_string()))?;
170
170
  let state = shutdown_state_for_team(&run_ws, team)?;
171
+ let state_for_kill = state.clone();
171
172
  let transport = if let Some(endpoint) = legacy_worker_tmux_endpoint(&state) {
172
173
  crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
173
174
  } else {
@@ -176,17 +177,33 @@ pub mod lifecycle_port {
176
177
  let result =
177
178
  shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
178
179
  if team.is_none() {
179
- // B5/F1: the leader terminal (`team-agent claude`) lives on this same
180
- // workspace socket by design (leader/start.rs); a bare shutdown must not
181
- // `kill-server` it away. Spare `team-agent-leader-*` sessions and clear the
182
- // remaining non-leader sessions individually; only an empty-of-leader socket
183
- // gets the whole-server teardown (the original leak-cleanup intent).
180
+ // E12 (P0): the leader terminal lives on this socket by design. A bare shutdown must
181
+ // NOT `kill-server` it away. spare = state-anchor sessions ∪ `team-agent-leader-*`
182
+ // prefix sessions (union; cr E12 ①). kill_server only when the socket is exclusively
183
+ // ours (no spare + no foreign session); shared socket kill our sessions individually
184
+ // (cr E12 ②). All spare derivation comes from ONE snapshot (list_targets + the state
185
+ // already loaded) — no independent ps/tmux re-derivation (N39).
184
186
  let transport_dyn: &dyn crate::transport::Transport = &transport;
187
+ let pane_targets = transport_dyn.list_targets().unwrap_or_default();
185
188
  let sessions = socket_session_names(transport_dyn);
186
- match sessions_to_kill_sparing_leader(&sessions) {
187
- None => transport.kill_server(),
188
- Some(non_leader_sessions) => {
189
- for session in &non_leader_sessions {
189
+ let event_log = crate::event_log::EventLog::new(&run_ws);
190
+ let anchor_sessions =
191
+ anchor_sessions_from_state(&state_for_kill, &pane_targets, &event_log);
192
+ let decision = sessions_to_kill(&sessions, &anchor_sessions);
193
+ match decision {
194
+ KillDecision::KillServerExclusive => transport.kill_server(),
195
+ KillDecision::KillIndividually { to_kill, spared } => {
196
+ if !spared.is_empty() || to_kill.len() != sessions.len() {
197
+ // shared socket / leader spared → never whole-server teardown.
198
+ let _ = event_log.write(
199
+ "shutdown.kill_server_skipped_shared_socket",
200
+ json!({
201
+ "spared_sessions": spared.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
202
+ "killed_sessions": to_kill.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
203
+ }),
204
+ );
205
+ }
206
+ for session in &to_kill {
190
207
  let _ = transport_dyn.kill_session(session);
191
208
  }
192
209
  }
@@ -195,6 +212,29 @@ pub mod lifecycle_port {
195
212
  result
196
213
  }
197
214
 
215
+ /// E12 ①:从 state 锚 pane_id(leader_receiver/team_owner,top+teams)映射到其所在 session
216
+ /// (经同一帧 list_targets pane→session)。state 无任何锚 → 退命名判据 + spare_fallback event。
217
+ fn anchor_sessions_from_state(
218
+ state: &Value,
219
+ pane_targets: &[crate::transport::PaneInfo],
220
+ event_log: &crate::event_log::EventLog,
221
+ ) -> std::collections::BTreeSet<String> {
222
+ let anchor_pane_ids = collect_state_leader_anchor_pane_ids(state);
223
+ if anchor_pane_ids.is_empty() {
224
+ // 无锚(state 损坏/未记)→ 退纯命名前缀判据(下游 sessions_to_kill 仍 spare 前缀)。
225
+ let _ = event_log.write(
226
+ "shutdown.spare_fallback_to_naming",
227
+ json!({"reason": "no leader_receiver/team_owner pane anchor in state"}),
228
+ );
229
+ return std::collections::BTreeSet::new();
230
+ }
231
+ pane_targets
232
+ .iter()
233
+ .filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
234
+ .map(|pane| pane.session.as_str().to_string())
235
+ .collect()
236
+ }
237
+
198
238
  fn socket_session_names(
199
239
  transport: &dyn crate::transport::Transport,
200
240
  ) -> Vec<crate::transport::SessionName> {
@@ -208,26 +248,37 @@ pub mod lifecycle_port {
208
248
  .collect()
209
249
  }
210
250
 
211
- /// B5/F1 pure kill decision for the bare-shutdown socket teardown.
212
- /// `None` => no `team-agent-leader-*` session on the socket → safe to kill the whole
213
- /// server. `Some(rest)` => leader present → kill only the non-leader sessions.
214
- pub(crate) fn sessions_to_kill_sparing_leader(
251
+ /// E12 下沉纯函数:bare-shutdown socket 拆除决策。
252
+ #[derive(Debug, Clone, PartialEq, Eq)]
253
+ pub(crate) enum KillDecision {
254
+ /// socket 独享(无 spare、无外来 session) 可整 server 拆除。
255
+ KillServerExclusive,
256
+ /// 有 spare(leader 锚/前缀)或非独享 → 逐 session kill,绝不 kill-server。
257
+ KillIndividually {
258
+ to_kill: Vec<crate::transport::SessionName>,
259
+ spared: Vec<crate::transport::SessionName>,
260
+ },
261
+ }
262
+
263
+ /// E12 纯决策(单测下沉):spare = `anchor_sessions` ∪ `team-agent-leader-*` 前缀(并集,锚优先)。
264
+ /// 全部 session 都不 spare 且非空 → `KillServerExclusive`(独享 socket 兜底);否则逐 session
265
+ /// kill 非 spare 的(共享 socket / leader 在 → 绝不整 server 拆)。空 session 集 → 逐 kill(no-op)。
266
+ pub(crate) fn sessions_to_kill(
215
267
  sessions: &[crate::transport::SessionName],
216
- ) -> Option<Vec<crate::transport::SessionName>> {
217
- let leader_present = sessions
218
- .iter()
219
- .any(|session| session.as_str().starts_with(crate::leader::LEADER_SESSION_PREFIX));
220
- leader_present.then(|| {
221
- sessions
222
- .iter()
223
- .filter(|session| {
224
- !session
225
- .as_str()
226
- .starts_with(crate::leader::LEADER_SESSION_PREFIX)
227
- })
228
- .cloned()
229
- .collect()
230
- })
268
+ anchor_sessions: &std::collections::BTreeSet<String>,
269
+ ) -> KillDecision {
270
+ let is_spared = |s: &crate::transport::SessionName| {
271
+ s.as_str().starts_with(crate::leader::LEADER_SESSION_PREFIX)
272
+ || anchor_sessions.contains(s.as_str())
273
+ };
274
+ let spared: Vec<_> = sessions.iter().filter(|s| is_spared(s)).cloned().collect();
275
+ let to_kill: Vec<_> = sessions.iter().filter(|s| !is_spared(s)).cloned().collect();
276
+ // 独享 = 非空 + 无 spare(socket 上每个 session 都是要 kill 的我方 session)。
277
+ if spared.is_empty() && !sessions.is_empty() {
278
+ KillDecision::KillServerExclusive
279
+ } else {
280
+ KillDecision::KillIndividually { to_kill, spared }
281
+ }
231
282
  }
232
283
 
233
284
  pub fn shutdown_with_transport(
@@ -278,7 +329,7 @@ pub mod lifecycle_port {
278
329
  let mut probe_degraded = false;
279
330
  let entry_table = shutdown_table_snapshot(&run_workspace, &mut probe_degraded, "entry");
280
331
  let mut protected = shutdown_protection_set(&entry_table);
281
- extend_protection_with_leader_panes(&mut protected, transport, &entry_table);
332
+ extend_protection_with_leader_panes(&mut protected, transport, &state, &entry_table);
282
333
  let protected = protected;
283
334
  let reap_scope = if team.is_some() {
284
335
  ShutdownReapScope::ScopedTeam
@@ -474,17 +525,42 @@ pub mod lifecycle_port {
474
525
  }))
475
526
  }
476
527
 
528
+ /// T5 (harvest §1 / A2): the bounded stop RETAINS the JoinHandle and reclaims the
529
+ /// worker thread — on a timely result it joins immediately; on timeout it gives the
530
+ /// thread one short grace join window instead of dropping it detached (repeated
531
+ /// shutdowns no longer accumulate leaked threads racing the same workspace).
477
532
  fn stop_coordinator_bounded(
478
533
  workspace: crate::coordinator::WorkspacePath,
479
534
  timeout: std::time::Duration,
480
535
  ) -> Option<Result<crate::coordinator::types::StopReport, String>> {
481
536
  let (tx, rx) = std::sync::mpsc::channel();
482
- std::thread::spawn(move || {
537
+ let handle = std::thread::spawn(move || {
483
538
  let result =
484
539
  crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
485
540
  let _ = tx.send(result);
486
541
  });
487
- rx.recv_timeout(timeout).ok()
542
+ let outcome = rx.recv_timeout(timeout).ok();
543
+ if outcome.is_some() {
544
+ // The worker already sent its result; the join is immediate.
545
+ let _ = handle.join();
546
+ return outcome;
547
+ }
548
+ // Timeout: grant a short grace window for the worker to wind down, then join if
549
+ // it finished; a still-stuck stop is reported as timeout either way (the grace
550
+ // join keeps the common slightly-late case from leaking a detached thread).
551
+ match rx.recv_timeout(std::time::Duration::from_millis(250)) {
552
+ Ok(late) => {
553
+ let _ = handle.join();
554
+ let _ = late; // result arrived after the deadline: still a timeout to the caller
555
+ None
556
+ }
557
+ Err(_) => {
558
+ if handle.is_finished() {
559
+ let _ = handle.join();
560
+ }
561
+ None
562
+ }
563
+ }
488
564
  }
489
565
 
490
566
  struct ShutdownDeadline {
@@ -707,7 +783,7 @@ pub mod lifecycle_port {
707
783
  for _ in 0..5 {
708
784
  let round_table = shutdown_table_snapshot(workspace, probe_degraded, "residual_round");
709
785
  let mut protected = shutdown_protection_set(&round_table);
710
- extend_protection_with_leader_panes(&mut protected, transport, &round_table);
786
+ extend_protection_with_leader_panes(&mut protected, transport, state, &round_table);
711
787
  let residuals = matched_processes(
712
788
  workspace, state, root_pids, root_pgids, &protected, scope, &round_table,
713
789
  );
@@ -817,6 +893,71 @@ pub mod lifecycle_port {
817
893
  }
818
894
  }
819
895
 
896
+ /// E4 真机 grounded(任何 team 的 shutdown 都不杀任何 team 的 leader 锚 pane):
897
+ /// 扫 state.json 收集所有 leader-anchor pane_id(top-level team_owner /
898
+ /// leader_receiver + teams[<key>].* 嵌套形态)。返非空 BTreeSet 给
899
+ /// `extend_protection_with_leader_panes` 第二来源用。
900
+ ///
901
+ /// 覆盖场景:
902
+ /// - LeaderStartMode::ExecProvider:state.json team_owner.pane_id 指用户原 tmux
903
+ /// pane(非 leader 前缀)→ shutdown 不杀(E4 真机复发修法)
904
+ /// - E4b team-in-team:子 team state 的 team_owner.pane_id 指父 team worker pane;
905
+ /// 父 team state 的 teams.<child>.team_owner.pane_id 同义(若有该字段)
906
+ /// → 任一 team 的 shutdown 都不杀任何 team 的 leader 锚 pane
907
+ pub fn collect_state_leader_anchor_pane_ids(state: &Value) -> std::collections::BTreeSet<String> {
908
+ let mut out = std::collections::BTreeSet::new();
909
+ push_anchor_pane_id(state, &mut out);
910
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
911
+ for (_, team_state) in teams {
912
+ push_anchor_pane_id(team_state, &mut out);
913
+ }
914
+ }
915
+ out
916
+ }
917
+
918
+ /// 单帧扫 team_owner.pane_id + leader_receiver.pane_id → BTreeSet 累加。
919
+ fn push_anchor_pane_id(state: &Value, out: &mut std::collections::BTreeSet<String>) {
920
+ for key in &["team_owner", "leader_receiver"] {
921
+ if let Some(pane_id) = state
922
+ .get(*key)
923
+ .and_then(|v| v.get("pane_id"))
924
+ .and_then(Value::as_str)
925
+ .filter(|s| !s.is_empty())
926
+ {
927
+ out.insert(pane_id.to_string());
928
+ }
929
+ }
930
+ }
931
+
932
+ /// E4 真机 grounded(cross-socket):收 state.json 中所有记录的 tmux_socket
933
+ /// endpoint(top-level + teams[<key>] 嵌套形态;team_owner / leader_receiver
934
+ /// 任一字段)。owner_bind 在 claim 时把 leader pane 所在 socket 记进
935
+ /// leader_receiver.tmux_socket(evidence:/测试rust版本/4 state.json),用作
936
+ /// 跨 socket 查 leader pane → pane_pid 的真相源。
937
+ fn collect_state_recorded_tmux_sockets(state: &Value) -> std::collections::BTreeSet<String> {
938
+ let mut out = std::collections::BTreeSet::new();
939
+ push_recorded_tmux_socket(state, &mut out);
940
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
941
+ for (_, team_state) in teams {
942
+ push_recorded_tmux_socket(team_state, &mut out);
943
+ }
944
+ }
945
+ out
946
+ }
947
+
948
+ fn push_recorded_tmux_socket(state: &Value, out: &mut std::collections::BTreeSet<String>) {
949
+ for key in &["team_owner", "leader_receiver"] {
950
+ if let Some(socket) = state
951
+ .get(*key)
952
+ .and_then(|v| v.get("tmux_socket"))
953
+ .and_then(Value::as_str)
954
+ .filter(|s| !s.is_empty())
955
+ {
956
+ out.insert(socket.to_string());
957
+ }
958
+ }
959
+ }
960
+
820
961
  /// PERF-6 C-①-1/C-②-4 (N39): the protected set derives from the CALLER's snapshot —
821
962
  /// the same table the kill/wait sets derive from.
822
963
  fn shutdown_protection_set(table: &[ProcessInfo]) -> ShutdownProtection {
@@ -844,26 +985,72 @@ pub mod lifecycle_port {
844
985
  protected
845
986
  }
846
987
 
847
- /// B5/F2: the leader terminal's pane process tree joins the protected set (same
848
- /// set, same mechanism as the invoker ancestry) so the workspace residual sweep's
849
- /// cmdline/cwd matching cannot reap the leader including when ANOTHER team's bare
850
- /// shutdown runs, where the leader is never in the invoker's ancestry.
851
- fn extend_protection_with_leader_panes(
988
+ /// B5/F2 + E4 真机 grounded(任何 team shutdown 都不杀任何 team leader 锚 pane):
989
+ /// the leader terminal's pane process tree joins the protected set (same set, same
990
+ /// mechanism as the invoker ancestry) so the workspace residual sweep's cmdline/cwd
991
+ /// matching cannot reap the leader including when ANOTHER team's bare shutdown
992
+ /// runs, where the leader is never in the invoker's ancestry.
993
+ ///
994
+ /// Two leader-pane sources(N39 双来源,真机 grounded):
995
+ /// 1. **Session prefix**: tmux session starts with `team-agent-leader-`(契约 grounded;
996
+ /// 覆盖 LeaderStartMode::NewTmuxSession / AttachExisting).
997
+ /// 2. **State.json anchors**(E4 修法):state.team_owner.pane_id / state.leader_receiver.pane_id
998
+ /// 在 top-level **和** teams[<key>].* 都扫(N39 任何 team 的 leader 锚 pane);
999
+ /// 覆盖 LeaderStartMode::ExecProvider(用户 in_tmux 直接 exec,session 名是用户原
1000
+ /// `main`/`0`/whatever,不带 leader 前缀 — 此前 B5 三犯保护集漏覆盖)+ E4b
1001
+ /// team-in-team(子 team 的 leader 锚 = 父 team worker pane,window 名是 agent id
1002
+ /// 也不带 leader 前缀)。
1003
+ pub(crate) fn extend_protection_with_leader_panes(
852
1004
  protected: &mut ShutdownProtection,
853
1005
  transport: &dyn crate::transport::Transport,
1006
+ state: &Value,
854
1007
  table: &[ProcessInfo],
855
1008
  ) {
856
- let leader_pane_pids: Vec<u32> = transport
857
- .list_targets()
858
- .unwrap_or_default()
859
- .into_iter()
860
- .filter(|pane| {
861
- pane.session
862
- .as_str()
863
- .starts_with(crate::leader::LEADER_SESSION_PREFIX)
864
- })
865
- .filter_map(|pane| pane.pane_pid)
866
- .collect();
1009
+ let mut leader_pane_pids: Vec<u32> = Vec::new();
1010
+ let pane_targets = transport.list_targets().unwrap_or_default();
1011
+ // Source 1: session 前缀过滤(原 B5 实现)— per-workspace socket。
1012
+ leader_pane_pids.extend(
1013
+ pane_targets
1014
+ .iter()
1015
+ .filter(|pane| {
1016
+ pane.session
1017
+ .as_str()
1018
+ .starts_with(crate::leader::LEADER_SESSION_PREFIX)
1019
+ })
1020
+ .filter_map(|pane| pane.pane_pid),
1021
+ );
1022
+ // Source 2: state.json team_owner / leader_receiver 真锚 pane_id(top-level +
1023
+ // teams[*]),per-workspace socket 命中。
1024
+ let anchor_pane_ids: std::collections::BTreeSet<String> =
1025
+ collect_state_leader_anchor_pane_ids(state);
1026
+ leader_pane_pids.extend(
1027
+ pane_targets
1028
+ .iter()
1029
+ .filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
1030
+ .filter_map(|pane| pane.pane_pid),
1031
+ );
1032
+ // Source 3 (E4 真机 grounded · cross-socket):leader 锚 pane 可能在【别的
1033
+ // tmux socket】上 — LeaderStartMode::ExecProvider 真实场景里用户 in_tmux
1034
+ // 起 `team-agent claude`,leader pane 留在用户【默认 socket】,而 shutdown
1035
+ // 的 transport 走 per-workspace `ta-<hash>` socket,list_targets 看不见。
1036
+ // 从 state.json 读 leader_receiver/team_owner.tmux_socket(claim 时
1037
+ // owner_bind 记录,见 evidence /测试rust版本/4 state.json),查那个 socket
1038
+ // 的 list_targets 找 anchor pane_id → pane_pid → 进入 process_tree 保护。
1039
+ // 不在 state 中的 socket 不查(MUST-17 不撒宽 / 不主动枚举全机器 sockets)。
1040
+ for socket_endpoint in collect_state_recorded_tmux_sockets(state) {
1041
+ let cross_backend =
1042
+ crate::tmux_backend::TmuxBackend::for_tmux_endpoint(&socket_endpoint);
1043
+ let cross_panes = <crate::tmux_backend::TmuxBackend as crate::transport::Transport>::list_targets(&cross_backend)
1044
+ .unwrap_or_default();
1045
+ leader_pane_pids.extend(
1046
+ cross_panes
1047
+ .iter()
1048
+ .filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
1049
+ .filter_map(|pane| pane.pane_pid),
1050
+ );
1051
+ }
1052
+ leader_pane_pids.sort_unstable();
1053
+ leader_pane_pids.dedup();
867
1054
  if leader_pane_pids.is_empty() {
868
1055
  return;
869
1056
  }
@@ -1352,7 +1539,41 @@ pub mod lifecycle_port {
1352
1539
  }
1353
1540
 
1354
1541
  fn error_value(error: crate::lifecycle::LifecycleError) -> Value {
1355
- json!({"ok": false, "error": error.to_string()})
1542
+ let message = error.to_string();
1543
+ let mut payload = json!({"ok": false, "error": message});
1544
+ if let Some(next_action) = error_next_action(&message) {
1545
+ payload["next_action"] = json!(next_action);
1546
+ }
1547
+ payload
1548
+ }
1549
+
1550
/// E8 (N38): point commonly mis-routed runtime errors at the right way out
/// (wording only, no semantic change to the error itself).
///
/// Matches the human-readable `LifecycleError` message and returns the next command
/// to try:
///
/// * `agent {id} not found`      -> use `add-agent` for a new role
/// * `agent id already exists`   -> pick another id or `start-agent` the existing one
/// * `unknown worker agent id`   -> list ids with `team-agent status`
///
/// Returns `None` when the message matches none of these.
///
/// The not-found rule is matched on word boundaries (see `names_missing_agent`)
/// rather than as two independent substrings: the rendered message is prefixed with
/// "agent start requirement unmet:", so a plain `contains("agent")` is true for
/// every such error and would steer unrelated "... not found" failures to
/// `add-agent`.
fn error_next_action(message: &str) -> Option<&'static str> {
    // start-agent hit "agent ... not found": start-agent only starts agents that
    // already exist in state; adding a new role is add-agent's job.
    if names_missing_agent(message) {
        return Some(
            "start-agent only starts an agent that already exists in state. \
             To add a NEW role at runtime use: team-agent add-agent <id> --role-file <path>",
        );
    }
    // add-agent / fork hit "agent id already exists": the id is taken.
    if message.contains("agent id already exists") {
        return Some(
            "that agent id is already in the team. \
             Use a different id, or start the existing one with: team-agent start-agent <id>",
        );
    }
    // stop/reset/fork source hit "unknown worker agent id": typo or wrong team.
    if message.contains("unknown worker agent id") {
        return Some(
            "no such worker agent in this team. \
             Run `team-agent status` to list agent ids (check --team if multiple teams)",
        );
    }
    None
}

/// True when `message` contains the words `not found` directly after `agent <id>`
/// (or directly after `agent`). Surrounding punctuation on each word is ignored.
fn names_missing_agent(message: &str) -> bool {
    let words: Vec<&str> = message
        .split_whitespace()
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
        .collect();
    words.windows(2).enumerate().any(|(index, pair)| {
        pair[0] == "not"
            && pair[1] == "found"
            // "agent" must sit one or two words before "not".
            && (1..=2).any(|back| index >= back && words[index - back] == "agent")
    })
}
1357
1578
 
1358
1579
  fn record_idle_acknowledged(
@@ -1612,6 +1833,7 @@ pub mod lifecycle_port {
1612
1833
  session_name,
1613
1834
  state_path,
1614
1835
  next_actions,
1836
+ attach_commands,
1615
1837
  } => json!({
1616
1838
  "ok": false,
1617
1839
  "summary": "existing runtime",
@@ -1619,20 +1841,61 @@ pub mod lifecycle_port {
1619
1841
  "session_name": session_name.map(|s| s.as_str().to_string()),
1620
1842
  "state_path": state_path.map(|p| p.to_string_lossy().to_string()),
1621
1843
  "next_actions": next_actions,
1844
+ "attach_commands": attach_commands,
1622
1845
  }),
1623
1846
  crate::lifecycle::QuickStartReport::PreflightBlocked {
1624
1847
  summary,
1625
1848
  blockers,
1626
1849
  next_actions,
1850
+ attach_commands,
1627
1851
  } => json!({
1628
1852
  "ok": false,
1629
1853
  "summary": summary,
1630
1854
  "blockers": blockers,
1631
1855
  "next_actions": next_actions,
1856
+ "attach_commands": attach_commands,
1632
1857
  }),
1633
1858
  }
1634
1859
  }
1635
1860
 
1861
/// B-2: `quick_start_value` must carry `attach_commands` through to the JSON for
/// the non-ready report variants.
#[cfg(test)]
mod quick_start_value_tests {
    use super::*;

    #[test]
    fn existing_runtime_json_includes_attach_commands() {
        let attach = "tmux -S /tmp/tmux-501/ta-test attach -t team-teamA:worker";
        let report = crate::lifecycle::QuickStartReport::ExistingRuntime {
            team: Some("teamA".to_string()),
            session_name: Some(crate::transport::SessionName::new("team-teamA")),
            state_path: Some(PathBuf::from("/tmp/state.json")),
            next_actions: vec!["restart".to_string()],
            attach_commands: vec![attach.to_string()],
        };

        let value = quick_start_value(report);

        let first_attach = value.pointer("/attach_commands/0").and_then(Value::as_str);
        assert_eq!(
            first_attach,
            Some(attach),
            "B-2: ExistingRuntime JSON must preserve attach_commands instead of only next_actions; value={value}"
        );
    }

    #[test]
    fn preflight_blocked_json_includes_empty_attach_commands() {
        let report = crate::lifecycle::QuickStartReport::PreflightBlocked {
            summary: "blocked".to_string(),
            blockers: vec!["missing TEAM.md".to_string()],
            next_actions: vec!["fix preflight blockers".to_string()],
            attach_commands: Vec::new(),
        };

        let value = quick_start_value(report);

        // The key must be present as an (empty) array, not merely absent.
        let attach_count = value
            .get("attach_commands")
            .and_then(Value::as_array)
            .map(Vec::len);
        assert_eq!(
            attach_count,
            Some(0),
            "B-2: PreflightBlocked JSON must include attach_commands: [] for schema parity with Ready/Restart; value={value}"
        );
    }
}
1898
+
1636
1899
  fn restart_value(report: crate::lifecycle::RestartReport) -> Value {
1637
1900
  match report {
1638
1901
  crate::lifecycle::RestartReport::Restarted {
@@ -1796,6 +2059,53 @@ pub mod lifecycle_port {
1796
2059
  .any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
1797
2060
  })
1798
2061
  }
2062
+
2063
/// E8: the guidance attached to common lifecycle errors.
#[cfg(test)]
mod e8_error_guidance_tests {
    use super::{error_next_action, error_value};

    #[test]
    fn start_agent_not_found_points_to_add_agent() {
        // LifecycleError::RequirementUnmet("agent {id} not found") renders via
        // to_string() as "agent start requirement unmet: agent foo not found".
        let rendered = "agent start requirement unmet: agent foo not found";
        let na = error_next_action(rendered).expect("not-found must carry next_action");
        assert!(na.contains("add-agent"), "must steer to add-agent: {na}");
        assert!(na.contains("--role-file"), "must show the role-file flag: {na}");
    }

    #[test]
    fn add_agent_already_exists_explains_way_out() {
        let rendered = "agent start requirement unmet: agent id already exists: foo";
        let na = error_next_action(rendered).expect("already-exists must carry next_action");
        assert!(na.contains("start-agent"), "must mention start-agent: {na}");
    }

    #[test]
    fn unknown_worker_points_to_status() {
        let rendered = "agent start requirement unmet: unknown worker agent id: ghost";
        let na = error_next_action(rendered).expect("unknown worker must carry next_action");
        assert!(na.contains("status"), "must steer to status: {na}");
    }

    #[test]
    fn unrelated_error_has_no_next_action() {
        let guidance = error_next_action("state persistence failed: disk full");
        assert_eq!(guidance, None);
    }

    #[test]
    fn error_value_attaches_next_action_field() {
        let v = error_value(crate::lifecycle::LifecycleError::RequirementUnmet(
            "agent foo not found".to_string(),
        ));
        assert_eq!(v["ok"], serde_json::json!(false));
        let next_action = v["next_action"].as_str().unwrap_or("");
        assert!(
            next_action.contains("add-agent"),
            "error_value must attach the add-agent guidance: {v}"
        );
    }
}
1799
2109
  }
1800
2110
 
1801
2111
  /// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
@@ -1812,14 +2122,45 @@ pub mod diagnose_port {
1812
2122
  let workspace_valid = workspace.is_dir();
1813
2123
  let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
1814
2124
  let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
1815
- let profile_smoke = doctor_team_dir(workspace, spec)
1816
- .map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(&team))
1817
- .transpose()?;
1818
- let profile_smoke_ok = profile_smoke
2125
+ // SMOKE-1 (locate.md §"Minimal Fix"):default doctor 不再隐式编译
2126
+ // `<workspace>/.team/current`(legacy 残留)作 profile_smoke 目标。
2127
+ // profile_smoke 是 team-scoped 体检,只在以下两种情形跑:
2128
+ // ① 用户显式给了 spec / team dir;
2129
+ // ② workspace 根本身就是 team dir(含 TEAM.md / team.spec.yaml)。
2130
+ // legacy `<workspace>/.team/current` 仅作降级诊断面(legacy_team_invalid),
2131
+ // 不再绑架整个 doctor 假死在 profile_smoke_failed 上。
2132
+ let explicit_team_target = explicit_doctor_team_dir(workspace, spec);
2133
+ let profile_smoke = explicit_team_target
1819
2134
  .as_ref()
1820
- .and_then(|check| check.get("ok").and_then(Value::as_bool))
2135
+ .map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(team))
2136
+ .transpose()?;
2137
+ let legacy_check = if explicit_team_target.is_none() {
2138
+ legacy_current_team_check(workspace)?
2139
+ } else {
2140
+ None
2141
+ };
2142
+ let profile_smoke_value = profile_smoke.unwrap_or_else(|| {
2143
+ legacy_check.clone().unwrap_or_else(|| {
2144
+ json!({
2145
+ "name": "profile_smoke",
2146
+ "ok": true,
2147
+ "status": "not_required",
2148
+ "checks": [],
2149
+ "secret_values_printed": false,
2150
+ })
2151
+ })
2152
+ });
2153
+ let profile_smoke_ok = profile_smoke_value
2154
+ .get("ok")
2155
+ .and_then(Value::as_bool)
1821
2156
  .unwrap_or(true);
1822
- let ok = workspace_valid && (team_context || workspace_has_entries) && profile_smoke_ok;
2157
+ // legacy 降级面(legacy_team_invalid)不下拉整体 ok —— 用户没显式让我们
2158
+ // 体检这个 team,失败是降级诊断信息,不是 install 自检失败。
2159
+ let legacy_only_failure =
2160
+ !profile_smoke_ok && profile_smoke_value.get("status").and_then(Value::as_str)
2161
+ == Some("legacy_team_invalid");
2162
+ let effective_smoke_ok = profile_smoke_ok || legacy_only_failure;
2163
+ let ok = workspace_valid && (team_context || workspace_has_entries) && effective_smoke_ok;
1823
2164
  let health = crate::coordinator::coordinator_health(
1824
2165
  &crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
1825
2166
  );
@@ -1836,18 +2177,12 @@ pub mod diagnose_port {
1836
2177
  "local_module": true,
1837
2178
  },
1838
2179
  "secret_scan": secret_scan(workspace),
1839
- "profile_smoke": profile_smoke.unwrap_or_else(|| json!({
1840
- "name": "profile_smoke",
1841
- "ok": true,
1842
- "status": "not_required",
1843
- "checks": [],
1844
- "secret_values_printed": false,
1845
- })),
2180
+ "profile_smoke": profile_smoke_value,
1846
2181
  "coordinator": coordinator_health_value(health),
1847
2182
  "ok": ok,
1848
2183
  "error": if ok {
1849
2184
  Value::Null
1850
- } else if !profile_smoke_ok {
2185
+ } else if !profile_smoke_ok && !legacy_only_failure {
1851
2186
  json!("profile_smoke_failed")
1852
2187
  } else if workspace_valid {
1853
2188
  json!("workspace has no Team Agent spec or runtime context")
@@ -1857,6 +2192,63 @@ pub mod diagnose_port {
1857
2192
  }))
1858
2193
  }
1859
2194
 
2195
/// SMOKE-1: resolve the team directory the doctor was explicitly pointed at.
///
/// Resolution order:
/// 1. `spec` (taken as-is when absolute, otherwise joined onto `workspace`):
///    * an existing file -> its parent directory;
///    * a directory containing `team.spec.yaml` or `TEAM.md` -> that directory.
/// 2. `workspace` itself, when it contains `team.spec.yaml` or `TEAM.md`.
///
/// Returns `None` otherwise. A `spec` that resolves to neither case falls through
/// to step 2. The legacy `<workspace>/.team/current` directory is never consulted
/// here.
fn explicit_doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
    let looks_like_team_dir = |dir: &Path| {
        ["team.spec.yaml", "TEAM.md"]
            .iter()
            .any(|marker| dir.join(marker).is_file())
    };

    if let Some(spec_path) = spec {
        let resolved = if spec_path.is_absolute() {
            spec_path.to_path_buf()
        } else {
            workspace.join(spec_path)
        };
        if resolved.is_file() {
            // A spec file was named directly: its containing directory is the team dir.
            return resolved.parent().map(Path::to_path_buf);
        }
        if looks_like_team_dir(&resolved) {
            return Some(resolved);
        }
    }

    looks_like_team_dir(workspace).then(|| workspace.to_path_buf())
}
2217
+
2218
+ /// SMOKE-1: legacy `<workspace>/.team/current` 残留体检 — 降级诊断,**不**
2219
+ /// 当 install self-check 失败。如果 legacy 团有 spec/TEAM.md,尝试 compile,
2220
+ /// 失败返 `status=legacy_team_invalid` + team_dir + reason + next_action(N38
2221
+ /// 失败可解释性);compile 成功就不打扰用户(返 None,profile_smoke 走
2222
+ /// `not_required`)。无 legacy 团目录 → None。
2223
+ fn legacy_current_team_check(workspace: &Path) -> Result<Option<Value>, CliError> {
2224
+ let team = workspace.join(".team").join("current");
2225
+ let has_spec = team.join("team.spec.yaml").is_file();
2226
+ let has_team_md = team.join("TEAM.md").is_file();
2227
+ if !has_spec && !has_team_md {
2228
+ return Ok(None);
2229
+ }
2230
+ match crate::compiler::compile_team(&team) {
2231
+ Ok(_) => Ok(None),
2232
+ Err(error) => {
2233
+ let team_dir = team.to_string_lossy().to_string();
2234
+ Ok(Some(json!({
2235
+ "name": "profile_smoke",
2236
+ "ok": false,
2237
+ "status": "legacy_team_invalid",
2238
+ "team_dir": team_dir,
2239
+ "reason": error.to_string(),
2240
+ "next_action": format!(
2241
+ "scope doctor to a real team: `team-agent doctor <team-dir>`, \
2242
+ or repair/remove the legacy `{}` directory",
2243
+ team.display()
2244
+ ),
2245
+ "checks": [],
2246
+ "secret_values_printed": false,
2247
+ })))
2248
+ }
2249
+ }
2250
+ }
2251
+
1860
2252
  fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
1861
2253
  if let Some(spec) = spec {
1862
2254
  let candidate = if spec.is_absolute() {