@team-agent/installer 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/adapters.rs +52 -7
- package/crates/team-agent/src/cli/diagnose.rs +9 -0
- package/crates/team-agent/src/cli/emit.rs +175 -0
- package/crates/team-agent/src/cli/mod.rs +455 -63
- package/crates/team-agent/src/cli/status_port.rs +62 -0
- package/crates/team-agent/src/cli/tests/base.rs +9 -4
- package/crates/team-agent/src/cli/tests/missing_subcommands.rs +83 -1
- package/crates/team-agent/src/cli/tests/mod.rs +1 -0
- package/crates/team-agent/src/cli/tests/run_delegation.rs +10 -2
- package/crates/team-agent/src/cli/tests/shutdown_kill_plan.rs +86 -21
- package/crates/team-agent/src/cli/tests/verb_install_skill.rs +76 -0
- package/crates/team-agent/src/cli/types.rs +3 -2
- package/crates/team-agent/src/compiler.rs +73 -50
- package/crates/team-agent/src/coordinator/tick.rs +108 -20
- package/crates/team-agent/src/db/migration.rs +17 -1
- package/crates/team-agent/src/leader/owner_bind.rs +59 -20
- package/crates/team-agent/src/lifecycle/launch.rs +378 -56
- package/crates/team-agent/src/lifecycle/restart/common.rs +4 -9
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +91 -12
- package/crates/team-agent/src/lifecycle/restart/selection.rs +6 -4
- package/crates/team-agent/src/lifecycle/tests/core.rs +238 -3
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +257 -7
- package/crates/team-agent/src/lifecycle/types.rs +2 -0
- package/crates/team-agent/src/mcp_server/normalize.rs +29 -7
- package/crates/team-agent/src/mcp_server/tests/golden.rs +7 -5
- package/crates/team-agent/src/mcp_server/tests/normalize.rs +5 -2
- package/crates/team-agent/src/mcp_server/tools.rs +25 -1
- package/crates/team-agent/src/mcp_server/wire.rs +11 -1
- package/crates/team-agent/src/model/paths.rs +7 -0
- package/crates/team-agent/src/model/spec.rs +23 -1
- package/crates/team-agent/src/packaging/install.rs +42 -4
- package/crates/team-agent/src/packaging/tests.rs +91 -14
- package/crates/team-agent/src/packaging/types.rs +13 -1
- package/crates/team-agent/src/provider/adapter.rs +381 -15
- package/crates/team-agent/src/state/identity.rs +29 -0
- package/crates/team-agent/src/state/selector.rs +48 -14
- package/crates/team-agent/src/tmux_backend/tests.rs +44 -0
- package/crates/team-agent/src/tmux_backend.rs +104 -9
- package/crates/team-agent/src/transport/test_support.rs +57 -4
- package/crates/team-agent/src/transport.rs +13 -0
- package/npm/install.mjs +31 -35
- package/package.json +4 -4
- package/skills/team-agent/SKILL.md +82 -5
|
@@ -168,6 +168,7 @@ pub mod lifecycle_port {
|
|
|
168
168
|
let run_ws = crate::model::paths::canonical_run_workspace(workspace)
|
|
169
169
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
170
170
|
let state = shutdown_state_for_team(&run_ws, team)?;
|
|
171
|
+
let state_for_kill = state.clone();
|
|
171
172
|
let transport = if let Some(endpoint) = legacy_worker_tmux_endpoint(&state) {
|
|
172
173
|
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
|
|
173
174
|
} else {
|
|
@@ -176,17 +177,33 @@ pub mod lifecycle_port {
|
|
|
176
177
|
let result =
|
|
177
178
|
shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
|
|
178
179
|
if team.is_none() {
|
|
179
|
-
//
|
|
180
|
-
//
|
|
181
|
-
//
|
|
182
|
-
//
|
|
183
|
-
//
|
|
180
|
+
// E12 (P0): the leader terminal lives on this socket by design. A bare shutdown must
|
|
181
|
+
// NOT `kill-server` it away. spare = state-anchor sessions ∪ `team-agent-leader-*`
|
|
182
|
+
// prefix sessions (union; cr E12 ①). kill_server only when the socket is exclusively
|
|
183
|
+
// ours (no spare + no foreign session); shared socket → kill our sessions individually
|
|
184
|
+
// (cr E12 ②). All spare derivation comes from ONE snapshot (list_targets + the state
|
|
185
|
+
// already loaded) — no independent ps/tmux re-derivation (N39).
|
|
184
186
|
let transport_dyn: &dyn crate::transport::Transport = &transport;
|
|
187
|
+
let pane_targets = transport_dyn.list_targets().unwrap_or_default();
|
|
185
188
|
let sessions = socket_session_names(transport_dyn);
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
189
|
+
let event_log = crate::event_log::EventLog::new(&run_ws);
|
|
190
|
+
let anchor_sessions =
|
|
191
|
+
anchor_sessions_from_state(&state_for_kill, &pane_targets, &event_log);
|
|
192
|
+
let decision = sessions_to_kill(&sessions, &anchor_sessions);
|
|
193
|
+
match decision {
|
|
194
|
+
KillDecision::KillServerExclusive => transport.kill_server(),
|
|
195
|
+
KillDecision::KillIndividually { to_kill, spared } => {
|
|
196
|
+
if !spared.is_empty() || to_kill.len() != sessions.len() {
|
|
197
|
+
// shared socket / leader spared → never whole-server teardown.
|
|
198
|
+
let _ = event_log.write(
|
|
199
|
+
"shutdown.kill_server_skipped_shared_socket",
|
|
200
|
+
json!({
|
|
201
|
+
"spared_sessions": spared.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
|
|
202
|
+
"killed_sessions": to_kill.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
|
|
203
|
+
}),
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
for session in &to_kill {
|
|
190
207
|
let _ = transport_dyn.kill_session(session);
|
|
191
208
|
}
|
|
192
209
|
}
|
|
@@ -195,6 +212,29 @@ pub mod lifecycle_port {
|
|
|
195
212
|
result
|
|
196
213
|
}
|
|
197
214
|
|
|
215
|
+
/// E12 ①:从 state 锚 pane_id(leader_receiver/team_owner,top+teams)映射到其所在 session
|
|
216
|
+
/// (经同一帧 list_targets pane→session)。state 无任何锚 → 退命名判据 + spare_fallback event。
|
|
217
|
+
fn anchor_sessions_from_state(
|
|
218
|
+
state: &Value,
|
|
219
|
+
pane_targets: &[crate::transport::PaneInfo],
|
|
220
|
+
event_log: &crate::event_log::EventLog,
|
|
221
|
+
) -> std::collections::BTreeSet<String> {
|
|
222
|
+
let anchor_pane_ids = collect_state_leader_anchor_pane_ids(state);
|
|
223
|
+
if anchor_pane_ids.is_empty() {
|
|
224
|
+
// 无锚(state 损坏/未记)→ 退纯命名前缀判据(下游 sessions_to_kill 仍 spare 前缀)。
|
|
225
|
+
let _ = event_log.write(
|
|
226
|
+
"shutdown.spare_fallback_to_naming",
|
|
227
|
+
json!({"reason": "no leader_receiver/team_owner pane anchor in state"}),
|
|
228
|
+
);
|
|
229
|
+
return std::collections::BTreeSet::new();
|
|
230
|
+
}
|
|
231
|
+
pane_targets
|
|
232
|
+
.iter()
|
|
233
|
+
.filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
|
|
234
|
+
.map(|pane| pane.session.as_str().to_string())
|
|
235
|
+
.collect()
|
|
236
|
+
}
|
|
237
|
+
|
|
198
238
|
fn socket_session_names(
|
|
199
239
|
transport: &dyn crate::transport::Transport,
|
|
200
240
|
) -> Vec<crate::transport::SessionName> {
|
|
@@ -208,26 +248,37 @@ pub mod lifecycle_port {
|
|
|
208
248
|
.collect()
|
|
209
249
|
}
|
|
210
250
|
|
|
211
|
-
///
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
251
|
+
/// E12 下沉纯函数:bare-shutdown socket 拆除决策。
|
|
252
|
+
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
253
|
+
pub(crate) enum KillDecision {
|
|
254
|
+
/// socket 独享(无 spare、无外来 session)→ 可整 server 拆除。
|
|
255
|
+
KillServerExclusive,
|
|
256
|
+
/// 有 spare(leader 锚/前缀)或非独享 → 逐 session kill,绝不 kill-server。
|
|
257
|
+
KillIndividually {
|
|
258
|
+
to_kill: Vec<crate::transport::SessionName>,
|
|
259
|
+
spared: Vec<crate::transport::SessionName>,
|
|
260
|
+
},
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/// E12 纯决策(单测下沉):spare = `anchor_sessions` ∪ `team-agent-leader-*` 前缀(并集,锚优先)。
|
|
264
|
+
/// 全部 session 都不 spare 且非空 → `KillServerExclusive`(独享 socket 兜底);否则逐 session
|
|
265
|
+
/// kill 非 spare 的(共享 socket / leader 在 → 绝不整 server 拆)。空 session 集 → 逐 kill(no-op)。
|
|
266
|
+
pub(crate) fn sessions_to_kill(
|
|
215
267
|
sessions: &[crate::transport::SessionName],
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
.
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
})
|
|
268
|
+
anchor_sessions: &std::collections::BTreeSet<String>,
|
|
269
|
+
) -> KillDecision {
|
|
270
|
+
let is_spared = |s: &crate::transport::SessionName| {
|
|
271
|
+
s.as_str().starts_with(crate::leader::LEADER_SESSION_PREFIX)
|
|
272
|
+
|| anchor_sessions.contains(s.as_str())
|
|
273
|
+
};
|
|
274
|
+
let spared: Vec<_> = sessions.iter().filter(|s| is_spared(s)).cloned().collect();
|
|
275
|
+
let to_kill: Vec<_> = sessions.iter().filter(|s| !is_spared(s)).cloned().collect();
|
|
276
|
+
// 独享 = 非空 + 无 spare(socket 上每个 session 都是要 kill 的我方 session)。
|
|
277
|
+
if spared.is_empty() && !sessions.is_empty() {
|
|
278
|
+
KillDecision::KillServerExclusive
|
|
279
|
+
} else {
|
|
280
|
+
KillDecision::KillIndividually { to_kill, spared }
|
|
281
|
+
}
|
|
231
282
|
}
|
|
232
283
|
|
|
233
284
|
pub fn shutdown_with_transport(
|
|
@@ -278,7 +329,7 @@ pub mod lifecycle_port {
|
|
|
278
329
|
let mut probe_degraded = false;
|
|
279
330
|
let entry_table = shutdown_table_snapshot(&run_workspace, &mut probe_degraded, "entry");
|
|
280
331
|
let mut protected = shutdown_protection_set(&entry_table);
|
|
281
|
-
extend_protection_with_leader_panes(&mut protected, transport, &entry_table);
|
|
332
|
+
extend_protection_with_leader_panes(&mut protected, transport, &state, &entry_table);
|
|
282
333
|
let protected = protected;
|
|
283
334
|
let reap_scope = if team.is_some() {
|
|
284
335
|
ShutdownReapScope::ScopedTeam
|
|
@@ -474,17 +525,42 @@ pub mod lifecycle_port {
|
|
|
474
525
|
}))
|
|
475
526
|
}
|
|
476
527
|
|
|
528
|
+
/// T5 (harvest §1 / A2): the bounded stop RETAINS the JoinHandle and reclaims the
|
|
529
|
+
/// worker thread — on a timely result it joins immediately; on timeout it gives the
|
|
530
|
+
/// thread one short grace join window instead of dropping it detached (repeated
|
|
531
|
+
/// shutdowns no longer accumulate leaked threads racing the same workspace).
|
|
477
532
|
fn stop_coordinator_bounded(
|
|
478
533
|
workspace: crate::coordinator::WorkspacePath,
|
|
479
534
|
timeout: std::time::Duration,
|
|
480
535
|
) -> Option<Result<crate::coordinator::types::StopReport, String>> {
|
|
481
536
|
let (tx, rx) = std::sync::mpsc::channel();
|
|
482
|
-
std::thread::spawn(move || {
|
|
537
|
+
let handle = std::thread::spawn(move || {
|
|
483
538
|
let result =
|
|
484
539
|
crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
|
|
485
540
|
let _ = tx.send(result);
|
|
486
541
|
});
|
|
487
|
-
rx.recv_timeout(timeout).ok()
|
|
542
|
+
let outcome = rx.recv_timeout(timeout).ok();
|
|
543
|
+
if outcome.is_some() {
|
|
544
|
+
// The worker already sent its result; the join is immediate.
|
|
545
|
+
let _ = handle.join();
|
|
546
|
+
return outcome;
|
|
547
|
+
}
|
|
548
|
+
// Timeout: grant a short grace window for the worker to wind down, then join if
|
|
549
|
+
// it finished; a still-stuck stop is reported as timeout either way (the grace
|
|
550
|
+
// join keeps the common slightly-late case from leaking a detached thread).
|
|
551
|
+
match rx.recv_timeout(std::time::Duration::from_millis(250)) {
|
|
552
|
+
Ok(late) => {
|
|
553
|
+
let _ = handle.join();
|
|
554
|
+
let _ = late; // result arrived after the deadline: still a timeout to the caller
|
|
555
|
+
None
|
|
556
|
+
}
|
|
557
|
+
Err(_) => {
|
|
558
|
+
if handle.is_finished() {
|
|
559
|
+
let _ = handle.join();
|
|
560
|
+
}
|
|
561
|
+
None
|
|
562
|
+
}
|
|
563
|
+
}
|
|
488
564
|
}
|
|
489
565
|
|
|
490
566
|
struct ShutdownDeadline {
|
|
@@ -707,7 +783,7 @@ pub mod lifecycle_port {
|
|
|
707
783
|
for _ in 0..5 {
|
|
708
784
|
let round_table = shutdown_table_snapshot(workspace, probe_degraded, "residual_round");
|
|
709
785
|
let mut protected = shutdown_protection_set(&round_table);
|
|
710
|
-
extend_protection_with_leader_panes(&mut protected, transport, &round_table);
|
|
786
|
+
extend_protection_with_leader_panes(&mut protected, transport, state, &round_table);
|
|
711
787
|
let residuals = matched_processes(
|
|
712
788
|
workspace, state, root_pids, root_pgids, &protected, scope, &round_table,
|
|
713
789
|
);
|
|
@@ -817,6 +893,71 @@ pub mod lifecycle_port {
|
|
|
817
893
|
}
|
|
818
894
|
}
|
|
819
895
|
|
|
896
|
+
/// E4 真机 grounded(任何 team 的 shutdown 都不杀任何 team 的 leader 锚 pane):
|
|
897
|
+
/// 扫 state.json 收集所有 leader-anchor pane_id(top-level team_owner /
|
|
898
|
+
/// leader_receiver + teams[<key>].* 嵌套形态)。返非空 BTreeSet 给
|
|
899
|
+
/// `extend_protection_with_leader_panes` 第二来源用。
|
|
900
|
+
///
|
|
901
|
+
/// 覆盖场景:
|
|
902
|
+
/// - LeaderStartMode::ExecProvider:state.json team_owner.pane_id 指用户原 tmux
|
|
903
|
+
/// pane(非 leader 前缀)→ shutdown 不杀(E4 真机复发修法)
|
|
904
|
+
/// - E4b team-in-team:子 team state 的 team_owner.pane_id 指父 team worker pane;
|
|
905
|
+
/// 父 team state 的 teams.<child>.team_owner.pane_id 同义(若有该字段)
|
|
906
|
+
/// → 任一 team 的 shutdown 都不杀任何 team 的 leader 锚 pane
|
|
907
|
+
pub fn collect_state_leader_anchor_pane_ids(state: &Value) -> std::collections::BTreeSet<String> {
|
|
908
|
+
let mut out = std::collections::BTreeSet::new();
|
|
909
|
+
push_anchor_pane_id(state, &mut out);
|
|
910
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
911
|
+
for (_, team_state) in teams {
|
|
912
|
+
push_anchor_pane_id(team_state, &mut out);
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
out
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
/// 单帧扫 team_owner.pane_id + leader_receiver.pane_id → BTreeSet 累加。
|
|
919
|
+
fn push_anchor_pane_id(state: &Value, out: &mut std::collections::BTreeSet<String>) {
|
|
920
|
+
for key in &["team_owner", "leader_receiver"] {
|
|
921
|
+
if let Some(pane_id) = state
|
|
922
|
+
.get(*key)
|
|
923
|
+
.and_then(|v| v.get("pane_id"))
|
|
924
|
+
.and_then(Value::as_str)
|
|
925
|
+
.filter(|s| !s.is_empty())
|
|
926
|
+
{
|
|
927
|
+
out.insert(pane_id.to_string());
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
/// E4 真机 grounded(cross-socket):收 state.json 中所有记录的 tmux_socket
|
|
933
|
+
/// endpoint(top-level + teams[<key>] 嵌套形态;team_owner / leader_receiver
|
|
934
|
+
/// 任一字段)。owner_bind 在 claim 时把 leader pane 所在 socket 记进
|
|
935
|
+
/// leader_receiver.tmux_socket(evidence:/测试rust版本/4 state.json),用作
|
|
936
|
+
/// 跨 socket 查 leader pane → pane_pid 的真相源。
|
|
937
|
+
fn collect_state_recorded_tmux_sockets(state: &Value) -> std::collections::BTreeSet<String> {
|
|
938
|
+
let mut out = std::collections::BTreeSet::new();
|
|
939
|
+
push_recorded_tmux_socket(state, &mut out);
|
|
940
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
941
|
+
for (_, team_state) in teams {
|
|
942
|
+
push_recorded_tmux_socket(team_state, &mut out);
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
out
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
fn push_recorded_tmux_socket(state: &Value, out: &mut std::collections::BTreeSet<String>) {
|
|
949
|
+
for key in &["team_owner", "leader_receiver"] {
|
|
950
|
+
if let Some(socket) = state
|
|
951
|
+
.get(*key)
|
|
952
|
+
.and_then(|v| v.get("tmux_socket"))
|
|
953
|
+
.and_then(Value::as_str)
|
|
954
|
+
.filter(|s| !s.is_empty())
|
|
955
|
+
{
|
|
956
|
+
out.insert(socket.to_string());
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
}
|
|
960
|
+
|
|
820
961
|
/// PERF-6 C-①-1/C-②-4 (N39): the protected set derives from the CALLER's snapshot —
|
|
821
962
|
/// the same table the kill/wait sets derive from.
|
|
822
963
|
fn shutdown_protection_set(table: &[ProcessInfo]) -> ShutdownProtection {
|
|
@@ -844,26 +985,72 @@ pub mod lifecycle_port {
|
|
|
844
985
|
protected
|
|
845
986
|
}
|
|
846
987
|
|
|
847
|
-
/// B5/F2
|
|
848
|
-
///
|
|
849
|
-
///
|
|
850
|
-
///
|
|
851
|
-
|
|
988
|
+
/// B5/F2 + E4 真机 grounded(任何 team 的 shutdown 都不杀任何 team 的 leader 锚 pane):
|
|
989
|
+
/// the leader terminal's pane process tree joins the protected set (same set, same
|
|
990
|
+
/// mechanism as the invoker ancestry) so the workspace residual sweep's cmdline/cwd
|
|
991
|
+
/// matching cannot reap the leader — including when ANOTHER team's bare shutdown
|
|
992
|
+
/// runs, where the leader is never in the invoker's ancestry.
|
|
993
|
+
///
|
|
994
|
+
/// Two leader-pane sources(N39 双来源,真机 grounded):
|
|
995
|
+
/// 1. **Session prefix**: tmux session starts with `team-agent-leader-`(契约 grounded;
|
|
996
|
+
/// 覆盖 LeaderStartMode::NewTmuxSession / AttachExisting).
|
|
997
|
+
/// 2. **State.json anchors**(E4 修法):state.team_owner.pane_id / state.leader_receiver.pane_id
|
|
998
|
+
/// 在 top-level **和** teams[<key>].* 都扫(N39 任何 team 的 leader 锚 pane);
|
|
999
|
+
/// 覆盖 LeaderStartMode::ExecProvider(用户 in_tmux 直接 exec,session 名是用户原
|
|
1000
|
+
/// `main`/`0`/whatever,不带 leader 前缀 — 此前 B5 三犯保护集漏覆盖)+ E4b
|
|
1001
|
+
/// team-in-team(子 team 的 leader 锚 = 父 team worker pane,window 名是 agent id
|
|
1002
|
+
/// 也不带 leader 前缀)。
|
|
1003
|
+
pub(crate) fn extend_protection_with_leader_panes(
|
|
852
1004
|
protected: &mut ShutdownProtection,
|
|
853
1005
|
transport: &dyn crate::transport::Transport,
|
|
1006
|
+
state: &Value,
|
|
854
1007
|
table: &[ProcessInfo],
|
|
855
1008
|
) {
|
|
856
|
-
let leader_pane_pids: Vec<u32> =
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
.
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
1009
|
+
let mut leader_pane_pids: Vec<u32> = Vec::new();
|
|
1010
|
+
let pane_targets = transport.list_targets().unwrap_or_default();
|
|
1011
|
+
// Source 1: session 前缀过滤(原 B5 实现)— per-workspace socket。
|
|
1012
|
+
leader_pane_pids.extend(
|
|
1013
|
+
pane_targets
|
|
1014
|
+
.iter()
|
|
1015
|
+
.filter(|pane| {
|
|
1016
|
+
pane.session
|
|
1017
|
+
.as_str()
|
|
1018
|
+
.starts_with(crate::leader::LEADER_SESSION_PREFIX)
|
|
1019
|
+
})
|
|
1020
|
+
.filter_map(|pane| pane.pane_pid),
|
|
1021
|
+
);
|
|
1022
|
+
// Source 2: state.json team_owner / leader_receiver 真锚 pane_id(top-level +
|
|
1023
|
+
// teams[*]),per-workspace socket 命中。
|
|
1024
|
+
let anchor_pane_ids: std::collections::BTreeSet<String> =
|
|
1025
|
+
collect_state_leader_anchor_pane_ids(state);
|
|
1026
|
+
leader_pane_pids.extend(
|
|
1027
|
+
pane_targets
|
|
1028
|
+
.iter()
|
|
1029
|
+
.filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
|
|
1030
|
+
.filter_map(|pane| pane.pane_pid),
|
|
1031
|
+
);
|
|
1032
|
+
// Source 3 (E4 真机 grounded · cross-socket):leader 锚 pane 可能在【别的
|
|
1033
|
+
// tmux socket】上 — LeaderStartMode::ExecProvider 真实场景里用户 in_tmux
|
|
1034
|
+
// 起 `team-agent claude`,leader pane 留在用户【默认 socket】,而 shutdown
|
|
1035
|
+
// 的 transport 走 per-workspace `ta-<hash>` socket,list_targets 看不见。
|
|
1036
|
+
// 从 state.json 读 leader_receiver/team_owner.tmux_socket(claim 时
|
|
1037
|
+
// owner_bind 记录,见 evidence /测试rust版本/4 state.json),查那个 socket
|
|
1038
|
+
// 的 list_targets 找 anchor pane_id → pane_pid → 进入 process_tree 保护。
|
|
1039
|
+
// 不在 state 中的 socket 不查(MUST-17 不撒宽 / 不主动枚举全机器 sockets)。
|
|
1040
|
+
for socket_endpoint in collect_state_recorded_tmux_sockets(state) {
|
|
1041
|
+
let cross_backend =
|
|
1042
|
+
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(&socket_endpoint);
|
|
1043
|
+
let cross_panes = <crate::tmux_backend::TmuxBackend as crate::transport::Transport>::list_targets(&cross_backend)
|
|
1044
|
+
.unwrap_or_default();
|
|
1045
|
+
leader_pane_pids.extend(
|
|
1046
|
+
cross_panes
|
|
1047
|
+
.iter()
|
|
1048
|
+
.filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
|
|
1049
|
+
.filter_map(|pane| pane.pane_pid),
|
|
1050
|
+
);
|
|
1051
|
+
}
|
|
1052
|
+
leader_pane_pids.sort_unstable();
|
|
1053
|
+
leader_pane_pids.dedup();
|
|
867
1054
|
if leader_pane_pids.is_empty() {
|
|
868
1055
|
return;
|
|
869
1056
|
}
|
|
@@ -1352,7 +1539,41 @@ pub mod lifecycle_port {
|
|
|
1352
1539
|
}
|
|
1353
1540
|
|
|
1354
1541
|
fn error_value(error: crate::lifecycle::LifecycleError) -> Value {
|
|
1355
|
-
|
|
1542
|
+
let message = error.to_string();
|
|
1543
|
+
let mut payload = json!({"ok": false, "error": message});
|
|
1544
|
+
if let Some(next_action) = error_next_action(&message) {
|
|
1545
|
+
payload["next_action"] = json!(next_action);
|
|
1546
|
+
}
|
|
1547
|
+
payload
|
|
1548
|
+
}
|
|
1549
|
+
|
|
1550
|
+
/// E8 (N38): points commonly mis-routed runtime errors at the command that
/// actually resolves them. Pure wording: it never changes control flow.
///
/// Matching is done on the human-readable [`LifecycleError`] message:
///
/// * `agent <id> not found` -> `start-agent` only starts agents already in
///   state; adding a new role goes through `add-agent`.
/// * `agent id already exists` -> the id is taken (add-agent / fork).
/// * `unknown worker agent id` -> typo or wrong team (stop / reset / fork source).
///
/// Returns `None` for every other message.
fn error_next_action(message: &str) -> Option<&'static str> {
    // The rendered error already begins with the word "agent" ("agent start
    // requirement unmet: ..."), so a bare `contains("agent")` would be true for
    // every such message. Require the actual "agent <id> not found" phrase.
    if names_missing_agent(message) {
        return Some(
            "start-agent only starts an agent that already exists in state. \
             To add a NEW role at runtime use: team-agent add-agent <id> --role-file <path>",
        );
    }
    // add-agent / fork: the requested id is already occupied.
    if message.contains("agent id already exists") {
        return Some(
            "that agent id is already in the team. \
             Use a different id, or start the existing one with: team-agent start-agent <id>",
        );
    }
    // stop / reset / fork source: misspelt id or wrong team selected.
    if message.contains("unknown worker agent id") {
        return Some(
            "no such worker agent in this team. \
             Run `team-agent status` to list agent ids (check --team if multiple teams)",
        );
    }
    None
}

/// Returns `true` when `message` contains the phrase `agent <id> not found`,
/// i.e. some occurrence of " not found" is directly preceded by a single
/// whitespace-free token that itself follows the word `agent`.
fn names_missing_agent(message: &str) -> bool {
    message.match_indices(" not found").any(|(end, _)| {
        let mut preceding = message[..end].split_whitespace().rev();
        let id = preceding.next();
        id.is_some() && preceding.next() == Some("agent")
    })
}
|
|
1357
1578
|
|
|
1358
1579
|
fn record_idle_acknowledged(
|
|
@@ -1612,6 +1833,7 @@ pub mod lifecycle_port {
|
|
|
1612
1833
|
session_name,
|
|
1613
1834
|
state_path,
|
|
1614
1835
|
next_actions,
|
|
1836
|
+
attach_commands,
|
|
1615
1837
|
} => json!({
|
|
1616
1838
|
"ok": false,
|
|
1617
1839
|
"summary": "existing runtime",
|
|
@@ -1619,20 +1841,61 @@ pub mod lifecycle_port {
|
|
|
1619
1841
|
"session_name": session_name.map(|s| s.as_str().to_string()),
|
|
1620
1842
|
"state_path": state_path.map(|p| p.to_string_lossy().to_string()),
|
|
1621
1843
|
"next_actions": next_actions,
|
|
1844
|
+
"attach_commands": attach_commands,
|
|
1622
1845
|
}),
|
|
1623
1846
|
crate::lifecycle::QuickStartReport::PreflightBlocked {
|
|
1624
1847
|
summary,
|
|
1625
1848
|
blockers,
|
|
1626
1849
|
next_actions,
|
|
1850
|
+
attach_commands,
|
|
1627
1851
|
} => json!({
|
|
1628
1852
|
"ok": false,
|
|
1629
1853
|
"summary": summary,
|
|
1630
1854
|
"blockers": blockers,
|
|
1631
1855
|
"next_actions": next_actions,
|
|
1856
|
+
"attach_commands": attach_commands,
|
|
1632
1857
|
}),
|
|
1633
1858
|
}
|
|
1634
1859
|
}
|
|
1635
1860
|
|
|
1861
|
+
#[cfg(test)]
mod quick_start_value_tests {
    use super::*;

    /// B-2: an `ExistingRuntime` report keeps its attach commands in the JSON payload.
    #[test]
    fn existing_runtime_json_includes_attach_commands() {
        let report = crate::lifecycle::QuickStartReport::ExistingRuntime {
            team: Some("teamA".to_string()),
            session_name: Some(crate::transport::SessionName::new("team-teamA")),
            state_path: Some(PathBuf::from("/tmp/state.json")),
            next_actions: vec!["restart".to_string()],
            attach_commands: vec![
                "tmux -S /tmp/tmux-501/ta-test attach -t team-teamA:worker".to_string(),
            ],
        };
        let value = quick_start_value(report);
        let first_command = value.pointer("/attach_commands/0").and_then(Value::as_str);
        assert_eq!(
            first_command,
            Some("tmux -S /tmp/tmux-501/ta-test attach -t team-teamA:worker"),
            "B-2: ExistingRuntime JSON must preserve attach_commands instead of only next_actions; value={value}"
        );
    }

    /// B-2: a `PreflightBlocked` report still emits an (empty) `attach_commands` array.
    #[test]
    fn preflight_blocked_json_includes_empty_attach_commands() {
        let report = crate::lifecycle::QuickStartReport::PreflightBlocked {
            summary: "blocked".to_string(),
            blockers: vec!["missing TEAM.md".to_string()],
            next_actions: vec!["fix preflight blockers".to_string()],
            attach_commands: Vec::new(),
        };
        let value = quick_start_value(report);
        let command_count = value
            .get("attach_commands")
            .and_then(Value::as_array)
            .map(Vec::len);
        assert_eq!(
            command_count,
            Some(0),
            "B-2: PreflightBlocked JSON must include attach_commands: [] for schema parity with Ready/Restart; value={value}"
        );
    }
}
|
|
1898
|
+
|
|
1636
1899
|
fn restart_value(report: crate::lifecycle::RestartReport) -> Value {
|
|
1637
1900
|
match report {
|
|
1638
1901
|
crate::lifecycle::RestartReport::Restarted {
|
|
@@ -1796,6 +2059,53 @@ pub mod lifecycle_port {
|
|
|
1796
2059
|
.any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
|
|
1797
2060
|
})
|
|
1798
2061
|
}
|
|
2062
|
+
|
|
2063
|
+
#[cfg(test)]
mod e8_error_guidance_tests {
    use super::{error_next_action, error_value};

    /// `LifecycleError::RequirementUnmet("agent {id} not found")` renders as
    /// "agent start requirement unmet: agent foo not found"; that message must
    /// steer the user to `add-agent`.
    #[test]
    fn start_agent_not_found_points_to_add_agent() {
        let na = error_next_action("agent start requirement unmet: agent foo not found")
            .expect("not-found must carry next_action");
        assert!(na.contains("add-agent"), "must steer to add-agent: {na}");
        assert!(na.contains("--role-file"), "must show the role-file flag: {na}");
    }

    /// An occupied agent id must mention `start-agent` as the way out.
    #[test]
    fn add_agent_already_exists_explains_way_out() {
        let na = error_next_action("agent start requirement unmet: agent id already exists: foo")
            .expect("already-exists must carry next_action");
        assert!(na.contains("start-agent"), "must mention start-agent: {na}");
    }

    /// An unknown worker id must point at `team-agent status`.
    #[test]
    fn unknown_worker_points_to_status() {
        let na = error_next_action("agent start requirement unmet: unknown worker agent id: ghost")
            .expect("unknown worker must carry next_action");
        assert!(na.contains("status"), "must steer to status: {na}");
    }

    /// Messages outside the three known patterns get no hint.
    #[test]
    fn unrelated_error_has_no_next_action() {
        let hint = error_next_action("state persistence failed: disk full");
        assert_eq!(hint, None);
    }

    /// `error_value` must surface the hint as a `next_action` field.
    #[test]
    fn error_value_attaches_next_action_field() {
        let v = error_value(crate::lifecycle::LifecycleError::RequirementUnmet(
            "agent foo not found".to_string(),
        ));
        assert_eq!(v["ok"], serde_json::json!(false));
        let guidance = v["next_action"].as_str().unwrap_or("");
        assert!(
            guidance.contains("add-agent"),
            "error_value must attach the add-agent guidance: {v}"
        );
    }
}
|
|
1799
2109
|
}
|
|
1800
2110
|
|
|
1801
2111
|
/// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
|
|
@@ -1812,14 +2122,45 @@ pub mod diagnose_port {
|
|
|
1812
2122
|
let workspace_valid = workspace.is_dir();
|
|
1813
2123
|
let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
|
|
1814
2124
|
let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
2125
|
+
// SMOKE-1 (locate.md §"Minimal Fix"):default doctor 不再隐式编译
|
|
2126
|
+
// `<workspace>/.team/current`(legacy 残留)作 profile_smoke 目标。
|
|
2127
|
+
// profile_smoke 是 team-scoped 体检,只在以下两种情形跑:
|
|
2128
|
+
// ① 用户显式给了 spec / team dir;
|
|
2129
|
+
// ② workspace 根本身就是 team dir(含 TEAM.md / team.spec.yaml)。
|
|
2130
|
+
// legacy `<workspace>/.team/current` 仅作降级诊断面(legacy_team_invalid),
|
|
2131
|
+
// 不再绑架整个 doctor 假死在 profile_smoke_failed 上。
|
|
2132
|
+
let explicit_team_target = explicit_doctor_team_dir(workspace, spec);
|
|
2133
|
+
let profile_smoke = explicit_team_target
|
|
1819
2134
|
.as_ref()
|
|
1820
|
-
.
|
|
2135
|
+
.map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(team))
|
|
2136
|
+
.transpose()?;
|
|
2137
|
+
let legacy_check = if explicit_team_target.is_none() {
|
|
2138
|
+
legacy_current_team_check(workspace)?
|
|
2139
|
+
} else {
|
|
2140
|
+
None
|
|
2141
|
+
};
|
|
2142
|
+
let profile_smoke_value = profile_smoke.unwrap_or_else(|| {
|
|
2143
|
+
legacy_check.clone().unwrap_or_else(|| {
|
|
2144
|
+
json!({
|
|
2145
|
+
"name": "profile_smoke",
|
|
2146
|
+
"ok": true,
|
|
2147
|
+
"status": "not_required",
|
|
2148
|
+
"checks": [],
|
|
2149
|
+
"secret_values_printed": false,
|
|
2150
|
+
})
|
|
2151
|
+
})
|
|
2152
|
+
});
|
|
2153
|
+
let profile_smoke_ok = profile_smoke_value
|
|
2154
|
+
.get("ok")
|
|
2155
|
+
.and_then(Value::as_bool)
|
|
1821
2156
|
.unwrap_or(true);
|
|
1822
|
-
|
|
2157
|
+
// legacy 降级面(legacy_team_invalid)不下拉整体 ok —— 用户没显式让我们
|
|
2158
|
+
// 体检这个 team,失败是降级诊断信息,不是 install 自检失败。
|
|
2159
|
+
let legacy_only_failure =
|
|
2160
|
+
!profile_smoke_ok && profile_smoke_value.get("status").and_then(Value::as_str)
|
|
2161
|
+
== Some("legacy_team_invalid");
|
|
2162
|
+
let effective_smoke_ok = profile_smoke_ok || legacy_only_failure;
|
|
2163
|
+
let ok = workspace_valid && (team_context || workspace_has_entries) && effective_smoke_ok;
|
|
1823
2164
|
let health = crate::coordinator::coordinator_health(
|
|
1824
2165
|
&crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
|
|
1825
2166
|
);
|
|
@@ -1836,18 +2177,12 @@ pub mod diagnose_port {
|
|
|
1836
2177
|
"local_module": true,
|
|
1837
2178
|
},
|
|
1838
2179
|
"secret_scan": secret_scan(workspace),
|
|
1839
|
-
"profile_smoke":
|
|
1840
|
-
"name": "profile_smoke",
|
|
1841
|
-
"ok": true,
|
|
1842
|
-
"status": "not_required",
|
|
1843
|
-
"checks": [],
|
|
1844
|
-
"secret_values_printed": false,
|
|
1845
|
-
})),
|
|
2180
|
+
"profile_smoke": profile_smoke_value,
|
|
1846
2181
|
"coordinator": coordinator_health_value(health),
|
|
1847
2182
|
"ok": ok,
|
|
1848
2183
|
"error": if ok {
|
|
1849
2184
|
Value::Null
|
|
1850
|
-
} else if !profile_smoke_ok {
|
|
2185
|
+
} else if !profile_smoke_ok && !legacy_only_failure {
|
|
1851
2186
|
json!("profile_smoke_failed")
|
|
1852
2187
|
} else if workspace_valid {
|
|
1853
2188
|
json!("workspace has no Team Agent spec or runtime context")
|
|
@@ -1857,6 +2192,63 @@ pub mod diagnose_port {
|
|
|
1857
2192
|
}))
|
|
1858
2193
|
}
|
|
1859
2194
|
|
|
2195
|
+
/// SMOKE-1: picks the team directory doctor should smoke-test, or `None` when
/// there is no explicit target.
///
/// A directory counts as a team dir when it contains a `team.spec.yaml` or
/// `TEAM.md` file. Resolution order:
///
/// 1. `spec` (joined onto `workspace` when relative): if it is a file, its
///    parent directory; if it is a team dir, the directory itself.
/// 2. Otherwise `workspace`, when the workspace root is itself a team dir.
///
/// The legacy `<workspace>/.team/current` directory is never considered here.
fn explicit_doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
    let is_team_dir =
        |dir: &Path| dir.join("team.spec.yaml").is_file() || dir.join("TEAM.md").is_file();

    if let Some(requested) = spec {
        let target = if requested.is_absolute() {
            requested.to_path_buf()
        } else {
            workspace.join(requested)
        };
        if target.is_file() {
            return target.parent().map(Path::to_path_buf);
        }
        if is_team_dir(target.as_path()) {
            return Some(target);
        }
    }

    is_team_dir(workspace).then(|| workspace.to_path_buf())
}
|
|
2217
|
+
|
|
2218
|
+
/// SMOKE-1: legacy `<workspace>/.team/current` 残留体检 — 降级诊断,**不**
|
|
2219
|
+
/// 当 install self-check 失败。如果 legacy 团有 spec/TEAM.md,尝试 compile,
|
|
2220
|
+
/// 失败返 `status=legacy_team_invalid` + team_dir + reason + next_action(N38
|
|
2221
|
+
/// 失败可解释性);compile 成功就不打扰用户(返 None,profile_smoke 走
|
|
2222
|
+
/// `not_required`)。无 legacy 团目录 → None。
|
|
2223
|
+
fn legacy_current_team_check(workspace: &Path) -> Result<Option<Value>, CliError> {
|
|
2224
|
+
let team = workspace.join(".team").join("current");
|
|
2225
|
+
let has_spec = team.join("team.spec.yaml").is_file();
|
|
2226
|
+
let has_team_md = team.join("TEAM.md").is_file();
|
|
2227
|
+
if !has_spec && !has_team_md {
|
|
2228
|
+
return Ok(None);
|
|
2229
|
+
}
|
|
2230
|
+
match crate::compiler::compile_team(&team) {
|
|
2231
|
+
Ok(_) => Ok(None),
|
|
2232
|
+
Err(error) => {
|
|
2233
|
+
let team_dir = team.to_string_lossy().to_string();
|
|
2234
|
+
Ok(Some(json!({
|
|
2235
|
+
"name": "profile_smoke",
|
|
2236
|
+
"ok": false,
|
|
2237
|
+
"status": "legacy_team_invalid",
|
|
2238
|
+
"team_dir": team_dir,
|
|
2239
|
+
"reason": error.to_string(),
|
|
2240
|
+
"next_action": format!(
|
|
2241
|
+
"scope doctor to a real team: `team-agent doctor <team-dir>`, \
|
|
2242
|
+
or repair/remove the legacy `{}` directory",
|
|
2243
|
+
team.display()
|
|
2244
|
+
),
|
|
2245
|
+
"checks": [],
|
|
2246
|
+
"secret_values_printed": false,
|
|
2247
|
+
})))
|
|
2248
|
+
}
|
|
2249
|
+
}
|
|
2250
|
+
}
|
|
2251
|
+
|
|
1860
2252
|
fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
|
|
1861
2253
|
if let Some(spec) = spec {
|
|
1862
2254
|
let candidate = if spec.is_absolute() {
|