@team-agent/installer 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/diagnose.rs +9 -0
- package/crates/team-agent/src/cli/emit.rs +63 -0
- package/crates/team-agent/src/cli/mod.rs +334 -35
- package/crates/team-agent/src/cli/status_port.rs +62 -0
- package/crates/team-agent/src/cli/tests/base.rs +9 -4
- package/crates/team-agent/src/cli/tests/run_delegation.rs +10 -2
- package/crates/team-agent/src/cli/types.rs +3 -2
- package/crates/team-agent/src/compiler.rs +73 -50
- package/crates/team-agent/src/coordinator/tick.rs +108 -20
- package/crates/team-agent/src/db/migration.rs +17 -1
- package/crates/team-agent/src/lifecycle/launch.rs +182 -47
- package/crates/team-agent/src/lifecycle/restart/common.rs +4 -9
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +75 -2
- package/crates/team-agent/src/lifecycle/restart/selection.rs +6 -4
- package/crates/team-agent/src/lifecycle/tests/core.rs +46 -3
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +221 -7
- package/crates/team-agent/src/mcp_server/normalize.rs +29 -7
- package/crates/team-agent/src/mcp_server/tests/golden.rs +7 -5
- package/crates/team-agent/src/mcp_server/tests/normalize.rs +5 -2
- package/crates/team-agent/src/mcp_server/tools.rs +25 -1
- package/crates/team-agent/src/mcp_server/wire.rs +11 -1
- package/crates/team-agent/src/model/paths.rs +7 -0
- package/crates/team-agent/src/model/spec.rs +23 -1
- package/crates/team-agent/src/packaging/install.rs +42 -4
- package/crates/team-agent/src/packaging/tests.rs +91 -14
- package/crates/team-agent/src/packaging/types.rs +13 -1
- package/crates/team-agent/src/provider/adapter.rs +204 -0
- package/crates/team-agent/src/state/selector.rs +48 -14
- package/crates/team-agent/src/tmux_backend.rs +14 -2
- package/package.json +4 -4
- package/skills/team-agent/SKILL.md +82 -5
package/Cargo.lock
CHANGED
package/Cargo.toml
CHANGED
|
@@ -208,11 +208,20 @@ pub(crate) fn build_profile_smoke_check_for_team(team: &std::path::Path) -> Resu
|
|
|
208
208
|
let spec = match crate::compiler::compile_team(team) {
|
|
209
209
|
Ok(spec) => spec,
|
|
210
210
|
Err(error) => {
|
|
211
|
+
// SMOKE-1 (locate.md §"Smallest likely code touch" item 2):compile
|
|
212
|
+
// 失败时把 team_dir + next_action 带上,operator 才有可下手的诊断
|
|
213
|
+
// (不是只贴一行 reason)。
|
|
211
214
|
return Ok(json!({
|
|
212
215
|
"name": "profile_smoke",
|
|
213
216
|
"ok": false,
|
|
214
217
|
"status": "profile_invalid",
|
|
218
|
+
"team_dir": team.to_string_lossy().to_string(),
|
|
215
219
|
"reason": error.to_string(),
|
|
220
|
+
"next_action": format!(
|
|
221
|
+
"fix the team spec at `{}` (see reason above) or re-run \
|
|
222
|
+
doctor with a different `<team-dir>`",
|
|
223
|
+
team.display()
|
|
224
|
+
),
|
|
216
225
|
"secret_values_printed": false,
|
|
217
226
|
"checks": [],
|
|
218
227
|
}));
|
|
@@ -249,9 +249,49 @@ fn emit_unknown_subcommand_usage(command: &str) -> ExitCode {
|
|
|
249
249
|
emit_usage_error(&format!(
|
|
250
250
|
"argument {{codex,claude,...,doctor}}: invalid choice: '{command}' (choose from codex, claude, ..., doctor)"
|
|
251
251
|
));
|
|
252
|
+
// E8 (N38): 错路引导 —— 拼写近似时建议最接近的真子命令(additive,不改既有 golden 行)。
|
|
253
|
+
if let Some(suggestion) = nearest_subcommand(command) {
|
|
254
|
+
eprintln!("team-agent: did you mean `{suggestion}`?");
|
|
255
|
+
}
|
|
252
256
|
ExitCode::Usage
|
|
253
257
|
}
|
|
254
258
|
|
|
259
|
+
/// 在已知子命令里找与 `input` 最接近的一个(Levenshtein ≤ 阈值)。无足够接近者 → None。
|
|
260
|
+
fn nearest_subcommand(input: &str) -> Option<&'static str> {
|
|
261
|
+
let mut candidates: Vec<&'static str> = vec!["codex", "claude"];
|
|
262
|
+
candidates.extend_from_slice(DISPATCH_COMMANDS);
|
|
263
|
+
candidates.extend_from_slice(SPEC_ONLY_HELP_COMMANDS);
|
|
264
|
+
// 阈值随长度放宽,但短词收紧,避免 'x' 误配任何东西。
|
|
265
|
+
let max_distance = match input.chars().count() {
|
|
266
|
+
0..=3 => 1,
|
|
267
|
+
4..=6 => 2,
|
|
268
|
+
_ => 3,
|
|
269
|
+
};
|
|
270
|
+
candidates
|
|
271
|
+
.into_iter()
|
|
272
|
+
.map(|c| (c, levenshtein(input, c)))
|
|
273
|
+
.filter(|(_, d)| *d <= max_distance)
|
|
274
|
+
.min_by_key(|(_, d)| *d)
|
|
275
|
+
.map(|(c, _)| c)
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/// Classic Levenshtein edit distance between `a` and `b`, counted in `char`s
/// (insertions, deletions and substitutions each cost 1).
///
/// Pure function with no dependencies; used for subcommand suggestions.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    // `row[k]` holds the distance between the consumed prefix of `a` and the
    // first `k` chars of `b`; it is updated in place, one source char at a time.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (consumed, source_ch) in a.chars().enumerate() {
        // Value of the cell diagonally up-left of the one being written.
        let mut diagonal = row[0];
        row[0] = consumed + 1;
        for (col, &target_ch) in target.iter().enumerate() {
            let above = row[col + 1];
            let substitute = diagonal + usize::from(source_ch != target_ch);
            row[col + 1] = (above + 1).min(row[col] + 1).min(substitute);
            diagonal = above;
        }
    }
    row[target.len()]
}
|
|
294
|
+
|
|
255
295
|
fn emit_usage_error(message: &str) {
|
|
256
296
|
eprintln!("usage: team-agent [-h] {{codex,claude,...,doctor}} ...");
|
|
257
297
|
eprintln!("team-agent: error: {message}");
|
|
@@ -1437,4 +1477,27 @@ mod tests {
|
|
|
1437
1477
|
let _ = std::fs::remove_dir_all(&cwd);
|
|
1438
1478
|
let _ = std::fs::remove_dir_all(&ws);
|
|
1439
1479
|
}
|
|
1480
|
+
|
|
1481
|
+
// ── E8 (N38): 未知子命令 → 最近似建议(additive,不破坏 golden invalid-choice 行) ──
|
|
1482
|
+
#[test]
fn e8_unknown_subcommand_suggests_nearest_known_command() {
    // Typos paired with the real subcommand each one is expected to resolve to.
    let cases = [
        ("statu", "status"),
        ("add-agen", "add-agent"),
        ("start-agnet", "start-agent"),
    ];
    for (typo, expected) in cases.iter().copied() {
        assert_eq!(nearest_subcommand(typo), Some(expected));
    }
}
|
|
1489
|
+
|
|
1490
|
+
#[test]
fn e8_unknown_subcommand_no_suggestion_when_far() {
    // Inputs unrelated to any command must not produce a suggestion.
    for unrelated in ["zzzzzzzzzz", "x"].iter().copied() {
        assert_eq!(nearest_subcommand(unrelated), None);
    }
}
|
|
1496
|
+
|
|
1497
|
+
#[test]
fn e8_levenshtein_basic() {
    // (left, right, expected edit distance)
    let cases = [
        ("kitten", "sitting", 3),
        ("status", "status", 0),
        ("statu", "status", 1),
    ];
    for (left, right, expected) in cases.iter().copied() {
        assert_eq!(levenshtein(left, right), expected);
    }
}
|
|
1440
1503
|
}
|
|
@@ -278,7 +278,7 @@ pub mod lifecycle_port {
|
|
|
278
278
|
let mut probe_degraded = false;
|
|
279
279
|
let entry_table = shutdown_table_snapshot(&run_workspace, &mut probe_degraded, "entry");
|
|
280
280
|
let mut protected = shutdown_protection_set(&entry_table);
|
|
281
|
-
extend_protection_with_leader_panes(&mut protected, transport, &entry_table);
|
|
281
|
+
extend_protection_with_leader_panes(&mut protected, transport, &state, &entry_table);
|
|
282
282
|
let protected = protected;
|
|
283
283
|
let reap_scope = if team.is_some() {
|
|
284
284
|
ShutdownReapScope::ScopedTeam
|
|
@@ -474,17 +474,42 @@ pub mod lifecycle_port {
|
|
|
474
474
|
}))
|
|
475
475
|
}
|
|
476
476
|
|
|
477
|
+
/// T5 (harvest §1 / A2): the bounded stop RETAINS the JoinHandle and reclaims the
|
|
478
|
+
/// worker thread — on a timely result it joins immediately; on timeout it gives the
|
|
479
|
+
/// thread one short grace join window instead of dropping it detached (repeated
|
|
480
|
+
/// shutdowns no longer accumulate leaked threads racing the same workspace).
|
|
477
481
|
fn stop_coordinator_bounded(
|
|
478
482
|
workspace: crate::coordinator::WorkspacePath,
|
|
479
483
|
timeout: std::time::Duration,
|
|
480
484
|
) -> Option<Result<crate::coordinator::types::StopReport, String>> {
|
|
481
485
|
let (tx, rx) = std::sync::mpsc::channel();
|
|
482
|
-
std::thread::spawn(move || {
|
|
486
|
+
let handle = std::thread::spawn(move || {
|
|
483
487
|
let result =
|
|
484
488
|
crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
|
|
485
489
|
let _ = tx.send(result);
|
|
486
490
|
});
|
|
487
|
-
rx.recv_timeout(timeout).ok()
|
|
491
|
+
let outcome = rx.recv_timeout(timeout).ok();
|
|
492
|
+
if outcome.is_some() {
|
|
493
|
+
// The worker already sent its result; the join is immediate.
|
|
494
|
+
let _ = handle.join();
|
|
495
|
+
return outcome;
|
|
496
|
+
}
|
|
497
|
+
// Timeout: grant a short grace window for the worker to wind down, then join if
|
|
498
|
+
// it finished; a still-stuck stop is reported as timeout either way (the grace
|
|
499
|
+
// join keeps the common slightly-late case from leaking a detached thread).
|
|
500
|
+
match rx.recv_timeout(std::time::Duration::from_millis(250)) {
|
|
501
|
+
Ok(late) => {
|
|
502
|
+
let _ = handle.join();
|
|
503
|
+
let _ = late; // result arrived after the deadline: still a timeout to the caller
|
|
504
|
+
None
|
|
505
|
+
}
|
|
506
|
+
Err(_) => {
|
|
507
|
+
if handle.is_finished() {
|
|
508
|
+
let _ = handle.join();
|
|
509
|
+
}
|
|
510
|
+
None
|
|
511
|
+
}
|
|
512
|
+
}
|
|
488
513
|
}
|
|
489
514
|
|
|
490
515
|
struct ShutdownDeadline {
|
|
@@ -707,7 +732,7 @@ pub mod lifecycle_port {
|
|
|
707
732
|
for _ in 0..5 {
|
|
708
733
|
let round_table = shutdown_table_snapshot(workspace, probe_degraded, "residual_round");
|
|
709
734
|
let mut protected = shutdown_protection_set(&round_table);
|
|
710
|
-
extend_protection_with_leader_panes(&mut protected, transport, &round_table);
|
|
735
|
+
extend_protection_with_leader_panes(&mut protected, transport, state, &round_table);
|
|
711
736
|
let residuals = matched_processes(
|
|
712
737
|
workspace, state, root_pids, root_pgids, &protected, scope, &round_table,
|
|
713
738
|
);
|
|
@@ -817,6 +842,71 @@ pub mod lifecycle_port {
|
|
|
817
842
|
}
|
|
818
843
|
}
|
|
819
844
|
|
|
845
|
+
/// E4 真机 grounded(任何 team 的 shutdown 都不杀任何 team 的 leader 锚 pane):
|
|
846
|
+
/// 扫 state.json 收集所有 leader-anchor pane_id(top-level team_owner /
|
|
847
|
+
/// leader_receiver + teams[<key>].* 嵌套形态)。返非空 BTreeSet 给
|
|
848
|
+
/// `extend_protection_with_leader_panes` 第二来源用。
|
|
849
|
+
///
|
|
850
|
+
/// 覆盖场景:
|
|
851
|
+
/// - LeaderStartMode::ExecProvider:state.json team_owner.pane_id 指用户原 tmux
|
|
852
|
+
/// pane(非 leader 前缀)→ shutdown 不杀(E4 真机复发修法)
|
|
853
|
+
/// - E4b team-in-team:子 team state 的 team_owner.pane_id 指父 team worker pane;
|
|
854
|
+
/// 父 team state 的 teams.<child>.team_owner.pane_id 同义(若有该字段)
|
|
855
|
+
/// → 任一 team 的 shutdown 都不杀任何 team 的 leader 锚 pane
|
|
856
|
+
pub fn collect_state_leader_anchor_pane_ids(state: &Value) -> std::collections::BTreeSet<String> {
|
|
857
|
+
let mut out = std::collections::BTreeSet::new();
|
|
858
|
+
push_anchor_pane_id(state, &mut out);
|
|
859
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
860
|
+
for (_, team_state) in teams {
|
|
861
|
+
push_anchor_pane_id(team_state, &mut out);
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
out
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
/// 单帧扫 team_owner.pane_id + leader_receiver.pane_id → BTreeSet 累加。
|
|
868
|
+
fn push_anchor_pane_id(state: &Value, out: &mut std::collections::BTreeSet<String>) {
|
|
869
|
+
for key in &["team_owner", "leader_receiver"] {
|
|
870
|
+
if let Some(pane_id) = state
|
|
871
|
+
.get(*key)
|
|
872
|
+
.and_then(|v| v.get("pane_id"))
|
|
873
|
+
.and_then(Value::as_str)
|
|
874
|
+
.filter(|s| !s.is_empty())
|
|
875
|
+
{
|
|
876
|
+
out.insert(pane_id.to_string());
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
/// E4 真机 grounded(cross-socket):收 state.json 中所有记录的 tmux_socket
|
|
882
|
+
/// endpoint(top-level + teams[<key>] 嵌套形态;team_owner / leader_receiver
|
|
883
|
+
/// 任一字段)。owner_bind 在 claim 时把 leader pane 所在 socket 记进
|
|
884
|
+
/// leader_receiver.tmux_socket(evidence:/测试rust版本/4 state.json),用作
|
|
885
|
+
/// 跨 socket 查 leader pane → pane_pid 的真相源。
|
|
886
|
+
fn collect_state_recorded_tmux_sockets(state: &Value) -> std::collections::BTreeSet<String> {
|
|
887
|
+
let mut out = std::collections::BTreeSet::new();
|
|
888
|
+
push_recorded_tmux_socket(state, &mut out);
|
|
889
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
890
|
+
for (_, team_state) in teams {
|
|
891
|
+
push_recorded_tmux_socket(team_state, &mut out);
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
out
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
fn push_recorded_tmux_socket(state: &Value, out: &mut std::collections::BTreeSet<String>) {
|
|
898
|
+
for key in &["team_owner", "leader_receiver"] {
|
|
899
|
+
if let Some(socket) = state
|
|
900
|
+
.get(*key)
|
|
901
|
+
.and_then(|v| v.get("tmux_socket"))
|
|
902
|
+
.and_then(Value::as_str)
|
|
903
|
+
.filter(|s| !s.is_empty())
|
|
904
|
+
{
|
|
905
|
+
out.insert(socket.to_string());
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
|
|
820
910
|
/// PERF-6 C-①-1/C-②-4 (N39): the protected set derives from the CALLER's snapshot —
|
|
821
911
|
/// the same table the kill/wait sets derive from.
|
|
822
912
|
fn shutdown_protection_set(table: &[ProcessInfo]) -> ShutdownProtection {
|
|
@@ -844,26 +934,72 @@ pub mod lifecycle_port {
|
|
|
844
934
|
protected
|
|
845
935
|
}
|
|
846
936
|
|
|
847
|
-
/// B5/F2
|
|
848
|
-
///
|
|
849
|
-
///
|
|
850
|
-
///
|
|
851
|
-
|
|
937
|
+
/// B5/F2 + E4 真机 grounded(任何 team 的 shutdown 都不杀任何 team 的 leader 锚 pane):
|
|
938
|
+
/// the leader terminal's pane process tree joins the protected set (same set, same
|
|
939
|
+
/// mechanism as the invoker ancestry) so the workspace residual sweep's cmdline/cwd
|
|
940
|
+
/// matching cannot reap the leader — including when ANOTHER team's bare shutdown
|
|
941
|
+
/// runs, where the leader is never in the invoker's ancestry.
|
|
942
|
+
///
|
|
943
|
+
/// Two leader-pane sources(N39 双来源,真机 grounded):
|
|
944
|
+
/// 1. **Session prefix**: tmux session starts with `team-agent-leader-`(契约 grounded;
|
|
945
|
+
/// 覆盖 LeaderStartMode::NewTmuxSession / AttachExisting).
|
|
946
|
+
/// 2. **State.json anchors**(E4 修法):state.team_owner.pane_id / state.leader_receiver.pane_id
|
|
947
|
+
/// 在 top-level **和** teams[<key>].* 都扫(N39 任何 team 的 leader 锚 pane);
|
|
948
|
+
/// 覆盖 LeaderStartMode::ExecProvider(用户 in_tmux 直接 exec,session 名是用户原
|
|
949
|
+
/// `main`/`0`/whatever,不带 leader 前缀 — 此前 B5 三犯保护集漏覆盖)+ E4b
|
|
950
|
+
/// team-in-team(子 team 的 leader 锚 = 父 team worker pane,window 名是 agent id
|
|
951
|
+
/// 也不带 leader 前缀)。
|
|
952
|
+
pub(crate) fn extend_protection_with_leader_panes(
|
|
852
953
|
protected: &mut ShutdownProtection,
|
|
853
954
|
transport: &dyn crate::transport::Transport,
|
|
955
|
+
state: &Value,
|
|
854
956
|
table: &[ProcessInfo],
|
|
855
957
|
) {
|
|
856
|
-
let leader_pane_pids: Vec<u32> =
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
.
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
958
|
+
let mut leader_pane_pids: Vec<u32> = Vec::new();
|
|
959
|
+
let pane_targets = transport.list_targets().unwrap_or_default();
|
|
960
|
+
// Source 1: session 前缀过滤(原 B5 实现)— per-workspace socket。
|
|
961
|
+
leader_pane_pids.extend(
|
|
962
|
+
pane_targets
|
|
963
|
+
.iter()
|
|
964
|
+
.filter(|pane| {
|
|
965
|
+
pane.session
|
|
966
|
+
.as_str()
|
|
967
|
+
.starts_with(crate::leader::LEADER_SESSION_PREFIX)
|
|
968
|
+
})
|
|
969
|
+
.filter_map(|pane| pane.pane_pid),
|
|
970
|
+
);
|
|
971
|
+
// Source 2: state.json team_owner / leader_receiver 真锚 pane_id(top-level +
|
|
972
|
+
// teams[*]),per-workspace socket 命中。
|
|
973
|
+
let anchor_pane_ids: std::collections::BTreeSet<String> =
|
|
974
|
+
collect_state_leader_anchor_pane_ids(state);
|
|
975
|
+
leader_pane_pids.extend(
|
|
976
|
+
pane_targets
|
|
977
|
+
.iter()
|
|
978
|
+
.filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
|
|
979
|
+
.filter_map(|pane| pane.pane_pid),
|
|
980
|
+
);
|
|
981
|
+
// Source 3 (E4 真机 grounded · cross-socket):leader 锚 pane 可能在【别的
|
|
982
|
+
// tmux socket】上 — LeaderStartMode::ExecProvider 真实场景里用户 in_tmux
|
|
983
|
+
// 起 `team-agent claude`,leader pane 留在用户【默认 socket】,而 shutdown
|
|
984
|
+
// 的 transport 走 per-workspace `ta-<hash>` socket,list_targets 看不见。
|
|
985
|
+
// 从 state.json 读 leader_receiver/team_owner.tmux_socket(claim 时
|
|
986
|
+
// owner_bind 记录,见 evidence /测试rust版本/4 state.json),查那个 socket
|
|
987
|
+
// 的 list_targets 找 anchor pane_id → pane_pid → 进入 process_tree 保护。
|
|
988
|
+
// 不在 state 中的 socket 不查(MUST-17 不撒宽 / 不主动枚举全机器 sockets)。
|
|
989
|
+
for socket_endpoint in collect_state_recorded_tmux_sockets(state) {
|
|
990
|
+
let cross_backend =
|
|
991
|
+
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(&socket_endpoint);
|
|
992
|
+
let cross_panes = <crate::tmux_backend::TmuxBackend as crate::transport::Transport>::list_targets(&cross_backend)
|
|
993
|
+
.unwrap_or_default();
|
|
994
|
+
leader_pane_pids.extend(
|
|
995
|
+
cross_panes
|
|
996
|
+
.iter()
|
|
997
|
+
.filter(|pane| anchor_pane_ids.contains(pane.pane_id.as_str()))
|
|
998
|
+
.filter_map(|pane| pane.pane_pid),
|
|
999
|
+
);
|
|
1000
|
+
}
|
|
1001
|
+
leader_pane_pids.sort_unstable();
|
|
1002
|
+
leader_pane_pids.dedup();
|
|
867
1003
|
if leader_pane_pids.is_empty() {
|
|
868
1004
|
return;
|
|
869
1005
|
}
|
|
@@ -1352,7 +1488,41 @@ pub mod lifecycle_port {
|
|
|
1352
1488
|
}
|
|
1353
1489
|
|
|
1354
1490
|
fn error_value(error: crate::lifecycle::LifecycleError) -> Value {
|
|
1355
|
-
|
|
1491
|
+
let message = error.to_string();
|
|
1492
|
+
let mut payload = json!({"ok": false, "error": message});
|
|
1493
|
+
if let Some(next_action) = error_next_action(&message) {
|
|
1494
|
+
payload["next_action"] = json!(next_action);
|
|
1495
|
+
}
|
|
1496
|
+
payload
|
|
1497
|
+
}
|
|
1498
|
+
|
|
1499
|
+
/// E8 (N38): maps common "wrong path" runtime errors to the command that gets
/// the operator unstuck. Pure text lookup on the human-readable error message;
/// it never changes the error itself.
///
/// Recognised message shapes, checked in this order:
/// - `agent {id} not found`      → use `add-agent` to create a new role;
/// - `agent id already exists`   → pick another id or `start-agent` the existing one;
/// - `unknown worker agent id`   → run `status` to list the valid ids.
///
/// Returns `None` for every other message.
fn error_next_action(message: &str) -> Option<&'static str> {
    // start-agent only starts an agent already present in state; adding a new
    // role goes through add-agent. The match is on the `agent {id} not found`
    // shape rather than on the two loose words: requirement errors are prefixed
    // with "agent start requirement unmet:", so a bare "contains agent" test is
    // always true and would mislabel any unrelated "... not found" failure.
    if names_missing_agent(message) {
        return Some(
            "start-agent only starts an agent that already exists in state. \
             To add a NEW role at runtime use: team-agent add-agent <id> --role-file <path>",
        );
    }
    // add-agent / fork hit an id that is already taken.
    if message.contains("agent id already exists") {
        return Some(
            "that agent id is already in the team. \
             Use a different id, or start the existing one with: team-agent start-agent <id>",
        );
    }
    // stop / reset / fork-source given an id that is not in this team.
    if message.contains("unknown worker agent id") {
        return Some(
            "no such worker agent in this team. \
             Run `team-agent status` to list agent ids (check --team if multiple teams)",
        );
    }
    None
}

/// True when `message` contains the phrase `agent <id> not found`, where `<id>`
/// is a single non-empty, space-free token.
fn names_missing_agent(message: &str) -> bool {
    message.match_indices("agent ").any(|(start, needle)| {
        let mut words = message[start + needle.len()..].split(' ');
        match (words.next(), words.next(), words.next()) {
            (Some(id), Some("not"), Some(tail)) => !id.is_empty() && tail.starts_with("found"),
            _ => false,
        }
    })
}
|
|
1357
1527
|
|
|
1358
1528
|
fn record_idle_acknowledged(
|
|
@@ -1796,6 +1966,53 @@ pub mod lifecycle_port {
|
|
|
1796
1966
|
.any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
|
|
1797
1967
|
})
|
|
1798
1968
|
}
|
|
1969
|
+
|
|
1970
|
+
#[cfg(test)]
mod e8_error_guidance_tests {
    use super::{error_next_action, error_value};

    /// A "not found" requirement error must point the operator at `add-agent`.
    #[test]
    fn start_agent_not_found_points_to_add_agent() {
        // `LifecycleError::RequirementUnmet("agent {id} not found")` renders via
        // `to_string()` as "agent start requirement unmet: agent foo not found".
        let guidance = error_next_action("agent start requirement unmet: agent foo not found")
            .expect("not-found must carry next_action");
        assert!(guidance.contains("add-agent"), "must steer to add-agent: {guidance}");
        assert!(
            guidance.contains("--role-file"),
            "must show the role-file flag: {guidance}"
        );
    }

    /// An id collision must mention `start-agent` as the way out.
    #[test]
    fn add_agent_already_exists_explains_way_out() {
        let guidance =
            error_next_action("agent start requirement unmet: agent id already exists: foo")
                .expect("already-exists must carry next_action");
        assert!(guidance.contains("start-agent"), "must mention start-agent: {guidance}");
    }

    /// An unknown worker id must point at `status`.
    #[test]
    fn unknown_worker_points_to_status() {
        let guidance =
            error_next_action("agent start requirement unmet: unknown worker agent id: ghost")
                .expect("unknown worker must carry next_action");
        assert!(guidance.contains("status"), "must steer to status: {guidance}");
    }

    /// Errors outside the recognised shapes get no guidance.
    #[test]
    fn unrelated_error_has_no_next_action() {
        let guidance = error_next_action("state persistence failed: disk full");
        assert_eq!(guidance, None);
    }

    /// `error_value` must surface the guidance as a `next_action` field.
    #[test]
    fn error_value_attaches_next_action_field() {
        let payload = error_value(crate::lifecycle::LifecycleError::RequirementUnmet(
            "agent foo not found".to_string(),
        ));
        assert_eq!(payload["ok"], serde_json::json!(false));
        let attached = payload["next_action"].as_str().unwrap_or("");
        assert!(
            attached.contains("add-agent"),
            "error_value must attach the add-agent guidance: {payload}"
        );
    }
}
|
|
1799
2016
|
}
|
|
1800
2017
|
|
|
1801
2018
|
/// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
|
|
@@ -1812,14 +2029,45 @@ pub mod diagnose_port {
|
|
|
1812
2029
|
let workspace_valid = workspace.is_dir();
|
|
1813
2030
|
let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
|
|
1814
2031
|
let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
2032
|
+
// SMOKE-1 (locate.md §"Minimal Fix"):default doctor 不再隐式编译
|
|
2033
|
+
// `<workspace>/.team/current`(legacy 残留)作 profile_smoke 目标。
|
|
2034
|
+
// profile_smoke 是 team-scoped 体检,只在以下两种情形跑:
|
|
2035
|
+
// ① 用户显式给了 spec / team dir;
|
|
2036
|
+
// ② workspace 根本身就是 team dir(含 TEAM.md / team.spec.yaml)。
|
|
2037
|
+
// legacy `<workspace>/.team/current` 仅作降级诊断面(legacy_team_invalid),
|
|
2038
|
+
// 不再绑架整个 doctor 假死在 profile_smoke_failed 上。
|
|
2039
|
+
let explicit_team_target = explicit_doctor_team_dir(workspace, spec);
|
|
2040
|
+
let profile_smoke = explicit_team_target
|
|
1819
2041
|
.as_ref()
|
|
1820
|
-
.
|
|
2042
|
+
.map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(team))
|
|
2043
|
+
.transpose()?;
|
|
2044
|
+
let legacy_check = if explicit_team_target.is_none() {
|
|
2045
|
+
legacy_current_team_check(workspace)?
|
|
2046
|
+
} else {
|
|
2047
|
+
None
|
|
2048
|
+
};
|
|
2049
|
+
let profile_smoke_value = profile_smoke.unwrap_or_else(|| {
|
|
2050
|
+
legacy_check.clone().unwrap_or_else(|| {
|
|
2051
|
+
json!({
|
|
2052
|
+
"name": "profile_smoke",
|
|
2053
|
+
"ok": true,
|
|
2054
|
+
"status": "not_required",
|
|
2055
|
+
"checks": [],
|
|
2056
|
+
"secret_values_printed": false,
|
|
2057
|
+
})
|
|
2058
|
+
})
|
|
2059
|
+
});
|
|
2060
|
+
let profile_smoke_ok = profile_smoke_value
|
|
2061
|
+
.get("ok")
|
|
2062
|
+
.and_then(Value::as_bool)
|
|
1821
2063
|
.unwrap_or(true);
|
|
1822
|
-
|
|
2064
|
+
// legacy 降级面(legacy_team_invalid)不下拉整体 ok —— 用户没显式让我们
|
|
2065
|
+
// 体检这个 team,失败是降级诊断信息,不是 install 自检失败。
|
|
2066
|
+
let legacy_only_failure =
|
|
2067
|
+
!profile_smoke_ok && profile_smoke_value.get("status").and_then(Value::as_str)
|
|
2068
|
+
== Some("legacy_team_invalid");
|
|
2069
|
+
let effective_smoke_ok = profile_smoke_ok || legacy_only_failure;
|
|
2070
|
+
let ok = workspace_valid && (team_context || workspace_has_entries) && effective_smoke_ok;
|
|
1823
2071
|
let health = crate::coordinator::coordinator_health(
|
|
1824
2072
|
&crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
|
|
1825
2073
|
);
|
|
@@ -1836,18 +2084,12 @@ pub mod diagnose_port {
|
|
|
1836
2084
|
"local_module": true,
|
|
1837
2085
|
},
|
|
1838
2086
|
"secret_scan": secret_scan(workspace),
|
|
1839
|
-
"profile_smoke":
|
|
1840
|
-
"name": "profile_smoke",
|
|
1841
|
-
"ok": true,
|
|
1842
|
-
"status": "not_required",
|
|
1843
|
-
"checks": [],
|
|
1844
|
-
"secret_values_printed": false,
|
|
1845
|
-
})),
|
|
2087
|
+
"profile_smoke": profile_smoke_value,
|
|
1846
2088
|
"coordinator": coordinator_health_value(health),
|
|
1847
2089
|
"ok": ok,
|
|
1848
2090
|
"error": if ok {
|
|
1849
2091
|
Value::Null
|
|
1850
|
-
} else if !profile_smoke_ok {
|
|
2092
|
+
} else if !profile_smoke_ok && !legacy_only_failure {
|
|
1851
2093
|
json!("profile_smoke_failed")
|
|
1852
2094
|
} else if workspace_valid {
|
|
1853
2095
|
json!("workspace has no Team Agent spec or runtime context")
|
|
@@ -1857,6 +2099,63 @@ pub mod diagnose_port {
|
|
|
1857
2099
|
}))
|
|
1858
2100
|
}
|
|
1859
2101
|
|
|
2102
|
+
/// SMOKE-1: resolves the team directory that doctor was explicitly pointed at.
///
/// Resolution order:
/// 1. `spec`, taken as-is when absolute and relative to `workspace` otherwise:
///    an existing file yields its parent directory; a directory containing
///    `team.spec.yaml` or `TEAM.md` yields that directory.
/// 2. `workspace` itself, when it contains `team.spec.yaml` or `TEAM.md`
///    (also the fallback when `spec` resolves to neither of the above).
///
/// Returns `None` otherwise. `<workspace>/.team/current` is never consulted here.
fn explicit_doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
    let holds_team_files =
        |dir: &Path| dir.join("team.spec.yaml").is_file() || dir.join("TEAM.md").is_file();

    if let Some(given) = spec {
        let resolved = if given.is_absolute() {
            given.to_path_buf()
        } else {
            workspace.join(given)
        };
        if resolved.is_file() {
            // A spec file was named directly: its directory is the team dir.
            return resolved.parent().map(Path::to_path_buf);
        }
        if holds_team_files(&resolved) {
            return Some(resolved);
        }
    }

    if holds_team_files(workspace) {
        Some(workspace.to_path_buf())
    } else {
        None
    }
}
|
|
2124
|
+
|
|
2125
|
+
/// SMOKE-1: legacy `<workspace>/.team/current` 残留体检 — 降级诊断,**不**
|
|
2126
|
+
/// 当 install self-check 失败。如果 legacy 团有 spec/TEAM.md,尝试 compile,
|
|
2127
|
+
/// 失败返 `status=legacy_team_invalid` + team_dir + reason + next_action(N38
|
|
2128
|
+
/// 失败可解释性);compile 成功就不打扰用户(返 None,profile_smoke 走
|
|
2129
|
+
/// `not_required`)。无 legacy 团目录 → None。
|
|
2130
|
+
fn legacy_current_team_check(workspace: &Path) -> Result<Option<Value>, CliError> {
|
|
2131
|
+
let team = workspace.join(".team").join("current");
|
|
2132
|
+
let has_spec = team.join("team.spec.yaml").is_file();
|
|
2133
|
+
let has_team_md = team.join("TEAM.md").is_file();
|
|
2134
|
+
if !has_spec && !has_team_md {
|
|
2135
|
+
return Ok(None);
|
|
2136
|
+
}
|
|
2137
|
+
match crate::compiler::compile_team(&team) {
|
|
2138
|
+
Ok(_) => Ok(None),
|
|
2139
|
+
Err(error) => {
|
|
2140
|
+
let team_dir = team.to_string_lossy().to_string();
|
|
2141
|
+
Ok(Some(json!({
|
|
2142
|
+
"name": "profile_smoke",
|
|
2143
|
+
"ok": false,
|
|
2144
|
+
"status": "legacy_team_invalid",
|
|
2145
|
+
"team_dir": team_dir,
|
|
2146
|
+
"reason": error.to_string(),
|
|
2147
|
+
"next_action": format!(
|
|
2148
|
+
"scope doctor to a real team: `team-agent doctor <team-dir>`, \
|
|
2149
|
+
or repair/remove the legacy `{}` directory",
|
|
2150
|
+
team.display()
|
|
2151
|
+
),
|
|
2152
|
+
"checks": [],
|
|
2153
|
+
"secret_values_printed": false,
|
|
2154
|
+
})))
|
|
2155
|
+
}
|
|
2156
|
+
}
|
|
2157
|
+
}
|
|
2158
|
+
|
|
1860
2159
|
fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
|
|
1861
2160
|
if let Some(spec) = spec {
|
|
1862
2161
|
let candidate = if spec.is_absolute() {
|
|
@@ -29,6 +29,15 @@ use rusqlite::params;
|
|
|
29
29
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
30
30
|
let conn = crate::db::schema::open_db(store.db_path())
|
|
31
31
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
32
|
+
// B-5 / 036b N38 explicable — status 出口 runtime 块:把 coordinator_health
|
|
33
|
+
// (现状)+ undelivered backlog count 一起暴露;coordinator not running ∧
|
|
34
|
+
// backlog>0 才挂 down-hint(anti-nag)。auto-recovery 不做(user 已裁)。
|
|
35
|
+
let coordinator_running = coordinator_status_running(&health);
|
|
36
|
+
let undelivered_backlog = count_undelivered_backlog(&conn, owner_team_id)?;
|
|
37
|
+
let runtime_block = build_runtime_status_block(
|
|
38
|
+
coordinator_running,
|
|
39
|
+
undelivered_backlog,
|
|
40
|
+
);
|
|
32
41
|
let agents = enrich_agents(state.get("agents"));
|
|
33
42
|
let tasks = state
|
|
34
43
|
.get("tasks")
|
|
@@ -68,6 +77,7 @@ use rusqlite::params;
|
|
|
68
77
|
"latest_results": latest_result_summaries(&store, owner_team_id)?,
|
|
69
78
|
"readiness": readiness,
|
|
70
79
|
"coordinator": coordinator_health_value(health),
|
|
80
|
+
"runtime": runtime_block,
|
|
71
81
|
"last_events": Value::Array(
|
|
72
82
|
crate::event_log::EventLog::new(workspace)
|
|
73
83
|
.tail(10)
|
|
@@ -748,6 +758,58 @@ use rusqlite::params;
|
|
|
748
758
|
}
|
|
749
759
|
}
|
|
750
760
|
|
|
761
|
+
/// B-5 / 036b N38 — status 出口的 runtime 块:把 coordinator_health 与
|
|
762
|
+
/// undelivered backlog 合体暴露。down-hint 只在【coordinator 不在跑 ∧ 有 backlog】
|
|
763
|
+
/// 两条件同时满足才挂(anti-nag);健康状态下不挂提示。auto-recovery 不做。
|
|
764
|
+
fn build_runtime_status_block(coordinator_running: bool, undelivered: i64) -> Value {
|
|
765
|
+
let mut runtime = serde_json::Map::new();
|
|
766
|
+
runtime.insert(
|
|
767
|
+
"coordinator".to_string(),
|
|
768
|
+
json!({"ok": coordinator_running}),
|
|
769
|
+
);
|
|
770
|
+
runtime.insert("undelivered".to_string(), json!(undelivered));
|
|
771
|
+
if !coordinator_running && undelivered > 0 {
|
|
772
|
+
runtime.insert(
|
|
773
|
+
"hint".to_string(),
|
|
774
|
+
json!(format!(
|
|
775
|
+
"coordinator not running with {undelivered} undelivered — run team-agent restart"
|
|
776
|
+
)),
|
|
777
|
+
);
|
|
778
|
+
}
|
|
779
|
+
Value::Object(runtime)
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
/// Whether the coordinator HealthReport reflects a running tick loop. Used by the
|
|
783
|
+
/// runtime block + the hint gate.
|
|
784
|
+
fn coordinator_status_running(health: &crate::coordinator::HealthReport) -> bool {
|
|
785
|
+
matches!(health.status, crate::coordinator::CoordinatorHealthStatus::Running)
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
/// Count of messages currently sitting in delivery-able backlog
|
|
789
|
+
/// (accepted/pending/queued forms — not delivered / not failed / not refused).
|
|
790
|
+
/// owner_team_id scope honored when present.
|
|
791
|
+
fn count_undelivered_backlog(
|
|
792
|
+
conn: &rusqlite::Connection,
|
|
793
|
+
owner_team_id: Option<&str>,
|
|
794
|
+
) -> Result<i64, CliError> {
|
|
795
|
+
// Backlog statuses chosen to mirror what `deliver_pending` would pick up.
|
|
796
|
+
let sql = match owner_team_id {
|
|
797
|
+
Some(_) => "select count(*) from messages
|
|
798
|
+
where owner_team_id = ?1 and status in ('accepted','pending','queued','queued_until_trust')",
|
|
799
|
+
None => "select count(*) from messages
|
|
800
|
+
where status in ('accepted','pending','queued','queued_until_trust')",
|
|
801
|
+
};
|
|
802
|
+
let count: i64 = match owner_team_id {
|
|
803
|
+
Some(team) => conn
|
|
804
|
+
.query_row(sql, params![team], |row| row.get(0))
|
|
805
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?,
|
|
806
|
+
None => conn
|
|
807
|
+
.query_row(sql, [], |row| row.get(0))
|
|
808
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?,
|
|
809
|
+
};
|
|
810
|
+
Ok(count)
|
|
811
|
+
}
|
|
812
|
+
|
|
751
813
|
fn coordinator_health_value(health: crate::coordinator::HealthReport) -> Value {
|
|
752
814
|
json!({
|
|
753
815
|
"ok": health.ok,
|