@team-agent/installer 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/adapters.rs +8 -0
- package/crates/team-agent/src/cli/diagnose.rs +52 -11
- package/crates/team-agent/src/cli/emit.rs +3 -2
- package/crates/team-agent/src/cli/mod.rs +225 -80
- package/crates/team-agent/src/cli/send.rs +1 -0
- package/crates/team-agent/src/cli/status_port.rs +135 -7
- package/crates/team-agent/src/cli/tests/missing_subcommands.rs +8 -1
- package/crates/team-agent/src/cli/tests/mod.rs +1 -0
- package/crates/team-agent/src/cli/tests/shutdown_kill_plan.rs +39 -0
- package/crates/team-agent/src/cli/types.rs +5 -1
- package/crates/team-agent/src/compiler/tests.rs +2 -2
- package/crates/team-agent/src/compiler.rs +1 -1
- package/crates/team-agent/src/coordinator/backoff.rs +57 -9
- package/crates/team-agent/src/coordinator/health.rs +65 -2
- package/crates/team-agent/src/coordinator/runtime_detectors.rs +28 -16
- package/crates/team-agent/src/coordinator/tests/a0_lostupdate.rs +87 -0
- package/crates/team-agent/src/coordinator/tests/mod.rs +1 -0
- package/crates/team-agent/src/coordinator/tests/watch.rs +4 -2
- package/crates/team-agent/src/coordinator/tick.rs +195 -43
- package/crates/team-agent/src/leader/helpers.rs +2 -0
- package/crates/team-agent/src/leader/rediscover.rs +1 -0
- package/crates/team-agent/src/leader/start.rs +9 -1
- package/crates/team-agent/src/leader/takeover.rs +18 -1
- package/crates/team-agent/src/lifecycle/display.rs +3 -3
- package/crates/team-agent/src/lifecycle/launch.rs +772 -285
- package/crates/team-agent/src/lifecycle/mod.rs +1 -0
- package/crates/team-agent/src/lifecycle/profile_launch.rs +110 -4
- package/crates/team-agent/src/lifecycle/profile_smoke.rs +4 -1
- package/crates/team-agent/src/lifecycle/restart/agent.rs +16 -5
- package/crates/team-agent/src/lifecycle/restart/common.rs +35 -25
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +31 -25
- package/crates/team-agent/src/lifecycle/tests/agent_ops.rs +2 -2
- package/crates/team-agent/src/lifecycle/tests/core.rs +5 -5
- package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +4 -4
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +5 -3
- package/crates/team-agent/src/lifecycle/types.rs +4 -0
- package/crates/team-agent/src/lifecycle/worker_command_context.rs +361 -0
- package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +2 -1
- package/crates/team-agent/src/mcp_server/tests/scoped.rs +14 -1
- package/crates/team-agent/src/mcp_server/tests/send.rs +15 -1
- package/crates/team-agent/src/mcp_server/tools.rs +65 -9
- package/crates/team-agent/src/mcp_server/wire.rs +2 -1
- package/crates/team-agent/src/message_store.rs +80 -0
- package/crates/team-agent/src/messaging/results.rs +76 -5
- package/crates/team-agent/src/messaging/send.rs +3 -1
- package/crates/team-agent/src/messaging/types.rs +15 -1
- package/crates/team-agent/src/messaging/watchers.rs +68 -30
- package/crates/team-agent/src/model/enums.rs +7 -1
- package/crates/team-agent/src/model/permissions.rs +7 -0
- package/crates/team-agent/src/model/spec.rs +3 -1
- package/crates/team-agent/src/provider/adapter.rs +472 -7
- package/crates/team-agent/src/provider/classify.rs +6 -2
- package/crates/team-agent/src/provider/faults.rs +3 -2
- package/crates/team-agent/src/provider/startup_prompt.rs +25 -7
- package/crates/team-agent/src/provider/types.rs +11 -0
- package/crates/team-agent/src/session_capture.rs +1 -0
- package/crates/team-agent/src/state/persist.rs +95 -19
- package/crates/team-agent/src/tmux_backend/tests.rs +8 -7
- package/crates/team-agent/src/tmux_backend.rs +134 -6
- package/crates/team-agent/src/transport.rs +32 -0
- package/package.json +4 -4
|
@@ -176,11 +176,60 @@ pub mod lifecycle_port {
|
|
|
176
176
|
let result =
|
|
177
177
|
shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
|
|
178
178
|
if team.is_none() {
|
|
179
|
-
|
|
179
|
+
// B5/F1: the leader terminal (`team-agent claude`) lives on this same
|
|
180
|
+
// workspace socket by design (leader/start.rs); a bare shutdown must not
|
|
181
|
+
// `kill-server` it away. Spare `team-agent-leader-*` sessions and clear the
|
|
182
|
+
// remaining non-leader sessions individually; only an empty-of-leader socket
|
|
183
|
+
// gets the whole-server teardown (the original leak-cleanup intent).
|
|
184
|
+
let transport_dyn: &dyn crate::transport::Transport = &transport;
|
|
185
|
+
let sessions = socket_session_names(transport_dyn);
|
|
186
|
+
match sessions_to_kill_sparing_leader(&sessions) {
|
|
187
|
+
None => transport.kill_server(),
|
|
188
|
+
Some(non_leader_sessions) => {
|
|
189
|
+
for session in &non_leader_sessions {
|
|
190
|
+
let _ = transport_dyn.kill_session(session);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
180
194
|
}
|
|
181
195
|
result
|
|
182
196
|
}
|
|
183
197
|
|
|
198
|
+
/// Returns the distinct session names currently reported by `transport`.
///
/// Every pane from `Transport::list_targets` contributes its session; a session is
/// kept only the first time its name is seen, so the result preserves first-seen
/// order without duplicates.
///
/// A failing `list_targets` call is treated as "no panes" and yields an empty vector.
/// NOTE(review): the bare-shutdown caller treats a list with no leader session as
/// "safe to kill the whole server", so a listing failure ends up on that path too —
/// confirm this is the intended best-effort behavior.
fn socket_session_names(
    transport: &dyn crate::transport::Transport,
) -> Vec<crate::transport::SessionName> {
    let panes = transport.list_targets().unwrap_or_default();
    let mut known_names = std::collections::BTreeSet::new();
    let mut unique_sessions = Vec::new();
    for pane in panes {
        // `insert` returns true only for a name that has not been recorded yet.
        if known_names.insert(pane.session.as_str().to_string()) {
            unique_sessions.push(pane.session);
        }
    }
    unique_sessions
}
|
|
210
|
+
|
|
211
|
+
/// B5/F1 pure kill decision for the bare-shutdown socket teardown.
|
|
212
|
+
/// `None` => no `team-agent-leader-*` session on the socket → safe to kill the whole
|
|
213
|
+
/// server. `Some(rest)` => leader present → kill only the non-leader sessions.
|
|
214
|
+
pub(crate) fn sessions_to_kill_sparing_leader(
|
|
215
|
+
sessions: &[crate::transport::SessionName],
|
|
216
|
+
) -> Option<Vec<crate::transport::SessionName>> {
|
|
217
|
+
let leader_present = sessions
|
|
218
|
+
.iter()
|
|
219
|
+
.any(|session| session.as_str().starts_with(crate::leader::LEADER_SESSION_PREFIX));
|
|
220
|
+
leader_present.then(|| {
|
|
221
|
+
sessions
|
|
222
|
+
.iter()
|
|
223
|
+
.filter(|session| {
|
|
224
|
+
!session
|
|
225
|
+
.as_str()
|
|
226
|
+
.starts_with(crate::leader::LEADER_SESSION_PREFIX)
|
|
227
|
+
})
|
|
228
|
+
.cloned()
|
|
229
|
+
.collect()
|
|
230
|
+
})
|
|
231
|
+
}
|
|
232
|
+
|
|
184
233
|
pub fn shutdown_with_transport(
|
|
185
234
|
workspace: &Path,
|
|
186
235
|
keep_logs: bool,
|
|
@@ -223,7 +272,14 @@ pub mod lifecycle_port {
|
|
|
223
272
|
.and_then(Value::as_str)
|
|
224
273
|
.filter(|s| !s.is_empty())
|
|
225
274
|
.map(crate::transport::SessionName::new);
|
|
226
|
-
|
|
275
|
+
// PERF-6 C-①-1: ONE process-table snapshot for the whole happy path; the
|
|
276
|
+
// protected / pgid / kill / wait sets all derive from it (N39 same-source).
|
|
277
|
+
// A probe failure is observable, not a silent empty table (swallow batch 1).
|
|
278
|
+
let mut probe_degraded = false;
|
|
279
|
+
let entry_table = shutdown_table_snapshot(&run_workspace, &mut probe_degraded, "entry");
|
|
280
|
+
let mut protected = shutdown_protection_set(&entry_table);
|
|
281
|
+
extend_protection_with_leader_panes(&mut protected, transport, &entry_table);
|
|
282
|
+
let protected = protected;
|
|
227
283
|
let reap_scope = if team.is_some() {
|
|
228
284
|
ShutdownReapScope::ScopedTeam
|
|
229
285
|
} else {
|
|
@@ -246,11 +302,9 @@ pub mod lifecycle_port {
|
|
|
246
302
|
root_pids.extend(pane_pids);
|
|
247
303
|
root_pids.sort_unstable();
|
|
248
304
|
root_pids.dedup();
|
|
249
|
-
let root_pgids = process_pgids(&root_pids, &protected);
|
|
305
|
+
let root_pgids = process_pgids(&root_pids, &protected, &entry_table);
|
|
250
306
|
deadline.check("reap_process_tree")?;
|
|
251
|
-
|
|
252
|
-
reap_process_tree(*pid, &protected);
|
|
253
|
-
}
|
|
307
|
+
reap_process_tree(&root_pids, &protected, &entry_table);
|
|
254
308
|
reap_process_groups(&root_pgids, &protected);
|
|
255
309
|
let mut kill_error: Option<String> = None;
|
|
256
310
|
deadline.check("kill_session")?;
|
|
@@ -267,8 +321,9 @@ pub mod lifecycle_port {
|
|
|
267
321
|
&state,
|
|
268
322
|
&root_pids,
|
|
269
323
|
&root_pgids,
|
|
270
|
-
|
|
324
|
+
transport,
|
|
271
325
|
reap_scope,
|
|
326
|
+
&mut probe_degraded,
|
|
272
327
|
);
|
|
273
328
|
deadline.check("session_residuals")?;
|
|
274
329
|
let session_residuals = if let Some(session) = session_name.as_ref() {
|
|
@@ -286,6 +341,10 @@ pub mod lifecycle_port {
|
|
|
286
341
|
Vec::new()
|
|
287
342
|
};
|
|
288
343
|
deadline.check("process_residuals")?;
|
|
344
|
+
// C-①: the post-verify gets ONE fresh verification snapshot (reaps changed
|
|
345
|
+
// the world; #248 post-verify facts must be current, not the entry view).
|
|
346
|
+
let verify_table =
|
|
347
|
+
shutdown_table_snapshot(&run_workspace, &mut probe_degraded, "post_verify");
|
|
289
348
|
let process_residuals = process_residuals(
|
|
290
349
|
&run_workspace,
|
|
291
350
|
&state,
|
|
@@ -293,6 +352,7 @@ pub mod lifecycle_port {
|
|
|
293
352
|
&root_pgids,
|
|
294
353
|
&protected,
|
|
295
354
|
reap_scope,
|
|
355
|
+
&verify_table,
|
|
296
356
|
);
|
|
297
357
|
deadline.check("stop_coordinator")?;
|
|
298
358
|
let mut coordinator_timeout = false;
|
|
@@ -313,7 +373,9 @@ pub mod lifecycle_port {
|
|
|
313
373
|
None
|
|
314
374
|
};
|
|
315
375
|
let probe_timeout = crate::os_probe::probe_timeout();
|
|
316
|
-
|
|
376
|
+
// swallow batch 1: a failed ps probe degrades verification truthfully — the
|
|
377
|
+
// empty table must never read as a clean "no residual processes".
|
|
378
|
+
let verification_degraded = probe_timeout.is_some() || probe_degraded;
|
|
317
379
|
let session_killed = session_name.is_some()
|
|
318
380
|
&& kill_error.is_none()
|
|
319
381
|
&& session_residuals.is_empty()
|
|
@@ -393,6 +455,7 @@ pub mod lifecycle_port {
|
|
|
393
455
|
"status": status,
|
|
394
456
|
"phase": phase,
|
|
395
457
|
"verification_degraded": verification_degraded,
|
|
458
|
+
"probe_degraded": probe_degraded,
|
|
396
459
|
"probe_timeout_kind": probe_timeout_kind,
|
|
397
460
|
"probe_timeout": probe_timeout_value,
|
|
398
461
|
"keep_logs": keep_logs,
|
|
@@ -574,11 +637,29 @@ pub mod lifecycle_port {
|
|
|
574
637
|
.filter(|pid| *pid > 0)
|
|
575
638
|
}
|
|
576
639
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
640
|
+
/// PERF-6 C-② batched signals: the UNION of all root trees gets SIGTERM, shares ONE
|
|
641
|
+
/// >=150ms grace window (no single pid's grace is shortened — the serial per-root
|
|
642
|
+
/// chain is what's removed), then the union gets SIGKILL (noop for already-dead
|
|
643
|
+
/// pids; Gap 37 escalation order TERM -> grace -> KILL preserved), then a single
|
|
644
|
+
/// bounded wait for the whole union. kill/wait sets derive from the SAME snapshot
|
|
645
|
+
/// as the protected set (N39).
|
|
646
|
+
fn reap_process_tree(
|
|
647
|
+
root_pids: &[u32],
|
|
648
|
+
protected: &ShutdownProtection,
|
|
649
|
+
table: &[ProcessInfo],
|
|
650
|
+
) {
|
|
651
|
+
let mut pids = Vec::new();
|
|
652
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
653
|
+
for root in root_pids {
|
|
654
|
+
for pid in process_tree_from_table(*root, table) {
|
|
655
|
+
if !protected.contains_pid(pid) && seen.insert(pid) {
|
|
656
|
+
pids.push(pid);
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
if pids.is_empty() {
|
|
661
|
+
return;
|
|
662
|
+
}
|
|
582
663
|
for pid in pids.iter().rev() {
|
|
583
664
|
send_process_signal(*pid, libc::SIGTERM);
|
|
584
665
|
}
|
|
@@ -611,86 +692,83 @@ pub mod lifecycle_port {
|
|
|
611
692
|
}
|
|
612
693
|
}
|
|
613
694
|
|
|
695
|
+
/// PERF-6 C-①-2 + C-②-5: every residual round fetches ONE fresh snapshot (reap
|
|
696
|
+
/// changed the world) and re-derives the protected set from THAT snapshot; all
|
|
697
|
+
/// in-round consumers (match + tree walks) reuse it.
|
|
614
698
|
fn reap_workspace_process_residuals(
|
|
615
699
|
workspace: &Path,
|
|
616
700
|
state: &Value,
|
|
617
701
|
root_pids: &[u32],
|
|
618
702
|
root_pgids: &[u32],
|
|
619
|
-
|
|
703
|
+
transport: &dyn crate::transport::Transport,
|
|
620
704
|
scope: ShutdownReapScope,
|
|
705
|
+
probe_degraded: &mut bool,
|
|
621
706
|
) {
|
|
622
707
|
for _ in 0..5 {
|
|
623
|
-
let
|
|
624
|
-
|
|
708
|
+
let round_table = shutdown_table_snapshot(workspace, probe_degraded, "residual_round");
|
|
709
|
+
let mut protected = shutdown_protection_set(&round_table);
|
|
710
|
+
extend_protection_with_leader_panes(&mut protected, transport, &round_table);
|
|
711
|
+
let residuals = matched_processes(
|
|
712
|
+
workspace, state, root_pids, root_pgids, &protected, scope, &round_table,
|
|
713
|
+
);
|
|
625
714
|
if residuals.is_empty() {
|
|
626
715
|
return;
|
|
627
716
|
}
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
}
|
|
717
|
+
let residual_pids = residuals.iter().map(|process| process.pid).collect::<Vec<_>>();
|
|
718
|
+
reap_process_tree(&residual_pids, &protected, &round_table);
|
|
631
719
|
let pgids = residuals
|
|
632
720
|
.iter()
|
|
633
721
|
.filter_map(|process| process.pgid)
|
|
634
722
|
.collect::<Vec<_>>();
|
|
635
|
-
reap_process_groups(&pgids, protected);
|
|
723
|
+
reap_process_groups(&pgids, &protected);
|
|
636
724
|
std::thread::sleep(std::time::Duration::from_millis(100));
|
|
637
725
|
}
|
|
638
726
|
}
|
|
639
727
|
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
let mut seen = std::collections::BTreeSet::new();
|
|
647
|
-
seen.insert(root_pid);
|
|
648
|
-
let mut index = 0;
|
|
649
|
-
while index < out.len() {
|
|
650
|
-
let parent = out[index];
|
|
651
|
-
for (pid, ppid) in &pairs {
|
|
652
|
-
if *ppid == parent && seen.insert(*pid) {
|
|
653
|
-
out.push(*pid);
|
|
654
|
-
}
|
|
655
|
-
}
|
|
656
|
-
index += 1;
|
|
657
|
-
}
|
|
658
|
-
out
|
|
659
|
-
}
|
|
660
|
-
|
|
661
|
-
fn process_parent_pairs() -> Vec<(u32, u32)> {
|
|
662
|
-
let output = match crate::os_probe::bounded_command_output_with_probe(
|
|
663
|
-
std::process::Command::new("ps").args(["-axo", "pid=,ppid="]),
|
|
664
|
-
"ps_parent",
|
|
728
|
+
/// swallow batch 1: the raw ps probe with an explicit error channel — a failed
|
|
729
|
+
/// probe must never masquerade as "no processes" (CLAUDE.md §5).
|
|
730
|
+
fn probed_process_table() -> Result<Vec<ProcessInfo>, String> {
|
|
731
|
+
match crate::os_probe::bounded_command_output_with_probe(
|
|
732
|
+
std::process::Command::new("ps").args(["-axo", "pid=,ppid=,pgid=,sess=,command="]),
|
|
733
|
+
"ps_table",
|
|
665
734
|
None,
|
|
666
735
|
) {
|
|
667
|
-
Ok(output) if output.status.success() => output
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
.
|
|
672
|
-
.
|
|
673
|
-
|
|
674
|
-
let pid = parts.next()?.parse::<u32>().ok()?;
|
|
675
|
-
let ppid = parts.next()?.parse::<u32>().ok()?;
|
|
676
|
-
Some((pid, ppid))
|
|
677
|
-
})
|
|
678
|
-
.collect()
|
|
736
|
+
Ok(output) if output.status.success() => Ok(String::from_utf8_lossy(&output.stdout)
|
|
737
|
+
.lines()
|
|
738
|
+
.filter_map(parse_process_info)
|
|
739
|
+
.collect()),
|
|
740
|
+
Ok(output) => Err(format!("ps exited with status {:?}", output.status.code())),
|
|
741
|
+
Err(error) => Err(error.to_string()),
|
|
742
|
+
}
|
|
679
743
|
}
|
|
680
744
|
|
|
681
745
|
fn process_table() -> Vec<ProcessInfo> {
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
746
|
+
probed_process_table().unwrap_or_default()
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
/// PERF-6 C-①-1 / swallow batch 1: the shutdown-scope snapshot fetch. A probe
|
|
750
|
+
/// failure writes a `shutdown.process_probe_failed` event (non-null error) and
|
|
751
|
+
/// marks the run degraded instead of silently treating it as "no processes".
|
|
752
|
+
fn shutdown_table_snapshot(
|
|
753
|
+
workspace: &Path,
|
|
754
|
+
probe_degraded: &mut bool,
|
|
755
|
+
phase: &str,
|
|
756
|
+
) -> Vec<ProcessInfo> {
|
|
757
|
+
match probed_process_table() {
|
|
758
|
+
Ok(table) => table,
|
|
759
|
+
Err(error) => {
|
|
760
|
+
*probe_degraded = true;
|
|
761
|
+
let _ = crate::event_log::EventLog::new(workspace).write(
|
|
762
|
+
"shutdown.process_probe_failed",
|
|
763
|
+
json!({
|
|
764
|
+
"phase": phase,
|
|
765
|
+
"probe": "ps_table",
|
|
766
|
+
"error": error,
|
|
767
|
+
}),
|
|
768
|
+
);
|
|
769
|
+
Vec::new()
|
|
770
|
+
}
|
|
771
|
+
}
|
|
694
772
|
}
|
|
695
773
|
|
|
696
774
|
fn parse_process_info(line: &str) -> Option<ProcessInfo> {
|
|
@@ -739,8 +817,9 @@ pub mod lifecycle_port {
|
|
|
739
817
|
}
|
|
740
818
|
}
|
|
741
819
|
|
|
742
|
-
|
|
743
|
-
|
|
820
|
+
/// PERF-6 C-①-1/C-②-4 (N39): the protected set derives from the CALLER's snapshot —
|
|
821
|
+
/// the same table the kill/wait sets derive from.
|
|
822
|
+
fn shutdown_protection_set(table: &[ProcessInfo]) -> ShutdownProtection {
|
|
744
823
|
let mut protected = ShutdownProtection::default();
|
|
745
824
|
let current = std::process::id();
|
|
746
825
|
protected.pids.insert(current);
|
|
@@ -765,6 +844,61 @@ pub mod lifecycle_port {
|
|
|
765
844
|
protected
|
|
766
845
|
}
|
|
767
846
|
|
|
847
|
+
/// B5/F2: the leader terminal's pane process tree joins the protected set (same
|
|
848
|
+
/// set, same mechanism as the invoker ancestry) so the workspace residual sweep's
|
|
849
|
+
/// cmdline/cwd matching cannot reap the leader — including when ANOTHER team's bare
|
|
850
|
+
/// shutdown runs, where the leader is never in the invoker's ancestry.
|
|
851
|
+
fn extend_protection_with_leader_panes(
|
|
852
|
+
protected: &mut ShutdownProtection,
|
|
853
|
+
transport: &dyn crate::transport::Transport,
|
|
854
|
+
table: &[ProcessInfo],
|
|
855
|
+
) {
|
|
856
|
+
let leader_pane_pids: Vec<u32> = transport
|
|
857
|
+
.list_targets()
|
|
858
|
+
.unwrap_or_default()
|
|
859
|
+
.into_iter()
|
|
860
|
+
.filter(|pane| {
|
|
861
|
+
pane.session
|
|
862
|
+
.as_str()
|
|
863
|
+
.starts_with(crate::leader::LEADER_SESSION_PREFIX)
|
|
864
|
+
})
|
|
865
|
+
.filter_map(|pane| pane.pane_pid)
|
|
866
|
+
.collect();
|
|
867
|
+
if leader_pane_pids.is_empty() {
|
|
868
|
+
return;
|
|
869
|
+
}
|
|
870
|
+
for root in &leader_pane_pids {
|
|
871
|
+
for pid in process_tree_from_table(*root, table) {
|
|
872
|
+
protected.pids.insert(pid);
|
|
873
|
+
if let Some(pgid) = table
|
|
874
|
+
.iter()
|
|
875
|
+
.find(|process| process.pid == pid)
|
|
876
|
+
.and_then(|process| process.pgid)
|
|
877
|
+
{
|
|
878
|
+
protected.pgids.insert(pgid);
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
// The tmux SERVER carrying the leader pane must survive too: its command line
|
|
883
|
+
// contains the workspace path (it was started with the worker spawn command), so
|
|
884
|
+
// the residual sweep matches it, and killing the server SIGHUPs every pane —
|
|
885
|
+
// including the protected leader — bypassing per-pid protection. Protect the
|
|
886
|
+
// server pid itself (NOT its tree: worker panes must still die).
|
|
887
|
+
for pane_pid in &leader_pane_pids {
|
|
888
|
+
if let Some(server) = table
|
|
889
|
+
.iter()
|
|
890
|
+
.find(|process| process.pid == *pane_pid)
|
|
891
|
+
.and_then(|pane| table.iter().find(|process| process.pid == pane.ppid))
|
|
892
|
+
.filter(|server| server.pid > 1)
|
|
893
|
+
{
|
|
894
|
+
protected.pids.insert(server.pid);
|
|
895
|
+
if let Some(pgid) = server.pgid {
|
|
896
|
+
protected.pgids.insert(pgid);
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
|
|
768
902
|
fn send_process_signal(pid: u32, signal: libc::c_int) {
|
|
769
903
|
let Ok(pid_t) = libc::pid_t::try_from(pid) else {
|
|
770
904
|
return;
|
|
@@ -815,8 +949,11 @@ pub mod lifecycle_port {
|
|
|
815
949
|
err.raw_os_error() == Some(libc::EPERM)
|
|
816
950
|
}
|
|
817
951
|
|
|
818
|
-
fn process_pgids(
|
|
819
|
-
|
|
952
|
+
fn process_pgids(
|
|
953
|
+
pids: &[u32],
|
|
954
|
+
protected: &ShutdownProtection,
|
|
955
|
+
table: &[ProcessInfo],
|
|
956
|
+
) -> Vec<u32> {
|
|
820
957
|
let mut pgids = pids
|
|
821
958
|
.iter()
|
|
822
959
|
.filter_map(|pid| table.iter().find(|process| process.pid == *pid))
|
|
@@ -839,9 +976,10 @@ pub mod lifecycle_port {
|
|
|
839
976
|
root_pgids: &[u32],
|
|
840
977
|
protected: &ShutdownProtection,
|
|
841
978
|
scope: ShutdownReapScope,
|
|
979
|
+
table: &[ProcessInfo],
|
|
842
980
|
) -> Vec<Value> {
|
|
843
981
|
let mut residuals =
|
|
844
|
-
matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
|
|
982
|
+
matched_processes(workspace, state, root_pids, root_pgids, protected, scope, table);
|
|
845
983
|
let mut seen = residuals
|
|
846
984
|
.iter()
|
|
847
985
|
.map(|process| process.pid)
|
|
@@ -878,11 +1016,11 @@ pub mod lifecycle_port {
|
|
|
878
1016
|
root_pgids: &[u32],
|
|
879
1017
|
protected: &ShutdownProtection,
|
|
880
1018
|
scope: ShutdownReapScope,
|
|
1019
|
+
table: &[ProcessInfo],
|
|
881
1020
|
) -> Vec<ProcessInfo> {
|
|
882
|
-
let table = process_table();
|
|
883
1021
|
let root_tree = root_pids
|
|
884
1022
|
.iter()
|
|
885
|
-
.flat_map(|pid| process_tree_from_table(*pid,
|
|
1023
|
+
.flat_map(|pid| process_tree_from_table(*pid, table))
|
|
886
1024
|
.filter(|pid| !protected.contains_pid(*pid))
|
|
887
1025
|
.collect::<std::collections::BTreeSet<_>>();
|
|
888
1026
|
let root_pgids = root_pgids
|
|
@@ -899,7 +1037,7 @@ pub mod lifecycle_port {
|
|
|
899
1037
|
}
|
|
900
1038
|
let matches_workspace = scope == ShutdownReapScope::Workspace
|
|
901
1039
|
&& process_matches_workspace(
|
|
902
|
-
|
|
1040
|
+
process,
|
|
903
1041
|
&workspace_text,
|
|
904
1042
|
&spawn_cwds,
|
|
905
1043
|
&mut cwd_probe_budget,
|
|
@@ -908,7 +1046,7 @@ pub mod lifecycle_port {
|
|
|
908
1046
|
|| root_tree.contains(&process.pid)
|
|
909
1047
|
|| process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
|
|
910
1048
|
{
|
|
911
|
-
out.push(process);
|
|
1049
|
+
out.push(process.clone());
|
|
912
1050
|
}
|
|
913
1051
|
}
|
|
914
1052
|
out
|
|
@@ -1132,10 +1270,9 @@ pub mod lifecycle_port {
|
|
|
1132
1270
|
open_display: bool,
|
|
1133
1271
|
team: Option<&str>,
|
|
1134
1272
|
) -> Result<Value, CliError> {
|
|
1135
|
-
let _ = label;
|
|
1136
1273
|
let source = crate::model::ids::AgentId::new(source_agent);
|
|
1137
1274
|
let dest = crate::model::ids::AgentId::new(as_agent_id);
|
|
1138
|
-
match crate::lifecycle::fork_agent(workspace, &source, &dest, open_display, team) {
|
|
1275
|
+
match crate::lifecycle::fork_agent(workspace, &source, &dest, label, open_display, team) {
|
|
1139
1276
|
Ok(report) => Ok(json!({
|
|
1140
1277
|
"ok": true,
|
|
1141
1278
|
"source_agent_id": report.source_agent_id.as_str(),
|
|
@@ -1336,6 +1473,8 @@ pub mod lifecycle_port {
|
|
|
1336
1473
|
session_name,
|
|
1337
1474
|
launch,
|
|
1338
1475
|
next_actions,
|
|
1476
|
+
attach_commands,
|
|
1477
|
+
display_backend,
|
|
1339
1478
|
worker_readiness,
|
|
1340
1479
|
} => {
|
|
1341
1480
|
// BUG-7: never emit bare "ready" while worker tool-load is unverified.
|
|
@@ -1461,7 +1600,9 @@ pub mod lifecycle_port {
|
|
|
1461
1600
|
"ready": readiness_json.get("ready").cloned().unwrap_or(Value::Bool(false)),
|
|
1462
1601
|
"session_name": session_name.as_str(),
|
|
1463
1602
|
"dry_run": launch.dry_run,
|
|
1603
|
+
"display_backend": display_backend,
|
|
1464
1604
|
"next_actions": next_actions,
|
|
1605
|
+
"attach_commands": attach_commands,
|
|
1465
1606
|
"readiness": readiness_json.clone(),
|
|
1466
1607
|
"worker_readiness": readiness_json,
|
|
1467
1608
|
})
|
|
@@ -1498,12 +1639,16 @@ pub mod lifecycle_port {
|
|
|
1498
1639
|
session_name,
|
|
1499
1640
|
agents,
|
|
1500
1641
|
coordinator_started,
|
|
1642
|
+
next_actions,
|
|
1643
|
+
attach_commands,
|
|
1501
1644
|
} => json!({
|
|
1502
1645
|
"ok": true,
|
|
1503
1646
|
"status": "restarted",
|
|
1504
1647
|
"session_name": session_name.as_str(),
|
|
1505
1648
|
"agents": agents.iter().map(|a| a.agent_id.as_str()).collect::<Vec<_>>(),
|
|
1506
1649
|
"coordinator_started": coordinator_started,
|
|
1650
|
+
"next_actions": next_actions,
|
|
1651
|
+
"attach_commands": attach_commands,
|
|
1507
1652
|
}),
|
|
1508
1653
|
crate::lifecycle::RestartReport::RefusedResumeAtomicity {
|
|
1509
1654
|
unresumable,
|
|
@@ -202,6 +202,7 @@ fn delivery_refusal_wire(reason: DeliveryRefusal) -> &'static str {
|
|
|
202
202
|
DeliveryRefusal::SessionDrift => "session_drift",
|
|
203
203
|
DeliveryRefusal::Duplicate => "duplicate",
|
|
204
204
|
DeliveryRefusal::RoutingAmbiguous => "routing_ambiguous",
|
|
205
|
+
DeliveryRefusal::EmptyTargetList => "empty_target_list",
|
|
205
206
|
}
|
|
206
207
|
}
|
|
207
208
|
|
|
@@ -17,7 +17,9 @@ use rusqlite::params;
|
|
|
17
17
|
compact: bool,
|
|
18
18
|
detail: bool,
|
|
19
19
|
) -> Result<Value, CliError> {
|
|
20
|
-
|
|
20
|
+
// commands.py:99 — `--json --detail` maps to compact=False: detail wins and
|
|
21
|
+
// returns the FULL payload.
|
|
22
|
+
let compact = compact && !detail;
|
|
21
23
|
let resolved_owner_team_id = resolve_status_owner_team(workspace, owner_team_id)?;
|
|
22
24
|
let owner_team_id = resolved_owner_team_id.as_deref().or(owner_team_id);
|
|
23
25
|
let health = crate::coordinator::coordinator_health(
|
|
@@ -63,7 +65,7 @@ use rusqlite::params;
|
|
|
63
65
|
"messages": message_counts(&conn, owner_team_id)?,
|
|
64
66
|
"queued_messages": queued_messages(&conn, owner_team_id, 8)?,
|
|
65
67
|
"results": result_counts(&conn, owner_team_id)?,
|
|
66
|
-
"latest_results":
|
|
68
|
+
"latest_results": latest_result_summaries(&store, owner_team_id)?,
|
|
67
69
|
"readiness": readiness,
|
|
68
70
|
"coordinator": coordinator_health_value(health),
|
|
69
71
|
"last_events": Value::Array(
|
|
@@ -90,11 +92,137 @@ use rusqlite::params;
|
|
|
90
92
|
owner_team_id: Option<&str>,
|
|
91
93
|
agent: Option<&str>,
|
|
92
94
|
) -> Result<String, CliError> {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
match agent {
|
|
96
|
+
// queries.py:130-162 — the agent branch renders the multi-line agent detail
|
|
97
|
+
// from the FULL status payload; an unknown agent id errors.
|
|
98
|
+
Some(agent) => {
|
|
99
|
+
let status = status_scoped(workspace, state, owner_team_id, false, false)?;
|
|
100
|
+
format_agent_status(workspace, &status, agent)
|
|
101
|
+
}
|
|
102
|
+
None => {
|
|
103
|
+
let status = status_scoped(workspace, state, owner_team_id, true, false)?;
|
|
104
|
+
Ok(crate::cli::format_status_summary(&status))
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/// `format_status` agent 分支(`queries.py:135-162`)。
|
|
110
|
+
fn format_agent_status(
|
|
111
|
+
workspace: &Path,
|
|
112
|
+
status: &Value,
|
|
113
|
+
agent_id: &str,
|
|
114
|
+
) -> Result<String, CliError> {
|
|
115
|
+
let agents = status.get("agents").and_then(Value::as_object);
|
|
116
|
+
let health = status.get("agent_health").and_then(Value::as_object);
|
|
117
|
+
let known = agents.is_some_and(|map| map.contains_key(agent_id))
|
|
118
|
+
|| health.is_some_and(|map| map.contains_key(agent_id));
|
|
119
|
+
if !known {
|
|
120
|
+
return Err(CliError::Runtime(format!("unknown agent id: {agent_id}")));
|
|
121
|
+
}
|
|
122
|
+
let empty = json!({});
|
|
123
|
+
let agent = agents
|
|
124
|
+
.and_then(|map| map.get(agent_id))
|
|
125
|
+
.unwrap_or(&empty);
|
|
126
|
+
let row = health.and_then(|map| map.get(agent_id)).unwrap_or(&empty);
|
|
127
|
+
let status_text = row
|
|
128
|
+
.get("status")
|
|
129
|
+
.and_then(Value::as_str)
|
|
130
|
+
.map(str::to_string)
|
|
131
|
+
.unwrap_or_else(||
|
|
132
|
+
|
|
133
|
+
agent_health_status_text(agent.get("status").and_then(Value::as_str).unwrap_or(""))
|
|
134
|
+
);
|
|
135
|
+
let tasks = status.get("tasks").and_then(Value::as_array).cloned().unwrap_or_default();
|
|
136
|
+
let task_id = current_task_for_agent(&tasks, agent_id).unwrap_or_else(|| "-".to_string());
|
|
137
|
+
let inbox_rows = crate::message_store::MessageStore::open(workspace)
|
|
138
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?
|
|
139
|
+
.inbox(agent_id, 3, None)
|
|
140
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
141
|
+
let mut lines = vec![
|
|
142
|
+
format!("{agent_id} {status_text}"),
|
|
143
|
+
format!(" provider: {}", py_get(agent, "provider")),
|
|
144
|
+
format!(" model: {}", py_get(agent, "model")),
|
|
145
|
+
format!(" profile: {}", py_get(agent, "profile")),
|
|
146
|
+
format!(" session_id: {}", py_get_or_dash(agent, "session_id")),
|
|
147
|
+
format!(" captured_via: {}", py_get_or_dash(agent, "captured_via")),
|
|
148
|
+
format!(
|
|
149
|
+
" attribution_confidence: {}",
|
|
150
|
+
py_get_or_dash(agent, "attribution_confidence")
|
|
151
|
+
),
|
|
152
|
+
format!(" task: {task_id}"),
|
|
153
|
+
format!(" handoff: {}", py_get(agent, "handoff_path")),
|
|
154
|
+
" recent messages:".to_string(),
|
|
155
|
+
];
|
|
156
|
+
if inbox_rows.is_empty() {
|
|
157
|
+
lines.push(" none".to_string());
|
|
158
|
+
} else {
|
|
159
|
+
for item in &inbox_rows {
|
|
160
|
+
let content = item.get("content").and_then(Value::as_str).unwrap_or("");
|
|
161
|
+
let content: String = content.chars().take(120).collect();
|
|
162
|
+
lines.push(format!(
|
|
163
|
+
" {} {} -> {} {}: {content}",
|
|
164
|
+
py_get_or_dash(item, "created_at"),
|
|
165
|
+
py_get_or_dash(item, "sender"),
|
|
166
|
+
py_get_or_dash(item, "recipient"),
|
|
167
|
+
py_get_or_dash(item, "status"),
|
|
168
|
+
));
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
Ok(lines.join("\n"))
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/// `current_task_for_agent`(`approvals/status.py:127-132`)。
|
|
175
|
+
fn current_task_for_agent(tasks: &[Value], agent_id: &str) -> Option<String> {
|
|
176
|
+
const ACTIVE: [&str; 5] = ["pending", "ready", "running", "blocked", "needs_retry"];
|
|
177
|
+
for task in tasks.iter().rev() {
|
|
178
|
+
let assignee = task.get("assignee").and_then(Value::as_str);
|
|
179
|
+
let status = task.get("status").and_then(Value::as_str).unwrap_or("pending");
|
|
180
|
+
if assignee == Some(agent_id) && ACTIVE.contains(&status) {
|
|
181
|
+
return task.get("id").and_then(Value::as_str).map(str::to_string);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
None
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
fn agent_health_status_text(status: &str) -> String {
|
|
188
|
+
serde_json::to_value(crate::provider::agent_health_status(status))
|
|
189
|
+
.ok()
|
|
190
|
+
.and_then(|v| v.as_str().map(str::to_string))
|
|
191
|
+
.unwrap_or_else(|| "-".to_string())
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/// Python `agent.get(key, '-')`:键缺失 → `-`;键存在但为 null → 打印 `None`。
|
|
195
|
+
fn py_get(agent: &Value, key: &str) -> String {
|
|
196
|
+
match agent.get(key) {
|
|
197
|
+
None => "-".to_string(),
|
|
198
|
+
Some(Value::Null) => "None".to_string(),
|
|
199
|
+
Some(Value::String(s)) => s.clone(),
|
|
200
|
+
Some(other) => other.to_string(),
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/// Python `agent.get(key) or '-'`:缺失/null/空串都落 `-`。
|
|
205
|
+
fn py_get_or_dash(agent: &Value, key: &str) -> String {
|
|
206
|
+
match agent.get(key) {
|
|
207
|
+
Some(Value::String(s)) if !s.is_empty() => s.clone(),
|
|
208
|
+
Some(Value::Number(n)) => n.to_string(),
|
|
209
|
+
_ => "-".to_string(),
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/// `latest_result_summaries`(`queries.py:83-89`)。
|
|
214
|
+
fn latest_result_summaries(
|
|
215
|
+
store: &crate::message_store::MessageStore,
|
|
216
|
+
owner_team_id: Option<&str>,
|
|
217
|
+
) -> Result<Value, CliError> {
|
|
218
|
+
let rows = store
|
|
219
|
+
.latest_results(5, owner_team_id)
|
|
220
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
221
|
+
Ok(Value::Array(
|
|
222
|
+
rows.iter()
|
|
223
|
+
.filter_map(crate::message_store::result_summary_from_row)
|
|
224
|
+
.collect(),
|
|
225
|
+
))
|
|
98
226
|
}
|
|
99
227
|
/// `status.approvals(workspace, agent_id)`(JSON)/`format_approvals`(人读)。
|
|
100
228
|
pub fn approvals(workspace: &Path, agent: Option<&str>, as_json: bool) -> Result<Value, CliError> {
|