@team-agent/installer 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +34 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/adapters.rs +234 -26
- package/crates/team-agent/src/cli/diagnose.rs +144 -10
- package/crates/team-agent/src/cli/emit.rs +289 -54
- package/crates/team-agent/src/cli/leader.rs +37 -8
- package/crates/team-agent/src/cli/mod.rs +1281 -196
- package/crates/team-agent/src/cli/status_port.rs +195 -46
- package/crates/team-agent/src/cli/tests/divergence.rs +1 -2
- package/crates/team-agent/src/cli/tests/lane_c.rs +23 -13
- package/crates/team-agent/src/cli/tests/main_preserved.rs +2 -0
- package/crates/team-agent/src/cli/tests/run_delegation.rs +59 -3
- package/crates/team-agent/src/cli/types.rs +18 -0
- package/crates/team-agent/src/compiler.rs +15 -5
- package/crates/team-agent/src/coordinator/health.rs +95 -17
- package/crates/team-agent/src/coordinator/mod.rs +4 -0
- package/crates/team-agent/src/coordinator/runtime_detectors.rs +500 -0
- package/crates/team-agent/src/coordinator/runtime_observation.rs +58 -0
- package/crates/team-agent/src/coordinator/tick.rs +222 -69
- package/crates/team-agent/src/coordinator/types.rs +15 -3
- package/crates/team-agent/src/db/schema.rs +37 -2
- package/crates/team-agent/src/diagnose/comms.rs +226 -0
- package/crates/team-agent/src/diagnose/mod.rs +45 -0
- package/crates/team-agent/src/diagnose/orphans.rs +658 -0
- package/crates/team-agent/src/fake_worker.rs +146 -3
- package/crates/team-agent/src/leader/start.rs +121 -23
- package/crates/team-agent/src/leader/types.rs +44 -1
- package/crates/team-agent/src/lib.rs +3 -0
- package/crates/team-agent/src/lifecycle/display.rs +645 -47
- package/crates/team-agent/src/lifecycle/launch.rs +1061 -146
- package/crates/team-agent/src/lifecycle/mod.rs +2 -0
- package/crates/team-agent/src/lifecycle/profile_launch.rs +810 -0
- package/crates/team-agent/src/lifecycle/profile_smoke.rs +522 -0
- package/crates/team-agent/src/lifecycle/restart/agent.rs +99 -23
- package/crates/team-agent/src/lifecycle/restart/common.rs +183 -24
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +498 -22
- package/crates/team-agent/src/lifecycle/restart/remove.rs +27 -7
- package/crates/team-agent/src/lifecycle/restart/team_state.rs +19 -0
- package/crates/team-agent/src/lifecycle/restart.rs +24 -1
- package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +5 -5
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +37 -7
- package/crates/team-agent/src/lifecycle/types.rs +19 -0
- package/crates/team-agent/src/mcp_server/helpers.rs +1 -0
- package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +341 -0
- package/crates/team-agent/src/mcp_server/lifecycle_tools/mod.rs +10 -0
- package/crates/team-agent/src/mcp_server/lifecycle_tools/state_status.rs +158 -0
- package/crates/team-agent/src/mcp_server/mod.rs +3 -74
- package/crates/team-agent/src/mcp_server/tests/scoped.rs +1 -1
- package/crates/team-agent/src/mcp_server/tests/send.rs +6 -5
- package/crates/team-agent/src/mcp_server/tools.rs +312 -111
- package/crates/team-agent/src/mcp_server/types.rs +6 -4
- package/crates/team-agent/src/mcp_server/wire.rs +19 -7
- package/crates/team-agent/src/message_store.rs +21 -4
- package/crates/team-agent/src/messaging/delivery.rs +470 -59
- package/crates/team-agent/src/messaging/mod.rs +9 -6
- package/crates/team-agent/src/messaging/results.rs +353 -63
- package/crates/team-agent/src/messaging/selftest.rs +199 -12
- package/crates/team-agent/src/messaging/send.rs +35 -3
- package/crates/team-agent/src/messaging/tests/runtime.rs +19 -4
- package/crates/team-agent/src/messaging/types.rs +11 -3
- package/crates/team-agent/src/os_probe.rs +119 -0
- package/crates/team-agent/src/packaging/migrate.rs +10 -2
- package/crates/team-agent/src/packaging/tests.rs +23 -0
- package/crates/team-agent/src/provider/adapter.rs +564 -63
- package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +1 -7
- package/crates/team-agent/src/provider/classify.rs +51 -4
- package/crates/team-agent/src/provider/helpers.rs +10 -1
- package/crates/team-agent/src/provider/startup_prompt.rs +94 -0
- package/crates/team-agent/src/provider/types.rs +47 -0
- package/crates/team-agent/src/session_capture.rs +616 -0
- package/crates/team-agent/src/state/persist.rs +170 -1
- package/crates/team-agent/src/state/projection.rs +141 -8
- package/crates/team-agent/src/state/selector.rs +5 -2
- package/crates/team-agent/src/tmux_backend.rs +161 -64
- package/crates/team-agent/src/transport/test_support.rs +9 -0
- package/crates/team-agent/src/transport/tests/wire.rs +4 -0
- package/crates/team-agent/src/transport.rs +13 -2
- package/package.json +4 -4
|
@@ -24,7 +24,15 @@
|
|
|
24
24
|
//! 所有 fn body = `unimplemented!("step14b port: ...")`。RED 契约据此 NAME 类型 + CALL 真 fn。
|
|
25
25
|
|
|
26
26
|
// ROUND-0 skeleton:fn body 全 unimplemented!() → import/field/param/大 Err 暂未落地;P2 porter 实现时移除。
|
|
27
|
-
#![allow(
|
|
27
|
+
#![allow(
|
|
28
|
+
dead_code,
|
|
29
|
+
unused_imports,
|
|
30
|
+
unused_variables,
|
|
31
|
+
clippy::result_large_err,
|
|
32
|
+
clippy::doc_overindented_list_items,
|
|
33
|
+
clippy::doc_lazy_continuation,
|
|
34
|
+
clippy::io_other_error
|
|
35
|
+
)]
|
|
28
36
|
// §10:CLI 命令实现层禁 unwrap/expect/panic(unimplemented!() stub 不被拦);tests 子模块各自 allow。
|
|
29
37
|
#![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
|
|
30
38
|
|
|
@@ -36,10 +44,10 @@ use serde_json::{json, Map, Value};
|
|
|
36
44
|
use thiserror::Error;
|
|
37
45
|
|
|
38
46
|
// REUSE in-tree(只 import,不 redefine):
|
|
39
|
-
use crate::model::ids::{TaskId, TeamKey};
|
|
40
47
|
use crate::messaging::{self, AlertType, MessageTarget, SendOptions};
|
|
48
|
+
use crate::model::ids::{TaskId, TeamKey};
|
|
41
49
|
|
|
42
|
-
pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency. Does NOT perform live runtime message round-trip.
|
|
50
|
+
pub(crate) const COMMS_BOUNDARY_TEXT: &str = "validates live pane binding consistency and zero-token comms contracts. Does NOT perform live runtime message round-trip. (zero token, zero pollution)";
|
|
43
51
|
|
|
44
52
|
pub mod adapters;
|
|
45
53
|
pub mod diagnose;
|
|
@@ -60,6 +68,23 @@ pub use send::*;
|
|
|
60
68
|
pub use status::*;
|
|
61
69
|
pub use types::*;
|
|
62
70
|
|
|
71
|
+
/// Public `attach-leader` CLI handler. It consumes the typed pane/provider args and
|
|
72
|
+
/// writes/returns a `leader_receiver` binding via the leader lease port.
|
|
73
|
+
pub fn cmd_attach_leader(args: &AttachLeaderArgs) -> Result<CmdResult, CliError> {
|
|
74
|
+
let mut value = leader_port::attach_leader(
|
|
75
|
+
&args.workspace,
|
|
76
|
+
args.team.as_deref(),
|
|
77
|
+
args.pane.as_ref(),
|
|
78
|
+
args.provider,
|
|
79
|
+
args.confirm,
|
|
80
|
+
)?;
|
|
81
|
+
if let Some(obj) = value.as_object_mut() {
|
|
82
|
+
obj.entry("leader_receiver".to_string())
|
|
83
|
+
.or_insert(Value::Null);
|
|
84
|
+
}
|
|
85
|
+
Ok(CmdResult::from_json(value, args.json))
|
|
86
|
+
}
|
|
87
|
+
|
|
63
88
|
pub(crate) use helpers::*;
|
|
64
89
|
|
|
65
90
|
#[cfg(test)]
|
|
@@ -75,7 +100,6 @@ mod tests;
|
|
|
75
100
|
/// `cmd_inbox` 委派的只读投影面。返回 serde `Value`(稳定 JSON 形状由 status lane 拥有)。
|
|
76
101
|
pub mod status_port;
|
|
77
102
|
|
|
78
|
-
|
|
79
103
|
/// PLACEHOLDER → step13 lifecycle(`runtime.{quick_start,start_agent,add_agent,fork_agent,
|
|
80
104
|
/// remove_agent,start_agent,stop_agent,reset_agent,restart,shutdown,start_leader,acknowledge_idle}`)。
|
|
81
105
|
/// `quick_start.py` 物理在本子系统但实现属 step 13(card)。本层只声明委派面。
|
|
@@ -92,8 +116,9 @@ pub mod lifecycle_port {
|
|
|
92
116
|
yes: bool,
|
|
93
117
|
fresh: bool,
|
|
94
118
|
) -> Result<Value, CliError> {
|
|
95
|
-
|
|
96
|
-
|
|
119
|
+
match crate::lifecycle::quick_start_in_workspace(
|
|
120
|
+
workspace, agents_dir, name, yes, fresh, team_id,
|
|
121
|
+
) {
|
|
97
122
|
Ok(report) => Ok(quick_start_value(report)),
|
|
98
123
|
Err(e) => Ok(error_value(e)),
|
|
99
124
|
}
|
|
@@ -105,30 +130,54 @@ pub mod lifecycle_port {
|
|
|
105
130
|
cwd: &Path,
|
|
106
131
|
attach: &LeaderLauncherArgs,
|
|
107
132
|
) -> Result<Value, CliError> {
|
|
108
|
-
let
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
133
|
+
let attach_session = attach
|
|
134
|
+
.attach_session
|
|
135
|
+
.as_ref()
|
|
136
|
+
.map(|name| crate::transport::SessionName::new(name.clone()));
|
|
137
|
+
let plan = crate::leader::start::leader_start_plan(
|
|
138
|
+
provider,
|
|
139
|
+
provider_args,
|
|
140
|
+
cwd,
|
|
141
|
+
attach.attach_existing,
|
|
142
|
+
attach.confirm_attach,
|
|
143
|
+
attach_session.as_ref(),
|
|
144
|
+
)
|
|
145
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
146
|
+
let outcome = crate::leader::start::execute_leader_plan(&plan, cwd)
|
|
147
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
148
|
+
let ok = match outcome.status {
|
|
149
|
+
crate::leader::LeaderLaunchStatus::Exited => outcome.exit_code == Some(0),
|
|
150
|
+
crate::leader::LeaderLaunchStatus::Detached => true,
|
|
151
|
+
crate::leader::LeaderLaunchStatus::NotStarted => false,
|
|
114
152
|
};
|
|
115
153
|
Ok(json!({
|
|
116
|
-
"ok":
|
|
117
|
-
"provider":
|
|
154
|
+
"ok": ok,
|
|
155
|
+
"provider": provider,
|
|
156
|
+
"mode": plan.mode,
|
|
157
|
+
"status": outcome.status,
|
|
158
|
+
"exit_code": outcome.exit_code,
|
|
159
|
+
"reason": outcome.reason,
|
|
118
160
|
"attach_existing": attach.attach_existing,
|
|
119
161
|
"confirm_attach": attach.confirm_attach,
|
|
120
162
|
"attach_session": attach.attach_session,
|
|
163
|
+
"session_name": plan.session_name.as_ref().map(|session| session.as_str().to_string()),
|
|
121
164
|
}))
|
|
122
165
|
}
|
|
123
166
|
/// `runtime.shutdown`(`cmd_shutdown`)。
|
|
124
167
|
pub fn shutdown(workspace: &Path, keep_logs: bool, team: Option<&str>) -> Result<Value, CliError> {
|
|
125
|
-
// CP-1: workspace-bound backend so kill-session hits the per-team `tmux -L <socket>` server,
|
|
126
|
-
// then tear that server down so the per-team socket does not orphan (best-effort).
|
|
127
168
|
let run_ws = crate::model::paths::canonical_run_workspace(workspace)
|
|
128
169
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
129
|
-
let
|
|
130
|
-
let
|
|
131
|
-
|
|
170
|
+
let state = shutdown_state_for_team(&run_ws, team)?;
|
|
171
|
+
let transport = if let Some(endpoint) = legacy_worker_tmux_endpoint(&state) {
|
|
172
|
+
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
|
|
173
|
+
} else {
|
|
174
|
+
shutdown_workspace_transport(&run_ws)
|
|
175
|
+
};
|
|
176
|
+
let result =
|
|
177
|
+
shutdown_with_transport_and_state(workspace, keep_logs, team, &transport, Some(state));
|
|
178
|
+
if team.is_none() {
|
|
179
|
+
transport.kill_server();
|
|
180
|
+
}
|
|
132
181
|
result
|
|
133
182
|
}
|
|
134
183
|
|
|
@@ -138,28 +187,190 @@ pub mod lifecycle_port {
|
|
|
138
187
|
team: Option<&str>,
|
|
139
188
|
transport: &dyn crate::transport::Transport,
|
|
140
189
|
) -> Result<Value, CliError> {
|
|
190
|
+
shutdown_with_transport_and_state(workspace, keep_logs, team, transport, None)
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
fn shutdown_with_transport_and_state(
|
|
194
|
+
workspace: &Path,
|
|
195
|
+
keep_logs: bool,
|
|
196
|
+
team: Option<&str>,
|
|
197
|
+
transport: &dyn crate::transport::Transport,
|
|
198
|
+
state: Option<Value>,
|
|
199
|
+
) -> Result<Value, CliError> {
|
|
200
|
+
crate::os_probe::clear_probe_timeout();
|
|
201
|
+
let deadline = ShutdownDeadline::new(std::time::Duration::from_secs(20));
|
|
141
202
|
let run_workspace = crate::model::paths::canonical_run_workspace(workspace)
|
|
142
203
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
143
|
-
let
|
|
144
|
-
|
|
204
|
+
let _started_event = crate::event_log::EventLog::new(&run_workspace)
|
|
205
|
+
.write(
|
|
206
|
+
"lifecycle.shutdown.started",
|
|
207
|
+
json!({
|
|
208
|
+
"keep_logs": keep_logs,
|
|
209
|
+
"team": team,
|
|
210
|
+
}),
|
|
211
|
+
)
|
|
145
212
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
146
|
-
let mut state =
|
|
213
|
+
let mut state = match state {
|
|
214
|
+
Some(state) => state,
|
|
215
|
+
None => shutdown_state_for_team(&run_workspace, team)?,
|
|
216
|
+
};
|
|
217
|
+
deadline.check("refresh_provider_sessions")?;
|
|
218
|
+
let captured_missing_sessions =
|
|
219
|
+
crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
|
|
220
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
147
221
|
let session_name = state
|
|
148
222
|
.get("session_name")
|
|
149
223
|
.and_then(Value::as_str)
|
|
150
224
|
.filter(|s| !s.is_empty())
|
|
151
225
|
.map(crate::transport::SessionName::new);
|
|
152
|
-
let
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
226
|
+
let protected = shutdown_protection_set();
|
|
227
|
+
let reap_scope = if team.is_some() {
|
|
228
|
+
ShutdownReapScope::ScopedTeam
|
|
229
|
+
} else {
|
|
230
|
+
ShutdownReapScope::Workspace
|
|
231
|
+
};
|
|
232
|
+
deadline.check("process_roots")?;
|
|
233
|
+
let mut root_pids = state_process_roots(&state, reap_scope)
|
|
234
|
+
.into_iter()
|
|
235
|
+
.filter(|pid| !protected.contains_pid(*pid))
|
|
236
|
+
.collect::<Vec<_>>();
|
|
237
|
+
let pane_pids = session_name
|
|
238
|
+
.as_ref()
|
|
239
|
+
.map(|session| {
|
|
240
|
+
pane_pids_for_session(transport, session)
|
|
241
|
+
.into_iter()
|
|
242
|
+
.filter(|pid| !protected.contains_pid(*pid))
|
|
243
|
+
.collect::<Vec<_>>()
|
|
244
|
+
})
|
|
245
|
+
.unwrap_or_default();
|
|
246
|
+
root_pids.extend(pane_pids);
|
|
247
|
+
root_pids.sort_unstable();
|
|
248
|
+
root_pids.dedup();
|
|
249
|
+
let root_pgids = process_pgids(&root_pids, &protected);
|
|
250
|
+
deadline.check("reap_process_tree")?;
|
|
251
|
+
for pid in &root_pids {
|
|
252
|
+
reap_process_tree(*pid, &protected);
|
|
253
|
+
}
|
|
254
|
+
reap_process_groups(&root_pgids, &protected);
|
|
255
|
+
let mut kill_error: Option<String> = None;
|
|
256
|
+
deadline.check("kill_session")?;
|
|
257
|
+
if let Some(session) = session_name.as_ref() {
|
|
258
|
+
if let Err(error) = transport.kill_session(session) {
|
|
259
|
+
if !tmux_absent_error(&error.to_string()) {
|
|
260
|
+
kill_error = Some(error.to_string());
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
deadline.check("reap_workspace_residuals")?;
|
|
265
|
+
reap_workspace_process_residuals(
|
|
266
|
+
&run_workspace,
|
|
267
|
+
&state,
|
|
268
|
+
&root_pids,
|
|
269
|
+
&root_pgids,
|
|
270
|
+
&protected,
|
|
271
|
+
reap_scope,
|
|
272
|
+
);
|
|
273
|
+
deadline.check("session_residuals")?;
|
|
274
|
+
let session_residuals = if let Some(session) = session_name.as_ref() {
|
|
275
|
+
let (residuals, error) = session_residuals_after_reap(
|
|
276
|
+
transport,
|
|
277
|
+
&run_workspace,
|
|
278
|
+
session,
|
|
279
|
+
!captured_missing_sessions,
|
|
280
|
+
);
|
|
281
|
+
if let Some(error) = error {
|
|
282
|
+
kill_error.get_or_insert(error);
|
|
157
283
|
}
|
|
284
|
+
residuals
|
|
158
285
|
} else {
|
|
159
|
-
|
|
286
|
+
Vec::new()
|
|
160
287
|
};
|
|
288
|
+
deadline.check("process_residuals")?;
|
|
289
|
+
let process_residuals = process_residuals(
|
|
290
|
+
&run_workspace,
|
|
291
|
+
&state,
|
|
292
|
+
&root_pids,
|
|
293
|
+
&root_pgids,
|
|
294
|
+
&protected,
|
|
295
|
+
reap_scope,
|
|
296
|
+
);
|
|
297
|
+
deadline.check("stop_coordinator")?;
|
|
298
|
+
let mut coordinator_timeout = false;
|
|
299
|
+
let stopped = if team.is_none() {
|
|
300
|
+
let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
|
|
301
|
+
match stop_coordinator_bounded(wp, std::time::Duration::from_millis(900)) {
|
|
302
|
+
Some(Ok(report)) => Some(report),
|
|
303
|
+
Some(Err(error)) => {
|
|
304
|
+
kill_error.get_or_insert(error);
|
|
305
|
+
None
|
|
306
|
+
}
|
|
307
|
+
None => {
|
|
308
|
+
coordinator_timeout = true;
|
|
309
|
+
None
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
} else {
|
|
313
|
+
None
|
|
314
|
+
};
|
|
315
|
+
let probe_timeout = crate::os_probe::probe_timeout();
|
|
316
|
+
let verification_degraded = probe_timeout.is_some();
|
|
317
|
+
let session_killed = session_name.is_some()
|
|
318
|
+
&& kill_error.is_none()
|
|
319
|
+
&& session_residuals.is_empty()
|
|
320
|
+
&& process_residuals.is_empty();
|
|
161
321
|
mark_agents_stopped(&mut state);
|
|
162
|
-
|
|
322
|
+
deadline.check("save_state")?;
|
|
323
|
+
if team.is_some() {
|
|
324
|
+
crate::state::projection::save_team_scoped_state(&run_workspace, &state)?;
|
|
325
|
+
promote_live_sibling_after_scoped_shutdown(&run_workspace, &state)?;
|
|
326
|
+
} else {
|
|
327
|
+
let _changed_keys =
|
|
328
|
+
mark_matching_session_teams_stopped(&mut state, session_name.as_ref());
|
|
329
|
+
crate::state::persist::save_runtime_state(&run_workspace, &state)?;
|
|
330
|
+
}
|
|
331
|
+
let coordinator_status = if coordinator_timeout {
|
|
332
|
+
"timeout"
|
|
333
|
+
} else {
|
|
334
|
+
stopped
|
|
335
|
+
.as_ref()
|
|
336
|
+
.map(|stopped| stop_status_wire(stopped.status))
|
|
337
|
+
.unwrap_or("not_stopped")
|
|
338
|
+
};
|
|
339
|
+
let coordinator_pid = stopped
|
|
340
|
+
.as_ref()
|
|
341
|
+
.and_then(|stopped| stopped.pid.map(|p| p.get()));
|
|
342
|
+
let ok = stopped.as_ref().map(|stopped| stopped.ok).unwrap_or(true)
|
|
343
|
+
&& kill_error.is_none()
|
|
344
|
+
&& session_residuals.is_empty()
|
|
345
|
+
&& process_residuals.is_empty()
|
|
346
|
+
&& !verification_degraded
|
|
347
|
+
&& !coordinator_timeout;
|
|
348
|
+
let status = if ok {
|
|
349
|
+
"ok"
|
|
350
|
+
} else if coordinator_timeout {
|
|
351
|
+
"timeout"
|
|
352
|
+
} else if verification_degraded {
|
|
353
|
+
"partial"
|
|
354
|
+
} else if kill_error.is_some() {
|
|
355
|
+
"failed"
|
|
356
|
+
} else {
|
|
357
|
+
"partial"
|
|
358
|
+
};
|
|
359
|
+
let phase = if coordinator_timeout {
|
|
360
|
+
Some("stop_coordinator")
|
|
361
|
+
} else if verification_degraded {
|
|
362
|
+
Some("os_probe")
|
|
363
|
+
} else {
|
|
364
|
+
None
|
|
365
|
+
};
|
|
366
|
+
let probe_timeout_kind = probe_timeout.as_ref().map(|timeout| timeout.probe);
|
|
367
|
+
let probe_timeout_value = probe_timeout.as_ref().map(|timeout| {
|
|
368
|
+
json!({
|
|
369
|
+
"probe": timeout.probe,
|
|
370
|
+
"pid": timeout.pid,
|
|
371
|
+
"timeout_ms": timeout.timeout_ms,
|
|
372
|
+
})
|
|
373
|
+
});
|
|
163
374
|
let _event = crate::event_log::EventLog::new(&run_workspace)
|
|
164
375
|
.write(
|
|
165
376
|
"lifecycle.shutdown",
|
|
@@ -168,25 +379,664 @@ pub mod lifecycle_port {
|
|
|
168
379
|
"team": team,
|
|
169
380
|
"session_name": session_name.as_ref().map(|s| s.as_str().to_string()),
|
|
170
381
|
"session_killed": session_killed,
|
|
171
|
-
"coordinator_status":
|
|
382
|
+
"coordinator_status": coordinator_status,
|
|
383
|
+
"status": status,
|
|
384
|
+
"phase": phase,
|
|
385
|
+
"verification_degraded": verification_degraded,
|
|
386
|
+
"probe_timeout_kind": probe_timeout_kind,
|
|
387
|
+
"probe_timeout": probe_timeout_value,
|
|
172
388
|
}),
|
|
173
389
|
)
|
|
174
390
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
175
391
|
Ok(json!({
|
|
176
|
-
"ok":
|
|
392
|
+
"ok": ok,
|
|
393
|
+
"status": status,
|
|
394
|
+
"phase": phase,
|
|
395
|
+
"verification_degraded": verification_degraded,
|
|
396
|
+
"probe_timeout_kind": probe_timeout_kind,
|
|
397
|
+
"probe_timeout": probe_timeout_value,
|
|
177
398
|
"keep_logs": keep_logs,
|
|
178
399
|
"team": team,
|
|
179
400
|
"session_name": session_name.map(|s| s.as_str().to_string()),
|
|
180
401
|
"session_killed": session_killed,
|
|
402
|
+
"residuals": {
|
|
403
|
+
"sessions": session_residuals,
|
|
404
|
+
"processes": process_residuals,
|
|
405
|
+
},
|
|
406
|
+
"error": kill_error,
|
|
181
407
|
"coordinator": {
|
|
182
|
-
"status":
|
|
183
|
-
"pid":
|
|
408
|
+
"status": coordinator_status,
|
|
409
|
+
"pid": coordinator_pid,
|
|
184
410
|
}
|
|
185
411
|
}))
|
|
186
412
|
}
|
|
413
|
+
|
|
414
|
+
fn stop_coordinator_bounded(
|
|
415
|
+
workspace: crate::coordinator::WorkspacePath,
|
|
416
|
+
timeout: std::time::Duration,
|
|
417
|
+
) -> Option<Result<crate::coordinator::types::StopReport, String>> {
|
|
418
|
+
let (tx, rx) = std::sync::mpsc::channel();
|
|
419
|
+
std::thread::spawn(move || {
|
|
420
|
+
let result =
|
|
421
|
+
crate::coordinator::stop_coordinator(&workspace).map_err(|error| error.to_string());
|
|
422
|
+
let _ = tx.send(result);
|
|
423
|
+
});
|
|
424
|
+
rx.recv_timeout(timeout).ok()
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
struct ShutdownDeadline {
|
|
428
|
+
start: std::time::Instant,
|
|
429
|
+
timeout: std::time::Duration,
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
impl ShutdownDeadline {
|
|
433
|
+
fn new(timeout: std::time::Duration) -> Self {
|
|
434
|
+
Self {
|
|
435
|
+
start: std::time::Instant::now(),
|
|
436
|
+
timeout,
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
fn check(&self, phase: &'static str) -> Result<(), CliError> {
|
|
441
|
+
if self.start.elapsed() >= self.timeout {
|
|
442
|
+
return Err(CliError::Runtime(
|
|
443
|
+
json!({
|
|
444
|
+
"ok": false,
|
|
445
|
+
"status": "timeout",
|
|
446
|
+
"phase": phase,
|
|
447
|
+
})
|
|
448
|
+
.to_string(),
|
|
449
|
+
));
|
|
450
|
+
}
|
|
451
|
+
Ok(())
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
fn shutdown_state_for_team(workspace: &Path, team: Option<&str>) -> Result<Value, CliError> {
|
|
456
|
+
if let Some(team) = team {
|
|
457
|
+
crate::state::projection::select_runtime_state(workspace, Some(team))
|
|
458
|
+
.map_err(CliError::from)
|
|
459
|
+
} else {
|
|
460
|
+
crate::state::persist::load_runtime_state(workspace).map_err(CliError::from)
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
fn shutdown_workspace_transport(workspace: &Path) -> crate::tmux_backend::TmuxBackend {
|
|
465
|
+
crate::tmux_backend::TmuxBackend::for_workspace(workspace)
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
fn legacy_worker_tmux_endpoint(state: &Value) -> Option<&str> {
|
|
469
|
+
state
|
|
470
|
+
.get("tmux_endpoint")
|
|
471
|
+
.and_then(Value::as_str)
|
|
472
|
+
.filter(|endpoint| !endpoint.is_empty())
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
fn pane_pids_for_session(
|
|
476
|
+
transport: &dyn crate::transport::Transport,
|
|
477
|
+
session: &crate::transport::SessionName,
|
|
478
|
+
) -> Vec<u32> {
|
|
479
|
+
transport
|
|
480
|
+
.list_targets()
|
|
481
|
+
.unwrap_or_default()
|
|
482
|
+
.into_iter()
|
|
483
|
+
.filter(|pane| pane.session.as_str() == session.as_str())
|
|
484
|
+
.filter_map(|pane| pane.pane_pid)
|
|
485
|
+
.collect()
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
fn session_residuals_after_reap(
|
|
489
|
+
transport: &dyn crate::transport::Transport,
|
|
490
|
+
workspace: &Path,
|
|
491
|
+
session: &crate::transport::SessionName,
|
|
492
|
+
check_primary_transport: bool,
|
|
493
|
+
) -> (Vec<String>, Option<String>) {
|
|
494
|
+
let mut residual = false;
|
|
495
|
+
let mut error = None;
|
|
496
|
+
if check_primary_transport {
|
|
497
|
+
match transport.has_session(session) {
|
|
498
|
+
Ok(true) => residual = true,
|
|
499
|
+
Ok(false) => {}
|
|
500
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
501
|
+
Err(err) => {
|
|
502
|
+
error = Some(err.to_string());
|
|
503
|
+
residual = true;
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
let workspace_transport = shutdown_workspace_transport(workspace);
|
|
508
|
+
match crate::transport::Transport::has_session(&workspace_transport, session) {
|
|
509
|
+
Ok(true) => residual = true,
|
|
510
|
+
Ok(false) => {}
|
|
511
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
512
|
+
Err(err) => {
|
|
513
|
+
error.get_or_insert_with(|| err.to_string());
|
|
514
|
+
residual = true;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
let default_transport = crate::tmux_backend::TmuxBackend::new();
|
|
518
|
+
match crate::transport::Transport::has_session(&default_transport, session) {
|
|
519
|
+
Ok(true) => residual = true,
|
|
520
|
+
Ok(false) => {}
|
|
521
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
522
|
+
Err(err) => {
|
|
523
|
+
error.get_or_insert_with(|| err.to_string());
|
|
524
|
+
residual = true;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
let sessions = if residual {
|
|
528
|
+
vec![session.as_str().to_string()]
|
|
529
|
+
} else {
|
|
530
|
+
Vec::new()
|
|
531
|
+
};
|
|
532
|
+
(sessions, error)
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
|
536
|
+
enum ShutdownReapScope {
|
|
537
|
+
Workspace,
|
|
538
|
+
ScopedTeam,
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
fn state_process_roots(state: &Value, scope: ShutdownReapScope) -> Vec<u32> {
|
|
542
|
+
let mut out = Vec::new();
|
|
543
|
+
collect_agent_process_roots(state, &mut out);
|
|
544
|
+
if scope == ShutdownReapScope::Workspace {
|
|
545
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
546
|
+
for team in teams.values() {
|
|
547
|
+
collect_agent_process_roots(team, &mut out);
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
out.sort_unstable();
|
|
552
|
+
out.dedup();
|
|
553
|
+
out
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
fn collect_agent_process_roots(state: &Value, out: &mut Vec<u32>) {
|
|
557
|
+
let Some(agents) = state.get("agents").and_then(Value::as_object) else {
|
|
558
|
+
return;
|
|
559
|
+
};
|
|
560
|
+
for agent in agents.values() {
|
|
561
|
+
for key in ["provider_pid", "process_id", "pid", "child_pid", "pane_pid"] {
|
|
562
|
+
if let Some(pid) = agent.get(key).and_then(value_u32) {
|
|
563
|
+
out.push(pid);
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
fn value_u32(value: &Value) -> Option<u32> {
|
|
570
|
+
value
|
|
571
|
+
.as_u64()
|
|
572
|
+
.and_then(|pid| u32::try_from(pid).ok())
|
|
573
|
+
.or_else(|| value.as_str().and_then(|pid| pid.parse::<u32>().ok()))
|
|
574
|
+
.filter(|pid| *pid > 0)
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
fn reap_process_tree(root_pid: u32, protected: &ShutdownProtection) {
|
|
578
|
+
let pids = process_tree_pids(root_pid)
|
|
579
|
+
.into_iter()
|
|
580
|
+
.filter(|pid| !protected.contains_pid(*pid))
|
|
581
|
+
.collect::<Vec<_>>();
|
|
582
|
+
for pid in pids.iter().rev() {
|
|
583
|
+
send_process_signal(*pid, libc::SIGTERM);
|
|
584
|
+
}
|
|
585
|
+
std::thread::sleep(std::time::Duration::from_millis(150));
|
|
586
|
+
for pid in pids.iter().rev() {
|
|
587
|
+
send_process_signal(*pid, libc::SIGKILL);
|
|
588
|
+
}
|
|
589
|
+
wait_for_processes_gone(&pids, std::time::Duration::from_secs(1));
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
fn reap_process_groups(pgids: &[u32], protected: &ShutdownProtection) {
|
|
593
|
+
for pgid in pgids {
|
|
594
|
+
let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
|
|
595
|
+
continue;
|
|
596
|
+
};
|
|
597
|
+
if pgid_t <= 1 || protected.contains_pgid(*pgid) {
|
|
598
|
+
continue;
|
|
599
|
+
}
|
|
600
|
+
send_process_signal_group(pgid_t, libc::SIGTERM);
|
|
601
|
+
}
|
|
602
|
+
std::thread::sleep(std::time::Duration::from_millis(150));
|
|
603
|
+
for pgid in pgids {
|
|
604
|
+
let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
|
|
605
|
+
continue;
|
|
606
|
+
};
|
|
607
|
+
if pgid_t <= 1 || protected.contains_pgid(*pgid) {
|
|
608
|
+
continue;
|
|
609
|
+
}
|
|
610
|
+
send_process_signal_group(pgid_t, libc::SIGKILL);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
fn reap_workspace_process_residuals(
|
|
615
|
+
workspace: &Path,
|
|
616
|
+
state: &Value,
|
|
617
|
+
root_pids: &[u32],
|
|
618
|
+
root_pgids: &[u32],
|
|
619
|
+
protected: &ShutdownProtection,
|
|
620
|
+
scope: ShutdownReapScope,
|
|
621
|
+
) {
|
|
622
|
+
for _ in 0..5 {
|
|
623
|
+
let residuals =
|
|
624
|
+
matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
|
|
625
|
+
if residuals.is_empty() {
|
|
626
|
+
return;
|
|
627
|
+
}
|
|
628
|
+
for process in &residuals {
|
|
629
|
+
reap_process_tree(process.pid, protected);
|
|
630
|
+
}
|
|
631
|
+
let pgids = residuals
|
|
632
|
+
.iter()
|
|
633
|
+
.filter_map(|process| process.pgid)
|
|
634
|
+
.collect::<Vec<_>>();
|
|
635
|
+
reap_process_groups(&pgids, protected);
|
|
636
|
+
std::thread::sleep(std::time::Duration::from_millis(100));
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
fn process_tree_pids(root_pid: u32) -> Vec<u32> {
|
|
641
|
+
if root_pid == 0 {
|
|
642
|
+
return Vec::new();
|
|
643
|
+
}
|
|
644
|
+
let pairs = process_parent_pairs();
|
|
645
|
+
let mut out = vec![root_pid];
|
|
646
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
647
|
+
seen.insert(root_pid);
|
|
648
|
+
let mut index = 0;
|
|
649
|
+
while index < out.len() {
|
|
650
|
+
let parent = out[index];
|
|
651
|
+
for (pid, ppid) in &pairs {
|
|
652
|
+
if *ppid == parent && seen.insert(*pid) {
|
|
653
|
+
out.push(*pid);
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
index += 1;
|
|
657
|
+
}
|
|
658
|
+
out
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
fn process_parent_pairs() -> Vec<(u32, u32)> {
|
|
662
|
+
let output = match crate::os_probe::bounded_command_output_with_probe(
|
|
663
|
+
std::process::Command::new("ps").args(["-axo", "pid=,ppid="]),
|
|
664
|
+
"ps_parent",
|
|
665
|
+
None,
|
|
666
|
+
) {
|
|
667
|
+
Ok(output) if output.status.success() => output,
|
|
668
|
+
_ => return Vec::new(),
|
|
669
|
+
};
|
|
670
|
+
String::from_utf8_lossy(&output.stdout)
|
|
671
|
+
.lines()
|
|
672
|
+
.filter_map(|line| {
|
|
673
|
+
let mut parts = line.split_whitespace();
|
|
674
|
+
let pid = parts.next()?.parse::<u32>().ok()?;
|
|
675
|
+
let ppid = parts.next()?.parse::<u32>().ok()?;
|
|
676
|
+
Some((pid, ppid))
|
|
677
|
+
})
|
|
678
|
+
.collect()
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
fn process_table() -> Vec<ProcessInfo> {
|
|
682
|
+
let output = match crate::os_probe::bounded_command_output_with_probe(
|
|
683
|
+
std::process::Command::new("ps").args(["-axo", "pid=,ppid=,pgid=,sess=,command="]),
|
|
684
|
+
"ps_table",
|
|
685
|
+
None,
|
|
686
|
+
) {
|
|
687
|
+
Ok(output) if output.status.success() => output,
|
|
688
|
+
_ => return Vec::new(),
|
|
689
|
+
};
|
|
690
|
+
String::from_utf8_lossy(&output.stdout)
|
|
691
|
+
.lines()
|
|
692
|
+
.filter_map(parse_process_info)
|
|
693
|
+
.collect()
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
fn parse_process_info(line: &str) -> Option<ProcessInfo> {
|
|
697
|
+
let mut parts = line.split_whitespace();
|
|
698
|
+
let pid = parts.next()?.parse::<u32>().ok()?;
|
|
699
|
+
let ppid = parts.next()?.parse::<u32>().ok()?;
|
|
700
|
+
let pgid = parts.next().and_then(|raw| raw.parse::<u32>().ok());
|
|
701
|
+
let session = parts.next().and_then(|raw| raw.parse::<u32>().ok());
|
|
702
|
+
let command = parts.collect::<Vec<_>>().join(" ");
|
|
703
|
+
Some(ProcessInfo {
|
|
704
|
+
pid,
|
|
705
|
+
ppid,
|
|
706
|
+
pgid,
|
|
707
|
+
session,
|
|
708
|
+
command,
|
|
709
|
+
})
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
#[derive(Clone, Debug)]
|
|
713
|
+
struct ProcessInfo {
|
|
714
|
+
pid: u32,
|
|
715
|
+
ppid: u32,
|
|
716
|
+
pgid: Option<u32>,
|
|
717
|
+
session: Option<u32>,
|
|
718
|
+
command: String,
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
#[derive(Clone, Debug, Default)]
|
|
722
|
+
struct ShutdownProtection {
|
|
723
|
+
pids: std::collections::BTreeSet<u32>,
|
|
724
|
+
pgids: std::collections::BTreeSet<u32>,
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
impl ShutdownProtection {
|
|
728
|
+
fn contains_pid(&self, pid: u32) -> bool {
|
|
729
|
+
self.pids.contains(&pid)
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
fn contains_pgid(&self, pgid: u32) -> bool {
|
|
733
|
+
self.pgids.contains(&pgid)
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
fn contains_process(&self, process: &ProcessInfo) -> bool {
|
|
737
|
+
self.pids.contains(&process.pid)
|
|
738
|
+
|| process.pgid.is_some_and(|pgid| self.pgids.contains(&pgid))
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
fn shutdown_protection_set() -> ShutdownProtection {
|
|
743
|
+
let table = process_table();
|
|
744
|
+
let mut protected = ShutdownProtection::default();
|
|
745
|
+
let current = std::process::id();
|
|
746
|
+
protected.pids.insert(current);
|
|
747
|
+
if let Ok(pgid) = u32::try_from(unsafe { libc::getpgrp() }) {
|
|
748
|
+
protected.pgids.insert(pgid);
|
|
749
|
+
}
|
|
750
|
+
let mut cursor = current;
|
|
751
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
752
|
+
while seen.insert(cursor) {
|
|
753
|
+
let Some(process) = table.iter().find(|process| process.pid == cursor) else {
|
|
754
|
+
break;
|
|
755
|
+
};
|
|
756
|
+
protected.pids.insert(process.pid);
|
|
757
|
+
if let Some(pgid) = process.pgid {
|
|
758
|
+
protected.pgids.insert(pgid);
|
|
759
|
+
}
|
|
760
|
+
if process.ppid == 0 || process.ppid == process.pid {
|
|
761
|
+
break;
|
|
762
|
+
}
|
|
763
|
+
cursor = process.ppid;
|
|
764
|
+
}
|
|
765
|
+
protected
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
/// Sends `signal` to the single process `pid`, best effort.
///
/// Nothing is sent when `pid` is `0` or does not fit in `libc::pid_t`.
/// `kill(0, sig)` does not address "process 0": it signals every process in
/// the caller's own process group, which would turn a targeted signal into
/// self-termination. The result of `kill` is deliberately ignored; callers
/// check liveness separately.
fn send_process_signal(pid: u32, signal: libc::c_int) {
    if pid == 0 {
        return;
    }
    let Ok(pid_t) = libc::pid_t::try_from(pid) else {
        return;
    };
    // SAFETY: `kill` only takes integer arguments; `pid_t` is strictly positive
    // here, so exactly one process is addressed.
    unsafe {
        libc::kill(pid_t, signal);
    }
}
|
|
776
|
+
|
|
777
|
+
/// Sends `signal` to every process in process group `pgid`, best effort.
///
/// Nothing is sent unless `pgid > 1`:
/// * `pgid == 0` would become `kill(0, sig)`, i.e. the caller's own group;
/// * `pgid == 1` would become `kill(-1, sig)`, which broadcasts to every
///   process the caller is allowed to signal;
/// * a negative `pgid` would be negated into a positive pid and hit a single
///   unrelated process (and `pid_t::MIN` cannot be negated at all).
///
/// The result of `kill` is deliberately ignored.
fn send_process_signal_group(pgid: libc::pid_t, signal: libc::c_int) {
    if pgid <= 1 {
        return;
    }
    // SAFETY: `kill` only takes integer arguments; `pgid > 1`, so `-pgid` is a
    // valid negative value addressing exactly that process group.
    unsafe {
        libc::kill(-pgid, signal);
    }
}
|
|
782
|
+
|
|
783
|
+
fn wait_for_processes_gone(pids: &[u32], timeout: std::time::Duration) {
|
|
784
|
+
let start = std::time::Instant::now();
|
|
785
|
+
loop {
|
|
786
|
+
for pid in pids {
|
|
787
|
+
reap_child_if_possible(*pid);
|
|
788
|
+
}
|
|
789
|
+
if !pids.iter().any(|pid| process_is_live(*pid)) || start.elapsed() >= timeout {
|
|
790
|
+
return;
|
|
791
|
+
}
|
|
792
|
+
std::thread::sleep(std::time::Duration::from_millis(25));
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
fn reap_child_if_possible(pid: u32) {
|
|
797
|
+
let Ok(pid_t) = libc::pid_t::try_from(pid) else {
|
|
798
|
+
return;
|
|
799
|
+
};
|
|
800
|
+
let mut status = 0;
|
|
801
|
+
unsafe {
|
|
802
|
+
libc::waitpid(pid_t, &mut status, libc::WNOHANG);
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
fn process_is_live(pid: u32) -> bool {
|
|
807
|
+
let Ok(pid_t) = libc::pid_t::try_from(pid) else {
|
|
808
|
+
return false;
|
|
809
|
+
};
|
|
810
|
+
let rc = unsafe { libc::kill(pid_t, 0) };
|
|
811
|
+
if rc == 0 {
|
|
812
|
+
return true;
|
|
813
|
+
}
|
|
814
|
+
let err = std::io::Error::last_os_error();
|
|
815
|
+
err.raw_os_error() == Some(libc::EPERM)
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
fn process_pgids(pids: &[u32], protected: &ShutdownProtection) -> Vec<u32> {
|
|
819
|
+
let table = process_table();
|
|
820
|
+
let mut pgids = pids
|
|
821
|
+
.iter()
|
|
822
|
+
.filter_map(|pid| table.iter().find(|process| process.pid == *pid))
|
|
823
|
+
.filter_map(|process| process.pgid)
|
|
824
|
+
.filter(|pgid| {
|
|
825
|
+
libc::pid_t::try_from(*pgid)
|
|
826
|
+
.map(|pgid_t| pgid_t > 1 && !protected.contains_pgid(*pgid))
|
|
827
|
+
.unwrap_or(false)
|
|
828
|
+
})
|
|
829
|
+
.collect::<Vec<_>>();
|
|
830
|
+
pgids.sort_unstable();
|
|
831
|
+
pgids.dedup();
|
|
832
|
+
pgids
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
fn process_residuals(
|
|
836
|
+
workspace: &Path,
|
|
837
|
+
state: &Value,
|
|
838
|
+
root_pids: &[u32],
|
|
839
|
+
root_pgids: &[u32],
|
|
840
|
+
protected: &ShutdownProtection,
|
|
841
|
+
scope: ShutdownReapScope,
|
|
842
|
+
) -> Vec<Value> {
|
|
843
|
+
let mut residuals =
|
|
844
|
+
matched_processes(workspace, state, root_pids, root_pgids, protected, scope);
|
|
845
|
+
let mut seen = residuals
|
|
846
|
+
.iter()
|
|
847
|
+
.map(|process| process.pid)
|
|
848
|
+
.collect::<std::collections::BTreeSet<_>>();
|
|
849
|
+
for pid in root_pids {
|
|
850
|
+
if !protected.contains_pid(*pid) && process_is_live(*pid) && seen.insert(*pid) {
|
|
851
|
+
residuals.push(ProcessInfo {
|
|
852
|
+
pid: *pid,
|
|
853
|
+
ppid: 0,
|
|
854
|
+
pgid: None,
|
|
855
|
+
session: None,
|
|
856
|
+
command: String::new(),
|
|
857
|
+
});
|
|
858
|
+
}
|
|
859
|
+
}
|
|
860
|
+
residuals
|
|
861
|
+
.into_iter()
|
|
862
|
+
.map(|process| {
|
|
863
|
+
json!({
|
|
864
|
+
"pid": process.pid,
|
|
865
|
+
"ppid": process.ppid,
|
|
866
|
+
"pgid": process.pgid,
|
|
867
|
+
"session": process.session,
|
|
868
|
+
"command": process.command,
|
|
869
|
+
})
|
|
870
|
+
})
|
|
871
|
+
.collect()
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
fn matched_processes(
|
|
875
|
+
workspace: &Path,
|
|
876
|
+
state: &Value,
|
|
877
|
+
root_pids: &[u32],
|
|
878
|
+
root_pgids: &[u32],
|
|
879
|
+
protected: &ShutdownProtection,
|
|
880
|
+
scope: ShutdownReapScope,
|
|
881
|
+
) -> Vec<ProcessInfo> {
|
|
882
|
+
let table = process_table();
|
|
883
|
+
let root_tree = root_pids
|
|
884
|
+
.iter()
|
|
885
|
+
.flat_map(|pid| process_tree_from_table(*pid, &table))
|
|
886
|
+
.filter(|pid| !protected.contains_pid(*pid))
|
|
887
|
+
.collect::<std::collections::BTreeSet<_>>();
|
|
888
|
+
let root_pgids = root_pgids
|
|
889
|
+
.iter()
|
|
890
|
+
.copied()
|
|
891
|
+
.collect::<std::collections::BTreeSet<_>>();
|
|
892
|
+
let spawn_cwds = state_spawn_cwds(state, scope);
|
|
893
|
+
let workspace_text = workspace.to_string_lossy().to_string();
|
|
894
|
+
let mut cwd_probe_budget = 3_usize;
|
|
895
|
+
let mut out = Vec::new();
|
|
896
|
+
for process in table {
|
|
897
|
+
if protected.contains_pid(process.pid) {
|
|
898
|
+
continue;
|
|
899
|
+
}
|
|
900
|
+
let matches_workspace = scope == ShutdownReapScope::Workspace
|
|
901
|
+
&& process_matches_workspace(
|
|
902
|
+
&process,
|
|
903
|
+
&workspace_text,
|
|
904
|
+
&spawn_cwds,
|
|
905
|
+
&mut cwd_probe_budget,
|
|
906
|
+
);
|
|
907
|
+
if matches_workspace
|
|
908
|
+
|| root_tree.contains(&process.pid)
|
|
909
|
+
|| process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
|
|
910
|
+
{
|
|
911
|
+
out.push(process);
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
out
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
fn process_tree_from_table(root_pid: u32, table: &[ProcessInfo]) -> Vec<u32> {
|
|
918
|
+
if root_pid == 0 {
|
|
919
|
+
return Vec::new();
|
|
920
|
+
}
|
|
921
|
+
let mut out = vec![root_pid];
|
|
922
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
923
|
+
seen.insert(root_pid);
|
|
924
|
+
let mut index = 0;
|
|
925
|
+
while index < out.len() {
|
|
926
|
+
let parent = out[index];
|
|
927
|
+
for process in table {
|
|
928
|
+
if process.ppid == parent && seen.insert(process.pid) {
|
|
929
|
+
out.push(process.pid);
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
index += 1;
|
|
933
|
+
}
|
|
934
|
+
out
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
fn state_spawn_cwds(state: &Value, scope: ShutdownReapScope) -> Vec<PathBuf> {
|
|
938
|
+
let mut out = Vec::new();
|
|
939
|
+
collect_spawn_cwds(state, &mut out);
|
|
940
|
+
if scope == ShutdownReapScope::Workspace {
|
|
941
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
942
|
+
for team in teams.values() {
|
|
943
|
+
collect_spawn_cwds(team, &mut out);
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
}
|
|
947
|
+
out
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
fn collect_spawn_cwds(state: &Value, out: &mut Vec<PathBuf>) {
|
|
951
|
+
let Some(agents) = state.get("agents").and_then(Value::as_object) else {
|
|
952
|
+
return;
|
|
953
|
+
};
|
|
954
|
+
for agent in agents.values() {
|
|
955
|
+
if let Some(spawn_cwd) = agent
|
|
956
|
+
.get("spawn_cwd")
|
|
957
|
+
.and_then(Value::as_str)
|
|
958
|
+
.filter(|cwd| !cwd.is_empty())
|
|
959
|
+
{
|
|
960
|
+
out.push(PathBuf::from(spawn_cwd));
|
|
961
|
+
}
|
|
962
|
+
}
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
fn process_matches_workspace(
|
|
966
|
+
process: &ProcessInfo,
|
|
967
|
+
workspace_text: &str,
|
|
968
|
+
spawn_cwds: &[PathBuf],
|
|
969
|
+
cwd_probe_budget: &mut usize,
|
|
970
|
+
) -> bool {
|
|
971
|
+
let command = process.command.as_str();
|
|
972
|
+
if command.contains("mcp-server")
|
|
973
|
+
&& command.contains("--workspace")
|
|
974
|
+
&& command.contains(workspace_text)
|
|
975
|
+
{
|
|
976
|
+
return true;
|
|
977
|
+
}
|
|
978
|
+
if command.contains(workspace_text) {
|
|
979
|
+
return true;
|
|
980
|
+
}
|
|
981
|
+
if spawn_cwds.is_empty() || *cwd_probe_budget == 0 {
|
|
982
|
+
return false;
|
|
983
|
+
}
|
|
984
|
+
*cwd_probe_budget -= 1;
|
|
985
|
+
let Some(cwd) = process_cwd(process.pid) else {
|
|
986
|
+
return false;
|
|
987
|
+
};
|
|
988
|
+
spawn_cwds
|
|
989
|
+
.iter()
|
|
990
|
+
.any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
|
|
991
|
+
}
|
|
992
|
+
|
|
993
|
+
fn process_cwd(pid: u32) -> Option<PathBuf> {
|
|
994
|
+
let proc_cwd = PathBuf::from(format!("/proc/{pid}/cwd"));
|
|
995
|
+
if let Ok(path) = std::fs::read_link(proc_cwd) {
|
|
996
|
+
return Some(path);
|
|
997
|
+
}
|
|
998
|
+
if crate::os_probe::probe_timed_out() {
|
|
999
|
+
return None;
|
|
1000
|
+
}
|
|
1001
|
+
let output = crate::os_probe::bounded_command_output_with_probe(
|
|
1002
|
+
std::process::Command::new("lsof").args([
|
|
1003
|
+
"-a",
|
|
1004
|
+
"-p",
|
|
1005
|
+
&pid.to_string(),
|
|
1006
|
+
"-d",
|
|
1007
|
+
"cwd",
|
|
1008
|
+
"-Fn",
|
|
1009
|
+
]),
|
|
1010
|
+
"lsof_cwd",
|
|
1011
|
+
Some(pid),
|
|
1012
|
+
)
|
|
1013
|
+
.ok()?;
|
|
1014
|
+
if !output.status.success() {
|
|
1015
|
+
return None;
|
|
1016
|
+
}
|
|
1017
|
+
String::from_utf8_lossy(&output.stdout)
|
|
1018
|
+
.lines()
|
|
1019
|
+
.find_map(|line| line.strip_prefix('n').map(PathBuf::from))
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
/// Returns `true` when `path` is `root` itself or lies somewhere beneath it.
///
/// Both paths are canonicalized first; a path that cannot be canonicalized
/// (for example because it does not exist) is compared as given. The
/// comparison works on whole path components, so `/a/bc` is not under `/a/b`.
fn path_is_under(path: &Path, root: &Path) -> bool {
    let resolve = |raw: &Path| raw.canonicalize().unwrap_or_else(|_| raw.to_path_buf());
    let resolved_path = resolve(path);
    let resolved_root = resolve(root);
    if resolved_path == resolved_root {
        return true;
    }
    resolved_path.starts_with(&resolved_root)
}
|
|
187
1027
|
/// `runtime.restart`(`cmd_restart`)。
|
|
188
|
-
pub fn restart(
|
|
189
|
-
|
|
1028
|
+
pub fn restart(
|
|
1029
|
+
workspace: &Path,
|
|
1030
|
+
allow_fresh: bool,
|
|
1031
|
+
team: Option<&str>,
|
|
1032
|
+
session_converge_deadline_ms: Option<u64>,
|
|
1033
|
+
) -> Result<Value, CliError> {
|
|
1034
|
+
match crate::lifecycle::restart_with_session_convergence_deadline(
|
|
1035
|
+
workspace,
|
|
1036
|
+
allow_fresh,
|
|
1037
|
+
team,
|
|
1038
|
+
session_converge_deadline_ms,
|
|
1039
|
+
) {
|
|
190
1040
|
Ok(report) => Ok(restart_value(report)),
|
|
191
1041
|
Err(e) => Ok(error_value(e)),
|
|
192
1042
|
}
|
|
@@ -209,12 +1059,18 @@ pub mod lifecycle_port {
|
|
|
209
1059
|
allow_fresh,
|
|
210
1060
|
team,
|
|
211
1061
|
) {
|
|
212
|
-
Ok(report) =>
|
|
1062
|
+
Ok(report) => {
|
|
1063
|
+
Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
|
|
1064
|
+
}
|
|
213
1065
|
Err(e) => Ok(error_value(e)),
|
|
214
1066
|
}
|
|
215
1067
|
}
|
|
216
1068
|
/// `runtime.stop_agent`(`cmd_stop_agent`)。
|
|
217
|
-
pub fn stop_agent(
|
|
1069
|
+
pub fn stop_agent(
|
|
1070
|
+
workspace: &Path,
|
|
1071
|
+
agent: &str,
|
|
1072
|
+
team: Option<&str>,
|
|
1073
|
+
) -> Result<Value, CliError> {
|
|
218
1074
|
let agent_id = crate::model::ids::AgentId::new(agent);
|
|
219
1075
|
match crate::lifecycle::stop_agent(workspace, &agent_id, team) {
|
|
220
1076
|
Ok(report) => Ok(json!({"ok": true, "agent_id": agent, "stopped": report.stopped})),
|
|
@@ -237,7 +1093,9 @@ pub mod lifecycle_port {
|
|
|
237
1093
|
open_display,
|
|
238
1094
|
team,
|
|
239
1095
|
) {
|
|
240
|
-
Ok(report) =>
|
|
1096
|
+
Ok(report) => {
|
|
1097
|
+
Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
|
|
1098
|
+
}
|
|
241
1099
|
Err(e) => Ok(error_value(e)),
|
|
242
1100
|
}
|
|
243
1101
|
}
|
|
@@ -296,11 +1154,15 @@ pub mod lifecycle_port {
|
|
|
296
1154
|
team: Option<&str>,
|
|
297
1155
|
) -> Result<Value, CliError> {
|
|
298
1156
|
if !confirm {
|
|
299
|
-
return Ok(
|
|
1157
|
+
return Ok(
|
|
1158
|
+
json!({"ok": false, "agent_id": agent, "error": "remove-agent requires --confirm"}),
|
|
1159
|
+
);
|
|
300
1160
|
}
|
|
301
1161
|
let agent_id = crate::model::ids::AgentId::new(agent);
|
|
302
1162
|
match crate::lifecycle::remove_agent(workspace, &agent_id, from_spec, force, team) {
|
|
303
|
-
Ok(report) =>
|
|
1163
|
+
Ok(report) => {
|
|
1164
|
+
Ok(json!({"ok": true, "agent_id": agent, "report": format!("{report:?}")}))
|
|
1165
|
+
}
|
|
304
1166
|
Err(e) => Ok(error_value(e)),
|
|
305
1167
|
}
|
|
306
1168
|
}
|
|
@@ -310,9 +1172,18 @@ pub mod lifecycle_port {
|
|
|
310
1172
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
311
1173
|
let team = team
|
|
312
1174
|
.map(ToString::to_string)
|
|
313
|
-
.or_else(||
|
|
1175
|
+
.or_else(|| {
|
|
1176
|
+
state
|
|
1177
|
+
.get("active_team_key")
|
|
1178
|
+
.and_then(Value::as_str)
|
|
1179
|
+
.map(ToString::to_string)
|
|
1180
|
+
})
|
|
314
1181
|
.filter(|s| !s.is_empty())
|
|
315
|
-
.or_else(||
|
|
1182
|
+
.or_else(|| {
|
|
1183
|
+
workspace
|
|
1184
|
+
.file_name()
|
|
1185
|
+
.map(|name| name.to_string_lossy().to_string())
|
|
1186
|
+
})
|
|
316
1187
|
.unwrap_or_else(|| "current".to_string());
|
|
317
1188
|
let now = chrono::Utc::now().to_rfc3339();
|
|
318
1189
|
let ttl_seconds = 1800;
|
|
@@ -328,7 +1199,10 @@ pub mod lifecycle_port {
|
|
|
328
1199
|
crate::state::persist::save_runtime_state(workspace, &state)
|
|
329
1200
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
330
1201
|
crate::event_log::EventLog::new(workspace)
|
|
331
|
-
.write(
|
|
1202
|
+
.write(
|
|
1203
|
+
"coordinator.idle_acknowledged",
|
|
1204
|
+
json!({"team": team, "ttl_seconds": ttl_seconds}),
|
|
1205
|
+
)
|
|
332
1206
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
333
1207
|
Ok(json!({
|
|
334
1208
|
"ok": true,
|
|
@@ -468,6 +1342,15 @@ pub mod lifecycle_port {
|
|
|
468
1342
|
// The summary string + a structured `worker_readiness` block tell the
|
|
469
1343
|
// caller exactly which agents are unhealthy (Degraded) or that the
|
|
470
1344
|
// tool-set load has not been confirmed yet (PendingToolLoad).
|
|
1345
|
+
let incomplete_session_capture_agents =
|
|
1346
|
+
launch.session_capture_incomplete_agents.clone();
|
|
1347
|
+
let all_spawned = !launch.started.is_empty();
|
|
1348
|
+
let leader_receiver_attached = launch.leader_receiver_attached;
|
|
1349
|
+
let all_resumable_have_session = incomplete_session_capture_agents.is_empty();
|
|
1350
|
+
let all_workers_spawned = all_spawned;
|
|
1351
|
+
let attached_receiver = leader_receiver_attached;
|
|
1352
|
+
let all_attached_receiver = leader_receiver_attached;
|
|
1353
|
+
let all_resumable_agents_have_sessions = all_resumable_have_session;
|
|
471
1354
|
let (summary, ok, readiness_json) = match &worker_readiness {
|
|
472
1355
|
crate::lifecycle::QuickStartReadiness::Degraded { unhealthy_agents } => (
|
|
473
1356
|
format!(
|
|
@@ -477,28 +1360,109 @@ pub mod lifecycle_port {
|
|
|
477
1360
|
),
|
|
478
1361
|
false,
|
|
479
1362
|
json!({
|
|
1363
|
+
"all_spawned": all_spawned,
|
|
1364
|
+
"all_workers_spawned": all_workers_spawned,
|
|
1365
|
+
"all_attached_receiver": all_attached_receiver,
|
|
1366
|
+
"attached_receiver": attached_receiver,
|
|
1367
|
+
"leader_receiver_attached": leader_receiver_attached,
|
|
1368
|
+
"all_resumable_have_session": all_resumable_have_session,
|
|
1369
|
+
"all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
|
|
1370
|
+
"ready": all_spawned && all_attached_receiver && all_resumable_have_session,
|
|
480
1371
|
"state": "degraded",
|
|
1372
|
+
"session_capture_complete": all_resumable_have_session,
|
|
1373
|
+
"session_capture_incomplete": !all_resumable_have_session,
|
|
1374
|
+
"incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
|
|
1375
|
+
"pending_session_agent_ids": incomplete_session_capture_agents,
|
|
481
1376
|
"unhealthy_agents": unhealthy_agents,
|
|
482
1377
|
}),
|
|
483
1378
|
),
|
|
484
|
-
crate::lifecycle::QuickStartReadiness::PendingToolLoad =>
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
1379
|
+
crate::lifecycle::QuickStartReadiness::PendingToolLoad => {
|
|
1380
|
+
if !all_resumable_have_session {
|
|
1381
|
+
(
|
|
1382
|
+
format!(
|
|
1383
|
+
"quick-start pending: {}; provider session capture incomplete",
|
|
1384
|
+
session_name.as_str()
|
|
1385
|
+
),
|
|
1386
|
+
false,
|
|
1387
|
+
json!({
|
|
1388
|
+
"all_spawned": all_spawned,
|
|
1389
|
+
"all_workers_spawned": all_workers_spawned,
|
|
1390
|
+
"all_attached_receiver": all_attached_receiver,
|
|
1391
|
+
"attached_receiver": attached_receiver,
|
|
1392
|
+
"leader_receiver_attached": leader_receiver_attached,
|
|
1393
|
+
"all_resumable_have_session": all_resumable_have_session,
|
|
1394
|
+
"all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
|
|
1395
|
+
"ready": all_spawned && all_attached_receiver && all_resumable_have_session,
|
|
1396
|
+
"state": "session_capture_incomplete",
|
|
1397
|
+
"session_capture_complete": all_resumable_have_session,
|
|
1398
|
+
"session_capture_incomplete": !all_resumable_have_session,
|
|
1399
|
+
"incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
|
|
1400
|
+
"pending_session_agent_ids": incomplete_session_capture_agents,
|
|
1401
|
+
"reason": "provider session capture is incomplete; restart is not yet resume-safe",
|
|
1402
|
+
}),
|
|
1403
|
+
)
|
|
1404
|
+
} else if launch.leader_receiver_attached {
|
|
1405
|
+
(
|
|
1406
|
+
format!(
|
|
1407
|
+
"quick-start launched (worker tool load unverified): {}",
|
|
1408
|
+
session_name.as_str()
|
|
1409
|
+
),
|
|
1410
|
+
all_spawned && all_attached_receiver && all_resumable_have_session,
|
|
1411
|
+
json!({
|
|
1412
|
+
"all_spawned": all_spawned,
|
|
1413
|
+
"all_workers_spawned": all_workers_spawned,
|
|
1414
|
+
"all_attached_receiver": all_attached_receiver,
|
|
1415
|
+
"attached_receiver": attached_receiver,
|
|
1416
|
+
"leader_receiver_attached": leader_receiver_attached,
|
|
1417
|
+
"all_resumable_have_session": all_resumable_have_session,
|
|
1418
|
+
"all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
|
|
1419
|
+
"ready": all_spawned && all_attached_receiver && all_resumable_have_session,
|
|
1420
|
+
"state": "pending_tool_load",
|
|
1421
|
+
"session_capture_complete": all_resumable_have_session,
|
|
1422
|
+
"session_capture_incomplete": !all_resumable_have_session,
|
|
1423
|
+
"incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
|
|
1424
|
+
"pending_session_agent_ids": incomplete_session_capture_agents,
|
|
1425
|
+
"reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
|
|
1426
|
+
}),
|
|
1427
|
+
)
|
|
1428
|
+
} else {
|
|
1429
|
+
(
|
|
1430
|
+
format!(
|
|
1431
|
+
"quick-start degraded: {}; leader receiver unbound",
|
|
1432
|
+
session_name.as_str()
|
|
1433
|
+
),
|
|
1434
|
+
false,
|
|
1435
|
+
json!({
|
|
1436
|
+
"all_spawned": all_spawned,
|
|
1437
|
+
"all_workers_spawned": all_workers_spawned,
|
|
1438
|
+
"all_attached_receiver": all_attached_receiver,
|
|
1439
|
+
"attached_receiver": attached_receiver,
|
|
1440
|
+
"leader_receiver_attached": leader_receiver_attached,
|
|
1441
|
+
"all_resumable_have_session": all_resumable_have_session,
|
|
1442
|
+
"all_resumable_agents_have_sessions": all_resumable_agents_have_sessions,
|
|
1443
|
+
"ready": all_spawned && all_attached_receiver && all_resumable_have_session,
|
|
1444
|
+
"state": "leader_receiver_unbound",
|
|
1445
|
+
"session_capture_complete": all_resumable_have_session,
|
|
1446
|
+
"session_capture_incomplete": !all_resumable_have_session,
|
|
1447
|
+
"incomplete_session_capture_agents": incomplete_session_capture_agents.clone(),
|
|
1448
|
+
"pending_session_agent_ids": incomplete_session_capture_agents,
|
|
1449
|
+
"reason": "launched team has no attached leader receiver",
|
|
1450
|
+
"next_action": "claim-leader",
|
|
1451
|
+
}),
|
|
1452
|
+
)
|
|
1453
|
+
}
|
|
1454
|
+
}
|
|
495
1455
|
};
|
|
496
1456
|
json!({
|
|
497
1457
|
"ok": ok,
|
|
498
1458
|
"summary": summary,
|
|
1459
|
+
"status": readiness_json.get("state").cloned().unwrap_or(Value::Null),
|
|
1460
|
+
"reason": readiness_json.get("reason").cloned().unwrap_or(Value::Null),
|
|
1461
|
+
"ready": readiness_json.get("ready").cloned().unwrap_or(Value::Bool(false)),
|
|
499
1462
|
"session_name": session_name.as_str(),
|
|
500
1463
|
"dry_run": launch.dry_run,
|
|
501
1464
|
"next_actions": next_actions,
|
|
1465
|
+
"readiness": readiness_json.clone(),
|
|
502
1466
|
"worker_readiness": readiness_json,
|
|
503
1467
|
})
|
|
504
1468
|
}
|
|
@@ -552,6 +1516,30 @@ pub mod lifecycle_port {
|
|
|
552
1516
|
"error": error,
|
|
553
1517
|
"unresumable": unresumable.iter().map(|w| w.agent_id.as_str()).collect::<Vec<_>>(),
|
|
554
1518
|
}),
|
|
1519
|
+
crate::lifecycle::RestartReport::RefusedResumeNotReady {
|
|
1520
|
+
missing,
|
|
1521
|
+
allow_fresh,
|
|
1522
|
+
deadline,
|
|
1523
|
+
elapsed,
|
|
1524
|
+
error,
|
|
1525
|
+
} => json!({
|
|
1526
|
+
"ok": false,
|
|
1527
|
+
"kind": "resume_not_ready",
|
|
1528
|
+
"reason": "session_capture_incomplete",
|
|
1529
|
+
"status": "resume_not_ready",
|
|
1530
|
+
"allow_fresh": allow_fresh,
|
|
1531
|
+
"error": error,
|
|
1532
|
+
"pending_agents": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
|
|
1533
|
+
"missing": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
|
|
1534
|
+
"session_convergence": {
|
|
1535
|
+
"complete": false,
|
|
1536
|
+
"deadline_s": deadline.as_secs_f64(),
|
|
1537
|
+
"deadline_ms": deadline.as_millis(),
|
|
1538
|
+
"elapsed_ms": elapsed.as_millis(),
|
|
1539
|
+
"pending_agent_ids": missing.iter().map(|w| w.as_str()).collect::<Vec<_>>(),
|
|
1540
|
+
},
|
|
1541
|
+
"next_action": "rerun restart after session capture completes, or pass --allow-fresh to deliberately discard missing context",
|
|
1542
|
+
}),
|
|
555
1543
|
crate::lifecycle::RestartReport::RefusedInvalidFirstSendAt {
|
|
556
1544
|
invalid,
|
|
557
1545
|
allow_fresh,
|
|
@@ -594,6 +1582,75 @@ pub mod lifecycle_port {
|
|
|
594
1582
|
}
|
|
595
1583
|
}
|
|
596
1584
|
}
|
|
1585
|
+
|
|
1586
|
+
fn mark_matching_session_teams_stopped(
|
|
1587
|
+
state: &mut Value,
|
|
1588
|
+
session_name: Option<&crate::transport::SessionName>,
|
|
1589
|
+
) -> Vec<String> {
|
|
1590
|
+
let Some(session_name) = session_name.map(crate::transport::SessionName::as_str) else {
|
|
1591
|
+
return Vec::new();
|
|
1592
|
+
};
|
|
1593
|
+
let Some(teams) = state.get_mut("teams").and_then(Value::as_object_mut) else {
|
|
1594
|
+
return Vec::new();
|
|
1595
|
+
};
|
|
1596
|
+
let mut out = Vec::new();
|
|
1597
|
+
for (key, team) in teams.iter_mut() {
|
|
1598
|
+
let matches = team
|
|
1599
|
+
.get("session_name")
|
|
1600
|
+
.and_then(Value::as_str)
|
|
1601
|
+
.is_some_and(|session| session == session_name);
|
|
1602
|
+
if matches {
|
|
1603
|
+
mark_agents_stopped(team);
|
|
1604
|
+
out.push(key.clone());
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
out
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
fn promote_live_sibling_after_scoped_shutdown(
|
|
1611
|
+
workspace: &Path,
|
|
1612
|
+
stopped_state: &Value,
|
|
1613
|
+
) -> Result<(), CliError> {
|
|
1614
|
+
let stopped_key = stopped_state
|
|
1615
|
+
.get("active_team_key")
|
|
1616
|
+
.and_then(Value::as_str)
|
|
1617
|
+
.filter(|key| !key.is_empty());
|
|
1618
|
+
let Some(stopped_key) = stopped_key else {
|
|
1619
|
+
return Ok(());
|
|
1620
|
+
};
|
|
1621
|
+
let raw = crate::state::persist::load_runtime_state(workspace)?;
|
|
1622
|
+
let active = raw
|
|
1623
|
+
.get("active_team_key")
|
|
1624
|
+
.and_then(Value::as_str)
|
|
1625
|
+
.unwrap_or("");
|
|
1626
|
+
if active != stopped_key {
|
|
1627
|
+
return Ok(());
|
|
1628
|
+
}
|
|
1629
|
+
let Some((next_key, _)) = raw
|
|
1630
|
+
.get("teams")
|
|
1631
|
+
.and_then(Value::as_object)
|
|
1632
|
+
.and_then(|teams| {
|
|
1633
|
+
teams
|
|
1634
|
+
.iter()
|
|
1635
|
+
.find(|(key, team)| key.as_str() != stopped_key && team_has_running_agent(team))
|
|
1636
|
+
})
|
|
1637
|
+
else {
|
|
1638
|
+
return Ok(());
|
|
1639
|
+
};
|
|
1640
|
+
let promoted = crate::state::projection::project_top_level_view(&raw, next_key);
|
|
1641
|
+
crate::state::persist::save_runtime_state(workspace, &promoted)?;
|
|
1642
|
+
Ok(())
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1645
|
+
fn team_has_running_agent(team: &Value) -> bool {
|
|
1646
|
+
team.get("agents")
|
|
1647
|
+
.and_then(Value::as_object)
|
|
1648
|
+
.is_some_and(|agents| {
|
|
1649
|
+
agents
|
|
1650
|
+
.values()
|
|
1651
|
+
.any(|agent| agent.get("status").and_then(Value::as_str) == Some("running"))
|
|
1652
|
+
})
|
|
1653
|
+
}
|
|
597
1654
|
}
|
|
598
1655
|
|
|
599
1656
|
/// PLACEHOLDER → diagnose lane(`diagnose/health.py` `doctor`、`diagnose/comms.py`
|
|
@@ -605,9 +1662,19 @@ pub mod diagnose_port {
|
|
|
605
1662
|
|
|
606
1663
|
/// `runtime.doctor(spec)` + schema 注入(`cmd_doctor` 默认分支)。
|
|
607
1664
|
pub fn doctor(workspace: &Path, spec: Option<&Path>) -> Result<Value, CliError> {
|
|
608
|
-
let _ = spec;
|
|
609
1665
|
let tmux_path = which_path("tmux");
|
|
610
1666
|
let tmux_installed = tmux_path.is_some();
|
|
1667
|
+
let workspace_valid = workspace.is_dir();
|
|
1668
|
+
let team_context = workspace_valid && has_doctor_team_context(workspace, spec);
|
|
1669
|
+
let workspace_has_entries = workspace_valid && workspace_has_any_entry(workspace);
|
|
1670
|
+
let profile_smoke = doctor_team_dir(workspace, spec)
|
|
1671
|
+
.map(|team| crate::cli::diagnose::build_profile_smoke_check_for_team(&team))
|
|
1672
|
+
.transpose()?;
|
|
1673
|
+
let profile_smoke_ok = profile_smoke
|
|
1674
|
+
.as_ref()
|
|
1675
|
+
.and_then(|check| check.get("ok").and_then(Value::as_bool))
|
|
1676
|
+
.unwrap_or(true);
|
|
1677
|
+
let ok = workspace_valid && (team_context || workspace_has_entries) && profile_smoke_ok;
|
|
611
1678
|
let health = crate::coordinator::coordinator_health(
|
|
612
1679
|
&crate::coordinator::WorkspacePath::new(workspace.to_path_buf()),
|
|
613
1680
|
);
|
|
@@ -624,11 +1691,81 @@ pub mod diagnose_port {
|
|
|
624
1691
|
"local_module": true,
|
|
625
1692
|
},
|
|
626
1693
|
"secret_scan": secret_scan(workspace),
|
|
1694
|
+
"profile_smoke": profile_smoke.unwrap_or_else(|| json!({
|
|
1695
|
+
"name": "profile_smoke",
|
|
1696
|
+
"ok": true,
|
|
1697
|
+
"status": "not_required",
|
|
1698
|
+
"checks": [],
|
|
1699
|
+
"secret_values_printed": false,
|
|
1700
|
+
})),
|
|
627
1701
|
"coordinator": coordinator_health_value(health),
|
|
628
|
-
"ok":
|
|
1702
|
+
"ok": ok,
|
|
1703
|
+
"error": if ok {
|
|
1704
|
+
Value::Null
|
|
1705
|
+
} else if !profile_smoke_ok {
|
|
1706
|
+
json!("profile_smoke_failed")
|
|
1707
|
+
} else if workspace_valid {
|
|
1708
|
+
json!("workspace has no Team Agent spec or runtime context")
|
|
1709
|
+
} else {
|
|
1710
|
+
json!("invalid workspace")
|
|
1711
|
+
},
|
|
629
1712
|
}))
|
|
630
1713
|
}
|
|
631
1714
|
|
|
1715
|
+
/// Locates the team directory that `doctor` should inspect.
///
/// A directory counts as a team directory when it contains a regular file
/// named `team.spec.yaml` or `TEAM.md`. Candidates are tried in this order:
/// 1. `spec` (resolved against `workspace` when relative): if it is a file,
///    its parent directory; if it is a team directory, the directory itself;
/// 2. `workspace`;
/// 3. `workspace/.team/current`.
///
/// Returns `None` when no candidate qualifies. A `spec` that resolves to
/// neither a file nor a team directory is ignored and the search continues
/// with the workspace.
fn doctor_team_dir(workspace: &Path, spec: Option<&Path>) -> Option<PathBuf> {
    let is_team_dir =
        |dir: &Path| dir.join("team.spec.yaml").is_file() || dir.join("TEAM.md").is_file();

    if let Some(spec) = spec {
        let candidate = match spec.is_absolute() {
            true => spec.to_path_buf(),
            false => workspace.join(spec),
        };
        if candidate.is_file() {
            return candidate.parent().map(Path::to_path_buf);
        }
        if is_team_dir(&candidate) {
            return Some(candidate);
        }
    }

    if is_team_dir(workspace) {
        return Some(workspace.to_path_buf());
    }
    let current = workspace.join(".team").join("current");
    is_team_dir(&current).then_some(current)
}
|
|
1738
|
+
|
|
1739
|
+
/// Reports whether `workspace` shows any sign of Team Agent spec or runtime data.
///
/// True when `spec` (resolved against `workspace` when relative) is an
/// existing file, or when any of these paths exists under `workspace`:
/// `TEAM.md`, `team.spec.yaml`, `.team/current/TEAM.md`,
/// `.team/current/team.spec.yaml`, `.team/runtime/state.json`,
/// `.team/runtime/team.db`.
fn has_doctor_team_context(workspace: &Path, spec: Option<&Path>) -> bool {
    if let Some(path) = spec {
        let candidate = match path.is_absolute() {
            true => path.to_path_buf(),
            false => workspace.join(path),
        };
        if candidate.is_file() {
            return true;
        }
    }

    let markers = [
        "TEAM.md",
        "team.spec.yaml",
        ".team/current/TEAM.md",
        ".team/current/team.spec.yaml",
        ".team/runtime/state.json",
        ".team/runtime/team.db",
    ];
    for marker in markers {
        if workspace.join(marker).exists() {
            return true;
        }
    }
    false
}
|
|
1761
|
+
|
|
1762
|
+
/// Returns `true` when listing `workspace` yields at least one item.
///
/// Returns `false` when the directory cannot be opened. An entry that fails
/// to read still counts as an item, because only the presence of a first
/// result is checked.
fn workspace_has_any_entry(workspace: &Path) -> bool {
    match std::fs::read_dir(workspace) {
        Ok(mut entries) => entries.next().is_some(),
        Err(_) => false,
    }
}
|
|
1768
|
+
|
|
632
1769
|
fn secret_scan(workspace: &Path) -> Value {
|
|
633
1770
|
let mut findings = Vec::new();
|
|
634
1771
|
let mut scanned = 0usize;
|
|
@@ -643,7 +1780,13 @@ pub mod diagnose_port {
|
|
|
643
1780
|
const SECRET_SCAN_MAX_ENTRIES: usize = 512;
|
|
644
1781
|
const SECRET_SCAN_MAX_FILE_BYTES: u64 = 128 * 1024;
|
|
645
1782
|
|
|
646
|
-
fn scan_secret_dir(
|
|
1783
|
+
fn scan_secret_dir(
|
|
1784
|
+
root: &Path,
|
|
1785
|
+
dir: &Path,
|
|
1786
|
+
depth: usize,
|
|
1787
|
+
scanned: &mut usize,
|
|
1788
|
+
findings: &mut Vec<Value>,
|
|
1789
|
+
) {
|
|
647
1790
|
if depth > SECRET_SCAN_MAX_DEPTH || *scanned >= SECRET_SCAN_MAX_ENTRIES {
|
|
648
1791
|
return;
|
|
649
1792
|
}
|
|
@@ -703,143 +1846,37 @@ pub mod diagnose_port {
|
|
|
703
1846
|
}
|
|
704
1847
|
}
|
|
705
1848
|
/// `run_comms_selftest`(`--comms`/`--gate comms`)。**纯 state-read,零 token**(MUST-NOT-13)。
|
|
706
|
-
pub fn comms_selftest(
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
let owner_pane_id = state
|
|
713
|
-
.get("owner")
|
|
714
|
-
.or_else(|| state.get("team_owner"))
|
|
715
|
-
.and_then(|v| v.get("pane_id"))
|
|
716
|
-
.cloned()
|
|
717
|
-
.unwrap_or(Value::Null);
|
|
718
|
-
let caller_pane_id = std::env::var("TMUX_PANE").ok().map(Value::String).unwrap_or(Value::Null);
|
|
719
|
-
let pane_id = receiver
|
|
720
|
-
.and_then(|r| r.get("pane_id"))
|
|
721
|
-
.cloned()
|
|
722
|
-
.unwrap_or(Value::Null);
|
|
723
|
-
let mismatches = receiver_binding_mismatches(&owner_pane_id, &caller_pane_id, &pane_id);
|
|
724
|
-
let receiver_binding = json!({
|
|
725
|
-
"status": if mismatches.is_empty() { "pass" } else { "fail" },
|
|
726
|
-
"verifies": "binding_consistency",
|
|
727
|
-
"proof": "state_read",
|
|
728
|
-
"state_read_observed": true,
|
|
729
|
-
"pane_id": pane_id,
|
|
730
|
-
"owner_pane_id": owner_pane_id,
|
|
731
|
-
"caller_pane_id": caller_pane_id,
|
|
732
|
-
"mismatches": mismatches,
|
|
733
|
-
"configured": receiver.is_some(),
|
|
734
|
-
});
|
|
735
|
-
Ok(json!({
|
|
736
|
-
"ok": true,
|
|
737
|
-
"status": "pass",
|
|
738
|
-
"run_id": run_id(),
|
|
739
|
-
"scope": "binding_consistency",
|
|
740
|
-
"boundary": COMMS_BOUNDARY_TEXT,
|
|
741
|
-
"checks": {
|
|
742
|
-
"receiver_binding": receiver_binding,
|
|
743
|
-
"contract_suite": {
|
|
744
|
-
"status": "deferred",
|
|
745
|
-
"deferred_to": "0.2.9",
|
|
746
|
-
"reason": "contract test files not shipped with package",
|
|
747
|
-
"message": "comms contract verification deferred to 0.2.9; contract test files not shipped with package",
|
|
748
|
-
},
|
|
749
|
-
"provider_sdk_calls": {
|
|
750
|
-
"status": "pass",
|
|
751
|
-
"verifies": "no_provider_sdk_calls",
|
|
752
|
-
"calls": {
|
|
753
|
-
"anthropic": 0,
|
|
754
|
-
"openai": 0,
|
|
755
|
-
"httpx": 0,
|
|
756
|
-
},
|
|
757
|
-
},
|
|
758
|
-
},
|
|
759
|
-
}))
|
|
760
|
-
}
|
|
761
|
-
|
|
762
|
-
pub(super) fn receiver_binding_mismatches(
|
|
763
|
-
owner_pane_id: &Value,
|
|
764
|
-
caller_pane_id: &Value,
|
|
765
|
-
pane_id: &Value,
|
|
766
|
-
) -> Vec<Value> {
|
|
767
|
-
let mut mismatches = Vec::new();
|
|
768
|
-
if pane_mismatch(owner_pane_id, pane_id) {
|
|
769
|
-
mismatches.push(json!("owner_receiver_pane_mismatch"));
|
|
770
|
-
}
|
|
771
|
-
if pane_mismatch(caller_pane_id, owner_pane_id) {
|
|
772
|
-
mismatches.push(json!("caller_owner_pane_mismatch"));
|
|
773
|
-
}
|
|
774
|
-
if pane_mismatch(caller_pane_id, pane_id) {
|
|
775
|
-
mismatches.push(json!("caller_receiver_pane_mismatch"));
|
|
776
|
-
}
|
|
777
|
-
mismatches
|
|
778
|
-
}
|
|
779
|
-
|
|
780
|
-
fn pane_mismatch(left: &Value, right: &Value) -> bool {
|
|
781
|
-
let Some(left) = left.as_str().filter(|s| !s.is_empty()) else {
|
|
782
|
-
return false;
|
|
783
|
-
};
|
|
784
|
-
let Some(right) = right.as_str().filter(|s| !s.is_empty()) else {
|
|
785
|
-
return false;
|
|
786
|
-
};
|
|
787
|
-
left != right
|
|
1849
|
+
pub fn comms_selftest(
|
|
1850
|
+
workspace: &Path,
|
|
1851
|
+
team: Option<&str>,
|
|
1852
|
+
gate: Option<&str>,
|
|
1853
|
+
) -> Result<Value, CliError> {
|
|
1854
|
+
crate::diagnose::comms::doctor_comms_json(workspace, team, gate)
|
|
788
1855
|
}
|
|
789
1856
|
|
|
790
1857
|
/// `orphan_gate(fix, confirm)`(`--gate orphans`)。CI gate。
|
|
791
1858
|
pub fn orphan_gate(fix: bool, confirm: bool) -> Result<Value, CliError> {
|
|
792
|
-
|
|
793
|
-
return Ok(json!({
|
|
794
|
-
"ok": false,
|
|
795
|
-
"gate": "orphans",
|
|
796
|
-
"status": "refused",
|
|
797
|
-
"reason": "fix_requires_confirm",
|
|
798
|
-
"action": "re-run with --gate orphans --fix --confirm",
|
|
799
|
-
}));
|
|
800
|
-
}
|
|
801
|
-
Ok(json!({
|
|
802
|
-
"ok": true,
|
|
803
|
-
"gate": "orphans",
|
|
804
|
-
"status": "passed",
|
|
805
|
-
"scanned": 0,
|
|
806
|
-
"dry_run": !fix,
|
|
807
|
-
"scanned_at": chrono::Utc::now().to_rfc3339(),
|
|
808
|
-
"action_required": false,
|
|
809
|
-
"fix": fix,
|
|
810
|
-
}))
|
|
1859
|
+
crate::diagnose::orphans::orphan_gate_json(fix, confirm)
|
|
811
1860
|
}
|
|
812
1861
|
/// `cleanup_orphan_coordinators(confirm)`(`--cleanup-orphans`;dry-run unless `--confirm`)。
|
|
813
1862
|
pub fn cleanup_orphans(confirm: bool) -> Result<Value, CliError> {
|
|
814
|
-
|
|
815
|
-
return Ok(json!({
|
|
816
|
-
"ok": true,
|
|
817
|
-
"scanned": 0,
|
|
818
|
-
"orphans": [],
|
|
819
|
-
"dry_run": false,
|
|
820
|
-
"scanned_at": chrono::Utc::now().to_rfc3339(),
|
|
821
|
-
"killed": [],
|
|
822
|
-
"failed": [],
|
|
823
|
-
}));
|
|
824
|
-
}
|
|
825
|
-
Ok(json!({
|
|
826
|
-
"ok": true,
|
|
827
|
-
"scanned": 0,
|
|
828
|
-
"orphans": [],
|
|
829
|
-
"dry_run": true,
|
|
830
|
-
"scanned_at": chrono::Utc::now().to_rfc3339(),
|
|
831
|
-
"action_required": "re-run with --confirm to send SIGTERM",
|
|
832
|
-
}))
|
|
1863
|
+
crate::diagnose::orphans::cleanup_orphans_json(confirm)
|
|
833
1864
|
}
|
|
834
1865
|
/// `fix_schema_layout`(`--fix-schema`)/`schema_diagnosis`。
|
|
835
1866
|
pub fn fix_schema(workspace: &Path) -> Result<Value, CliError> {
|
|
836
1867
|
let db_path = workspace.join(".team").join("runtime").join("team.db");
|
|
837
|
-
let result =
|
|
838
|
-
|
|
1868
|
+
let result =
|
|
1869
|
+
crate::db::migration::fix_schema_layout(workspace, crate::db::schema::SCHEMA_VERSION)
|
|
1870
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
839
1871
|
match result {
|
|
840
|
-
crate::db::migration::FixResult::Missing(diagnosis) =>
|
|
841
|
-
|
|
842
|
-
|
|
1872
|
+
crate::db::migration::FixResult::Missing(diagnosis) => Ok(fix_schema_value(
|
|
1873
|
+
&db_path,
|
|
1874
|
+
diagnosis,
|
|
1875
|
+
false,
|
|
1876
|
+
Vec::new(),
|
|
1877
|
+
None,
|
|
1878
|
+
None,
|
|
1879
|
+
)),
|
|
843
1880
|
crate::db::migration::FixResult::Blocked { reason } => Ok(json!({
|
|
844
1881
|
"ok": false,
|
|
845
1882
|
"status": "blocked",
|
|
@@ -848,12 +1885,22 @@ pub mod diagnose_port {
|
|
|
848
1885
|
"reason": reason,
|
|
849
1886
|
"fixed": false,
|
|
850
1887
|
})),
|
|
851
|
-
crate::db::migration::FixResult::Fixed {
|
|
1888
|
+
crate::db::migration::FixResult::Fixed {
|
|
1889
|
+
diagnosis,
|
|
1890
|
+
rebuilds,
|
|
1891
|
+
} => {
|
|
852
1892
|
let backup = rebuilds
|
|
853
1893
|
.first()
|
|
854
1894
|
.map(|event| event.backup_path.clone())
|
|
855
1895
|
.unwrap_or_else(|| backup_path_preview(&db_path, diagnosis.user_version));
|
|
856
|
-
Ok(fix_schema_value(
|
|
1896
|
+
Ok(fix_schema_value(
|
|
1897
|
+
&db_path,
|
|
1898
|
+
diagnosis,
|
|
1899
|
+
true,
|
|
1900
|
+
rebuild_values(rebuilds),
|
|
1901
|
+
Some(backup),
|
|
1902
|
+
Some("none"),
|
|
1903
|
+
))
|
|
857
1904
|
}
|
|
858
1905
|
}
|
|
859
1906
|
}
|
|
@@ -888,7 +1935,9 @@ pub mod diagnose_port {
|
|
|
888
1935
|
fn backup_path_preview(db_path: &Path, user_version: i64) -> String {
|
|
889
1936
|
let stamp = chrono::Utc::now().format("%Y%m%dT%H%M%SZ");
|
|
890
1937
|
db_path
|
|
891
|
-
.with_file_name(format!(
|
|
1938
|
+
.with_file_name(format!(
|
|
1939
|
+
"team.db.pre-migration-{stamp}-from-v{user_version}.bak"
|
|
1940
|
+
))
|
|
892
1941
|
.to_string_lossy()
|
|
893
1942
|
.to_string()
|
|
894
1943
|
}
|
|
@@ -953,7 +2002,9 @@ pub mod diagnose_port {
|
|
|
953
2002
|
})
|
|
954
2003
|
}
|
|
955
2004
|
|
|
956
|
-
fn coordinator_status_wire(
|
|
2005
|
+
fn coordinator_status_wire(
|
|
2006
|
+
status: crate::coordinator::CoordinatorHealthStatus,
|
|
2007
|
+
) -> &'static str {
|
|
957
2008
|
match status {
|
|
958
2009
|
crate::coordinator::CoordinatorHealthStatus::Missing => "missing",
|
|
959
2010
|
crate::coordinator::CoordinatorHealthStatus::InvalidPid => "invalid_pid",
|
|
@@ -970,7 +2021,11 @@ pub mod leader_port {
|
|
|
970
2021
|
use super::*;
|
|
971
2022
|
|
|
972
2023
|
/// `runtime.takeover(workspace, team, confirm)` 的 CLI `--json` 投影。
|
|
973
|
-
pub fn takeover(
|
|
2024
|
+
pub fn takeover(
|
|
2025
|
+
workspace: &Path,
|
|
2026
|
+
team: Option<&str>,
|
|
2027
|
+
confirm: bool,
|
|
2028
|
+
) -> Result<Value, CliError> {
|
|
974
2029
|
if !confirm && !positive_caller_pane_env_present() {
|
|
975
2030
|
return Ok(json!({
|
|
976
2031
|
"ok": false,
|
|
@@ -993,7 +2048,11 @@ pub mod leader_port {
|
|
|
993
2048
|
Ok(lease_value(result))
|
|
994
2049
|
}
|
|
995
2050
|
/// `runtime.claim_leader(...)` 的 CLI `--json` 投影(`cmd_claim_leader`;含 inbox_hint)。
|
|
996
|
-
pub fn claim_leader(
|
|
2051
|
+
pub fn claim_leader(
|
|
2052
|
+
workspace: &Path,
|
|
2053
|
+
team: Option<&str>,
|
|
2054
|
+
confirm: bool,
|
|
2055
|
+
) -> Result<Value, CliError> {
|
|
997
2056
|
let state = crate::state::persist::load_runtime_state(workspace)
|
|
998
2057
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
999
2058
|
let Some(team_id) = resolve_owner_team_id(&state, team) else {
|
|
@@ -1021,13 +2080,23 @@ pub mod leader_port {
|
|
|
1021
2080
|
/// `runtime.attach_leader(...)` 的 CLI `--json` 投影。
|
|
1022
2081
|
pub fn attach_leader(
|
|
1023
2082
|
workspace: &Path,
|
|
2083
|
+
team: Option<&str>,
|
|
1024
2084
|
pane: Option<&crate::transport::PaneId>,
|
|
1025
2085
|
provider: crate::provider::Provider,
|
|
2086
|
+
_confirm: bool,
|
|
1026
2087
|
) -> Result<Value, CliError> {
|
|
1027
2088
|
let result = crate::leader::attach_leader(workspace, pane, provider)
|
|
1028
2089
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
1029
|
-
let requeued =
|
|
1030
|
-
|
|
2090
|
+
let requeued =
|
|
2091
|
+
attach_requeued_exhausted_watchers(workspace, result.bound_pane_id.as_ref())?;
|
|
2092
|
+
let mut value = attach_lease_value(result, requeued);
|
|
2093
|
+
if let Some(obj) = value.as_object_mut() {
|
|
2094
|
+
if let Some(team) = team {
|
|
2095
|
+
obj.insert("team".to_string(), json!(team));
|
|
2096
|
+
obj.insert("team_key".to_string(), json!(team));
|
|
2097
|
+
}
|
|
2098
|
+
}
|
|
2099
|
+
Ok(value)
|
|
1031
2100
|
}
|
|
1032
2101
|
|
|
1033
2102
|
/// `runtime.leader_identity(workspace, team)`(`cmd_identity`)。
|
|
@@ -1074,12 +2143,16 @@ pub mod leader_port {
|
|
|
1074
2143
|
None
|
|
1075
2144
|
}
|
|
1076
2145
|
}
|
|
1077
|
-
None => Some(TeamKey::new(crate::state::projection::team_state_key(
|
|
2146
|
+
None => Some(TeamKey::new(crate::state::projection::team_state_key(
|
|
2147
|
+
state,
|
|
2148
|
+
))),
|
|
1078
2149
|
}
|
|
1079
2150
|
}
|
|
1080
2151
|
|
|
1081
2152
|
fn positive_caller_pane_env_present() -> bool {
|
|
1082
|
-
std::env::var("TMUX_PANE")
|
|
2153
|
+
std::env::var("TMUX_PANE")
|
|
2154
|
+
.ok()
|
|
2155
|
+
.is_some_and(|pane| !pane.is_empty())
|
|
1083
2156
|
|| std::env::var("TEAM_AGENT_LEADER_PANE_ID")
|
|
1084
2157
|
.ok()
|
|
1085
2158
|
.is_some_and(|pane| !pane.is_empty())
|
|
@@ -1117,7 +2190,10 @@ pub mod leader_port {
|
|
|
1117
2190
|
fn lease_value(result: crate::leader::LeaseResult) -> Value {
|
|
1118
2191
|
let mut out = serde_json::Map::new();
|
|
1119
2192
|
out.insert("ok".to_string(), json!(result.ok));
|
|
1120
|
-
out.insert(
|
|
2193
|
+
out.insert(
|
|
2194
|
+
"status".to_string(),
|
|
2195
|
+
json!(lease_status_wire(result.status)),
|
|
2196
|
+
);
|
|
1121
2197
|
if let Some(reason) = result.reason {
|
|
1122
2198
|
out.insert("reason".to_string(), json!(lease_reason_wire(reason)));
|
|
1123
2199
|
}
|
|
@@ -1131,10 +2207,16 @@ pub mod leader_port {
|
|
|
1131
2207
|
out.insert("bound_pane_id".to_string(), json!(pane.as_str()));
|
|
1132
2208
|
}
|
|
1133
2209
|
if let Some(receiver) = result.receiver {
|
|
1134
|
-
out.insert(
|
|
2210
|
+
out.insert(
|
|
2211
|
+
"leader_receiver".to_string(),
|
|
2212
|
+
serde_json::to_value(receiver).unwrap_or(Value::Null),
|
|
2213
|
+
);
|
|
1135
2214
|
}
|
|
1136
2215
|
if let Some(owner) = result.owner {
|
|
1137
|
-
out.insert(
|
|
2216
|
+
out.insert(
|
|
2217
|
+
"team_owner".to_string(),
|
|
2218
|
+
serde_json::to_value(owner).unwrap_or(Value::Null),
|
|
2219
|
+
);
|
|
1138
2220
|
}
|
|
1139
2221
|
Value::Object(out)
|
|
1140
2222
|
}
|
|
@@ -1178,7 +2260,10 @@ pub mod leader_port {
|
|
|
1178
2260
|
/// STRING list. (Current divergent body — the `requeued` Vec<WatcherNotice> objects — kept until
|
|
1179
2261
|
/// porter-c ports; pinned RED in cli::tests asserts the golden string list.)
|
|
1180
2262
|
pub(crate) fn project_requeued_exhausted_watchers(event: &Value) -> Value {
|
|
1181
|
-
event
|
|
2263
|
+
event
|
|
2264
|
+
.get("watcher_ids")
|
|
2265
|
+
.cloned()
|
|
2266
|
+
.unwrap_or_else(|| json!([]))
|
|
1182
2267
|
}
|
|
1183
2268
|
|
|
1184
2269
|
fn lease_status_wire(status: crate::leader::LeaseStatus) -> &'static str {
|