@team-agent/installer 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/adapters.rs +38 -7
- package/crates/team-agent/src/cli/emit.rs +182 -54
- package/crates/team-agent/src/cli/mod.rs +703 -35
- package/crates/team-agent/src/cli/status_port.rs +170 -44
- package/crates/team-agent/src/cli/tests/run_delegation.rs +2 -0
- package/crates/team-agent/src/cli/types.rs +1 -0
- package/crates/team-agent/src/coordinator/health.rs +130 -0
- package/crates/team-agent/src/leader/lease.rs +23 -2
- package/crates/team-agent/src/leader/rediscover/tests.rs +1 -0
- package/crates/team-agent/src/leader/rediscover.rs +2 -0
- package/crates/team-agent/src/leader/tests/byte_findings.rs +9 -6
- package/crates/team-agent/src/leader/tests/idle.rs +1 -0
- package/crates/team-agent/src/leader/tests/lease_claim.rs +157 -0
- package/crates/team-agent/src/leader/types.rs +2 -0
- package/crates/team-agent/src/lifecycle/launch.rs +554 -65
- package/crates/team-agent/src/lifecycle/restart/common.rs +65 -0
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +57 -15
- package/crates/team-agent/src/lifecycle/restart/remove.rs +5 -1
- package/crates/team-agent/src/lifecycle/restart.rs +20 -0
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +52 -0
- package/crates/team-agent/src/lifecycle/types.rs +25 -0
- package/crates/team-agent/src/mcp_server/tests/wire.rs +28 -0
- package/crates/team-agent/src/mcp_server/wire.rs +81 -1
- package/crates/team-agent/src/messaging/delivery.rs +574 -12
- package/crates/team-agent/src/messaging/leader_receiver.rs +26 -37
- package/crates/team-agent/src/messaging/mod.rs +1 -1
- package/crates/team-agent/src/messaging/results.rs +218 -49
- package/crates/team-agent/src/messaging/send.rs +15 -19
- package/crates/team-agent/src/provider/adapter.rs +95 -10
- package/crates/team-agent/src/provider/helpers.rs +10 -1
- package/crates/team-agent/src/state/identity.rs +3 -0
- package/crates/team-agent/src/state/persist.rs +113 -1
- package/crates/team-agent/src/state/projection.rs +127 -3
- package/crates/team-agent/src/tmux_backend/tests.rs +179 -0
- package/crates/team-agent/src/tmux_backend.rs +124 -12
- package/npm/install.mjs +29 -7
- package/package.json +4 -4
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
// §10: the CLI command implementation layer forbids unwrap/expect/panic (unimplemented!() stubs are not caught by these lints); each tests submodule adds its own allow.
|
|
29
29
|
#![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
|
|
30
30
|
|
|
31
|
+
use std::io::Read;
|
|
31
32
|
use std::path::{Path, PathBuf};
|
|
32
33
|
|
|
33
34
|
use serde::{Deserialize, Serialize};
|
|
@@ -91,8 +92,7 @@ pub mod lifecycle_port {
|
|
|
91
92
|
yes: bool,
|
|
92
93
|
fresh: bool,
|
|
93
94
|
) -> Result<Value, CliError> {
|
|
94
|
-
|
|
95
|
-
match crate::lifecycle::quick_start(agents_dir, name, yes, fresh, team_id) {
|
|
95
|
+
match crate::lifecycle::quick_start_in_workspace(workspace, agents_dir, name, yes, fresh, team_id) {
|
|
96
96
|
Ok(report) => Ok(quick_start_value(report)),
|
|
97
97
|
Err(e) => Ok(error_value(e)),
|
|
98
98
|
}
|
|
@@ -121,13 +121,29 @@ pub mod lifecycle_port {
|
|
|
121
121
|
}
|
|
122
122
|
/// `runtime.shutdown`(`cmd_shutdown`)。
|
|
123
123
|
pub fn shutdown(workspace: &Path, keep_logs: bool, team: Option<&str>) -> Result<Value, CliError> {
|
|
124
|
-
// CP-1: workspace-bound backend so kill-session hits the per-team `tmux -L <socket>` server,
|
|
125
|
-
// then tear that server down so the per-team socket does not orphan (best-effort).
|
|
126
124
|
let run_ws = crate::model::paths::canonical_run_workspace(workspace)
|
|
127
125
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
128
|
-
let
|
|
129
|
-
let
|
|
130
|
-
transport
|
|
126
|
+
let state = shutdown_state_for_team(&run_ws, team)?;
|
|
127
|
+
let endpoint = stored_tmux_endpoint(&state);
|
|
128
|
+
let transport = match endpoint {
|
|
129
|
+
Some(endpoint) if Path::new(endpoint).is_absolute() => {
|
|
130
|
+
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
|
|
131
|
+
}
|
|
132
|
+
Some(endpoint) if !endpoint.is_empty() => {
|
|
133
|
+
crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
|
|
134
|
+
}
|
|
135
|
+
_ => shutdown_workspace_transport(&run_ws),
|
|
136
|
+
};
|
|
137
|
+
let result = shutdown_with_transport_and_state(
|
|
138
|
+
workspace,
|
|
139
|
+
keep_logs,
|
|
140
|
+
team,
|
|
141
|
+
&transport,
|
|
142
|
+
Some(state),
|
|
143
|
+
);
|
|
144
|
+
if team.is_none() {
|
|
145
|
+
transport.kill_server();
|
|
146
|
+
}
|
|
131
147
|
result
|
|
132
148
|
}
|
|
133
149
|
|
|
@@ -137,27 +153,107 @@ pub mod lifecycle_port {
|
|
|
137
153
|
team: Option<&str>,
|
|
138
154
|
transport: &dyn crate::transport::Transport,
|
|
139
155
|
) -> Result<Value, CliError> {
|
|
140
|
-
|
|
141
|
-
|
|
156
|
+
shutdown_with_transport_and_state(workspace, keep_logs, team, transport, None)
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
fn shutdown_with_transport_and_state(
|
|
160
|
+
workspace: &Path,
|
|
161
|
+
keep_logs: bool,
|
|
162
|
+
team: Option<&str>,
|
|
163
|
+
transport: &dyn crate::transport::Transport,
|
|
164
|
+
state: Option<Value>,
|
|
165
|
+
) -> Result<Value, CliError> {
|
|
166
|
+
let run_workspace = crate::model::paths::canonical_run_workspace(workspace)
|
|
167
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
168
|
+
let stopped = if team.is_none() {
|
|
169
|
+
let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
|
|
170
|
+
Some(
|
|
171
|
+
crate::coordinator::stop_coordinator(&wp)
|
|
172
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?,
|
|
173
|
+
)
|
|
174
|
+
} else {
|
|
175
|
+
None
|
|
176
|
+
};
|
|
177
|
+
let mut state = match state {
|
|
178
|
+
Some(state) => state,
|
|
179
|
+
None => shutdown_state_for_team(&run_workspace, team)?,
|
|
180
|
+
};
|
|
181
|
+
let stored_transport = stored_tmux_endpoint(&state).map(tmux_transport_for_endpoint);
|
|
182
|
+
let transport = stored_transport
|
|
183
|
+
.as_ref()
|
|
184
|
+
.map(|transport| transport as &dyn crate::transport::Transport)
|
|
185
|
+
.unwrap_or(transport);
|
|
186
|
+
let captured_missing_sessions = crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
|
|
142
187
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
143
|
-
let mut state = crate::state::persist::load_runtime_state(workspace)?;
|
|
144
188
|
let session_name = state
|
|
145
189
|
.get("session_name")
|
|
146
190
|
.and_then(Value::as_str)
|
|
147
191
|
.filter(|s| !s.is_empty())
|
|
148
192
|
.map(crate::transport::SessionName::new);
|
|
149
|
-
let
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
193
|
+
let mut root_pids = state_process_roots(&state);
|
|
194
|
+
let pane_pids = session_name
|
|
195
|
+
.as_ref()
|
|
196
|
+
.map(|session| pane_pids_for_session(transport, session))
|
|
197
|
+
.unwrap_or_default();
|
|
198
|
+
root_pids.extend(pane_pids);
|
|
199
|
+
root_pids.sort_unstable();
|
|
200
|
+
root_pids.dedup();
|
|
201
|
+
let root_pgids = process_pgids(&root_pids);
|
|
202
|
+
for pid in &root_pids {
|
|
203
|
+
reap_process_tree(*pid);
|
|
204
|
+
}
|
|
205
|
+
reap_process_groups(&root_pgids);
|
|
206
|
+
let mut kill_error: Option<String> = None;
|
|
207
|
+
if let Some(session) = session_name.as_ref() {
|
|
208
|
+
if let Err(error) = transport.kill_session(session) {
|
|
209
|
+
if !tmux_absent_error(&error.to_string()) {
|
|
210
|
+
kill_error = Some(error.to_string());
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
reap_workspace_process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
|
|
215
|
+
let session_residuals = if let Some(session) = session_name.as_ref() {
|
|
216
|
+
let (residuals, error) = session_residuals_after_reap(
|
|
217
|
+
transport,
|
|
218
|
+
&run_workspace,
|
|
219
|
+
session,
|
|
220
|
+
!captured_missing_sessions,
|
|
221
|
+
);
|
|
222
|
+
if let Some(error) = error {
|
|
223
|
+
kill_error.get_or_insert(error);
|
|
154
224
|
}
|
|
225
|
+
residuals
|
|
155
226
|
} else {
|
|
156
|
-
|
|
227
|
+
Vec::new()
|
|
157
228
|
};
|
|
229
|
+
let process_residuals = process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
|
|
230
|
+
let session_killed = session_name.is_some()
|
|
231
|
+
&& kill_error.is_none()
|
|
232
|
+
&& session_residuals.is_empty()
|
|
233
|
+
&& process_residuals.is_empty();
|
|
158
234
|
mark_agents_stopped(&mut state);
|
|
159
|
-
|
|
160
|
-
|
|
235
|
+
if team.is_some() {
|
|
236
|
+
crate::state::projection::save_team_scoped_state(&run_workspace, &state)?;
|
|
237
|
+
} else {
|
|
238
|
+
crate::state::persist::save_runtime_state(&run_workspace, &state)?;
|
|
239
|
+
}
|
|
240
|
+
let coordinator_status = stopped
|
|
241
|
+
.as_ref()
|
|
242
|
+
.map(|stopped| stop_status_wire(stopped.status))
|
|
243
|
+
.unwrap_or("not_stopped");
|
|
244
|
+
let coordinator_pid = stopped.as_ref().and_then(|stopped| stopped.pid.map(|p| p.get()));
|
|
245
|
+
let ok = stopped.as_ref().map(|stopped| stopped.ok).unwrap_or(true)
|
|
246
|
+
&& kill_error.is_none()
|
|
247
|
+
&& session_residuals.is_empty()
|
|
248
|
+
&& process_residuals.is_empty();
|
|
249
|
+
let status = if ok {
|
|
250
|
+
"ok"
|
|
251
|
+
} else if kill_error.is_some() {
|
|
252
|
+
"failed"
|
|
253
|
+
} else {
|
|
254
|
+
"partial"
|
|
255
|
+
};
|
|
256
|
+
let _event = crate::event_log::EventLog::new(&run_workspace)
|
|
161
257
|
.write(
|
|
162
258
|
"lifecycle.shutdown",
|
|
163
259
|
json!({
|
|
@@ -165,22 +261,531 @@ pub mod lifecycle_port {
|
|
|
165
261
|
"team": team,
|
|
166
262
|
"session_name": session_name.as_ref().map(|s| s.as_str().to_string()),
|
|
167
263
|
"session_killed": session_killed,
|
|
168
|
-
"coordinator_status":
|
|
264
|
+
"coordinator_status": coordinator_status,
|
|
265
|
+
"status": status,
|
|
169
266
|
}),
|
|
170
267
|
)
|
|
171
268
|
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
172
269
|
Ok(json!({
|
|
173
|
-
"ok":
|
|
270
|
+
"ok": ok,
|
|
271
|
+
"status": status,
|
|
174
272
|
"keep_logs": keep_logs,
|
|
175
273
|
"team": team,
|
|
176
274
|
"session_name": session_name.map(|s| s.as_str().to_string()),
|
|
177
275
|
"session_killed": session_killed,
|
|
276
|
+
"residuals": {
|
|
277
|
+
"sessions": session_residuals,
|
|
278
|
+
"processes": process_residuals,
|
|
279
|
+
},
|
|
280
|
+
"error": kill_error,
|
|
178
281
|
"coordinator": {
|
|
179
|
-
"status":
|
|
180
|
-
"pid":
|
|
282
|
+
"status": coordinator_status,
|
|
283
|
+
"pid": coordinator_pid,
|
|
181
284
|
}
|
|
182
285
|
}))
|
|
183
286
|
}
|
|
287
|
+
|
|
288
|
+
fn shutdown_state_for_team(workspace: &Path, team: Option<&str>) -> Result<Value, CliError> {
|
|
289
|
+
if let Some(team) = team {
|
|
290
|
+
crate::state::projection::select_runtime_state(workspace, Some(team)).map_err(CliError::from)
|
|
291
|
+
} else {
|
|
292
|
+
crate::state::persist::load_runtime_state(workspace).map_err(CliError::from)
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
fn shutdown_workspace_transport(workspace: &Path) -> crate::tmux_backend::TmuxBackend {
|
|
297
|
+
crate::tmux_backend::TmuxBackend::for_workspace(workspace)
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
fn tmux_transport_for_endpoint(endpoint: &str) -> crate::tmux_backend::TmuxBackend {
|
|
301
|
+
if Path::new(endpoint).is_absolute() {
|
|
302
|
+
crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
|
|
303
|
+
} else {
|
|
304
|
+
crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
fn stored_tmux_endpoint(state: &Value) -> Option<&str> {
|
|
309
|
+
leader_receiver_tmux_socket(state)
|
|
310
|
+
.or_else(|| active_team_entry(state).and_then(leader_receiver_tmux_socket))
|
|
311
|
+
.or_else(|| only_team_entry(state).and_then(leader_receiver_tmux_socket))
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
fn leader_receiver_tmux_socket(state: &Value) -> Option<&str> {
|
|
315
|
+
state
|
|
316
|
+
.get("leader_receiver")
|
|
317
|
+
.and_then(|receiver| receiver.get("tmux_socket"))
|
|
318
|
+
.and_then(Value::as_str)
|
|
319
|
+
.filter(|socket| !socket.is_empty())
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
fn active_team_entry(state: &Value) -> Option<&Value> {
|
|
323
|
+
let active = state
|
|
324
|
+
.get("active_team_key")
|
|
325
|
+
.and_then(Value::as_str)
|
|
326
|
+
.filter(|team| !team.is_empty())?;
|
|
327
|
+
state
|
|
328
|
+
.get("teams")
|
|
329
|
+
.and_then(Value::as_object)
|
|
330
|
+
.and_then(|teams| teams.get(active))
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
/// Returns the single entry of the `teams` object, or `None` when `teams` is
/// missing, is not an object, or holds any number of entries other than one.
fn only_team_entry(state: &Value) -> Option<&Value> {
    let teams = state.get("teams")?.as_object()?;
    let mut entries = teams.values();
    match (entries.next(), entries.next()) {
        (Some(only), None) => Some(only),
        _ => None,
    }
}
|
|
341
|
+
|
|
342
|
+
fn pane_pids_for_session(
|
|
343
|
+
transport: &dyn crate::transport::Transport,
|
|
344
|
+
session: &crate::transport::SessionName,
|
|
345
|
+
) -> Vec<u32> {
|
|
346
|
+
transport
|
|
347
|
+
.list_targets()
|
|
348
|
+
.unwrap_or_default()
|
|
349
|
+
.into_iter()
|
|
350
|
+
.filter(|pane| pane.session.as_str() == session.as_str())
|
|
351
|
+
.filter_map(|pane| pane.pane_pid)
|
|
352
|
+
.collect()
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
fn session_residuals_after_reap(
|
|
356
|
+
transport: &dyn crate::transport::Transport,
|
|
357
|
+
workspace: &Path,
|
|
358
|
+
session: &crate::transport::SessionName,
|
|
359
|
+
check_primary_transport: bool,
|
|
360
|
+
) -> (Vec<String>, Option<String>) {
|
|
361
|
+
let mut residual = false;
|
|
362
|
+
let mut error = None;
|
|
363
|
+
if check_primary_transport {
|
|
364
|
+
match transport.has_session(session) {
|
|
365
|
+
Ok(true) => residual = true,
|
|
366
|
+
Ok(false) => {}
|
|
367
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
368
|
+
Err(err) => {
|
|
369
|
+
error = Some(err.to_string());
|
|
370
|
+
residual = true;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
let workspace_transport = shutdown_workspace_transport(workspace);
|
|
375
|
+
match crate::transport::Transport::has_session(&workspace_transport, session) {
|
|
376
|
+
Ok(true) => residual = true,
|
|
377
|
+
Ok(false) => {}
|
|
378
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
379
|
+
Err(err) => {
|
|
380
|
+
error.get_or_insert_with(|| err.to_string());
|
|
381
|
+
residual = true;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
let default_transport = crate::tmux_backend::TmuxBackend::new();
|
|
385
|
+
match crate::transport::Transport::has_session(&default_transport, session) {
|
|
386
|
+
Ok(true) => residual = true,
|
|
387
|
+
Ok(false) => {}
|
|
388
|
+
Err(err) if tmux_absent_error(&err.to_string()) => {}
|
|
389
|
+
Err(err) => {
|
|
390
|
+
error.get_or_insert_with(|| err.to_string());
|
|
391
|
+
residual = true;
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
let sessions = if residual {
|
|
395
|
+
vec![session.as_str().to_string()]
|
|
396
|
+
} else {
|
|
397
|
+
Vec::new()
|
|
398
|
+
};
|
|
399
|
+
(sessions, error)
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
fn state_process_roots(state: &Value) -> Vec<u32> {
|
|
403
|
+
let mut out = Vec::new();
|
|
404
|
+
collect_agent_process_roots(state, &mut out);
|
|
405
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
406
|
+
for team in teams.values() {
|
|
407
|
+
collect_agent_process_roots(team, &mut out);
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
out.sort_unstable();
|
|
411
|
+
out.dedup();
|
|
412
|
+
out
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
fn collect_agent_process_roots(state: &Value, out: &mut Vec<u32>) {
|
|
416
|
+
let Some(agents) = state.get("agents").and_then(Value::as_object) else {
|
|
417
|
+
return;
|
|
418
|
+
};
|
|
419
|
+
for agent in agents.values() {
|
|
420
|
+
for key in ["provider_pid", "process_id", "pid", "child_pid", "pane_pid"] {
|
|
421
|
+
if let Some(pid) = agent.get(key).and_then(value_u32) {
|
|
422
|
+
out.push(pid);
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
fn value_u32(value: &Value) -> Option<u32> {
|
|
429
|
+
value
|
|
430
|
+
.as_u64()
|
|
431
|
+
.and_then(|pid| u32::try_from(pid).ok())
|
|
432
|
+
.or_else(|| value.as_str().and_then(|pid| pid.parse::<u32>().ok()))
|
|
433
|
+
.filter(|pid| *pid > 0)
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
fn reap_process_tree(root_pid: u32) {
|
|
437
|
+
let pids = process_tree_pids(root_pid);
|
|
438
|
+
for pid in pids.iter().rev() {
|
|
439
|
+
send_process_signal(*pid, libc::SIGTERM);
|
|
440
|
+
}
|
|
441
|
+
std::thread::sleep(std::time::Duration::from_millis(150));
|
|
442
|
+
for pid in pids.iter().rev() {
|
|
443
|
+
send_process_signal(*pid, libc::SIGKILL);
|
|
444
|
+
}
|
|
445
|
+
wait_for_processes_gone(&pids, std::time::Duration::from_secs(1));
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
fn reap_process_groups(pgids: &[u32]) {
|
|
449
|
+
let current_pgid = unsafe { libc::getpgrp() };
|
|
450
|
+
for pgid in pgids {
|
|
451
|
+
let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
|
|
452
|
+
continue;
|
|
453
|
+
};
|
|
454
|
+
if pgid_t <= 1 || pgid_t == current_pgid {
|
|
455
|
+
continue;
|
|
456
|
+
}
|
|
457
|
+
send_process_signal_group(pgid_t, libc::SIGTERM);
|
|
458
|
+
}
|
|
459
|
+
std::thread::sleep(std::time::Duration::from_millis(150));
|
|
460
|
+
for pgid in pgids {
|
|
461
|
+
let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
|
|
462
|
+
continue;
|
|
463
|
+
};
|
|
464
|
+
if pgid_t <= 1 || pgid_t == current_pgid {
|
|
465
|
+
continue;
|
|
466
|
+
}
|
|
467
|
+
send_process_signal_group(pgid_t, libc::SIGKILL);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
fn reap_workspace_process_residuals(
|
|
472
|
+
workspace: &Path,
|
|
473
|
+
state: &Value,
|
|
474
|
+
root_pids: &[u32],
|
|
475
|
+
root_pgids: &[u32],
|
|
476
|
+
) {
|
|
477
|
+
for _ in 0..5 {
|
|
478
|
+
let residuals = matched_processes(workspace, state, root_pids, root_pgids);
|
|
479
|
+
if residuals.is_empty() {
|
|
480
|
+
return;
|
|
481
|
+
}
|
|
482
|
+
for process in &residuals {
|
|
483
|
+
reap_process_tree(process.pid);
|
|
484
|
+
}
|
|
485
|
+
let pgids = residuals
|
|
486
|
+
.iter()
|
|
487
|
+
.filter_map(|process| process.pgid)
|
|
488
|
+
.collect::<Vec<_>>();
|
|
489
|
+
reap_process_groups(&pgids);
|
|
490
|
+
std::thread::sleep(std::time::Duration::from_millis(100));
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
fn process_tree_pids(root_pid: u32) -> Vec<u32> {
|
|
495
|
+
if root_pid == 0 {
|
|
496
|
+
return Vec::new();
|
|
497
|
+
}
|
|
498
|
+
let pairs = process_parent_pairs();
|
|
499
|
+
let mut out = vec![root_pid];
|
|
500
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
501
|
+
seen.insert(root_pid);
|
|
502
|
+
let mut index = 0;
|
|
503
|
+
while index < out.len() {
|
|
504
|
+
let parent = out[index];
|
|
505
|
+
for (pid, ppid) in &pairs {
|
|
506
|
+
if *ppid == parent && seen.insert(*pid) {
|
|
507
|
+
out.push(*pid);
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
index += 1;
|
|
511
|
+
}
|
|
512
|
+
out
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
/// Runs `ps -axo pid=,ppid=` and returns the `(pid, ppid)` pair from every line
/// whose first two columns both parse as `u32`.
///
/// Best-effort: returns an empty list when `ps` cannot be run or exits unsuccessfully.
fn process_parent_pairs() -> Vec<(u32, u32)> {
    let listing = std::process::Command::new("ps")
        .args(["-axo", "pid=,ppid="])
        .output();
    let stdout = match listing {
        Ok(done) if done.status.success() => done.stdout,
        _ => return Vec::new(),
    };
    let text = String::from_utf8_lossy(&stdout);
    let mut pairs = Vec::new();
    for row in text.lines() {
        let mut columns = row.split_whitespace().map(str::parse::<u32>);
        if let (Some(Ok(pid)), Some(Ok(ppid))) = (columns.next(), columns.next()) {
            pairs.push((pid, ppid));
        }
    }
    pairs
}
|
|
533
|
+
|
|
534
|
+
fn process_table() -> Vec<ProcessInfo> {
|
|
535
|
+
let output = match std::process::Command::new("ps")
|
|
536
|
+
.args(["-axo", "pid=,ppid=,pgid=,command="])
|
|
537
|
+
.output()
|
|
538
|
+
{
|
|
539
|
+
Ok(output) if output.status.success() => output,
|
|
540
|
+
_ => return Vec::new(),
|
|
541
|
+
};
|
|
542
|
+
String::from_utf8_lossy(&output.stdout)
|
|
543
|
+
.lines()
|
|
544
|
+
.filter_map(parse_process_info)
|
|
545
|
+
.collect()
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
/// Parses one line of `pid ppid pgid command...` output into a [`ProcessInfo`].
///
/// The first two columns must parse as `u32`, otherwise the line is rejected.
/// The third column becomes `pgid` when it parses and `None` otherwise. Everything
/// after the third column is the command, kept verbatim apart from trimming the
/// ends: inner whitespace runs are preserved so that paths containing consecutive
/// spaces can still be found in the command by substring search.
fn parse_process_info(line: &str) -> Option<ProcessInfo> {
    /// Splits the next whitespace-delimited column off the front of `text`,
    /// returning `(column, remainder)`; `None` when only whitespace is left.
    fn next_column(text: &str) -> Option<(&str, &str)> {
        let text = text.trim_start();
        if text.is_empty() {
            return None;
        }
        let end = text.find(char::is_whitespace).unwrap_or(text.len());
        Some(text.split_at(end))
    }

    let (pid, rest) = next_column(line)?;
    let pid = pid.parse::<u32>().ok()?;
    let (ppid, rest) = next_column(rest)?;
    let ppid = ppid.parse::<u32>().ok()?;
    let (pgid, command) = match next_column(rest) {
        Some((raw, tail)) => (raw.parse::<u32>().ok(), tail.trim()),
        None => (None, ""),
    };
    Some(ProcessInfo {
        pid,
        ppid,
        pgid,
        command: command.to_string(),
    })
}

/// One row of the process table as reported by `ps`.
#[derive(Clone, Debug)]
struct ProcessInfo {
    /// Process id.
    pid: u32,
    /// Parent process id.
    ppid: u32,
    /// Process group id, when the column was present and numeric.
    pgid: Option<u32>,
    /// Command line; may be empty.
    command: String,
}
|
|
569
|
+
|
|
570
|
+
/// Sends `signal` to the process `pid`, ignoring the outcome.
///
/// Pids that do not fit in `pid_t` are silently skipped.
fn send_process_signal(pid: u32, signal: libc::c_int) {
    if let Ok(target) = libc::pid_t::try_from(pid) {
        // SAFETY: `kill` takes two plain integers and touches no caller memory.
        unsafe {
            libc::kill(target, signal);
        }
    }
}
|
|
578
|
+
|
|
579
|
+
/// Sends `signal` to every member of process group `pgid`, ignoring the outcome.
///
/// Group ids `<= 1` are refused here rather than left to callers: negating 0
/// would target the caller's own process group, and negating 1 gives `kill(-1)`,
/// which addresses every process the caller is allowed to signal.
fn send_process_signal_group(pgid: libc::pid_t, signal: libc::c_int) {
    if pgid <= 1 {
        return;
    }
    // SAFETY: `kill` takes two plain integers and touches no caller memory; the
    // guard above ensures `-pgid` names one specific process group.
    unsafe {
        libc::kill(-pgid, signal);
    }
}
|
|
584
|
+
|
|
585
|
+
fn wait_for_processes_gone(pids: &[u32], timeout: std::time::Duration) {
|
|
586
|
+
let start = std::time::Instant::now();
|
|
587
|
+
loop {
|
|
588
|
+
for pid in pids {
|
|
589
|
+
reap_child_if_possible(*pid);
|
|
590
|
+
}
|
|
591
|
+
if !pids.iter().any(|pid| process_is_live(*pid)) || start.elapsed() >= timeout {
|
|
592
|
+
return;
|
|
593
|
+
}
|
|
594
|
+
std::thread::sleep(std::time::Duration::from_millis(25));
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
fn reap_child_if_possible(pid: u32) {
|
|
599
|
+
let Ok(pid_t) = libc::pid_t::try_from(pid) else {
|
|
600
|
+
return;
|
|
601
|
+
};
|
|
602
|
+
let mut status = 0;
|
|
603
|
+
unsafe {
|
|
604
|
+
libc::waitpid(pid_t, &mut status, libc::WNOHANG);
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
fn process_is_live(pid: u32) -> bool {
|
|
609
|
+
let Ok(pid_t) = libc::pid_t::try_from(pid) else {
|
|
610
|
+
return false;
|
|
611
|
+
};
|
|
612
|
+
let rc = unsafe { libc::kill(pid_t, 0) };
|
|
613
|
+
if rc == 0 {
|
|
614
|
+
return true;
|
|
615
|
+
}
|
|
616
|
+
let err = std::io::Error::last_os_error();
|
|
617
|
+
err.raw_os_error() == Some(libc::EPERM)
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
fn process_pgids(pids: &[u32]) -> Vec<u32> {
|
|
621
|
+
let table = process_table();
|
|
622
|
+
let current_pgid = unsafe { libc::getpgrp() };
|
|
623
|
+
let mut pgids = pids
|
|
624
|
+
.iter()
|
|
625
|
+
.filter_map(|pid| table.iter().find(|process| process.pid == *pid))
|
|
626
|
+
.filter_map(|process| process.pgid)
|
|
627
|
+
.filter(|pgid| {
|
|
628
|
+
libc::pid_t::try_from(*pgid)
|
|
629
|
+
.map(|pgid| pgid > 1 && pgid != current_pgid)
|
|
630
|
+
.unwrap_or(false)
|
|
631
|
+
})
|
|
632
|
+
.collect::<Vec<_>>();
|
|
633
|
+
pgids.sort_unstable();
|
|
634
|
+
pgids.dedup();
|
|
635
|
+
pgids
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
fn process_residuals(
|
|
639
|
+
workspace: &Path,
|
|
640
|
+
state: &Value,
|
|
641
|
+
root_pids: &[u32],
|
|
642
|
+
root_pgids: &[u32],
|
|
643
|
+
) -> Vec<Value> {
|
|
644
|
+
let mut residuals = matched_processes(workspace, state, root_pids, root_pgids);
|
|
645
|
+
let mut seen = residuals.iter().map(|process| process.pid).collect::<std::collections::BTreeSet<_>>();
|
|
646
|
+
for pid in root_pids {
|
|
647
|
+
if process_is_live(*pid) && seen.insert(*pid) {
|
|
648
|
+
residuals.push(ProcessInfo {
|
|
649
|
+
pid: *pid,
|
|
650
|
+
ppid: 0,
|
|
651
|
+
pgid: None,
|
|
652
|
+
command: String::new(),
|
|
653
|
+
});
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
residuals
|
|
657
|
+
.into_iter()
|
|
658
|
+
.map(|process| {
|
|
659
|
+
json!({
|
|
660
|
+
"pid": process.pid,
|
|
661
|
+
"ppid": process.ppid,
|
|
662
|
+
"pgid": process.pgid,
|
|
663
|
+
"command": process.command,
|
|
664
|
+
})
|
|
665
|
+
})
|
|
666
|
+
.collect()
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
fn matched_processes(
|
|
670
|
+
workspace: &Path,
|
|
671
|
+
state: &Value,
|
|
672
|
+
root_pids: &[u32],
|
|
673
|
+
root_pgids: &[u32],
|
|
674
|
+
) -> Vec<ProcessInfo> {
|
|
675
|
+
let table = process_table();
|
|
676
|
+
let root_tree = root_pids
|
|
677
|
+
.iter()
|
|
678
|
+
.flat_map(|pid| process_tree_from_table(*pid, &table))
|
|
679
|
+
.collect::<std::collections::BTreeSet<_>>();
|
|
680
|
+
let root_pgids = root_pgids.iter().copied().collect::<std::collections::BTreeSet<_>>();
|
|
681
|
+
let spawn_cwds = state_spawn_cwds(state);
|
|
682
|
+
let workspace_text = workspace.to_string_lossy().to_string();
|
|
683
|
+
let current_pid = std::process::id();
|
|
684
|
+
table
|
|
685
|
+
.into_iter()
|
|
686
|
+
.filter(|process| process.pid != current_pid)
|
|
687
|
+
.filter(|process| {
|
|
688
|
+
process_matches_workspace(process, &workspace_text, &spawn_cwds)
|
|
689
|
+
|| root_tree.contains(&process.pid)
|
|
690
|
+
|| process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
|
|
691
|
+
})
|
|
692
|
+
.collect()
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
fn process_tree_from_table(root_pid: u32, table: &[ProcessInfo]) -> Vec<u32> {
|
|
696
|
+
if root_pid == 0 {
|
|
697
|
+
return Vec::new();
|
|
698
|
+
}
|
|
699
|
+
let mut out = vec![root_pid];
|
|
700
|
+
let mut seen = std::collections::BTreeSet::new();
|
|
701
|
+
seen.insert(root_pid);
|
|
702
|
+
let mut index = 0;
|
|
703
|
+
while index < out.len() {
|
|
704
|
+
let parent = out[index];
|
|
705
|
+
for process in table {
|
|
706
|
+
if process.ppid == parent && seen.insert(process.pid) {
|
|
707
|
+
out.push(process.pid);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
index += 1;
|
|
711
|
+
}
|
|
712
|
+
out
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
fn state_spawn_cwds(state: &Value) -> Vec<PathBuf> {
|
|
716
|
+
let mut out = Vec::new();
|
|
717
|
+
collect_spawn_cwds(state, &mut out);
|
|
718
|
+
if let Some(teams) = state.get("teams").and_then(Value::as_object) {
|
|
719
|
+
for team in teams.values() {
|
|
720
|
+
collect_spawn_cwds(team, &mut out);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
out
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
fn collect_spawn_cwds(state: &Value, out: &mut Vec<PathBuf>) {
|
|
727
|
+
let Some(agents) = state.get("agents").and_then(Value::as_object) else {
|
|
728
|
+
return;
|
|
729
|
+
};
|
|
730
|
+
for agent in agents.values() {
|
|
731
|
+
if let Some(spawn_cwd) = agent.get("spawn_cwd").and_then(Value::as_str).filter(|cwd| !cwd.is_empty()) {
|
|
732
|
+
out.push(PathBuf::from(spawn_cwd));
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
fn process_matches_workspace(
|
|
738
|
+
process: &ProcessInfo,
|
|
739
|
+
workspace_text: &str,
|
|
740
|
+
spawn_cwds: &[PathBuf],
|
|
741
|
+
) -> bool {
|
|
742
|
+
let command = process.command.as_str();
|
|
743
|
+
if command.contains("mcp-server")
|
|
744
|
+
&& command.contains("--workspace")
|
|
745
|
+
&& command.contains(workspace_text)
|
|
746
|
+
{
|
|
747
|
+
return true;
|
|
748
|
+
}
|
|
749
|
+
let lower = command.to_ascii_lowercase();
|
|
750
|
+
let provider_like = lower.contains("codex")
|
|
751
|
+
|| lower.contains("claude")
|
|
752
|
+
|| lower.contains("node")
|
|
753
|
+
|| lower.contains("mcp-server")
|
|
754
|
+
|| lower.contains("team-agent");
|
|
755
|
+
if !provider_like {
|
|
756
|
+
return false;
|
|
757
|
+
}
|
|
758
|
+
if command.contains(workspace_text) {
|
|
759
|
+
return true;
|
|
760
|
+
}
|
|
761
|
+
let Some(cwd) = process_cwd(process.pid) else {
|
|
762
|
+
return false;
|
|
763
|
+
};
|
|
764
|
+
spawn_cwds.iter().any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
/// Resolves the working directory of process `pid`.
///
/// Reads the `/proc/<pid>/cwd` link first. When that fails it runs
/// `lsof -a -p <pid> -d cwd -Fn` and takes the first output line starting with
/// `n`, minus that prefix. Returns `None` when neither source yields a path.
fn process_cwd(pid: u32) -> Option<PathBuf> {
    let via_procfs = std::fs::read_link(format!("/proc/{pid}/cwd")).ok();
    if via_procfs.is_some() {
        return via_procfs;
    }
    let pid_arg = pid.to_string();
    let lsof = std::process::Command::new("lsof")
        .args(["-a", "-p", pid_arg.as_str(), "-d", "cwd", "-Fn"])
        .output()
        .ok()?;
    if !lsof.status.success() {
        return None;
    }
    let listing = String::from_utf8_lossy(&lsof.stdout);
    for field in listing.lines() {
        if let Some(path) = field.strip_prefix('n') {
            return Some(PathBuf::from(path));
        }
    }
    None
}
|
|
783
|
+
|
|
784
|
+
/// Reports whether `path` equals `root` or lies beneath it, comparing whole path
/// components.
///
/// Both sides are canonicalized first; a side that cannot be canonicalized (for
/// example because it does not exist) is compared as given.
fn path_is_under(path: &Path, root: &Path) -> bool {
    fn resolved(raw: &Path) -> PathBuf {
        match raw.canonicalize() {
            Ok(real) => real,
            Err(_) => raw.to_path_buf(),
        }
    }

    // `Path::starts_with` is true for equal paths, so equality needs no separate test.
    resolved(path).starts_with(resolved(root))
}
|
|
184
789
|
/// `runtime.restart`(`cmd_restart`)。
|
|
185
790
|
pub fn restart(workspace: &Path, allow_fresh: bool, team: Option<&str>) -> Result<Value, CliError> {
|
|
186
791
|
match crate::lifecycle::restart(workspace, allow_fresh, team) {
|
|
@@ -459,13 +1064,46 @@ pub mod lifecycle_port {
|
|
|
459
1064
|
session_name,
|
|
460
1065
|
launch,
|
|
461
1066
|
next_actions,
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
"
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
1067
|
+
worker_readiness,
|
|
1068
|
+
} => {
|
|
1069
|
+
// BUG-7: never emit bare "ready" while worker tool-load is unverified.
|
|
1070
|
+
// The summary string + a structured `worker_readiness` block tell the
|
|
1071
|
+
// caller exactly which agents are unhealthy (Degraded) or that the
|
|
1072
|
+
// tool-set load has not been confirmed yet (PendingToolLoad).
|
|
1073
|
+
let (summary, ok, readiness_json) = match &worker_readiness {
|
|
1074
|
+
crate::lifecycle::QuickStartReadiness::Degraded { unhealthy_agents } => (
|
|
1075
|
+
format!(
|
|
1076
|
+
"quick-start degraded: {}; unhealthy: {}",
|
|
1077
|
+
session_name.as_str(),
|
|
1078
|
+
unhealthy_agents.join(",")
|
|
1079
|
+
),
|
|
1080
|
+
false,
|
|
1081
|
+
json!({
|
|
1082
|
+
"state": "degraded",
|
|
1083
|
+
"unhealthy_agents": unhealthy_agents,
|
|
1084
|
+
}),
|
|
1085
|
+
),
|
|
1086
|
+
crate::lifecycle::QuickStartReadiness::PendingToolLoad => (
|
|
1087
|
+
format!(
|
|
1088
|
+
"quick-start launched (worker tool load unverified): {}",
|
|
1089
|
+
session_name.as_str()
|
|
1090
|
+
),
|
|
1091
|
+
true,
|
|
1092
|
+
json!({
|
|
1093
|
+
"state": "pending_tool_load",
|
|
1094
|
+
"reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
|
|
1095
|
+
}),
|
|
1096
|
+
),
|
|
1097
|
+
};
|
|
1098
|
+
json!({
|
|
1099
|
+
"ok": ok,
|
|
1100
|
+
"summary": summary,
|
|
1101
|
+
"session_name": session_name.as_str(),
|
|
1102
|
+
"dry_run": launch.dry_run,
|
|
1103
|
+
"next_actions": next_actions,
|
|
1104
|
+
"worker_readiness": readiness_json,
|
|
1105
|
+
})
|
|
1106
|
+
}
|
|
469
1107
|
crate::lifecycle::QuickStartReport::ExistingRuntime {
|
|
470
1108
|
team,
|
|
471
1109
|
session_name,
|
|
@@ -595,35 +1233,65 @@ pub mod diagnose_port {
|
|
|
595
1233
|
|
|
596
1234
|
fn secret_scan(workspace: &Path) -> Value {
|
|
597
1235
|
let mut findings = Vec::new();
|
|
598
|
-
|
|
1236
|
+
let mut scanned = 0usize;
|
|
1237
|
+
scan_secret_dir(workspace, workspace, 0, &mut scanned, &mut findings);
|
|
599
1238
|
json!({
|
|
600
1239
|
"ok": findings.is_empty(),
|
|
601
1240
|
"findings": findings,
|
|
602
1241
|
})
|
|
603
1242
|
}
|
|
604
1243
|
|
|
605
|
-
|
|
1244
|
+
/// Deepest directory level `scan_secret_dir` descends to; the scan root is depth 0.
const SECRET_SCAN_MAX_DEPTH: usize = 4;
|
|
1245
|
+
/// Total number of directory entries one scan visits before it stops.
const SECRET_SCAN_MAX_ENTRIES: usize = 512;
|
|
1246
|
+
/// Files larger than this many bytes are skipped; reads are capped at the same size.
const SECRET_SCAN_MAX_FILE_BYTES: u64 = 128 * 1024;
|
|
1247
|
+
|
|
1248
|
+
fn scan_secret_dir(root: &Path, dir: &Path, depth: usize, scanned: &mut usize, findings: &mut Vec<Value>) {
|
|
1249
|
+
if depth > SECRET_SCAN_MAX_DEPTH || *scanned >= SECRET_SCAN_MAX_ENTRIES {
|
|
1250
|
+
return;
|
|
1251
|
+
}
|
|
606
1252
|
let Ok(entries) = std::fs::read_dir(dir) else {
|
|
607
1253
|
return;
|
|
608
1254
|
};
|
|
609
1255
|
for entry in entries.flatten() {
|
|
1256
|
+
if *scanned >= SECRET_SCAN_MAX_ENTRIES {
|
|
1257
|
+
return;
|
|
1258
|
+
}
|
|
1259
|
+
*scanned = scanned.saturating_add(1);
|
|
610
1260
|
let path = entry.path();
|
|
611
1261
|
let name = path.file_name().map(|s| s.to_string_lossy());
|
|
612
1262
|
if name.as_deref() == Some(".team") || name.as_deref() == Some(".git") {
|
|
613
1263
|
continue;
|
|
614
1264
|
}
|
|
615
|
-
|
|
616
|
-
|
|
1265
|
+
let Ok(file_type) = entry.file_type() else {
|
|
1266
|
+
continue;
|
|
1267
|
+
};
|
|
1268
|
+
if file_type.is_dir() {
|
|
1269
|
+
scan_secret_dir(root, &path, depth.saturating_add(1), scanned, findings);
|
|
617
1270
|
continue;
|
|
618
1271
|
}
|
|
619
|
-
|
|
1272
|
+
if file_type.is_file() {
|
|
1273
|
+
scan_secret_file(root, &path, findings);
|
|
1274
|
+
}
|
|
620
1275
|
}
|
|
621
1276
|
}
|
|
622
1277
|
|
|
623
1278
|
fn scan_secret_file(root: &Path, path: &Path, findings: &mut Vec<Value>) {
|
|
624
|
-
let Ok(
|
|
1279
|
+
let Ok(metadata) = std::fs::metadata(path) else {
|
|
625
1280
|
return;
|
|
626
1281
|
};
|
|
1282
|
+
if !metadata.is_file() || metadata.len() > SECRET_SCAN_MAX_FILE_BYTES {
|
|
1283
|
+
return;
|
|
1284
|
+
}
|
|
1285
|
+
let Ok(file) = std::fs::File::open(path) else {
|
|
1286
|
+
return;
|
|
1287
|
+
};
|
|
1288
|
+
let mut text = String::new();
|
|
1289
|
+
if std::io::Read::take(file, SECRET_SCAN_MAX_FILE_BYTES)
|
|
1290
|
+
.read_to_string(&mut text)
|
|
1291
|
+
.is_err()
|
|
1292
|
+
{
|
|
1293
|
+
return;
|
|
1294
|
+
}
|
|
627
1295
|
for (idx, line) in text.lines().enumerate() {
|
|
628
1296
|
if line.contains("OPENAI_API_KEY=") || line.contains("ANTHROPIC_API_KEY=") {
|
|
629
1297
|
let rel = path.strip_prefix(root).unwrap_or(path);
|