@team-agent/installer 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/Cargo.lock +1 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/src/cli/adapters.rs +38 -7
  4. package/crates/team-agent/src/cli/emit.rs +182 -54
  5. package/crates/team-agent/src/cli/mod.rs +703 -35
  6. package/crates/team-agent/src/cli/status_port.rs +170 -44
  7. package/crates/team-agent/src/cli/tests/run_delegation.rs +2 -0
  8. package/crates/team-agent/src/cli/types.rs +1 -0
  9. package/crates/team-agent/src/coordinator/health.rs +130 -0
  10. package/crates/team-agent/src/leader/lease.rs +23 -2
  11. package/crates/team-agent/src/leader/rediscover/tests.rs +1 -0
  12. package/crates/team-agent/src/leader/rediscover.rs +2 -0
  13. package/crates/team-agent/src/leader/tests/byte_findings.rs +9 -6
  14. package/crates/team-agent/src/leader/tests/idle.rs +1 -0
  15. package/crates/team-agent/src/leader/tests/lease_claim.rs +157 -0
  16. package/crates/team-agent/src/leader/types.rs +2 -0
  17. package/crates/team-agent/src/lifecycle/launch.rs +554 -65
  18. package/crates/team-agent/src/lifecycle/restart/common.rs +65 -0
  19. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +57 -15
  20. package/crates/team-agent/src/lifecycle/restart/remove.rs +5 -1
  21. package/crates/team-agent/src/lifecycle/restart.rs +20 -0
  22. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +52 -0
  23. package/crates/team-agent/src/lifecycle/types.rs +25 -0
  24. package/crates/team-agent/src/mcp_server/tests/wire.rs +28 -0
  25. package/crates/team-agent/src/mcp_server/wire.rs +81 -1
  26. package/crates/team-agent/src/messaging/delivery.rs +574 -12
  27. package/crates/team-agent/src/messaging/leader_receiver.rs +26 -37
  28. package/crates/team-agent/src/messaging/mod.rs +1 -1
  29. package/crates/team-agent/src/messaging/results.rs +218 -49
  30. package/crates/team-agent/src/messaging/send.rs +15 -19
  31. package/crates/team-agent/src/provider/adapter.rs +95 -10
  32. package/crates/team-agent/src/provider/helpers.rs +10 -1
  33. package/crates/team-agent/src/state/identity.rs +3 -0
  34. package/crates/team-agent/src/state/persist.rs +113 -1
  35. package/crates/team-agent/src/state/projection.rs +127 -3
  36. package/crates/team-agent/src/tmux_backend/tests.rs +179 -0
  37. package/crates/team-agent/src/tmux_backend.rs +124 -12
  38. package/npm/install.mjs +29 -7
  39. package/package.json +4 -4
@@ -28,6 +28,7 @@
28
28
  // §10:CLI 命令实现层禁 unwrap/expect/panic(unimplemented!() stub 不被拦);tests 子模块各自 allow。
29
29
  #![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
30
30
 
31
+ use std::io::Read;
31
32
  use std::path::{Path, PathBuf};
32
33
 
33
34
  use serde::{Deserialize, Serialize};
@@ -91,8 +92,7 @@ pub mod lifecycle_port {
91
92
  yes: bool,
92
93
  fresh: bool,
93
94
  ) -> Result<Value, CliError> {
94
- let _ = workspace;
95
- match crate::lifecycle::quick_start(agents_dir, name, yes, fresh, team_id) {
95
+ match crate::lifecycle::quick_start_in_workspace(workspace, agents_dir, name, yes, fresh, team_id) {
96
96
  Ok(report) => Ok(quick_start_value(report)),
97
97
  Err(e) => Ok(error_value(e)),
98
98
  }
@@ -121,13 +121,29 @@ pub mod lifecycle_port {
121
121
  }
122
122
  /// `runtime.shutdown`(`cmd_shutdown`)。
123
123
  pub fn shutdown(workspace: &Path, keep_logs: bool, team: Option<&str>) -> Result<Value, CliError> {
124
- // CP-1: workspace-bound backend so kill-session hits the per-team `tmux -L <socket>` server,
125
- // then tear that server down so the per-team socket does not orphan (best-effort).
126
124
  let run_ws = crate::model::paths::canonical_run_workspace(workspace)
127
125
  .map_err(|e| CliError::Runtime(e.to_string()))?;
128
- let transport = crate::tmux_backend::TmuxBackend::for_workspace(&run_ws);
129
- let result = shutdown_with_transport(workspace, keep_logs, team, &transport);
130
- transport.kill_server();
126
+ let state = shutdown_state_for_team(&run_ws, team)?;
127
+ let endpoint = stored_tmux_endpoint(&state);
128
+ let transport = match endpoint {
129
+ Some(endpoint) if Path::new(endpoint).is_absolute() => {
130
+ crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
131
+ }
132
+ Some(endpoint) if !endpoint.is_empty() => {
133
+ crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
134
+ }
135
+ _ => shutdown_workspace_transport(&run_ws),
136
+ };
137
+ let result = shutdown_with_transport_and_state(
138
+ workspace,
139
+ keep_logs,
140
+ team,
141
+ &transport,
142
+ Some(state),
143
+ );
144
+ if team.is_none() {
145
+ transport.kill_server();
146
+ }
131
147
  result
132
148
  }
133
149
 
@@ -137,27 +153,107 @@ pub mod lifecycle_port {
137
153
  team: Option<&str>,
138
154
  transport: &dyn crate::transport::Transport,
139
155
  ) -> Result<Value, CliError> {
140
- let wp = crate::coordinator::WorkspacePath::new(workspace.to_path_buf());
141
- let stopped = crate::coordinator::stop_coordinator(&wp)
156
+ shutdown_with_transport_and_state(workspace, keep_logs, team, transport, None)
157
+ }
158
+
159
+ fn shutdown_with_transport_and_state(
160
+ workspace: &Path,
161
+ keep_logs: bool,
162
+ team: Option<&str>,
163
+ transport: &dyn crate::transport::Transport,
164
+ state: Option<Value>,
165
+ ) -> Result<Value, CliError> {
166
+ let run_workspace = crate::model::paths::canonical_run_workspace(workspace)
167
+ .map_err(|e| CliError::Runtime(e.to_string()))?;
168
+ let stopped = if team.is_none() {
169
+ let wp = crate::coordinator::WorkspacePath::new(run_workspace.clone());
170
+ Some(
171
+ crate::coordinator::stop_coordinator(&wp)
172
+ .map_err(|e| CliError::Runtime(e.to_string()))?,
173
+ )
174
+ } else {
175
+ None
176
+ };
177
+ let mut state = match state {
178
+ Some(state) => state,
179
+ None => shutdown_state_for_team(&run_workspace, team)?,
180
+ };
181
+ let stored_transport = stored_tmux_endpoint(&state).map(tmux_transport_for_endpoint);
182
+ let transport = stored_transport
183
+ .as_ref()
184
+ .map(|transport| transport as &dyn crate::transport::Transport)
185
+ .unwrap_or(transport);
186
+ let captured_missing_sessions = crate::lifecycle::restart::refresh_missing_provider_sessions(&mut state)
142
187
  .map_err(|e| CliError::Runtime(e.to_string()))?;
143
- let mut state = crate::state::persist::load_runtime_state(workspace)?;
144
188
  let session_name = state
145
189
  .get("session_name")
146
190
  .and_then(Value::as_str)
147
191
  .filter(|s| !s.is_empty())
148
192
  .map(crate::transport::SessionName::new);
149
- let session_killed = if let Some(session) = session_name.as_ref() {
150
- match transport.kill_session(session) {
151
- Ok(()) => true,
152
- Err(error) if tmux_absent_error(&error.to_string()) => false,
153
- Err(error) => return Err(CliError::Runtime(error.to_string())),
193
+ let mut root_pids = state_process_roots(&state);
194
+ let pane_pids = session_name
195
+ .as_ref()
196
+ .map(|session| pane_pids_for_session(transport, session))
197
+ .unwrap_or_default();
198
+ root_pids.extend(pane_pids);
199
+ root_pids.sort_unstable();
200
+ root_pids.dedup();
201
+ let root_pgids = process_pgids(&root_pids);
202
+ for pid in &root_pids {
203
+ reap_process_tree(*pid);
204
+ }
205
+ reap_process_groups(&root_pgids);
206
+ let mut kill_error: Option<String> = None;
207
+ if let Some(session) = session_name.as_ref() {
208
+ if let Err(error) = transport.kill_session(session) {
209
+ if !tmux_absent_error(&error.to_string()) {
210
+ kill_error = Some(error.to_string());
211
+ }
212
+ }
213
+ }
214
+ reap_workspace_process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
215
+ let session_residuals = if let Some(session) = session_name.as_ref() {
216
+ let (residuals, error) = session_residuals_after_reap(
217
+ transport,
218
+ &run_workspace,
219
+ session,
220
+ !captured_missing_sessions,
221
+ );
222
+ if let Some(error) = error {
223
+ kill_error.get_or_insert(error);
154
224
  }
225
+ residuals
155
226
  } else {
156
- false
227
+ Vec::new()
157
228
  };
229
+ let process_residuals = process_residuals(&run_workspace, &state, &root_pids, &root_pgids);
230
+ let session_killed = session_name.is_some()
231
+ && kill_error.is_none()
232
+ && session_residuals.is_empty()
233
+ && process_residuals.is_empty();
158
234
  mark_agents_stopped(&mut state);
159
- crate::state::persist::save_runtime_state(workspace, &state)?;
160
- let _event = crate::event_log::EventLog::new(workspace)
235
+ if team.is_some() {
236
+ crate::state::projection::save_team_scoped_state(&run_workspace, &state)?;
237
+ } else {
238
+ crate::state::persist::save_runtime_state(&run_workspace, &state)?;
239
+ }
240
+ let coordinator_status = stopped
241
+ .as_ref()
242
+ .map(|stopped| stop_status_wire(stopped.status))
243
+ .unwrap_or("not_stopped");
244
+ let coordinator_pid = stopped.as_ref().and_then(|stopped| stopped.pid.map(|p| p.get()));
245
+ let ok = stopped.as_ref().map(|stopped| stopped.ok).unwrap_or(true)
246
+ && kill_error.is_none()
247
+ && session_residuals.is_empty()
248
+ && process_residuals.is_empty();
249
+ let status = if ok {
250
+ "ok"
251
+ } else if kill_error.is_some() {
252
+ "failed"
253
+ } else {
254
+ "partial"
255
+ };
256
+ let _event = crate::event_log::EventLog::new(&run_workspace)
161
257
  .write(
162
258
  "lifecycle.shutdown",
163
259
  json!({
@@ -165,22 +261,531 @@ pub mod lifecycle_port {
165
261
  "team": team,
166
262
  "session_name": session_name.as_ref().map(|s| s.as_str().to_string()),
167
263
  "session_killed": session_killed,
168
- "coordinator_status": stop_status_wire(stopped.status),
264
+ "coordinator_status": coordinator_status,
265
+ "status": status,
169
266
  }),
170
267
  )
171
268
  .map_err(|e| CliError::Runtime(e.to_string()))?;
172
269
  Ok(json!({
173
- "ok": stopped.ok,
270
+ "ok": ok,
271
+ "status": status,
174
272
  "keep_logs": keep_logs,
175
273
  "team": team,
176
274
  "session_name": session_name.map(|s| s.as_str().to_string()),
177
275
  "session_killed": session_killed,
276
+ "residuals": {
277
+ "sessions": session_residuals,
278
+ "processes": process_residuals,
279
+ },
280
+ "error": kill_error,
178
281
  "coordinator": {
179
- "status": stop_status_wire(stopped.status),
180
- "pid": stopped.pid.map(|p| p.get()),
282
+ "status": coordinator_status,
283
+ "pid": coordinator_pid,
181
284
  }
182
285
  }))
183
286
  }
287
+
288
+ fn shutdown_state_for_team(workspace: &Path, team: Option<&str>) -> Result<Value, CliError> {
289
+ if let Some(team) = team {
290
+ crate::state::projection::select_runtime_state(workspace, Some(team)).map_err(CliError::from)
291
+ } else {
292
+ crate::state::persist::load_runtime_state(workspace).map_err(CliError::from)
293
+ }
294
+ }
295
+
296
+ fn shutdown_workspace_transport(workspace: &Path) -> crate::tmux_backend::TmuxBackend {
297
+ crate::tmux_backend::TmuxBackend::for_workspace(workspace)
298
+ }
299
+
300
+ fn tmux_transport_for_endpoint(endpoint: &str) -> crate::tmux_backend::TmuxBackend {
301
+ if Path::new(endpoint).is_absolute() {
302
+ crate::tmux_backend::TmuxBackend::for_tmux_endpoint(endpoint)
303
+ } else {
304
+ crate::tmux_backend::TmuxBackend::for_socket_name(endpoint)
305
+ }
306
+ }
307
+
308
+ fn stored_tmux_endpoint(state: &Value) -> Option<&str> {
309
+ leader_receiver_tmux_socket(state)
310
+ .or_else(|| active_team_entry(state).and_then(leader_receiver_tmux_socket))
311
+ .or_else(|| only_team_entry(state).and_then(leader_receiver_tmux_socket))
312
+ }
313
+
314
+ fn leader_receiver_tmux_socket(state: &Value) -> Option<&str> {
315
+ state
316
+ .get("leader_receiver")
317
+ .and_then(|receiver| receiver.get("tmux_socket"))
318
+ .and_then(Value::as_str)
319
+ .filter(|socket| !socket.is_empty())
320
+ }
321
+
322
+ fn active_team_entry(state: &Value) -> Option<&Value> {
323
+ let active = state
324
+ .get("active_team_key")
325
+ .and_then(Value::as_str)
326
+ .filter(|team| !team.is_empty())?;
327
+ state
328
+ .get("teams")
329
+ .and_then(Value::as_object)
330
+ .and_then(|teams| teams.get(active))
331
+ }
332
+
333
+ fn only_team_entry(state: &Value) -> Option<&Value> {
334
+ let teams = state.get("teams").and_then(Value::as_object)?;
335
+ if teams.len() == 1 {
336
+ teams.values().next()
337
+ } else {
338
+ None
339
+ }
340
+ }
341
+
342
+ fn pane_pids_for_session(
343
+ transport: &dyn crate::transport::Transport,
344
+ session: &crate::transport::SessionName,
345
+ ) -> Vec<u32> {
346
+ transport
347
+ .list_targets()
348
+ .unwrap_or_default()
349
+ .into_iter()
350
+ .filter(|pane| pane.session.as_str() == session.as_str())
351
+ .filter_map(|pane| pane.pane_pid)
352
+ .collect()
353
+ }
354
+
355
+ fn session_residuals_after_reap(
356
+ transport: &dyn crate::transport::Transport,
357
+ workspace: &Path,
358
+ session: &crate::transport::SessionName,
359
+ check_primary_transport: bool,
360
+ ) -> (Vec<String>, Option<String>) {
361
+ let mut residual = false;
362
+ let mut error = None;
363
+ if check_primary_transport {
364
+ match transport.has_session(session) {
365
+ Ok(true) => residual = true,
366
+ Ok(false) => {}
367
+ Err(err) if tmux_absent_error(&err.to_string()) => {}
368
+ Err(err) => {
369
+ error = Some(err.to_string());
370
+ residual = true;
371
+ }
372
+ }
373
+ }
374
+ let workspace_transport = shutdown_workspace_transport(workspace);
375
+ match crate::transport::Transport::has_session(&workspace_transport, session) {
376
+ Ok(true) => residual = true,
377
+ Ok(false) => {}
378
+ Err(err) if tmux_absent_error(&err.to_string()) => {}
379
+ Err(err) => {
380
+ error.get_or_insert_with(|| err.to_string());
381
+ residual = true;
382
+ }
383
+ }
384
+ let default_transport = crate::tmux_backend::TmuxBackend::new();
385
+ match crate::transport::Transport::has_session(&default_transport, session) {
386
+ Ok(true) => residual = true,
387
+ Ok(false) => {}
388
+ Err(err) if tmux_absent_error(&err.to_string()) => {}
389
+ Err(err) => {
390
+ error.get_or_insert_with(|| err.to_string());
391
+ residual = true;
392
+ }
393
+ }
394
+ let sessions = if residual {
395
+ vec![session.as_str().to_string()]
396
+ } else {
397
+ Vec::new()
398
+ };
399
+ (sessions, error)
400
+ }
401
+
402
+ fn state_process_roots(state: &Value) -> Vec<u32> {
403
+ let mut out = Vec::new();
404
+ collect_agent_process_roots(state, &mut out);
405
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
406
+ for team in teams.values() {
407
+ collect_agent_process_roots(team, &mut out);
408
+ }
409
+ }
410
+ out.sort_unstable();
411
+ out.dedup();
412
+ out
413
+ }
414
+
415
+ fn collect_agent_process_roots(state: &Value, out: &mut Vec<u32>) {
416
+ let Some(agents) = state.get("agents").and_then(Value::as_object) else {
417
+ return;
418
+ };
419
+ for agent in agents.values() {
420
+ for key in ["provider_pid", "process_id", "pid", "child_pid", "pane_pid"] {
421
+ if let Some(pid) = agent.get(key).and_then(value_u32) {
422
+ out.push(pid);
423
+ }
424
+ }
425
+ }
426
+ }
427
+
428
+ fn value_u32(value: &Value) -> Option<u32> {
429
+ value
430
+ .as_u64()
431
+ .and_then(|pid| u32::try_from(pid).ok())
432
+ .or_else(|| value.as_str().and_then(|pid| pid.parse::<u32>().ok()))
433
+ .filter(|pid| *pid > 0)
434
+ }
435
+
436
+ fn reap_process_tree(root_pid: u32) {
437
+ let pids = process_tree_pids(root_pid);
438
+ for pid in pids.iter().rev() {
439
+ send_process_signal(*pid, libc::SIGTERM);
440
+ }
441
+ std::thread::sleep(std::time::Duration::from_millis(150));
442
+ for pid in pids.iter().rev() {
443
+ send_process_signal(*pid, libc::SIGKILL);
444
+ }
445
+ wait_for_processes_gone(&pids, std::time::Duration::from_secs(1));
446
+ }
447
+
448
+ fn reap_process_groups(pgids: &[u32]) {
449
+ let current_pgid = unsafe { libc::getpgrp() };
450
+ for pgid in pgids {
451
+ let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
452
+ continue;
453
+ };
454
+ if pgid_t <= 1 || pgid_t == current_pgid {
455
+ continue;
456
+ }
457
+ send_process_signal_group(pgid_t, libc::SIGTERM);
458
+ }
459
+ std::thread::sleep(std::time::Duration::from_millis(150));
460
+ for pgid in pgids {
461
+ let Ok(pgid_t) = libc::pid_t::try_from(*pgid) else {
462
+ continue;
463
+ };
464
+ if pgid_t <= 1 || pgid_t == current_pgid {
465
+ continue;
466
+ }
467
+ send_process_signal_group(pgid_t, libc::SIGKILL);
468
+ }
469
+ }
470
+
471
+ fn reap_workspace_process_residuals(
472
+ workspace: &Path,
473
+ state: &Value,
474
+ root_pids: &[u32],
475
+ root_pgids: &[u32],
476
+ ) {
477
+ for _ in 0..5 {
478
+ let residuals = matched_processes(workspace, state, root_pids, root_pgids);
479
+ if residuals.is_empty() {
480
+ return;
481
+ }
482
+ for process in &residuals {
483
+ reap_process_tree(process.pid);
484
+ }
485
+ let pgids = residuals
486
+ .iter()
487
+ .filter_map(|process| process.pgid)
488
+ .collect::<Vec<_>>();
489
+ reap_process_groups(&pgids);
490
+ std::thread::sleep(std::time::Duration::from_millis(100));
491
+ }
492
+ }
493
+
494
+ fn process_tree_pids(root_pid: u32) -> Vec<u32> {
495
+ if root_pid == 0 {
496
+ return Vec::new();
497
+ }
498
+ let pairs = process_parent_pairs();
499
+ let mut out = vec![root_pid];
500
+ let mut seen = std::collections::BTreeSet::new();
501
+ seen.insert(root_pid);
502
+ let mut index = 0;
503
+ while index < out.len() {
504
+ let parent = out[index];
505
+ for (pid, ppid) in &pairs {
506
+ if *ppid == parent && seen.insert(*pid) {
507
+ out.push(*pid);
508
+ }
509
+ }
510
+ index += 1;
511
+ }
512
+ out
513
+ }
514
+
515
+ fn process_parent_pairs() -> Vec<(u32, u32)> {
516
+ let output = match std::process::Command::new("ps")
517
+ .args(["-axo", "pid=,ppid="])
518
+ .output()
519
+ {
520
+ Ok(output) if output.status.success() => output,
521
+ _ => return Vec::new(),
522
+ };
523
+ String::from_utf8_lossy(&output.stdout)
524
+ .lines()
525
+ .filter_map(|line| {
526
+ let mut parts = line.split_whitespace();
527
+ let pid = parts.next()?.parse::<u32>().ok()?;
528
+ let ppid = parts.next()?.parse::<u32>().ok()?;
529
+ Some((pid, ppid))
530
+ })
531
+ .collect()
532
+ }
533
+
534
+ fn process_table() -> Vec<ProcessInfo> {
535
+ let output = match std::process::Command::new("ps")
536
+ .args(["-axo", "pid=,ppid=,pgid=,command="])
537
+ .output()
538
+ {
539
+ Ok(output) if output.status.success() => output,
540
+ _ => return Vec::new(),
541
+ };
542
+ String::from_utf8_lossy(&output.stdout)
543
+ .lines()
544
+ .filter_map(parse_process_info)
545
+ .collect()
546
+ }
547
+
548
+ fn parse_process_info(line: &str) -> Option<ProcessInfo> {
549
+ let mut parts = line.split_whitespace();
550
+ let pid = parts.next()?.parse::<u32>().ok()?;
551
+ let ppid = parts.next()?.parse::<u32>().ok()?;
552
+ let pgid = parts.next().and_then(|raw| raw.parse::<u32>().ok());
553
+ let command = parts.collect::<Vec<_>>().join(" ");
554
+ Some(ProcessInfo {
555
+ pid,
556
+ ppid,
557
+ pgid,
558
+ command,
559
+ })
560
+ }
561
+
562
+ #[derive(Clone, Debug)]
563
+ struct ProcessInfo {
564
+ pid: u32,
565
+ ppid: u32,
566
+ pgid: Option<u32>,
567
+ command: String,
568
+ }
569
+
570
+ fn send_process_signal(pid: u32, signal: libc::c_int) {
571
+ let Ok(pid_t) = libc::pid_t::try_from(pid) else {
572
+ return;
573
+ };
574
+ unsafe {
575
+ libc::kill(pid_t, signal);
576
+ }
577
+ }
578
+
579
+ fn send_process_signal_group(pgid: libc::pid_t, signal: libc::c_int) {
580
+ unsafe {
581
+ libc::kill(-pgid, signal);
582
+ }
583
+ }
584
+
585
+ fn wait_for_processes_gone(pids: &[u32], timeout: std::time::Duration) {
586
+ let start = std::time::Instant::now();
587
+ loop {
588
+ for pid in pids {
589
+ reap_child_if_possible(*pid);
590
+ }
591
+ if !pids.iter().any(|pid| process_is_live(*pid)) || start.elapsed() >= timeout {
592
+ return;
593
+ }
594
+ std::thread::sleep(std::time::Duration::from_millis(25));
595
+ }
596
+ }
597
+
598
+ fn reap_child_if_possible(pid: u32) {
599
+ let Ok(pid_t) = libc::pid_t::try_from(pid) else {
600
+ return;
601
+ };
602
+ let mut status = 0;
603
+ unsafe {
604
+ libc::waitpid(pid_t, &mut status, libc::WNOHANG);
605
+ }
606
+ }
607
+
608
+ fn process_is_live(pid: u32) -> bool {
609
+ let Ok(pid_t) = libc::pid_t::try_from(pid) else {
610
+ return false;
611
+ };
612
+ let rc = unsafe { libc::kill(pid_t, 0) };
613
+ if rc == 0 {
614
+ return true;
615
+ }
616
+ let err = std::io::Error::last_os_error();
617
+ err.raw_os_error() == Some(libc::EPERM)
618
+ }
619
+
620
+ fn process_pgids(pids: &[u32]) -> Vec<u32> {
621
+ let table = process_table();
622
+ let current_pgid = unsafe { libc::getpgrp() };
623
+ let mut pgids = pids
624
+ .iter()
625
+ .filter_map(|pid| table.iter().find(|process| process.pid == *pid))
626
+ .filter_map(|process| process.pgid)
627
+ .filter(|pgid| {
628
+ libc::pid_t::try_from(*pgid)
629
+ .map(|pgid| pgid > 1 && pgid != current_pgid)
630
+ .unwrap_or(false)
631
+ })
632
+ .collect::<Vec<_>>();
633
+ pgids.sort_unstable();
634
+ pgids.dedup();
635
+ pgids
636
+ }
637
+
638
+ fn process_residuals(
639
+ workspace: &Path,
640
+ state: &Value,
641
+ root_pids: &[u32],
642
+ root_pgids: &[u32],
643
+ ) -> Vec<Value> {
644
+ let mut residuals = matched_processes(workspace, state, root_pids, root_pgids);
645
+ let mut seen = residuals.iter().map(|process| process.pid).collect::<std::collections::BTreeSet<_>>();
646
+ for pid in root_pids {
647
+ if process_is_live(*pid) && seen.insert(*pid) {
648
+ residuals.push(ProcessInfo {
649
+ pid: *pid,
650
+ ppid: 0,
651
+ pgid: None,
652
+ command: String::new(),
653
+ });
654
+ }
655
+ }
656
+ residuals
657
+ .into_iter()
658
+ .map(|process| {
659
+ json!({
660
+ "pid": process.pid,
661
+ "ppid": process.ppid,
662
+ "pgid": process.pgid,
663
+ "command": process.command,
664
+ })
665
+ })
666
+ .collect()
667
+ }
668
+
669
+ fn matched_processes(
670
+ workspace: &Path,
671
+ state: &Value,
672
+ root_pids: &[u32],
673
+ root_pgids: &[u32],
674
+ ) -> Vec<ProcessInfo> {
675
+ let table = process_table();
676
+ let root_tree = root_pids
677
+ .iter()
678
+ .flat_map(|pid| process_tree_from_table(*pid, &table))
679
+ .collect::<std::collections::BTreeSet<_>>();
680
+ let root_pgids = root_pgids.iter().copied().collect::<std::collections::BTreeSet<_>>();
681
+ let spawn_cwds = state_spawn_cwds(state);
682
+ let workspace_text = workspace.to_string_lossy().to_string();
683
+ let current_pid = std::process::id();
684
+ table
685
+ .into_iter()
686
+ .filter(|process| process.pid != current_pid)
687
+ .filter(|process| {
688
+ process_matches_workspace(process, &workspace_text, &spawn_cwds)
689
+ || root_tree.contains(&process.pid)
690
+ || process.pgid.is_some_and(|pgid| root_pgids.contains(&pgid))
691
+ })
692
+ .collect()
693
+ }
694
+
695
+ fn process_tree_from_table(root_pid: u32, table: &[ProcessInfo]) -> Vec<u32> {
696
+ if root_pid == 0 {
697
+ return Vec::new();
698
+ }
699
+ let mut out = vec![root_pid];
700
+ let mut seen = std::collections::BTreeSet::new();
701
+ seen.insert(root_pid);
702
+ let mut index = 0;
703
+ while index < out.len() {
704
+ let parent = out[index];
705
+ for process in table {
706
+ if process.ppid == parent && seen.insert(process.pid) {
707
+ out.push(process.pid);
708
+ }
709
+ }
710
+ index += 1;
711
+ }
712
+ out
713
+ }
714
+
715
+ fn state_spawn_cwds(state: &Value) -> Vec<PathBuf> {
716
+ let mut out = Vec::new();
717
+ collect_spawn_cwds(state, &mut out);
718
+ if let Some(teams) = state.get("teams").and_then(Value::as_object) {
719
+ for team in teams.values() {
720
+ collect_spawn_cwds(team, &mut out);
721
+ }
722
+ }
723
+ out
724
+ }
725
+
726
+ fn collect_spawn_cwds(state: &Value, out: &mut Vec<PathBuf>) {
727
+ let Some(agents) = state.get("agents").and_then(Value::as_object) else {
728
+ return;
729
+ };
730
+ for agent in agents.values() {
731
+ if let Some(spawn_cwd) = agent.get("spawn_cwd").and_then(Value::as_str).filter(|cwd| !cwd.is_empty()) {
732
+ out.push(PathBuf::from(spawn_cwd));
733
+ }
734
+ }
735
+ }
736
+
737
+ fn process_matches_workspace(
738
+ process: &ProcessInfo,
739
+ workspace_text: &str,
740
+ spawn_cwds: &[PathBuf],
741
+ ) -> bool {
742
+ let command = process.command.as_str();
743
+ if command.contains("mcp-server")
744
+ && command.contains("--workspace")
745
+ && command.contains(workspace_text)
746
+ {
747
+ return true;
748
+ }
749
+ let lower = command.to_ascii_lowercase();
750
+ let provider_like = lower.contains("codex")
751
+ || lower.contains("claude")
752
+ || lower.contains("node")
753
+ || lower.contains("mcp-server")
754
+ || lower.contains("team-agent");
755
+ if !provider_like {
756
+ return false;
757
+ }
758
+ if command.contains(workspace_text) {
759
+ return true;
760
+ }
761
+ let Some(cwd) = process_cwd(process.pid) else {
762
+ return false;
763
+ };
764
+ spawn_cwds.iter().any(|spawn_cwd| path_is_under(&cwd, spawn_cwd))
765
+ }
766
+
767
+ fn process_cwd(pid: u32) -> Option<PathBuf> {
768
+ let proc_cwd = PathBuf::from(format!("/proc/{pid}/cwd"));
769
+ if let Ok(path) = std::fs::read_link(proc_cwd) {
770
+ return Some(path);
771
+ }
772
+ let output = std::process::Command::new("lsof")
773
+ .args(["-a", "-p", &pid.to_string(), "-d", "cwd", "-Fn"])
774
+ .output()
775
+ .ok()?;
776
+ if !output.status.success() {
777
+ return None;
778
+ }
779
+ String::from_utf8_lossy(&output.stdout)
780
+ .lines()
781
+ .find_map(|line| line.strip_prefix('n').map(PathBuf::from))
782
+ }
783
+
784
+ fn path_is_under(path: &Path, root: &Path) -> bool {
785
+ let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
786
+ let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
787
+ path == root || path.starts_with(root)
788
+ }
184
789
  /// `runtime.restart`(`cmd_restart`)。
185
790
  pub fn restart(workspace: &Path, allow_fresh: bool, team: Option<&str>) -> Result<Value, CliError> {
186
791
  match crate::lifecycle::restart(workspace, allow_fresh, team) {
@@ -459,13 +1064,46 @@ pub mod lifecycle_port {
459
1064
  session_name,
460
1065
  launch,
461
1066
  next_actions,
462
- } => json!({
463
- "ok": true,
464
- "summary": format!("quick-start ready: {}", session_name.as_str()),
465
- "session_name": session_name.as_str(),
466
- "dry_run": launch.dry_run,
467
- "next_actions": next_actions,
468
- }),
1067
+ worker_readiness,
1068
+ } => {
1069
+ // BUG-7: never emit bare "ready" while worker tool-load is unverified.
1070
+ // The summary string + a structured `worker_readiness` block tell the
1071
+ // caller exactly which agents are unhealthy (Degraded) or that the
1072
+ // tool-set load has not been confirmed yet (PendingToolLoad).
1073
+ let (summary, ok, readiness_json) = match &worker_readiness {
1074
+ crate::lifecycle::QuickStartReadiness::Degraded { unhealthy_agents } => (
1075
+ format!(
1076
+ "quick-start degraded: {}; unhealthy: {}",
1077
+ session_name.as_str(),
1078
+ unhealthy_agents.join(",")
1079
+ ),
1080
+ false,
1081
+ json!({
1082
+ "state": "degraded",
1083
+ "unhealthy_agents": unhealthy_agents,
1084
+ }),
1085
+ ),
1086
+ crate::lifecycle::QuickStartReadiness::PendingToolLoad => (
1087
+ format!(
1088
+ "quick-start launched (worker tool load unverified): {}",
1089
+ session_name.as_str()
1090
+ ),
1091
+ true,
1092
+ json!({
1093
+ "state": "pending_tool_load",
1094
+ "reason": "worker MCP tool set load not yet confirmed; run `team-agent doctor` or wait for first worker turn",
1095
+ }),
1096
+ ),
1097
+ };
1098
+ json!({
1099
+ "ok": ok,
1100
+ "summary": summary,
1101
+ "session_name": session_name.as_str(),
1102
+ "dry_run": launch.dry_run,
1103
+ "next_actions": next_actions,
1104
+ "worker_readiness": readiness_json,
1105
+ })
1106
+ }
469
1107
  crate::lifecycle::QuickStartReport::ExistingRuntime {
470
1108
  team,
471
1109
  session_name,
@@ -595,35 +1233,65 @@ pub mod diagnose_port {
595
1233
 
596
1234
  fn secret_scan(workspace: &Path) -> Value {
597
1235
  let mut findings = Vec::new();
598
- scan_secret_dir(workspace, workspace, &mut findings);
1236
+ let mut scanned = 0usize;
1237
+ scan_secret_dir(workspace, workspace, 0, &mut scanned, &mut findings);
599
1238
  json!({
600
1239
  "ok": findings.is_empty(),
601
1240
  "findings": findings,
602
1241
  })
603
1242
  }
604
1243
 
605
- fn scan_secret_dir(root: &Path, dir: &Path, findings: &mut Vec<Value>) {
1244
+ const SECRET_SCAN_MAX_DEPTH: usize = 4;
1245
+ const SECRET_SCAN_MAX_ENTRIES: usize = 512;
1246
+ const SECRET_SCAN_MAX_FILE_BYTES: u64 = 128 * 1024;
1247
+
1248
+ fn scan_secret_dir(root: &Path, dir: &Path, depth: usize, scanned: &mut usize, findings: &mut Vec<Value>) {
1249
+ if depth > SECRET_SCAN_MAX_DEPTH || *scanned >= SECRET_SCAN_MAX_ENTRIES {
1250
+ return;
1251
+ }
606
1252
  let Ok(entries) = std::fs::read_dir(dir) else {
607
1253
  return;
608
1254
  };
609
1255
  for entry in entries.flatten() {
1256
+ if *scanned >= SECRET_SCAN_MAX_ENTRIES {
1257
+ return;
1258
+ }
1259
+ *scanned = scanned.saturating_add(1);
610
1260
  let path = entry.path();
611
1261
  let name = path.file_name().map(|s| s.to_string_lossy());
612
1262
  if name.as_deref() == Some(".team") || name.as_deref() == Some(".git") {
613
1263
  continue;
614
1264
  }
615
- if path.is_dir() {
616
- scan_secret_dir(root, &path, findings);
1265
+ let Ok(file_type) = entry.file_type() else {
1266
+ continue;
1267
+ };
1268
+ if file_type.is_dir() {
1269
+ scan_secret_dir(root, &path, depth.saturating_add(1), scanned, findings);
617
1270
  continue;
618
1271
  }
619
- scan_secret_file(root, &path, findings);
1272
+ if file_type.is_file() {
1273
+ scan_secret_file(root, &path, findings);
1274
+ }
620
1275
  }
621
1276
  }
622
1277
 
623
1278
  fn scan_secret_file(root: &Path, path: &Path, findings: &mut Vec<Value>) {
624
- let Ok(text) = std::fs::read_to_string(path) else {
1279
+ let Ok(metadata) = std::fs::metadata(path) else {
625
1280
  return;
626
1281
  };
1282
+ if !metadata.is_file() || metadata.len() > SECRET_SCAN_MAX_FILE_BYTES {
1283
+ return;
1284
+ }
1285
+ let Ok(file) = std::fs::File::open(path) else {
1286
+ return;
1287
+ };
1288
+ let mut text = String::new();
1289
+ if std::io::Read::take(file, SECRET_SCAN_MAX_FILE_BYTES)
1290
+ .read_to_string(&mut text)
1291
+ .is_err()
1292
+ {
1293
+ return;
1294
+ }
627
1295
  for (idx, line) in text.lines().enumerate() {
628
1296
  if line.contains("OPENAI_API_KEY=") || line.contains("ANTHROPIC_API_KEY=") {
629
1297
  let rel = path.strip_prefix(root).unwrap_or(path);