@team-agent/installer 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. package/Cargo.lock +34 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/Cargo.toml +1 -1
  4. package/crates/team-agent/src/cli/adapters.rs +234 -26
  5. package/crates/team-agent/src/cli/diagnose.rs +144 -10
  6. package/crates/team-agent/src/cli/emit.rs +289 -54
  7. package/crates/team-agent/src/cli/leader.rs +37 -8
  8. package/crates/team-agent/src/cli/mod.rs +1281 -196
  9. package/crates/team-agent/src/cli/status_port.rs +195 -46
  10. package/crates/team-agent/src/cli/tests/divergence.rs +1 -2
  11. package/crates/team-agent/src/cli/tests/lane_c.rs +23 -13
  12. package/crates/team-agent/src/cli/tests/main_preserved.rs +2 -0
  13. package/crates/team-agent/src/cli/tests/run_delegation.rs +59 -3
  14. package/crates/team-agent/src/cli/types.rs +18 -0
  15. package/crates/team-agent/src/compiler.rs +15 -5
  16. package/crates/team-agent/src/coordinator/health.rs +95 -17
  17. package/crates/team-agent/src/coordinator/mod.rs +4 -0
  18. package/crates/team-agent/src/coordinator/runtime_detectors.rs +500 -0
  19. package/crates/team-agent/src/coordinator/runtime_observation.rs +58 -0
  20. package/crates/team-agent/src/coordinator/tick.rs +222 -69
  21. package/crates/team-agent/src/coordinator/types.rs +15 -3
  22. package/crates/team-agent/src/db/schema.rs +37 -2
  23. package/crates/team-agent/src/diagnose/comms.rs +226 -0
  24. package/crates/team-agent/src/diagnose/mod.rs +45 -0
  25. package/crates/team-agent/src/diagnose/orphans.rs +658 -0
  26. package/crates/team-agent/src/fake_worker.rs +146 -3
  27. package/crates/team-agent/src/leader/start.rs +121 -23
  28. package/crates/team-agent/src/leader/types.rs +44 -1
  29. package/crates/team-agent/src/lib.rs +3 -0
  30. package/crates/team-agent/src/lifecycle/display.rs +645 -47
  31. package/crates/team-agent/src/lifecycle/launch.rs +1061 -146
  32. package/crates/team-agent/src/lifecycle/mod.rs +2 -0
  33. package/crates/team-agent/src/lifecycle/profile_launch.rs +810 -0
  34. package/crates/team-agent/src/lifecycle/profile_smoke.rs +522 -0
  35. package/crates/team-agent/src/lifecycle/restart/agent.rs +99 -23
  36. package/crates/team-agent/src/lifecycle/restart/common.rs +183 -24
  37. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +498 -22
  38. package/crates/team-agent/src/lifecycle/restart/remove.rs +27 -7
  39. package/crates/team-agent/src/lifecycle/restart/team_state.rs +19 -0
  40. package/crates/team-agent/src/lifecycle/restart.rs +24 -1
  41. package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +5 -5
  42. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +37 -7
  43. package/crates/team-agent/src/lifecycle/types.rs +19 -0
  44. package/crates/team-agent/src/mcp_server/helpers.rs +1 -0
  45. package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +341 -0
  46. package/crates/team-agent/src/mcp_server/lifecycle_tools/mod.rs +10 -0
  47. package/crates/team-agent/src/mcp_server/lifecycle_tools/state_status.rs +158 -0
  48. package/crates/team-agent/src/mcp_server/mod.rs +3 -74
  49. package/crates/team-agent/src/mcp_server/tests/scoped.rs +1 -1
  50. package/crates/team-agent/src/mcp_server/tests/send.rs +6 -5
  51. package/crates/team-agent/src/mcp_server/tools.rs +312 -111
  52. package/crates/team-agent/src/mcp_server/types.rs +6 -4
  53. package/crates/team-agent/src/mcp_server/wire.rs +19 -7
  54. package/crates/team-agent/src/message_store.rs +21 -4
  55. package/crates/team-agent/src/messaging/delivery.rs +470 -59
  56. package/crates/team-agent/src/messaging/mod.rs +9 -6
  57. package/crates/team-agent/src/messaging/results.rs +353 -63
  58. package/crates/team-agent/src/messaging/selftest.rs +199 -12
  59. package/crates/team-agent/src/messaging/send.rs +35 -3
  60. package/crates/team-agent/src/messaging/tests/runtime.rs +19 -4
  61. package/crates/team-agent/src/messaging/types.rs +11 -3
  62. package/crates/team-agent/src/os_probe.rs +119 -0
  63. package/crates/team-agent/src/packaging/migrate.rs +10 -2
  64. package/crates/team-agent/src/packaging/tests.rs +23 -0
  65. package/crates/team-agent/src/provider/adapter.rs +564 -63
  66. package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +1 -7
  67. package/crates/team-agent/src/provider/classify.rs +51 -4
  68. package/crates/team-agent/src/provider/helpers.rs +10 -1
  69. package/crates/team-agent/src/provider/startup_prompt.rs +94 -0
  70. package/crates/team-agent/src/provider/types.rs +47 -0
  71. package/crates/team-agent/src/session_capture.rs +616 -0
  72. package/crates/team-agent/src/state/persist.rs +170 -1
  73. package/crates/team-agent/src/state/projection.rs +141 -8
  74. package/crates/team-agent/src/state/selector.rs +5 -2
  75. package/crates/team-agent/src/tmux_backend.rs +161 -64
  76. package/crates/team-agent/src/transport/test_support.rs +9 -0
  77. package/crates/team-agent/src/transport/tests/wire.rs +4 -0
  78. package/crates/team-agent/src/transport.rs +13 -2
  79. package/package.json +4 -4
@@ -0,0 +1,658 @@
1
+ use std::collections::{BTreeMap, BTreeSet};
2
+ use std::path::{Path, PathBuf};
3
+ use std::process::Command;
4
+ use std::time::{Duration, Instant};
5
+
6
+ use serde_json::{json, Value};
7
+
8
+ use crate::cli::CliError;
9
+ use crate::coordinator::health::{
10
+ coordinator_metadata_ok, pid_is_running, read_coordinator_metadata, terminate_pid_tree,
11
+ };
12
+ use crate::coordinator::types::{OrphanReason, Pid, WorkspacePath};
13
+ use crate::tmux_backend::TmuxBackend;
14
+ use crate::transport::{SessionName, Transport};
15
+
16
+ #[derive(Debug, Clone)]
17
+ struct OrphanRecord {
18
+ kind: &'static str,
19
+ pid: Option<Pid>,
20
+ session: Option<String>,
21
+ tmux_socket: Option<String>,
22
+ workspace: Option<PathBuf>,
23
+ reason: OrphanReason,
24
+ command: Option<String>,
25
+ action: &'static str,
26
+ }
27
+
28
+ #[derive(Debug, Clone)]
29
+ struct ScanReport {
30
+ scanned: usize,
31
+ orphans: Vec<OrphanRecord>,
32
+ }
33
+
34
+ pub fn orphan_gate_json(fix: bool, confirm: bool) -> Result<Value, CliError> {
35
+ if fix && !confirm {
36
+ return Ok(json!({
37
+ "ok": false,
38
+ "gate": "orphans",
39
+ "status": "refused",
40
+ "reason": "fix_requires_confirm",
41
+ "action": "re-run with --gate orphans --fix --confirm",
42
+ }));
43
+ }
44
+ let report = scan_orphans_bounded(false);
45
+ if report.orphans.is_empty() {
46
+ return Ok(json!({
47
+ "ok": true,
48
+ "gate": "orphans",
49
+ "status": "passed",
50
+ "scanned": report.scanned,
51
+ "dry_run": !fix,
52
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
53
+ "action_required": false,
54
+ "fix": fix,
55
+ "orphans": [],
56
+ }));
57
+ }
58
+ if fix {
59
+ return fix_orphans(report);
60
+ }
61
+ Ok(json!({
62
+ "ok": false,
63
+ "gate": "orphans",
64
+ "status": "failed",
65
+ "scanned": report.scanned,
66
+ "dry_run": true,
67
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
68
+ "action_required": true,
69
+ "fix": false,
70
+ "orphans": orphan_values(&report.orphans),
71
+ }))
72
+ }
73
+
74
+ pub fn cleanup_orphans_json(confirm: bool) -> Result<Value, CliError> {
75
+ let report = scan_orphans_bounded(false);
76
+ if confirm {
77
+ if report.orphans.is_empty() {
78
+ return Ok(json!({
79
+ "ok": true,
80
+ "scanned": report.scanned,
81
+ "orphans": [],
82
+ "dry_run": false,
83
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
84
+ "killed": [],
85
+ "failed": [],
86
+ }));
87
+ }
88
+ return cleanup_confirmed(report);
89
+ }
90
+ Ok(json!({
91
+ "ok": true,
92
+ "scanned": report.scanned,
93
+ "orphans": orphan_values(&report.orphans),
94
+ "dry_run": true,
95
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
96
+ "action_required": "re-run with --confirm to send SIGTERM",
97
+ }))
98
+ }
99
+
100
+ pub fn has_orphan_residue() -> bool {
101
+ !scan_orphans_bounded(false).orphans.is_empty()
102
+ }
103
+
104
+ pub fn orphan_blocker_detail() -> String {
105
+ let report = scan_orphans_bounded(false);
106
+ if report.orphans.is_empty() {
107
+ return "no orphan coordinator residue detected".to_string();
108
+ }
109
+ report
110
+ .orphans
111
+ .iter()
112
+ .map(|orphan| {
113
+ let target = orphan
114
+ .pid
115
+ .map(|pid| format!("pid={pid}"))
116
+ .or_else(|| orphan.session.as_ref().map(|s| format!("session={s}")))
117
+ .unwrap_or_else(|| "target=unknown".to_string());
118
+ let workspace = orphan
119
+ .workspace
120
+ .as_ref()
121
+ .map(|p| p.to_string_lossy().to_string())
122
+ .unwrap_or_else(|| "workspace=unknown".to_string());
123
+ format!(
124
+ "{} {target} workspace={workspace} reason={}",
125
+ orphan.kind,
126
+ reason_key(&orphan.reason)
127
+ )
128
+ })
129
+ .collect::<Vec<_>>()
130
+ .join("; ")
131
+ }
132
+
133
+ fn fix_orphans(report: ScanReport) -> Result<Value, CliError> {
134
+ let cleanup = cleanup_report(report);
135
+ let residual = scan_orphans(false);
136
+ Ok(json!({
137
+ "ok": residual.orphans.is_empty() && cleanup.failed.is_empty(),
138
+ "gate": "orphans",
139
+ "status": if residual.orphans.is_empty() && cleanup.failed.is_empty() { "fixed" } else { "failed" },
140
+ "scanned": cleanup.scanned,
141
+ "dry_run": false,
142
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
143
+ "action_required": !residual.orphans.is_empty() || !cleanup.failed.is_empty(),
144
+ "fix": true,
145
+ "orphans": orphan_values(&residual.orphans),
146
+ "killed": cleanup.killed,
147
+ "failed": cleanup.failed,
148
+ }))
149
+ }
150
+
151
+ fn cleanup_confirmed(report: ScanReport) -> Result<Value, CliError> {
152
+ let cleanup = cleanup_report(report);
153
+ let residual = scan_orphans(false);
154
+ Ok(json!({
155
+ "ok": residual.orphans.is_empty() && cleanup.failed.is_empty(),
156
+ "scanned": cleanup.scanned,
157
+ "orphans": orphan_values(&residual.orphans),
158
+ "dry_run": false,
159
+ "scanned_at": chrono::Utc::now().to_rfc3339(),
160
+ "killed": cleanup.killed,
161
+ "failed": cleanup.failed,
162
+ }))
163
+ }
164
+
165
+ struct CleanupReport {
166
+ scanned: usize,
167
+ killed: Vec<Value>,
168
+ failed: Vec<Value>,
169
+ }
170
+
171
+ fn cleanup_report(report: ScanReport) -> CleanupReport {
172
+ let protected = protected_pids();
173
+ let mut killed = Vec::new();
174
+ let mut failed = Vec::new();
175
+ for orphan in &report.orphans {
176
+ if let Some(pid) = orphan.pid {
177
+ if protected.contains(&pid.get()) {
178
+ failed.push(orphan_value(orphan, "skipped"));
179
+ continue;
180
+ }
181
+ if terminate_pid_tree(pid) {
182
+ killed.push(orphan_value(orphan, "killed"));
183
+ } else {
184
+ failed.push(orphan_value(orphan, "failed"));
185
+ }
186
+ continue;
187
+ }
188
+ if kill_tmux_session(orphan) {
189
+ killed.push(orphan_value(orphan, "killed"));
190
+ } else {
191
+ failed.push(orphan_value(orphan, "failed"));
192
+ }
193
+ }
194
+ CleanupReport {
195
+ scanned: report.scanned,
196
+ killed,
197
+ failed,
198
+ }
199
+ }
200
+
201
+ fn scan_orphans(include_unparsed: bool) -> ScanReport {
202
+ let protected = protected_pids();
203
+ let mut scanned = 0;
204
+ let mut orphans = Vec::new();
205
+ for process in coordinator_processes() {
206
+ if protected.contains(&process.pid.get()) {
207
+ continue;
208
+ }
209
+ scanned += 1;
210
+ let Some(workspace) = parse_workspace_arg(&process.command) else {
211
+ if include_unparsed {
212
+ orphans.push(OrphanRecord {
213
+ kind: "coordinator_process",
214
+ pid: Some(process.pid),
215
+ session: None,
216
+ tmux_socket: None,
217
+ workspace: None,
218
+ reason: OrphanReason::CmdlineUnparsed,
219
+ command: Some(process.command),
220
+ action: "would_kill",
221
+ });
222
+ }
223
+ continue;
224
+ };
225
+ if let Some(reason) = classify_workspace_orphan(&workspace, process.pid) {
226
+ orphans.push(OrphanRecord {
227
+ kind: "coordinator_process",
228
+ pid: Some(process.pid),
229
+ session: None,
230
+ tmux_socket: None,
231
+ workspace: Some(workspace),
232
+ reason,
233
+ command: Some(process.command),
234
+ action: "would_kill",
235
+ });
236
+ }
237
+ }
238
+ for orphan in coordinator_pid_file_orphans() {
239
+ scanned += 1;
240
+ orphans.push(orphan);
241
+ }
242
+ for orphan in tmux_session_orphans() {
243
+ scanned += 1;
244
+ orphans.push(orphan);
245
+ }
246
+ for orphan in provider_mcp_process_orphans() {
247
+ scanned += 1;
248
+ orphans.push(orphan);
249
+ }
250
+ ScanReport { scanned, orphans }
251
+ }
252
+
253
+ fn coordinator_pid_file_orphans() -> Vec<OrphanRecord> {
254
+ temp_scan_roots()
255
+ .into_iter()
256
+ .flat_map(|root| match std::fs::read_dir(root) {
257
+ Ok(entries) => entries.filter_map(Result::ok).collect::<Vec<_>>(),
258
+ Err(_) => Vec::new(),
259
+ })
260
+ .filter_map(|entry| {
261
+ let workspace = entry.path();
262
+ if !workspace.is_dir() || ephemeral_workspace_hint(&workspace).is_none() {
263
+ return None;
264
+ }
265
+ let pid_path = crate::model::paths::runtime_dir(&workspace).join("coordinator.pid");
266
+ let pid = read_pid_file(&pid_path)?;
267
+ let workspace_path = WorkspacePath::new(workspace.clone());
268
+ let metadata = read_coordinator_metadata(&workspace_path);
269
+ let reason = if pid_is_running(pid).ok() != Some(true) {
270
+ OrphanReason::PidNotRunning
271
+ } else if metadata.is_some() && !coordinator_metadata_ok(metadata.as_ref(), pid) {
272
+ OrphanReason::MetadataMismatch
273
+ } else {
274
+ OrphanReason::EphemeralTempdirPattern {
275
+ hint: ephemeral_workspace_hint(&workspace)
276
+ .unwrap_or_else(|| "ephemeral_workspace".to_string()),
277
+ }
278
+ };
279
+ Some(OrphanRecord {
280
+ kind: "coordinator_metadata",
281
+ pid: Some(pid),
282
+ session: None,
283
+ tmux_socket: None,
284
+ workspace: Some(workspace),
285
+ reason,
286
+ command: None,
287
+ action: "would_kill",
288
+ })
289
+ })
290
+ .collect()
291
+ }
292
+
293
+ fn tmux_session_orphans() -> Vec<OrphanRecord> {
294
+ tmux_socket_names()
295
+ .into_iter()
296
+ .flat_map(|socket| {
297
+ tmux_list_panes(&socket)
298
+ .into_iter()
299
+ .filter_map(move |pane| {
300
+ let workspace = pane.workspace?;
301
+ if !is_orphan_marker_workspace(&workspace) {
302
+ return None;
303
+ }
304
+ let reason = classify_workspace_without_pid(&workspace)?;
305
+ Some(OrphanRecord {
306
+ kind: "tmux_session",
307
+ pid: None,
308
+ session: Some(pane.session),
309
+ tmux_socket: Some(socket.clone()),
310
+ workspace: Some(workspace),
311
+ reason,
312
+ command: pane.command,
313
+ action: "would_kill",
314
+ })
315
+ })
316
+ })
317
+ .collect()
318
+ }
319
+
320
/// The subset of a tmux pane listing that the orphan scan needs.
#[derive(Debug)]
struct TmuxPaneRow {
    /// Name of the session the pane belongs to.
    session: String,
    /// Current path of the pane, if tmux reported one.
    workspace: Option<PathBuf>,
    /// Current command of the pane, if tmux reported one.
    command: Option<String>,
}
326
+
327
+ fn tmux_socket_names() -> Vec<String> {
328
+ let mut names = BTreeSet::new();
329
+ for root in tmux_socket_roots() {
330
+ let Ok(entries) = std::fs::read_dir(root) else {
331
+ continue;
332
+ };
333
+ for entry in entries.filter_map(Result::ok) {
334
+ let name = entry.file_name().to_string_lossy().to_string();
335
+ if name.starts_with("ta-") {
336
+ names.insert(name);
337
+ }
338
+ }
339
+ }
340
+ names.into_iter().collect()
341
+ }
342
+
343
+ fn tmux_socket_roots() -> Vec<PathBuf> {
344
+ let uid = unsafe { libc::geteuid() };
345
+ let mut roots = vec![PathBuf::from(format!("/tmp/tmux-{uid}"))];
346
+ if let Some(tmpdir) = std::env::var_os("TMPDIR") {
347
+ roots.push(PathBuf::from(tmpdir).join(format!("tmux-{uid}")));
348
+ }
349
+ roots.sort();
350
+ roots.dedup();
351
+ roots
352
+ }
353
+
354
+ fn tmux_list_panes(socket: &str) -> Vec<TmuxPaneRow> {
355
+ TmuxBackend::for_socket_name(socket)
356
+ .list_targets()
357
+ .unwrap_or_default()
358
+ .into_iter()
359
+ .map(|pane| TmuxPaneRow {
360
+ session: pane.session.as_str().to_string(),
361
+ workspace: pane.current_path,
362
+ command: pane.current_command,
363
+ })
364
+ .collect()
365
+ }
366
+
367
+ fn provider_mcp_process_orphans() -> Vec<OrphanRecord> {
368
+ ps_command_rows()
369
+ .into_iter()
370
+ .filter(|row| is_provider_or_mcp_workspace_command(&row.command))
371
+ .filter_map(|process| {
372
+ let workspace = parse_workspace_arg(&process.command)?;
373
+ if !is_orphan_marker_workspace(&workspace) {
374
+ return None;
375
+ }
376
+ let reason = classify_workspace_without_pid(&workspace)?;
377
+ Some(OrphanRecord {
378
+ kind: if process.command.contains("mcp-server") {
379
+ "mcp_process"
380
+ } else {
381
+ "provider_process"
382
+ },
383
+ pid: Some(process.pid),
384
+ session: None,
385
+ tmux_socket: None,
386
+ workspace: Some(workspace),
387
+ reason,
388
+ command: Some(process.command),
389
+ action: "would_kill",
390
+ })
391
+ })
392
+ .collect()
393
+ }
394
+
395
/// Returns the temp directories to search for leftover workspaces: the value
/// of `TMPDIR` (when set) and `std::env::temp_dir()`, sorted and
/// de-duplicated.
fn temp_scan_roots() -> Vec<PathBuf> {
    let mut roots: Vec<PathBuf> = std::env::var_os("TMPDIR")
        .map(PathBuf::from)
        .into_iter()
        .chain(std::iter::once(std::env::temp_dir()))
        .collect();
    roots.sort();
    roots.dedup();
    roots
}
405
+
406
+ fn read_pid_file(path: &Path) -> Option<Pid> {
407
+ let text = std::fs::read_to_string(path).ok()?;
408
+ let pid = text.trim().parse::<u32>().ok()?;
409
+ Some(Pid::new(pid))
410
+ }
411
+
412
+ fn scan_orphans_bounded(include_unparsed: bool) -> ScanReport {
413
+ let deadline = Instant::now() + Duration::from_millis(800);
414
+ let mut scanned = 0;
415
+ let mut by_key = BTreeMap::new();
416
+ loop {
417
+ let report = scan_orphans(include_unparsed);
418
+ scanned = scanned.max(report.scanned);
419
+ for orphan in report.orphans {
420
+ by_key.insert(orphan_key(&orphan), orphan);
421
+ }
422
+ if !by_key.is_empty() || Instant::now() >= deadline {
423
+ break;
424
+ }
425
+ std::thread::sleep(Duration::from_millis(25));
426
+ }
427
+ ScanReport {
428
+ scanned,
429
+ orphans: by_key.into_values().collect(),
430
+ }
431
+ }
432
+
433
+ fn orphan_key(orphan: &OrphanRecord) -> String {
434
+ if let Some(pid) = orphan.pid {
435
+ return format!("pid:{pid}");
436
+ }
437
+ if let Some(session) = &orphan.session {
438
+ return format!(
439
+ "session:{}:{session}",
440
+ orphan.tmux_socket.as_deref().unwrap_or("default")
441
+ );
442
+ }
443
+ orphan.kind.to_string()
444
+ }
445
+
446
+ #[derive(Debug, Clone)]
447
+ struct ProcessRow {
448
+ pid: Pid,
449
+ command: String,
450
+ }
451
+
452
+ fn coordinator_processes() -> Vec<ProcessRow> {
453
+ ps_command_rows()
454
+ .into_iter()
455
+ .filter(|row| is_team_agent_coordinator_command(&row.command))
456
+ .collect()
457
+ }
458
+
459
+ fn ps_command_rows() -> Vec<ProcessRow> {
460
+ let output = match crate::os_probe::bounded_command_output_with_probe(
461
+ Command::new("ps").args(["-axww", "-o", "pid=,command="]),
462
+ "ps_table",
463
+ None,
464
+ )
465
+ {
466
+ Ok(output) if output.status.success() => output,
467
+ _ => return Vec::new(),
468
+ };
469
+ String::from_utf8_lossy(&output.stdout)
470
+ .lines()
471
+ .filter_map(parse_ps_command_line)
472
+ .collect()
473
+ }
474
+
475
+ fn parse_ps_command_line(line: &str) -> Option<ProcessRow> {
476
+ let line = line.trim_start();
477
+ let split = line
478
+ .find(char::is_whitespace)
479
+ .unwrap_or(line.len());
480
+ let pid = line.get(..split)?.trim().parse::<u32>().ok()?;
481
+ let command = line.get(split..)?.trim().to_string();
482
+ Some(ProcessRow {
483
+ pid: Pid::new(pid),
484
+ command,
485
+ })
486
+ }
487
+
488
/// Returns `true` when `command` contains all of `team-agent`, `coordinator`
/// and `--workspace` as substrings.
fn is_team_agent_coordinator_command(command: &str) -> bool {
    const REQUIRED: [&str; 3] = ["team-agent", "coordinator", "--workspace"];
    REQUIRED.iter().all(|needle| command.contains(*needle))
}
493
+
494
/// Returns `true` when `command` carries a `--workspace` flag and also looks
/// like an MCP server, a `codex`/`claude` invocation, `claude-code`, or a
/// fake worker.
///
/// `codex` and `claude` only match as a space-delimited word that is not the
/// first token of the command (a leading space is required).
fn is_provider_or_mcp_workspace_command(command: &str) -> bool {
    const ANYWHERE: [&str; 5] = [
        "mcp-server",
        " codex ",
        " claude ",
        "claude-code",
        "fake-worker",
    ];
    const AT_END: [&str; 2] = [" codex", " claude"];

    if !command.contains("--workspace") {
        return false;
    }
    ANYWHERE.iter().any(|needle| command.contains(*needle))
        || AT_END.iter().any(|suffix| command.ends_with(*suffix))
}
504
+
505
/// Extracts the workspace path from a whitespace-separated command line.
///
/// Accepts `--workspace=<path>` (an empty value is skipped and scanning
/// continues) and `--workspace <path>` (the next token is taken as-is; a
/// trailing `--workspace` yields `None`). The first usable occurrence wins.
fn parse_workspace_arg(command: &str) -> Option<PathBuf> {
    let mut tokens = command.split_whitespace();
    while let Some(token) = tokens.next() {
        match token.strip_prefix("--workspace=") {
            Some(value) if !value.is_empty() => return Some(PathBuf::from(value)),
            _ => {}
        }
        if token == "--workspace" {
            return tokens.next().map(PathBuf::from);
        }
    }
    None
}
519
+
520
+ fn classify_workspace_orphan(workspace: &Path, pid: Pid) -> Option<OrphanReason> {
521
+ if !workspace.is_absolute() {
522
+ return None;
523
+ }
524
+ if let Some(hint) = ephemeral_workspace_hint(workspace) {
525
+ return Some(OrphanReason::EphemeralTempdirPattern { hint });
526
+ }
527
+ if !workspace.exists() {
528
+ return Some(OrphanReason::WorkspacePathMissing);
529
+ }
530
+ let workspace_path = WorkspacePath::new(workspace.to_path_buf());
531
+ let metadata = read_coordinator_metadata(&workspace_path);
532
+ if metadata.is_some() && !coordinator_metadata_ok(metadata.as_ref(), pid) {
533
+ return Some(OrphanReason::MetadataMismatch);
534
+ }
535
+ if pid_is_running(pid).ok() == Some(false) {
536
+ return Some(OrphanReason::PidNotRunning);
537
+ }
538
+ None
539
+ }
540
+
541
+ fn classify_workspace_without_pid(workspace: &Path) -> Option<OrphanReason> {
542
+ if !workspace.is_absolute() {
543
+ return None;
544
+ }
545
+ if let Some(hint) = ephemeral_workspace_hint(workspace) {
546
+ return Some(OrphanReason::EphemeralTempdirPattern { hint });
547
+ }
548
+ if !workspace.exists() {
549
+ return Some(OrphanReason::WorkspacePathMissing);
550
+ }
551
+ None
552
+ }
553
+
554
/// Returns the first known ephemeral-workspace marker contained in the
/// (lossily rendered) path, or `None` when the path contains none of them.
fn ephemeral_workspace_hint(workspace: &Path) -> Option<String> {
    const MARKERS: [&str; 2] = ["ta_doctor_comms_orphans-", "team-agent-watcher-dedupe"];

    let rendered = workspace.to_string_lossy();
    for marker in MARKERS.iter() {
        if rendered.contains(*marker) {
            return Some((*marker).to_string());
        }
    }
    None
}
565
+
566
+ fn is_orphan_marker_workspace(workspace: &Path) -> bool {
567
+ ephemeral_workspace_hint(workspace).is_some()
568
+ }
569
+
570
+ fn protected_pids() -> BTreeSet<u32> {
571
+ let mut protected = BTreeSet::new();
572
+ let current = std::process::id();
573
+ protected.insert(current);
574
+ let parents = ps_parent_map();
575
+ let mut cursor = current;
576
+ while let Some(parent) = parents.get(&cursor).copied() {
577
+ if parent == 0 || !protected.insert(parent) {
578
+ break;
579
+ }
580
+ cursor = parent;
581
+ }
582
+ protected
583
+ }
584
+
585
+ fn ps_parent_map() -> BTreeMap<u32, u32> {
586
+ let output = match crate::os_probe::bounded_command_output_with_probe(
587
+ Command::new("ps").args(["-axo", "pid=,ppid="]),
588
+ "ps_parent",
589
+ None,
590
+ )
591
+ {
592
+ Ok(output) if output.status.success() => output,
593
+ _ => return BTreeMap::new(),
594
+ };
595
+ String::from_utf8_lossy(&output.stdout)
596
+ .lines()
597
+ .filter_map(|line| {
598
+ let mut parts = line.split_whitespace();
599
+ let pid = parts.next()?.parse::<u32>().ok()?;
600
+ let ppid = parts.next()?.parse::<u32>().ok()?;
601
+ Some((pid, ppid))
602
+ })
603
+ .collect()
604
+ }
605
+
606
+ fn orphan_values(orphans: &[OrphanRecord]) -> Vec<Value> {
607
+ orphans
608
+ .iter()
609
+ .map(|orphan| orphan_value(orphan, orphan.action))
610
+ .collect()
611
+ }
612
+
613
+ fn orphan_value(orphan: &OrphanRecord, action: &str) -> Value {
614
+ let mut value = json!({
615
+ "kind": orphan.kind,
616
+ "reason": reason_key(&orphan.reason),
617
+ "action": action,
618
+ });
619
+ if let Some(pid) = orphan.pid {
620
+ value["pid"] = json!(pid.get());
621
+ }
622
+ if let Some(session) = &orphan.session {
623
+ value["session"] = json!(session);
624
+ }
625
+ if let Some(socket) = &orphan.tmux_socket {
626
+ value["tmux_socket"] = json!(socket);
627
+ }
628
+ if let Some(workspace) = &orphan.workspace {
629
+ value["workspace"] = json!(workspace.to_string_lossy().to_string());
630
+ }
631
+ if let Some(command) = &orphan.command {
632
+ value["command"] = json!(command);
633
+ }
634
+ if let OrphanReason::EphemeralTempdirPattern { hint } = &orphan.reason {
635
+ value["hint"] = json!(hint);
636
+ }
637
+ value
638
+ }
639
+
640
+ fn kill_tmux_session(orphan: &OrphanRecord) -> bool {
641
+ let (Some(socket), Some(session)) = (&orphan.tmux_socket, &orphan.session) else {
642
+ return false;
643
+ };
644
+ TmuxBackend::for_socket_name(socket)
645
+ .kill_session(&SessionName::new(session.clone()))
646
+ .is_ok()
647
+ }
648
+
649
+ fn reason_key(reason: &OrphanReason) -> &'static str {
650
+ match reason {
651
+ OrphanReason::WorkspacePathMissing => "workspace_path_missing",
652
+ OrphanReason::EphemeralTempdirPattern { .. } => "ephemeral_tempdir_pattern",
653
+ OrphanReason::WorkspaceAlive => "workspace_alive",
654
+ OrphanReason::CmdlineUnparsed => "cmdline_unparsed",
655
+ OrphanReason::MetadataMismatch => "metadata_mismatch",
656
+ OrphanReason::PidNotRunning => "pid_not_running",
657
+ }
658
+ }