@team-agent/installer 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/package.json +1 -1
  2. package/schemas/team.schema.json +6 -0
  3. package/src/team_agent/abnormal_track.py +253 -0
  4. package/src/team_agent/approvals/runtime_prompts.py +1 -1
  5. package/src/team_agent/cli/commands.py +104 -3
  6. package/src/team_agent/cli/parser.py +10 -1
  7. package/src/team_agent/compiler.py +1 -1
  8. package/src/team_agent/coordinator/lifecycle.py +23 -2
  9. package/src/team_agent/diagnose/orphan_cleanup.py +199 -28
  10. package/src/team_agent/display/__init__.py +31 -0
  11. package/src/team_agent/display/adaptive.py +425 -0
  12. package/src/team_agent/display/backend.py +46 -0
  13. package/src/team_agent/display/close.py +6 -0
  14. package/src/team_agent/display/rebuild.py +102 -0
  15. package/src/team_agent/display/tiling.py +156 -0
  16. package/src/team_agent/display/worker_window.py +4 -0
  17. package/src/team_agent/display/workspace.py +36 -127
  18. package/src/team_agent/idle_predicate.py +200 -0
  19. package/src/team_agent/idle_takeover.py +59 -0
  20. package/src/team_agent/idle_takeover_wiring.py +111 -0
  21. package/src/team_agent/launch/core.py +14 -4
  22. package/src/team_agent/leader/__init__.py +444 -61
  23. package/src/team_agent/lifecycle/operations.py +1 -0
  24. package/src/team_agent/lifecycle/start.py +1 -1
  25. package/src/team_agent/message_store/core.py +38 -11
  26. package/src/team_agent/message_store/leader_notification_log.py +47 -26
  27. package/src/team_agent/message_store/schema.py +8 -2
  28. package/src/team_agent/messaging/delivery.py +336 -1
  29. package/src/team_agent/messaging/leader.py +13 -4
  30. package/src/team_agent/messaging/leader_api_errors.py +216 -0
  31. package/src/team_agent/messaging/leader_panes.py +294 -0
  32. package/src/team_agent/messaging/scheduler.py +12 -0
  33. package/src/team_agent/messaging/send.py +54 -26
  34. package/src/team_agent/messaging/tmux_io.py +202 -33
  35. package/src/team_agent/messaging/tmux_prompt.py +87 -0
  36. package/src/team_agent/messaging/trust_auto_answer.py +52 -0
  37. package/src/team_agent/provider_state/README.md +78 -0
  38. package/src/team_agent/provider_state/__init__.py +86 -0
  39. package/src/team_agent/provider_state/claude.py +86 -0
  40. package/src/team_agent/provider_state/codex.py +84 -0
  41. package/src/team_agent/provider_state/common.py +207 -0
  42. package/src/team_agent/provider_state/registry.py +118 -0
  43. package/src/team_agent/restart/orchestration.py +215 -12
  44. package/src/team_agent/runtime.py +65 -15
  45. package/src/team_agent/sessions/capture.py +65 -15
  46. package/src/team_agent/spec.py +63 -3
  47. package/src/team_agent/status/queries.py +32 -1
  48. package/src/team_agent/wake.py +58 -0
  49. package/src/team_agent/watch/__init__.py +145 -0
@@ -16,7 +16,6 @@ import signal
16
16
  import subprocess
17
17
  import time
18
18
  from datetime import datetime, timezone
19
- from pathlib import Path
20
19
  from typing import Any
21
20
 
22
21
  # Pattern: argv contains "team_agent.coordinator --workspace <path>" anywhere.
@@ -39,6 +38,7 @@ _EPHEMERAL_PATH_HINTS = (
39
38
  "team-agent-test-",
40
39
  )
41
40
  _SIGTERM_WAIT_SECONDS = 3.0
41
+ _SIGKILL_WAIT_SECONDS = 2.0
42
42
 
43
43
 
44
44
  def find_coordinator_processes(*, runner=subprocess.run) -> list[dict[str, Any]]:
@@ -91,7 +91,7 @@ def classify_orphan(entry: dict[str, Any]) -> tuple[bool, str]:
91
91
  workspace = entry.get("workspace")
92
92
  if not workspace:
93
93
  return False, "cmdline_unparsed"
94
- if not Path(workspace).exists():
94
+ if not os.path.exists(workspace):
95
95
  return True, "workspace_path_missing"
96
96
  for hint in _EPHEMERAL_PATH_HINTS:
97
97
  if hint in workspace:
@@ -104,12 +104,35 @@ def cleanup_orphan_coordinators(
104
104
  confirm: bool = False,
105
105
  runner=subprocess.run,
106
106
  killer=os.kill,
107
+ pg_killer=None,
108
+ pgid_getter=None,
107
109
  sleeper=time.sleep,
110
+ sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
111
+ sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
108
112
  ) -> dict[str, Any]:
109
113
  """Scan for orphan coordinators. Without confirm: dry-run (just classify and report).
110
- With confirm: SIGTERM each orphan and wait up to _SIGTERM_WAIT_SECONDS for the
111
- process to exit; report success/failure per pid."""
114
+ With confirm: SIGTERM each orphan, wait up to _SIGTERM_WAIT_SECONDS for graceful
115
+ exit; if still alive, escalate to SIGKILL and wait _SIGKILL_WAIT_SECONDS. Only
116
+ report status='failed' (with error='alive_after_sigkill') when the process
117
+ survives BOTH signals — that's extremely rare and almost always indicates a
118
+ zombie/uninterruptible-sleep kernel state.
119
+
120
+ Mac mini 2026-05-26 evidence: real orphan coordinators have been observed alive
121
+ 40+ hours; many of them never exit on SIGTERM (signal handler suppressed during
122
+ long sqlite reads, or the python interpreter is hosting an async loop that
123
+ swallows the term signal). SIGKILL escalation is required for production.
124
+
125
+ pg_killer / pgid_getter default to os.killpg / os.getpgid; mock them in tests.
126
+ If pgid_getter succeeds AND returns a pgid > 1 AND the pgid != pid (i.e. the
127
+ process leads its own process group with children), we signal the WHOLE group;
128
+ otherwise we signal the pid directly. This catches orphan coordinators that
129
+ spawned subprocess.Popen children which would otherwise survive a pid-only
130
+ SIGTERM."""
112
131
  now = datetime.now(timezone.utc).isoformat()
132
+ if pg_killer is None:
133
+ pg_killer = getattr(os, "killpg", None)
134
+ if pgid_getter is None:
135
+ pgid_getter = getattr(os, "getpgid", None)
113
136
  entries = find_coordinator_processes(runner=runner)
114
137
  classified: list[dict[str, Any]] = []
115
138
  orphans: list[dict[str, Any]] = []
@@ -131,30 +154,19 @@ def cleanup_orphan_coordinators(
131
154
  killed: list[dict[str, Any]] = []
132
155
  failed: list[dict[str, Any]] = []
133
156
  for entry in orphans:
134
- pid = entry["pid"]
135
- try:
136
- killer(pid, signal.SIGTERM)
137
- except (ProcessLookupError, PermissionError, OSError) as exc:
138
- failed.append({**entry, "error": str(exc)})
139
- continue
140
- # Wait briefly; if the process is still alive after _SIGTERM_WAIT_SECONDS,
141
- # mark as failed (caller may want to SIGKILL).
142
- deadline = time.monotonic() + _SIGTERM_WAIT_SECONDS
143
- gone = False
144
- while time.monotonic() < deadline:
145
- try:
146
- killer(pid, 0)
147
- except ProcessLookupError:
148
- gone = True
149
- break
150
- except (PermissionError, OSError):
151
- gone = True
152
- break
153
- sleeper(0.1)
154
- if gone:
155
- killed.append(entry)
157
+ outcome = _terminate_orphan(
158
+ entry["pid"], killer=killer, pg_killer=pg_killer,
159
+ pgid_getter=pgid_getter, sleeper=sleeper,
160
+ sigterm_wait_seconds=sigterm_wait_seconds,
161
+ sigkill_wait_seconds=sigkill_wait_seconds,
162
+ )
163
+ annotated = {**entry, **outcome}
164
+ if outcome.get("status") == "killed":
165
+ killed.append(annotated)
166
+ elif outcome.get("status") == "missing":
167
+ killed.append(annotated)
156
168
  else:
157
- failed.append({**entry, "error": "still_alive_after_sigterm"})
169
+ failed.append(annotated)
158
170
  return {
159
171
  "ok": True,
160
172
  "scanned": len(classified),
@@ -166,6 +178,162 @@ def cleanup_orphan_coordinators(
166
178
  }
167
179
 
168
180
 
181
+ def _terminate_orphan(
182
+ pid: int,
183
+ *,
184
+ killer,
185
+ pg_killer,
186
+ pgid_getter,
187
+ sleeper,
188
+ sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
189
+ sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
190
+ ) -> dict[str, Any]:
191
+ """SIGTERM → wait 3s → SIGKILL → wait 2s escalation. Returns one of:
192
+ {status: 'killed', sigkill_required: False, signaled: 'pid'|'pgid'}
193
+ {status: 'killed', sigkill_required: True, signaled: 'pid'|'pgid'}
194
+ {status: 'missing', error: '<exc>'} — process gone before SIGTERM
195
+ {status: 'failed', error: 'alive_after_sigkill'} — process survived both
196
+ {status: 'failed', error: '<exc>'} — permission denied / OS error
197
+ """
198
+ pgid, pgid_error = _safe_getpgid(pid, pgid_getter)
199
+ use_group = bool(pg_killer and pgid is not None and pgid > 1 and pgid != pid)
200
+ signaled = "pgid" if use_group else "pid"
201
+
202
+ def send(sig: int) -> tuple[bool, str | None]:
203
+ try:
204
+ if use_group:
205
+ pg_killer(pgid, sig)
206
+ else:
207
+ killer(pid, sig)
208
+ except ProcessLookupError:
209
+ return False, "process_lookup_error"
210
+ except (PermissionError, OSError) as exc:
211
+ return False, str(exc)
212
+ return True, None
213
+
214
+ ok, err = send(signal.SIGTERM)
215
+ if not ok:
216
+ if err == "process_lookup_error":
217
+ return {"status": "missing", "signaled": signaled, "pgid": pgid}
218
+ return {"status": "failed", "error": err, "signaled": signaled, "pgid": pgid}
219
+ if _wait_for_exit(pid, sigterm_wait_seconds, killer=killer, sleeper=sleeper):
220
+ return {
221
+ "status": "killed",
222
+ "sigkill_required": False,
223
+ "signaled": signaled,
224
+ "pgid": pgid,
225
+ "pgid_error": pgid_error,
226
+ }
227
+ # SIGTERM did not work — escalate.
228
+ ok, err = send(signal.SIGKILL)
229
+ if not ok:
230
+ if err == "process_lookup_error":
231
+ # Race: died between checks.
232
+ return {
233
+ "status": "killed",
234
+ "sigkill_required": False,
235
+ "signaled": signaled,
236
+ "pgid": pgid,
237
+ "pgid_error": pgid_error,
238
+ }
239
+ return {
240
+ "status": "failed",
241
+ "error": err,
242
+ "signaled": signaled,
243
+ "pgid": pgid,
244
+ "sigkill_attempted": True,
245
+ }
246
+ if _wait_for_exit(pid, sigkill_wait_seconds, killer=killer, sleeper=sleeper):
247
+ return {
248
+ "status": "killed",
249
+ "sigkill_required": True,
250
+ "signaled": signaled,
251
+ "pgid": pgid,
252
+ "pgid_error": pgid_error,
253
+ }
254
+ return {
255
+ "status": "failed",
256
+ "error": "alive_after_sigkill",
257
+ "signaled": signaled,
258
+ "pgid": pgid,
259
+ "sigkill_required": True,
260
+ }
261
+
262
+
263
+ def _safe_getpgid(pid: int, pgid_getter) -> tuple[int | None, str | None]:
264
+ if pgid_getter is None:
265
+ return None, "getpgid_unavailable"
266
+ try:
267
+ return pgid_getter(pid), None
268
+ except (ProcessLookupError, PermissionError, OSError) as exc:
269
+ return None, str(exc)
270
+
271
+
272
+ def _wait_for_exit(pid: int, timeout: float, *, killer, sleeper) -> bool:
273
+ deadline = time.monotonic() + max(timeout, 0.0)
274
+ while time.monotonic() < deadline:
275
+ try:
276
+ killer(pid, 0)
277
+ except ProcessLookupError:
278
+ return True
279
+ except (PermissionError, OSError):
280
+ return True
281
+ sleeper(0.1)
282
+ # Final check after the deadline elapses.
283
+ try:
284
+ killer(pid, 0)
285
+ except ProcessLookupError:
286
+ return True
287
+ except (PermissionError, OSError):
288
+ return True
289
+ return False
290
+
291
+
292
+ def orphan_gate(
293
+ *,
294
+ fix: bool = False,
295
+ confirm: bool = False,
296
+ runner=subprocess.run,
297
+ killer=os.kill,
298
+ pg_killer=None,
299
+ pgid_getter=None,
300
+ sleeper=time.sleep,
301
+ sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
302
+ sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
303
+ ) -> dict[str, Any]:
304
+ if fix and not confirm:
305
+ return {
306
+ "ok": False,
307
+ "gate": "orphans",
308
+ "status": "refused",
309
+ "reason": "fix_requires_confirm",
310
+ "action": "re-run with --gate orphans --fix --confirm",
311
+ }
312
+ result = cleanup_orphan_coordinators(
313
+ confirm=fix and confirm,
314
+ runner=runner,
315
+ killer=killer,
316
+ pg_killer=pg_killer,
317
+ pgid_getter=pgid_getter,
318
+ sleeper=sleeper,
319
+ sigterm_wait_seconds=sigterm_wait_seconds,
320
+ sigkill_wait_seconds=sigkill_wait_seconds,
321
+ )
322
+ orphans = result.get("orphans") or []
323
+ failed = result.get("failed") or []
324
+ passed = not orphans if not fix else not failed
325
+ envelope = {
326
+ **result,
327
+ "ok": passed,
328
+ "gate": "orphans",
329
+ "status": "passed" if passed else "failed",
330
+ "fix": bool(fix),
331
+ }
332
+ if not fix and orphans:
333
+ envelope["action_required"] = "re-run with --gate orphans --fix --confirm"
334
+ return envelope
335
+
336
+
169
337
  def format_cleanup_orphans(result: dict[str, Any]) -> str:
170
338
  lines = [
171
339
  f"Coordinator orphan scan @ {result.get('scanned_at')}",
@@ -175,7 +343,9 @@ def format_cleanup_orphans(result: dict[str, Any]) -> str:
175
343
  if result.get("dry_run"):
176
344
  lines.append(" mode: DRY-RUN (no SIGTERM sent; re-run with --confirm)")
177
345
  else:
178
- lines.append(f" killed: {len(result.get('killed') or [])}")
346
+ killed_entries = result.get("killed") or []
347
+ escalated = sum(1 for k in killed_entries if k.get("sigkill_required"))
348
+ lines.append(f" killed: {len(killed_entries)} (sigkill_required: {escalated})")
179
349
  lines.append(f" failed: {len(result.get('failed') or [])}")
180
350
  for orphan in result.get("orphans") or []:
181
351
  lines.append(
@@ -190,4 +360,5 @@ __all__ = [
190
360
  "classify_orphan",
191
361
  "find_coordinator_processes",
192
362
  "format_cleanup_orphans",
363
+ "orphan_gate",
193
364
  ]
@@ -1,9 +1,27 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from team_agent.display.adaptive import (
4
+ ADAPTIVE_BLOCK_REASONS,
5
+ adaptive_blocked,
6
+ close_adaptive_display,
7
+ close_adaptive_windows,
8
+ open_adaptive_display,
9
+ prepare_adaptive_windows,
10
+ probe_display_capabilities,
11
+ )
12
+ from team_agent.display.backend import (
13
+ ADAPTIVE_DISPLAY_BACKEND,
14
+ DISPLAY_BACKENDS_WITH_WORKER_VIEWS,
15
+ GHOSTTY_DISPLAY_BACKENDS,
16
+ VALID_DISPLAY_BACKENDS,
17
+ resolve_display_backend,
18
+ )
3
19
  from team_agent.display.close import (
20
+ close_adaptive_display,
4
21
  close_ghostty_display,
5
22
  close_ghostty_workspace,
6
23
  close_ghostty_workspace_slot,
24
+ close_team_display_backends,
7
25
  )
8
26
  from team_agent.display.ghostty import (
9
27
  ghostty_app_exists,
@@ -34,10 +52,19 @@ from team_agent.display.workspace import (
34
52
  )
35
53
 
36
54
  __all__ = [
55
+ "ADAPTIVE_BLOCK_REASONS",
56
+ "ADAPTIVE_DISPLAY_BACKEND",
57
+ "DISPLAY_BACKENDS_WITH_WORKER_VIEWS",
37
58
  "GHOSTTY_WORKSPACE_PANES_PER_WINDOW",
59
+ "GHOSTTY_DISPLAY_BACKENDS",
60
+ "VALID_DISPLAY_BACKENDS",
61
+ "adaptive_blocked",
62
+ "close_adaptive_display",
63
+ "close_adaptive_windows",
38
64
  "close_ghostty_display",
39
65
  "close_ghostty_workspace",
40
66
  "close_ghostty_workspace_slot",
67
+ "close_team_display_backends",
41
68
  "ghostty_app_exists",
42
69
  "ghostty_attach_args",
43
70
  "ghostty_command",
@@ -51,11 +78,15 @@ __all__ = [
51
78
  "ghostty_workspace_window_name",
52
79
  "kill_ghostty_workspace_linked_sessions",
53
80
  "open_ghostty_worker_window",
81
+ "open_adaptive_display",
54
82
  "open_ghostty_workspace",
55
83
  "open_ghostty_workspace_agent_display",
56
84
  "open_worker_displays",
85
+ "prepare_adaptive_windows",
57
86
  "prepare_ghostty_display_session",
58
87
  "prepare_ghostty_workspace_aggregator",
59
88
  "prepare_ghostty_workspace_linked_sessions",
89
+ "probe_display_capabilities",
90
+ "resolve_display_backend",
60
91
  "set_ghostty_workspace_pane_title",
61
92
  ]