@team-agent/installer 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/schemas/team.schema.json +6 -0
- package/src/team_agent/abnormal_track.py +253 -0
- package/src/team_agent/approvals/runtime_prompts.py +1 -1
- package/src/team_agent/cli/commands.py +104 -3
- package/src/team_agent/cli/parser.py +10 -1
- package/src/team_agent/compiler.py +1 -1
- package/src/team_agent/coordinator/lifecycle.py +23 -2
- package/src/team_agent/diagnose/orphan_cleanup.py +199 -28
- package/src/team_agent/display/__init__.py +31 -0
- package/src/team_agent/display/adaptive.py +425 -0
- package/src/team_agent/display/backend.py +46 -0
- package/src/team_agent/display/close.py +6 -0
- package/src/team_agent/display/rebuild.py +102 -0
- package/src/team_agent/display/tiling.py +156 -0
- package/src/team_agent/display/worker_window.py +4 -0
- package/src/team_agent/display/workspace.py +36 -127
- package/src/team_agent/idle_predicate.py +200 -0
- package/src/team_agent/idle_takeover.py +59 -0
- package/src/team_agent/idle_takeover_wiring.py +111 -0
- package/src/team_agent/launch/core.py +14 -4
- package/src/team_agent/leader/__init__.py +444 -61
- package/src/team_agent/lifecycle/operations.py +1 -0
- package/src/team_agent/lifecycle/start.py +1 -1
- package/src/team_agent/message_store/core.py +38 -11
- package/src/team_agent/message_store/leader_notification_log.py +47 -26
- package/src/team_agent/message_store/schema.py +8 -2
- package/src/team_agent/messaging/delivery.py +336 -1
- package/src/team_agent/messaging/leader.py +13 -4
- package/src/team_agent/messaging/leader_api_errors.py +216 -0
- package/src/team_agent/messaging/leader_panes.py +294 -0
- package/src/team_agent/messaging/scheduler.py +12 -0
- package/src/team_agent/messaging/send.py +54 -26
- package/src/team_agent/messaging/tmux_io.py +202 -33
- package/src/team_agent/messaging/tmux_prompt.py +87 -0
- package/src/team_agent/messaging/trust_auto_answer.py +52 -0
- package/src/team_agent/provider_state/README.md +78 -0
- package/src/team_agent/provider_state/__init__.py +86 -0
- package/src/team_agent/provider_state/claude.py +86 -0
- package/src/team_agent/provider_state/codex.py +84 -0
- package/src/team_agent/provider_state/common.py +207 -0
- package/src/team_agent/provider_state/registry.py +118 -0
- package/src/team_agent/restart/orchestration.py +215 -12
- package/src/team_agent/runtime.py +65 -15
- package/src/team_agent/sessions/capture.py +65 -15
- package/src/team_agent/spec.py +63 -3
- package/src/team_agent/status/queries.py +32 -1
- package/src/team_agent/wake.py +58 -0
- package/src/team_agent/watch/__init__.py +145 -0
|
@@ -16,7 +16,6 @@ import signal
|
|
|
16
16
|
import subprocess
|
|
17
17
|
import time
|
|
18
18
|
from datetime import datetime, timezone
|
|
19
|
-
from pathlib import Path
|
|
20
19
|
from typing import Any
|
|
21
20
|
|
|
22
21
|
# Pattern: argv contains "team_agent.coordinator --workspace <path>" anywhere.
|
|
@@ -39,6 +38,7 @@ _EPHEMERAL_PATH_HINTS = (
|
|
|
39
38
|
"team-agent-test-",
|
|
40
39
|
)
|
|
41
40
|
_SIGTERM_WAIT_SECONDS = 3.0
|
|
41
|
+
_SIGKILL_WAIT_SECONDS = 2.0
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
def find_coordinator_processes(*, runner=subprocess.run) -> list[dict[str, Any]]:
|
|
@@ -91,7 +91,7 @@ def classify_orphan(entry: dict[str, Any]) -> tuple[bool, str]:
|
|
|
91
91
|
workspace = entry.get("workspace")
|
|
92
92
|
if not workspace:
|
|
93
93
|
return False, "cmdline_unparsed"
|
|
94
|
-
if not
|
|
94
|
+
if not os.path.exists(workspace):
|
|
95
95
|
return True, "workspace_path_missing"
|
|
96
96
|
for hint in _EPHEMERAL_PATH_HINTS:
|
|
97
97
|
if hint in workspace:
|
|
@@ -104,12 +104,35 @@ def cleanup_orphan_coordinators(
|
|
|
104
104
|
confirm: bool = False,
|
|
105
105
|
runner=subprocess.run,
|
|
106
106
|
killer=os.kill,
|
|
107
|
+
pg_killer=None,
|
|
108
|
+
pgid_getter=None,
|
|
107
109
|
sleeper=time.sleep,
|
|
110
|
+
sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
|
|
111
|
+
sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
|
|
108
112
|
) -> dict[str, Any]:
|
|
109
113
|
"""Scan for orphan coordinators. Without confirm: dry-run (just classify and report).
|
|
110
|
-
With confirm: SIGTERM each orphan
|
|
111
|
-
|
|
114
|
+
With confirm: SIGTERM each orphan, wait up to _SIGTERM_WAIT_SECONDS for graceful
|
|
115
|
+
exit; if still alive, escalate to SIGKILL and wait _SIGKILL_WAIT_SECONDS. Only
|
|
116
|
+
report status='failed' (with error='alive_after_sigkill') when the process
|
|
117
|
+
survives BOTH signals — that's extremely rare and almost always indicates a
|
|
118
|
+
zombie/uninterruptible-sleep kernel state.
|
|
119
|
+
|
|
120
|
+
Mac mini 2026-05-26 evidence: real orphan coordinators have been observed alive
|
|
121
|
+
40+ hours; many of them never exit on SIGTERM (signal handler suppressed during
|
|
122
|
+
long sqlite reads, or the python interpreter is hosting an async loop that
|
|
123
|
+
swallows the term signal). SIGKILL escalation is required for production.
|
|
124
|
+
|
|
125
|
+
pg_killer / pgid_getter default to os.killpg / os.getpgid; mock them in tests.
|
|
126
|
+
If pgid_getter succeeds AND returns a pgid > 1 AND the pgid != pid (i.e. the
|
|
127
|
+
process leads its own process group with children), we signal the WHOLE group;
|
|
128
|
+
otherwise we signal the pid directly. This catches orphan coordinators that
|
|
129
|
+
spawned subprocess.Popen children which would otherwise survive a pid-only
|
|
130
|
+
SIGTERM."""
|
|
112
131
|
now = datetime.now(timezone.utc).isoformat()
|
|
132
|
+
if pg_killer is None:
|
|
133
|
+
pg_killer = getattr(os, "killpg", None)
|
|
134
|
+
if pgid_getter is None:
|
|
135
|
+
pgid_getter = getattr(os, "getpgid", None)
|
|
113
136
|
entries = find_coordinator_processes(runner=runner)
|
|
114
137
|
classified: list[dict[str, Any]] = []
|
|
115
138
|
orphans: list[dict[str, Any]] = []
|
|
@@ -131,30 +154,19 @@ def cleanup_orphan_coordinators(
|
|
|
131
154
|
killed: list[dict[str, Any]] = []
|
|
132
155
|
failed: list[dict[str, Any]] = []
|
|
133
156
|
for entry in orphans:
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
try:
|
|
146
|
-
killer(pid, 0)
|
|
147
|
-
except ProcessLookupError:
|
|
148
|
-
gone = True
|
|
149
|
-
break
|
|
150
|
-
except (PermissionError, OSError):
|
|
151
|
-
gone = True
|
|
152
|
-
break
|
|
153
|
-
sleeper(0.1)
|
|
154
|
-
if gone:
|
|
155
|
-
killed.append(entry)
|
|
157
|
+
outcome = _terminate_orphan(
|
|
158
|
+
entry["pid"], killer=killer, pg_killer=pg_killer,
|
|
159
|
+
pgid_getter=pgid_getter, sleeper=sleeper,
|
|
160
|
+
sigterm_wait_seconds=sigterm_wait_seconds,
|
|
161
|
+
sigkill_wait_seconds=sigkill_wait_seconds,
|
|
162
|
+
)
|
|
163
|
+
annotated = {**entry, **outcome}
|
|
164
|
+
if outcome.get("status") == "killed":
|
|
165
|
+
killed.append(annotated)
|
|
166
|
+
elif outcome.get("status") == "missing":
|
|
167
|
+
killed.append(annotated)
|
|
156
168
|
else:
|
|
157
|
-
failed.append(
|
|
169
|
+
failed.append(annotated)
|
|
158
170
|
return {
|
|
159
171
|
"ok": True,
|
|
160
172
|
"scanned": len(classified),
|
|
@@ -166,6 +178,162 @@ def cleanup_orphan_coordinators(
|
|
|
166
178
|
}
|
|
167
179
|
|
|
168
180
|
|
|
181
|
+
def _terminate_orphan(
    pid: int,
    *,
    killer,
    pg_killer,
    pgid_getter,
    sleeper,
    sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
    sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
) -> dict[str, Any]:
    """Terminate one orphan: SIGTERM, wait, then escalate to SIGKILL and wait.

    The signal target is the process group when ``pg_killer`` is available,
    ``pgid_getter`` yields a pgid greater than 1 that differs from ``pid``, and
    that pgid is not the process group of the calling process (signalling our
    own group would take this cleanup process down with it). In every other
    case the signal is sent to ``pid`` alone.

    Args:
        pid: Process id of the orphan.
        killer: ``os.kill``-compatible callable; also used with signal 0 to
            probe whether ``pid`` is still present.
        pg_killer: ``os.killpg``-compatible callable, or None to disable
            group signalling.
        pgid_getter: ``os.getpgid``-compatible callable, or None.
        sleeper: ``time.sleep``-compatible callable used while polling.
        sigterm_wait_seconds: Grace period after SIGTERM.
        sigkill_wait_seconds: Grace period after SIGKILL.

    Returns:
        A dict that always carries ``status``, ``signaled`` ('pid' or 'pgid'),
        ``pgid`` and ``pgid_error``, plus, depending on the outcome:
          status='killed',  sigkill_required=False  -- exited after SIGTERM, or
              vanished just before SIGKILL could be delivered
          status='killed',  sigkill_required=True   -- exited after SIGKILL
          status='missing'                          -- already gone at SIGTERM
          status='failed',  error='<exc text>'      -- SIGTERM could not be sent
          status='failed',  error='<exc text>', sigkill_attempted=True
              -- SIGKILL could not be sent
          status='failed',  error='alive_after_sigkill', sigkill_required=True
              -- still present after both signals
    """
    pgid, pgid_error = _safe_getpgid(pid, pgid_getter)
    use_group = bool(pg_killer and pgid is not None and pgid > 1 and pgid != pid)
    if use_group:
        # Never signal the group this process itself belongs to: without job
        # control (scripts, CI) the orphan and the cleanup tool can share a pgid.
        own_group = getattr(os, "getpgrp", None)
        if own_group is not None and own_group() == pgid:
            use_group = False
    signaled = "pgid" if use_group else "pid"

    def outcome(status: str, **extra: Any) -> dict[str, Any]:
        # Single place that shapes the result so every branch reports the
        # same diagnostic keys.
        return {
            "status": status,
            **extra,
            "signaled": signaled,
            "pgid": pgid,
            "pgid_error": pgid_error,
        }

    def send(sig: int) -> tuple[bool, str | None]:
        try:
            if use_group:
                pg_killer(pgid, sig)
            else:
                killer(pid, sig)
        except ProcessLookupError:
            return False, "process_lookup_error"
        except OSError as exc:  # includes PermissionError
            return False, str(exc)
        return True, None

    ok, err = send(signal.SIGTERM)
    if not ok:
        if err == "process_lookup_error":
            return outcome("missing")
        return outcome("failed", error=err)
    if _wait_for_exit(pid, sigterm_wait_seconds, killer=killer, sleeper=sleeper):
        return outcome("killed", sigkill_required=False)

    # SIGTERM did not work — escalate.
    ok, err = send(signal.SIGKILL)
    if not ok:
        if err == "process_lookup_error":
            # Race: the process died between the last probe and the SIGKILL.
            return outcome("killed", sigkill_required=False)
        return outcome("failed", error=err, sigkill_attempted=True)
    if _wait_for_exit(pid, sigkill_wait_seconds, killer=killer, sleeper=sleeper):
        return outcome("killed", sigkill_required=True)
    return outcome("failed", error="alive_after_sigkill", sigkill_required=True)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _safe_getpgid(pid: int, pgid_getter) -> tuple[int | None, str | None]:
|
|
264
|
+
if pgid_getter is None:
|
|
265
|
+
return None, "getpgid_unavailable"
|
|
266
|
+
try:
|
|
267
|
+
return pgid_getter(pid), None
|
|
268
|
+
except (ProcessLookupError, PermissionError, OSError) as exc:
|
|
269
|
+
return None, str(exc)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _wait_for_exit(pid: int, timeout: float, *, killer, sleeper) -> bool:
|
|
273
|
+
deadline = time.monotonic() + max(timeout, 0.0)
|
|
274
|
+
while time.monotonic() < deadline:
|
|
275
|
+
try:
|
|
276
|
+
killer(pid, 0)
|
|
277
|
+
except ProcessLookupError:
|
|
278
|
+
return True
|
|
279
|
+
except (PermissionError, OSError):
|
|
280
|
+
return True
|
|
281
|
+
sleeper(0.1)
|
|
282
|
+
# Final check after the deadline elapses.
|
|
283
|
+
try:
|
|
284
|
+
killer(pid, 0)
|
|
285
|
+
except ProcessLookupError:
|
|
286
|
+
return True
|
|
287
|
+
except (PermissionError, OSError):
|
|
288
|
+
return True
|
|
289
|
+
return False
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def orphan_gate(
    *,
    fix: bool = False,
    confirm: bool = False,
    runner=subprocess.run,
    killer=os.kill,
    pg_killer=None,
    pgid_getter=None,
    sleeper=time.sleep,
    sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
    sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
) -> dict[str, Any]:
    """Run the orphan-coordinator scan as a pass/fail gate.

    ``fix`` without ``confirm`` is refused up front and nothing is scanned.
    Otherwise ``cleanup_orphan_coordinators`` is invoked, with its ``confirm``
    flag set only when both ``fix`` and ``confirm`` are truthy.

    Without ``fix`` the gate passes when the scan reports no orphans; with
    ``fix`` it passes when the scan reports no failed terminations.

    Returns the scan result with ``ok``, ``gate``, ``status`` and ``fix``
    overlaid, plus ``action_required`` when orphans were found and ``fix`` was
    not requested.
    """
    rerun_hint = "re-run with --gate orphans --fix --confirm"

    if fix and not confirm:
        return {
            "ok": False,
            "gate": "orphans",
            "status": "refused",
            "reason": "fix_requires_confirm",
            "action": rerun_hint,
        }

    scan = cleanup_orphan_coordinators(
        confirm=fix and confirm,
        runner=runner,
        killer=killer,
        pg_killer=pg_killer,
        pgid_getter=pgid_getter,
        sleeper=sleeper,
        sigterm_wait_seconds=sigterm_wait_seconds,
        sigkill_wait_seconds=sigkill_wait_seconds,
    )
    found = scan.get("orphans") or []
    not_terminated = scan.get("failed") or []

    if fix:
        passed = not not_terminated
    else:
        passed = not found

    envelope = dict(scan)
    envelope.update(
        {
            "ok": passed,
            "gate": "orphans",
            "status": "passed" if passed else "failed",
            "fix": bool(fix),
        }
    )
    if found and not fix:
        envelope["action_required"] = rerun_hint
    return envelope
|
|
335
|
+
|
|
336
|
+
|
|
169
337
|
def format_cleanup_orphans(result: dict[str, Any]) -> str:
|
|
170
338
|
lines = [
|
|
171
339
|
f"Coordinator orphan scan @ {result.get('scanned_at')}",
|
|
@@ -175,7 +343,9 @@ def format_cleanup_orphans(result: dict[str, Any]) -> str:
|
|
|
175
343
|
if result.get("dry_run"):
|
|
176
344
|
lines.append(" mode: DRY-RUN (no SIGTERM sent; re-run with --confirm)")
|
|
177
345
|
else:
|
|
178
|
-
|
|
346
|
+
killed_entries = result.get("killed") or []
|
|
347
|
+
escalated = sum(1 for k in killed_entries if k.get("sigkill_required"))
|
|
348
|
+
lines.append(f" killed: {len(killed_entries)} (sigkill_required: {escalated})")
|
|
179
349
|
lines.append(f" failed: {len(result.get('failed') or [])}")
|
|
180
350
|
for orphan in result.get("orphans") or []:
|
|
181
351
|
lines.append(
|
|
@@ -190,4 +360,5 @@ __all__ = [
|
|
|
190
360
|
"classify_orphan",
|
|
191
361
|
"find_coordinator_processes",
|
|
192
362
|
"format_cleanup_orphans",
|
|
363
|
+
"orphan_gate",
|
|
193
364
|
]
|
|
@@ -1,9 +1,27 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from team_agent.display.adaptive import (
|
|
4
|
+
ADAPTIVE_BLOCK_REASONS,
|
|
5
|
+
adaptive_blocked,
|
|
6
|
+
close_adaptive_display,
|
|
7
|
+
close_adaptive_windows,
|
|
8
|
+
open_adaptive_display,
|
|
9
|
+
prepare_adaptive_windows,
|
|
10
|
+
probe_display_capabilities,
|
|
11
|
+
)
|
|
12
|
+
from team_agent.display.backend import (
|
|
13
|
+
ADAPTIVE_DISPLAY_BACKEND,
|
|
14
|
+
DISPLAY_BACKENDS_WITH_WORKER_VIEWS,
|
|
15
|
+
GHOSTTY_DISPLAY_BACKENDS,
|
|
16
|
+
VALID_DISPLAY_BACKENDS,
|
|
17
|
+
resolve_display_backend,
|
|
18
|
+
)
|
|
3
19
|
from team_agent.display.close import (
|
|
20
|
+
close_adaptive_display,
|
|
4
21
|
close_ghostty_display,
|
|
5
22
|
close_ghostty_workspace,
|
|
6
23
|
close_ghostty_workspace_slot,
|
|
24
|
+
close_team_display_backends,
|
|
7
25
|
)
|
|
8
26
|
from team_agent.display.ghostty import (
|
|
9
27
|
ghostty_app_exists,
|
|
@@ -34,10 +52,19 @@ from team_agent.display.workspace import (
|
|
|
34
52
|
)
|
|
35
53
|
|
|
36
54
|
__all__ = [
|
|
55
|
+
"ADAPTIVE_BLOCK_REASONS",
|
|
56
|
+
"ADAPTIVE_DISPLAY_BACKEND",
|
|
57
|
+
"DISPLAY_BACKENDS_WITH_WORKER_VIEWS",
|
|
37
58
|
"GHOSTTY_WORKSPACE_PANES_PER_WINDOW",
|
|
59
|
+
"GHOSTTY_DISPLAY_BACKENDS",
|
|
60
|
+
"VALID_DISPLAY_BACKENDS",
|
|
61
|
+
"adaptive_blocked",
|
|
62
|
+
"close_adaptive_display",
|
|
63
|
+
"close_adaptive_windows",
|
|
38
64
|
"close_ghostty_display",
|
|
39
65
|
"close_ghostty_workspace",
|
|
40
66
|
"close_ghostty_workspace_slot",
|
|
67
|
+
"close_team_display_backends",
|
|
41
68
|
"ghostty_app_exists",
|
|
42
69
|
"ghostty_attach_args",
|
|
43
70
|
"ghostty_command",
|
|
@@ -51,11 +78,15 @@ __all__ = [
|
|
|
51
78
|
"ghostty_workspace_window_name",
|
|
52
79
|
"kill_ghostty_workspace_linked_sessions",
|
|
53
80
|
"open_ghostty_worker_window",
|
|
81
|
+
"open_adaptive_display",
|
|
54
82
|
"open_ghostty_workspace",
|
|
55
83
|
"open_ghostty_workspace_agent_display",
|
|
56
84
|
"open_worker_displays",
|
|
85
|
+
"prepare_adaptive_windows",
|
|
57
86
|
"prepare_ghostty_display_session",
|
|
58
87
|
"prepare_ghostty_workspace_aggregator",
|
|
59
88
|
"prepare_ghostty_workspace_linked_sessions",
|
|
89
|
+
"probe_display_capabilities",
|
|
90
|
+
"resolve_display_backend",
|
|
60
91
|
"set_ghostty_workspace_pane_title",
|
|
61
92
|
]
|