@team-agent/installer 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@team-agent/installer",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "npx installer for Team Agent",
5
5
  "keywords": [
6
6
  "codex",
@@ -72,6 +72,12 @@
72
72
  "startup_order": {
73
73
  "type": "array",
74
74
  "items": { "type": "string" }
75
+ },
76
+ "auto_trust_own_workspace": {
77
+ "type": "boolean",
78
+ "default": false,
79
+ "deprecated": true,
80
+ "description": "DEPRECATED: use env TEAM_AGENT_AUTO_TRUST_OWN_WORKSPACE per session. Will be removed in 0.3.0."
75
81
  }
76
82
  }
77
83
  },
@@ -64,7 +64,7 @@ def handle_provider_startup_prompts(workspace: Path, state: dict[str, Any], even
64
64
  continue
65
65
  agent_state["startup_prompt_check_count"] = check_count + 1
66
66
  adapter = get_adapter(agent_state["provider"])
67
- for prompt_event in adapter.handle_startup_prompts(session_name, window, checks=1, sleep_s=0.0):
67
+ for prompt_event in adapter.handle_startup_prompts(session_name, window, checks=20, sleep_s=0.5):
68
68
  event_log.write(
69
69
  "runtime.startup_prompt_handled",
70
70
  agent_id=agent_id,
@@ -88,9 +88,25 @@ def cmd_settle(args: argparse.Namespace) -> dict[str, Any]:
88
88
 
89
89
 
90
90
  def cmd_status(args: argparse.Namespace) -> dict[str, Any]:
91
- if args.json:
92
- return runtime.status(Path(args.workspace).resolve(), as_json=True, compact=not args.detail)
93
- return runtime.format_status(Path(args.workspace).resolve(), args.agent)
91
+ if getattr(args, "summary", False) is True:
92
+ if getattr(args, "json", False) is True:
93
+ raise TeamAgentError("--summary and --json are mutually exclusive")
94
+ if getattr(args, "agent", None):
95
+ raise TeamAgentError("status --summary does not accept an agent argument")
96
+ data = runtime.status(Path(args.workspace).resolve(), as_json=True, compact=False)
97
+ return _format_status_summary(data)
98
+ if getattr(args, "json", False) is True:
99
+ return runtime.status(Path(args.workspace).resolve(), as_json=True, compact=not (getattr(args, "detail", False) is True))
100
+ return runtime.format_status(Path(args.workspace).resolve(), getattr(args, "agent", None))
101
+
102
+
103
def cmd_watch(args: argparse.Namespace) -> None:
    """Handle the ``watch`` sub-command.

    Calls ``run_watch`` for the resolved workspace (and the optional ``team``
    selector) and then always terminates the process with exit status 0 by
    raising ``SystemExit(0)``: both when ``run_watch`` returns on its own and
    when it is interrupted with Ctrl-C.
    """
    # Imported lazily, inside the command, as in the rest of this handler set.
    from team_agent.watch import run_watch

    try:
        run_watch(Path(args.workspace).resolve(), team=getattr(args, "team", None))
    except KeyboardInterrupt:
        # Ctrl-C is treated as a normal way to stop watching, not an error.
        pass
    raise SystemExit(0)
94
110
 
95
111
 
96
112
  def cmd_approvals(args: argparse.Namespace) -> dict[str, Any]:
@@ -200,6 +216,14 @@ def cmd_validate_result(args: argparse.Namespace) -> dict[str, Any]:
200
216
 
201
217
 
202
218
  def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
219
+ gate = getattr(args, "gate", None)
220
+ if getattr(args, "fix", False) is True and not gate:
221
+ raise TeamAgentError("--fix requires --gate")
222
+ if isinstance(gate, str) and gate:
223
+ from team_agent.diagnose.orphan_cleanup import orphan_gate
224
+ if gate != "orphans":
225
+ raise TeamAgentError(f"unknown doctor gate: {gate}")
226
+ return orphan_gate(fix=bool(getattr(args, "fix", False)), confirm=bool(getattr(args, "confirm", False)))
203
227
  if getattr(args, "cleanup_orphans", False):
204
228
  from team_agent.diagnose.orphan_cleanup import cleanup_orphan_coordinators, format_cleanup_orphans
205
229
  result = cleanup_orphan_coordinators(confirm=bool(getattr(args, "confirm", False)))
@@ -210,6 +234,83 @@ def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
210
234
  return runtime.doctor(spec)
211
235
 
212
236
 
237
def _format_status_summary(data: dict[str, Any]) -> str:
    """Render a status payload as a five-line, newline-joined triage summary.

    The lines are, in order: coordinator, receiver, agents, queued, latest
    result. Missing or falsy sections of ``data`` are treated as empty.
    """
    coordinator = data.get("coordinator") or {}
    receiver = data.get("leader_receiver") or {}
    agents = data.get("agents") or {}
    health = data.get("agent_health") or {}

    # NOTE(review): this takes element 0 of latest_results; confirm the
    # producer orders that list newest-first.
    results = data.get("latest_results")
    latest = results[0] if results else None

    tally = _agent_summary_counts(agents, health)
    buckets = ("running", "busy", "idle", "stopped", "failed", "unknown")
    breakdown = " ".join(f"{name}={tally[name]}" for name in buckets)
    agents_line = f"agents: {len(agents)} — {breakdown}"

    # C3 (cr verdict, 2026-05-27): the "(N interacted, M never)" marker is
    # appended only when at least one worker counts as interacted. With N == 0
    # the agents line stays byte-identical to the pre-Route-B output, so the
    # Gap 18a triage contract (strict five-line shape, exact line[2] string)
    # is unchanged.
    interacted_count, never_count = _interaction_counts(agents)
    if interacted_count > 0:
        agents_line += f" ({interacted_count} interacted, {never_count} never)"

    coordinator_line = (
        f"coordinator: {coordinator.get('status') or 'stopped'} "
        f"schema_ok={bool(coordinator.get('schema_ok'))} "
        f"tmux={bool(data.get('tmux_session_present'))}"
    )
    receiver_cmd = receiver.get("pane_current_command") or receiver.get("current_command") or "-"
    receiver_line = f"receiver: {receiver.get('pane_id') or '-'} cmd={receiver_cmd}"
    queued_line = f"queued: {len(data.get('queued_messages') or [])} mailbox messages awaiting delivery"

    return "\n".join([
        coordinator_line,
        receiver_line,
        agents_line,
        queued_line,
        _latest_result_line(latest),
    ])
264
+
265
+
266
+ def _interaction_counts(agents: dict[str, Any]) -> tuple[int, int]:
267
+ """Return (interacted, never_interacted) over the agents dict. An agent is
268
+ interacted when its `interacted` field (added by status.queries.status) is
269
+ a non-empty string other than the literal "never". This intentionally
270
+ sources from the enriched per-status interacted field rather than re-
271
+ parsing first_send_at so the summary stays a derived view."""
272
+ interacted = 0
273
+ never = 0
274
+ for entry in agents.values():
275
+ marker = (entry or {}).get("interacted") if isinstance(entry, dict) else None
276
+ if isinstance(marker, str) and marker and marker != "never":
277
+ interacted += 1
278
+ else:
279
+ never += 1
280
+ return interacted, never
281
+
282
+
283
+ def _agent_summary_counts(agents: dict[str, Any], health: dict[str, Any]) -> dict[str, int]:
284
+ counts = dict.fromkeys(("running", "busy", "idle", "stopped", "failed", "unknown"), 0)
285
+ for agent_id, agent in agents.items():
286
+ raw = str((agent or {}).get("status") or "").lower()
287
+ hstatus = str((health.get(agent_id) or {}).get("status") or "").lower()
288
+ if raw in {"failed", "error"} or hstatus in {"failed", "error"}:
289
+ counts["failed"] += 1
290
+ elif raw in {"stopped", "done"} or hstatus == "done":
291
+ counts["stopped"] += 1
292
+ elif raw == "busy" or hstatus in {"running", "working"}:
293
+ counts["busy"] += 1
294
+ elif hstatus == "idle":
295
+ counts["idle"] += 1
296
+ elif raw in {"blocked", "awaiting_approval", "interrupted", "missing", "stuck", "uncertain"} or hstatus in {
297
+ "blocked", "awaiting_approval", "interrupted", "missing", "stuck", "uncertain"
298
+ }:
299
+ counts["unknown"] += 1
300
+ elif raw == "running":
301
+ counts["running"] += 1
302
+ else:
303
+ counts["unknown"] += 1
304
+ return counts
305
+
306
+
307
+ def _latest_result_line(result: dict[str, Any] | None) -> str:
308
+ if not result:
309
+ return "latest result: none"
310
+ summary = str(result.get("summary") or "").replace("\n", " ")[:80]
311
+ return f"latest result: {result.get('agent_id') or '-'} -> {summary or '-'} @ {runtime._age_text(result.get('created_at'))}"
312
+
313
+
213
314
  def cmd_shutdown(args: argparse.Namespace) -> dict[str, Any]:
214
315
  return runtime.shutdown(Path(args.workspace).resolve(), keep_logs=args.keep_logs, team=args.team)
215
316
 
@@ -24,6 +24,7 @@ from team_agent.cli.commands import (
24
24
  cmd_wait_ready,
25
25
  cmd_settle,
26
26
  cmd_status,
27
+ cmd_watch,
27
28
  cmd_approvals,
28
29
  cmd_peek,
29
30
  cmd_inbox,
@@ -182,9 +183,15 @@ def main(argv: list[str] | None = None) -> None:
182
183
  p.add_argument("agent", nargs="?")
183
184
  p.add_argument("--workspace", default=".")
184
185
  p.add_argument("--detail", action="store_true", help="Include full raw runtime state in --json output")
186
+ p.add_argument("--summary", action="store_true", help="Emit five-line human-readable triage summary")
185
187
  add_json(p)
186
188
  p.set_defaults(func=cmd_status)
187
189
 
190
+ p = sub.add_parser("watch", help="Watch leader-visible team events")
191
+ p.add_argument("--workspace", default=".")
192
+ p.add_argument("--team", help="Explicit team/session selector when a workspace has multiple teams")
193
+ p.set_defaults(func=cmd_watch)
194
+
188
195
  p = sub.add_parser("approvals", help="Show structured pending worker approval prompts")
189
196
  p.add_argument("agent", nargs="?")
190
197
  p.add_argument("--workspace", default=".")
@@ -310,6 +317,8 @@ def main(argv: list[str] | None = None) -> None:
310
317
 
311
318
  p = sub.add_parser("doctor", help="Check local dependencies, providers, auth hints, tmux, and MCP")
312
319
  p.add_argument("spec", nargs="?")
320
+ p.add_argument("--gate", choices=["orphans"], help="Run a CI-friendly doctor gate")
321
+ p.add_argument("--fix", action="store_true", help="With --gate orphans: apply the gate fix")
313
322
  p.add_argument(
314
323
  "--cleanup-orphans",
315
324
  action="store_true",
@@ -461,7 +470,7 @@ def main(argv: list[str] | None = None) -> None:
461
470
  sub._choices_actions = [ # type: ignore[attr-defined]
462
471
  action for action in sub._choices_actions if action.help != argparse.SUPPRESS # type: ignore[attr-defined]
463
472
  ]
464
- sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,takeover,claim-leader,identity,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
473
+ sub.metavar = "{codex,claude,quick-start,send,status,watch,approvals,inbox,takeover,claim-leader,identity,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
465
474
 
466
475
  args = parser.parse_args(raw_argv)
467
476
  try:
@@ -265,6 +265,7 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
265
265
  detect_idle_fallbacks,
266
266
  )
267
267
  from team_agent.messaging.activity_detector import detect_compaction_degradation
268
+ from team_agent.messaging.leader_api_errors import detect_leader_api_errors
268
269
  from team_agent.messaging.session_drift import detect_session_drift
269
270
  from team_agent.state import load_runtime_state, save_runtime_state
270
271
  state = load_runtime_state(workspace)
@@ -318,6 +319,7 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
318
319
  )
319
320
  if drift:
320
321
  drift_results.append(drift)
322
+ api_errors = detect_leader_api_errors(workspace, state, store, event_log)
321
323
  save_runtime_state(workspace, state)
322
324
  results = _collect_results_and_notify_watchers(workspace, event_log)
323
325
  # Stage 12: prune the dedupe log every tick — cheap O(n) delete bounded by 24h window.
@@ -338,5 +340,6 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
338
340
  "deadlock_alerts": deadlock_alerts,
339
341
  "compaction": compaction_results,
340
342
  "session_drift": drift_results,
343
+ "api_errors": api_errors,
341
344
  "results": results,
342
345
  }
@@ -16,7 +16,6 @@ import signal
16
16
  import subprocess
17
17
  import time
18
18
  from datetime import datetime, timezone
19
- from pathlib import Path
20
19
  from typing import Any
21
20
 
22
21
  # Pattern: argv contains "team_agent.coordinator --workspace <path>" anywhere.
@@ -39,6 +38,7 @@ _EPHEMERAL_PATH_HINTS = (
39
38
  "team-agent-test-",
40
39
  )
41
40
  _SIGTERM_WAIT_SECONDS = 3.0
41
+ _SIGKILL_WAIT_SECONDS = 2.0
42
42
 
43
43
 
44
44
  def find_coordinator_processes(*, runner=subprocess.run) -> list[dict[str, Any]]:
@@ -91,7 +91,7 @@ def classify_orphan(entry: dict[str, Any]) -> tuple[bool, str]:
91
91
  workspace = entry.get("workspace")
92
92
  if not workspace:
93
93
  return False, "cmdline_unparsed"
94
- if not Path(workspace).exists():
94
+ if not os.path.exists(workspace):
95
95
  return True, "workspace_path_missing"
96
96
  for hint in _EPHEMERAL_PATH_HINTS:
97
97
  if hint in workspace:
@@ -104,12 +104,35 @@ def cleanup_orphan_coordinators(
104
104
  confirm: bool = False,
105
105
  runner=subprocess.run,
106
106
  killer=os.kill,
107
+ pg_killer=None,
108
+ pgid_getter=None,
107
109
  sleeper=time.sleep,
110
+ sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
111
+ sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
108
112
  ) -> dict[str, Any]:
109
113
  """Scan for orphan coordinators. Without confirm: dry-run (just classify and report).
110
- With confirm: SIGTERM each orphan and wait up to _SIGTERM_WAIT_SECONDS for the
111
- process to exit; report success/failure per pid."""
114
+ With confirm: SIGTERM each orphan, wait up to _SIGTERM_WAIT_SECONDS for graceful
115
+ exit; if still alive, escalate to SIGKILL and wait _SIGKILL_WAIT_SECONDS. Only
116
+ report status='failed' (with error='alive_after_sigkill') when the process
117
+ survives BOTH signals — that's extremely rare and almost always indicates a
118
+ zombie/uninterruptible-sleep kernel state.
119
+
120
+ Mac mini 2026-05-26 evidence: real orphan coordinators have been observed alive
121
+ 40+ hours; many of them never exit on SIGTERM (signal handler suppressed during
122
+ long sqlite reads, or the python interpreter is hosting an async loop that
123
+ swallows the term signal). SIGKILL escalation is required for production.
124
+
125
+ pg_killer / pgid_getter default to os.killpg / os.getpgid; mock them in tests.
126
+ If pgid_getter succeeds AND returns a pgid > 1 AND the pgid != pid (i.e. the
127
+ process leads its own process group with children), we signal the WHOLE group;
128
+ otherwise we signal the pid directly. This catches orphan coordinators that
129
+ spawned subprocess.Popen children which would otherwise survive a pid-only
130
+ SIGTERM."""
112
131
  now = datetime.now(timezone.utc).isoformat()
132
+ if pg_killer is None:
133
+ pg_killer = getattr(os, "killpg", None)
134
+ if pgid_getter is None:
135
+ pgid_getter = getattr(os, "getpgid", None)
113
136
  entries = find_coordinator_processes(runner=runner)
114
137
  classified: list[dict[str, Any]] = []
115
138
  orphans: list[dict[str, Any]] = []
@@ -131,30 +154,19 @@ def cleanup_orphan_coordinators(
131
154
  killed: list[dict[str, Any]] = []
132
155
  failed: list[dict[str, Any]] = []
133
156
  for entry in orphans:
134
- pid = entry["pid"]
135
- try:
136
- killer(pid, signal.SIGTERM)
137
- except (ProcessLookupError, PermissionError, OSError) as exc:
138
- failed.append({**entry, "error": str(exc)})
139
- continue
140
- # Wait briefly; if the process is still alive after _SIGTERM_WAIT_SECONDS,
141
- # mark as failed (caller may want to SIGKILL).
142
- deadline = time.monotonic() + _SIGTERM_WAIT_SECONDS
143
- gone = False
144
- while time.monotonic() < deadline:
145
- try:
146
- killer(pid, 0)
147
- except ProcessLookupError:
148
- gone = True
149
- break
150
- except (PermissionError, OSError):
151
- gone = True
152
- break
153
- sleeper(0.1)
154
- if gone:
155
- killed.append(entry)
157
+ outcome = _terminate_orphan(
158
+ entry["pid"], killer=killer, pg_killer=pg_killer,
159
+ pgid_getter=pgid_getter, sleeper=sleeper,
160
+ sigterm_wait_seconds=sigterm_wait_seconds,
161
+ sigkill_wait_seconds=sigkill_wait_seconds,
162
+ )
163
+ annotated = {**entry, **outcome}
164
+ if outcome.get("status") == "killed":
165
+ killed.append(annotated)
166
+ elif outcome.get("status") == "missing":
167
+ killed.append(annotated)
156
168
  else:
157
- failed.append({**entry, "error": "still_alive_after_sigterm"})
169
+ failed.append(annotated)
158
170
  return {
159
171
  "ok": True,
160
172
  "scanned": len(classified),
@@ -166,6 +178,162 @@ def cleanup_orphan_coordinators(
166
178
  }
167
179
 
168
180
 
181
def _terminate_orphan(
    pid: int,
    *,
    killer,
    pg_killer,
    pgid_getter,
    sleeper,
    sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
    sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
) -> dict[str, Any]:
    """Terminate one orphan: SIGTERM, wait, then SIGKILL and wait again.

    Args:
        pid: process id of the orphan.
        killer: ``os.kill``-like callable ``(pid, sig)``.
        pg_killer: ``os.killpg``-like callable ``(pgid, sig)`` or ``None``.
        pgid_getter: ``os.getpgid``-like callable ``(pid)`` or ``None``.
        sleeper: ``time.sleep``-like callable used while polling for exit.
        sigterm_wait_seconds: how long to wait for exit after SIGTERM.
        sigkill_wait_seconds: how long to wait for exit after SIGKILL.

    Returns one of (every envelope also carries ``signaled`` and ``pgid``):
        {status: 'killed', sigkill_required: False, pgid_error}  exited after
            SIGTERM, or vanished just before SIGKILL could be sent
        {status: 'killed', sigkill_required: True, pgid_error}   exited after SIGKILL
        {status: 'missing'}                                      gone before SIGTERM
        {status: 'failed', error: '<exc>'}                       SIGTERM could not be sent
        {status: 'failed', error: '<exc>', sigkill_attempted: True}
                                                                 SIGKILL could not be sent
        {status: 'failed', error: 'alive_after_sigkill', sigkill_required: True}
                                                                 survived both signals
    """
    pgid, pgid_error = _safe_getpgid(pid, pgid_getter)
    # Signal the whole group only when the orphan LEADS it (pgid == pid): then
    # the group is the orphan plus the children that inherited its pgid. When
    # pgid != pid the orphan is merely a member of somebody else's group, and
    # signalling that group would hit unrelated processes.
    use_group = bool(pg_killer and pgid is not None and pgid > 1 and pgid == pid)
    signaled = "pgid" if use_group else "pid"
    target = {"signaled": signaled, "pgid": pgid}

    def send(sig: int) -> tuple[bool, str | None]:
        """Deliver ``sig``; return ``(ok, error_text)``."""
        try:
            if use_group:
                pg_killer(pgid, sig)
            else:
                killer(pid, sig)
        except ProcessLookupError:
            return False, "process_lookup_error"
        except OSError as exc:  # includes PermissionError
            return False, str(exc)
        return True, None

    def killed(sigkill_required: bool) -> dict[str, Any]:
        return {
            "status": "killed",
            "sigkill_required": sigkill_required,
            **target,
            "pgid_error": pgid_error,
        }

    ok, err = send(signal.SIGTERM)
    if not ok:
        if err == "process_lookup_error":
            return {"status": "missing", **target}
        return {"status": "failed", "error": err, **target}
    if _wait_for_exit(pid, sigterm_wait_seconds, killer=killer, sleeper=sleeper):
        return killed(False)

    # SIGTERM did not work within the grace period: escalate.
    ok, err = send(signal.SIGKILL)
    if not ok:
        if err == "process_lookup_error":
            # Race: the process died between the last probe and SIGKILL.
            return killed(False)
        return {"status": "failed", "error": err, **target, "sigkill_attempted": True}
    if _wait_for_exit(pid, sigkill_wait_seconds, killer=killer, sleeper=sleeper):
        return killed(True)
    return {
        "status": "failed",
        "error": "alive_after_sigkill",
        **target,
        "sigkill_required": True,
    }
261
+
262
+
263
+ def _safe_getpgid(pid: int, pgid_getter) -> tuple[int | None, str | None]:
264
+ if pgid_getter is None:
265
+ return None, "getpgid_unavailable"
266
+ try:
267
+ return pgid_getter(pid), None
268
+ except (ProcessLookupError, PermissionError, OSError) as exc:
269
+ return None, str(exc)
270
+
271
+
272
+ def _wait_for_exit(pid: int, timeout: float, *, killer, sleeper) -> bool:
273
+ deadline = time.monotonic() + max(timeout, 0.0)
274
+ while time.monotonic() < deadline:
275
+ try:
276
+ killer(pid, 0)
277
+ except ProcessLookupError:
278
+ return True
279
+ except (PermissionError, OSError):
280
+ return True
281
+ sleeper(0.1)
282
+ # Final check after the deadline elapses.
283
+ try:
284
+ killer(pid, 0)
285
+ except ProcessLookupError:
286
+ return True
287
+ except (PermissionError, OSError):
288
+ return True
289
+ return False
290
+
291
+
292
def orphan_gate(
    *,
    fix: bool = False,
    confirm: bool = False,
    runner=subprocess.run,
    killer=os.kill,
    pg_killer=None,
    pgid_getter=None,
    sleeper=time.sleep,
    sigterm_wait_seconds: float = _SIGTERM_WAIT_SECONDS,
    sigkill_wait_seconds: float = _SIGKILL_WAIT_SECONDS,
) -> dict[str, Any]:
    """Run the ``orphans`` doctor gate and wrap the scan in a pass/fail envelope.

    * ``fix`` without ``confirm`` is refused up front and nothing is scanned.
    * Without ``fix`` the scan runs with ``confirm=False`` and the gate passes
      only when the result lists no orphans; otherwise ``action_required``
      tells the caller how to re-run.
    * With ``fix`` and ``confirm`` the cleanup runs for real and the gate
      passes when the result lists no failures.

    The remaining keyword arguments are forwarded unchanged to
    ``cleanup_orphan_coordinators``. The returned dict is the cleanup result
    with ``ok``, ``gate``, ``status`` and ``fix`` set by the gate.
    """
    if fix and not confirm:
        return {
            "ok": False,
            "gate": "orphans",
            "status": "refused",
            "reason": "fix_requires_confirm",
            "action": "re-run with --gate orphans --fix --confirm",
        }

    result = cleanup_orphan_coordinators(
        confirm=fix and confirm,
        runner=runner,
        killer=killer,
        pg_killer=pg_killer,
        pgid_getter=pgid_getter,
        sleeper=sleeper,
        sigterm_wait_seconds=sigterm_wait_seconds,
        sigkill_wait_seconds=sigkill_wait_seconds,
    )
    orphans = result.get("orphans") or []
    failed = result.get("failed") or []
    if fix:
        passed = not failed
    else:
        passed = not orphans

    envelope = dict(result)
    envelope.update(
        ok=passed,
        gate="orphans",
        status="passed" if passed else "failed",
        fix=bool(fix),
    )
    if orphans and not fix:
        envelope["action_required"] = "re-run with --gate orphans --fix --confirm"
    return envelope
335
+
336
+
169
337
  def format_cleanup_orphans(result: dict[str, Any]) -> str:
170
338
  lines = [
171
339
  f"Coordinator orphan scan @ {result.get('scanned_at')}",
@@ -175,7 +343,9 @@ def format_cleanup_orphans(result: dict[str, Any]) -> str:
175
343
  if result.get("dry_run"):
176
344
  lines.append(" mode: DRY-RUN (no SIGTERM sent; re-run with --confirm)")
177
345
  else:
178
- lines.append(f" killed: {len(result.get('killed') or [])}")
346
+ killed_entries = result.get("killed") or []
347
+ escalated = sum(1 for k in killed_entries if k.get("sigkill_required"))
348
+ lines.append(f" killed: {len(killed_entries)} (sigkill_required: {escalated})")
179
349
  lines.append(f" failed: {len(result.get('failed') or [])}")
180
350
  for orphan in result.get("orphans") or []:
181
351
  lines.append(
@@ -190,4 +360,5 @@ __all__ = [
190
360
  "classify_orphan",
191
361
  "find_coordinator_processes",
192
362
  "format_cleanup_orphans",
363
+ "orphan_gate",
193
364
  ]
@@ -215,7 +215,7 @@ def launch(
215
215
  stdout=proc.stdout,
216
216
  )
217
217
  raise RuntimeError(f"Failed to start agent {agent['id']}: {proc.stderr.strip()}")
218
- handled_prompts = adapter.handle_startup_prompts(session_name, agent["id"], checks=1, sleep_s=0.0)
218
+ handled_prompts = adapter.handle_startup_prompts(session_name, agent["id"], checks=20, sleep_s=0.5)
219
219
  for prompt_event in handled_prompts:
220
220
  event_log.write(
221
221
  "launch.startup_prompt_handled",
@@ -261,6 +261,7 @@ def launch(
261
261
  event_log,
262
262
  timeout_s=1.5,
263
263
  exclude_session_ids=known_session_ids,
264
+ raise_on_missed=False,
264
265
  )
265
266
  if state.get("display_backend") in GHOSTTY_DISPLAY_BACKENDS:
266
267
  display_jobs.append((agent["id"], agent))
@@ -358,6 +358,7 @@ def fork_agent(
358
358
  event_log,
359
359
  timeout_s=1.5,
360
360
  exclude_session_ids={source_session_id},
361
+ raise_on_missed=False,
361
362
  )
362
363
  if open_display and state.get("display_backend") in {"ghostty", "ghostty_window"}:
363
364
  agent_state["display"] = _open_ghostty_worker_window(workspace, session_name, as_agent_id, new_agent, event_log)
@@ -319,7 +319,7 @@ def _start_agent_unlocked(workspace: Path, agent_id: str, force: bool, open_disp
319
319
  _clear_session_capture_fields(agent_state)
320
320
  if command_agent.get("_session_id"):
321
321
  agent_state["_pending_session_id"] = command_agent["_session_id"]
322
- _capture_agent_session(workspace, agent_id, agent_state, event_log, timeout_s=1.5, exclude_session_ids=known_session_ids)
322
+ _capture_agent_session(workspace, agent_id, agent_state, event_log, timeout_s=1.5, exclude_session_ids=known_session_ids, raise_on_missed=False)
323
323
  if open_display and state.get("display_backend") in {"ghostty", "ghostty_window"}:
324
324
  agent_state["display"] = _open_ghostty_worker_window(workspace, session_name, agent_id, agent, event_log)
325
325
  elif open_display and state.get("display_backend") == "ghostty_workspace":
@@ -331,17 +331,17 @@ class MessageStore:
331
331
  return counts
332
332
 
333
333
  def add_result(self, envelope: dict[str, Any], owner_team_id: str | None = None) -> str:
334
- _ = owner_team_id
335
334
  validate_result_envelope(envelope)
336
335
  result_id = f"res_{uuid.uuid4().hex[:12]}"
337
336
  with closing(self.connect()) as conn:
338
337
  with conn:
339
338
  conn.execute(
340
339
  """
341
- insert into results(result_id, task_id, agent_id, envelope, status, created_at)
342
- values (?, ?, ?, ?, ?, ?)
340
+ insert into results(owner_team_id, result_id, task_id, agent_id, envelope, status, created_at)
341
+ values (?, ?, ?, ?, ?, ?, ?)
343
342
  """,
344
343
  (
344
+ owner_team_id,
345
345
  result_id,
346
346
  envelope["task_id"],
347
347
  envelope["agent_id"],
@@ -423,16 +423,17 @@ class MessageStore:
423
423
  return dict(row) if row else None
424
424
 
425
425
  def latest_results(self, limit: int = 5, owner_team_id: str | None = None) -> list[dict[str, Any]]:
426
- _ = owner_team_id
426
+ owner_clause = "and owner_team_id = ?" if owner_team_id else ""
427
+ args: tuple[Any, ...] = (owner_team_id, limit) if owner_team_id else (limit,)
427
428
  with closing(self.connect()) as conn:
428
429
  rows = conn.execute(
429
- """
430
+ f"""
430
431
  select * from results
431
- where status != 'invalid'
432
+ where status != 'invalid' {owner_clause}
432
433
  order by created_at desc
433
434
  limit ?
434
435
  """,
435
- (limit,),
436
+ args,
436
437
  ).fetchall()
437
438
  return [dict(row) for row in reversed(rows)]
438
439
 
@@ -22,7 +22,7 @@ MESSAGE_COLUMNS = {
22
22
  "error",
23
23
  "delivery_attempts",
24
24
  }
25
- RESULT_COLUMNS = {"result_id", "task_id", "agent_id", "envelope", "status", "created_at"}
25
+ RESULT_COLUMNS = {"owner_team_id", "result_id", "task_id", "agent_id", "envelope", "status", "created_at"}
26
26
  SCHEDULED_EVENT_COLUMNS = {
27
27
  "id",
28
28
  "owner_team_id",
@@ -125,6 +125,7 @@ def initialize_schema(conn: sqlite3.Connection) -> None:
125
125
  """
126
126
  create table if not exists results (
127
127
  result_id text primary key,
128
+ owner_team_id text,
128
129
  task_id text not null,
129
130
  agent_id text not null,
130
131
  envelope text not null,
@@ -215,7 +216,12 @@ def initialize_schema(conn: sqlite3.Connection) -> None:
215
216
  "owner_team_id": "alter table messages add column owner_team_id text",
216
217
  },
217
218
  )
218
- _ensure_table_columns(conn, "results", RESULT_COLUMNS)
219
+ _ensure_table_columns(
220
+ conn,
221
+ "results",
222
+ RESULT_COLUMNS,
223
+ {"owner_team_id": "alter table results add column owner_team_id text"},
224
+ )
219
225
  _ensure_table_columns(
220
226
  conn,
221
227
  "scheduled_events",