@team-agent/installer 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/package.json +1 -1
  2. package/schemas/team.schema.json +6 -0
  3. package/src/team_agent/approvals/runtime_prompts.py +1 -1
  4. package/src/team_agent/cli/commands.py +122 -6
  5. package/src/team_agent/cli/parser.py +42 -1
  6. package/src/team_agent/coordinator/__main__.py +21 -2
  7. package/src/team_agent/coordinator/lifecycle.py +11 -0
  8. package/src/team_agent/diagnose/orphan_cleanup.py +364 -0
  9. package/src/team_agent/events.py +47 -0
  10. package/src/team_agent/launch/core.py +2 -1
  11. package/src/team_agent/leader/__init__.py +273 -60
  12. package/src/team_agent/lifecycle/agents.py +54 -2
  13. package/src/team_agent/lifecycle/operations.py +87 -9
  14. package/src/team_agent/lifecycle/start.py +1 -1
  15. package/src/team_agent/message_store/core.py +8 -7
  16. package/src/team_agent/message_store/leader_notification_log.py +132 -0
  17. package/src/team_agent/message_store/result_watchers.py +144 -1
  18. package/src/team_agent/message_store/schema.py +31 -2
  19. package/src/team_agent/messaging/delivery.py +293 -1
  20. package/src/team_agent/messaging/idle_alerts.py +109 -9
  21. package/src/team_agent/messaging/leader.py +179 -10
  22. package/src/team_agent/messaging/leader_api_errors.py +216 -0
  23. package/src/team_agent/messaging/leader_panes.py +393 -23
  24. package/src/team_agent/messaging/result_delivery.py +219 -4
  25. package/src/team_agent/messaging/results.py +12 -21
  26. package/src/team_agent/messaging/scheduler.py +24 -2
  27. package/src/team_agent/messaging/send.py +21 -26
  28. package/src/team_agent/messaging/tmux_io.py +153 -23
  29. package/src/team_agent/messaging/tmux_prompt.py +87 -0
  30. package/src/team_agent/messaging/trust_auto_answer.py +44 -0
  31. package/src/team_agent/restart/orchestration.py +207 -4
  32. package/src/team_agent/runtime.py +7 -7
  33. package/src/team_agent/rust_core.py +157 -3
  34. package/src/team_agent/sessions/capture.py +65 -15
  35. package/src/team_agent/spec.py +59 -0
  36. package/src/team_agent/state.py +153 -10
  37. package/src/team_agent/status/inbox.py +33 -3
  38. package/src/team_agent/status/queries.py +32 -1
  39. package/src/team_agent/watch/__init__.py +145 -0
@@ -20,6 +20,7 @@ from team_agent.messaging.deps import (
20
20
 
21
21
  from pathlib import Path
22
22
  from typing import Any
23
+ from team_agent.messaging.tmux_prompt import detect_non_input_scrollback, non_input_scrollback_window
23
24
 
24
25
  def _tmux_inject_text(
25
26
  target: str,
@@ -28,6 +29,8 @@ def _tmux_inject_text(
28
29
  buffer_name: str,
29
30
  attempts: int = 3,
30
31
  provider: str = "fake",
32
+ *,
33
+ bypass_non_input_gate: bool = False,
31
34
  ) -> dict[str, Any]:
32
35
  token_match = re.search(r"\[team-agent-token:([^\]]+)\]", text)
33
36
  token = token_match.group(1) if token_match else ""
@@ -37,15 +40,25 @@ def _tmux_inject_text(
37
40
  submit_settle_timeout = _tmux_submit_settle_timeout(text)
38
41
  text_bytes = _tmux_text_size(text)
39
42
  for attempt in range(1, max(attempts, 1) + 1):
40
- prepared = _prepare_tmux_pane_for_input(target)
43
+ prepared = (
44
+ {"ok": True, "verification": "non_input_gate_bypassed"}
45
+ if bypass_non_input_gate
46
+ else _prepare_tmux_pane_for_input(target)
47
+ )
41
48
  if not prepared["ok"]:
42
- attempt_log.append({"attempt": attempt, "visible": False, "verification": prepared["verification"]})
49
+ attempt_log.append(_prepare_failure_attempt(attempt, prepared))
43
50
  return {
44
51
  "ok": False,
52
+ "status": "failed",
45
53
  "stage": prepared["stage"],
54
+ "reason": prepared.get("reason"),
46
55
  "error": prepared.get("error"),
47
56
  "attempts": attempt_log,
48
57
  "verification": prepared["verification"],
58
+ "detected": prepared.get("detected"),
59
+ "pane_id": prepared.get("pane_id"),
60
+ "pane_mode": prepared.get("pane_mode"),
61
+ "pane_capture_tail": prepared.get("pane_capture_tail"),
49
62
  }
50
63
  baseline = _capture_tmux_pane_text(target)
51
64
  if not baseline["ok"]:
@@ -97,6 +110,9 @@ def _tmux_inject_text(
97
110
  attempt_entry["buffer_delete_error"] = deleted.get("error")
98
111
  if prepared.get("recovered_from_mode"):
99
112
  attempt_entry["recovered_from_mode"] = True
113
+ attempt_entry["recovered_from_pane_mode"] = prepared.get("pane_mode")
114
+ if prepared.get("warning_event"):
115
+ attempt_entry["warning_event"] = prepared["warning_event"]
100
116
  attempt_log.append(attempt_entry)
101
117
  if not visible:
102
118
  time.sleep(0.2)
@@ -276,50 +292,164 @@ def _tmux_load_buffer_stdin(buffer_name: str, text: str) -> subprocess.Completed
276
292
 
277
293
 
278
294
  def _prepare_tmux_pane_for_input(target: str) -> dict[str, Any]:
279
- mode = run_cmd(["tmux", "display-message", "-p", "-t", target, "#{pane_in_mode}"], timeout=5)
280
- if mode.returncode != 0:
295
+ mode_result = _pane_mode(target)
296
+ if not mode_result["ok"]:
281
297
  return {
282
298
  "ok": False,
283
299
  "stage": "pane-mode-check",
284
300
  "verification": "pane_mode_check_failed",
285
- "error": mode.stderr.strip() or "tmux pane mode check failed",
301
+ "error": mode_result.get("error") or "tmux pane mode check failed",
286
302
  }
287
- if mode.stdout.strip() != "1":
288
- return {"ok": True, "verification": "pane_input_ready"}
289
- cancel = run_cmd(["tmux", "send-keys", "-t", target, "-X", "cancel"], timeout=10)
290
- if cancel.returncode != 0:
303
+ capture_result = _pane_capture_tail(target, lines=30)
304
+ if not capture_result["ok"]:
291
305
  return {
292
306
  "ok": False,
293
- "stage": "pane-mode-cancel",
294
- "verification": "pane_mode_cancel_failed",
295
- "error": cancel.stderr.strip() or "tmux copy-mode cancel failed",
307
+ "stage": "pane-tail-capture",
308
+ "verification": "pane_tail_capture_failed",
309
+ "error": capture_result.get("error") or "tmux capture-pane failed",
296
310
  }
311
+ pane_mode = _normalize_pane_mode(mode_result.get("pane_mode"))
312
+ capture_tail = str(capture_result.get("capture") or "")
313
+ detected = detect_non_input_scrollback(capture_tail)
314
+ if detected:
315
+ return _non_input_refusal(target, pane_mode, capture_tail, detected)
316
+ if not pane_mode:
317
+ return {"ok": True, "verification": "pane_input_ready"}
318
+ cancel = _pane_mode_cancel(target, pane_mode)
319
+ if not cancel["ok"]:
320
+ return _non_input_refusal(
321
+ target,
322
+ pane_mode,
323
+ capture_tail,
324
+ f"tmux_{pane_mode}",
325
+ error=cancel.get("error") or "tmux pane mode cancel failed",
326
+ verification="pane_mode_cancel_failed",
327
+ warning_event=cancel.get("warning_event"),
328
+ )
329
+ warning_event = cancel.get("warning_event")
297
330
  deadline = time.monotonic() + 1.5
298
331
  while True:
299
- check = run_cmd(["tmux", "display-message", "-p", "-t", target, "#{pane_in_mode}"], timeout=5)
300
- if check.returncode != 0:
332
+ check = _pane_mode(target)
333
+ if not check["ok"]:
301
334
  return {
302
335
  "ok": False,
303
336
  "stage": "pane-mode-check",
304
337
  "verification": "pane_mode_recheck_failed",
305
- "error": check.stderr.strip() or "tmux pane mode recheck failed",
338
+ "error": check.get("error") or "tmux pane mode recheck failed",
306
339
  }
307
- if check.stdout.strip() != "1":
308
- return {"ok": True, "verification": "pane_input_ready_after_mode_cancel", "recovered_from_mode": True}
309
- if time.monotonic() >= deadline:
310
- return {
311
- "ok": False,
312
- "stage": "pane-mode-cancel",
313
- "verification": "pane_mode_still_active_after_cancel",
314
- "error": "tmux pane stayed in copy-mode after cancel",
340
+ if not _normalize_pane_mode(check.get("pane_mode")):
341
+ result = {
342
+ "ok": True,
343
+ "verification": "pane_input_ready_after_mode_cancel",
344
+ "recovered_from_mode": True,
345
+ "pane_mode": pane_mode,
315
346
  }
347
+ if warning_event:
348
+ result["warning_event"] = warning_event
349
+ return result
350
+ if time.monotonic() >= deadline:
351
+ return _non_input_refusal(
352
+ target,
353
+ pane_mode,
354
+ capture_tail,
355
+ f"tmux_{pane_mode}",
356
+ error=f"tmux pane stayed in {pane_mode} after cancel",
357
+ verification="pane_mode_still_active_after_cancel",
358
+ warning_event=warning_event,
359
+ )
316
360
  time.sleep(0.1)
317
361
 
318
362
 
363
def _pane_mode(target: str) -> dict[str, Any]:
    """Query tmux for the ``#{pane_mode}`` format value of ``target``.

    Returns:
        ``{"ok": True, "pane_mode": <stripped stdout>}`` when tmux exits with 0,
        otherwise ``{"ok": False, "error": <stripped stderr or a fallback text>}``.
    """
    query = ["tmux", "display-message", "-p", "-t", target, "#{pane_mode}"]
    completed = run_cmd(query, timeout=5)
    if completed.returncode == 0:
        return {"ok": True, "pane_mode": completed.stdout.strip()}
    # Fall back to a fixed message when tmux failed without writing to stderr.
    message = completed.stderr.strip() or "tmux pane mode check failed"
    return {"ok": False, "error": message}
368
+
369
+
370
def _pane_capture_tail(target: str, lines: int = 30) -> dict[str, Any]:
    """Capture pane text for ``target`` starting ``lines`` rows back.

    Runs ``tmux capture-pane -p -S -<lines>``.

    Returns:
        ``{"ok": True, "capture": <raw stdout>}`` on success, otherwise
        ``{"ok": False, "capture": "", "error": <stderr or a fallback text>}``.
    """
    command = ["tmux", "capture-pane", "-p", "-S", f"-{lines}", "-t", target]
    completed = run_cmd(command, timeout=5)
    if completed.returncode == 0:
        # stdout is returned untouched; callers do their own cleanup.
        return {"ok": True, "capture": completed.stdout}
    message = completed.stderr.strip() or "tmux capture-pane failed"
    return {"ok": False, "capture": "", "error": message}
375
+
376
+
377
def _pane_mode_cancel(target: str, pane_mode: str) -> dict[str, Any]:
    """Send the tmux key sequence chosen for ``pane_mode`` to ``target``.

    The mode is normalized first. Known modes map to a fixed ``send-keys``
    suffix; any other value (including an empty mode) falls back to
    ``-X cancel`` and is tagged with a warning event.

    Returns:
        On success ``{"ok": True, "mode": ..., "args": ...}`` plus
        ``"warning_event"`` only when one was set. On failure
        ``{"ok": False, "error": ..., "warning_event": ...}`` where the
        warning may be ``None``.
    """
    mode = _normalize_pane_mode(pane_mode)
    exit_keys = {
        "copy-mode": ["-X", "cancel"],
        "tree-mode": ["q"],
        "view-mode": ["q"],
        "client-mode": ["d"],
    }
    if mode in exit_keys:
        suffix = exit_keys[mode]
        warning_event = None
    else:
        # Unrecognized mode: try the copy-mode cancel and flag the attempt.
        suffix = ["-X", "cancel"]
        warning_event = "pane_mode_unknown_cancel_attempted"
    args = ["tmux", "send-keys", "-t", target, *suffix]
    outcome = run_cmd(args, timeout=10)
    if outcome.returncode != 0:
        failure_text = outcome.stderr.strip() or f"tmux {mode or 'unknown'} cancel failed"
        return {"ok": False, "error": failure_text, "warning_event": warning_event}
    result = {"ok": True, "mode": mode, "args": args}
    if warning_event:
        result["warning_event"] = warning_event
    return result
400
+
401
+
402
+ def _normalize_pane_mode(mode: Any) -> str:
403
+ value = str(mode or "").strip()
404
+ if value == "0":
405
+ return ""
406
+ if value == "1":
407
+ return "copy-mode"
408
+ return value
409
+
410
+
411
def _non_input_refusal(
    target: str,
    pane_mode: str,
    capture_tail: str,
    detected: str,
    *,
    error: str | None = None,
    verification: str = "recipient_pane_in_non_input_mode",
    warning_event: str | None = None,
) -> dict[str, Any]:
    """Build the failure payload used when a pane cannot accept pasted input.

    Args:
        target: Pane identifier, reported back as ``pane_id``.
        pane_mode: Pane mode to report.
        capture_tail: Captured pane text the excerpt is taken from.
        detected: Label for what was detected in the pane.
        error: Optional error text; defaults to the refusal reason.
        verification: Verification tag stored in the payload.
        warning_event: Optional warning tag, included only when truthy.

    Returns:
        A dict with ``ok`` set to ``False`` and stage ``pre-paste-pane-state``.
    """
    # Prefer the filtered scrollback window; fall back to the raw last 10 lines.
    excerpt = non_input_scrollback_window(capture_tail)
    if not excerpt:
        excerpt = _last_lines(capture_tail, 10)
    refusal: dict[str, Any] = {
        "ok": False,
        "status": "failed",
        "stage": "pre-paste-pane-state",
        "reason": "recipient_pane_in_non_input_mode",
        "error": error if error else "recipient_pane_in_non_input_mode",
        "verification": verification,
        "detected": detected,
        "pane_id": target,
        "pane_mode": pane_mode,
        "pane_capture_tail": excerpt,
    }
    if warning_event:
        refusal["warning_event"] = warning_event
    return refusal
319
436
 
320
437
 
438
+ def _prepare_failure_attempt(attempt: int, prepared: dict[str, Any]) -> dict[str, Any]:
439
+ entry = {
440
+ "attempt": attempt,
441
+ "visible": False,
442
+ "verification": prepared["verification"],
443
+ }
444
+ for key in ("reason", "detected", "pane_id", "pane_mode", "pane_capture_tail", "warning_event"):
445
+ if key in prepared:
446
+ entry[key] = prepared[key]
447
+ return entry
321
448
 
322
449
 
450
+ def _last_lines(text: str, count: int) -> str:
451
+ lines = text.splitlines()
452
+ return "\n".join(lines[-count:])
323
453
 
324
454
 
325
455
 
@@ -12,6 +12,93 @@ from team_agent.messaging.deps import (
12
12
  from pathlib import Path
13
13
  from typing import Any
14
14
 
15
+
16
+ _ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
17
+
18
+
19
def detect_non_input_scrollback(capture_tail: str) -> str | None:
    """Classify captured pane text that indicates the pane is not taking input.

    Probes run in a fixed order and the first match decides the outcome. If
    the stale check reports that a ready prompt appears after the latest
    blocking marker, the match is ignored and ``None`` is returned. The
    shell-prompt check on the last line runs only when no probe matched and
    is not subject to the stale check.

    Returns:
        One of ``"codex_trust_prompt"``, ``"codex_first_run_auth"``,
        ``"codex_compaction_warning"``, ``"generic_press_enter"``,
        ``"y_n_confirm"``, ``"numbered_menu"``, ``"shell_prompt_cli_dead"``,
        or ``None`` when nothing is detected.
    """
    visible = _non_input_scrollback_lines(capture_tail)
    joined = "\n".join(visible)
    lowered = joined.lower()
    superseded = _stale_non_input_before_ready_prompt(visible)

    def _has_numbered_menu() -> bool:
        # A "1." line directly followed by a "2." line counts as a menu.
        return any(
            _starts_numbered_choice(upper, "1") and _starts_numbered_choice(below, "2")
            for upper, below in zip(visible, visible[1:])
        )

    probes = (
        (
            "codex_trust_prompt",
            lambda: re.search(r"do\s+you\s+trust\s+the\s+contents\s+of\s+this\s+directory", lowered),
        ),
        (
            "codex_first_run_auth",
            lambda: "press enter to log in" in lowered or "press enter to login" in lowered,
        ),
        ("codex_compaction_warning", lambda: "capability may degrade" in lowered),
        ("generic_press_enter", lambda: re.search(r"press\s+(enter|return)\s+to\s+continue", lowered)),
        ("generic_press_enter", lambda: re.search(r"press\s+any\s+key", lowered)),
        # The y/n probe is case-sensitive, so it inspects the un-lowered text.
        ("y_n_confirm", lambda: re.search(r"(\(y/n\)|\([yY]/n\)|\[y/N\]|\[Y/n\]|\[y/n\])", joined)),
        ("numbered_menu", _has_numbered_menu),
    )
    for label, matches in probes:
        if matches():
            return None if superseded else label

    if visible and re.search(r"(^|[\s~/.\w-])[$%]\s*$", visible[-1]):
        return "shell_prompt_cli_dead"
    return None
58
+
59
+
60
def non_input_scrollback_window(capture_tail: str, limit: int = 15) -> str:
    """Return the filtered scrollback lines of ``capture_tail`` as one string.

    Delegates the filtering to ``_non_input_scrollback_lines`` with the same
    ``limit`` and joins the result with newlines.
    """
    kept_lines = _non_input_scrollback_lines(capture_tail, limit=limit)
    return "\n".join(kept_lines)
62
+
63
+
64
def _non_input_scrollback_lines(capture_tail: str, limit: int = 15) -> list[str]:
    """Return up to ``limit`` trailing non-blank lines of ``capture_tail``.

    Each line has ANSI escape sequences removed (via ``_ANSI_ESCAPE_RE``) and
    trailing whitespace stripped; leading whitespace is kept. Blank lines are
    dropped before the last ``limit`` lines are taken.

    A non-positive ``limit`` returns an empty list. Previously ``limit=0``
    evaluated ``[-0:]`` and returned every line. The old loop that popped
    trailing blank lines is gone because the blank-line filter already
    removes them.
    """
    if limit <= 0:
        return []
    cleaned = (_ANSI_ESCAPE_RE.sub("", raw).rstrip() for raw in capture_tail.splitlines())
    return [line for line in cleaned if line.strip()][-limit:]
69
+
70
+
71
+ def _starts_numbered_choice(line: str, number: str) -> bool:
72
+ return bool(re.match(rf"^\s*(?:[›❯>]\s*)?{number}\.\s+", line))
73
+
74
+
75
def _stale_non_input_before_ready_prompt(lines: list[str]) -> bool:
    """Report whether a ready prompt appears after the last blocking marker.

    A line is a blocking marker when it contains the trust question, a
    "press enter/return to continue" or "press any key" phrase, or starts a
    numbered choice ``1.`` or ``2.``. Returns ``True`` only when at least one
    marker exists and the last ready-prompt line comes strictly after the
    last marker.
    """

    def _is_blocking(raw: str) -> bool:
        lowered = raw.lower()
        if "do you trust the contents of this directory" in lowered:
            return True
        if re.search(r"press\s+(enter|return)\s+to\s+continue", lowered):
            return True
        if re.search(r"press\s+any\s+key", lowered):
            return True
        return _starts_numbered_choice(raw, "1") or _starts_numbered_choice(raw, "2")

    blocking_positions = [pos for pos, raw in enumerate(lines) if _is_blocking(raw)]
    if not blocking_positions:
        return False
    ready_positions = [pos for pos, raw in enumerate(lines) if _is_input_ready_prompt(raw)]
    if not ready_positions:
        return False
    return ready_positions[-1] > blocking_positions[-1]
91
+
92
+
93
+ def _is_input_ready_prompt(line: str) -> bool:
94
+ if _starts_numbered_choice(line, "1") or _starts_numbered_choice(line, "2"):
95
+ return False
96
+ value = line.strip()
97
+ if re.match(r"^[›❯>]\s+\S", value):
98
+ return True
99
+ return bool(re.search(r"\b(codex|claude)\s*[>›❯]\s*$", value, re.IGNORECASE))
100
+
101
+
15
102
  def _enable_codex_fast_mode(session_name: str, window_name: str) -> dict[str, Any]:
16
103
  target = f"{session_name}:{window_name}"
17
104
  proc = run_cmd(["tmux", "send-keys", "-t", target, "/fast", "Enter"], timeout=10)
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from team_agent.events import EventLog
7
+ from team_agent.messaging.deps import _tmux_inject_text
8
+
9
+
10
def retry_injection_after_trust_auto_answer(
    workspace: Path,
    state: dict[str, Any],
    event_log: EventLog,
    injection: dict[str, Any],
    target: str,
    text: str,
    submit_key: str,
    buffer_name: str,
    provider: str,
) -> dict[str, Any]:
    """Try to auto-answer a trust prompt, then re-run the text injection.

    Returns:
        * ``injection`` unchanged when no answer was given;
        * a copy of ``injection`` with ``error``, ``verification`` and
          ``stage`` overwritten when the prompt was answered but not reported
          dismissed within 3 seconds;
        * otherwise the result of a fresh ``_tmux_inject_text`` call.
    """
    # Imported here rather than at module top, as in the original.
    from team_agent.messaging.delivery import _wait_for_trust_prompt_dismissal
    from team_agent.messaging.leader_panes import attempt_trust_auto_answer

    # Use the pane recorded on the failed injection, else the caller's target.
    pane = injection.get("pane_id") or target
    captured_tail = injection.get("pane_capture_tail") or ""
    answer = attempt_trust_auto_answer(workspace, pane, captured_tail, event_log, state=state)
    if not answer.get("answered"):
        return injection

    if _wait_for_trust_prompt_dismissal(pane, timeout=3.0):
        return _tmux_inject_text(target, text, submit_key, buffer_name, provider=provider)

    return {
        **injection,
        "error": "trust_prompt_not_dismissed_after_answer",
        "verification": "trust_prompt_not_dismissed_after_answer",
        "stage": "trust_auto_answer_dismissal_wait",
    }
@@ -84,15 +84,72 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
84
84
  raise RuntimeError(_tmux_session_conflict_error(session_name))
85
85
  runtime_cfg = _effective_runtime_config(spec.get("runtime", {}))
86
86
  display_backend = spec.get("runtime", {}).get("display_backend", state.get("display_backend", "none"))
87
- _close_ghostty_workspace(state, event_log)
88
- for agent_id, agent_state in state.get("agents", {}).items():
89
- _close_ghostty_display(agent_id, agent_state, event_log)
90
- state["display_backend"] = display_backend
87
+ # Stage 7 S5 — Slice 6 lifecycle atomicity contract: compute restart_agents
88
+ # early so we can pre-validate resumability BEFORE any destructive teardown
89
+ # (ghostty close, tmux session creation). Without --allow-fresh, every
90
+ # non-paused worker MUST be resumable; if any is not, refuse the operation
91
+ # atomically with a structured result and a restart.atomic_refusal event.
92
+ # No rollback path is needed because nothing has been created yet.
91
93
  restart_agents = [
92
94
  agent
93
95
  for agent in spec.get("agents", [])
94
96
  if state.get("agents", {}).get(agent["id"], {}).get("status") != "paused" and not agent.get("paused")
95
97
  ]
98
+ # cr strict-typing (2026-05-27): refuse the operation deterministically
99
+ # before any decision logic if any persisted first_send_at is corrupt
100
+ # (empty string, 0, False, literal "null", any non-ISO garbage). This
101
+ # avoids silent misclassification through Python truthiness and gives the
102
+ # operator a clear audit signal that state.json is damaged.
103
+ invalid_first_send_at = _collect_corrupt_first_send_at(restart_agents, state)
104
+ if invalid_first_send_at:
105
+ for entry in invalid_first_send_at:
106
+ event_log.write(
107
+ "restart.first_send_at_invalid",
108
+ worker_id=entry["worker_id"],
109
+ raw_first_send_at=entry["raw_first_send_at"],
110
+ raw_first_send_at_type=entry["raw_first_send_at_type"],
111
+ )
112
+ invalid_names = [entry["worker_id"] for entry in invalid_first_send_at]
113
+ return {
114
+ "ok": False,
115
+ "status": "refused",
116
+ "reason": "invalid_first_send_at",
117
+ "invalid_first_send_at": invalid_first_send_at,
118
+ "allow_fresh": bool(allow_fresh),
119
+ "error": (
120
+ f"Cannot restart: workers {invalid_names} have a corrupt "
121
+ "first_send_at in state.json (only null/missing or a valid "
122
+ "ISO-8601 UTC timestamp string is accepted). Inspect the "
123
+ "restart.first_send_at_invalid audit events for raw values "
124
+ "and repair state.json before retrying."
125
+ ),
126
+ }
127
+ # cr C2: emit one restart.resume_decision event per non-paused worker so
128
+ # every restart attempt produces an auditable per-worker classification.
129
+ # The function returns only refused workers — populated when
130
+ # allow_fresh=False AND at least one interacted worker cannot be repaired.
131
+ refused = _emit_resume_decisions(
132
+ workspace, restart_agents, state, get_adapter, event_log, allow_fresh,
133
+ )
134
+ if refused:
135
+ event_log.write(
136
+ "restart.atomic_refusal",
137
+ unresumable=refused,
138
+ allow_fresh=bool(allow_fresh),
139
+ reason="resume_atomicity",
140
+ )
141
+ return {
142
+ "ok": False,
143
+ "status": "refused",
144
+ "reason": "resume_atomicity",
145
+ "unresumable": refused,
146
+ "allow_fresh": bool(allow_fresh),
147
+ "error": _format_atomic_refusal_error(refused),
148
+ }
149
+ _close_ghostty_workspace(state, event_log)
150
+ for agent_id, agent_state in state.get("agents", {}).items():
151
+ _close_ghostty_display(agent_id, agent_state, event_log)
152
+ state["display_backend"] = display_backend
96
153
  _ensure_agent_start_requirements(workspace, restart_agents, event_log, "restart")
97
154
  first = True
98
155
  restarted: list[dict[str, Any]] = []
@@ -271,6 +328,7 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
271
328
  event_log,
272
329
  timeout_s=1.5,
273
330
  exclude_session_ids=known_session_ids,
331
+ raise_on_missed=False,
274
332
  )
275
333
  if display_backend in GHOSTTY_DISPLAY_BACKENDS:
276
334
  display_jobs.append((agent["id"], agent))
@@ -315,6 +373,151 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
315
373
  return {"ok": True, "session_name": session_name, "agents": restarted, "coordinator": coordinator}
316
374
 
317
375
 
376
+ _FIRST_SEND_AT_ABSENT = "absent"
377
+ _FIRST_SEND_AT_VALID = "valid"
378
+ _FIRST_SEND_AT_CORRUPT = "corrupt"
379
+
380
+
381
+ def _classify_first_send_at(value: Any) -> str:
382
+ """Strict first_send_at typing (cr verdict, 2026-05-27).
383
+
384
+ Returns one of:
385
+ "absent" — None or missing field (worker never-interacted).
386
+ "valid" — non-empty ISO-8601 UTC string parseable by datetime.fromisoformat.
387
+ "corrupt" — anything else: empty string, 0, False, literal "null", garbage.
388
+
389
+ The contract requires that corrupt values be detected deterministically
390
+ before any restart decision so we never silent-misclassify a worker's
391
+ interaction state via Python truthiness.
392
+ """
393
+ if value is None:
394
+ return _FIRST_SEND_AT_ABSENT
395
+ if not isinstance(value, str):
396
+ return _FIRST_SEND_AT_CORRUPT
397
+ if not value:
398
+ return _FIRST_SEND_AT_CORRUPT
399
+ try:
400
+ datetime.fromisoformat(value)
401
+ except (ValueError, TypeError):
402
+ return _FIRST_SEND_AT_CORRUPT
403
+ return _FIRST_SEND_AT_VALID
404
+
405
+
406
def _collect_corrupt_first_send_at(
    restart_agents: list[dict[str, Any]],
    state: dict[str, Any],
) -> list[dict[str, Any]]:
    """Collect the restart candidates whose stored ``first_send_at`` is corrupt.

    For each agent in ``restart_agents`` the persisted record is looked up
    under ``state["agents"]``. A record that is not a dict is treated as
    having no ``first_send_at``. Each flagged worker yields a dict with
    ``worker_id``, the raw value, and the raw value's type name.
    """
    findings: list[dict[str, Any]] = []
    for agent in restart_agents:
        worker_id = agent["id"]
        record = state.get("agents", {}).get(worker_id, {})
        raw_value = record.get("first_send_at") if isinstance(record, dict) else None
        if _classify_first_send_at(raw_value) == _FIRST_SEND_AT_CORRUPT:
            findings.append(
                {
                    "worker_id": worker_id,
                    "raw_first_send_at": raw_value,
                    "raw_first_send_at_type": type(raw_value).__name__,
                }
            )
    return findings
426
+
427
+
428
def _emit_resume_decisions(
    workspace: Path,
    restart_agents: list[dict[str, Any]],
    state: dict[str, Any],
    get_adapter_fn: Any,
    event_log: EventLog,
    allow_fresh: bool,
) -> list[dict[str, Any]]:
    """Write one ``restart.resume_decision`` event per agent; return refusals.

    Decision per agent:

        resumable (directly or after repair)             -> "resume"
        not resumable, no valid first_send_at            -> "fresh_start"
        not resumable, valid first_send_at, allow_fresh  -> "fresh_start"
        not resumable, valid first_send_at, not fresh    -> "refuse"

    An agent is resumable when it has a session id and the adapter reports
    the session resumable. Otherwise two repairs are tried in order:
    ``recover_resume_session_from_events``, then ``adapter.recover_session_id``.

    Returns:
        One dict per refused agent with ``agent_id``, ``reason``
        (``"no_persisted_session_id"`` or ``"session_unresumable"``),
        ``session_id`` and ``first_send_at``.
    """
    from team_agent.sessions.resume import recover_resume_session_from_events

    refusals: list[dict[str, Any]] = []
    for agent in restart_agents:
        agent_id = agent["id"]
        record = state.get("agents", {}).get(agent_id, {})
        session_id = record.get("session_id")
        first_send_at = record.get("first_send_at")
        interacted = _classify_first_send_at(first_send_at) == _FIRST_SEND_AT_VALID
        has_session_id = bool(session_id)
        adapter = get_adapter_fn(agent["provider"])

        resumable = has_session_id and adapter.session_is_resumable(record, workspace)
        if not resumable:
            # Session ids held by other agents, passed to both repair steps.
            other_session_ids = {
                str(other.get("session_id"))
                for other_id, other in state.get("agents", {}).items()
                if other_id != agent_id and other.get("session_id")
            }
            recovered = recover_resume_session_from_events(
                workspace, agent_id, record, adapter, other_session_ids,
            )
            if not recovered:
                recovered = adapter.recover_session_id(
                    agent_id, record, workspace, other_session_ids,
                )
            resumable = bool(recovered)

        if resumable:
            decision = "resume"
        elif interacted and not allow_fresh:
            decision = "refuse"
        else:
            decision = "fresh_start"

        event_log.write(
            "restart.resume_decision",
            worker_id=agent_id,
            has_first_send_at=interacted,
            has_session_id=has_session_id,
            allow_fresh=bool(allow_fresh),
            decision=decision,
            first_send_at=first_send_at if interacted else None,
            session_id=session_id,
        )

        if decision != "refuse":
            continue
        refusals.append(
            {
                "agent_id": agent_id,
                "reason": "session_unresumable" if session_id else "no_persisted_session_id",
                "session_id": session_id,
                "first_send_at": first_send_at,
            }
        )
    return refusals
501
+
502
+
503
+ def _format_atomic_refusal_error(refused: list[dict[str, Any]]) -> str:
504
+ """C4 (cr verdict, 2026-05-27): the human-readable refusal error must
505
+ name every refused worker AND its first_send_at timestamp so an operator
506
+ can decide whether to pass --allow-fresh and accept losing that
507
+ interaction history."""
508
+ names = [item["agent_id"] for item in refused]
509
+ details = ". ".join(
510
+ f"{item['agent_id']} was first interacted with at {item.get('first_send_at')}; "
511
+ "its persisted session is missing"
512
+ for item in refused
513
+ )
514
+ return (
515
+ f"Cannot restart: workers {names} have no resumable session despite "
516
+ f"previous interaction. {details}. "
517
+ "Pass --allow-fresh if you accept losing that interaction history."
518
+ )
519
+
520
+
318
521
  def rollback_restart_session(session_name: str, event_log: EventLog) -> dict[str, Any]:
319
522
  from team_agent.runtime import run_cmd
320
523
  proc = run_cmd(["tmux", "kill-session", "-t", session_name], timeout=10)
@@ -67,6 +67,8 @@ from team_agent.display import (
67
67
  from team_agent.leader import (
68
68
  attach_leader,
69
69
  attach_leader_to_state as _attach_leader_to_state,
70
+ claim_leader,
71
+ leader_identity,
70
72
  leader_session_name as _leader_session_name,
71
73
  leader_start_plan,
72
74
  start_leader,
@@ -438,12 +440,10 @@ for _name in (
438
440
  assert hasattr(_launch_pkg, _name), f"team_agent.launch missing {_name}"
439
441
  del _launch_pkg, _name
440
442
 
441
- # Leader lane re-exports keep runtime.attach_leader, runtime.start_leader,
442
- # runtime.leader_start_plan, runtime._attach_leader_to_state,
443
- # runtime._leader_session_name resolving for CLI handlers and tests.
443
+ # Leader lane re-exports keep runtime leader helpers resolving for CLI handlers and tests.
444
444
  import team_agent.leader as _leader_pkg
445
445
  assert attach_leader is _leader_pkg.attach_leader
446
- for _name in ("attach_leader", "attach_leader_to_state", "leader_session_name", "leader_start_plan", "start_leader"):
446
+ for _name in ("attach_leader", "attach_leader_to_state", "claim_leader", "leader_identity", "leader_session_name", "leader_start_plan", "start_leader"):
447
447
  assert hasattr(_leader_pkg, _name), f"team_agent.leader missing {_name}"
448
448
  del _leader_pkg, _name
449
449
  from team_agent.task_graph import ready_tasks, update_task_status
@@ -674,7 +674,7 @@ def _handle_startup_prompts_and_verify_window(
674
674
  session_name: str,
675
675
  start_mode: str,
676
676
  ) -> bool:
677
- handled_prompts = adapter.handle_startup_prompts(session_name, agent_id, checks=1, sleep_s=0.0)
677
+ handled_prompts = adapter.handle_startup_prompts(session_name, agent_id, checks=20, sleep_s=0.5)
678
678
  for prompt_event in handled_prompts:
679
679
  event_log.write(f"{event_prefix}.startup_prompt_handled", agent_id=agent_id, provider=provider, **prompt_event)
680
680
  deadline = time.monotonic() + 1.0
@@ -840,10 +840,10 @@ def _retry_or_failed(task: dict[str, Any]) -> str:
840
840
  return "failed"
841
841
 
842
842
 
843
- def _deliver_pending_message(workspace: Path, state: dict[str, Any], message_id: str, wait_visible: bool = True, timeout: float = 30.0) -> dict[str, Any]:
843
+ def _deliver_pending_message(workspace: Path, state: dict[str, Any], message_id: str, wait_visible: bool = True, timeout: float = 30.0, *, _trust_retry_attempt: int = 1) -> dict[str, Any]:
844
844
  from team_agent.messaging.delivery import _deliver_pending_message as impl
845
845
 
846
- return impl(workspace, state, message_id, wait_visible, timeout)
846
+ return impl(workspace, state, message_id, wait_visible, timeout, _trust_retry_attempt=_trust_retry_attempt)
847
847
 
848
848
  def _enable_codex_fast_mode(session_name: str, window_name: str) -> dict[str, Any]:
849
849
  from team_agent.messaging.tmux_prompt import _enable_codex_fast_mode as impl