@team-agent/installer 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/package.json +1 -1
  2. package/schemas/team.schema.json +6 -0
  3. package/src/team_agent/abnormal_track.py +253 -0
  4. package/src/team_agent/approvals/runtime_prompts.py +1 -1
  5. package/src/team_agent/cli/commands.py +104 -3
  6. package/src/team_agent/cli/parser.py +10 -1
  7. package/src/team_agent/compiler.py +1 -1
  8. package/src/team_agent/coordinator/lifecycle.py +23 -2
  9. package/src/team_agent/diagnose/orphan_cleanup.py +199 -28
  10. package/src/team_agent/display/__init__.py +31 -0
  11. package/src/team_agent/display/adaptive.py +425 -0
  12. package/src/team_agent/display/backend.py +46 -0
  13. package/src/team_agent/display/close.py +6 -0
  14. package/src/team_agent/display/rebuild.py +102 -0
  15. package/src/team_agent/display/tiling.py +156 -0
  16. package/src/team_agent/display/worker_window.py +4 -0
  17. package/src/team_agent/display/workspace.py +36 -127
  18. package/src/team_agent/idle_predicate.py +200 -0
  19. package/src/team_agent/idle_takeover.py +59 -0
  20. package/src/team_agent/idle_takeover_wiring.py +111 -0
  21. package/src/team_agent/launch/core.py +14 -4
  22. package/src/team_agent/leader/__init__.py +444 -61
  23. package/src/team_agent/lifecycle/operations.py +1 -0
  24. package/src/team_agent/lifecycle/start.py +1 -1
  25. package/src/team_agent/message_store/core.py +38 -11
  26. package/src/team_agent/message_store/leader_notification_log.py +47 -26
  27. package/src/team_agent/message_store/schema.py +8 -2
  28. package/src/team_agent/messaging/delivery.py +336 -1
  29. package/src/team_agent/messaging/leader.py +13 -4
  30. package/src/team_agent/messaging/leader_api_errors.py +216 -0
  31. package/src/team_agent/messaging/leader_panes.py +294 -0
  32. package/src/team_agent/messaging/scheduler.py +12 -0
  33. package/src/team_agent/messaging/send.py +54 -26
  34. package/src/team_agent/messaging/tmux_io.py +202 -33
  35. package/src/team_agent/messaging/tmux_prompt.py +87 -0
  36. package/src/team_agent/messaging/trust_auto_answer.py +52 -0
  37. package/src/team_agent/provider_state/README.md +78 -0
  38. package/src/team_agent/provider_state/__init__.py +86 -0
  39. package/src/team_agent/provider_state/claude.py +86 -0
  40. package/src/team_agent/provider_state/codex.py +84 -0
  41. package/src/team_agent/provider_state/common.py +207 -0
  42. package/src/team_agent/provider_state/registry.py +118 -0
  43. package/src/team_agent/restart/orchestration.py +215 -12
  44. package/src/team_agent/runtime.py +65 -15
  45. package/src/team_agent/sessions/capture.py +65 -15
  46. package/src/team_agent/spec.py +63 -3
  47. package/src/team_agent/status/queries.py +32 -1
  48. package/src/team_agent/wake.py +58 -0
  49. package/src/team_agent/watch/__init__.py +145 -0
@@ -2,11 +2,12 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import sqlite3
5
+ import time
5
6
  import uuid
6
7
  from contextlib import closing
7
8
  from datetime import datetime, timedelta, timezone
8
9
  from pathlib import Path
9
- from typing import Any
10
+ from typing import Any, Callable
10
11
 
11
12
  from . import agent_health as _agent_health
12
13
  from . import result_watchers as _result_watchers
@@ -15,6 +16,28 @@ from team_agent.paths import runtime_dir
15
16
  from team_agent.spec import validate_result_envelope
16
17
 
17
18
 
19
+ def _is_sqlite_locked(exc: sqlite3.OperationalError) -> bool:
20
+ message = str(exc).lower()
21
+ return (
22
+ "database is locked" in message
23
+ or "database table is locked" in message
24
+ or "database schema is locked" in message
25
+ )
26
+
27
+
28
+ def _with_sqlite_busy_retry(action: Callable[[], None]) -> None:
29
+ delay = 0.05
30
+ for attempt in range(6):
31
+ try:
32
+ action()
33
+ return
34
+ except sqlite3.OperationalError as exc:
35
+ if not _is_sqlite_locked(exc) or attempt == 5:
36
+ raise
37
+ time.sleep(delay)
38
+ delay *= 2
39
+
40
+
18
41
  class MessageStore:
19
42
  SCHEMA_VERSION = SCHEMA_VERSION
20
43
 
@@ -27,13 +50,16 @@ class MessageStore:
27
50
  def connect(self) -> sqlite3.Connection:
28
51
  conn = sqlite3.connect(self.path, timeout=30.0, isolation_level=None)
29
52
  conn.row_factory = sqlite3.Row
30
- conn.execute("PRAGMA journal_mode=WAL")
31
53
  conn.execute("PRAGMA busy_timeout=30000")
54
+ conn.execute("PRAGMA journal_mode=WAL")
32
55
  return conn
33
56
 
34
57
  def _init(self) -> None:
35
- with closing(self.connect()) as conn:
36
- initialize_schema(conn)
58
+ def initialize() -> None:
59
+ with closing(self.connect()) as conn:
60
+ initialize_schema(conn)
61
+
62
+ _with_sqlite_busy_retry(initialize)
37
63
 
38
64
  def create_message(
39
65
  self,
@@ -331,17 +357,17 @@ class MessageStore:
331
357
  return counts
332
358
 
333
359
  def add_result(self, envelope: dict[str, Any], owner_team_id: str | None = None) -> str:
334
- _ = owner_team_id
335
360
  validate_result_envelope(envelope)
336
361
  result_id = f"res_{uuid.uuid4().hex[:12]}"
337
362
  with closing(self.connect()) as conn:
338
363
  with conn:
339
364
  conn.execute(
340
365
  """
341
- insert into results(result_id, task_id, agent_id, envelope, status, created_at)
342
- values (?, ?, ?, ?, ?, ?)
366
+ insert into results(owner_team_id, result_id, task_id, agent_id, envelope, status, created_at)
367
+ values (?, ?, ?, ?, ?, ?, ?)
343
368
  """,
344
369
  (
370
+ owner_team_id,
345
371
  result_id,
346
372
  envelope["task_id"],
347
373
  envelope["agent_id"],
@@ -423,16 +449,17 @@ class MessageStore:
423
449
  return dict(row) if row else None
424
450
 
425
451
  def latest_results(self, limit: int = 5, owner_team_id: str | None = None) -> list[dict[str, Any]]:
426
- _ = owner_team_id
452
+ owner_clause = "and owner_team_id = ?" if owner_team_id else ""
453
+ args: tuple[Any, ...] = (owner_team_id, limit) if owner_team_id else (limit,)
427
454
  with closing(self.connect()) as conn:
428
455
  rows = conn.execute(
429
- """
456
+ f"""
430
457
  select * from results
431
- where status != 'invalid'
458
+ where status != 'invalid' {owner_clause}
432
459
  order by created_at desc
433
460
  limit ?
434
461
  """,
435
- (limit,),
462
+ args,
436
463
  ).fetchall()
437
464
  return [dict(row) for row in reversed(rows)]
438
465
 
@@ -11,9 +11,20 @@ from __future__ import annotations
11
11
 
12
12
  from contextlib import closing
13
13
  from datetime import datetime, timedelta, timezone
14
+ import sqlite3
15
+ import time
14
16
  from typing import Any
15
17
 
16
18
 
19
+ def _sqlite_locked(exc: sqlite3.OperationalError) -> bool:
20
+ message = str(exc).lower()
21
+ return (
22
+ "database is locked" in message
23
+ or "database table is locked" in message
24
+ or "database schema is locked" in message
25
+ )
26
+
27
+
17
28
  def claim_leader_notification_delivery(
18
29
  store: Any,
19
30
  *,
@@ -28,32 +39,42 @@ def claim_leader_notification_delivery(
28
39
  rowcount=0 means a prior row exists for (result_id, leader_session_uuid); SELECT
29
40
  it and return so the caller can decide to suppress (same envelope_hash) or surface
30
41
  legitimate-duplicate (different envelope_hash)."""
31
- now = datetime.now(timezone.utc).isoformat()
32
- with closing(store.connect()) as conn:
33
- with conn:
34
- cur = conn.execute(
35
- "insert or ignore into leader_notification_log("
36
- " result_id, leader_session_uuid, notified_message_id, notified_at,"
37
- " leader_pane_id_at_notify, envelope_content_hash, owner_team_id"
38
- ") values (?, ?, ?, ?, ?, ?, ?)",
39
- (
40
- result_id, leader_session_uuid, proposed_message_id, now,
41
- pane_id, envelope_hash, owner_team_id,
42
- ),
43
- )
44
- if cur.rowcount == 1:
45
- return {
46
- "status": "claimed_by_you",
47
- "notified_message_id": proposed_message_id,
48
- "notified_at": now,
49
- "envelope_content_hash": envelope_hash,
50
- }
51
- row = conn.execute(
52
- "select notified_message_id, notified_at, envelope_content_hash, "
53
- "leader_pane_id_at_notify from leader_notification_log "
54
- "where result_id = ? and leader_session_uuid = ?",
55
- (result_id, leader_session_uuid),
56
- ).fetchone()
42
+ delay = 0.05
43
+ row = None
44
+ for attempt in range(6):
45
+ now = datetime.now(timezone.utc).isoformat()
46
+ try:
47
+ with closing(store.connect()) as conn:
48
+ with conn:
49
+ cur = conn.execute(
50
+ "insert or ignore into leader_notification_log("
51
+ " result_id, leader_session_uuid, notified_message_id, notified_at,"
52
+ " leader_pane_id_at_notify, envelope_content_hash, owner_team_id"
53
+ ") values (?, ?, ?, ?, ?, ?, ?)",
54
+ (
55
+ result_id, leader_session_uuid, proposed_message_id, now,
56
+ pane_id, envelope_hash, owner_team_id,
57
+ ),
58
+ )
59
+ if cur.rowcount == 1:
60
+ return {
61
+ "status": "claimed_by_you",
62
+ "notified_message_id": proposed_message_id,
63
+ "notified_at": now,
64
+ "envelope_content_hash": envelope_hash,
65
+ }
66
+ row = conn.execute(
67
+ "select notified_message_id, notified_at, envelope_content_hash, "
68
+ "leader_pane_id_at_notify from leader_notification_log "
69
+ "where result_id = ? and leader_session_uuid = ?",
70
+ (result_id, leader_session_uuid),
71
+ ).fetchone()
72
+ break
73
+ except sqlite3.OperationalError as exc:
74
+ if not _sqlite_locked(exc) or attempt == 5:
75
+ raise
76
+ time.sleep(delay)
77
+ delay *= 2
57
78
  if row is None:
58
79
  # Should not happen (INSERT OR IGNORE returned 0 → row must exist), but be defensive.
59
80
  return {"status": "claimed_by_you", "notified_message_id": proposed_message_id,
@@ -22,7 +22,7 @@ MESSAGE_COLUMNS = {
22
22
  "error",
23
23
  "delivery_attempts",
24
24
  }
25
- RESULT_COLUMNS = {"result_id", "task_id", "agent_id", "envelope", "status", "created_at"}
25
+ RESULT_COLUMNS = {"owner_team_id", "result_id", "task_id", "agent_id", "envelope", "status", "created_at"}
26
26
  SCHEDULED_EVENT_COLUMNS = {
27
27
  "id",
28
28
  "owner_team_id",
@@ -125,6 +125,7 @@ def initialize_schema(conn: sqlite3.Connection) -> None:
125
125
  """
126
126
  create table if not exists results (
127
127
  result_id text primary key,
128
+ owner_team_id text,
128
129
  task_id text not null,
129
130
  agent_id text not null,
130
131
  envelope text not null,
@@ -215,7 +216,12 @@ def initialize_schema(conn: sqlite3.Connection) -> None:
215
216
  "owner_team_id": "alter table messages add column owner_team_id text",
216
217
  },
217
218
  )
218
- _ensure_table_columns(conn, "results", RESULT_COLUMNS)
219
+ _ensure_table_columns(
220
+ conn,
221
+ "results",
222
+ RESULT_COLUMNS,
223
+ {"owner_team_id": "alter table results add column owner_team_id text"},
224
+ )
219
225
  _ensure_table_columns(
220
226
  conn,
221
227
  "scheduled_events",
@@ -10,15 +10,62 @@ from team_agent.messaging.deps import (
10
10
  core_render_message,
11
11
  )
12
12
 
13
+ from datetime import datetime, timedelta, timezone
13
14
  from pathlib import Path
14
15
  from typing import Any
15
16
 
17
+
18
def _tmux_pane_width(target: str) -> dict[str, Any]:
    """Ask tmux for the width, in display columns, of the pane ``target``.

    Returns ``{"ok": True, "pane_width": <int>}`` when tmux answers with a
    positive integer, otherwise ``{"ok": False, "error": "<reason>"}``. No
    default width is ever substituted: callers are expected to treat a failure
    as "no boundary signal" so the trust-prompt matcher falls back to exact
    equality instead of guessing at a truncated prompt.
    """
    from team_agent.messaging.deps import run_cmd

    def _failure(reason: str) -> dict[str, Any]:
        return {"ok": False, "error": reason}

    try:
        completed = run_cmd(
            ["tmux", "display-message", "-p", "-t", str(target), "-F", "#{pane_width}"],
            timeout=2,
        )
    except Exception as exc:  # pragma: no cover - defensive; tmux not present, timeout, etc.
        return _failure(f"tmux_query_failed:{exc.__class__.__name__}")

    if getattr(completed, "returncode", 1) != 0:
        # Surface only the first stderr line; fall back to a fixed token.
        stderr_lines = (getattr(completed, "stderr", "") or "").strip().splitlines()
        return _failure(stderr_lines[0] if stderr_lines else "tmux_query_nonzero")

    stdout_text = (getattr(completed, "stdout", "") or "").strip()
    if not stdout_text:
        return _failure("empty_output")

    try:
        columns = int(stdout_text.splitlines()[0].strip())
    except (ValueError, IndexError):
        return _failure("unparseable_output")

    if columns <= 0:
        return _failure("non_positive_width")
    return {"ok": True, "pane_width": columns}
50
+
51
+
52
+ # Spark MEDIUM sweep #3 (2026-05-26): retry_needed bounded backoff. Each entry is
53
+ # the delay (seconds) BEFORE the attempt with that number runs; attempt 1 was the
54
+ # original delivery, attempt 2 fires 5s after retry_needed, attempt 3 fires 15s
55
+ # after the previous, attempt 4 fires 30s after the previous. _TRUST_RETRY_MAX_ATTEMPTS
56
+ # bounds the total — the 4th retry_needed is terminal and emits
57
+ # leader_panes.trust_auto_answer_exhausted.
58
+ _TRUST_RETRY_BACKOFF_SECONDS = {2: 5, 3: 15, 4: 30}
59
+ _TRUST_RETRY_MAX_ATTEMPTS = 4
60
+
16
61
  def _deliver_pending_message(
17
62
  workspace: Path,
18
63
  state: dict[str, Any],
19
64
  message_id: str,
20
65
  wait_visible: bool = True,
21
66
  timeout: float = 30.0,
67
+ *,
68
+ _trust_retry_attempt: int = 1,
22
69
  ) -> dict[str, Any]:
23
70
  store = MessageStore(workspace)
24
71
  row = next((m for m in store.messages() if m["message_id"] == message_id), None)
@@ -65,9 +112,58 @@ def _deliver_pending_message(
65
112
  attempts=3 if wait_visible else 1,
66
113
  provider=agent_state.get("provider", "fake"),
67
114
  )
115
+ if not injection.get("ok") and injection.get("detected") == "codex_trust_prompt":
116
+ # Gap 29 (Stage 2): opt-in trust auto-answer. The helper enforces both the
117
+ # opt-in flag and a workspace-dir match before sending '1'+Enter, then we
118
+ # retry the original paste once the prompt has actually been dismissed.
119
+ # Bypassed entirely when opt-out (default) — the existing failed envelope
120
+ # is preserved.
121
+ from team_agent.messaging.leader_panes import attempt_trust_auto_answer
122
+ pane_target = injection.get("pane_id") or target
123
+ # Live wiring: query the tmux pane width now and hand it to the trust
124
+ # matcher via state["pane_width"]. On failure we leave pane_width
125
+ # absent so the matcher falls back to exact equality (fail-safe — a
126
+ # right-edge truncated prefix is never auto-answered on guesswork).
127
+ width_query = _tmux_pane_width(pane_target)
128
+ trust_state = dict(state) if isinstance(state, dict) else {}
129
+ if width_query.get("ok"):
130
+ trust_state["pane_width"] = width_query["pane_width"]
131
+ answer = attempt_trust_auto_answer(
132
+ workspace,
133
+ pane_target,
134
+ injection.get("pane_capture_tail") or "",
135
+ EventLog(workspace),
136
+ state=trust_state,
137
+ )
138
+ if answer.get("answered"):
139
+ # Spark MEDIUM #4 (2026-05-26): replace the fixed 0.3s sleep with a
140
+ # bounded poll. Slow terminals can take well over a second to clear
141
+ # the trust prompt; sleeping a fixed amount races dismissal and
142
+ # leaves the retry hitting the same codex_trust_prompt state. We
143
+ # poll for prompt dismissal up to 3s; if still present, return a
144
+ # retry_needed envelope and let the upstream scheduler decide
145
+ # whether to back off and try again later.
146
+ dismissed = _wait_for_trust_prompt_dismissal(
147
+ injection.get("pane_id") or target, timeout=3.0,
148
+ )
149
+ if not dismissed:
150
+ return _handle_trust_retry_needed(
151
+ workspace, state, store, message_id, target, injection,
152
+ attempt=_trust_retry_attempt,
153
+ )
154
+ injection = _tmux_inject_text(
155
+ target,
156
+ text,
157
+ "Enter",
158
+ f"team-agent-send-{message_id}-trust-retry",
159
+ attempts=3 if wait_visible else 1,
160
+ provider=agent_state.get("provider", "fake"),
161
+ )
68
162
  if injection["ok"]:
69
163
  store.mark(message_id, "submitted")
70
- EventLog(workspace).write(
164
+ send_event_log = EventLog(workspace)
165
+ _stamp_first_send_at_if_leader_to_worker(state, row, send_event_log)
166
+ send_event_log.write(
71
167
  "send.submitted",
72
168
  message_id=message_id,
73
169
  target=target,
@@ -112,9 +208,248 @@ def _deliver_pending_message(
112
208
  "turn_verification": injection.get("turn_verification"),
113
209
  "paste_attempts": injection.get("attempts"),
114
210
  "submit_attempts": injection.get("submit_attempts"),
211
+ "detected": injection.get("detected"),
212
+ "pane_id": injection.get("pane_id"),
213
+ "pane_mode": injection.get("pane_mode"),
214
+ "pane_capture_tail": injection.get("pane_capture_tail"),
115
215
  }
116
216
 
117
217
 
218
def _handle_trust_retry_needed(
    workspace: Path,
    state: dict[str, Any],
    store: MessageStore,
    message_id: str,
    target: str,
    injection: dict[str, Any],
    *,
    attempt: int,
) -> dict[str, Any]:
    """Decide what happens after a trust prompt was answered but not dismissed.

    ``attempt`` is the number of the delivery that just failed (1 for the
    original delivery, higher for scheduler-fired retries). ``state`` is
    accepted for signature parity but is not read here.

    * When ``attempt + 1`` exceeds ``_TRUST_RETRY_MAX_ATTEMPTS`` the message is
      marked ``failed`` with reason ``trust_auto_answer_exhausted``, a
      ``leader_panes.trust_auto_answer_exhausted`` event is written, and an
      envelope with ``status="trust_auto_answer_exhausted"`` is returned.
    * Otherwise a ``trust_retry`` scheduled event is stored with the backoff
      for the next attempt, the message is parked in ``failed`` with reason
      ``trust_retry_scheduled``, the events
      ``leader_panes.trust_auto_answer_retry_needed`` and
      ``leader_panes.trust_auto_answer_retry_scheduled`` are written, and an
      envelope with ``status="retry_scheduled"`` is returned.

    Both envelopes have ``ok=False`` and carry the pane diagnostics taken
    from ``injection``.
    """
    audit_log = EventLog(workspace)
    pane_id = injection.get("pane_id")
    # Diagnostics appended, in this order, to whichever envelope is returned.
    pane_diagnostics = {
        "detected": injection.get("detected"),
        "pane_id": pane_id,
        "pane_mode": injection.get("pane_mode"),
        "pane_capture_tail": injection.get("pane_capture_tail"),
    }
    upcoming_attempt = attempt + 1

    if upcoming_attempt > _TRUST_RETRY_MAX_ATTEMPTS:
        # Terminal branch: no further retries are scheduled.
        store.mark(message_id, "failed", "trust_auto_answer_exhausted")
        audit_log.write(
            "leader_panes.trust_auto_answer_exhausted",
            message_id=message_id,
            workspace=str(workspace),
            attempts=attempt,
            target=target,
            pane_id=pane_id,
            reason="trust_auto_answer_exhausted",
        )
        return {
            "ok": False,
            "status": "trust_auto_answer_exhausted",
            "reason": "trust_auto_answer_exhausted",
            "attempts": attempt,
            **pane_diagnostics,
        }

    # Attempts without an explicit entry reuse the delay of the final attempt.
    fallback_delay = _TRUST_RETRY_BACKOFF_SECONDS[_TRUST_RETRY_MAX_ATTEMPTS]
    delay_seconds = _TRUST_RETRY_BACKOFF_SECONDS.get(upcoming_attempt, fallback_delay)
    retry_at = (datetime.now(timezone.utc) + timedelta(seconds=delay_seconds)).isoformat()
    scheduled_event_id = store.add_scheduled_event(
        retry_at,
        message_id,
        "trust_retry",
        {
            "message_id": message_id,
            "attempt": upcoming_attempt,
            "max_attempts": _TRUST_RETRY_MAX_ATTEMPTS,
            "first_target": target,
        },
        owner_team_id=_message_owner_team_id(store, message_id),
    )
    # Park the message in 'failed' so _deliver_pending_messages does not race
    # the scheduled retry; the scheduler-side consumer flips it back to
    # 'accepted' right before re-delivering.
    store.mark(message_id, "failed", "trust_retry_scheduled")
    audit_log.write(
        "leader_panes.trust_auto_answer_retry_needed",
        message_id=message_id,
        workspace=str(workspace),
        pane_id=pane_id or target,
        target=target,
        reason="trust_prompt_not_dismissed_after_answer",
        attempt=attempt,
    )
    audit_log.write(
        "leader_panes.trust_auto_answer_retry_scheduled",
        message_id=message_id,
        workspace=str(workspace),
        scheduled_event_id=scheduled_event_id,
        due_at=retry_at,
        next_attempt=upcoming_attempt,
        max_attempts=_TRUST_RETRY_MAX_ATTEMPTS,
        backoff_seconds=delay_seconds,
    )
    return {
        "ok": False,
        "status": "retry_scheduled",
        "reason": "trust_prompt_not_dismissed_after_answer",
        "stage": "trust_auto_answer_dismissal_wait",
        "verification": "trust_prompt_not_dismissed_after_answer",
        "scheduled_event_id": scheduled_event_id,
        "scheduled_retry_at": retry_at,
        "next_attempt": upcoming_attempt,
        "max_attempts": _TRUST_RETRY_MAX_ATTEMPTS,
        **pane_diagnostics,
    }
317
+
318
+
319
def _message_owner_team_id(store: MessageStore, message_id: str) -> str | None:
    """Look up the ``owner_team_id`` recorded on a stored message.

    Returns the owner as a string, or ``None`` when the message cannot be
    found or its ``owner_team_id`` is empty.
    """
    record = _message_by_id(store, message_id)
    owner = record.get("owner_team_id") if record else None
    if not owner:
        return None
    return str(owner)
325
+
326
+
327
def _execute_trust_retry(
    workspace: Path,
    store: MessageStore,
    event_log: EventLog,
    payload: dict[str, Any],
    *,
    owner_team_id: str | None = None,
) -> dict[str, Any]:
    """Run one scheduled ``trust_retry`` event.

    Reads ``message_id``, ``attempt`` and ``max_attempts`` from ``payload``.
    A missing id or a message that is no longer stored yields an
    ``{"ok": False, "reason": ...}`` envelope without touching the message.
    Otherwise the message is marked ``accepted`` again, a
    ``leader_panes.trust_auto_answer_retry_attempted`` event is written, the
    runtime state is loaded (narrowed to ``state["teams"][owner_team_id]``
    when that entry is a dict) and the result of ``_deliver_pending_message``
    is returned as-is.
    """
    from team_agent.state import load_runtime_state

    message_id = str(payload.get("message_id") or "")
    if message_id == "":
        return {"ok": False, "reason": "trust_retry_missing_message_id"}

    attempt = int(payload.get("attempt") or 1)
    if not _message_by_id(store, message_id):
        event_log.write(
            "leader_panes.trust_auto_answer_retry_skipped",
            message_id=message_id,
            reason="message_missing",
            attempt=attempt,
        )
        return {"ok": False, "reason": "message_missing"}

    # The earlier attempt parked the row as 'failed' / 'trust_retry_scheduled';
    # it has to be 'accepted' again before delivery is re-run.
    # NOTE(review): the current status is not inspected before the reset —
    # confirm a message delivered through another path cannot reach this point.
    store.mark(message_id, "accepted", "trust_retry_resuming")
    event_log.write(
        "leader_panes.trust_auto_answer_retry_attempted",
        message_id=message_id,
        workspace=str(workspace),
        attempt=attempt,
        max_attempts=int(payload.get("max_attempts") or _TRUST_RETRY_MAX_ATTEMPTS),
    )

    runtime_state = load_runtime_state(workspace)
    if owner_team_id and isinstance(runtime_state.get("teams"), dict):
        team_state = runtime_state["teams"].get(owner_team_id)
        if isinstance(team_state, dict):
            runtime_state = team_state

    return _deliver_pending_message(
        workspace,
        runtime_state,
        message_id,
        wait_visible=True,
        timeout=30.0,
        _trust_retry_attempt=attempt,
    )
375
+
376
+
377
+ def _stamp_first_send_at_if_leader_to_worker(
378
+ state: dict[str, Any],
379
+ row: dict[str, Any],
380
+ event_log: EventLog | None = None,
381
+ ) -> None:
382
+ """Route B atomicity (2026-05-27): record the first time the leader
383
+ successfully sends work to each worker. The presence of this stamp drives
384
+ restart's resumability decision — a worker the leader has interacted with
385
+ has accumulated conversation state, so a missing session_id at restart
386
+ time IS an atomicity violation. A worker that has never received work
387
+ legitimately fresh-starts during restart.
388
+
389
+ Only stamped once per worker (idempotent across re-sends). Only fires on
390
+ leader -> worker sends; worker-to-worker peer messages do not count.
391
+ The mutation lives on the state dict the caller already saves
392
+ (`save_team_scoped_state` in send.py, or `save_runtime_state` after
393
+ coordinator_tick), so persistence is automatic.
394
+
395
+ C1 (cr verdict, 2026-05-27): when the stamp transitions null -> ts (the
396
+ one-time write), emit a `worker.first_interaction` audit event with
397
+ worker_id, first_send_at, message_id. Re-sends to the same worker hit the
398
+ idempotency guard above and do NOT re-emit. Worker-to-worker peer sends
399
+ short-circuit at the sender check and do NOT emit.
400
+ """
401
+ sender = str(row.get("sender") or "")
402
+ recipient = str(row.get("recipient") or "")
403
+ if not recipient:
404
+ return
405
+ leader_id = str((state.get("leader") or {}).get("id") or "leader")
406
+ if sender not in {"leader", "Leader", leader_id}:
407
+ return
408
+ agents = state.get("agents")
409
+ if not isinstance(agents, dict):
410
+ return
411
+ agent_state = agents.get(recipient)
412
+ if not isinstance(agent_state, dict):
413
+ return
414
+ if agent_state.get("first_send_at"):
415
+ return
416
+ stamp = datetime.now(timezone.utc).isoformat()
417
+ agent_state["first_send_at"] = stamp
418
+ if event_log is not None:
419
+ event_log.write(
420
+ "worker.first_interaction",
421
+ worker_id=recipient,
422
+ first_send_at=stamp,
423
+ message_id=str(row.get("message_id") or ""),
424
+ )
425
+
426
+
427
def _wait_for_trust_prompt_dismissal(target: str, *, timeout: float = 3.0, poll_interval: float = 0.1) -> bool:
    """Poll the pane until the codex trust prompt is no longer detected.

    The pane is captured and classified with ``detect_non_input_scrollback``
    at least once. Returns True as soon as the classification is anything
    other than ``"codex_trust_prompt"``; returns False when the prompt is
    still detected once ``timeout`` seconds (clamped to >= 0) have elapsed.
    Sleeps ``poll_interval`` seconds between polls.

    NOTE(review): ``_capture_pane_tail`` yields an empty string when the
    capture fails, so a failed capture is presumably classified as "prompt
    gone" — confirm that is the intended outcome.
    """
    import time as clock
    from team_agent.messaging.tmux_prompt import detect_non_input_scrollback

    expires_at = clock.monotonic() + max(timeout, 0.0)
    while detect_non_input_scrollback(_capture_pane_tail(target)) == "codex_trust_prompt":
        if clock.monotonic() >= expires_at:
            return False
        clock.sleep(poll_interval)
    return True
443
+
444
+
445
def _capture_pane_tail(target: str) -> str:
    """Return the captured text of tmux pane ``target``, or ``""`` on failure.

    Delegates to ``_capture_tmux_pane_text``; an envelope without a truthy
    ``ok``, or with an empty ``capture``, collapses to the empty string.
    """
    from team_agent.messaging.deps import _capture_tmux_pane_text

    snapshot = _capture_tmux_pane_text(target)
    return str(snapshot.get("capture") or "") if snapshot.get("ok") else ""
451
+
452
+
118
453
  def _deliver_pending_messages(workspace: Path, state: dict[str, Any], event_log: EventLog) -> list[str]:
119
454
  store = MessageStore(workspace)
120
455
  delivered: list[str] = []
@@ -251,6 +251,19 @@ def _send_to_leader_receiver(
251
251
  f"team-agent-leader-receiver-{message_id}",
252
252
  provider=receiver.get("provider", "codex"),
253
253
  )
254
+ if not injection.get("ok") and injection.get("detected") == "codex_trust_prompt":
255
+ from team_agent.messaging.trust_auto_answer import retry_injection_after_trust_auto_answer
256
+ injection = retry_injection_after_trust_auto_answer(
257
+ workspace,
258
+ state,
259
+ event_log,
260
+ injection,
261
+ target,
262
+ text,
263
+ submit_key,
264
+ f"team-agent-leader-receiver-{message_id}-trust-retry",
265
+ receiver.get("provider", "codex"),
266
+ )
254
267
  if injection["ok"]:
255
268
  store.mark(message_id, "submitted")
256
269
  event_log.write(
@@ -466,10 +479,6 @@ def _format_team_agent_message(payload: dict[str, Any]) -> str:
466
479
 
467
480
 
468
481
 
469
-
470
-
471
-
472
-
473
482
 
474
483
 
475
484