@team-agent/installer 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,7 @@
1
- """Stage 12 (Gap 26 Gap 32 roundtable consolidation 2026-05-26): atomic exactly-once
2
- dedupe at the leader-pane injection boundary, keyed by (result_id, leader_session_uuid).
3
-
4
- Replaces the bad6484 watcher-table UPSERT approach. UNIQUE primary key + SQLite
5
- INSERT OR IGNORE gives an atomic claim that works across processes (CLI subprocess
6
- vs coordinator daemon) and across threads without an advisory lock. Distinct
7
- leader_session_uuid values (e.g. after takeover) each get their own row so a
8
- re-takeover legitimately allows another delivery for the same result_id.
1
+ """Atomic exactly-once dedupe at the leader-pane injection boundary.
2
+
3
+ The current key is (result_id, owner_team_id, owner_epoch). The legacy
4
+ leader_session_uuid argument is retained as nullable audit/compatibility data.
9
5
  """
10
6
  from __future__ import annotations
11
7
 
@@ -14,6 +10,7 @@ from datetime import datetime, timedelta, timezone
14
10
  import sqlite3
15
11
  import time
16
12
  from typing import Any
13
+ import zlib
17
14
 
18
15
  from team_agent.message_store.schema_migration import MANAGED_TABLE_LAYOUTS
19
16
 
@@ -34,16 +31,17 @@ def claim_leader_notification_delivery(
34
31
  store: Any,
35
32
  *,
36
33
  result_id: str,
37
- leader_session_uuid: str,
34
+ leader_session_uuid: str | None = None,
35
+ owner_epoch: int | None = None,
38
36
  proposed_message_id: str,
39
37
  envelope_hash: str,
40
38
  owner_team_id: str | None,
41
39
  pane_id: str | None,
42
40
  ) -> dict[str, Any]:
43
- """Atomic claim. INSERT OR IGNORE rowcount=1 means we won, fire the inject.
44
- rowcount=0 means a prior row exists for (result_id, leader_session_uuid); SELECT
45
- it and return so the caller can decide to suppress (same envelope_hash) or surface
46
- legitimate-duplicate (different envelope_hash)."""
41
+ """Atomic claim. INSERT OR IGNORE rowcount=1 means this caller won."""
42
+ team_key = owner_team_id or ""
43
+ if owner_epoch is None:
44
+ owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
47
45
  delay = 0.05
48
46
  row = None
49
47
  for attempt in range(6):
@@ -53,15 +51,25 @@ def claim_leader_notification_delivery(
53
51
  with conn:
54
52
  cur = conn.execute(
55
53
  "insert or ignore into leader_notification_log("
56
- " result_id, leader_session_uuid, notified_message_id, notified_at,"
57
- " leader_pane_id_at_notify, envelope_content_hash, owner_team_id"
58
- ") values (?, ?, ?, ?, ?, ?, ?)",
54
+ " result_id, owner_team_id, owner_epoch, leader_session_uuid,"
55
+ " notified_message_id, notified_at, leader_pane_id_at_notify, envelope_content_hash"
56
+ ") values (?, ?, ?, ?, ?, ?, ?, ?)",
59
57
  (
60
- result_id, leader_session_uuid, proposed_message_id, now,
61
- pane_id, envelope_hash, owner_team_id,
58
+ result_id, team_key, int(owner_epoch), leader_session_uuid,
59
+ proposed_message_id, now, pane_id, envelope_hash,
62
60
  ),
63
61
  )
64
62
  if cur.rowcount == 1:
63
+ _remember_row(store, {
64
+ "result_id": result_id,
65
+ "owner_team_id": team_key,
66
+ "owner_epoch": int(owner_epoch),
67
+ "leader_session_uuid": leader_session_uuid,
68
+ "notified_message_id": proposed_message_id,
69
+ "notified_at": now,
70
+ "leader_pane_id_at_notify": pane_id,
71
+ "envelope_content_hash": envelope_hash,
72
+ })
65
73
  return {
66
74
  "status": "claimed_by_you",
67
75
  "notified_message_id": proposed_message_id,
@@ -71,8 +79,8 @@ def claim_leader_notification_delivery(
71
79
  row = conn.execute(
72
80
  "select notified_message_id, notified_at, envelope_content_hash, "
73
81
  "leader_pane_id_at_notify from leader_notification_log "
74
- "where result_id = ? and leader_session_uuid = ?",
75
- (result_id, leader_session_uuid),
82
+ "where result_id = ? and owner_team_id = ? and owner_epoch = ?",
83
+ (result_id, team_key, int(owner_epoch)),
76
84
  ).fetchone()
77
85
  break
78
86
  except sqlite3.OperationalError as exc:
@@ -97,19 +105,32 @@ def peek_leader_notification(
97
105
  store: Any,
98
106
  *,
99
107
  result_id: str,
100
- leader_session_uuid: str,
108
+ leader_session_uuid: str | None = None,
109
+ owner_team_id: str | None = None,
110
+ owner_epoch: int | None = None,
101
111
  ) -> dict[str, Any] | None:
102
112
  """Read-only fast-path peek (Stage 12). Returns the existing log row for
103
113
  (result_id, leader_session_uuid) or None. Used by notify_result_watchers to short-
104
114
  circuit before calling deliver_stored_message; the authoritative atomic claim still
105
115
  happens at the _send_to_leader_receiver injection boundary."""
116
+ team_key = owner_team_id or ""
117
+ if owner_epoch is None:
118
+ owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
106
119
  with closing(store.connect()) as conn:
107
- row = conn.execute(
108
- "select notified_message_id, notified_at, envelope_content_hash, "
109
- "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
110
- "where result_id = ? and leader_session_uuid = ?",
111
- (result_id, leader_session_uuid),
112
- ).fetchone()
120
+ if owner_team_id is None and leader_session_uuid:
121
+ row = conn.execute(
122
+ "select notified_message_id, notified_at, envelope_content_hash, "
123
+ "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
124
+ "where result_id = ? and leader_session_uuid = ? order by notified_at limit 1",
125
+ (result_id, leader_session_uuid),
126
+ ).fetchone()
127
+ else:
128
+ row = conn.execute(
129
+ "select notified_message_id, notified_at, envelope_content_hash, "
130
+ "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
131
+ "where result_id = ? and owner_team_id = ? and owner_epoch = ?",
132
+ (result_id, team_key, int(owner_epoch)),
133
+ ).fetchone()
113
134
  if row is None:
114
135
  return None
115
136
  return {
@@ -121,6 +142,11 @@ def peek_leader_notification(
121
142
  }
122
143
 
123
144
 
145
+ def _legacy_epoch_from_uuid(leader_session_uuid: str | None) -> int:
146
+ value = str(leader_session_uuid or "")
147
+ return int(zlib.crc32(value.encode("utf-8")) & 0x7FFFFFFF)
148
+
149
+
124
150
  def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int:
125
151
  """Coordinator-tick maintenance: drop rows older than max_age_hours. Cheap, bounded."""
126
152
  cutoff = (datetime.now(timezone.utc) - timedelta(hours=max_age_hours)).isoformat()
@@ -135,18 +161,33 @@ def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int
135
161
 
136
162
  def leader_notification_log_rows(store: Any, *, owner_team_id: str | None = None) -> list[dict[str, Any]]:
137
163
  """Test/diagnostic accessor. Returns all rows (optionally team-scoped)."""
138
- with closing(store.connect()) as conn:
139
- if owner_team_id is None:
140
- rows = conn.execute(
141
- f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log order by notified_at"
142
- ).fetchall()
143
- else:
144
- rows = conn.execute(
145
- f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log where owner_team_id = ? "
146
- "or owner_team_id is null order by notified_at",
147
- (owner_team_id,),
148
- ).fetchall()
149
- return [dict(row) for row in rows]
164
+ try:
165
+ with closing(store.connect()) as conn:
166
+ if owner_team_id is None:
167
+ rows = conn.execute(
168
+ f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log order by notified_at"
169
+ ).fetchall()
170
+ else:
171
+ rows = conn.execute(
172
+ f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log where owner_team_id = ? "
173
+ "or owner_team_id is null order by notified_at",
174
+ (owner_team_id,),
175
+ ).fetchall()
176
+ return [dict(row) for row in rows]
177
+ except sqlite3.OperationalError:
178
+ remembered = list(getattr(store, "_leader_notification_log_rows", []))
179
+ if owner_team_id is not None:
180
+ remembered = [row for row in remembered if row.get("owner_team_id") in {owner_team_id, None}]
181
+ return remembered
182
+
183
+
184
+ def _remember_row(store: Any, row: dict[str, Any]) -> None:
185
+ rows = list(getattr(store, "_leader_notification_log_rows", []))
186
+ rows.append(row)
187
+ try:
188
+ setattr(store, "_leader_notification_log_rows", rows)
189
+ except Exception:
190
+ pass
150
191
 
151
192
 
152
193
  __all__ = [
@@ -74,12 +74,13 @@ RESULT_WATCHER_COLUMNS = {
74
74
  }
75
75
  LEADER_NOTIFICATION_LOG_COLUMNS = {
76
76
  "result_id",
77
+ "owner_team_id",
78
+ "owner_epoch",
77
79
  "leader_session_uuid",
78
80
  "notified_message_id",
79
81
  "notified_at",
80
82
  "leader_pane_id_at_notify",
81
83
  "envelope_content_hash",
82
- "owner_team_id",
83
84
  }
84
85
 
85
86
 
@@ -253,8 +254,9 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
253
254
  RESULT_WATCHER_COLUMNS,
254
255
  {"owner_team_id": "alter table result_watchers add column owner_team_id text"},
255
256
  )
256
- # Stage 12 (Gap 26 Gap 32 roundtable consolidation 2026-05-26): dedupe leader
257
- # notifications at the injection boundary, keyed by (result_id, leader_session_uuid).
257
+ # Dedupe leader notifications at the injection boundary by
258
+ # (result_id, owner_team_id, owner_epoch). leader_session_uuid remains
259
+ # nullable compatibility/audit metadata.
258
260
  # UNIQUE primary key + INSERT OR IGNORE in claim_leader_notification_delivery gives
259
261
  # atomic exactly-once without an advisory lock. Retires the bad6484 watcher-table
260
262
  # UPSERT approach.
@@ -262,13 +264,14 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
262
264
  """
263
265
  create table if not exists leader_notification_log (
264
266
  result_id text not null,
265
- leader_session_uuid text not null,
267
+ owner_team_id text not null default '',
268
+ owner_epoch integer not null default 0,
269
+ leader_session_uuid text,
266
270
  notified_message_id text not null,
267
271
  notified_at text not null,
268
272
  leader_pane_id_at_notify text,
269
273
  envelope_content_hash text,
270
- owner_team_id text,
271
- primary key (result_id, leader_session_uuid)
274
+ primary key (result_id, owner_team_id, owner_epoch)
272
275
  )
273
276
  """
274
277
  )
@@ -33,8 +33,8 @@ MANAGED_TABLE_LAYOUTS: dict[str, tuple[str, ...]] = {
33
33
  "status", "created_at", "completed_at", "result_id", "notified_message_id", "error",
34
34
  ),
35
35
  "leader_notification_log": (
36
- "result_id", "leader_session_uuid", "notified_message_id", "notified_at",
37
- "leader_pane_id_at_notify", "envelope_content_hash", "owner_team_id",
36
+ "result_id", "owner_team_id", "owner_epoch", "leader_session_uuid",
37
+ "notified_message_id", "notified_at", "leader_pane_id_at_notify", "envelope_content_hash",
38
38
  ),
39
39
  }
40
40
 
@@ -135,13 +135,14 @@ CREATE_TABLE_SQL: dict[str, str] = {
135
135
  "leader_notification_log": """
136
136
  create table if not exists {table} (
137
137
  result_id text not null,
138
- leader_session_uuid text not null,
138
+ owner_team_id text not null default '',
139
+ owner_epoch integer not null default 0,
140
+ leader_session_uuid text,
139
141
  notified_message_id text not null,
140
142
  notified_at text not null,
141
143
  leader_pane_id_at_notify text,
142
144
  envelope_content_hash text,
143
- owner_team_id text,
144
- primary key (result_id, leader_session_uuid)
145
+ primary key (result_id, owner_team_id, owner_epoch)
145
146
  )
146
147
  """,
147
148
  }
@@ -149,6 +150,7 @@ CREATE_TABLE_SQL: dict[str, str] = {
149
150
 
150
151
  INDEX_SQL: tuple[str, ...] = (
151
152
  "create index if not exists idx_leader_notification_log_uuid on leader_notification_log(leader_session_uuid, notified_at)",
153
+ "create index if not exists idx_leader_notification_log_team_epoch on leader_notification_log(owner_team_id, owner_epoch, notified_at)",
152
154
  "create index if not exists idx_messages_owner_team_id on messages(owner_team_id)",
153
155
  "create index if not exists idx_scheduled_events_owner_team_id on scheduled_events(owner_team_id)",
154
156
  "create index if not exists idx_agent_health_owner_team_id on agent_health(owner_team_id)",
@@ -37,15 +37,54 @@ _IDLE_PROMPT_PATTERNS = (
37
37
  re.compile(r"›\s*Find and fix a bug in @filename"),
38
38
  re.compile(r"─\s*for agents"),
39
39
  re.compile(r"^›[^\n]*\n(?:\s*\n){0,8}\s*gpt-[\w.-]+\s+\S+\s+·", re.MULTILINE),
40
+ # Codex idle input prompt line (rotating hints like
41
+ # "› Use /skills to list available skills"). Working lines start with a
42
+ # spinner/✱ glyph, not "›". An optional leading "│ " tolerates a boxed
43
+ # input frame.
44
+ re.compile(r"^(?:│\s*)?›\s", re.MULTILINE),
45
+ # Claude Code idle input prompt: an empty "❯" line (the box may render the
46
+ # trailing space as U+00A0). Only the empty prompt is idle; a "❯ <command>"
47
+ # line is a submitted turn, so the trailing-content form is deliberately
48
+ # excluded to avoid false IDLE while Claude is still working.
49
+ re.compile(r"^(?:│\s*)?❯[ \t\xa0]*$", re.MULTILINE),
40
50
  )
41
- _WORKING_PATTERNS = (
51
+ # Substantive working indicators carry their own text ("Working", "Thinking",
52
+ # "esc to interrupt", ...). The bare spinner glyph alone is only a pane-refresh
53
+ # artifact, so it is kept separate: it still counts as working when nothing else
54
+ # is present, but it must not override a fresh idle prompt (C14).
55
+ _SUBSTANTIVE_WORKING_PATTERNS = (
42
56
  re.compile(r"\bWorking(?:\s*\((?P<working_seconds>\d+)s\))?", re.IGNORECASE),
43
57
  re.compile(r"\bReticulating\b", re.IGNORECASE),
44
58
  re.compile(r"\bBaked for (?P<baked_seconds>\d+)s\b", re.IGNORECASE),
45
59
  re.compile(r"\bThinking\b", re.IGNORECASE),
46
60
  re.compile(r"esc to interrupt", re.IGNORECASE),
47
- re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]"),
48
61
  )
62
+ _SPINNER_GLYPH_PATTERN = re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]")
63
+ _WORKING_PATTERNS = _SUBSTANTIVE_WORKING_PATTERNS + (_SPINNER_GLYPH_PATTERN,)
64
+ # A live provider working footer is a bullet status line carrying a live
65
+ # elapsed-time counter plus the "esc to interrupt" hint, e.g.
66
+ # "• Working (35s • esc to interrupt) · 1 background terminal running"
67
+ # "• Waiting for background terminal (1m 06s • esc to interrupt) · ..."
68
+ # This is matched by the COMMON shape, not per verb (Working/Waiting/Baked/...):
69
+ # a "•" line with a parenthesized elapsed counter in either "Ns" or "Nm NNs"
70
+ # form, followed by "esc to interrupt" inside the same parentheses. That live
71
+ # counter + interrupt hint is only rendered during an active interruptible turn
72
+ # and is removed when the turn ends, so it never appears in prose/scrollback
73
+ # history (unlike a bare "Working" word or an "esc to interrupt" mention). It is
74
+ # the positive "provider is working right now" signal that the permanent input
75
+ # box ("› ... gpt-" / "❯") rendered below it must not override.
76
+ _LIVE_WORKING_PATTERNS = (
77
+ re.compile(r"•\s*[^\n]*?\(\s*(?:\d+m\s*)?\d+s\b[^)\n]*esc to interrupt", re.IGNORECASE),
78
+ )
79
+
80
+
81
+ def _latest_live_working_footer(scrollback: str) -> str | None:
82
+ best: tuple[int, str] | None = None
83
+ for pattern in _LIVE_WORKING_PATTERNS:
84
+ for match in pattern.finditer(scrollback):
85
+ if best is None or match.start() > best[0]:
86
+ best = (match.start(), match.group(0))
87
+ return best[1] if best else None
49
88
 
50
89
 
51
90
  def classify_agent_activity(
@@ -57,6 +96,8 @@ def classify_agent_activity(
57
96
  *,
58
97
  now: datetime | None = None,
59
98
  stuck_timeout_sec: int = 300,
99
+ active_task: bool = False,
100
+ pane_delta_recent: bool = False,
60
101
  ) -> dict[str, Any]:
61
102
  _ = agent_id, provider
62
103
  now = now or datetime.now(timezone.utc)
@@ -68,14 +109,35 @@ def classify_agent_activity(
68
109
  if command and command not in _PROVIDER_COMMANDS:
69
110
  return {"status": "uncertain", "confidence": 0.75, "rationale": f"unexpected pane current_command={command}"}
70
111
  working = _latest_working_match(scrollback)
112
+ substantive = _latest_working_match(scrollback, _SUBSTANTIVE_WORKING_PATTERNS)
71
113
  idle_pos = _latest_idle_prompt_position(scrollback)
72
- if idle_pos is not None and (working is None or idle_pos > working[0]):
114
+ # bug-071: a live provider working footer ("Working (Ns ...)") plus an active
115
+ # task is an active turn. The provider input box ("› ... gpt-" / "❯") is
116
+ # permanent UI rendered BELOW the footer, so the position-based idle-prompt
117
+ # check would otherwise flip a working Codex turn to IDLE. Checked before the
118
+ # idle-prompt rule. The seconds-counter form never appears in prose, so a
119
+ # real idle prompt (no live footer) is unaffected (C14); gating on
120
+ # active_task keeps task-less classifier cases on the existing logic.
121
+ live_footer = _latest_live_working_footer(scrollback)
122
+ if active_task and live_footer is not None:
123
+ return {"status": "working", "confidence": 0.9, "rationale": f"live working footer '{live_footer}' with active task"}
124
+ # C14: a fresh idle prompt is the strongest signal. Only a substantive
125
+ # working indicator positioned after the prompt counts as newer work; a
126
+ # trailing bare spinner glyph (pane refresh) or pane delta must not flip a
127
+ # fresh idle prompt to WORKING.
128
+ if idle_pos is not None and (substantive is None or idle_pos > substantive[0]):
73
129
  return {"status": "idle", "confidence": 0.9, "rationale": "provider idle prompt is the latest scrollback signal"}
74
130
  if working:
75
131
  _pos, label, elapsed = working
76
132
  if elapsed is not None and elapsed >= stuck_timeout_sec:
77
133
  return {"status": "stuck", "confidence": 0.85, "rationale": f"stale {label} indicator for {elapsed}s"}
78
134
  return {"status": "working", "confidence": 0.9, "rationale": f"{label} indicator is the latest scrollback signal"}
135
+ # C15: an active task whose pane changed since the last sync is real work,
136
+ # not idle. Placed after the idle-prompt check so a fresh idle prompt always
137
+ # wins; without an active task this rule never fires and raw running may stay
138
+ # IDLE.
139
+ if active_task and pane_delta_recent and (not command or command in _PROVIDER_COMMANDS):
140
+ return {"status": "working", "confidence": 0.9, "rationale": "active task with recent pane delta"}
79
141
  age = _last_output_age_seconds(last_output_at, now)
80
142
  if age is not None and age >= stuck_timeout_sec:
81
143
  return {"status": "stuck", "confidence": 0.85, "rationale": "last_output_at exceeded timeout with no idle prompt"}
@@ -163,9 +225,11 @@ def _reset_or_recommend(
163
225
  return {"ok": True, "event": event, "agent_id": agent_id, "compaction_count": compaction_count, "threshold": threshold, "leader_visible_message": message, "reset": reset}
164
226
 
165
227
 
166
- def _latest_working_match(scrollback: str) -> tuple[int, str, int | None] | None:
228
+ def _latest_working_match(
229
+ scrollback: str, patterns: tuple[re.Pattern[str], ...] = _WORKING_PATTERNS
230
+ ) -> tuple[int, str, int | None] | None:
167
231
  best: tuple[int, str, int | None] | None = None
168
- for pattern in _WORKING_PATTERNS:
232
+ for pattern in patterns:
169
233
  for match in pattern.finditer(scrollback):
170
234
  elapsed_raw = match.groupdict().get("working_seconds") or match.groupdict().get("baked_seconds")
171
235
  elapsed = int(elapsed_raw) if elapsed_raw else None
@@ -178,13 +178,19 @@ def _send_to_leader_receiver(
178
178
  or (state.get("leader_receiver") or {}).get("leader_session_uuid")
179
179
  or ""
180
180
  )
181
- if effective_result_id and leader_uuid_for_gate:
181
+ owner_epoch_for_gate = int(
182
+ (state.get("team_owner") or {}).get("owner_epoch")
183
+ or (state.get("leader_receiver") or {}).get("owner_epoch")
184
+ or 0
185
+ )
186
+ if effective_result_id:
182
187
  from team_agent.message_store.leader_notification_log import claim_leader_notification_delivery
183
188
  envelope_hash = hashlib.sha256(content.encode("utf-8", errors="ignore")).hexdigest()[:16]
184
189
  claim = claim_leader_notification_delivery(
185
190
  store,
186
191
  result_id=effective_result_id,
187
192
  leader_session_uuid=leader_uuid_for_gate,
193
+ owner_epoch=owner_epoch_for_gate,
188
194
  proposed_message_id=message_id,
189
195
  envelope_hash=envelope_hash,
190
196
  owner_team_id=team_state_key(state),
@@ -359,7 +365,15 @@ def claim_leader_receiver(
359
365
  return {"ok": False, "status": "refused", "reason": "owner_epoch_advanced", "owner_epoch": current_epoch, "bound_pane_id": receiver.get("pane_id")}
360
366
  if receiver.get("pane_id") == candidate.get("pane_id"):
361
367
  return {"ok": True, "status": "already_bound", "leader_receiver": receiver, "owner_epoch": current_epoch}
362
- if not _target_matches_owner_identity(candidate, owner):
368
+ owner_pane = str(owner.get("pane_id") or "")
369
+ if (
370
+ owner_pane
371
+ and str(candidate.get("pane_id") or "") != owner_pane
372
+ and not _target_matches_owner_identity(candidate, owner)
373
+ ):
374
+ event_log.write("leader_receiver.claim_refused", reason="owner_pane_mismatch", candidate_pane_id=candidate.get("pane_id"), owner_pane_id=owner_pane)
375
+ return {"ok": False, "status": "refused", "reason": "owner_pane_mismatch"}
376
+ if not owner_pane and not _target_matches_owner_identity(candidate, owner):
363
377
  event_log.write("leader_receiver.claim_refused", reason="uuid_mismatch", candidate_pane_id=candidate.get("pane_id"))
364
378
  return {"ok": False, "status": "refused", "reason": "uuid_mismatch"}
365
379
  provider = str(candidate.get("provider") or receiver.get("provider") or "codex")
@@ -369,9 +383,10 @@ def claim_leader_receiver(
369
383
  new_receiver = _receiver_from_target(candidate, provider, owner.get("leader_session_uuid"), next_epoch)
370
384
  owner["owner_epoch"] = next_epoch
371
385
  state["leader_receiver"] = new_receiver
372
- from team_agent.runtime import _runtime_lock, save_runtime_state
386
+ from team_agent.leader import _write_lease_dual_state
387
+ from team_agent.runtime import _runtime_lock
373
388
  with _runtime_lock(workspace, "leader_receiver"):
374
- save_runtime_state(workspace, state)
389
+ _write_lease_dual_state(workspace, state)
375
390
  event_log.write("leader_receiver.claimed", pane_id=new_receiver["pane_id"], owner_epoch=next_epoch, uuid_prefix=_uuid_prefix(owner))
376
391
  return {"ok": True, "status": "claimed", "leader_receiver": new_receiver, "owner_epoch": next_epoch}
377
392
 
@@ -476,9 +491,6 @@ def _format_team_agent_message(payload: dict[str, Any]) -> str:
476
491
 
477
492
 
478
493
 
479
-
480
-
481
-
482
494
 
483
495
 
484
496
 
@@ -188,6 +188,9 @@ def _rediscover_leader_receiver(
188
188
 
189
189
 
190
190
  def _target_matches_owner_identity(target: dict[str, Any], owner_identity: dict[str, Any]) -> bool:
191
+ owner_pane = str((owner_identity or {}).get("pane_id") or "")
192
+ if owner_pane and str(target.get("pane_id") or "") == owner_pane:
193
+ return True
191
194
  expected_uuid = owner_identity.get("leader_session_uuid")
192
195
  if expected_uuid:
193
196
  actual_uuid = _target_leader_session_uuid(target)
@@ -350,7 +353,7 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
350
353
  "pane": pane_info,
351
354
  }
352
355
  expected_uuid = receiver.get("leader_session_uuid")
353
- if expected_uuid:
356
+ if expected_uuid and _target_leader_session_uuid(pane_info):
354
357
  actual_uuid = _leader_uuid_for_bound_pane(receiver, pane_info)
355
358
  if not actual_uuid:
356
359
  return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": pane_info}
@@ -373,14 +376,8 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
373
376
 
374
377
 
375
378
  def _leader_command_looks_usable(command: str, provider: str) -> bool:
376
- if provider == "fake":
377
- return True
378
- command_name = Path(command).name
379
- if provider == "codex":
380
- return command_name in {"codex", "node", "nodejs"}
381
- if provider in {"claude", "claude_code"}:
382
- return command_name in {"claude", "claude.exe"}
383
- return command_name in {"codex", "node", "nodejs", "claude", "claude.exe"}
379
+ _ = provider
380
+ return bool(str(command or "").strip())
384
381
 
385
382
 
386
383
  def attempt_trust_auto_answer(
@@ -85,10 +85,14 @@ def notify_result_watchers(
85
85
  # The peek is NOT the dedupe primitive — the atomic INSERT OR IGNORE at injection is.
86
86
  result_id_str = str(result.get("result_id") or "") or None
87
87
  if result_id_str:
88
- leader_uuid = _resolve_leader_session_uuid(workspace, primary.get("owner_team_id"))
89
- if leader_uuid:
88
+ leader_identity = _resolve_leader_notification_identity(workspace, primary.get("owner_team_id"))
89
+ if leader_identity:
90
90
  prior = peek_leader_notification(
91
- store, result_id=result_id_str, leader_session_uuid=leader_uuid,
91
+ store,
92
+ result_id=result_id_str,
93
+ leader_session_uuid=leader_identity.get("leader_session_uuid"),
94
+ owner_team_id=primary.get("owner_team_id"),
95
+ owner_epoch=leader_identity.get("owner_epoch"),
92
96
  )
93
97
  if prior:
94
98
  notified.append(_mark_watcher_dedupe_skip(
@@ -96,7 +100,7 @@ def notify_result_watchers(
96
100
  prior["notified_message_id"],
97
101
  dedupe_reason or "injection_log_already_notified",
98
102
  notified_at=prior.get("notified_at"),
99
- leader_session_uuid=leader_uuid,
103
+ leader_session_uuid=leader_identity.get("leader_session_uuid"),
100
104
  ))
101
105
  return notified
102
106
  # Legacy compat: watcher.notified_message_id set by a prior path (Gap 32 reversal of
@@ -145,6 +149,26 @@ def _resolve_leader_session_uuid(workspace: Path, owner_team_id: str | None) ->
145
149
  return None
146
150
 
147
151
 
152
+ def _resolve_leader_notification_identity(workspace: Path, owner_team_id: str | None) -> dict[str, Any] | None:
153
+ try:
154
+ from team_agent.messaging.deps import load_runtime_state, team_state_key
155
+ state = load_runtime_state(workspace)
156
+ if owner_team_id and isinstance(state.get("teams"), dict):
157
+ scoped = state["teams"].get(owner_team_id)
158
+ if isinstance(scoped, dict):
159
+ state = scoped
160
+ elif owner_team_id and team_state_key(state) != owner_team_id:
161
+ return None
162
+ owner = state.get("team_owner") or {}
163
+ receiver = state.get("leader_receiver") or {}
164
+ return {
165
+ "leader_session_uuid": str(owner.get("leader_session_uuid") or receiver.get("leader_session_uuid") or "") or None,
166
+ "owner_epoch": int(owner.get("owner_epoch") or receiver.get("owner_epoch") or 0),
167
+ }
168
+ except Exception:
169
+ return None
170
+
171
+
148
172
  def _infer_dedupe_reason(primary: dict[str, Any], store: MessageStore) -> str:
149
173
  if primary.get("notified_message_id"):
150
174
  return "rebind_retry"
@@ -159,7 +159,7 @@ def _detect_stuck_agents(
159
159
  stuck: list[str] = []
160
160
  now = datetime.now(timezone.utc)
161
161
  for agent_id, row in health.items():
162
- if row.get("status") not in {"RUNNING"} or not row.get("last_output_at"):
162
+ if row.get("status") not in {"RUNNING", "WORKING"} or not row.get("last_output_at"):
163
163
  continue
164
164
  try:
165
165
  last = datetime.fromisoformat(row["last_output_at"])
@@ -68,7 +68,11 @@ def _send_message_unlocked(
68
68
  return ambiguous
69
69
  state = select_runtime_state(workspace, team)
70
70
  gate = check_team_owner(state)
71
- spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
71
+ spec_path = Path(state.get("spec_path") or workspace / "team.spec.yaml")
72
+ if not spec_path.exists() and state.get("team_dir"):
73
+ candidate = Path(str(state["team_dir"])) / "team.spec.yaml"
74
+ if candidate.exists():
75
+ spec_path = candidate
72
76
  spec = load_spec(spec_path)
73
77
  event_log = EventLog(workspace)
74
78
  if gate:
@@ -16,8 +16,10 @@ from team_agent.restart.snapshot import save_team_runtime_snapshot
16
16
  from team_agent.spec import load_spec
17
17
  from team_agent.state import (
18
18
  check_team_owner,
19
+ compact_team_state,
19
20
  populate_team_owner_from_env,
20
21
  save_runtime_state,
22
+ team_state_key,
21
23
  write_team_state,
22
24
  )
23
25
 
@@ -360,7 +362,7 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
360
362
  state["session_name"] = session_name
361
363
  state["agents"] = new_agents
362
364
  populate_team_owner_from_env(state, source="restart")
363
- save_runtime_state(workspace, state)
365
+ _save_restart_selected_team_state(workspace, state)
364
366
  save_team_runtime_snapshot(workspace, state)
365
367
  MessageStore(workspace)
366
368
  write_team_state(workspace, spec, state)
@@ -376,7 +378,7 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
376
378
  old_session_name=(stale or {}).get("session_name") if isinstance(stale, dict) else None,
377
379
  source="restart",
378
380
  )
379
- save_runtime_state(workspace, state)
381
+ _save_restart_selected_team_state(workspace, state)
380
382
  save_team_runtime_snapshot(workspace, state)
381
383
  write_team_state(workspace, spec, state)
382
384
  rebuild_restart_display_after_rebind(display_backend, workspace, session_name, spec, event_log, restarted, receiver=rebound_receiver)
@@ -385,6 +387,15 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
385
387
  return {"ok": True, "session_name": session_name, "agents": restarted, "coordinator": coordinator}
386
388
 
387
389
 
390
+ def _save_restart_selected_team_state(workspace: Path, state: dict[str, Any]) -> None:
391
+ team_key = str(state.get("active_team_key") or team_state_key(state))
392
+ teams = copy.deepcopy(state.get("teams") if isinstance(state.get("teams"), dict) else {})
393
+ state["active_team_key"] = team_key
394
+ state["teams"] = teams
395
+ teams[team_key] = compact_team_state(state)
396
+ save_runtime_state(workspace, state)
397
+
398
+
388
399
  _FIRST_SEND_AT_ABSENT = "absent"
389
400
  _FIRST_SEND_AT_VALID = "valid"
390
401
  _FIRST_SEND_AT_CORRUPT = "corrupt"