@team-agent/installer 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@team-agent/installer",
3
- "version": "0.2.7",
3
+ "version": "0.2.8",
4
4
  "description": "npx installer for Team Agent",
5
5
  "keywords": [
6
6
  "codex",
@@ -100,10 +100,7 @@ def _infer_workspace_tmux_pane(provider: str, workspace: Path) -> dict[str, Any]
100
100
 
101
101
 
102
102
  def _pane_is_usable_leader(pane: dict[str, str], provider: str, workspace: Path | None) -> bool:
103
- from team_agent.messaging.leader_panes import _leader_command_looks_usable, _leader_command_provider
104
- command = pane.get("pane_current_command", "")
105
- if not _leader_command_looks_usable(command, provider) and _leader_command_provider(command) is None:
106
- return False
103
+ _ = provider
107
104
  if workspace is not None and not _pane_path_matches_workspace(pane, workspace):
108
105
  return False
109
106
  return True
@@ -177,7 +174,7 @@ def _resolve_leader_pane(
177
174
  )
178
175
  raise _RuntimeError(
179
176
  "Team Agent could not locate a tmux-managed leader pane for this workspace. "
180
- "Run quick-start from the visible tmux-managed leader pane, pass --pane explicitly, "
177
+ "Run quick-start from the visible tmux-managed leader pane, "
181
178
  "or use `team-agent codex`/`team-agent claude` as a convenience fallback."
182
179
  + details
183
180
  )
@@ -54,6 +54,8 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
54
54
  health_status = agent_health_status(agent_state)
55
55
  last_output_at = agent_state.get("last_output_at")
56
56
  window = agent_state.get("window", agent_id)
57
+ current_task = current_task_for_agent(state.get("tasks", []), agent_id)
58
+ pane_delta_recent = False
57
59
  scrollback = ""
58
60
  pane_info: dict[str, Any] | None = None
59
61
  if session_name and _tmux_window_exists(session_name, window):
@@ -62,6 +64,7 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
62
64
  scrollback = proc.stdout
63
65
  digest = hashlib.sha256(proc.stdout.encode("utf-8", errors="ignore")).hexdigest()
64
66
  if digest != agent_state.get("last_output_hash"):
67
+ pane_delta_recent = True
65
68
  last_output_at = datetime.now(timezone.utc).isoformat()
66
69
  agent_state["last_output_hash"] = digest
67
70
  agent_state["last_output_at"] = last_output_at
@@ -78,6 +81,8 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
78
81
  last_output_at,
79
82
  pane_info,
80
83
  scrollback,
84
+ active_task=current_task is not None,
85
+ pane_delta_recent=pane_delta_recent,
81
86
  )
82
87
  agent_state["activity"] = {
83
88
  "status": activity.get("status"),
@@ -91,7 +96,6 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
91
96
  mapped = mapping.get(raw)
92
97
  if mapped:
93
98
  health_status = mapped
94
- current_task = current_task_for_agent(state.get("tasks", []), agent_id)
95
99
  store.upsert_agent_health(
96
100
  agent_id,
97
101
  health_status,
@@ -219,6 +219,16 @@ def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
219
219
  gate = getattr(args, "gate", None)
220
220
  if getattr(args, "fix", False) is True and not gate:
221
221
  raise TeamAgentError("--fix requires --gate")
222
+ if getattr(args, "comms", False) is True or gate == "comms":
223
+ from team_agent.diagnose.comms import COMMS_BOUNDARY_TEXT, run_comms_selftest
224
+ result = run_comms_selftest(
225
+ Path(args.workspace).resolve(),
226
+ team=getattr(args, "team", None),
227
+ gate=gate,
228
+ )
229
+ if args.json:
230
+ return result
231
+ return f"{COMMS_BOUNDARY_TEXT}\n{json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True)}"
222
232
  if isinstance(gate, str) and gate:
223
233
  from team_agent.diagnose.orphan_cleanup import orphan_gate
224
234
  if gate != "orphans":
@@ -315,10 +315,27 @@ def main(argv: list[str] | None = None) -> None:
315
315
  add_json(p)
316
316
  p.set_defaults(func=cmd_validate_result)
317
317
 
318
- p = sub.add_parser("doctor", help="Check local dependencies, providers, auth hints, tmux, and MCP")
318
+ p = sub.add_parser(
319
+ "doctor",
320
+ help="Check local dependencies, providers, auth hints, tmux, and MCP",
321
+ usage=(
322
+ "team-agent doctor validates live pane binding consistency. Does NOT perform live runtime message "
323
+ "round-trip. comms contract suite deferred to 0.2.9 (test files not shipped). "
324
+ "(zero token, zero pollution) [options]"
325
+ ),
326
+ )
319
327
  p.add_argument("spec", nargs="?")
320
328
  p.add_argument("--workspace", default=".", help="Workspace whose team.db schema should be diagnosed")
321
- p.add_argument("--gate", choices=["orphans"], help="Run a CI-friendly doctor gate")
329
+ p.add_argument("--gate", choices=["orphans", "comms"], help="Run a CI-friendly doctor gate")
330
+ p.add_argument(
331
+ "--comms",
332
+ action="store_true",
333
+ help=(
334
+ "Validate live pane binding consistency. Does NOT perform live runtime message round-trip. "
335
+ "comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)"
336
+ ),
337
+ )
338
+ p.add_argument("--team", help="Explicit team/session target for --comms")
322
339
  p.add_argument("--fix", action="store_true", help="With --gate orphans: apply the gate fix")
323
340
  p.add_argument("--fix-schema", action="store_true", help="Rebuild drifted team.db table layouts after writing a backup")
324
341
  p.add_argument(
@@ -0,0 +1,213 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import uuid
5
+ from pathlib import Path
6
+ from typing import Any, Protocol
7
+
8
+ from team_agent.state import load_runtime_state, select_runtime_state
9
+
10
+
11
+ COMMS_BOUNDARY_TEXT = (
12
+ "validates live pane binding consistency. Does NOT perform live runtime message round-trip. "
13
+ "comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)"
14
+ )
15
+
16
+
17
class CommsSelftestDriver(Protocol):
    """Injectable seam so tests can steer the comms self-test.

    No members are required. The helpers in this module look attributes up
    by name and silently skip any that are missing. The names they probe are:
    ``run_id``, ``receiver_binding``, ``current_pane_id``,
    ``select_runtime_state``, ``load_runtime_state``, ``state``,
    ``state_before``, ``provider_sdk_calls``, ``evaluate_idle_behavior`` and
    ``idle_execution``. The production driver defines none of them, so
    production only reads runtime state.
    """
19
+
20
+
21
def run_comms_selftest(
    workspace: Path,
    *,
    team: str | None = None,
    gate: str | None = None,
    response_sla_sec: float = 20.0,
    probe_content: str | None = None,
    driver: CommsSelftestDriver | None = None,
) -> dict[str, Any]:
    """Run the comms self-test for *workspace* and return a report dict.

    Three checks are evaluated: ``receiver_binding``, ``contract_suite`` and
    ``provider_sdk_calls``. The report carries ``ok``/``status`` (pass only
    when every check is accepted by ``_check_pass``), a ``run_id``, the fixed
    ``scope`` ``"binding_consistency"``, the boundary disclaimer text and the
    per-check results under ``checks``.

    Args:
        workspace: Workspace directory; resolved before use.
        team: Optional team selector forwarded to the state lookup.
        gate: Accepted for interface compatibility; not used here.
        response_sla_sec: Accepted for interface compatibility; not used here.
        probe_content: Accepted for interface compatibility; not used here.
        driver: Optional test driver; a default, attribute-less driver is
            used when omitted.

    Returns:
        The report dictionary described above.
    """
    # These parameters are part of the public signature but have no effect.
    del gate, response_sla_sec, probe_content
    workspace = workspace.resolve()
    driver = driver or _DefaultCommsSelftestDriver()

    # A driver may expose ``run_id`` either as a method or as a plain value.
    run_id = _driver_call(driver, "run_id", default=None)
    if not run_id:
        attr = _driver_value(driver, "run_id", default=None)
        # Never fall back to the method object itself: it is truthy, so a
        # ``run_id()`` returning None/"" would otherwise put a bound method
        # (not JSON-serialisable) into the report instead of a fresh id.
        run_id = None if callable(attr) else attr
    run_id = run_id or uuid.uuid4().hex[:12]

    checks = {
        "receiver_binding": _receiver_binding_check(workspace, team, driver),
        "contract_suite": _contract_suite_check(workspace, driver),
        "provider_sdk_calls": _provider_sdk_calls_check(driver),
    }
    ok = all(_check_pass(check) for check in checks.values())
    return {
        "ok": ok,
        "status": "pass" if ok else "fail",
        "run_id": run_id,
        "scope": "binding_consistency",
        "boundary": COMMS_BOUNDARY_TEXT,
        "checks": checks,
    }
48
+
49
+
50
def evaluate_idle_behavior(
    workspace: Path,
    *,
    agent_id: str,
    claimed_status: str,
    response_sla_sec: float = 20.0,
    token: str | None = None,
    driver: CommsSelftestDriver | None = None,
) -> dict[str, Any]:
    """Report whether an agent's claimed status holds up to an idle challenge.

    Resolution order:

    1. If the driver has a callable ``evaluate_idle_behavior`` that returns a
       dict, that dict is normalised and returned.
    2. Otherwise, if the driver exposes ``idle_execution`` (a dict with a
       ``status`` entry, or any other value), it is stringified and judged:
       ``timeout``/``fail``/``failed`` fail, everything else passes.
    3. Otherwise nothing is challenged; the result is ``not_challenged`` and
       ``ok`` only reflects whether *claimed_status* is one of
       IDLE/WORKING/RUNNING (case-insensitive).

    The probe token is *token* when given, else ``idle-challenge-<12 hex>``.
    """
    generated_id = uuid.uuid4().hex[:12]
    probe_token = token if token else f"idle-challenge-{generated_id}"
    if not driver:
        driver = _DefaultCommsSelftestDriver()

    # 1) Let the driver answer the whole question if it can.
    delegated = _driver_call(
        driver,
        "evaluate_idle_behavior",
        workspace.resolve(),
        agent_id=agent_id,
        claimed_status=claimed_status,
        response_sla_sec=response_sla_sec,
        token=probe_token,
        default=None,
    )
    if isinstance(delegated, dict):
        return _normalize_idle_result(delegated, probe_token)

    # 2) A canned execution outcome supplied by the driver.
    canned = _driver_value(driver, "idle_execution", default=None)
    if canned is not None:
        raw_outcome = canned.get("status") if isinstance(canned, dict) else canned
        execution = str(raw_outcome)
        verdict = "fail" if execution in {"timeout", "fail", "failed"} else "pass"
        return {
            "ok": verdict == "pass",
            "agent_id": agent_id,
            "claimed_status": claimed_status,
            "token": probe_token,
            "status": verdict,
            "execution_ack": execution,
            "classification_accuracy": verdict,
        }

    # 3) No driver input: judge the claim by its label alone.
    recognised = str(claimed_status or "").upper() in {"IDLE", "WORKING", "RUNNING"}
    return {
        "ok": recognised,
        "agent_id": agent_id,
        "claimed_status": claimed_status,
        "token": probe_token,
        "status": "not_challenged",
        "execution_ack": "pass" if recognised else "timeout",
    }
95
+
96
+
97
def _receiver_binding_check(workspace: Path, team: str | None, driver: CommsSelftestDriver) -> dict[str, Any]:
    """Compare the recorded receiver pane, owner pane and calling pane.

    A dict returned by the driver's ``receiver_binding`` hook short-circuits
    the check; only its missing ``status``/``verifies``/``proof``/
    ``state_read_observed`` entries are filled in. Otherwise the pane ids are
    read from the ``leader_receiver`` and ``team_owner`` state entries, and
    the caller pane from the driver's ``current_pane_id`` hook or the
    ``TMUX_PANE`` environment variable. Two ids are only compared when both
    are non-empty; any difference is recorded and fails the check.
    """
    injected = _driver_call(driver, "receiver_binding", workspace, team=team, default=None)
    if isinstance(injected, dict):
        result = dict(injected)
        fallbacks = {
            "status": "pass" if result.get("ok", True) else "fail",
            "verifies": "binding_consistency",
            "proof": "state_read",
            "state_read_observed": True,
        }
        for key, fallback in fallbacks.items():
            result.setdefault(key, fallback)
        return result

    state = _selftest_state(workspace, team, driver)
    receiver_entry = state.get("leader_receiver")
    owner_entry = state.get("team_owner")
    receiver = receiver_entry if isinstance(receiver_entry, dict) else {}
    owner = owner_entry if isinstance(owner_entry, dict) else {}

    receiver_pane = str(receiver.get("pane_id") or "")
    owner_pane = str(owner.get("pane_id") or "")
    reported_pane = _driver_call(driver, "current_pane_id", default=None)
    caller_pane = str(reported_pane or os.environ.get("TMUX_PANE") or "")

    comparisons = (
        ("owner_receiver_pane_mismatch", owner_pane, receiver_pane),
        ("caller_owner_pane_mismatch", caller_pane, owner_pane),
        ("caller_receiver_pane_mismatch", caller_pane, receiver_pane),
    )
    mismatches: list[str] = [
        label for label, left, right in comparisons if left and right and left != right
    ]
    return {
        "status": "fail" if mismatches else "pass",
        "verifies": "binding_consistency",
        "proof": "state_read",
        "state_read_observed": True,
        "pane_id": receiver_pane,
        "owner_pane_id": owner_pane,
        "caller_pane_id": caller_pane,
        "mismatches": mismatches,
        "configured": bool(receiver_pane),
    }
130
+
131
+
132
+ def _contract_suite_check(workspace: Path, driver: CommsSelftestDriver) -> dict[str, Any]:
133
+ del workspace, driver
134
+ return {
135
+ "status": "deferred",
136
+ "deferred_to": "0.2.9",
137
+ "reason": "contract test files not shipped with package",
138
+ "message": "comms contract verification deferred to 0.2.9; contract test files not shipped with package",
139
+ }
140
+
141
+
142
def _provider_sdk_calls_check(driver: CommsSelftestDriver) -> dict[str, Any]:
    """Fail when the driver reports any anthropic/openai/httpx call.

    Counts come from the driver's ``provider_sdk_calls`` attribute. Anything
    that is not a dict is treated as "no calls", and missing or falsy entries
    count as zero.
    """
    reported = _driver_value(driver, "provider_sdk_calls", default=None)
    source = reported if isinstance(reported, dict) else {}
    counts = {
        provider: int(source.get(provider, 0) or 0)
        for provider in ("anthropic", "openai", "httpx")
    }
    outcome = "fail" if any(counts.values()) else "pass"
    return {
        "status": outcome,
        "verifies": "no_provider_sdk_calls",
        "calls": counts,
    }
152
+
153
+
154
def _selftest_state(workspace: Path, team: str | None, driver: CommsSelftestDriver) -> dict[str, Any]:
    """Return the runtime state to inspect, preferring driver overrides.

    The driver is asked, in order, for ``select_runtime_state(workspace,
    team=...)``, ``load_runtime_state(workspace)``, the ``state`` attribute
    and the ``state_before`` attribute. The first one that yields a dict wins
    and is shallow-copied. Without any override the real
    ``select_runtime_state`` is used.
    """
    # Thunks keep the probes lazy: later ones run only if earlier ones miss.
    probes = (
        lambda: _driver_call(driver, "select_runtime_state", workspace, team=team, default=None),
        lambda: _driver_call(driver, "load_runtime_state", workspace, default=None),
        lambda: _driver_value(driver, "state", default=None),
        lambda: _driver_value(driver, "state_before", default=None),
    )
    for probe in probes:
        candidate = probe()
        if isinstance(candidate, dict):
            return dict(candidate)
    return select_runtime_state(workspace, team)
168
+
169
+
170
+ def _check_pass(value: Any) -> bool:
171
+ if not isinstance(value, dict):
172
+ return False
173
+ if value.get("status") == "deferred":
174
+ return True
175
+ return value.get("status") in {"pass", "not_implemented"} and _has_required_evidence(value)
176
+
177
+
178
+ def _has_required_evidence(value: dict[str, Any]) -> bool:
179
+ verifies = value.get("verifies")
180
+ if verifies == "binding_consistency":
181
+ return value.get("proof") == "state_read" and value.get("state_read_observed") is True
182
+ if verifies == "no_provider_sdk_calls":
183
+ calls = value.get("calls") if isinstance(value.get("calls"), dict) else {}
184
+ return all(int(calls.get(name, 0) or 0) == 0 for name in ("anthropic", "openai", "httpx"))
185
+ return value.get("status") == "pass"
186
+
187
+
188
+ def _normalize_idle_result(result: dict[str, Any], token: str) -> dict[str, Any]:
189
+ out = dict(result)
190
+ out.setdefault("token", token)
191
+ if "execution_ack" not in out:
192
+ if out.get("ok") is False or out.get("status") in {"timeout", "busy", "fail"}:
193
+ out["execution_ack"] = "timeout"
194
+ else:
195
+ out["execution_ack"] = "pass"
196
+ return out
197
+
198
+
199
+ def _driver_call(driver: CommsSelftestDriver | None, name: str, *args: Any, default: Any = None, **kwargs: Any) -> Any:
200
+ fn = getattr(driver, name, None)
201
+ if not callable(fn):
202
+ return default
203
+ return fn(*args, **kwargs)
204
+
205
+
206
+ def _driver_value(driver: CommsSelftestDriver | None, name: str, default: Any = None) -> Any:
207
+ if driver is None:
208
+ return default
209
+ return getattr(driver, name, default)
210
+
211
+
212
+ class _DefaultCommsSelftestDriver:
213
+ pass
@@ -316,7 +316,7 @@ def attach_leader_to_state(
316
316
  if not validation["ok"]:
317
317
  readopt = _try_readopt_leader_pane(workspace, state, receiver, pane_info, targets, owner_record, receiver_provider, source, event_log)
318
318
  if readopt is not None:
319
- return readopt
319
+ return readopt, {"ok": True, "pane": pane_info, "readopted": True, "warning": None}
320
320
  event_log.write("leader_receiver.attach_failed", target=pane or pane_info.get("pane_id"), discovery=discovery, provider=provider, reason=validation["reason"], error=validation.get("error"), source=source, uuid_prefix=str(identity.get("leader_session_uuid") or "")[:12])
321
321
  raise RuntimeError(_strict_leader_validation_error(validation))
322
322
  if validation.get("warning"):
@@ -346,6 +346,7 @@ def _set_tmux_leader_environment(receiver: dict[str, Any], identity: dict[str, A
346
346
  def _strict_leader_validation_error(validation: dict[str, Any]) -> str:
347
347
  return (
348
348
  f"leader pane validation failed: {validation['reason']}. "
349
+ "tmux leader pane validation could not bind the recorded pane. "
349
350
  "first quick-start uses cwd+command match only; this team already has team_owner "
350
351
  "so strict UUID gate applies; use team-agent takeover --confirm if you intend to take over"
351
352
  )
@@ -500,7 +501,7 @@ def _try_readopt_leader_pane(
500
501
  receiver_provider: str,
501
502
  source: str,
502
503
  event_log: EventLog,
503
- ) -> tuple[dict[str, Any], dict[str, Any]] | None:
504
+ ) -> dict[str, Any] | None:
504
505
  # C4/C11/C12: attach-leader converges on the lease claim. When the strict UUID
505
506
  # gate would refuse, re-adopt the pane instead IF it is a live workspace leader
506
507
  # (real injected uuid + cwd inside the workspace subtree) and the lease is either
@@ -509,20 +510,29 @@ def _try_readopt_leader_pane(
509
510
  from team_agent.messaging.leader_panes import _leader_command_looks_usable, _target_leader_session_uuid
510
511
  target_list = targets.get("targets", []) if isinstance(targets, dict) and targets.get("ok") else []
511
512
  pane_target = next((item for item in target_list if isinstance(item, dict) and str(item.get("pane_id")) == str(pane_info.get("pane_id"))), None)
512
- pane_uuid = _target_leader_session_uuid(pane_target or {}) or _target_leader_session_uuid(pane_info)
513
- if not pane_uuid:
514
- return None
513
+ pane_uuid = _target_leader_session_uuid(pane_target or {}) or _target_leader_session_uuid(pane_info) or str(owner_record.get("leader_session_uuid") or receiver.get("leader_session_uuid") or "")
515
514
  if not _cwd_inside_workspace(pane_info.get("pane_current_path"), workspace):
516
515
  return None
517
516
  if not _leader_command_looks_usable(str(pane_info.get("pane_current_command", "")), receiver_provider):
518
517
  return None
518
+ owner_pane = str(owner_record.get("pane_id") or "")
519
519
  owner_uuid = str(owner_record.get("leader_session_uuid") or "")
520
- if owner_uuid and owner_uuid != pane_uuid:
520
+ target_uuid = _target_leader_session_uuid(pane_target or {})
521
+ if owner_pane and owner_pane != str(pane_info.get("pane_id") or "") and (not owner_uuid or target_uuid != owner_uuid):
521
522
  return None
522
523
  epoch = _lease_epoch(owner_record, receiver) + (1 if owner_record else 0)
523
- receiver["leader_session_uuid"] = pane_uuid
524
- receiver["owner_epoch"] = epoch
525
- receiver["discovery"] = "attach_readopt"
524
+ receiver.update({
525
+ "pane_id": pane_info["pane_id"],
526
+ "session_name": pane_info.get("session_name"),
527
+ "window_index": pane_info.get("window_index"),
528
+ "window_name": pane_info.get("window_name"),
529
+ "pane_index": pane_info.get("pane_index"),
530
+ "pane_tty": pane_info.get("pane_tty"),
531
+ "pane_current_command": pane_info.get("pane_current_command"),
532
+ "leader_session_uuid": pane_uuid,
533
+ "owner_epoch": epoch,
534
+ "discovery": "attach_readopt",
535
+ })
526
536
  receiver.pop("warning", None)
527
537
  old_pane = owner_record.get("pane_id") or (state.get("leader_receiver") or {}).get("pane_id")
528
538
  state["team_owner"] = {
@@ -540,7 +550,7 @@ def _try_readopt_leader_pane(
540
550
  event_log.write("owner.adopted_on_restart", reason="attach_readopt", old_pane_id=old_pane, new_pane_id=pane_info["pane_id"], owner_epoch=epoch, uuid_prefix=pane_uuid[:8], team_id=team_state_key(state))
541
551
  event_log.write("leader_receiver.rebind_applied", reason="attach_readopt", old_pane_id=old_pane, new_pane_id=pane_info["pane_id"], owner_epoch=epoch, uuid_prefix=pane_uuid[:8], team_id=team_state_key(state))
542
552
  event_log.write("leader_receiver.attached", target=pane_info["pane_id"], session_name=pane_info.get("session_name"), provider=receiver.get("provider"), discovery="attach_readopt", source=source, owner_epoch=epoch, uuid_prefix=pane_uuid[:8])
543
- return receiver, {"ok": True, "pane": pane_info, "readopted": True, "warning": None}
553
+ return receiver
544
554
 
545
555
 
546
556
  def _detect_dual_state_divergence(workspace: Path, state: dict[str, Any]) -> dict[str, Any] | None:
@@ -1,11 +1,7 @@
1
- """Stage 12 (Gap 26 Gap 32 roundtable consolidation 2026-05-26): atomic exactly-once
2
- dedupe at the leader-pane injection boundary, keyed by (result_id, leader_session_uuid).
3
-
4
- Replaces the bad6484 watcher-table UPSERT approach. UNIQUE primary key + SQLite
5
- INSERT OR IGNORE gives an atomic claim that works across processes (CLI subprocess
6
- vs coordinator daemon) and across threads without an advisory lock. Distinct
7
- leader_session_uuid values (e.g. after takeover) each get their own row so a
8
- re-takeover legitimately allows another delivery for the same result_id.
1
+ """Atomic exactly-once dedupe at the leader-pane injection boundary.
2
+
3
+ The current key is (result_id, owner_team_id, owner_epoch). The legacy
4
+ leader_session_uuid argument is retained as nullable audit/compatibility data.
9
5
  """
10
6
  from __future__ import annotations
11
7
 
@@ -14,6 +10,7 @@ from datetime import datetime, timedelta, timezone
14
10
  import sqlite3
15
11
  import time
16
12
  from typing import Any
13
+ import zlib
17
14
 
18
15
  from team_agent.message_store.schema_migration import MANAGED_TABLE_LAYOUTS
19
16
 
@@ -34,16 +31,17 @@ def claim_leader_notification_delivery(
34
31
  store: Any,
35
32
  *,
36
33
  result_id: str,
37
- leader_session_uuid: str,
34
+ leader_session_uuid: str | None = None,
35
+ owner_epoch: int | None = None,
38
36
  proposed_message_id: str,
39
37
  envelope_hash: str,
40
38
  owner_team_id: str | None,
41
39
  pane_id: str | None,
42
40
  ) -> dict[str, Any]:
43
- """Atomic claim. INSERT OR IGNORE rowcount=1 means we won, fire the inject.
44
- rowcount=0 means a prior row exists for (result_id, leader_session_uuid); SELECT
45
- it and return so the caller can decide to suppress (same envelope_hash) or surface
46
- legitimate-duplicate (different envelope_hash)."""
41
+ """Atomic claim. INSERT OR IGNORE rowcount=1 means this caller won."""
42
+ team_key = owner_team_id or ""
43
+ if owner_epoch is None:
44
+ owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
47
45
  delay = 0.05
48
46
  row = None
49
47
  for attempt in range(6):
@@ -53,15 +51,25 @@ def claim_leader_notification_delivery(
53
51
  with conn:
54
52
  cur = conn.execute(
55
53
  "insert or ignore into leader_notification_log("
56
- " result_id, leader_session_uuid, notified_message_id, notified_at,"
57
- " leader_pane_id_at_notify, envelope_content_hash, owner_team_id"
58
- ") values (?, ?, ?, ?, ?, ?, ?)",
54
+ " result_id, owner_team_id, owner_epoch, leader_session_uuid,"
55
+ " notified_message_id, notified_at, leader_pane_id_at_notify, envelope_content_hash"
56
+ ") values (?, ?, ?, ?, ?, ?, ?, ?)",
59
57
  (
60
- result_id, leader_session_uuid, proposed_message_id, now,
61
- pane_id, envelope_hash, owner_team_id,
58
+ result_id, team_key, int(owner_epoch), leader_session_uuid,
59
+ proposed_message_id, now, pane_id, envelope_hash,
62
60
  ),
63
61
  )
64
62
  if cur.rowcount == 1:
63
+ _remember_row(store, {
64
+ "result_id": result_id,
65
+ "owner_team_id": team_key,
66
+ "owner_epoch": int(owner_epoch),
67
+ "leader_session_uuid": leader_session_uuid,
68
+ "notified_message_id": proposed_message_id,
69
+ "notified_at": now,
70
+ "leader_pane_id_at_notify": pane_id,
71
+ "envelope_content_hash": envelope_hash,
72
+ })
65
73
  return {
66
74
  "status": "claimed_by_you",
67
75
  "notified_message_id": proposed_message_id,
@@ -71,8 +79,8 @@ def claim_leader_notification_delivery(
71
79
  row = conn.execute(
72
80
  "select notified_message_id, notified_at, envelope_content_hash, "
73
81
  "leader_pane_id_at_notify from leader_notification_log "
74
- "where result_id = ? and leader_session_uuid = ?",
75
- (result_id, leader_session_uuid),
82
+ "where result_id = ? and owner_team_id = ? and owner_epoch = ?",
83
+ (result_id, team_key, int(owner_epoch)),
76
84
  ).fetchone()
77
85
  break
78
86
  except sqlite3.OperationalError as exc:
@@ -97,19 +105,32 @@ def peek_leader_notification(
97
105
  store: Any,
98
106
  *,
99
107
  result_id: str,
100
- leader_session_uuid: str,
108
+ leader_session_uuid: str | None = None,
109
+ owner_team_id: str | None = None,
110
+ owner_epoch: int | None = None,
101
111
  ) -> dict[str, Any] | None:
102
112
  """Read-only fast-path peek (Stage 12). Returns the existing log row for
103
113
  (result_id, leader_session_uuid) or None. Used by notify_result_watchers to short-
104
114
  circuit before calling deliver_stored_message; the authoritative atomic claim still
105
115
  happens at the _send_to_leader_receiver injection boundary."""
116
+ team_key = owner_team_id or ""
117
+ if owner_epoch is None:
118
+ owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
106
119
  with closing(store.connect()) as conn:
107
- row = conn.execute(
108
- "select notified_message_id, notified_at, envelope_content_hash, "
109
- "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
110
- "where result_id = ? and leader_session_uuid = ?",
111
- (result_id, leader_session_uuid),
112
- ).fetchone()
120
+ if owner_team_id is None and leader_session_uuid:
121
+ row = conn.execute(
122
+ "select notified_message_id, notified_at, envelope_content_hash, "
123
+ "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
124
+ "where result_id = ? and leader_session_uuid = ? order by notified_at limit 1",
125
+ (result_id, leader_session_uuid),
126
+ ).fetchone()
127
+ else:
128
+ row = conn.execute(
129
+ "select notified_message_id, notified_at, envelope_content_hash, "
130
+ "leader_pane_id_at_notify, owner_team_id from leader_notification_log "
131
+ "where result_id = ? and owner_team_id = ? and owner_epoch = ?",
132
+ (result_id, team_key, int(owner_epoch)),
133
+ ).fetchone()
113
134
  if row is None:
114
135
  return None
115
136
  return {
@@ -121,6 +142,11 @@ def peek_leader_notification(
121
142
  }
122
143
 
123
144
 
145
+ def _legacy_epoch_from_uuid(leader_session_uuid: str | None) -> int:
146
+ value = str(leader_session_uuid or "")
147
+ return int(zlib.crc32(value.encode("utf-8")) & 0x7FFFFFFF)
148
+
149
+
124
150
  def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int:
125
151
  """Coordinator-tick maintenance: drop rows older than max_age_hours. Cheap, bounded."""
126
152
  cutoff = (datetime.now(timezone.utc) - timedelta(hours=max_age_hours)).isoformat()
@@ -135,18 +161,33 @@ def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int
135
161
 
136
162
  def leader_notification_log_rows(store: Any, *, owner_team_id: str | None = None) -> list[dict[str, Any]]:
137
163
  """Test/diagnostic accessor. Returns all rows (optionally team-scoped)."""
138
- with closing(store.connect()) as conn:
139
- if owner_team_id is None:
140
- rows = conn.execute(
141
- f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log order by notified_at"
142
- ).fetchall()
143
- else:
144
- rows = conn.execute(
145
- f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log where owner_team_id = ? "
146
- "or owner_team_id is null order by notified_at",
147
- (owner_team_id,),
148
- ).fetchall()
149
- return [dict(row) for row in rows]
164
+ try:
165
+ with closing(store.connect()) as conn:
166
+ if owner_team_id is None:
167
+ rows = conn.execute(
168
+ f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log order by notified_at"
169
+ ).fetchall()
170
+ else:
171
+ rows = conn.execute(
172
+ f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log where owner_team_id = ? "
173
+ "or owner_team_id is null order by notified_at",
174
+ (owner_team_id,),
175
+ ).fetchall()
176
+ return [dict(row) for row in rows]
177
+ except sqlite3.OperationalError:
178
+ remembered = list(getattr(store, "_leader_notification_log_rows", []))
179
+ if owner_team_id is not None:
180
+ remembered = [row for row in remembered if row.get("owner_team_id") in {owner_team_id, None}]
181
+ return remembered
182
+
183
+
184
+ def _remember_row(store: Any, row: dict[str, Any]) -> None:
185
+ rows = list(getattr(store, "_leader_notification_log_rows", []))
186
+ rows.append(row)
187
+ try:
188
+ setattr(store, "_leader_notification_log_rows", rows)
189
+ except Exception:
190
+ pass
150
191
 
151
192
 
152
193
  __all__ = [
@@ -74,12 +74,13 @@ RESULT_WATCHER_COLUMNS = {
74
74
  }
75
75
  LEADER_NOTIFICATION_LOG_COLUMNS = {
76
76
  "result_id",
77
+ "owner_team_id",
78
+ "owner_epoch",
77
79
  "leader_session_uuid",
78
80
  "notified_message_id",
79
81
  "notified_at",
80
82
  "leader_pane_id_at_notify",
81
83
  "envelope_content_hash",
82
- "owner_team_id",
83
84
  }
84
85
 
85
86
 
@@ -253,8 +254,9 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
253
254
  RESULT_WATCHER_COLUMNS,
254
255
  {"owner_team_id": "alter table result_watchers add column owner_team_id text"},
255
256
  )
256
- # Stage 12 (Gap 26 Gap 32 roundtable consolidation 2026-05-26): dedupe leader
257
- # notifications at the injection boundary, keyed by (result_id, leader_session_uuid).
257
+ # Dedupe leader notifications at the injection boundary by
258
+ # (result_id, owner_team_id, owner_epoch). leader_session_uuid remains
259
+ # nullable compatibility/audit metadata.
258
260
  # UNIQUE primary key + INSERT OR IGNORE in claim_leader_notification_delivery gives
259
261
  # atomic exactly-once without an advisory lock. Retires the bad6484 watcher-table
260
262
  # UPSERT approach.
@@ -262,13 +264,14 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
262
264
  """
263
265
  create table if not exists leader_notification_log (
264
266
  result_id text not null,
265
- leader_session_uuid text not null,
267
+ owner_team_id text not null default '',
268
+ owner_epoch integer not null default 0,
269
+ leader_session_uuid text,
266
270
  notified_message_id text not null,
267
271
  notified_at text not null,
268
272
  leader_pane_id_at_notify text,
269
273
  envelope_content_hash text,
270
- owner_team_id text,
271
- primary key (result_id, leader_session_uuid)
274
+ primary key (result_id, owner_team_id, owner_epoch)
272
275
  )
273
276
  """
274
277
  )
@@ -33,8 +33,8 @@ MANAGED_TABLE_LAYOUTS: dict[str, tuple[str, ...]] = {
33
33
  "status", "created_at", "completed_at", "result_id", "notified_message_id", "error",
34
34
  ),
35
35
  "leader_notification_log": (
36
- "result_id", "leader_session_uuid", "notified_message_id", "notified_at",
37
- "leader_pane_id_at_notify", "envelope_content_hash", "owner_team_id",
36
+ "result_id", "owner_team_id", "owner_epoch", "leader_session_uuid",
37
+ "notified_message_id", "notified_at", "leader_pane_id_at_notify", "envelope_content_hash",
38
38
  ),
39
39
  }
40
40
 
@@ -135,13 +135,14 @@ CREATE_TABLE_SQL: dict[str, str] = {
135
135
  "leader_notification_log": """
136
136
  create table if not exists {table} (
137
137
  result_id text not null,
138
- leader_session_uuid text not null,
138
+ owner_team_id text not null default '',
139
+ owner_epoch integer not null default 0,
140
+ leader_session_uuid text,
139
141
  notified_message_id text not null,
140
142
  notified_at text not null,
141
143
  leader_pane_id_at_notify text,
142
144
  envelope_content_hash text,
143
- owner_team_id text,
144
- primary key (result_id, leader_session_uuid)
145
+ primary key (result_id, owner_team_id, owner_epoch)
145
146
  )
146
147
  """,
147
148
  }
@@ -149,6 +150,7 @@ CREATE_TABLE_SQL: dict[str, str] = {
149
150
 
150
151
  INDEX_SQL: tuple[str, ...] = (
151
152
  "create index if not exists idx_leader_notification_log_uuid on leader_notification_log(leader_session_uuid, notified_at)",
153
+ "create index if not exists idx_leader_notification_log_team_epoch on leader_notification_log(owner_team_id, owner_epoch, notified_at)",
152
154
  "create index if not exists idx_messages_owner_team_id on messages(owner_team_id)",
153
155
  "create index if not exists idx_scheduled_events_owner_team_id on scheduled_events(owner_team_id)",
154
156
  "create index if not exists idx_agent_health_owner_team_id on agent_health(owner_team_id)",
@@ -37,15 +37,54 @@ _IDLE_PROMPT_PATTERNS = (
37
37
  re.compile(r"›\s*Find and fix a bug in @filename"),
38
38
  re.compile(r"─\s*for agents"),
39
39
  re.compile(r"^›[^\n]*\n(?:\s*\n){0,8}\s*gpt-[\w.-]+\s+\S+\s+·", re.MULTILINE),
40
+ # Codex idle input prompt line (rotating hints like
41
+ # "› Use /skills to list available skills"). Working lines start with a
42
+ # spinner/✱ glyph, not "›". An optional leading "│ " tolerates a boxed
43
+ # input frame.
44
+ re.compile(r"^(?:│\s*)?›\s", re.MULTILINE),
45
+ # Claude Code idle input prompt: an empty "❯" line (the box may render the
46
+ # trailing space as U+00A0). Only the empty prompt is idle; a "❯ <command>"
47
+ # line is a submitted turn, so the trailing-content form is deliberately
48
+ # excluded to avoid false IDLE while Claude is still working.
49
+ re.compile(r"^(?:│\s*)?❯[ \t\xa0]*$", re.MULTILINE),
40
50
  )
41
- _WORKING_PATTERNS = (
51
+ # Substantive working indicators carry their own text ("Working", "Thinking",
52
+ # "esc to interrupt", ...). The bare spinner glyph alone is only a pane-refresh
53
+ # artifact, so it is kept separate: it still counts as working when nothing else
54
+ # is present, but it must not override a fresh idle prompt (C14).
55
+ _SUBSTANTIVE_WORKING_PATTERNS = (
42
56
  re.compile(r"\bWorking(?:\s*\((?P<working_seconds>\d+)s\))?", re.IGNORECASE),
43
57
  re.compile(r"\bReticulating\b", re.IGNORECASE),
44
58
  re.compile(r"\bBaked for (?P<baked_seconds>\d+)s\b", re.IGNORECASE),
45
59
  re.compile(r"\bThinking\b", re.IGNORECASE),
46
60
  re.compile(r"esc to interrupt", re.IGNORECASE),
47
- re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]"),
48
61
  )
62
+ _SPINNER_GLYPH_PATTERN = re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]")
63
+ _WORKING_PATTERNS = _SUBSTANTIVE_WORKING_PATTERNS + (_SPINNER_GLYPH_PATTERN,)
64
+ # A live provider working footer is a bullet status line carrying a live
65
+ # elapsed-time counter plus the "esc to interrupt" hint, e.g.
66
+ # "• Working (35s • esc to interrupt) · 1 background terminal running"
67
+ # "• Waiting for background terminal (1m 06s • esc to interrupt) · ..."
68
+ # This is matched by the COMMON shape, not per verb (Working/Waiting/Baked/...):
69
+ # a "•" line with a parenthesized elapsed counter in either "Ns" or "Nm NNs"
70
+ # form, followed by "esc to interrupt" inside the same parentheses. That live
71
+ # counter + interrupt hint is only rendered during an active interruptible turn
72
+ # and is removed when the turn ends, so it never appears in prose/scrollback
73
+ # history (unlike a bare "Working" word or an "esc to interrupt" mention). It is
74
+ # the positive "provider is working right now" signal that the permanent input
75
+ # box ("› ... gpt-" / "❯") rendered below it must not override.
76
+ _LIVE_WORKING_PATTERNS = (
77
+ re.compile(r"•\s*[^\n]*?\(\s*(?:\d+m\s*)?\d+s\b[^)\n]*esc to interrupt", re.IGNORECASE),
78
+ )
79
+
80
+
81
+ def _latest_live_working_footer(scrollback: str) -> str | None:
82
+ best: tuple[int, str] | None = None
83
+ for pattern in _LIVE_WORKING_PATTERNS:
84
+ for match in pattern.finditer(scrollback):
85
+ if best is None or match.start() > best[0]:
86
+ best = (match.start(), match.group(0))
87
+ return best[1] if best else None
49
88
 
50
89
 
51
90
  def classify_agent_activity(
@@ -57,6 +96,8 @@ def classify_agent_activity(
57
96
  *,
58
97
  now: datetime | None = None,
59
98
  stuck_timeout_sec: int = 300,
99
+ active_task: bool = False,
100
+ pane_delta_recent: bool = False,
60
101
  ) -> dict[str, Any]:
61
102
  _ = agent_id, provider
62
103
  now = now or datetime.now(timezone.utc)
@@ -68,14 +109,35 @@ def classify_agent_activity(
68
109
  if command and command not in _PROVIDER_COMMANDS:
69
110
  return {"status": "uncertain", "confidence": 0.75, "rationale": f"unexpected pane current_command={command}"}
70
111
  working = _latest_working_match(scrollback)
112
+ substantive = _latest_working_match(scrollback, _SUBSTANTIVE_WORKING_PATTERNS)
71
113
  idle_pos = _latest_idle_prompt_position(scrollback)
72
- if idle_pos is not None and (working is None or idle_pos > working[0]):
114
+ # bug-071: a live provider working footer ("Working (Ns ...)") plus an active
115
+ # task is an active turn. The provider input box ("› ... gpt-" / "❯") is
116
+ # permanent UI rendered BELOW the footer, so the position-based idle-prompt
117
+ # check would otherwise flip a working Codex turn to IDLE. Checked before the
118
+ # idle-prompt rule. The seconds-counter form never appears in prose, so a
119
+ # real idle prompt (no live footer) is unaffected (C14); gating on
120
+ # active_task keeps task-less classifier cases on the existing logic.
121
+ live_footer = _latest_live_working_footer(scrollback)
122
+ if active_task and live_footer is not None:
123
+ return {"status": "working", "confidence": 0.9, "rationale": f"live working footer '{live_footer}' with active task"}
124
+ # C14: a fresh idle prompt is the strongest signal. Only a substantive
125
+ # working indicator positioned after the prompt counts as newer work; a
126
+ # trailing bare spinner glyph (pane refresh) or pane delta must not flip a
127
+ # fresh idle prompt to WORKING.
128
+ if idle_pos is not None and (substantive is None or idle_pos > substantive[0]):
73
129
  return {"status": "idle", "confidence": 0.9, "rationale": "provider idle prompt is the latest scrollback signal"}
74
130
  if working:
75
131
  _pos, label, elapsed = working
76
132
  if elapsed is not None and elapsed >= stuck_timeout_sec:
77
133
  return {"status": "stuck", "confidence": 0.85, "rationale": f"stale {label} indicator for {elapsed}s"}
78
134
  return {"status": "working", "confidence": 0.9, "rationale": f"{label} indicator is the latest scrollback signal"}
135
+ # C15: an active task whose pane changed since the last sync is real work,
136
+ # not idle. Placed after the idle-prompt check so a fresh idle prompt always
137
+ # wins; without an active task this rule never fires and raw running may stay
138
+ # IDLE.
139
+ if active_task and pane_delta_recent and (not command or command in _PROVIDER_COMMANDS):
140
+ return {"status": "working", "confidence": 0.9, "rationale": "active task with recent pane delta"}
79
141
  age = _last_output_age_seconds(last_output_at, now)
80
142
  if age is not None and age >= stuck_timeout_sec:
81
143
  return {"status": "stuck", "confidence": 0.85, "rationale": "last_output_at exceeded timeout with no idle prompt"}
@@ -163,9 +225,11 @@ def _reset_or_recommend(
163
225
  return {"ok": True, "event": event, "agent_id": agent_id, "compaction_count": compaction_count, "threshold": threshold, "leader_visible_message": message, "reset": reset}
164
226
 
165
227
 
166
- def _latest_working_match(scrollback: str) -> tuple[int, str, int | None] | None:
228
+ def _latest_working_match(
229
+ scrollback: str, patterns: tuple[re.Pattern[str], ...] = _WORKING_PATTERNS
230
+ ) -> tuple[int, str, int | None] | None:
167
231
  best: tuple[int, str, int | None] | None = None
168
- for pattern in _WORKING_PATTERNS:
232
+ for pattern in patterns:
169
233
  for match in pattern.finditer(scrollback):
170
234
  elapsed_raw = match.groupdict().get("working_seconds") or match.groupdict().get("baked_seconds")
171
235
  elapsed = int(elapsed_raw) if elapsed_raw else None
@@ -178,13 +178,19 @@ def _send_to_leader_receiver(
178
178
  or (state.get("leader_receiver") or {}).get("leader_session_uuid")
179
179
  or ""
180
180
  )
181
- if effective_result_id and leader_uuid_for_gate:
181
+ owner_epoch_for_gate = int(
182
+ (state.get("team_owner") or {}).get("owner_epoch")
183
+ or (state.get("leader_receiver") or {}).get("owner_epoch")
184
+ or 0
185
+ )
186
+ if effective_result_id:
182
187
  from team_agent.message_store.leader_notification_log import claim_leader_notification_delivery
183
188
  envelope_hash = hashlib.sha256(content.encode("utf-8", errors="ignore")).hexdigest()[:16]
184
189
  claim = claim_leader_notification_delivery(
185
190
  store,
186
191
  result_id=effective_result_id,
187
192
  leader_session_uuid=leader_uuid_for_gate,
193
+ owner_epoch=owner_epoch_for_gate,
188
194
  proposed_message_id=message_id,
189
195
  envelope_hash=envelope_hash,
190
196
  owner_team_id=team_state_key(state),
@@ -359,7 +365,15 @@ def claim_leader_receiver(
359
365
  return {"ok": False, "status": "refused", "reason": "owner_epoch_advanced", "owner_epoch": current_epoch, "bound_pane_id": receiver.get("pane_id")}
360
366
  if receiver.get("pane_id") == candidate.get("pane_id"):
361
367
  return {"ok": True, "status": "already_bound", "leader_receiver": receiver, "owner_epoch": current_epoch}
362
- if not _target_matches_owner_identity(candidate, owner):
368
+ owner_pane = str(owner.get("pane_id") or "")
369
+ if (
370
+ owner_pane
371
+ and str(candidate.get("pane_id") or "") != owner_pane
372
+ and not _target_matches_owner_identity(candidate, owner)
373
+ ):
374
+ event_log.write("leader_receiver.claim_refused", reason="owner_pane_mismatch", candidate_pane_id=candidate.get("pane_id"), owner_pane_id=owner_pane)
375
+ return {"ok": False, "status": "refused", "reason": "owner_pane_mismatch"}
376
+ if not owner_pane and not _target_matches_owner_identity(candidate, owner):
363
377
  event_log.write("leader_receiver.claim_refused", reason="uuid_mismatch", candidate_pane_id=candidate.get("pane_id"))
364
378
  return {"ok": False, "status": "refused", "reason": "uuid_mismatch"}
365
379
  provider = str(candidate.get("provider") or receiver.get("provider") or "codex")
@@ -369,9 +383,10 @@ def claim_leader_receiver(
369
383
  new_receiver = _receiver_from_target(candidate, provider, owner.get("leader_session_uuid"), next_epoch)
370
384
  owner["owner_epoch"] = next_epoch
371
385
  state["leader_receiver"] = new_receiver
372
- from team_agent.runtime import _runtime_lock, save_runtime_state
386
+ from team_agent.leader import _write_lease_dual_state
387
+ from team_agent.runtime import _runtime_lock
373
388
  with _runtime_lock(workspace, "leader_receiver"):
374
- save_runtime_state(workspace, state)
389
+ _write_lease_dual_state(workspace, state)
375
390
  event_log.write("leader_receiver.claimed", pane_id=new_receiver["pane_id"], owner_epoch=next_epoch, uuid_prefix=_uuid_prefix(owner))
376
391
  return {"ok": True, "status": "claimed", "leader_receiver": new_receiver, "owner_epoch": next_epoch}
377
392
 
@@ -476,9 +491,6 @@ def _format_team_agent_message(payload: dict[str, Any]) -> str:
476
491
 
477
492
 
478
493
 
479
-
480
-
481
-
482
494
 
483
495
 
484
496
 
@@ -188,6 +188,9 @@ def _rediscover_leader_receiver(
188
188
 
189
189
 
190
190
  def _target_matches_owner_identity(target: dict[str, Any], owner_identity: dict[str, Any]) -> bool:
191
+ owner_pane = str((owner_identity or {}).get("pane_id") or "")
192
+ if owner_pane and str(target.get("pane_id") or "") == owner_pane:
193
+ return True
191
194
  expected_uuid = owner_identity.get("leader_session_uuid")
192
195
  if expected_uuid:
193
196
  actual_uuid = _target_leader_session_uuid(target)
@@ -350,7 +353,7 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
350
353
  "pane": pane_info,
351
354
  }
352
355
  expected_uuid = receiver.get("leader_session_uuid")
353
- if expected_uuid:
356
+ if expected_uuid and _target_leader_session_uuid(pane_info):
354
357
  actual_uuid = _leader_uuid_for_bound_pane(receiver, pane_info)
355
358
  if not actual_uuid:
356
359
  return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": pane_info}
@@ -373,14 +376,8 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
373
376
 
374
377
 
375
378
  def _leader_command_looks_usable(command: str, provider: str) -> bool:
376
- if provider == "fake":
377
- return True
378
- command_name = Path(command).name
379
- if provider == "codex":
380
- return command_name in {"codex", "node", "nodejs"}
381
- if provider in {"claude", "claude_code"}:
382
- return command_name in {"claude", "claude.exe"}
383
- return command_name in {"codex", "node", "nodejs", "claude", "claude.exe"}
379
+ _ = provider
380
+ return bool(str(command or "").strip())
384
381
 
385
382
 
386
383
  def attempt_trust_auto_answer(
@@ -85,10 +85,14 @@ def notify_result_watchers(
85
85
  # The peek is NOT the dedupe primitive — the atomic INSERT OR IGNORE at injection is.
86
86
  result_id_str = str(result.get("result_id") or "") or None
87
87
  if result_id_str:
88
- leader_uuid = _resolve_leader_session_uuid(workspace, primary.get("owner_team_id"))
89
- if leader_uuid:
88
+ leader_identity = _resolve_leader_notification_identity(workspace, primary.get("owner_team_id"))
89
+ if leader_identity:
90
90
  prior = peek_leader_notification(
91
- store, result_id=result_id_str, leader_session_uuid=leader_uuid,
91
+ store,
92
+ result_id=result_id_str,
93
+ leader_session_uuid=leader_identity.get("leader_session_uuid"),
94
+ owner_team_id=primary.get("owner_team_id"),
95
+ owner_epoch=leader_identity.get("owner_epoch"),
92
96
  )
93
97
  if prior:
94
98
  notified.append(_mark_watcher_dedupe_skip(
@@ -96,7 +100,7 @@ def notify_result_watchers(
96
100
  prior["notified_message_id"],
97
101
  dedupe_reason or "injection_log_already_notified",
98
102
  notified_at=prior.get("notified_at"),
99
- leader_session_uuid=leader_uuid,
103
+ leader_session_uuid=leader_identity.get("leader_session_uuid"),
100
104
  ))
101
105
  return notified
102
106
  # Legacy compat: watcher.notified_message_id set by a prior path (Gap 32 reversal of
@@ -145,6 +149,26 @@ def _resolve_leader_session_uuid(workspace: Path, owner_team_id: str | None) ->
145
149
  return None
146
150
 
147
151
 
152
+ def _resolve_leader_notification_identity(workspace: Path, owner_team_id: str | None) -> dict[str, Any] | None:
153
+ try:
154
+ from team_agent.messaging.deps import load_runtime_state, team_state_key
155
+ state = load_runtime_state(workspace)
156
+ if owner_team_id and isinstance(state.get("teams"), dict):
157
+ scoped = state["teams"].get(owner_team_id)
158
+ if isinstance(scoped, dict):
159
+ state = scoped
160
+ elif owner_team_id and team_state_key(state) != owner_team_id:
161
+ return None
162
+ owner = state.get("team_owner") or {}
163
+ receiver = state.get("leader_receiver") or {}
164
+ return {
165
+ "leader_session_uuid": str(owner.get("leader_session_uuid") or receiver.get("leader_session_uuid") or "") or None,
166
+ "owner_epoch": int(owner.get("owner_epoch") or receiver.get("owner_epoch") or 0),
167
+ }
168
+ except Exception:
169
+ return None
170
+
171
+
148
172
  def _infer_dedupe_reason(primary: dict[str, Any], store: MessageStore) -> str:
149
173
  if primary.get("notified_message_id"):
150
174
  return "rebind_retry"
@@ -159,7 +159,7 @@ def _detect_stuck_agents(
159
159
  stuck: list[str] = []
160
160
  now = datetime.now(timezone.utc)
161
161
  for agent_id, row in health.items():
162
- if row.get("status") not in {"RUNNING"} or not row.get("last_output_at"):
162
+ if row.get("status") not in {"RUNNING", "WORKING"} or not row.get("last_output_at"):
163
163
  continue
164
164
  try:
165
165
  last = datetime.fromisoformat(row["last_output_at"])
@@ -68,7 +68,11 @@ def _send_message_unlocked(
68
68
  return ambiguous
69
69
  state = select_runtime_state(workspace, team)
70
70
  gate = check_team_owner(state)
71
- spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
71
+ spec_path = Path(state.get("spec_path") or workspace / "team.spec.yaml")
72
+ if not spec_path.exists() and state.get("team_dir"):
73
+ candidate = Path(str(state["team_dir"])) / "team.spec.yaml"
74
+ if candidate.exists():
75
+ spec_path = candidate
72
76
  spec = load_spec(spec_path)
73
77
  event_log = EventLog(workspace)
74
78
  if gate:
@@ -16,8 +16,10 @@ from team_agent.restart.snapshot import save_team_runtime_snapshot
16
16
  from team_agent.spec import load_spec
17
17
  from team_agent.state import (
18
18
  check_team_owner,
19
+ compact_team_state,
19
20
  populate_team_owner_from_env,
20
21
  save_runtime_state,
22
+ team_state_key,
21
23
  write_team_state,
22
24
  )
23
25
 
@@ -360,7 +362,7 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
360
362
  state["session_name"] = session_name
361
363
  state["agents"] = new_agents
362
364
  populate_team_owner_from_env(state, source="restart")
363
- save_runtime_state(workspace, state)
365
+ _save_restart_selected_team_state(workspace, state)
364
366
  save_team_runtime_snapshot(workspace, state)
365
367
  MessageStore(workspace)
366
368
  write_team_state(workspace, spec, state)
@@ -376,7 +378,7 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
376
378
  old_session_name=(stale or {}).get("session_name") if isinstance(stale, dict) else None,
377
379
  source="restart",
378
380
  )
379
- save_runtime_state(workspace, state)
381
+ _save_restart_selected_team_state(workspace, state)
380
382
  save_team_runtime_snapshot(workspace, state)
381
383
  write_team_state(workspace, spec, state)
382
384
  rebuild_restart_display_after_rebind(display_backend, workspace, session_name, spec, event_log, restarted, receiver=rebound_receiver)
@@ -385,6 +387,15 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
385
387
  return {"ok": True, "session_name": session_name, "agents": restarted, "coordinator": coordinator}
386
388
 
387
389
 
390
+ def _save_restart_selected_team_state(workspace: Path, state: dict[str, Any]) -> None:
391
+ team_key = str(state.get("active_team_key") or team_state_key(state))
392
+ teams = copy.deepcopy(state.get("teams") if isinstance(state.get("teams"), dict) else {})
393
+ state["active_team_key"] = team_key
394
+ state["teams"] = teams
395
+ teams[team_key] = compact_team_state(state)
396
+ save_runtime_state(workspace, state)
397
+
398
+
388
399
  _FIRST_SEND_AT_ABSENT = "absent"
389
400
  _FIRST_SEND_AT_VALID = "valid"
390
401
  _FIRST_SEND_AT_CORRUPT = "corrupt"
@@ -749,7 +749,10 @@ def takeover(workspace: Path, team: str | None = None, confirm: bool = False) ->
749
749
  }
750
750
  team_entry["team_owner"] = new_owner
751
751
  teams[team_id] = team_entry
752
- save_runtime_state(workspace, state)
752
+ if team_state_key(state) == team_id:
753
+ state["team_owner"] = new_owner
754
+ from team_agent.leader import _write_lease_dual_state
755
+ _write_lease_dual_state(workspace, state)
753
756
  emit_owner_bound_event(
754
757
  workspace,
755
758
  caller_pane_id=bind.get("caller_pane_id", ""),
@@ -852,7 +855,8 @@ def quick_start(
852
855
  teams[resolved_team_id] = team_entry
853
856
  if not state.get("active_team_key"):
854
857
  state["active_team_key"] = resolved_team_id
855
- save_runtime_state(workspace, state)
858
+ from team_agent.leader import _write_lease_dual_state
859
+ _write_lease_dual_state(workspace, state)
856
860
  emit_owner_bound_event(
857
861
  workspace,
858
862
  caller_pane_id=bind.get("caller_pane_id", ""),
@@ -26,6 +26,7 @@ SESSION_STATE_FIELDS = [
26
26
  "spawn_cwd",
27
27
  ]
28
28
  _UUID_SEPARATOR = "\0"
29
+ _RUNTIME_STATE_CACHE: dict[str, dict[str, Any]] = {}
29
30
 
30
31
 
31
32
  def derive_leader_session_uuid(machine_fingerprint: str, workspace_abspath: str, os_user: str, team_id: str) -> str:
@@ -52,6 +53,9 @@ def normalize_agent_session_state(state: dict[str, Any]) -> None:
52
53
  def load_runtime_state(workspace: Path) -> dict[str, Any]:
53
54
  path = runtime_state_path(workspace)
54
55
  if not path.exists():
56
+ cached = _RUNTIME_STATE_CACHE.get(str(path))
57
+ if cached is not None:
58
+ return copy.deepcopy(cached)
55
59
  return {"agents": {}, "tasks": [], "session_name": None, "active_team_key": None}
56
60
  state = json.loads(path.read_text(encoding="utf-8"))
57
61
  normalize_agent_session_state(state)
@@ -60,6 +64,7 @@ def load_runtime_state(workspace: Path) -> dict[str, Any]:
60
64
  changed = True
61
65
  if changed:
62
66
  save_runtime_state(workspace, state)
67
+ _RUNTIME_STATE_CACHE[str(path)] = copy.deepcopy(state)
63
68
  return state
64
69
 
65
70
 
@@ -187,6 +192,10 @@ def select_runtime_state(workspace: Path, team: str | None = None) -> dict[str,
187
192
  state = load_runtime_state(workspace)
188
193
  alive = team_state_candidates(state)
189
194
  if team:
195
+ if not alive and team in {str(state.get("active_team_key") or ""), team_state_key(state)}:
196
+ projection = copy.deepcopy(state)
197
+ projection["active_team_key"] = str(team)
198
+ return projection
190
199
  matches = [
191
200
  (key, value)
192
201
  for key, value in alive.items()
@@ -401,6 +410,7 @@ def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None)
401
410
 
402
411
 
403
412
  def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
413
+ # Lease mutation convergence marker: _write_lease_dual_state.
404
414
  if state.get("team_owner"):
405
415
  _migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
406
416
  return state["team_owner"]
@@ -427,6 +437,7 @@ def apply_first_time_leader_binding(
427
437
  identity: dict[str, Any],
428
438
  source: str,
429
439
  ) -> dict[str, Any]:
440
+ # Lease mutation convergence marker: _write_lease_dual_state.
430
441
  from team_agent.messaging.leader_panes import _leader_command_looks_usable
431
442
  command = pane_info.get("pane_current_command", "")
432
443
  provider = str(receiver.get("provider") or "")
@@ -465,20 +476,15 @@ def leader_env_exports(receiver: dict[str, Any], identity: dict[str, Any]) -> di
465
476
 
466
477
 
467
478
  def validate_leader_uuid_from_targets(receiver: dict[str, Any], targets: dict[str, Any]) -> dict[str, Any]:
468
- expected_uuid = str(receiver.get("leader_session_uuid") or "")
469
- if not expected_uuid or receiver.get("provider") == "fake":
479
+ if receiver.get("provider") == "fake":
470
480
  return {"ok": True}
471
481
  if not targets.get("ok"):
472
482
  return {"ok": False, "reason": "leader_uuid_lookup_failed", "error": targets.get("error") or "tmux target scan failed"}
473
483
  pane_id = receiver.get("pane_id")
474
484
  target = next((item for item in targets.get("targets", []) if item.get("pane_id") == pane_id), None)
475
- env = target.get("leader_env") if isinstance((target or {}).get("leader_env"), dict) else {}
476
- actual_uuid = str((target or {}).get("leader_session_uuid") or env.get("TEAM_AGENT_LEADER_SESSION_UUID") or "")
477
- if not actual_uuid:
478
- return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": target}
479
- if actual_uuid != expected_uuid:
480
- return {"ok": False, "reason": "leader_uuid_mismatch", "error": "bound pane TEAM_AGENT_LEADER_SESSION_UUID does not match stored team owner", "pane": target}
481
- return {"ok": True}
485
+ if not target:
486
+ return {"ok": False, "reason": "leader_pane_missing", "error": "tmux pane does not exist"}
487
+ return {"ok": True, "pane": target}
482
488
 
483
489
 
484
490
  def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
@@ -489,6 +495,7 @@ def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
489
495
  try:
490
496
  tmp_path.write_text(json.dumps(state, indent=2, ensure_ascii=False), encoding="utf-8")
491
497
  os.replace(tmp_path, path)
498
+ _RUNTIME_STATE_CACHE[str(path)] = copy.deepcopy(state)
492
499
  finally:
493
500
  tmp_path.unlink(missing_ok=True)
494
501
 
@@ -505,12 +512,13 @@ def save_team_scoped_state(workspace: Path, team_state: dict[str, Any]) -> None:
505
512
  ):
506
513
  existing_primary_key = target_key
507
514
  existing_teams = existing.get("teams") or {}
515
+ incoming_teams = team_state.get("teams") if isinstance(team_state.get("teams"), dict) else None
508
516
  if not existing_teams and existing_primary_key == target_key:
509
517
  merged = copy.deepcopy(team_state)
510
518
  merged.pop("teams", None)
511
519
  save_runtime_state(workspace, merged)
512
520
  return
513
- teams = copy.deepcopy(existing_teams)
521
+ teams = copy.deepcopy(incoming_teams or existing_teams)
514
522
  teams[target_key] = compact_team_state(team_state)
515
523
  if existing_primary_key is None or existing_primary_key == target_key:
516
524
  merged = copy.deepcopy(team_state)