@team-agent/installer 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/package.json +1 -1
  2. package/schemas/team.schema.json +6 -0
  3. package/src/team_agent/approvals/runtime_prompts.py +1 -1
  4. package/src/team_agent/cli/commands.py +122 -6
  5. package/src/team_agent/cli/parser.py +42 -1
  6. package/src/team_agent/coordinator/__main__.py +21 -2
  7. package/src/team_agent/coordinator/lifecycle.py +11 -0
  8. package/src/team_agent/diagnose/orphan_cleanup.py +364 -0
  9. package/src/team_agent/events.py +47 -0
  10. package/src/team_agent/launch/core.py +2 -1
  11. package/src/team_agent/leader/__init__.py +273 -60
  12. package/src/team_agent/lifecycle/agents.py +54 -2
  13. package/src/team_agent/lifecycle/operations.py +87 -9
  14. package/src/team_agent/lifecycle/start.py +1 -1
  15. package/src/team_agent/message_store/core.py +8 -7
  16. package/src/team_agent/message_store/leader_notification_log.py +132 -0
  17. package/src/team_agent/message_store/result_watchers.py +144 -1
  18. package/src/team_agent/message_store/schema.py +31 -2
  19. package/src/team_agent/messaging/delivery.py +293 -1
  20. package/src/team_agent/messaging/idle_alerts.py +109 -9
  21. package/src/team_agent/messaging/leader.py +179 -10
  22. package/src/team_agent/messaging/leader_api_errors.py +216 -0
  23. package/src/team_agent/messaging/leader_panes.py +393 -23
  24. package/src/team_agent/messaging/result_delivery.py +219 -4
  25. package/src/team_agent/messaging/results.py +12 -21
  26. package/src/team_agent/messaging/scheduler.py +24 -2
  27. package/src/team_agent/messaging/send.py +21 -26
  28. package/src/team_agent/messaging/tmux_io.py +153 -23
  29. package/src/team_agent/messaging/tmux_prompt.py +87 -0
  30. package/src/team_agent/messaging/trust_auto_answer.py +44 -0
  31. package/src/team_agent/restart/orchestration.py +207 -4
  32. package/src/team_agent/runtime.py +7 -7
  33. package/src/team_agent/rust_core.py +157 -3
  34. package/src/team_agent/sessions/capture.py +65 -15
  35. package/src/team_agent/spec.py +59 -0
  36. package/src/team_agent/state.py +153 -10
  37. package/src/team_agent/status/inbox.py +33 -3
  38. package/src/team_agent/status/queries.py +32 -1
  39. package/src/team_agent/watch/__init__.py +145 -0
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import platform
4
5
  import re
5
6
  import shutil
6
7
  import subprocess
@@ -10,6 +11,18 @@ from typing import Any
10
11
  from team_agent.paths import repo_root
11
12
 
12
13
 
14
+ _LEADER_ENV_KEYS = (
15
+ "TEAM_AGENT_LEADER_SESSION_UUID",
16
+ "TEAM_AGENT_LEADER_PANE_ID",
17
+ "TEAM_AGENT_LEADER_PROVIDER",
18
+ "TEAM_AGENT_MACHINE_FINGERPRINT",
19
+ "TEAM_AGENT_LEADER_SESSION_UUID_OVERRIDE",
20
+ )
21
+ _LEADER_SHAPED_COMMANDS = {"codex", "claude", "claude.exe", "node", "nodejs"}
22
+ _PANE_ENV_SCAN_TIMEOUT_SECONDS = 2.0
23
+ _run_subprocess = subprocess.run # test-injectable indirection
24
+
25
+
13
26
  def core_binary() -> Path | None:
14
27
  configured = shutil.which("team-agent-core")
15
28
  if configured:
@@ -105,13 +118,13 @@ def list_targets() -> dict[str, Any]:
105
118
  result = call_core("list-targets")
106
119
  if result.get("ok"):
107
120
  return result
108
- proc = subprocess.run(
121
+ proc = _run_subprocess(
109
122
  [
110
123
  "tmux",
111
124
  "list-panes",
112
125
  "-a",
113
126
  "-F",
114
- "#{pane_id}\t#{session_name}\t#{window_index}\t#{window_name}\t#{pane_index}\t#{pane_tty}\t#{pane_current_command}\t#{pane_active}",
127
+ "#{pane_id}\t#{session_name}\t#{window_index}\t#{window_name}\t#{pane_index}\t#{pane_tty}\t#{pane_current_command}\t#{pane_active}\t#{pane_pid}",
115
128
  ],
116
129
  text=True,
117
130
  capture_output=True,
@@ -123,7 +136,7 @@ def list_targets() -> dict[str, Any]:
123
136
  targets = []
124
137
  for line in proc.stdout.splitlines():
125
138
  parts = line.split("\t")
126
- if len(parts) != 8:
139
+ if len(parts) not in {8, 9}:
127
140
  continue
128
141
  target = {
129
142
  "pane_id": parts[0],
@@ -135,11 +148,152 @@ def list_targets() -> dict[str, Any]:
135
148
  "pane_current_command": parts[6],
136
149
  "pane_active": parts[7] == "1",
137
150
  }
151
+ pane_pid = parts[8].strip() if len(parts) == 9 else ""
152
+ if pane_pid:
153
+ target["pane_pid"] = pane_pid
138
154
  target["fingerprint"] = f"{target['session_name']}|{target['window_index']}|{target['pane_index']}|{target['pane_tty']}"
155
+ _attach_leader_env(target)
139
156
  targets.append(target)
140
157
  return {"ok": True, "targets": targets, "engine": "python_fallback", "fallback_reason": result.get("error")}
141
158
 
142
159
 
160
def _attach_leader_env(target: dict[str, Any]) -> None:
    """Annotate a tmux target dict in place with leader identity env vars.

    Sets ``target["leader_env"]`` to ``None`` when the target has no
    ``pane_pid`` or the pane process environment cannot be read. Otherwise it
    is a dict holding whichever ``_LEADER_ENV_KEYS`` were found. When the
    session UUID is present, it is also copied to
    ``target["leader_session_uuid"]``.
    """
    uuid_key = "TEAM_AGENT_LEADER_SESSION_UUID"
    root_pid = str(target.get("pane_pid") or "").strip()
    root_env = _read_process_env(root_pid) if root_pid else None
    if root_env is None:
        target["leader_env"] = None
        return

    collected = {name: root_env[name] for name in _LEADER_ENV_KEYS if name in root_env}
    if uuid_key not in collected:
        # The pane's own process did not carry the UUID; look at leader-shaped
        # descendants and stop at the first one that supplies it.
        for descendant in _walk_leader_shaped_children(root_pid):
            descendant_env = _read_process_env(descendant)
            if descendant_env is None:
                continue
            for name in _LEADER_ENV_KEYS:
                if name in descendant_env:
                    # Values already collected take precedence.
                    collected.setdefault(name, descendant_env[name])
            if uuid_key in collected:
                break

    target["leader_env"] = collected
    if collected.get(uuid_key):
        target["leader_session_uuid"] = collected[uuid_key]
184
+
185
+
186
def _read_process_env(pid: str) -> dict[str, str] | None:
    """Read the environment of process ``pid`` using a platform-specific reader.

    Linux uses ``_read_proc_environ``; every other platform uses
    ``_read_ps_eww_env``. Returns whatever the chosen reader returns.
    """
    reader = _read_proc_environ if platform.system() == "Linux" else _read_ps_eww_env
    return reader(pid)
190
+
191
+
192
def _read_proc_environ(pid: str) -> dict[str, str] | None:
    """Read a process environment from ``/proc/<pid>/environ``.

    Args:
        pid: Process id, as text, used to build the ``/proc`` path.

    Returns:
        ``None`` when the file cannot be read (missing process, permission
        denied, or any other ``OSError``). Otherwise a dict built from the
        NUL-separated ``NAME=value`` records; records without ``=`` or with an
        empty name are skipped, and a later duplicate name overrides an
        earlier one.
    """
    try:
        raw = Path(f"/proc/{pid}/environ").read_bytes()
    except OSError:
        # FileNotFoundError and PermissionError are OSError subclasses.
        return None
    env: dict[str, str] = {}
    for record in raw.split(b"\x00"):
        if b"=" not in record:
            continue
        # errors="replace" never raises, so no exception guard is needed.
        name, _, value = record.decode("utf-8", errors="replace").partition("=")
        if not name:
            continue
        env[name] = value
    return env
209
+
210
+
211
def _read_ps_eww_env(pid: str) -> dict[str, str] | None:
    """Read a process environment by running ``ps -E -ww -p <pid>``.

    Returns ``None`` when ``ps`` cannot be run, times out, exits non-zero, or
    prints nothing. Otherwise returns the result of parsing its stdout with
    ``_parse_ps_eww_output``.
    """
    # NOTE(review): presumably relies on BSD/macOS ps printing the environment
    # with -E; confirm for any other non-Linux platform.
    argv = ["ps", "-E", "-ww", "-p", str(pid)]
    try:
        completed = _run_subprocess(
            argv,
            text=True,
            capture_output=True,
            timeout=_PANE_ENV_SCAN_TIMEOUT_SECONDS,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None
    succeeded = completed.returncode == 0 and bool(completed.stdout)
    return _parse_ps_eww_output(completed.stdout, pid) if succeeded else None
225
+
226
+
227
def _parse_ps_eww_output(text: str, pid: str) -> dict[str, str]:
    """Extract ``NAME=value`` pairs for ``pid`` from ``ps`` output.

    The first line is treated as a header. Among the remaining lines, the
    first whose leading field equals ``pid`` is used. That row is split on
    whitespace, and tokens shaped like ``NAME=value`` are kept when NAME
    starts with a letter or underscore and contains only alphanumerics or
    underscores. A later duplicate name overrides an earlier one.

    Returns an empty dict when there is no data row for ``pid``.
    """
    wanted = str(pid)
    row = None
    for line in text.splitlines()[1:]:
        candidate = line.lstrip()
        if candidate.split(" ", 1)[0] == wanted:
            row = candidate
            break
    env: dict[str, str] = {}
    if row is None:
        # Spark MEDIUM #2 (da436a3): never fall back to the first data row.
        # It may belong to an unrelated process and would leak its env (incl.
        # another team's TEAM_AGENT_LEADER_SESSION_UUID) into this pane's
        # leader_env, corrupting rediscovery.
        return env
    for token in row.split():
        # Tokens come from str.split(), so they cannot contain whitespace.
        key, sep, value = token.partition("=")
        if not sep or not key:
            continue
        if not (key[0].isalpha() or key[0] == "_"):
            continue
        if not all(ch.isalnum() or ch == "_" for ch in key):
            continue
        env[key] = value
    return env
255
+
256
+
257
def _walk_leader_shaped_children(parent_pid: str) -> list[str]:
    """List PIDs of leader-shaped descendants of ``parent_pid``.

    Runs ``ps`` to get a pid/ppid/command table and hands it to
    ``_select_leader_shaped_descendants``. Returns an empty list when ``ps``
    cannot be run, times out, exits non-zero, or prints nothing.
    """
    try:
        proc = _run_subprocess(
            # -A selects every process. Without a selection flag, procps ps
            # lists only processes attached to the invoker's own terminal, so
            # children of other tmux panes would never appear in the table.
            ["ps", "-A", "-o", "pid=,ppid=,comm="],
            text=True,
            capture_output=True,
            timeout=_PANE_ENV_SCAN_TIMEOUT_SECONDS,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return []
    if proc.returncode != 0 or not proc.stdout:
        return []
    return _select_leader_shaped_descendants(proc.stdout, parent_pid)
271
+
272
+
273
def _select_leader_shaped_descendants(ps_output: str, parent_pid: str) -> list[str]:
    """Pick leader-shaped descendants of ``parent_pid`` from a ps table.

    Args:
        ps_output: Lines of whitespace-separated ``pid ppid command``; lines
            with fewer than three fields are ignored.
        parent_pid: PID whose process subtree is searched.

    Returns:
        PIDs, in the order their rows appear, of every transitive child of
        ``parent_pid`` whose command basename is in
        ``_LEADER_SHAPED_COMMANDS``.
    """
    rows: list[tuple[str, str]] = []
    children_of: dict[str, list[str]] = {}
    for line in ps_output.splitlines():
        fields = line.split()
        if len(fields) < 3:
            continue
        pid, ppid = fields[0], fields[1]
        # The command may be a full path; compare on its final component.
        rows.append((pid, Path(" ".join(fields[2:])).name))
        children_of.setdefault(ppid, []).append(pid)

    # Walk the tree through the parent->children index so each row is visited
    # once, instead of rescanning the whole table for every tree level.
    descendants: set[str] = set()
    pending = [str(parent_pid)]
    while pending:
        for child in children_of.get(pending.pop(), ()):
            if child not in descendants:
                descendants.add(child)
                pending.append(child)

    return [
        pid
        for pid, command in rows
        if pid in descendants and command in _LEADER_SHAPED_COMMANDS
    ]
295
+
296
+
143
297
  def contains_inline_secret(value: str) -> bool:
144
298
  return (
145
299
  _contains_secret_assignment(value)
@@ -1,14 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import time
3
4
  from datetime import datetime, timezone
4
5
  from pathlib import Path
5
6
  from typing import Any
6
7
 
8
+ from team_agent.errors import RuntimeError as TeamAgentRuntimeError
7
9
  from team_agent.events import EventLog
8
10
  from team_agent.providers import get_adapter
9
11
  from team_agent.state import SESSION_CAPTURE_FIELDS, SESSION_STATE_FIELDS
10
12
 
11
13
 
14
+ # Stage 7 S6 (2026-05-27): capture_agent_session used to do a single adapter
15
+ # call and silently return None on miss, leaving status='running' workers with
16
+ # session_id=null. Slow worker startups (Codex writing the rollout file a few
17
+ # tenths of a second after window creation) raced this check. We now poll on a
18
+ # small interval inside the caller's timeout_s budget so the adapter's own
19
+ # fast-path call doesn't have to absorb all the latency on its own.
20
+ _CAPTURE_POLL_INTERVAL_SECONDS = 0.05
21
+
22
+
12
23
  def capture_missing_sessions(
13
24
  workspace: Path,
14
25
  state: dict[str, Any],
@@ -25,6 +36,10 @@ def capture_missing_sessions(
25
36
  for aid, item in state.get("agents", {}).items()
26
37
  if aid != agent_id and item.get("session_id")
27
38
  }
39
+ # capture_missing_sessions is invoked from coordinator_tick, diagnose,
40
+ # status, etc. with very short timeouts; a transient miss should NOT
41
+ # crash those paths. The loud raise contract belongs to direct callers
42
+ # (e.g. lifecycle start/restart) who own the worker's atomicity.
28
43
  result = capture_agent_session(
29
44
  workspace,
30
45
  agent_id,
@@ -32,6 +47,7 @@ def capture_missing_sessions(
32
47
  event_log,
33
48
  timeout_s=timeout_s,
34
49
  exclude_session_ids=known_session_ids,
50
+ raise_on_missed=False,
35
51
  )
36
52
  if result:
37
53
  captured.append(agent_id)
@@ -53,6 +69,7 @@ def capture_agent_session(
53
69
  event_log: EventLog,
54
70
  timeout_s: float,
55
71
  exclude_session_ids: set[str] | None = None,
72
+ raise_on_missed: bool = True,
56
73
  ) -> dict[str, Any] | None:
57
74
  if agent_state.get("session_id"):
58
75
  return None
@@ -66,21 +83,54 @@ def capture_agent_session(
66
83
  "exclude_session_ids": sorted(exclude_session_ids or set()),
67
84
  "claude_projects_root": agent_state.get("claude_projects_root"),
68
85
  }
69
- result = adapter.capture_session_id(agent_id, spawn_context, timeout_s=timeout_s)
70
- if not isinstance(result, dict) or not result.get("session_id"):
71
- return None
72
- copy_session_metadata(agent_state, result)
73
- agent_state.pop("_pending_session_id", None)
74
- event_log.write(
75
- "session.captured",
76
- agent_id=agent_id,
77
- provider=agent_state.get("provider"),
78
- session_id=agent_state.get("session_id"),
79
- rollout_path=agent_state.get("rollout_path"),
80
- captured_via=agent_state.get("captured_via"),
81
- attribution_confidence=agent_state.get("attribution_confidence"),
82
- )
83
- return result
86
+ deadline = time.monotonic() + max(timeout_s, 0.0)
87
+ while True:
88
+ # Pass timeout_s=0 so the adapter does a single fast-path check; the
89
+ # outer loop owns the polling budget so behaviour stays consistent
90
+ # whether or not the adapter has its own internal sleep.
91
+ result = adapter.capture_session_id(agent_id, spawn_context, timeout_s=0)
92
+ if isinstance(result, dict) and result.get("session_id"):
93
+ copy_session_metadata(agent_state, result)
94
+ agent_state.pop("_pending_session_id", None)
95
+ event_log.write(
96
+ "session.captured",
97
+ agent_id=agent_id,
98
+ provider=agent_state.get("provider"),
99
+ session_id=agent_state.get("session_id"),
100
+ rollout_path=agent_state.get("rollout_path"),
101
+ captured_via=agent_state.get("captured_via"),
102
+ attribution_confidence=agent_state.get("attribution_confidence"),
103
+ )
104
+ return result
105
+ if time.monotonic() >= deadline:
106
+ break
107
+ time.sleep(_CAPTURE_POLL_INTERVAL_SECONDS)
108
+ # Timeout. Slice 1 atomicity contract: a worker whose status is 'running'
109
+ # must NEVER be left with session_id=null — that half-state is what made
110
+ # Mac mini Stage 7 S5/S6 unreproducible and breaks resume on next restart.
111
+ # Emit a structured attention event so the coordinator/operator sees the
112
+ # miss, then raise so callers cannot accidentally treat the None as a
113
+ # silent "no-op". Non-running workers (still starting, paused, stopped)
114
+ # legitimately have no session yet, so they still get the silent-None
115
+ # return that existing callers expect.
116
+ if agent_state.get("status") == "running":
117
+ event_log.write(
118
+ "session.capture_required_attention",
119
+ agent_id=agent_id,
120
+ provider=agent_state.get("provider"),
121
+ timeout_s=timeout_s,
122
+ spawn_cwd=agent_state.get("spawn_cwd"),
123
+ session_name=agent_state.get("session_name"),
124
+ window=agent_state.get("window", agent_id),
125
+ )
126
+ if raise_on_missed:
127
+ raise TeamAgentRuntimeError(
128
+ f"Failed to capture session_id for agent {agent_id}: adapter "
129
+ f"did not produce a session within {timeout_s}s. Worker is "
130
+ "running but unidentifiable; this is a Slice 1 atomicity "
131
+ "violation."
132
+ )
133
+ return None
84
134
 
85
135
 
86
136
  def copy_session_metadata(target: dict[str, Any], source: dict[str, Any]) -> None:
@@ -27,9 +27,60 @@ def load_yaml(path: Path) -> dict[str, Any]:
27
27
  def load_spec(path: Path) -> dict[str, Any]:
28
28
  spec = load_yaml(path)
29
29
  validate_spec(spec, base_dir=path.parent)
30
+ _emit_load_time_deprecations(spec, path)
30
31
  return spec
31
32
 
32
33
 
34
def _emit_load_time_deprecations(spec: dict[str, Any], path: Path) -> None:
    """Fire spec-level deprecation signals when the YAML is loaded.

    Stage 7 S7 (2026-05-27): a user who still sets the deprecated
    ``runtime.auto_trust_own_workspace`` field must see the warning when the
    spec is read, even if startup never reaches the trust-prompt code path.

    Does nothing unless ``spec["runtime"]`` is a dict with a truthy
    ``auto_trust_own_workspace``. Otherwise it delegates to the leader-panes
    helper with an ``EventLog`` built on the workspace root found by
    ``_resolve_workspace_root``, not on the spec file's own directory. That
    keeps a spec stored under ``<workspace>/.team/current/`` from producing a
    doubled ``.team/current/.team/logs`` nesting.
    """
    runtime = spec.get("runtime")
    opted_in = isinstance(runtime, dict) and bool(runtime.get("auto_trust_own_workspace"))
    if not opted_in:
        return
    # Imported lazily so the spec module has no messaging-layer coupling at
    # import time; only specs that set the deprecated field pay the cost.
    from team_agent.events import EventLog
    from team_agent.messaging.leader_panes import _emit_spec_opt_in_deprecation

    workspace_root = _resolve_workspace_root(path)
    _emit_spec_opt_in_deprecation(EventLog(workspace_root))
58
+
59
+
60
def _resolve_workspace_root(spec_path: Path) -> Path:
    """Find the workspace root that owns this spec.

    A workspace root is a directory that has a ``.team/`` subdirectory. The
    search starts at the spec file's parent and climbs to the first ancestor
    that has one. If none qualifies (fresh workspace before init, or a spec
    placed outside any team workspace), it falls back to ``spec_path.parent``,
    the legacy single-layout behaviour.

    The check uses the real filesystem (``(dir / ".team").is_dir()``) rather
    than path-string parsing, so workspace paths that contain a ``.team``
    segment still resolve correctly.

    Args:
        spec_path: Path to the spec file; may be relative to the current
            working directory.

    Returns:
        The spec's parent as given when it holds ``.team/`` or when nothing
        is found; otherwise the absolute path of the first qualifying
        ancestor.
    """
    direct_parent = spec_path.parent
    if (direct_parent / ".team").is_dir():
        return direct_parent
    # Climb the absolute form. A relative path such as "team.spec.yaml" has
    # no lexical parents, so without anchoring at cwd the search would stop
    # below the real workspace root. absolute() does not follow symlinks.
    for ancestor in direct_parent.absolute().parents:
        if (ancestor / ".team").is_dir():
            return ancestor
    return direct_parent
82
+
83
+
33
84
  def validate_spec(spec: dict[str, Any], base_dir: Path | None = None) -> None:
34
85
  messages = _basic_schema_errors(spec)
35
86
  messages.extend(_semantic_errors(spec, base_dir or Path.cwd()))
@@ -190,6 +241,12 @@ def _check_runtime(runtime: Any, errors: list[str]) -> None:
190
241
  "tick_interval_sec",
191
242
  "push_min_interval_sec",
192
243
  "stuck_timeout_sec",
244
+ # Gap 29 / F3 deprecation (2026-05-26): accept the legacy spec opt-in so
245
+ # YAMLs that still set it validate and the deprecation warning + structured
246
+ # event in messaging/leader_panes.py can fire. The preferred per-session
247
+ # opt-in is the env var TEAM_AGENT_AUTO_TRUST_OWN_WORKSPACE; this spec
248
+ # field will be removed in 0.3.0.
249
+ "auto_trust_own_workspace",
193
250
  }
194
251
  _check_keys(runtime, "/runtime", required, allowed, errors)
195
252
  if not isinstance(runtime, dict):
@@ -200,6 +257,8 @@ def _check_runtime(runtime: Any, errors: list[str]) -> None:
200
257
  errors.append("/runtime/display_backend: invalid display backend")
201
258
  if "dangerous_auto_approve" in runtime and not isinstance(runtime["dangerous_auto_approve"], bool):
202
259
  errors.append("/runtime/dangerous_auto_approve: must be a boolean")
260
+ if "auto_trust_own_workspace" in runtime and not isinstance(runtime["auto_trust_own_workspace"], bool):
261
+ errors.append("/runtime/auto_trust_own_workspace: must be a boolean")
203
262
  _check_list(runtime.get("startup_order"), "/runtime/startup_order", errors)
204
263
 
205
264
 
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import hashlib
3
4
  import json
4
5
  import os
5
6
  import copy
@@ -23,6 +24,14 @@ SESSION_STATE_FIELDS = [
23
24
  *SESSION_CAPTURE_FIELDS,
24
25
  "spawn_cwd",
25
26
  ]
27
# Joins the identity components before hashing; inputs may not contain it.
_UUID_SEPARATOR = "\0"


def derive_leader_session_uuid(machine_fingerprint: str, workspace_abspath: str, os_user: str, team_id: str) -> str:
    """Derive a deterministic leader session id from four identity strings.

    The inputs are joined with NUL, hashed with SHA-256 over their UTF-8
    encoding, and the first 32 hex characters of the digest are returned.

    Raises:
        ValueError: If any input contains a NUL character.
    """
    components = (machine_fingerprint, workspace_abspath, os_user, team_id)
    for component in components:
        if _UUID_SEPARATOR in component:
            raise ValueError("leader_session_uuid inputs must not contain NUL")
    digest = hashlib.sha256(_UUID_SEPARATOR.join(components).encode("utf-8"))
    return digest.hexdigest()[:32]
26
35
 
27
36
 
28
37
  def runtime_state_path(workspace: Path) -> Path:
@@ -45,6 +54,8 @@ def load_runtime_state(workspace: Path) -> dict[str, Any]:
45
54
  return {"agents": {}, "tasks": [], "session_name": None}
46
55
  state = json.loads(path.read_text(encoding="utf-8"))
47
56
  normalize_agent_session_state(state)
57
+ if _migrate_state_identity(state, workspace):
58
+ save_runtime_state(workspace, state)
48
59
  return state
49
60
 
50
61
 
@@ -163,11 +174,75 @@ def resolve_team_scoped_state(
163
174
  }
164
175
 
165
176
 
166
- def _caller_identity_from_env() -> dict[str, str]:
177
def _identity_workspace_abspath(state: dict[str, Any], workspace: Path | None = None) -> str:
    """Return the resolved workspace path used as an identity input.

    Sources are tried in order:
      1. ``state["workspace"]``, resolved.
      2. ``state["team_dir"]``, resolved, two levels up.
      3. ``state["spec_path"]``: three levels up when the spec sits two
         levels under a ``.team`` directory, otherwise the spec's own
         directory.
      4. The ``workspace`` argument, else ``$TEAM_AGENT_WORKSPACE``, else the
         current working directory.
    """
    recorded_workspace = state.get("workspace")
    if recorded_workspace:
        return str(Path(str(recorded_workspace)).resolve())

    team_dir = state.get("team_dir")
    if team_dir:
        return str(Path(str(team_dir)).resolve().parent.parent)

    spec_value = state.get("spec_path")
    if spec_value:
        spec_dir = Path(str(spec_value)).resolve().parent
        if spec_dir.parent.name == ".team":
            return str(spec_dir.parent.parent)
        return str(spec_dir)

    fallback = workspace or Path(os.environ.get("TEAM_AGENT_WORKSPACE") or os.getcwd())
    return str(fallback.resolve())
186
+
187
+
188
def _identity_os_user() -> str:
    """Return the OS user name from ``$USER``, else ``$USERNAME``, else ``""``."""
    for variable in ("USER", "USERNAME"):
        value = os.environ.get(variable)
        if value:
            return value
    return ""
190
+
191
+
192
def _identity_machine_fingerprint(state: dict[str, Any]) -> str:
    """Return the machine fingerprint recorded in ``state``, else from the env.

    ``state["team_owner"]`` is checked before ``state["leader_receiver"]``.
    The first dict record with a truthy ``machine_fingerprint`` wins and is
    returned as a string. Otherwise ``$TEAM_AGENT_MACHINE_FINGERPRINT`` is
    used, or ``""`` when that is unset or empty.
    """
    candidates = (state.get("team_owner"), state.get("leader_receiver"))
    recorded = next(
        (
            str(item["machine_fingerprint"])
            for item in candidates
            if isinstance(item, dict) and item.get("machine_fingerprint")
        ),
        None,
    )
    if recorded is not None:
        return recorded
    return os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or ""
197
+
198
+
199
def _leader_session_uuid_for_state(state: dict[str, Any], workspace: Path | None = None, team_id: str | None = None) -> str:
    """Derive the leader session UUID for ``state``.

    Collects the four identity inputs (machine fingerprint, workspace path,
    OS user, team id) and passes them to ``derive_leader_session_uuid``. The
    team id defaults to ``team_state_key(state)`` when ``team_id`` is not
    given.
    """
    fingerprint = _identity_machine_fingerprint(state)
    workspace_abspath = _identity_workspace_abspath(state, workspace)
    os_user = _identity_os_user()
    resolved_team_id = team_id or team_state_key(state)
    return derive_leader_session_uuid(fingerprint, workspace_abspath, os_user, resolved_team_id)
206
+
207
+
208
def _migrate_team_identity(state: dict[str, Any], workspace: Path, team_id: str | None = None) -> bool:
    """Backfill ``leader_session_uuid`` on a team's owner/receiver records.

    Args:
        state: Team state; its ``team_owner`` and ``leader_receiver`` entries
            are updated in place when they are dicts without a
            ``leader_session_uuid``.
        workspace: Workspace path passed on to the UUID derivation.
        team_id: Optional explicit team id for the derivation.

    Returns:
        True when at least one record was updated, False otherwise.
    """
    pending = [
        record
        for record in (state.get("team_owner"), state.get("leader_receiver"))
        if isinstance(record, dict) and not record.get("leader_session_uuid")
    ]
    if not pending:
        # Nothing to backfill, so skip the derivation. It resolves paths,
        # hashes, and can raise on malformed inputs.
        return False
    leader_uuid = _leader_session_uuid_for_state(state, workspace, team_id)
    for record in pending:
        record["leader_session_uuid"] = leader_uuid
    return True
217
+
218
+
219
def _migrate_state_identity(state: dict[str, Any], workspace: Path) -> bool:
    """Backfill leader identity across the top-level state and nested teams.

    The top-level state is migrated only when it has a truthy
    ``session_name``. Each dict under ``state["teams"]`` is migrated with its
    key as the team id. Returns True when any record was changed.
    """
    migrated = False
    if state.get("session_name"):
        migrated = _migrate_team_identity(state, workspace)

    teams = state.get("teams")
    if not isinstance(teams, dict):
        return migrated

    for team_id, team_state in teams.items():
        if not isinstance(team_state, dict):
            continue
        # Every team is migrated, even after an earlier one already changed.
        if _migrate_team_identity(team_state, workspace, str(team_id)):
            migrated = True
    return migrated
227
+
228
+
229
+ def _caller_identity_from_env(state: dict[str, Any] | None = None, team_id: str | None = None, workspace: Path | None = None) -> dict[str, str]:
230
+ state = state or {}
231
+ machine_fingerprint = os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or ""
232
+ override = os.environ.get("TEAM_AGENT_LEADER_SESSION_UUID_OVERRIDE") or ""
233
+ env_uuid = os.environ.get("TEAM_AGENT_LEADER_SESSION_UUID") or ""
234
+ leader_uuid = override or env_uuid or derive_leader_session_uuid(
235
+ machine_fingerprint,
236
+ _identity_workspace_abspath(state, workspace),
237
+ _identity_os_user(),
238
+ team_id or os.environ.get("TEAM_AGENT_TEAM_ID") or team_state_key(state),
239
+ )
167
240
  return {
168
241
  "pane_id": os.environ.get("TEAM_AGENT_LEADER_PANE_ID") or "",
169
242
  "provider": os.environ.get("TEAM_AGENT_LEADER_PROVIDER") or "",
170
- "machine_fingerprint": os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or "",
243
+ "machine_fingerprint": machine_fingerprint,
244
+ "leader_session_uuid": leader_uuid,
245
+ "leader_session_uuid_source": "explicit-override" if override else ("env" if env_uuid else "derived"),
171
246
  }
172
247
 
173
248
 
@@ -175,19 +250,22 @@ def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
175
250
  owner = state.get("team_owner") or {}
176
251
  if not owner:
177
252
  return None
178
- caller = _caller_identity_from_env()
179
- if (
180
- caller["pane_id"] == (owner.get("pane_id") or "")
181
- and caller["provider"] == (owner.get("provider") or "")
182
- and caller["machine_fingerprint"] == (owner.get("machine_fingerprint") or "")
183
- ):
253
+ _migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
254
+ caller = _caller_identity_from_env(state, team_state_key(state))
255
+ owner_uuid = str(owner.get("leader_session_uuid") or "")
256
+ caller_uuid = caller["leader_session_uuid"]
257
+ owner_pane = str(owner.get("pane_id") or "")
258
+ caller_pane = caller.get("pane_id") or ""
259
+ if caller_uuid == owner_uuid and (not caller_pane or caller_pane == owner_pane):
184
260
  return None
261
+ same_uuid = caller_uuid == owner_uuid
185
262
  return {
186
263
  "ok": False,
187
264
  "status": "refused",
188
265
  "reason": "team_owner_mismatch",
266
+ "reason_kind": "sticky_bind_collision" if same_uuid else "owner_takeover_required",
189
267
  "error": "not_owner",
190
- "action": "use team-agent takeover --confirm",
268
+ "action": "team-agent claim-leader --confirm" if same_uuid else "team-agent takeover --confirm",
191
269
  "team_owner": owner,
192
270
  "caller": caller,
193
271
  }
@@ -209,14 +287,16 @@ def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None)
209
287
 
210
288
  def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
211
289
  if state.get("team_owner"):
290
+ _migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
212
291
  return state["team_owner"]
213
- caller = _caller_identity_from_env()
292
+ caller = _caller_identity_from_env(state, team_state_key(state))
214
293
  if not caller["pane_id"]:
215
294
  return None
216
295
  owner = {
217
296
  "pane_id": caller["pane_id"],
218
297
  "provider": caller["provider"],
219
298
  "machine_fingerprint": caller["machine_fingerprint"],
299
+ "leader_session_uuid": caller["leader_session_uuid"],
220
300
  "claimed_at": datetime.now(timezone.utc).isoformat(),
221
301
  "claimed_via": source,
222
302
  }
@@ -224,7 +304,70 @@ def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopul
224
304
  return owner
225
305
 
226
306
 
307
def apply_first_time_leader_binding(
    workspace: Path,
    state: dict[str, Any],
    receiver: dict[str, Any],
    pane_info: dict[str, Any],
    identity: dict[str, Any],
    source: str,
) -> dict[str, Any]:
    """Bind a leader pane to the team for the first time.

    The pane is refused when its current command is not usable as a leader
    host for the receiver's provider, or when its current path does not
    resolve to the workspace. On success ``receiver`` is updated in place with
    the identity fields, and ``state["team_owner"]`` and
    ``state["leader_receiver"]`` are written with ``owner_epoch`` 0.

    Returns:
        ``{"ok": True, ...}`` on success. Otherwise an ``{"ok": False, ...}``
        dict with ``reason`` ``leader_pane_wrong_command`` or
        ``leader_pane_wrong_workspace``.
    """
    from team_agent.messaging.leader_panes import _leader_command_looks_usable

    command = pane_info.get("pane_current_command", "")
    provider = str(receiver.get("provider") or "")
    if not _leader_command_looks_usable(command, provider):
        return {
            "ok": False,
            "reason": "leader_pane_wrong_command",
            "error": f"pane command {command!r} is not a leader host",
            "pane": pane_info,
        }

    workspace_text = str(workspace.resolve())
    current_path = pane_info.get("pane_current_path")
    # Compare real paths so symlinked locations of the same directory match.
    in_workspace = bool(current_path) and os.path.realpath(current_path) == os.path.realpath(workspace_text)
    if not in_workspace:
        return {
            "ok": False,
            "reason": "leader_pane_wrong_workspace",
            "error": f"pane cwd {current_path!r} does not match workspace {workspace_text!r}",
            "pane": pane_info,
        }

    session_uuid = identity["leader_session_uuid"]
    fingerprint = identity["machine_fingerprint"]
    receiver.update({
        "leader_session_uuid": session_uuid,
        "machine_fingerprint": fingerprint,
        "owner_epoch": 0,
    })
    state["team_owner"] = {
        "pane_id": receiver["pane_id"],
        "provider": provider,
        "machine_fingerprint": fingerprint,
        "leader_session_uuid": session_uuid,
        "owner_epoch": 0,
        "claimed_at": datetime.now(timezone.utc).isoformat(),
        "claimed_via": source,
    }
    state["leader_receiver"] = receiver
    return {"ok": True, "pane": pane_info, "warning": None, "first_time": True}
339
+
340
+
341
def leader_env_exports(receiver: dict[str, Any], identity: dict[str, Any]) -> dict[str, str]:
    """Build the ``TEAM_AGENT_*`` variables that describe a leader.

    Pane id and provider come from ``receiver``; session UUID, machine
    fingerprint, workspace path and team id come from ``identity``. Missing or
    falsy values become ``""``; everything else is stringified.
    """
    sources = (
        ("TEAM_AGENT_LEADER_PANE_ID", receiver, "pane_id"),
        ("TEAM_AGENT_LEADER_PROVIDER", receiver, "provider"),
        ("TEAM_AGENT_LEADER_SESSION_UUID", identity, "leader_session_uuid"),
        ("TEAM_AGENT_MACHINE_FINGERPRINT", identity, "machine_fingerprint"),
        ("TEAM_AGENT_WORKSPACE", identity, "workspace_abspath"),
        ("TEAM_AGENT_TEAM_ID", identity, "team_id"),
    )
    return {variable: str(record.get(field) or "") for variable, record, field in sources}
350
+
351
+
352
def validate_leader_uuid_from_targets(receiver: dict[str, Any], targets: dict[str, Any]) -> dict[str, Any]:
    """Check that the bound pane still carries the stored leader session UUID.

    Validation is skipped (``{"ok": True}``) when the receiver has no stored
    UUID or its provider is ``"fake"``. Otherwise the target whose ``pane_id``
    matches the receiver is looked up. Its UUID is read from
    ``leader_session_uuid`` or, failing that, from
    ``leader_env["TEAM_AGENT_LEADER_SESSION_UUID"]``.

    Returns:
        ``{"ok": True}`` on a match. Otherwise an ``{"ok": False, ...}`` dict
        with ``reason`` ``leader_uuid_lookup_failed`` (target scan failed),
        ``leader_uuid_missing`` (no UUID found; ``pane`` is ``None`` when no
        target matched), or ``leader_uuid_mismatch``.
    """
    expected_uuid = str(receiver.get("leader_session_uuid") or "")
    if not expected_uuid or receiver.get("provider") == "fake":
        return {"ok": True}
    if not targets.get("ok"):
        return {
            "ok": False,
            "reason": "leader_uuid_lookup_failed",
            "error": targets.get("error") or "tmux target scan failed",
        }

    pane_id = receiver.get("pane_id")
    target = None
    for item in targets.get("targets", []):
        if item.get("pane_id") == pane_id:
            target = item
            break

    pane = target or {}
    env = pane.get("leader_env")
    if not isinstance(env, dict):
        env = {}
    actual_uuid = str(pane.get("leader_session_uuid") or env.get("TEAM_AGENT_LEADER_SESSION_UUID") or "")

    if not actual_uuid:
        return {
            "ok": False,
            "reason": "leader_uuid_missing",
            "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID",
            "pane": target,
        }
    if actual_uuid != expected_uuid:
        return {
            "ok": False,
            "reason": "leader_uuid_mismatch",
            "error": "bound pane TEAM_AGENT_LEADER_SESSION_UUID does not match stored team owner",
            "pane": target,
        }
    return {"ok": True}
367
+
368
+
227
369
  def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
370
+ _migrate_state_identity(state, workspace)
228
371
  path = runtime_state_path(workspace)
229
372
  path.parent.mkdir(parents=True, exist_ok=True)
230
373
  tmp_path = path.with_name(f"{path.name}.{os.getpid()}.{uuid.uuid4().hex}.tmp")