@team-agent/installer 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/schemas/team.schema.json +6 -0
- package/src/team_agent/approvals/runtime_prompts.py +1 -1
- package/src/team_agent/cli/commands.py +122 -6
- package/src/team_agent/cli/parser.py +42 -1
- package/src/team_agent/coordinator/__main__.py +21 -2
- package/src/team_agent/coordinator/lifecycle.py +11 -0
- package/src/team_agent/diagnose/orphan_cleanup.py +364 -0
- package/src/team_agent/events.py +47 -0
- package/src/team_agent/launch/core.py +2 -1
- package/src/team_agent/leader/__init__.py +273 -60
- package/src/team_agent/lifecycle/agents.py +54 -2
- package/src/team_agent/lifecycle/operations.py +87 -9
- package/src/team_agent/lifecycle/start.py +1 -1
- package/src/team_agent/message_store/core.py +8 -7
- package/src/team_agent/message_store/leader_notification_log.py +132 -0
- package/src/team_agent/message_store/result_watchers.py +144 -1
- package/src/team_agent/message_store/schema.py +31 -2
- package/src/team_agent/messaging/delivery.py +293 -1
- package/src/team_agent/messaging/idle_alerts.py +109 -9
- package/src/team_agent/messaging/leader.py +179 -10
- package/src/team_agent/messaging/leader_api_errors.py +216 -0
- package/src/team_agent/messaging/leader_panes.py +393 -23
- package/src/team_agent/messaging/result_delivery.py +219 -4
- package/src/team_agent/messaging/results.py +12 -21
- package/src/team_agent/messaging/scheduler.py +24 -2
- package/src/team_agent/messaging/send.py +21 -26
- package/src/team_agent/messaging/tmux_io.py +153 -23
- package/src/team_agent/messaging/tmux_prompt.py +87 -0
- package/src/team_agent/messaging/trust_auto_answer.py +44 -0
- package/src/team_agent/restart/orchestration.py +207 -4
- package/src/team_agent/runtime.py +7 -7
- package/src/team_agent/rust_core.py +157 -3
- package/src/team_agent/sessions/capture.py +65 -15
- package/src/team_agent/spec.py +59 -0
- package/src/team_agent/state.py +153 -10
- package/src/team_agent/status/inbox.py +33 -3
- package/src/team_agent/status/queries.py +32 -1
- package/src/team_agent/watch/__init__.py +145 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import platform
|
|
4
5
|
import re
|
|
5
6
|
import shutil
|
|
6
7
|
import subprocess
|
|
@@ -10,6 +11,18 @@ from typing import Any
|
|
|
10
11
|
from team_agent.paths import repo_root
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
_LEADER_ENV_KEYS = (
|
|
15
|
+
"TEAM_AGENT_LEADER_SESSION_UUID",
|
|
16
|
+
"TEAM_AGENT_LEADER_PANE_ID",
|
|
17
|
+
"TEAM_AGENT_LEADER_PROVIDER",
|
|
18
|
+
"TEAM_AGENT_MACHINE_FINGERPRINT",
|
|
19
|
+
"TEAM_AGENT_LEADER_SESSION_UUID_OVERRIDE",
|
|
20
|
+
)
|
|
21
|
+
_LEADER_SHAPED_COMMANDS = {"codex", "claude", "claude.exe", "node", "nodejs"}
|
|
22
|
+
_PANE_ENV_SCAN_TIMEOUT_SECONDS = 2.0
|
|
23
|
+
_run_subprocess = subprocess.run # test-injectable indirection
|
|
24
|
+
|
|
25
|
+
|
|
13
26
|
def core_binary() -> Path | None:
|
|
14
27
|
configured = shutil.which("team-agent-core")
|
|
15
28
|
if configured:
|
|
@@ -105,13 +118,13 @@ def list_targets() -> dict[str, Any]:
|
|
|
105
118
|
result = call_core("list-targets")
|
|
106
119
|
if result.get("ok"):
|
|
107
120
|
return result
|
|
108
|
-
proc =
|
|
121
|
+
proc = _run_subprocess(
|
|
109
122
|
[
|
|
110
123
|
"tmux",
|
|
111
124
|
"list-panes",
|
|
112
125
|
"-a",
|
|
113
126
|
"-F",
|
|
114
|
-
"#{pane_id}\t#{session_name}\t#{window_index}\t#{window_name}\t#{pane_index}\t#{pane_tty}\t#{pane_current_command}\t#{pane_active}",
|
|
127
|
+
"#{pane_id}\t#{session_name}\t#{window_index}\t#{window_name}\t#{pane_index}\t#{pane_tty}\t#{pane_current_command}\t#{pane_active}\t#{pane_pid}",
|
|
115
128
|
],
|
|
116
129
|
text=True,
|
|
117
130
|
capture_output=True,
|
|
@@ -123,7 +136,7 @@ def list_targets() -> dict[str, Any]:
|
|
|
123
136
|
targets = []
|
|
124
137
|
for line in proc.stdout.splitlines():
|
|
125
138
|
parts = line.split("\t")
|
|
126
|
-
if len(parts)
|
|
139
|
+
if len(parts) not in {8, 9}:
|
|
127
140
|
continue
|
|
128
141
|
target = {
|
|
129
142
|
"pane_id": parts[0],
|
|
@@ -135,11 +148,152 @@ def list_targets() -> dict[str, Any]:
|
|
|
135
148
|
"pane_current_command": parts[6],
|
|
136
149
|
"pane_active": parts[7] == "1",
|
|
137
150
|
}
|
|
151
|
+
pane_pid = parts[8].strip() if len(parts) == 9 else ""
|
|
152
|
+
if pane_pid:
|
|
153
|
+
target["pane_pid"] = pane_pid
|
|
138
154
|
target["fingerprint"] = f"{target['session_name']}|{target['window_index']}|{target['pane_index']}|{target['pane_tty']}"
|
|
155
|
+
_attach_leader_env(target)
|
|
139
156
|
targets.append(target)
|
|
140
157
|
return {"ok": True, "targets": targets, "engine": "python_fallback", "fallback_reason": result.get("error")}
|
|
141
158
|
|
|
142
159
|
|
|
160
|
+
def _attach_leader_env(target: dict[str, Any]) -> None:
    """Annotate *target* in place with leader identity read from its pane's processes.

    Sets ``target["leader_env"]`` to ``None`` when the pane has no pid or the
    pane process's environment cannot be read; otherwise to a dict holding the
    subset of ``_LEADER_ENV_KEYS`` that were found. When a session UUID was
    found it is also exposed as ``target["leader_session_uuid"]``.
    """
    pid = str(target.get("pane_pid") or "").strip()
    pane_env = _read_process_env(pid) if pid else None
    if pane_env is None:
        # Either tmux reported no pid or the process environment was unreadable.
        target["leader_env"] = None
        return

    uuid_key = "TEAM_AGENT_LEADER_SESSION_UUID"
    found: dict[str, str] = {}
    for name in _LEADER_ENV_KEYS:
        if name in pane_env:
            found[name] = pane_env[name]

    if uuid_key not in found:
        # The pane's own process did not carry the UUID; look at leader-shaped
        # descendants and fill in only the keys that are still missing, stopping
        # as soon as one of them supplies the session UUID.
        for descendant in _walk_leader_shaped_children(pid):
            descendant_env = _read_process_env(descendant)
            if descendant_env is None:
                continue
            for name in _LEADER_ENV_KEYS:
                if name in descendant_env:
                    found.setdefault(name, descendant_env[name])
            if uuid_key in found:
                break

    target["leader_env"] = found
    session_uuid = found.get(uuid_key)
    if session_uuid:
        target["leader_session_uuid"] = session_uuid
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _read_process_env(pid: str) -> dict[str, str] | None:
    """Return the environment of process *pid*, or ``None`` if it cannot be read.

    Linux reads ``/proc/<pid>/environ``; every other platform goes through
    the ``ps``-based reader.
    """
    reader = _read_proc_environ if platform.system() == "Linux" else _read_ps_eww_env
    return reader(pid)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _read_proc_environ(pid: str) -> dict[str, str] | None:
|
|
193
|
+
path = Path(f"/proc/{pid}/environ")
|
|
194
|
+
try:
|
|
195
|
+
raw = path.read_bytes()
|
|
196
|
+
except (FileNotFoundError, PermissionError, OSError):
|
|
197
|
+
return None
|
|
198
|
+
env: dict[str, str] = {}
|
|
199
|
+
for token in raw.split(b"\x00"):
|
|
200
|
+
if not token or b"=" not in token:
|
|
201
|
+
continue
|
|
202
|
+
try:
|
|
203
|
+
text = token.decode("utf-8", errors="replace")
|
|
204
|
+
except Exception:
|
|
205
|
+
continue
|
|
206
|
+
key, _, value = text.partition("=")
|
|
207
|
+
env[key] = value
|
|
208
|
+
return env
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _read_ps_eww_env(pid: str) -> dict[str, str] | None:
    """Read a process environment by running ``ps -E -ww -p <pid>``.

    Returns the parsed environment, or ``None`` when ``ps`` cannot be run,
    times out, exits non-zero, or prints nothing.
    """
    argv = ["ps", "-E", "-ww", "-p", str(pid)]
    try:
        completed = _run_subprocess(
            argv,
            text=True,
            capture_output=True,
            timeout=_PANE_ENV_SCAN_TIMEOUT_SECONDS,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None
    usable = completed.returncode == 0 and bool(completed.stdout)
    return _parse_ps_eww_output(completed.stdout, pid) if usable else None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _parse_ps_eww_output(text: str, pid: str) -> dict[str, str]:
|
|
228
|
+
env: dict[str, str] = {}
|
|
229
|
+
lines = text.splitlines()
|
|
230
|
+
if len(lines) < 2:
|
|
231
|
+
return env
|
|
232
|
+
target_row = None
|
|
233
|
+
for line in lines[1:]:
|
|
234
|
+
stripped = line.lstrip()
|
|
235
|
+
if stripped.split(" ", 1)[0] == str(pid):
|
|
236
|
+
target_row = stripped
|
|
237
|
+
break
|
|
238
|
+
if target_row is None:
|
|
239
|
+
# Spark MEDIUM #2 (da436a3): never fall back to lines[1] — that row may belong to
|
|
240
|
+
# an unrelated process and would leak its env (incl. another team's
|
|
241
|
+
# TEAM_AGENT_LEADER_SESSION_UUID) into this pane's leader_env, corrupting rediscovery.
|
|
242
|
+
return env
|
|
243
|
+
for token in target_row.split():
|
|
244
|
+
if "=" not in token:
|
|
245
|
+
continue
|
|
246
|
+
key, _, value = token.partition("=")
|
|
247
|
+
if not key or " " in key:
|
|
248
|
+
continue
|
|
249
|
+
if not (key[0].isalpha() or key[0] == "_"):
|
|
250
|
+
continue
|
|
251
|
+
if not all(ch.isalnum() or ch == "_" for ch in key):
|
|
252
|
+
continue
|
|
253
|
+
env[key] = value
|
|
254
|
+
return env
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _walk_leader_shaped_children(parent_pid: str) -> list[str]:
    """Return pids of leader-shaped processes descending from *parent_pid*.

    Takes one process-table snapshot with ``ps`` and hands it to
    ``_select_leader_shaped_descendants``. Returns an empty list when ``ps``
    cannot be run, times out, exits non-zero, or prints nothing.
    """
    try:
        proc = _run_subprocess(
            # -A selects every process. Without it ps applies its default
            # filter (on procps: same user AND same terminal as the invoker),
            # so descendants of a pane on another tty would never be listed.
            ["ps", "-A", "-o", "pid=,ppid=,comm="],
            text=True,
            capture_output=True,
            timeout=_PANE_ENV_SCAN_TIMEOUT_SECONDS,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return []
    if proc.returncode != 0 or not proc.stdout:
        return []
    return _select_leader_shaped_descendants(proc.stdout, parent_pid)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _select_leader_shaped_descendants(ps_output: str, parent_pid: str) -> list[str]:
|
|
274
|
+
rows: list[tuple[str, str, str]] = []
|
|
275
|
+
for line in ps_output.splitlines():
|
|
276
|
+
parts = line.split()
|
|
277
|
+
if len(parts) < 3:
|
|
278
|
+
continue
|
|
279
|
+
pid, ppid, command = parts[0], parts[1], " ".join(parts[2:])
|
|
280
|
+
rows.append((pid, ppid, Path(command).name))
|
|
281
|
+
descendants: set[str] = set()
|
|
282
|
+
frontier = {str(parent_pid)}
|
|
283
|
+
while frontier:
|
|
284
|
+
next_frontier: set[str] = set()
|
|
285
|
+
for pid, ppid, _ in rows:
|
|
286
|
+
if ppid in frontier and pid not in descendants:
|
|
287
|
+
descendants.add(pid)
|
|
288
|
+
next_frontier.add(pid)
|
|
289
|
+
frontier = next_frontier
|
|
290
|
+
return [
|
|
291
|
+
pid
|
|
292
|
+
for pid, _, command in rows
|
|
293
|
+
if pid in descendants and command in _LEADER_SHAPED_COMMANDS
|
|
294
|
+
]
|
|
295
|
+
|
|
296
|
+
|
|
143
297
|
def contains_inline_secret(value: str) -> bool:
|
|
144
298
|
return (
|
|
145
299
|
_contains_secret_assignment(value)
|
|
@@ -1,14 +1,25 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import time
|
|
3
4
|
from datetime import datetime, timezone
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
6
7
|
|
|
8
|
+
from team_agent.errors import RuntimeError as TeamAgentRuntimeError
|
|
7
9
|
from team_agent.events import EventLog
|
|
8
10
|
from team_agent.providers import get_adapter
|
|
9
11
|
from team_agent.state import SESSION_CAPTURE_FIELDS, SESSION_STATE_FIELDS
|
|
10
12
|
|
|
11
13
|
|
|
14
|
+
# Stage 7 S6 (2026-05-27): capture_agent_session used to do a single adapter
|
|
15
|
+
# call and silently return None on miss, leaving status='running' workers with
|
|
16
|
+
# session_id=null. Slow worker startups (Codex writing the rollout file a few
|
|
17
|
+
# tenths of a second after window creation) raced this check. We now poll on a
|
|
18
|
+
# small interval inside the caller's timeout_s budget so the adapter's own
|
|
19
|
+
# fast-path call doesn't have to absorb all the latency on its own.
|
|
20
|
+
_CAPTURE_POLL_INTERVAL_SECONDS = 0.05
|
|
21
|
+
|
|
22
|
+
|
|
12
23
|
def capture_missing_sessions(
|
|
13
24
|
workspace: Path,
|
|
14
25
|
state: dict[str, Any],
|
|
@@ -25,6 +36,10 @@ def capture_missing_sessions(
|
|
|
25
36
|
for aid, item in state.get("agents", {}).items()
|
|
26
37
|
if aid != agent_id and item.get("session_id")
|
|
27
38
|
}
|
|
39
|
+
# capture_missing_sessions is invoked from coordinator_tick, diagnose,
|
|
40
|
+
# status, etc. with very short timeouts; a transient miss should NOT
|
|
41
|
+
# crash those paths. The loud raise contract belongs to direct callers
|
|
42
|
+
# (e.g. lifecycle start/restart) who own the worker's atomicity.
|
|
28
43
|
result = capture_agent_session(
|
|
29
44
|
workspace,
|
|
30
45
|
agent_id,
|
|
@@ -32,6 +47,7 @@ def capture_missing_sessions(
|
|
|
32
47
|
event_log,
|
|
33
48
|
timeout_s=timeout_s,
|
|
34
49
|
exclude_session_ids=known_session_ids,
|
|
50
|
+
raise_on_missed=False,
|
|
35
51
|
)
|
|
36
52
|
if result:
|
|
37
53
|
captured.append(agent_id)
|
|
@@ -53,6 +69,7 @@ def capture_agent_session(
|
|
|
53
69
|
event_log: EventLog,
|
|
54
70
|
timeout_s: float,
|
|
55
71
|
exclude_session_ids: set[str] | None = None,
|
|
72
|
+
raise_on_missed: bool = True,
|
|
56
73
|
) -> dict[str, Any] | None:
|
|
57
74
|
if agent_state.get("session_id"):
|
|
58
75
|
return None
|
|
@@ -66,21 +83,54 @@ def capture_agent_session(
|
|
|
66
83
|
"exclude_session_ids": sorted(exclude_session_ids or set()),
|
|
67
84
|
"claude_projects_root": agent_state.get("claude_projects_root"),
|
|
68
85
|
}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
86
|
+
deadline = time.monotonic() + max(timeout_s, 0.0)
|
|
87
|
+
while True:
|
|
88
|
+
# Pass timeout_s=0 so the adapter does a single fast-path check; the
|
|
89
|
+
# outer loop owns the polling budget so behaviour stays consistent
|
|
90
|
+
# whether or not the adapter has its own internal sleep.
|
|
91
|
+
result = adapter.capture_session_id(agent_id, spawn_context, timeout_s=0)
|
|
92
|
+
if isinstance(result, dict) and result.get("session_id"):
|
|
93
|
+
copy_session_metadata(agent_state, result)
|
|
94
|
+
agent_state.pop("_pending_session_id", None)
|
|
95
|
+
event_log.write(
|
|
96
|
+
"session.captured",
|
|
97
|
+
agent_id=agent_id,
|
|
98
|
+
provider=agent_state.get("provider"),
|
|
99
|
+
session_id=agent_state.get("session_id"),
|
|
100
|
+
rollout_path=agent_state.get("rollout_path"),
|
|
101
|
+
captured_via=agent_state.get("captured_via"),
|
|
102
|
+
attribution_confidence=agent_state.get("attribution_confidence"),
|
|
103
|
+
)
|
|
104
|
+
return result
|
|
105
|
+
if time.monotonic() >= deadline:
|
|
106
|
+
break
|
|
107
|
+
time.sleep(_CAPTURE_POLL_INTERVAL_SECONDS)
|
|
108
|
+
# Timeout. Slice 1 atomicity contract: a worker whose status is 'running'
|
|
109
|
+
# must NEVER be left with session_id=null — that half-state is what made
|
|
110
|
+
# Mac mini Stage 7 S5/S6 unreproducible and breaks resume on next restart.
|
|
111
|
+
# Emit a structured attention event so the coordinator/operator sees the
|
|
112
|
+
# miss, then raise so callers cannot accidentally treat the None as a
|
|
113
|
+
# silent "no-op". Non-running workers (still starting, paused, stopped)
|
|
114
|
+
# legitimately have no session yet, so they still get the silent-None
|
|
115
|
+
# return that existing callers expect.
|
|
116
|
+
if agent_state.get("status") == "running":
|
|
117
|
+
event_log.write(
|
|
118
|
+
"session.capture_required_attention",
|
|
119
|
+
agent_id=agent_id,
|
|
120
|
+
provider=agent_state.get("provider"),
|
|
121
|
+
timeout_s=timeout_s,
|
|
122
|
+
spawn_cwd=agent_state.get("spawn_cwd"),
|
|
123
|
+
session_name=agent_state.get("session_name"),
|
|
124
|
+
window=agent_state.get("window", agent_id),
|
|
125
|
+
)
|
|
126
|
+
if raise_on_missed:
|
|
127
|
+
raise TeamAgentRuntimeError(
|
|
128
|
+
f"Failed to capture session_id for agent {agent_id}: adapter "
|
|
129
|
+
f"did not produce a session within {timeout_s}s. Worker is "
|
|
130
|
+
"running but unidentifiable; this is a Slice 1 atomicity "
|
|
131
|
+
"violation."
|
|
132
|
+
)
|
|
133
|
+
return None
|
|
84
134
|
|
|
85
135
|
|
|
86
136
|
def copy_session_metadata(target: dict[str, Any], source: dict[str, Any]) -> None:
|
package/src/team_agent/spec.py
CHANGED
|
@@ -27,9 +27,60 @@ def load_yaml(path: Path) -> dict[str, Any]:
|
|
|
27
27
|
def load_spec(path: Path) -> dict[str, Any]:
    """Load the YAML spec at *path*, validate it, and emit load-time deprecations.

    Validation runs relative to the spec file's directory and happens before
    any deprecation signal is emitted.
    """
    document = load_yaml(path)
    validate_spec(document, base_dir=path.parent)
    _emit_load_time_deprecations(document, path)
    return document
|
|
31
32
|
|
|
32
33
|
|
|
34
|
+
def _emit_load_time_deprecations(spec: dict[str, Any], path: Path) -> None:
|
|
35
|
+
"""Stage 7 S7 (2026-05-27): deprecation signals attached to the spec field
|
|
36
|
+
itself must fire when the YAML is read, not lazily inside the trust-prompt
|
|
37
|
+
code path. A user with the deprecated field in team.spec.yaml needs to see
|
|
38
|
+
the warning even when startup never reaches attempt_trust_auto_answer.
|
|
39
|
+
|
|
40
|
+
The leader-panes helper owns the one-shot stderr guard + the structured
|
|
41
|
+
audit event, so we reuse it. EventLog points at the WORKSPACE ROOT (not
|
|
42
|
+
the spec file's directory) so a quick-start layout that stores the spec
|
|
43
|
+
under <workspace>/.team/current/team.spec.yaml still routes the audit
|
|
44
|
+
event into the single canonical <workspace>/.team/logs/events.jsonl
|
|
45
|
+
instead of a doubled <workspace>/.team/current/.team/logs/events.jsonl
|
|
46
|
+
nesting.
|
|
47
|
+
"""
|
|
48
|
+
runtime = spec.get("runtime")
|
|
49
|
+
if not isinstance(runtime, dict):
|
|
50
|
+
return
|
|
51
|
+
if not bool(runtime.get("auto_trust_own_workspace")):
|
|
52
|
+
return
|
|
53
|
+
# Local import keeps the spec module free of messaging-layer coupling at
|
|
54
|
+
# import time; only YAMLs that opt into the deprecated field pay the cost.
|
|
55
|
+
from team_agent.events import EventLog
|
|
56
|
+
from team_agent.messaging.leader_panes import _emit_spec_opt_in_deprecation
|
|
57
|
+
_emit_spec_opt_in_deprecation(EventLog(_resolve_workspace_root(path)))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _resolve_workspace_root(spec_path: Path) -> Path:
|
|
61
|
+
"""Find the workspace root that owns this spec.
|
|
62
|
+
|
|
63
|
+
A workspace root is the directory whose `.team/` subdirectory holds the
|
|
64
|
+
runtime state, logs, artifacts, and (for quick-start layouts) the spec
|
|
65
|
+
itself under `.team/current/`. We climb from the spec file's parent
|
|
66
|
+
looking for the first ancestor that has a `.team/` child. If no ancestor
|
|
67
|
+
qualifies (fresh workspace before init, or a spec deliberately placed
|
|
68
|
+
outside any team workspace), we fall back to `spec_path.parent` which is
|
|
69
|
+
the legacy single-layout behaviour.
|
|
70
|
+
|
|
71
|
+
Implementation note: we use real filesystem evidence (`(dir/.team).is_dir()`)
|
|
72
|
+
rather than path-string parsing so the resolver works correctly even when
|
|
73
|
+
workspace paths legitimately contain a `.team` segment.
|
|
74
|
+
"""
|
|
75
|
+
direct_parent = spec_path.parent
|
|
76
|
+
if (direct_parent / ".team").is_dir():
|
|
77
|
+
return direct_parent
|
|
78
|
+
for ancestor in direct_parent.parents:
|
|
79
|
+
if (ancestor / ".team").is_dir():
|
|
80
|
+
return ancestor
|
|
81
|
+
return direct_parent
|
|
82
|
+
|
|
83
|
+
|
|
33
84
|
def validate_spec(spec: dict[str, Any], base_dir: Path | None = None) -> None:
|
|
34
85
|
messages = _basic_schema_errors(spec)
|
|
35
86
|
messages.extend(_semantic_errors(spec, base_dir or Path.cwd()))
|
|
@@ -190,6 +241,12 @@ def _check_runtime(runtime: Any, errors: list[str]) -> None:
|
|
|
190
241
|
"tick_interval_sec",
|
|
191
242
|
"push_min_interval_sec",
|
|
192
243
|
"stuck_timeout_sec",
|
|
244
|
+
# Gap 29 / F3 deprecation (2026-05-26): accept the legacy spec opt-in so
|
|
245
|
+
# YAMLs that still set it validate and the deprecation warning + structured
|
|
246
|
+
# event in messaging/leader_panes.py can fire. The preferred per-session
|
|
247
|
+
# opt-in is the env var TEAM_AGENT_AUTO_TRUST_OWN_WORKSPACE; this spec
|
|
248
|
+
# field will be removed in 0.3.0.
|
|
249
|
+
"auto_trust_own_workspace",
|
|
193
250
|
}
|
|
194
251
|
_check_keys(runtime, "/runtime", required, allowed, errors)
|
|
195
252
|
if not isinstance(runtime, dict):
|
|
@@ -200,6 +257,8 @@ def _check_runtime(runtime: Any, errors: list[str]) -> None:
|
|
|
200
257
|
errors.append("/runtime/display_backend: invalid display backend")
|
|
201
258
|
if "dangerous_auto_approve" in runtime and not isinstance(runtime["dangerous_auto_approve"], bool):
|
|
202
259
|
errors.append("/runtime/dangerous_auto_approve: must be a boolean")
|
|
260
|
+
if "auto_trust_own_workspace" in runtime and not isinstance(runtime["auto_trust_own_workspace"], bool):
|
|
261
|
+
errors.append("/runtime/auto_trust_own_workspace: must be a boolean")
|
|
203
262
|
_check_list(runtime.get("startup_order"), "/runtime/startup_order", errors)
|
|
204
263
|
|
|
205
264
|
|
package/src/team_agent/state.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import hashlib
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import copy
|
|
@@ -23,6 +24,14 @@ SESSION_STATE_FIELDS = [
|
|
|
23
24
|
*SESSION_CAPTURE_FIELDS,
|
|
24
25
|
"spawn_cwd",
|
|
25
26
|
]
|
|
27
|
+
# Field separator for the hashed identity tuple; inputs are rejected if they
# contain it, so distinct tuples cannot collapse to the same joined string.
_UUID_SEPARATOR = "\0"


def derive_leader_session_uuid(machine_fingerprint: str, workspace_abspath: str, os_user: str, team_id: str) -> str:
    """Derive a deterministic 32-hex-character leader session id.

    The four inputs are joined with NUL, UTF-8 encoded, hashed with SHA-256
    and truncated to the first 32 hex digits.

    Raises:
        ValueError: If any input contains a NUL character.
    """
    fields = (machine_fingerprint, workspace_abspath, os_user, team_id)
    for field in fields:
        if _UUID_SEPARATOR in field:
            raise ValueError("leader_session_uuid inputs must not contain NUL")
    digest = hashlib.sha256(_UUID_SEPARATOR.join(fields).encode("utf-8"))
    return digest.hexdigest()[:32]
|
|
26
35
|
|
|
27
36
|
|
|
28
37
|
def runtime_state_path(workspace: Path) -> Path:
|
|
@@ -45,6 +54,8 @@ def load_runtime_state(workspace: Path) -> dict[str, Any]:
|
|
|
45
54
|
return {"agents": {}, "tasks": [], "session_name": None}
|
|
46
55
|
state = json.loads(path.read_text(encoding="utf-8"))
|
|
47
56
|
normalize_agent_session_state(state)
|
|
57
|
+
if _migrate_state_identity(state, workspace):
|
|
58
|
+
save_runtime_state(workspace, state)
|
|
48
59
|
return state
|
|
49
60
|
|
|
50
61
|
|
|
@@ -163,11 +174,75 @@ def resolve_team_scoped_state(
|
|
|
163
174
|
}
|
|
164
175
|
|
|
165
176
|
|
|
166
|
-
def
|
|
177
|
+
def _identity_workspace_abspath(state: dict[str, Any], workspace: Path | None = None) -> str:
|
|
178
|
+
if state.get("workspace"):
|
|
179
|
+
return str(Path(str(state["workspace"])).resolve())
|
|
180
|
+
if state.get("team_dir"):
|
|
181
|
+
return str(Path(str(state["team_dir"])).resolve().parent.parent)
|
|
182
|
+
if state.get("spec_path"):
|
|
183
|
+
spec_path = Path(str(state["spec_path"])).resolve()
|
|
184
|
+
return str(spec_path.parent.parent.parent if spec_path.parent.parent.name == ".team" else spec_path.parent)
|
|
185
|
+
return str((workspace or Path(os.environ.get("TEAM_AGENT_WORKSPACE") or os.getcwd())).resolve())
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _identity_os_user() -> str:
|
|
189
|
+
return os.environ.get("USER") or os.environ.get("USERNAME") or ""
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _identity_machine_fingerprint(state: dict[str, Any]) -> str:
|
|
193
|
+
for record in (state.get("team_owner"), state.get("leader_receiver")):
|
|
194
|
+
if isinstance(record, dict) and record.get("machine_fingerprint"):
|
|
195
|
+
return str(record["machine_fingerprint"])
|
|
196
|
+
return os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or ""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _leader_session_uuid_for_state(state: dict[str, Any], workspace: Path | None = None, team_id: str | None = None) -> str:
    """Derive the leader session UUID for *state* from its identity inputs.

    Combines the machine fingerprint, resolved workspace path, OS user and
    team id (*team_id* when given, otherwise the state's own team key).
    """
    fingerprint = _identity_machine_fingerprint(state)
    workspace_path = _identity_workspace_abspath(state, workspace)
    os_user = _identity_os_user()
    resolved_team = team_id or team_state_key(state)
    return derive_leader_session_uuid(fingerprint, workspace_path, os_user, resolved_team)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _migrate_team_identity(state: dict[str, Any], workspace: Path, team_id: str | None = None) -> bool:
|
|
209
|
+
leader_uuid = _leader_session_uuid_for_state(state, workspace, team_id)
|
|
210
|
+
changed = False
|
|
211
|
+
for key in ("team_owner", "leader_receiver"):
|
|
212
|
+
record = state.get(key)
|
|
213
|
+
if isinstance(record, dict) and not record.get("leader_session_uuid"):
|
|
214
|
+
record["leader_session_uuid"] = leader_uuid
|
|
215
|
+
changed = True
|
|
216
|
+
return changed
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _migrate_state_identity(state: dict[str, Any], workspace: Path) -> bool:
|
|
220
|
+
changed = _migrate_team_identity(state, workspace) if state.get("session_name") else False
|
|
221
|
+
teams = state.get("teams")
|
|
222
|
+
if isinstance(teams, dict):
|
|
223
|
+
for team_id, team_state in teams.items():
|
|
224
|
+
if isinstance(team_state, dict):
|
|
225
|
+
changed = _migrate_team_identity(team_state, workspace, str(team_id)) or changed
|
|
226
|
+
return changed
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _caller_identity_from_env(state: dict[str, Any] | None = None, team_id: str | None = None, workspace: Path | None = None) -> dict[str, str]:
|
|
230
|
+
state = state or {}
|
|
231
|
+
machine_fingerprint = os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or ""
|
|
232
|
+
override = os.environ.get("TEAM_AGENT_LEADER_SESSION_UUID_OVERRIDE") or ""
|
|
233
|
+
env_uuid = os.environ.get("TEAM_AGENT_LEADER_SESSION_UUID") or ""
|
|
234
|
+
leader_uuid = override or env_uuid or derive_leader_session_uuid(
|
|
235
|
+
machine_fingerprint,
|
|
236
|
+
_identity_workspace_abspath(state, workspace),
|
|
237
|
+
_identity_os_user(),
|
|
238
|
+
team_id or os.environ.get("TEAM_AGENT_TEAM_ID") or team_state_key(state),
|
|
239
|
+
)
|
|
167
240
|
return {
|
|
168
241
|
"pane_id": os.environ.get("TEAM_AGENT_LEADER_PANE_ID") or "",
|
|
169
242
|
"provider": os.environ.get("TEAM_AGENT_LEADER_PROVIDER") or "",
|
|
170
|
-
"machine_fingerprint":
|
|
243
|
+
"machine_fingerprint": machine_fingerprint,
|
|
244
|
+
"leader_session_uuid": leader_uuid,
|
|
245
|
+
"leader_session_uuid_source": "explicit-override" if override else ("env" if env_uuid else "derived"),
|
|
171
246
|
}
|
|
172
247
|
|
|
173
248
|
|
|
@@ -175,19 +250,22 @@ def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
|
|
|
175
250
|
owner = state.get("team_owner") or {}
|
|
176
251
|
if not owner:
|
|
177
252
|
return None
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
)
|
|
253
|
+
_migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
|
|
254
|
+
caller = _caller_identity_from_env(state, team_state_key(state))
|
|
255
|
+
owner_uuid = str(owner.get("leader_session_uuid") or "")
|
|
256
|
+
caller_uuid = caller["leader_session_uuid"]
|
|
257
|
+
owner_pane = str(owner.get("pane_id") or "")
|
|
258
|
+
caller_pane = caller.get("pane_id") or ""
|
|
259
|
+
if caller_uuid == owner_uuid and (not caller_pane or caller_pane == owner_pane):
|
|
184
260
|
return None
|
|
261
|
+
same_uuid = caller_uuid == owner_uuid
|
|
185
262
|
return {
|
|
186
263
|
"ok": False,
|
|
187
264
|
"status": "refused",
|
|
188
265
|
"reason": "team_owner_mismatch",
|
|
266
|
+
"reason_kind": "sticky_bind_collision" if same_uuid else "owner_takeover_required",
|
|
189
267
|
"error": "not_owner",
|
|
190
|
-
"action": "
|
|
268
|
+
"action": "team-agent claim-leader --confirm" if same_uuid else "team-agent takeover --confirm",
|
|
191
269
|
"team_owner": owner,
|
|
192
270
|
"caller": caller,
|
|
193
271
|
}
|
|
@@ -209,14 +287,16 @@ def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None)
|
|
|
209
287
|
|
|
210
288
|
def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
|
|
211
289
|
if state.get("team_owner"):
|
|
290
|
+
_migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
|
|
212
291
|
return state["team_owner"]
|
|
213
|
-
caller = _caller_identity_from_env()
|
|
292
|
+
caller = _caller_identity_from_env(state, team_state_key(state))
|
|
214
293
|
if not caller["pane_id"]:
|
|
215
294
|
return None
|
|
216
295
|
owner = {
|
|
217
296
|
"pane_id": caller["pane_id"],
|
|
218
297
|
"provider": caller["provider"],
|
|
219
298
|
"machine_fingerprint": caller["machine_fingerprint"],
|
|
299
|
+
"leader_session_uuid": caller["leader_session_uuid"],
|
|
220
300
|
"claimed_at": datetime.now(timezone.utc).isoformat(),
|
|
221
301
|
"claimed_via": source,
|
|
222
302
|
}
|
|
@@ -224,7 +304,70 @@ def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopul
|
|
|
224
304
|
return owner
|
|
225
305
|
|
|
226
306
|
|
|
307
|
+
def apply_first_time_leader_binding(
    workspace: Path,
    state: dict[str, Any],
    receiver: dict[str, Any],
    pane_info: dict[str, Any],
    identity: dict[str, Any],
    source: str,
) -> dict[str, Any]:
    """Bind a leader pane to the team for the first time.

    The pane must be running a command the leader-panes helper accepts for
    the receiver's provider, and its current path must be the workspace
    (compared after ``realpath``). On success *receiver* is stamped with the
    caller's identity, ``state["team_owner"]`` is created with
    ``owner_epoch`` 0, and *receiver* is stored as ``state["leader_receiver"]``.

    Returns:
        ``{"ok": True, ...}`` on success, or ``{"ok": False, "reason": ...}``
        with reason ``leader_pane_wrong_command`` or
        ``leader_pane_wrong_workspace``; in the failure cases neither
        *state* nor *receiver* is modified.
    """
    from team_agent.messaging.leader_panes import _leader_command_looks_usable

    pane_command = pane_info.get("pane_current_command", "")
    provider = str(receiver.get("provider") or "")
    if not _leader_command_looks_usable(pane_command, provider):
        return {"ok": False, "reason": "leader_pane_wrong_command", "error": f"pane command {pane_command!r} is not a leader host", "pane": pane_info}

    pane_cwd = pane_info.get("pane_current_path")
    expected_cwd = str(workspace.resolve())
    in_workspace = bool(pane_cwd) and os.path.realpath(pane_cwd) == os.path.realpath(expected_cwd)
    if not in_workspace:
        return {"ok": False, "reason": "leader_pane_wrong_workspace", "error": f"pane cwd {pane_cwd!r} does not match workspace {expected_cwd!r}", "pane": pane_info}

    leader_uuid = identity["leader_session_uuid"]
    fingerprint = identity["machine_fingerprint"]
    receiver.update(
        {
            "leader_session_uuid": leader_uuid,
            "machine_fingerprint": fingerprint,
            "owner_epoch": 0,
        }
    )
    owner_record = {
        "pane_id": receiver["pane_id"],
        "provider": provider,
        "machine_fingerprint": fingerprint,
        "leader_session_uuid": leader_uuid,
        "owner_epoch": 0,
        "claimed_at": datetime.now(timezone.utc).isoformat(),
        "claimed_via": source,
    }
    state["team_owner"] = owner_record
    state["leader_receiver"] = receiver
    return {"ok": True, "pane": pane_info, "warning": None, "first_time": True}
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def leader_env_exports(receiver: dict[str, Any], identity: dict[str, Any]) -> dict[str, str]:
    """Build the ``TEAM_AGENT_*`` variables to export for a bound leader.

    Pane id and provider come from *receiver*; everything else comes from
    *identity*. Missing or falsy values become empty strings.
    """
    sources = (
        ("TEAM_AGENT_LEADER_PANE_ID", receiver, "pane_id"),
        ("TEAM_AGENT_LEADER_PROVIDER", receiver, "provider"),
        ("TEAM_AGENT_LEADER_SESSION_UUID", identity, "leader_session_uuid"),
        ("TEAM_AGENT_MACHINE_FINGERPRINT", identity, "machine_fingerprint"),
        ("TEAM_AGENT_WORKSPACE", identity, "workspace_abspath"),
        ("TEAM_AGENT_TEAM_ID", identity, "team_id"),
    )
    return {variable: str(record.get(field) or "") for variable, record, field in sources}
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def validate_leader_uuid_from_targets(receiver: dict[str, Any], targets: dict[str, Any]) -> dict[str, Any]:
    """Check that the bound pane still advertises the stored leader session UUID.

    Validation is skipped (``{"ok": True}``) when the receiver has no stored
    UUID or uses the ``"fake"`` provider. Otherwise the pane with the
    receiver's ``pane_id`` is looked up in ``targets["targets"]`` and its
    UUID, taken from ``leader_session_uuid`` or from its ``leader_env``,
    must equal the stored one.

    Returns:
        ``{"ok": True}`` or ``{"ok": False, "reason": ...}`` with reason
        ``leader_uuid_lookup_failed``, ``leader_uuid_missing`` or
        ``leader_uuid_mismatch``.
    """
    expected = str(receiver.get("leader_session_uuid") or "")
    if receiver.get("provider") == "fake" or not expected:
        return {"ok": True}
    if not targets.get("ok"):
        return {"ok": False, "reason": "leader_uuid_lookup_failed", "error": targets.get("error") or "tmux target scan failed"}

    bound_pane = receiver.get("pane_id")
    match = None
    for candidate in targets.get("targets", []):
        if candidate.get("pane_id") == bound_pane:
            match = candidate
            break

    # A missing pane is treated like a pane that advertises nothing.
    record = match or {}
    pane_env = record.get("leader_env")
    if not isinstance(pane_env, dict):
        pane_env = {}
    actual = str(record.get("leader_session_uuid") or pane_env.get("TEAM_AGENT_LEADER_SESSION_UUID") or "")

    if not actual:
        return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": match}
    if actual != expected:
        return {"ok": False, "reason": "leader_uuid_mismatch", "error": "bound pane TEAM_AGENT_LEADER_SESSION_UUID does not match stored team owner", "pane": match}
    return {"ok": True}
|
|
367
|
+
|
|
368
|
+
|
|
227
369
|
def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
|
|
370
|
+
_migrate_state_identity(state, workspace)
|
|
228
371
|
path = runtime_state_path(workspace)
|
|
229
372
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
230
373
|
tmp_path = path.with_name(f"{path.name}.{os.getpid()}.{uuid.uuid4().hex}.tmp")
|