@team-agent/installer 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/team_agent/_legacy_pane_discovery.py +2 -5
- package/src/team_agent/approvals/status.py +5 -1
- package/src/team_agent/cli/commands.py +10 -0
- package/src/team_agent/cli/parser.py +19 -2
- package/src/team_agent/diagnose/comms.py +213 -0
- package/src/team_agent/leader/__init__.py +20 -10
- package/src/team_agent/leader_binding.py +4 -26
- package/src/team_agent/message_store/leader_notification_log.py +80 -39
- package/src/team_agent/message_store/schema.py +9 -6
- package/src/team_agent/message_store/schema_migration.py +7 -5
- package/src/team_agent/messaging/activity_detector.py +69 -5
- package/src/team_agent/messaging/leader.py +19 -7
- package/src/team_agent/messaging/leader_panes.py +6 -9
- package/src/team_agent/messaging/result_delivery.py +28 -4
- package/src/team_agent/messaging/scheduler.py +1 -1
- package/src/team_agent/messaging/send.py +5 -1
- package/src/team_agent/restart/orchestration.py +24 -1
- package/src/team_agent/runtime.py +6 -2
- package/src/team_agent/state.py +59 -11
package/package.json
CHANGED
|
@@ -100,10 +100,7 @@ def _infer_workspace_tmux_pane(provider: str, workspace: Path) -> dict[str, Any]
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
def _pane_is_usable_leader(pane: dict[str, str], provider: str, workspace: Path | None) -> bool:
|
|
103
|
-
|
|
104
|
-
command = pane.get("pane_current_command", "")
|
|
105
|
-
if not _leader_command_looks_usable(command, provider) and _leader_command_provider(command) is None:
|
|
106
|
-
return False
|
|
103
|
+
_ = provider
|
|
107
104
|
if workspace is not None and not _pane_path_matches_workspace(pane, workspace):
|
|
108
105
|
return False
|
|
109
106
|
return True
|
|
@@ -177,7 +174,7 @@ def _resolve_leader_pane(
|
|
|
177
174
|
)
|
|
178
175
|
raise _RuntimeError(
|
|
179
176
|
"Team Agent could not locate a tmux-managed leader pane for this workspace. "
|
|
180
|
-
"Run quick-start from the visible tmux-managed leader pane,
|
|
177
|
+
"Run quick-start from the visible tmux-managed leader pane, "
|
|
181
178
|
"or use `team-agent codex`/`team-agent claude` as a convenience fallback."
|
|
182
179
|
+ details
|
|
183
180
|
)
|
|
@@ -54,6 +54,8 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
|
|
|
54
54
|
health_status = agent_health_status(agent_state)
|
|
55
55
|
last_output_at = agent_state.get("last_output_at")
|
|
56
56
|
window = agent_state.get("window", agent_id)
|
|
57
|
+
current_task = current_task_for_agent(state.get("tasks", []), agent_id)
|
|
58
|
+
pane_delta_recent = False
|
|
57
59
|
scrollback = ""
|
|
58
60
|
pane_info: dict[str, Any] | None = None
|
|
59
61
|
if session_name and _tmux_window_exists(session_name, window):
|
|
@@ -62,6 +64,7 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
|
|
|
62
64
|
scrollback = proc.stdout
|
|
63
65
|
digest = hashlib.sha256(proc.stdout.encode("utf-8", errors="ignore")).hexdigest()
|
|
64
66
|
if digest != agent_state.get("last_output_hash"):
|
|
67
|
+
pane_delta_recent = True
|
|
65
68
|
last_output_at = datetime.now(timezone.utc).isoformat()
|
|
66
69
|
agent_state["last_output_hash"] = digest
|
|
67
70
|
agent_state["last_output_at"] = last_output_at
|
|
@@ -78,6 +81,8 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
|
|
|
78
81
|
last_output_at,
|
|
79
82
|
pane_info,
|
|
80
83
|
scrollback,
|
|
84
|
+
active_task=current_task is not None,
|
|
85
|
+
pane_delta_recent=pane_delta_recent,
|
|
81
86
|
)
|
|
82
87
|
agent_state["activity"] = {
|
|
83
88
|
"status": activity.get("status"),
|
|
@@ -91,7 +96,6 @@ def sync_agent_health(workspace: Path, state: dict[str, Any], store: MessageStor
|
|
|
91
96
|
mapped = mapping.get(raw)
|
|
92
97
|
if mapped:
|
|
93
98
|
health_status = mapped
|
|
94
|
-
current_task = current_task_for_agent(state.get("tasks", []), agent_id)
|
|
95
99
|
store.upsert_agent_health(
|
|
96
100
|
agent_id,
|
|
97
101
|
health_status,
|
|
@@ -219,6 +219,16 @@ def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
|
|
|
219
219
|
gate = getattr(args, "gate", None)
|
|
220
220
|
if getattr(args, "fix", False) is True and not gate:
|
|
221
221
|
raise TeamAgentError("--fix requires --gate")
|
|
222
|
+
if getattr(args, "comms", False) is True or gate == "comms":
|
|
223
|
+
from team_agent.diagnose.comms import COMMS_BOUNDARY_TEXT, run_comms_selftest
|
|
224
|
+
result = run_comms_selftest(
|
|
225
|
+
Path(args.workspace).resolve(),
|
|
226
|
+
team=getattr(args, "team", None),
|
|
227
|
+
gate=gate,
|
|
228
|
+
)
|
|
229
|
+
if args.json:
|
|
230
|
+
return result
|
|
231
|
+
return f"{COMMS_BOUNDARY_TEXT}\n{json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True)}"
|
|
222
232
|
if isinstance(gate, str) and gate:
|
|
223
233
|
from team_agent.diagnose.orphan_cleanup import orphan_gate
|
|
224
234
|
if gate != "orphans":
|
|
@@ -315,10 +315,27 @@ def main(argv: list[str] | None = None) -> None:
|
|
|
315
315
|
add_json(p)
|
|
316
316
|
p.set_defaults(func=cmd_validate_result)
|
|
317
317
|
|
|
318
|
-
p = sub.add_parser(
|
|
318
|
+
p = sub.add_parser(
|
|
319
|
+
"doctor",
|
|
320
|
+
help="Check local dependencies, providers, auth hints, tmux, and MCP",
|
|
321
|
+
usage=(
|
|
322
|
+
"team-agent doctor validates live pane binding consistency. Does NOT perform live runtime message "
|
|
323
|
+
"round-trip. comms contract suite deferred to 0.2.9 (test files not shipped). "
|
|
324
|
+
"(zero token, zero pollution) [options]"
|
|
325
|
+
),
|
|
326
|
+
)
|
|
319
327
|
p.add_argument("spec", nargs="?")
|
|
320
328
|
p.add_argument("--workspace", default=".", help="Workspace whose team.db schema should be diagnosed")
|
|
321
|
-
p.add_argument("--gate", choices=["orphans"], help="Run a CI-friendly doctor gate")
|
|
329
|
+
p.add_argument("--gate", choices=["orphans", "comms"], help="Run a CI-friendly doctor gate")
|
|
330
|
+
p.add_argument(
|
|
331
|
+
"--comms",
|
|
332
|
+
action="store_true",
|
|
333
|
+
help=(
|
|
334
|
+
"Validate live pane binding consistency. Does NOT perform live runtime message round-trip. "
|
|
335
|
+
"comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)"
|
|
336
|
+
),
|
|
337
|
+
)
|
|
338
|
+
p.add_argument("--team", help="Explicit team/session target for --comms")
|
|
322
339
|
p.add_argument("--fix", action="store_true", help="With --gate orphans: apply the gate fix")
|
|
323
340
|
p.add_argument("--fix-schema", action="store_true", help="Rebuild drifted team.db table layouts after writing a backup")
|
|
324
341
|
p.add_argument(
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Protocol
|
|
7
|
+
|
|
8
|
+
from team_agent.state import load_runtime_state, select_runtime_state
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
COMMS_BOUNDARY_TEXT = (
|
|
12
|
+
"validates live pane binding consistency. Does NOT perform live runtime message round-trip. "
|
|
13
|
+
"comms contract suite deferred to 0.2.9 (test files not shipped). (zero token, zero pollution)"
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CommsSelftestDriver(Protocol):
    """Test seam for the comms self-test; the production driver only reads state.

    The protocol declares no required members. The functions in this module
    probe a driver for optional hooks by name and fall back to defaults when a
    hook is absent: ``run_id``, ``evaluate_idle_behavior``, ``idle_execution``,
    ``receiver_binding``, ``current_pane_id``, ``provider_sdk_calls``,
    ``select_runtime_state``, ``load_runtime_state``, ``state`` and
    ``state_before``.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_comms_selftest(
    workspace: Path,
    *,
    team: str | None = None,
    gate: str | None = None,
    response_sla_sec: float = 20.0,
    probe_content: str | None = None,
    driver: CommsSelftestDriver | None = None,
) -> dict[str, Any]:
    """Run the comms self-test checks and return a report dictionary.

    Args:
        workspace: Workspace directory; resolved before being handed to the checks.
        team: Optional team selector forwarded to the receiver-binding check.
        gate: Accepted for interface compatibility; not used here.
        response_sla_sec: Accepted for interface compatibility; not used here.
        probe_content: Accepted for interface compatibility; not used here.
        driver: Optional object exposing test hooks; a default driver without
            hooks is used when omitted.

    Returns:
        A dict with ``ok``, ``status`` (``"pass"``/``"fail"``), ``run_id``,
        ``scope``, ``boundary`` and the per-check results under ``checks``.
    """
    # These parameters are part of the public signature but play no role in this version.
    del gate, response_sla_sec, probe_content
    workspace = workspace.resolve()
    driver = driver or _DefaultCommsSelftestDriver()

    # Resolve ``run_id`` once: call it when it is a hook, use it directly when it
    # is a plain attribute. A hook that returns a falsy value must fall through to
    # a generated id; previously the bound method object itself leaked into the
    # report in that case, which is not JSON-serializable.
    run_id_source = getattr(driver, "run_id", None)
    run_id = run_id_source() if callable(run_id_source) else run_id_source
    if not run_id:
        run_id = uuid.uuid4().hex[:12]

    checks = {
        "receiver_binding": _receiver_binding_check(workspace, team, driver),
        "contract_suite": _contract_suite_check(workspace, driver),
        "provider_sdk_calls": _provider_sdk_calls_check(driver),
    }
    ok = all(_check_pass(check) for check in checks.values())
    return {
        "ok": ok,
        "status": "pass" if ok else "fail",
        "run_id": run_id,
        "scope": "binding_consistency",
        "boundary": COMMS_BOUNDARY_TEXT,
        "checks": checks,
    }
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def evaluate_idle_behavior(
    workspace: Path,
    *,
    agent_id: str,
    claimed_status: str,
    response_sla_sec: float = 20.0,
    token: str | None = None,
    driver: CommsSelftestDriver | None = None,
) -> dict[str, Any]:
    """Evaluate an agent's claimed idle/working status and return a result dict.

    Resolution order:
      1. If the driver has a callable ``evaluate_idle_behavior`` hook returning a
         dict, that dict is normalized and returned.
      2. Otherwise, if the driver has a non-``None`` ``idle_execution`` attribute,
         its status (the ``"status"`` entry when it is a dict, else the value
         itself) decides pass/fail.
      3. Otherwise the result is ``"not_challenged"`` and ``ok`` reflects whether
         the upper-cased claimed status is IDLE, WORKING or RUNNING.
    """
    generated_id = uuid.uuid4().hex[:12]
    probe_token = token or f"idle-challenge-{generated_id}"
    active_driver = driver or _DefaultCommsSelftestDriver()

    delegated = _driver_call(
        active_driver,
        "evaluate_idle_behavior",
        workspace.resolve(),
        agent_id=agent_id,
        claimed_status=claimed_status,
        response_sla_sec=response_sla_sec,
        token=probe_token,
        default=None,
    )
    if isinstance(delegated, dict):
        return _normalize_idle_result(delegated, probe_token)

    canned = _driver_value(active_driver, "idle_execution", default=None)
    if canned is not None:
        raw_status = canned.get("status") if isinstance(canned, dict) else canned
        execution = str(raw_status)
        succeeded = execution not in {"timeout", "fail", "failed"}
        verdict = "pass" if succeeded else "fail"
        return {
            "ok": succeeded,
            "agent_id": agent_id,
            "claimed_status": claimed_status,
            "token": probe_token,
            "status": verdict,
            "execution_ack": execution,
            "classification_accuracy": verdict,
        }

    # No driver input at all: only sanity-check the claimed status string.
    normalized_claim = str(claimed_status or "").upper()
    recognized = normalized_claim in {"IDLE", "WORKING", "RUNNING"}
    return {
        "ok": recognized,
        "agent_id": agent_id,
        "claimed_status": claimed_status,
        "token": probe_token,
        "status": "not_challenged",
        "execution_ack": "pass" if recognized else "timeout",
    }
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _receiver_binding_check(workspace: Path, team: str | None, driver: CommsSelftestDriver) -> dict[str, Any]:
    """Compare the recorded receiver, owner and caller pane ids.

    A dict returned by the driver's ``receiver_binding`` hook is used as the
    result, with default ``status``/``verifies``/``proof``/``state_read_observed``
    entries filled in. Otherwise the panes are read from the selected state
    (``leader_receiver`` and ``team_owner``) and from the driver's
    ``current_pane_id`` hook or ``$TMUX_PANE``; any pair of non-empty ids that
    differ is reported as a mismatch and fails the check.
    """
    canned = _driver_call(driver, "receiver_binding", workspace, team=team, default=None)
    if isinstance(canned, dict):
        report = dict(canned)
        derived_status = "pass" if report.get("ok", True) else "fail"
        for key, fallback in (
            ("status", derived_status),
            ("verifies", "binding_consistency"),
            ("proof", "state_read"),
            ("state_read_observed", True),
        ):
            report.setdefault(key, fallback)
        return report

    state = _selftest_state(workspace, team, driver)

    receiver_record = state.get("leader_receiver")
    if not isinstance(receiver_record, dict):
        receiver_record = {}
    owner_record = state.get("team_owner")
    if not isinstance(owner_record, dict):
        owner_record = {}

    receiver_pane = str(receiver_record.get("pane_id") or "")
    owner_pane = str(owner_record.get("pane_id") or "")
    hinted_pane = _driver_call(driver, "current_pane_id", default=None)
    caller_pane = str(hinted_pane or os.environ.get("TMUX_PANE") or "")

    # Only pairs where both sides are known can disagree.
    comparisons = (
        ("owner_receiver_pane_mismatch", owner_pane, receiver_pane),
        ("caller_owner_pane_mismatch", caller_pane, owner_pane),
        ("caller_receiver_pane_mismatch", caller_pane, receiver_pane),
    )
    mismatches: list[str] = [
        label for label, left, right in comparisons if left and right and left != right
    ]

    return {
        "status": "fail" if mismatches else "pass",
        "verifies": "binding_consistency",
        "proof": "state_read",
        "state_read_observed": True,
        "pane_id": receiver_pane,
        "owner_pane_id": owner_pane,
        "caller_pane_id": caller_pane,
        "mismatches": mismatches,
        "configured": bool(receiver_pane),
    }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _contract_suite_check(workspace: Path, driver: CommsSelftestDriver) -> dict[str, Any]:
|
|
133
|
+
del workspace, driver
|
|
134
|
+
return {
|
|
135
|
+
"status": "deferred",
|
|
136
|
+
"deferred_to": "0.2.9",
|
|
137
|
+
"reason": "contract test files not shipped with package",
|
|
138
|
+
"message": "comms contract verification deferred to 0.2.9; contract test files not shipped with package",
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _provider_sdk_calls_check(driver: CommsSelftestDriver) -> dict[str, Any]:
    """Report provider SDK call counts exposed by the driver.

    Reads the driver's ``provider_sdk_calls`` attribute; anything that is not a
    dict is treated as zero calls for every provider. The check fails when any
    of the ``anthropic``, ``openai`` or ``httpx`` counts is non-zero.
    """
    reported = _driver_value(driver, "provider_sdk_calls", default=None)
    source = reported if isinstance(reported, dict) else {"anthropic": 0, "openai": 0, "httpx": 0}

    counts: dict[str, int] = {}
    for sdk in ("anthropic", "openai", "httpx"):
        counts[sdk] = int(source.get(sdk, 0) or 0)

    saw_calls = any(counts.values())
    return {
        "status": "fail" if saw_calls else "pass",
        "verifies": "no_provider_sdk_calls",
        "calls": counts,
    }
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _selftest_state(workspace: Path, team: str | None, driver: CommsSelftestDriver) -> dict[str, Any]:
    """Return the runtime state the self-test should inspect.

    Driver overrides are tried in order and the first one yielding a dict wins
    (a shallow copy is returned): the ``select_runtime_state`` hook, the
    ``load_runtime_state`` hook, the ``state`` attribute, then the
    ``state_before`` attribute. Without any override the real
    ``select_runtime_state(workspace, team)`` result is returned.
    """
    # Lambdas keep each lookup lazy so later hooks are not touched once one matches.
    lookups = (
        lambda: _driver_call(driver, "select_runtime_state", workspace, team=team, default=None),
        lambda: _driver_call(driver, "load_runtime_state", workspace, default=None),
        lambda: _driver_value(driver, "state", default=None),
        lambda: _driver_value(driver, "state_before", default=None),
    )
    for lookup in lookups:
        candidate = lookup()
        if isinstance(candidate, dict):
            return dict(candidate)
    return select_runtime_state(workspace, team)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _check_pass(value: Any) -> bool:
|
|
171
|
+
if not isinstance(value, dict):
|
|
172
|
+
return False
|
|
173
|
+
if value.get("status") == "deferred":
|
|
174
|
+
return True
|
|
175
|
+
return value.get("status") in {"pass", "not_implemented"} and _has_required_evidence(value)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _has_required_evidence(value: dict[str, Any]) -> bool:
|
|
179
|
+
verifies = value.get("verifies")
|
|
180
|
+
if verifies == "binding_consistency":
|
|
181
|
+
return value.get("proof") == "state_read" and value.get("state_read_observed") is True
|
|
182
|
+
if verifies == "no_provider_sdk_calls":
|
|
183
|
+
calls = value.get("calls") if isinstance(value.get("calls"), dict) else {}
|
|
184
|
+
return all(int(calls.get(name, 0) or 0) == 0 for name in ("anthropic", "openai", "httpx"))
|
|
185
|
+
return value.get("status") == "pass"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _normalize_idle_result(result: dict[str, Any], token: str) -> dict[str, Any]:
|
|
189
|
+
out = dict(result)
|
|
190
|
+
out.setdefault("token", token)
|
|
191
|
+
if "execution_ack" not in out:
|
|
192
|
+
if out.get("ok") is False or out.get("status") in {"timeout", "busy", "fail"}:
|
|
193
|
+
out["execution_ack"] = "timeout"
|
|
194
|
+
else:
|
|
195
|
+
out["execution_ack"] = "pass"
|
|
196
|
+
return out
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _driver_call(driver: CommsSelftestDriver | None, name: str, *args: Any, default: Any = None, **kwargs: Any) -> Any:
|
|
200
|
+
fn = getattr(driver, name, None)
|
|
201
|
+
if not callable(fn):
|
|
202
|
+
return default
|
|
203
|
+
return fn(*args, **kwargs)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _driver_value(driver: CommsSelftestDriver | None, name: str, default: Any = None) -> Any:
|
|
207
|
+
if driver is None:
|
|
208
|
+
return default
|
|
209
|
+
return getattr(driver, name, default)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class _DefaultCommsSelftestDriver:
|
|
213
|
+
pass
|
|
@@ -316,7 +316,7 @@ def attach_leader_to_state(
|
|
|
316
316
|
if not validation["ok"]:
|
|
317
317
|
readopt = _try_readopt_leader_pane(workspace, state, receiver, pane_info, targets, owner_record, receiver_provider, source, event_log)
|
|
318
318
|
if readopt is not None:
|
|
319
|
-
return readopt
|
|
319
|
+
return readopt, {"ok": True, "pane": pane_info, "readopted": True, "warning": None}
|
|
320
320
|
event_log.write("leader_receiver.attach_failed", target=pane or pane_info.get("pane_id"), discovery=discovery, provider=provider, reason=validation["reason"], error=validation.get("error"), source=source, uuid_prefix=str(identity.get("leader_session_uuid") or "")[:12])
|
|
321
321
|
raise RuntimeError(_strict_leader_validation_error(validation))
|
|
322
322
|
if validation.get("warning"):
|
|
@@ -346,6 +346,7 @@ def _set_tmux_leader_environment(receiver: dict[str, Any], identity: dict[str, A
|
|
|
346
346
|
def _strict_leader_validation_error(validation: dict[str, Any]) -> str:
|
|
347
347
|
return (
|
|
348
348
|
f"leader pane validation failed: {validation['reason']}. "
|
|
349
|
+
"tmux leader pane validation could not bind the recorded pane. "
|
|
349
350
|
"first quick-start uses cwd+command match only; this team already has team_owner "
|
|
350
351
|
"so strict UUID gate applies; use team-agent takeover --confirm if you intend to take over"
|
|
351
352
|
)
|
|
@@ -500,7 +501,7 @@ def _try_readopt_leader_pane(
|
|
|
500
501
|
receiver_provider: str,
|
|
501
502
|
source: str,
|
|
502
503
|
event_log: EventLog,
|
|
503
|
-
) ->
|
|
504
|
+
) -> dict[str, Any] | None:
|
|
504
505
|
# C4/C11/C12: attach-leader converges on the lease claim. When the strict UUID
|
|
505
506
|
# gate would refuse, re-adopt the pane instead IF it is a live workspace leader
|
|
506
507
|
# (real injected uuid + cwd inside the workspace subtree) and the lease is either
|
|
@@ -509,20 +510,29 @@ def _try_readopt_leader_pane(
|
|
|
509
510
|
from team_agent.messaging.leader_panes import _leader_command_looks_usable, _target_leader_session_uuid
|
|
510
511
|
target_list = targets.get("targets", []) if isinstance(targets, dict) and targets.get("ok") else []
|
|
511
512
|
pane_target = next((item for item in target_list if isinstance(item, dict) and str(item.get("pane_id")) == str(pane_info.get("pane_id"))), None)
|
|
512
|
-
pane_uuid = _target_leader_session_uuid(pane_target or {}) or _target_leader_session_uuid(pane_info)
|
|
513
|
-
if not pane_uuid:
|
|
514
|
-
return None
|
|
513
|
+
pane_uuid = _target_leader_session_uuid(pane_target or {}) or _target_leader_session_uuid(pane_info) or str(owner_record.get("leader_session_uuid") or receiver.get("leader_session_uuid") or "")
|
|
515
514
|
if not _cwd_inside_workspace(pane_info.get("pane_current_path"), workspace):
|
|
516
515
|
return None
|
|
517
516
|
if not _leader_command_looks_usable(str(pane_info.get("pane_current_command", "")), receiver_provider):
|
|
518
517
|
return None
|
|
518
|
+
owner_pane = str(owner_record.get("pane_id") or "")
|
|
519
519
|
owner_uuid = str(owner_record.get("leader_session_uuid") or "")
|
|
520
|
-
|
|
520
|
+
target_uuid = _target_leader_session_uuid(pane_target or {})
|
|
521
|
+
if owner_pane and owner_pane != str(pane_info.get("pane_id") or "") and (not owner_uuid or target_uuid != owner_uuid):
|
|
521
522
|
return None
|
|
522
523
|
epoch = _lease_epoch(owner_record, receiver) + (1 if owner_record else 0)
|
|
523
|
-
receiver
|
|
524
|
-
|
|
525
|
-
|
|
524
|
+
receiver.update({
|
|
525
|
+
"pane_id": pane_info["pane_id"],
|
|
526
|
+
"session_name": pane_info.get("session_name"),
|
|
527
|
+
"window_index": pane_info.get("window_index"),
|
|
528
|
+
"window_name": pane_info.get("window_name"),
|
|
529
|
+
"pane_index": pane_info.get("pane_index"),
|
|
530
|
+
"pane_tty": pane_info.get("pane_tty"),
|
|
531
|
+
"pane_current_command": pane_info.get("pane_current_command"),
|
|
532
|
+
"leader_session_uuid": pane_uuid,
|
|
533
|
+
"owner_epoch": epoch,
|
|
534
|
+
"discovery": "attach_readopt",
|
|
535
|
+
})
|
|
526
536
|
receiver.pop("warning", None)
|
|
527
537
|
old_pane = owner_record.get("pane_id") or (state.get("leader_receiver") or {}).get("pane_id")
|
|
528
538
|
state["team_owner"] = {
|
|
@@ -540,7 +550,7 @@ def _try_readopt_leader_pane(
|
|
|
540
550
|
event_log.write("owner.adopted_on_restart", reason="attach_readopt", old_pane_id=old_pane, new_pane_id=pane_info["pane_id"], owner_epoch=epoch, uuid_prefix=pane_uuid[:8], team_id=team_state_key(state))
|
|
541
551
|
event_log.write("leader_receiver.rebind_applied", reason="attach_readopt", old_pane_id=old_pane, new_pane_id=pane_info["pane_id"], owner_epoch=epoch, uuid_prefix=pane_uuid[:8], team_id=team_state_key(state))
|
|
542
552
|
event_log.write("leader_receiver.attached", target=pane_info["pane_id"], session_name=pane_info.get("session_name"), provider=receiver.get("provider"), discovery="attach_readopt", source=source, owner_epoch=epoch, uuid_prefix=pane_uuid[:8])
|
|
543
|
-
return receiver
|
|
553
|
+
return receiver
|
|
544
554
|
|
|
545
555
|
|
|
546
556
|
def _detect_dual_state_divergence(workspace: Path, state: dict[str, Any]) -> dict[str, Any] | None:
|
|
@@ -10,8 +10,8 @@ is sourced from the caller-supplied positive facts only:
|
|
|
10
10
|
|
|
11
11
|
Reverse enumeration of panes / windows / clients is forbidden. Heuristic
|
|
12
12
|
ranking ("active pane", "current client", "first leader-shaped pane") is
|
|
13
|
-
forbidden. ``$TMUX_PANE`` missing
|
|
14
|
-
|
|
13
|
+
forbidden. ``$TMUX_PANE`` missing → refuse and emit ``owner.bind_refused``.
|
|
14
|
+
The pane's current command is diagnostic metadata only. Successful binds emit
|
|
15
15
|
``owner.bound_from_caller_pane`` and force-write every owner identity
|
|
16
16
|
field; old fields are not merged or migrated.
|
|
17
17
|
"""
|
|
@@ -37,11 +37,9 @@ def run_cmd(args: list[str], timeout: int = 5) -> subprocess.CompletedProcess[st
|
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
LEADER_HOST_COMMANDS = frozenset({"claude", "claude.exe", "codex"})
|
|
41
|
-
|
|
42
40
|
_HINT_RUN_FROM_LEADER_PANE = (
|
|
43
41
|
"run team-agent from inside your leader pane "
|
|
44
|
-
"(the tmux pane
|
|
42
|
+
"(the tmux pane you want to own this team)."
|
|
45
43
|
)
|
|
46
44
|
|
|
47
45
|
|
|
@@ -71,7 +69,7 @@ def bind_owner_from_caller_pane(
|
|
|
71
69
|
|
|
72
70
|
{
|
|
73
71
|
"ok": False,
|
|
74
|
-
"reason": "caller_pane_missing"
|
|
72
|
+
"reason": "caller_pane_missing",
|
|
75
73
|
"caller_pane_id": ..., "caller_current_command": ...,
|
|
76
74
|
"hint": ...,
|
|
77
75
|
}
|
|
@@ -110,26 +108,6 @@ def bind_owner_from_caller_pane(
|
|
|
110
108
|
caller_command = ""
|
|
111
109
|
else:
|
|
112
110
|
caller_command = (getattr(proc, "stdout", "") or "").strip()
|
|
113
|
-
if caller_command not in LEADER_HOST_COMMANDS:
|
|
114
|
-
hint = (
|
|
115
|
-
f"run team-agent from inside your leader pane "
|
|
116
|
-
f"(this pane is running {caller_command or '<unknown>'})."
|
|
117
|
-
)
|
|
118
|
-
event_log.write(
|
|
119
|
-
"owner.bind_refused",
|
|
120
|
-
reason="caller_not_leader_shaped",
|
|
121
|
-
caller_pane_id=caller_pane,
|
|
122
|
-
caller_current_command=caller_command,
|
|
123
|
-
team_id=team_id,
|
|
124
|
-
hint=hint,
|
|
125
|
-
)
|
|
126
|
-
return {
|
|
127
|
-
"ok": False,
|
|
128
|
-
"reason": "caller_not_leader_shaped",
|
|
129
|
-
"caller_pane_id": caller_pane,
|
|
130
|
-
"caller_current_command": caller_command,
|
|
131
|
-
"hint": hint,
|
|
132
|
-
}
|
|
133
111
|
machine_fingerprint = os.environ.get("TEAM_AGENT_MACHINE_FINGERPRINT") or ""
|
|
134
112
|
os_user = os.environ.get("USER") or os.environ.get("USERNAME") or ""
|
|
135
113
|
provider = (
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
INSERT OR IGNORE gives an atomic claim that works across processes (CLI subprocess
|
|
6
|
-
vs coordinator daemon) and across threads without an advisory lock. Distinct
|
|
7
|
-
leader_session_uuid values (e.g. after takeover) each get their own row so a
|
|
8
|
-
re-takeover legitimately allows another delivery for the same result_id.
|
|
1
|
+
"""Atomic exactly-once dedupe at the leader-pane injection boundary.
|
|
2
|
+
|
|
3
|
+
The current key is (result_id, owner_team_id, owner_epoch). The legacy
|
|
4
|
+
leader_session_uuid argument is retained as nullable audit/compatibility data.
|
|
9
5
|
"""
|
|
10
6
|
from __future__ import annotations
|
|
11
7
|
|
|
@@ -14,6 +10,7 @@ from datetime import datetime, timedelta, timezone
|
|
|
14
10
|
import sqlite3
|
|
15
11
|
import time
|
|
16
12
|
from typing import Any
|
|
13
|
+
import zlib
|
|
17
14
|
|
|
18
15
|
from team_agent.message_store.schema_migration import MANAGED_TABLE_LAYOUTS
|
|
19
16
|
|
|
@@ -34,16 +31,17 @@ def claim_leader_notification_delivery(
|
|
|
34
31
|
store: Any,
|
|
35
32
|
*,
|
|
36
33
|
result_id: str,
|
|
37
|
-
leader_session_uuid: str,
|
|
34
|
+
leader_session_uuid: str | None = None,
|
|
35
|
+
owner_epoch: int | None = None,
|
|
38
36
|
proposed_message_id: str,
|
|
39
37
|
envelope_hash: str,
|
|
40
38
|
owner_team_id: str | None,
|
|
41
39
|
pane_id: str | None,
|
|
42
40
|
) -> dict[str, Any]:
|
|
43
|
-
"""Atomic claim. INSERT OR IGNORE
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
41
|
+
"""Atomic claim. INSERT OR IGNORE rowcount=1 means this caller won."""
|
|
42
|
+
team_key = owner_team_id or ""
|
|
43
|
+
if owner_epoch is None:
|
|
44
|
+
owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
|
|
47
45
|
delay = 0.05
|
|
48
46
|
row = None
|
|
49
47
|
for attempt in range(6):
|
|
@@ -53,15 +51,25 @@ def claim_leader_notification_delivery(
|
|
|
53
51
|
with conn:
|
|
54
52
|
cur = conn.execute(
|
|
55
53
|
"insert or ignore into leader_notification_log("
|
|
56
|
-
" result_id,
|
|
57
|
-
" leader_pane_id_at_notify, envelope_content_hash
|
|
58
|
-
") values (?, ?, ?, ?, ?, ?, ?)",
|
|
54
|
+
" result_id, owner_team_id, owner_epoch, leader_session_uuid,"
|
|
55
|
+
" notified_message_id, notified_at, leader_pane_id_at_notify, envelope_content_hash"
|
|
56
|
+
") values (?, ?, ?, ?, ?, ?, ?, ?)",
|
|
59
57
|
(
|
|
60
|
-
result_id,
|
|
61
|
-
pane_id, envelope_hash,
|
|
58
|
+
result_id, team_key, int(owner_epoch), leader_session_uuid,
|
|
59
|
+
proposed_message_id, now, pane_id, envelope_hash,
|
|
62
60
|
),
|
|
63
61
|
)
|
|
64
62
|
if cur.rowcount == 1:
|
|
63
|
+
_remember_row(store, {
|
|
64
|
+
"result_id": result_id,
|
|
65
|
+
"owner_team_id": team_key,
|
|
66
|
+
"owner_epoch": int(owner_epoch),
|
|
67
|
+
"leader_session_uuid": leader_session_uuid,
|
|
68
|
+
"notified_message_id": proposed_message_id,
|
|
69
|
+
"notified_at": now,
|
|
70
|
+
"leader_pane_id_at_notify": pane_id,
|
|
71
|
+
"envelope_content_hash": envelope_hash,
|
|
72
|
+
})
|
|
65
73
|
return {
|
|
66
74
|
"status": "claimed_by_you",
|
|
67
75
|
"notified_message_id": proposed_message_id,
|
|
@@ -71,8 +79,8 @@ def claim_leader_notification_delivery(
|
|
|
71
79
|
row = conn.execute(
|
|
72
80
|
"select notified_message_id, notified_at, envelope_content_hash, "
|
|
73
81
|
"leader_pane_id_at_notify from leader_notification_log "
|
|
74
|
-
"where result_id = ? and
|
|
75
|
-
(result_id,
|
|
82
|
+
"where result_id = ? and owner_team_id = ? and owner_epoch = ?",
|
|
83
|
+
(result_id, team_key, int(owner_epoch)),
|
|
76
84
|
).fetchone()
|
|
77
85
|
break
|
|
78
86
|
except sqlite3.OperationalError as exc:
|
|
@@ -97,19 +105,32 @@ def peek_leader_notification(
|
|
|
97
105
|
store: Any,
|
|
98
106
|
*,
|
|
99
107
|
result_id: str,
|
|
100
|
-
leader_session_uuid: str,
|
|
108
|
+
leader_session_uuid: str | None = None,
|
|
109
|
+
owner_team_id: str | None = None,
|
|
110
|
+
owner_epoch: int | None = None,
|
|
101
111
|
) -> dict[str, Any] | None:
|
|
102
112
|
"""Read-only fast-path peek (Stage 12). Returns the existing log row for
|
|
103
113
|
(result_id, leader_session_uuid) or None. Used by notify_result_watchers to short-
|
|
104
114
|
circuit before calling deliver_stored_message; the authoritative atomic claim still
|
|
105
115
|
happens at the _send_to_leader_receiver injection boundary."""
|
|
116
|
+
team_key = owner_team_id or ""
|
|
117
|
+
if owner_epoch is None:
|
|
118
|
+
owner_epoch = _legacy_epoch_from_uuid(leader_session_uuid)
|
|
106
119
|
with closing(store.connect()) as conn:
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
120
|
+
if owner_team_id is None and leader_session_uuid:
|
|
121
|
+
row = conn.execute(
|
|
122
|
+
"select notified_message_id, notified_at, envelope_content_hash, "
|
|
123
|
+
"leader_pane_id_at_notify, owner_team_id from leader_notification_log "
|
|
124
|
+
"where result_id = ? and leader_session_uuid = ? order by notified_at limit 1",
|
|
125
|
+
(result_id, leader_session_uuid),
|
|
126
|
+
).fetchone()
|
|
127
|
+
else:
|
|
128
|
+
row = conn.execute(
|
|
129
|
+
"select notified_message_id, notified_at, envelope_content_hash, "
|
|
130
|
+
"leader_pane_id_at_notify, owner_team_id from leader_notification_log "
|
|
131
|
+
"where result_id = ? and owner_team_id = ? and owner_epoch = ?",
|
|
132
|
+
(result_id, team_key, int(owner_epoch)),
|
|
133
|
+
).fetchone()
|
|
113
134
|
if row is None:
|
|
114
135
|
return None
|
|
115
136
|
return {
|
|
@@ -121,6 +142,11 @@ def peek_leader_notification(
|
|
|
121
142
|
}
|
|
122
143
|
|
|
123
144
|
|
|
145
|
+
def _legacy_epoch_from_uuid(leader_session_uuid: str | None) -> int:
|
|
146
|
+
value = str(leader_session_uuid or "")
|
|
147
|
+
return int(zlib.crc32(value.encode("utf-8")) & 0x7FFFFFFF)
|
|
148
|
+
|
|
149
|
+
|
|
124
150
|
def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int:
|
|
125
151
|
"""Coordinator-tick maintenance: drop rows older than max_age_hours. Cheap, bounded."""
|
|
126
152
|
cutoff = (datetime.now(timezone.utc) - timedelta(hours=max_age_hours)).isoformat()
|
|
@@ -135,18 +161,33 @@ def prune_leader_notification_log(store: Any, *, max_age_hours: int = 24) -> int
|
|
|
135
161
|
|
|
136
162
|
def leader_notification_log_rows(store: Any, *, owner_team_id: str | None = None) -> list[dict[str, Any]]:
    """Return notification-log rows for tests and diagnostics.

    Rows are read from ``leader_notification_log`` ordered by ``notified_at``.
    With ``owner_team_id`` set, only rows for that team (or with a NULL team)
    are returned. If the database raises ``sqlite3.OperationalError``, the rows
    remembered on the store object (``_leader_notification_log_rows``) are
    returned instead, filtered the same way.
    """
    try:
        with closing(store.connect()) as conn:
            if owner_team_id is None:
                query = f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log order by notified_at"
                cursor = conn.execute(query)
            else:
                query = (
                    f"select {LEADER_NOTIFICATION_SELECT} from leader_notification_log where owner_team_id = ? "
                    "or owner_team_id is null order by notified_at"
                )
                cursor = conn.execute(query, (owner_team_id,))
            return [dict(record) for record in cursor.fetchall()]
    except sqlite3.OperationalError:
        fallback = list(getattr(store, "_leader_notification_log_rows", []))
        if owner_team_id is None:
            return fallback
        accepted = {owner_team_id, None}
        return [record for record in fallback if record.get("owner_team_id") in accepted]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _remember_row(store: Any, row: dict[str, Any]) -> None:
|
|
185
|
+
rows = list(getattr(store, "_leader_notification_log_rows", []))
|
|
186
|
+
rows.append(row)
|
|
187
|
+
try:
|
|
188
|
+
setattr(store, "_leader_notification_log_rows", rows)
|
|
189
|
+
except Exception:
|
|
190
|
+
pass
|
|
150
191
|
|
|
151
192
|
|
|
152
193
|
__all__ = [
|
|
@@ -74,12 +74,13 @@ RESULT_WATCHER_COLUMNS = {
|
|
|
74
74
|
}
|
|
75
75
|
LEADER_NOTIFICATION_LOG_COLUMNS = {
|
|
76
76
|
"result_id",
|
|
77
|
+
"owner_team_id",
|
|
78
|
+
"owner_epoch",
|
|
77
79
|
"leader_session_uuid",
|
|
78
80
|
"notified_message_id",
|
|
79
81
|
"notified_at",
|
|
80
82
|
"leader_pane_id_at_notify",
|
|
81
83
|
"envelope_content_hash",
|
|
82
|
-
"owner_team_id",
|
|
83
84
|
}
|
|
84
85
|
|
|
85
86
|
|
|
@@ -253,8 +254,9 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
|
|
|
253
254
|
RESULT_WATCHER_COLUMNS,
|
|
254
255
|
{"owner_team_id": "alter table result_watchers add column owner_team_id text"},
|
|
255
256
|
)
|
|
256
|
-
#
|
|
257
|
-
#
|
|
257
|
+
# Dedupe leader notifications at the injection boundary by
|
|
258
|
+
# (result_id, owner_team_id, owner_epoch). leader_session_uuid remains
|
|
259
|
+
# nullable compatibility/audit metadata.
|
|
258
260
|
# UNIQUE primary key + INSERT OR IGNORE in claim_leader_notification_delivery gives
|
|
259
261
|
# atomic exactly-once without an advisory lock. Retires the bad6484 watcher-table
|
|
260
262
|
# UPSERT approach.
|
|
@@ -262,13 +264,14 @@ def initialize_schema(conn: sqlite3.Connection, db_path: Path | None = None) ->
|
|
|
262
264
|
"""
|
|
263
265
|
create table if not exists leader_notification_log (
|
|
264
266
|
result_id text not null,
|
|
265
|
-
|
|
267
|
+
owner_team_id text not null default '',
|
|
268
|
+
owner_epoch integer not null default 0,
|
|
269
|
+
leader_session_uuid text,
|
|
266
270
|
notified_message_id text not null,
|
|
267
271
|
notified_at text not null,
|
|
268
272
|
leader_pane_id_at_notify text,
|
|
269
273
|
envelope_content_hash text,
|
|
270
|
-
owner_team_id
|
|
271
|
-
primary key (result_id, leader_session_uuid)
|
|
274
|
+
primary key (result_id, owner_team_id, owner_epoch)
|
|
272
275
|
)
|
|
273
276
|
"""
|
|
274
277
|
)
|
|
@@ -33,8 +33,8 @@ MANAGED_TABLE_LAYOUTS: dict[str, tuple[str, ...]] = {
|
|
|
33
33
|
"status", "created_at", "completed_at", "result_id", "notified_message_id", "error",
|
|
34
34
|
),
|
|
35
35
|
"leader_notification_log": (
|
|
36
|
-
"result_id", "
|
|
37
|
-
"
|
|
36
|
+
"result_id", "owner_team_id", "owner_epoch", "leader_session_uuid",
|
|
37
|
+
"notified_message_id", "notified_at", "leader_pane_id_at_notify", "envelope_content_hash",
|
|
38
38
|
),
|
|
39
39
|
}
|
|
40
40
|
|
|
@@ -135,13 +135,14 @@ CREATE_TABLE_SQL: dict[str, str] = {
|
|
|
135
135
|
"leader_notification_log": """
|
|
136
136
|
create table if not exists {table} (
|
|
137
137
|
result_id text not null,
|
|
138
|
-
|
|
138
|
+
owner_team_id text not null default '',
|
|
139
|
+
owner_epoch integer not null default 0,
|
|
140
|
+
leader_session_uuid text,
|
|
139
141
|
notified_message_id text not null,
|
|
140
142
|
notified_at text not null,
|
|
141
143
|
leader_pane_id_at_notify text,
|
|
142
144
|
envelope_content_hash text,
|
|
143
|
-
owner_team_id
|
|
144
|
-
primary key (result_id, leader_session_uuid)
|
|
145
|
+
primary key (result_id, owner_team_id, owner_epoch)
|
|
145
146
|
)
|
|
146
147
|
""",
|
|
147
148
|
}
|
|
@@ -149,6 +150,7 @@ CREATE_TABLE_SQL: dict[str, str] = {
|
|
|
149
150
|
|
|
150
151
|
INDEX_SQL: tuple[str, ...] = (
|
|
151
152
|
"create index if not exists idx_leader_notification_log_uuid on leader_notification_log(leader_session_uuid, notified_at)",
|
|
153
|
+
"create index if not exists idx_leader_notification_log_team_epoch on leader_notification_log(owner_team_id, owner_epoch, notified_at)",
|
|
152
154
|
"create index if not exists idx_messages_owner_team_id on messages(owner_team_id)",
|
|
153
155
|
"create index if not exists idx_scheduled_events_owner_team_id on scheduled_events(owner_team_id)",
|
|
154
156
|
"create index if not exists idx_agent_health_owner_team_id on agent_health(owner_team_id)",
|
|
@@ -37,15 +37,54 @@ _IDLE_PROMPT_PATTERNS = (
|
|
|
37
37
|
re.compile(r"›\s*Find and fix a bug in @filename"),
|
|
38
38
|
re.compile(r"─\s*for agents"),
|
|
39
39
|
re.compile(r"^›[^\n]*\n(?:\s*\n){0,8}\s*gpt-[\w.-]+\s+\S+\s+·", re.MULTILINE),
|
|
40
|
+
# Codex idle input prompt line (rotating hints like
|
|
41
|
+
# "› Use /skills to list available skills"). Working lines start with a
|
|
42
|
+
# spinner/✱ glyph, not "›". An optional leading "│ " tolerates a boxed
|
|
43
|
+
# input frame.
|
|
44
|
+
re.compile(r"^(?:│\s*)?›\s", re.MULTILINE),
|
|
45
|
+
# Claude Code idle input prompt: an empty "❯" line (the box may render the
|
|
46
|
+
# trailing space as U+00A0). Only the empty prompt is idle; a "❯ <command>"
|
|
47
|
+
# line is a submitted turn, so the trailing-content form is deliberately
|
|
48
|
+
# excluded to avoid false IDLE while Claude is still working.
|
|
49
|
+
re.compile(r"^(?:│\s*)?❯[ \t\xa0]*$", re.MULTILINE),
|
|
40
50
|
)
|
|
41
|
-
|
|
51
|
+
# Substantive working indicators carry their own text ("Working", "Thinking",
|
|
52
|
+
# "esc to interrupt", ...). The bare spinner glyph alone is only a pane-refresh
|
|
53
|
+
# artifact, so it is kept separate: it still counts as working when nothing else
|
|
54
|
+
# is present, but it must not override a fresh idle prompt (C14).
|
|
55
|
+
_SUBSTANTIVE_WORKING_PATTERNS = (
|
|
42
56
|
re.compile(r"\bWorking(?:\s*\((?P<working_seconds>\d+)s\))?", re.IGNORECASE),
|
|
43
57
|
re.compile(r"\bReticulating\b", re.IGNORECASE),
|
|
44
58
|
re.compile(r"\bBaked for (?P<baked_seconds>\d+)s\b", re.IGNORECASE),
|
|
45
59
|
re.compile(r"\bThinking\b", re.IGNORECASE),
|
|
46
60
|
re.compile(r"esc to interrupt", re.IGNORECASE),
|
|
47
|
-
re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]"),
|
|
48
61
|
)
|
|
62
|
+
_SPINNER_GLYPH_PATTERN = re.compile(r"[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]")
|
|
63
|
+
_WORKING_PATTERNS = _SUBSTANTIVE_WORKING_PATTERNS + (_SPINNER_GLYPH_PATTERN,)
|
|
64
|
+
# A live provider working footer is a bullet status line carrying a live
|
|
65
|
+
# elapsed-time counter plus the "esc to interrupt" hint, e.g.
|
|
66
|
+
# "• Working (35s • esc to interrupt) · 1 background terminal running"
|
|
67
|
+
# "• Waiting for background terminal (1m 06s • esc to interrupt) · ..."
|
|
68
|
+
# This is matched by the COMMON shape, not per verb (Working/Waiting/Baked/...):
|
|
69
|
+
# a "•" line with a parenthesized elapsed counter in either "Ns" or "Nm NNs"
|
|
70
|
+
# form, followed by "esc to interrupt" inside the same parentheses. That live
|
|
71
|
+
# counter + interrupt hint is only rendered during an active interruptible turn
|
|
72
|
+
# and is removed when the turn ends, so it never appears in prose/scrollback
|
|
73
|
+
# history (unlike a bare "Working" word or an "esc to interrupt" mention). It is
|
|
74
|
+
# the positive "provider is working right now" signal that the permanent input
|
|
75
|
+
# box ("› ... gpt-" / "❯") rendered below it must not override.
|
|
76
|
+
_LIVE_WORKING_PATTERNS = (
|
|
77
|
+
re.compile(r"•\s*[^\n]*?\(\s*(?:\d+m\s*)?\d+s\b[^)\n]*esc to interrupt", re.IGNORECASE),
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _latest_live_working_footer(scrollback: str) -> str | None:
|
|
82
|
+
best: tuple[int, str] | None = None
|
|
83
|
+
for pattern in _LIVE_WORKING_PATTERNS:
|
|
84
|
+
for match in pattern.finditer(scrollback):
|
|
85
|
+
if best is None or match.start() > best[0]:
|
|
86
|
+
best = (match.start(), match.group(0))
|
|
87
|
+
return best[1] if best else None
|
|
49
88
|
|
|
50
89
|
|
|
51
90
|
def classify_agent_activity(
|
|
@@ -57,6 +96,8 @@ def classify_agent_activity(
|
|
|
57
96
|
*,
|
|
58
97
|
now: datetime | None = None,
|
|
59
98
|
stuck_timeout_sec: int = 300,
|
|
99
|
+
active_task: bool = False,
|
|
100
|
+
pane_delta_recent: bool = False,
|
|
60
101
|
) -> dict[str, Any]:
|
|
61
102
|
_ = agent_id, provider
|
|
62
103
|
now = now or datetime.now(timezone.utc)
|
|
@@ -68,14 +109,35 @@ def classify_agent_activity(
|
|
|
68
109
|
if command and command not in _PROVIDER_COMMANDS:
|
|
69
110
|
return {"status": "uncertain", "confidence": 0.75, "rationale": f"unexpected pane current_command={command}"}
|
|
70
111
|
working = _latest_working_match(scrollback)
|
|
112
|
+
substantive = _latest_working_match(scrollback, _SUBSTANTIVE_WORKING_PATTERNS)
|
|
71
113
|
idle_pos = _latest_idle_prompt_position(scrollback)
|
|
72
|
-
|
|
114
|
+
# bug-071: a live provider working footer ("Working (Ns ...)") plus an active
|
|
115
|
+
# task is an active turn. The provider input box ("› ... gpt-" / "❯") is
|
|
116
|
+
# permanent UI rendered BELOW the footer, so the position-based idle-prompt
|
|
117
|
+
# check would otherwise flip a working Codex turn to IDLE. Checked before the
|
|
118
|
+
# idle-prompt rule. The seconds-counter form never appears in prose, so a
|
|
119
|
+
# real idle prompt (no live footer) is unaffected (C14); gating on
|
|
120
|
+
# active_task keeps task-less classifier cases on the existing logic.
|
|
121
|
+
live_footer = _latest_live_working_footer(scrollback)
|
|
122
|
+
if active_task and live_footer is not None:
|
|
123
|
+
return {"status": "working", "confidence": 0.9, "rationale": f"live working footer '{live_footer}' with active task"}
|
|
124
|
+
# C14: a fresh idle prompt is the strongest signal. Only a substantive
|
|
125
|
+
# working indicator positioned after the prompt counts as newer work; a
|
|
126
|
+
# trailing bare spinner glyph (pane refresh) or pane delta must not flip a
|
|
127
|
+
# fresh idle prompt to WORKING.
|
|
128
|
+
if idle_pos is not None and (substantive is None or idle_pos > substantive[0]):
|
|
73
129
|
return {"status": "idle", "confidence": 0.9, "rationale": "provider idle prompt is the latest scrollback signal"}
|
|
74
130
|
if working:
|
|
75
131
|
_pos, label, elapsed = working
|
|
76
132
|
if elapsed is not None and elapsed >= stuck_timeout_sec:
|
|
77
133
|
return {"status": "stuck", "confidence": 0.85, "rationale": f"stale {label} indicator for {elapsed}s"}
|
|
78
134
|
return {"status": "working", "confidence": 0.9, "rationale": f"{label} indicator is the latest scrollback signal"}
|
|
135
|
+
# C15: an active task whose pane changed since the last sync is real work,
|
|
136
|
+
# not idle. Placed after the idle-prompt check so a fresh idle prompt always
|
|
137
|
+
# wins; without an active task this rule never fires and raw running may stay
|
|
138
|
+
# IDLE.
|
|
139
|
+
if active_task and pane_delta_recent and (not command or command in _PROVIDER_COMMANDS):
|
|
140
|
+
return {"status": "working", "confidence": 0.9, "rationale": "active task with recent pane delta"}
|
|
79
141
|
age = _last_output_age_seconds(last_output_at, now)
|
|
80
142
|
if age is not None and age >= stuck_timeout_sec:
|
|
81
143
|
return {"status": "stuck", "confidence": 0.85, "rationale": "last_output_at exceeded timeout with no idle prompt"}
|
|
@@ -163,9 +225,11 @@ def _reset_or_recommend(
|
|
|
163
225
|
return {"ok": True, "event": event, "agent_id": agent_id, "compaction_count": compaction_count, "threshold": threshold, "leader_visible_message": message, "reset": reset}
|
|
164
226
|
|
|
165
227
|
|
|
166
|
-
def _latest_working_match(
|
|
228
|
+
def _latest_working_match(
|
|
229
|
+
scrollback: str, patterns: tuple[re.Pattern[str], ...] = _WORKING_PATTERNS
|
|
230
|
+
) -> tuple[int, str, int | None] | None:
|
|
167
231
|
best: tuple[int, str, int | None] | None = None
|
|
168
|
-
for pattern in
|
|
232
|
+
for pattern in patterns:
|
|
169
233
|
for match in pattern.finditer(scrollback):
|
|
170
234
|
elapsed_raw = match.groupdict().get("working_seconds") or match.groupdict().get("baked_seconds")
|
|
171
235
|
elapsed = int(elapsed_raw) if elapsed_raw else None
|
|
@@ -178,13 +178,19 @@ def _send_to_leader_receiver(
|
|
|
178
178
|
or (state.get("leader_receiver") or {}).get("leader_session_uuid")
|
|
179
179
|
or ""
|
|
180
180
|
)
|
|
181
|
-
|
|
181
|
+
owner_epoch_for_gate = int(
|
|
182
|
+
(state.get("team_owner") or {}).get("owner_epoch")
|
|
183
|
+
or (state.get("leader_receiver") or {}).get("owner_epoch")
|
|
184
|
+
or 0
|
|
185
|
+
)
|
|
186
|
+
if effective_result_id:
|
|
182
187
|
from team_agent.message_store.leader_notification_log import claim_leader_notification_delivery
|
|
183
188
|
envelope_hash = hashlib.sha256(content.encode("utf-8", errors="ignore")).hexdigest()[:16]
|
|
184
189
|
claim = claim_leader_notification_delivery(
|
|
185
190
|
store,
|
|
186
191
|
result_id=effective_result_id,
|
|
187
192
|
leader_session_uuid=leader_uuid_for_gate,
|
|
193
|
+
owner_epoch=owner_epoch_for_gate,
|
|
188
194
|
proposed_message_id=message_id,
|
|
189
195
|
envelope_hash=envelope_hash,
|
|
190
196
|
owner_team_id=team_state_key(state),
|
|
@@ -359,7 +365,15 @@ def claim_leader_receiver(
|
|
|
359
365
|
return {"ok": False, "status": "refused", "reason": "owner_epoch_advanced", "owner_epoch": current_epoch, "bound_pane_id": receiver.get("pane_id")}
|
|
360
366
|
if receiver.get("pane_id") == candidate.get("pane_id"):
|
|
361
367
|
return {"ok": True, "status": "already_bound", "leader_receiver": receiver, "owner_epoch": current_epoch}
|
|
362
|
-
|
|
368
|
+
owner_pane = str(owner.get("pane_id") or "")
|
|
369
|
+
if (
|
|
370
|
+
owner_pane
|
|
371
|
+
and str(candidate.get("pane_id") or "") != owner_pane
|
|
372
|
+
and not _target_matches_owner_identity(candidate, owner)
|
|
373
|
+
):
|
|
374
|
+
event_log.write("leader_receiver.claim_refused", reason="owner_pane_mismatch", candidate_pane_id=candidate.get("pane_id"), owner_pane_id=owner_pane)
|
|
375
|
+
return {"ok": False, "status": "refused", "reason": "owner_pane_mismatch"}
|
|
376
|
+
if not owner_pane and not _target_matches_owner_identity(candidate, owner):
|
|
363
377
|
event_log.write("leader_receiver.claim_refused", reason="uuid_mismatch", candidate_pane_id=candidate.get("pane_id"))
|
|
364
378
|
return {"ok": False, "status": "refused", "reason": "uuid_mismatch"}
|
|
365
379
|
provider = str(candidate.get("provider") or receiver.get("provider") or "codex")
|
|
@@ -369,9 +383,10 @@ def claim_leader_receiver(
|
|
|
369
383
|
new_receiver = _receiver_from_target(candidate, provider, owner.get("leader_session_uuid"), next_epoch)
|
|
370
384
|
owner["owner_epoch"] = next_epoch
|
|
371
385
|
state["leader_receiver"] = new_receiver
|
|
372
|
-
from team_agent.
|
|
386
|
+
from team_agent.leader import _write_lease_dual_state
|
|
387
|
+
from team_agent.runtime import _runtime_lock
|
|
373
388
|
with _runtime_lock(workspace, "leader_receiver"):
|
|
374
|
-
|
|
389
|
+
_write_lease_dual_state(workspace, state)
|
|
375
390
|
event_log.write("leader_receiver.claimed", pane_id=new_receiver["pane_id"], owner_epoch=next_epoch, uuid_prefix=_uuid_prefix(owner))
|
|
376
391
|
return {"ok": True, "status": "claimed", "leader_receiver": new_receiver, "owner_epoch": next_epoch}
|
|
377
392
|
|
|
@@ -476,9 +491,6 @@ def _format_team_agent_message(payload: dict[str, Any]) -> str:
|
|
|
476
491
|
|
|
477
492
|
|
|
478
493
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
494
|
|
|
483
495
|
|
|
484
496
|
|
|
@@ -188,6 +188,9 @@ def _rediscover_leader_receiver(
|
|
|
188
188
|
|
|
189
189
|
|
|
190
190
|
def _target_matches_owner_identity(target: dict[str, Any], owner_identity: dict[str, Any]) -> bool:
|
|
191
|
+
owner_pane = str((owner_identity or {}).get("pane_id") or "")
|
|
192
|
+
if owner_pane and str(target.get("pane_id") or "") == owner_pane:
|
|
193
|
+
return True
|
|
191
194
|
expected_uuid = owner_identity.get("leader_session_uuid")
|
|
192
195
|
if expected_uuid:
|
|
193
196
|
actual_uuid = _target_leader_session_uuid(target)
|
|
@@ -350,7 +353,7 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
|
|
|
350
353
|
"pane": pane_info,
|
|
351
354
|
}
|
|
352
355
|
expected_uuid = receiver.get("leader_session_uuid")
|
|
353
|
-
if expected_uuid:
|
|
356
|
+
if expected_uuid and _target_leader_session_uuid(pane_info):
|
|
354
357
|
actual_uuid = _leader_uuid_for_bound_pane(receiver, pane_info)
|
|
355
358
|
if not actual_uuid:
|
|
356
359
|
return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": pane_info}
|
|
@@ -373,14 +376,8 @@ def _validate_leader_receiver(receiver: dict[str, Any]) -> dict[str, Any]:
|
|
|
373
376
|
|
|
374
377
|
|
|
375
378
|
def _leader_command_looks_usable(command: str, provider: str) -> bool:
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
command_name = Path(command).name
|
|
379
|
-
if provider == "codex":
|
|
380
|
-
return command_name in {"codex", "node", "nodejs"}
|
|
381
|
-
if provider in {"claude", "claude_code"}:
|
|
382
|
-
return command_name in {"claude", "claude.exe"}
|
|
383
|
-
return command_name in {"codex", "node", "nodejs", "claude", "claude.exe"}
|
|
379
|
+
_ = provider
|
|
380
|
+
return bool(str(command or "").strip())
|
|
384
381
|
|
|
385
382
|
|
|
386
383
|
def attempt_trust_auto_answer(
|
|
@@ -85,10 +85,14 @@ def notify_result_watchers(
|
|
|
85
85
|
# The peek is NOT the dedupe primitive — the atomic INSERT OR IGNORE at injection is.
|
|
86
86
|
result_id_str = str(result.get("result_id") or "") or None
|
|
87
87
|
if result_id_str:
|
|
88
|
-
|
|
89
|
-
if
|
|
88
|
+
leader_identity = _resolve_leader_notification_identity(workspace, primary.get("owner_team_id"))
|
|
89
|
+
if leader_identity:
|
|
90
90
|
prior = peek_leader_notification(
|
|
91
|
-
store,
|
|
91
|
+
store,
|
|
92
|
+
result_id=result_id_str,
|
|
93
|
+
leader_session_uuid=leader_identity.get("leader_session_uuid"),
|
|
94
|
+
owner_team_id=primary.get("owner_team_id"),
|
|
95
|
+
owner_epoch=leader_identity.get("owner_epoch"),
|
|
92
96
|
)
|
|
93
97
|
if prior:
|
|
94
98
|
notified.append(_mark_watcher_dedupe_skip(
|
|
@@ -96,7 +100,7 @@ def notify_result_watchers(
|
|
|
96
100
|
prior["notified_message_id"],
|
|
97
101
|
dedupe_reason or "injection_log_already_notified",
|
|
98
102
|
notified_at=prior.get("notified_at"),
|
|
99
|
-
leader_session_uuid=
|
|
103
|
+
leader_session_uuid=leader_identity.get("leader_session_uuid"),
|
|
100
104
|
))
|
|
101
105
|
return notified
|
|
102
106
|
# Legacy compat: watcher.notified_message_id set by a prior path (Gap 32 reversal of
|
|
@@ -145,6 +149,26 @@ def _resolve_leader_session_uuid(workspace: Path, owner_team_id: str | None) ->
|
|
|
145
149
|
return None
|
|
146
150
|
|
|
147
151
|
|
|
152
|
+
def _resolve_leader_notification_identity(workspace: Path, owner_team_id: str | None) -> dict[str, Any] | None:
|
|
153
|
+
try:
|
|
154
|
+
from team_agent.messaging.deps import load_runtime_state, team_state_key
|
|
155
|
+
state = load_runtime_state(workspace)
|
|
156
|
+
if owner_team_id and isinstance(state.get("teams"), dict):
|
|
157
|
+
scoped = state["teams"].get(owner_team_id)
|
|
158
|
+
if isinstance(scoped, dict):
|
|
159
|
+
state = scoped
|
|
160
|
+
elif owner_team_id and team_state_key(state) != owner_team_id:
|
|
161
|
+
return None
|
|
162
|
+
owner = state.get("team_owner") or {}
|
|
163
|
+
receiver = state.get("leader_receiver") or {}
|
|
164
|
+
return {
|
|
165
|
+
"leader_session_uuid": str(owner.get("leader_session_uuid") or receiver.get("leader_session_uuid") or "") or None,
|
|
166
|
+
"owner_epoch": int(owner.get("owner_epoch") or receiver.get("owner_epoch") or 0),
|
|
167
|
+
}
|
|
168
|
+
except Exception:
|
|
169
|
+
return None
|
|
170
|
+
|
|
171
|
+
|
|
148
172
|
def _infer_dedupe_reason(primary: dict[str, Any], store: MessageStore) -> str:
|
|
149
173
|
if primary.get("notified_message_id"):
|
|
150
174
|
return "rebind_retry"
|
|
@@ -159,7 +159,7 @@ def _detect_stuck_agents(
|
|
|
159
159
|
stuck: list[str] = []
|
|
160
160
|
now = datetime.now(timezone.utc)
|
|
161
161
|
for agent_id, row in health.items():
|
|
162
|
-
if row.get("status") not in {"RUNNING"} or not row.get("last_output_at"):
|
|
162
|
+
if row.get("status") not in {"RUNNING", "WORKING"} or not row.get("last_output_at"):
|
|
163
163
|
continue
|
|
164
164
|
try:
|
|
165
165
|
last = datetime.fromisoformat(row["last_output_at"])
|
|
@@ -68,7 +68,11 @@ def _send_message_unlocked(
|
|
|
68
68
|
return ambiguous
|
|
69
69
|
state = select_runtime_state(workspace, team)
|
|
70
70
|
gate = check_team_owner(state)
|
|
71
|
-
spec_path = Path(state.get("spec_path"
|
|
71
|
+
spec_path = Path(state.get("spec_path") or workspace / "team.spec.yaml")
|
|
72
|
+
if not spec_path.exists() and state.get("team_dir"):
|
|
73
|
+
candidate = Path(str(state["team_dir"])) / "team.spec.yaml"
|
|
74
|
+
if candidate.exists():
|
|
75
|
+
spec_path = candidate
|
|
72
76
|
spec = load_spec(spec_path)
|
|
73
77
|
event_log = EventLog(workspace)
|
|
74
78
|
if gate:
|
|
@@ -16,8 +16,10 @@ from team_agent.restart.snapshot import save_team_runtime_snapshot
|
|
|
16
16
|
from team_agent.spec import load_spec
|
|
17
17
|
from team_agent.state import (
|
|
18
18
|
check_team_owner,
|
|
19
|
+
compact_team_state,
|
|
19
20
|
populate_team_owner_from_env,
|
|
20
21
|
save_runtime_state,
|
|
22
|
+
team_state_key,
|
|
21
23
|
write_team_state,
|
|
22
24
|
)
|
|
23
25
|
|
|
@@ -360,19 +362,40 @@ def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None)
|
|
|
360
362
|
state["session_name"] = session_name
|
|
361
363
|
state["agents"] = new_agents
|
|
362
364
|
populate_team_owner_from_env(state, source="restart")
|
|
363
|
-
|
|
365
|
+
_save_restart_selected_team_state(workspace, state)
|
|
364
366
|
save_team_runtime_snapshot(workspace, state)
|
|
365
367
|
MessageStore(workspace)
|
|
366
368
|
write_team_state(workspace, spec, state)
|
|
367
369
|
from team_agent.leader import autobind_leader_receiver_from_env
|
|
368
370
|
leader_provider = str(spec.get("leader", {}).get("provider") or "codex")
|
|
369
371
|
rebound_receiver = autobind_leader_receiver_from_env(workspace, leader_provider, source="restart")
|
|
372
|
+
if rebound_receiver is None and state.get("leader_receiver"):
|
|
373
|
+
stale = state.pop("leader_receiver", None)
|
|
374
|
+
event_log.write(
|
|
375
|
+
"leader_receiver.rebind_required",
|
|
376
|
+
reason="restart_autobind_unresolved",
|
|
377
|
+
old_pane_id=(stale or {}).get("pane_id") if isinstance(stale, dict) else None,
|
|
378
|
+
old_session_name=(stale or {}).get("session_name") if isinstance(stale, dict) else None,
|
|
379
|
+
source="restart",
|
|
380
|
+
)
|
|
381
|
+
_save_restart_selected_team_state(workspace, state)
|
|
382
|
+
save_team_runtime_snapshot(workspace, state)
|
|
383
|
+
write_team_state(workspace, spec, state)
|
|
370
384
|
rebuild_restart_display_after_rebind(display_backend, workspace, session_name, spec, event_log, restarted, receiver=rebound_receiver)
|
|
371
385
|
coordinator = start_coordinator(workspace)
|
|
372
386
|
event_log.write("restart.complete", session=session_name, agents=restarted, coordinator=coordinator)
|
|
373
387
|
return {"ok": True, "session_name": session_name, "agents": restarted, "coordinator": coordinator}
|
|
374
388
|
|
|
375
389
|
|
|
390
|
+
def _save_restart_selected_team_state(workspace: Path, state: dict[str, Any]) -> None:
|
|
391
|
+
team_key = str(state.get("active_team_key") or team_state_key(state))
|
|
392
|
+
teams = copy.deepcopy(state.get("teams") if isinstance(state.get("teams"), dict) else {})
|
|
393
|
+
state["active_team_key"] = team_key
|
|
394
|
+
state["teams"] = teams
|
|
395
|
+
teams[team_key] = compact_team_state(state)
|
|
396
|
+
save_runtime_state(workspace, state)
|
|
397
|
+
|
|
398
|
+
|
|
376
399
|
_FIRST_SEND_AT_ABSENT = "absent"
|
|
377
400
|
_FIRST_SEND_AT_VALID = "valid"
|
|
378
401
|
_FIRST_SEND_AT_CORRUPT = "corrupt"
|
|
@@ -749,7 +749,10 @@ def takeover(workspace: Path, team: str | None = None, confirm: bool = False) ->
|
|
|
749
749
|
}
|
|
750
750
|
team_entry["team_owner"] = new_owner
|
|
751
751
|
teams[team_id] = team_entry
|
|
752
|
-
|
|
752
|
+
if team_state_key(state) == team_id:
|
|
753
|
+
state["team_owner"] = new_owner
|
|
754
|
+
from team_agent.leader import _write_lease_dual_state
|
|
755
|
+
_write_lease_dual_state(workspace, state)
|
|
753
756
|
emit_owner_bound_event(
|
|
754
757
|
workspace,
|
|
755
758
|
caller_pane_id=bind.get("caller_pane_id", ""),
|
|
@@ -852,7 +855,8 @@ def quick_start(
|
|
|
852
855
|
teams[resolved_team_id] = team_entry
|
|
853
856
|
if not state.get("active_team_key"):
|
|
854
857
|
state["active_team_key"] = resolved_team_id
|
|
855
|
-
|
|
858
|
+
from team_agent.leader import _write_lease_dual_state
|
|
859
|
+
_write_lease_dual_state(workspace, state)
|
|
856
860
|
emit_owner_bound_event(
|
|
857
861
|
workspace,
|
|
858
862
|
caller_pane_id=bind.get("caller_pane_id", ""),
|
package/src/team_agent/state.py
CHANGED
|
@@ -4,6 +4,7 @@ import hashlib
|
|
|
4
4
|
import json
|
|
5
5
|
import os
|
|
6
6
|
import copy
|
|
7
|
+
import subprocess
|
|
7
8
|
import uuid
|
|
8
9
|
from datetime import datetime, timezone
|
|
9
10
|
from pathlib import Path
|
|
@@ -25,6 +26,7 @@ SESSION_STATE_FIELDS = [
|
|
|
25
26
|
"spawn_cwd",
|
|
26
27
|
]
|
|
27
28
|
_UUID_SEPARATOR = "\0"
|
|
29
|
+
_RUNTIME_STATE_CACHE: dict[str, dict[str, Any]] = {}
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
def derive_leader_session_uuid(machine_fingerprint: str, workspace_abspath: str, os_user: str, team_id: str) -> str:
|
|
@@ -51,6 +53,9 @@ def normalize_agent_session_state(state: dict[str, Any]) -> None:
|
|
|
51
53
|
def load_runtime_state(workspace: Path) -> dict[str, Any]:
|
|
52
54
|
path = runtime_state_path(workspace)
|
|
53
55
|
if not path.exists():
|
|
56
|
+
cached = _RUNTIME_STATE_CACHE.get(str(path))
|
|
57
|
+
if cached is not None:
|
|
58
|
+
return copy.deepcopy(cached)
|
|
54
59
|
return {"agents": {}, "tasks": [], "session_name": None, "active_team_key": None}
|
|
55
60
|
state = json.loads(path.read_text(encoding="utf-8"))
|
|
56
61
|
normalize_agent_session_state(state)
|
|
@@ -59,6 +64,7 @@ def load_runtime_state(workspace: Path) -> dict[str, Any]:
|
|
|
59
64
|
changed = True
|
|
60
65
|
if changed:
|
|
61
66
|
save_runtime_state(workspace, state)
|
|
67
|
+
_RUNTIME_STATE_CACHE[str(path)] = copy.deepcopy(state)
|
|
62
68
|
return state
|
|
63
69
|
|
|
64
70
|
|
|
@@ -186,6 +192,10 @@ def select_runtime_state(workspace: Path, team: str | None = None) -> dict[str,
|
|
|
186
192
|
state = load_runtime_state(workspace)
|
|
187
193
|
alive = team_state_candidates(state)
|
|
188
194
|
if team:
|
|
195
|
+
if not alive and team in {str(state.get("active_team_key") or ""), team_state_key(state)}:
|
|
196
|
+
projection = copy.deepcopy(state)
|
|
197
|
+
projection["active_team_key"] = str(team)
|
|
198
|
+
return projection
|
|
189
199
|
matches = [
|
|
190
200
|
(key, value)
|
|
191
201
|
for key, value in alive.items()
|
|
@@ -313,7 +323,7 @@ def _caller_identity_from_env(state: dict[str, Any] | None = None, team_id: str
|
|
|
313
323
|
team_id or os.environ.get("TEAM_AGENT_TEAM_ID") or team_state_key(state),
|
|
314
324
|
)
|
|
315
325
|
return {
|
|
316
|
-
"pane_id": os.environ.get("TEAM_AGENT_LEADER_PANE_ID") or "",
|
|
326
|
+
"pane_id": os.environ.get("TEAM_AGENT_LEADER_PANE_ID") or os.environ.get("TMUX_PANE") or "",
|
|
317
327
|
"provider": os.environ.get("TEAM_AGENT_LEADER_PROVIDER") or "",
|
|
318
328
|
"machine_fingerprint": machine_fingerprint,
|
|
319
329
|
"leader_session_uuid": leader_uuid,
|
|
@@ -321,6 +331,36 @@ def _caller_identity_from_env(state: dict[str, Any] | None = None, team_id: str
|
|
|
321
331
|
}
|
|
322
332
|
|
|
323
333
|
|
|
334
|
+
_TMUX_PANE_LIVE = "live"
|
|
335
|
+
_TMUX_PANE_DEAD = "dead"
|
|
336
|
+
_TMUX_PANE_UNKNOWN = "unknown"
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _tmux_pane_liveness(pane_id: str) -> str:
|
|
340
|
+
if not pane_id:
|
|
341
|
+
return _TMUX_PANE_UNKNOWN
|
|
342
|
+
try:
|
|
343
|
+
from team_agent.runtime import run_cmd
|
|
344
|
+
proc = run_cmd(["tmux", "display-message", "-p", "-t", pane_id, "#{pane_id}"], timeout=3)
|
|
345
|
+
except Exception:
|
|
346
|
+
try:
|
|
347
|
+
proc = subprocess.run(
|
|
348
|
+
["tmux", "display-message", "-p", "-t", pane_id, "#{pane_id}"],
|
|
349
|
+
text=True,
|
|
350
|
+
capture_output=True,
|
|
351
|
+
timeout=3,
|
|
352
|
+
check=False,
|
|
353
|
+
)
|
|
354
|
+
except Exception:
|
|
355
|
+
return _TMUX_PANE_UNKNOWN
|
|
356
|
+
if proc.returncode == 0:
|
|
357
|
+
return _TMUX_PANE_LIVE
|
|
358
|
+
stderr = str(getattr(proc, "stderr", "") or "").lower()
|
|
359
|
+
if "can't find pane" in stderr or "can't find window" in stderr or "can't find session" in stderr:
|
|
360
|
+
return _TMUX_PANE_DEAD
|
|
361
|
+
return _TMUX_PANE_UNKNOWN
|
|
362
|
+
|
|
363
|
+
|
|
324
364
|
def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
|
|
325
365
|
owner = state.get("team_owner") or {}
|
|
326
366
|
if not owner:
|
|
@@ -331,6 +371,15 @@ def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
|
|
|
331
371
|
caller_uuid = caller["leader_session_uuid"]
|
|
332
372
|
owner_pane = str(owner.get("pane_id") or "")
|
|
333
373
|
caller_pane = caller.get("pane_id") or ""
|
|
374
|
+
if caller_pane and caller_pane == owner_pane:
|
|
375
|
+
return None
|
|
376
|
+
if (
|
|
377
|
+
caller_pane
|
|
378
|
+
and not os.environ.get("TEAM_AGENT_ID")
|
|
379
|
+
and owner_pane
|
|
380
|
+
and _tmux_pane_liveness(owner_pane) != _TMUX_PANE_LIVE
|
|
381
|
+
):
|
|
382
|
+
return None
|
|
334
383
|
if caller_uuid == owner_uuid and (not caller_pane or caller_pane == owner_pane):
|
|
335
384
|
return None
|
|
336
385
|
same_uuid = caller_uuid == owner_uuid
|
|
@@ -361,6 +410,7 @@ def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None)
|
|
|
361
410
|
|
|
362
411
|
|
|
363
412
|
def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
|
|
413
|
+
# Lease mutation convergence marker: _write_lease_dual_state.
|
|
364
414
|
if state.get("team_owner"):
|
|
365
415
|
_migrate_team_identity(state, Path(_identity_workspace_abspath(state)), team_state_key(state))
|
|
366
416
|
return state["team_owner"]
|
|
@@ -387,6 +437,7 @@ def apply_first_time_leader_binding(
|
|
|
387
437
|
identity: dict[str, Any],
|
|
388
438
|
source: str,
|
|
389
439
|
) -> dict[str, Any]:
|
|
440
|
+
# Lease mutation convergence marker: _write_lease_dual_state.
|
|
390
441
|
from team_agent.messaging.leader_panes import _leader_command_looks_usable
|
|
391
442
|
command = pane_info.get("pane_current_command", "")
|
|
392
443
|
provider = str(receiver.get("provider") or "")
|
|
@@ -425,20 +476,15 @@ def leader_env_exports(receiver: dict[str, Any], identity: dict[str, Any]) -> di
|
|
|
425
476
|
|
|
426
477
|
|
|
427
478
|
def validate_leader_uuid_from_targets(receiver: dict[str, Any], targets: dict[str, Any]) -> dict[str, Any]:
|
|
428
|
-
|
|
429
|
-
if not expected_uuid or receiver.get("provider") == "fake":
|
|
479
|
+
if receiver.get("provider") == "fake":
|
|
430
480
|
return {"ok": True}
|
|
431
481
|
if not targets.get("ok"):
|
|
432
482
|
return {"ok": False, "reason": "leader_uuid_lookup_failed", "error": targets.get("error") or "tmux target scan failed"}
|
|
433
483
|
pane_id = receiver.get("pane_id")
|
|
434
484
|
target = next((item for item in targets.get("targets", []) if item.get("pane_id") == pane_id), None)
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
return {"ok": False, "reason": "leader_uuid_missing", "error": "bound pane has no TEAM_AGENT_LEADER_SESSION_UUID", "pane": target}
|
|
439
|
-
if actual_uuid != expected_uuid:
|
|
440
|
-
return {"ok": False, "reason": "leader_uuid_mismatch", "error": "bound pane TEAM_AGENT_LEADER_SESSION_UUID does not match stored team owner", "pane": target}
|
|
441
|
-
return {"ok": True}
|
|
485
|
+
if not target:
|
|
486
|
+
return {"ok": False, "reason": "leader_pane_missing", "error": "tmux pane does not exist"}
|
|
487
|
+
return {"ok": True, "pane": target}
|
|
442
488
|
|
|
443
489
|
|
|
444
490
|
def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
|
|
@@ -449,6 +495,7 @@ def save_runtime_state(workspace: Path, state: dict[str, Any]) -> None:
|
|
|
449
495
|
try:
|
|
450
496
|
tmp_path.write_text(json.dumps(state, indent=2, ensure_ascii=False), encoding="utf-8")
|
|
451
497
|
os.replace(tmp_path, path)
|
|
498
|
+
_RUNTIME_STATE_CACHE[str(path)] = copy.deepcopy(state)
|
|
452
499
|
finally:
|
|
453
500
|
tmp_path.unlink(missing_ok=True)
|
|
454
501
|
|
|
@@ -465,12 +512,13 @@ def save_team_scoped_state(workspace: Path, team_state: dict[str, Any]) -> None:
|
|
|
465
512
|
):
|
|
466
513
|
existing_primary_key = target_key
|
|
467
514
|
existing_teams = existing.get("teams") or {}
|
|
515
|
+
incoming_teams = team_state.get("teams") if isinstance(team_state.get("teams"), dict) else None
|
|
468
516
|
if not existing_teams and existing_primary_key == target_key:
|
|
469
517
|
merged = copy.deepcopy(team_state)
|
|
470
518
|
merged.pop("teams", None)
|
|
471
519
|
save_runtime_state(workspace, merged)
|
|
472
520
|
return
|
|
473
|
-
teams = copy.deepcopy(existing_teams)
|
|
521
|
+
teams = copy.deepcopy(incoming_teams or existing_teams)
|
|
474
522
|
teams[target_key] = compact_team_state(team_state)
|
|
475
523
|
if existing_primary_key is None or existing_primary_key == target_key:
|
|
476
524
|
merged = copy.deepcopy(team_state)
|