@team-agent/installer 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/team_agent/cli/__init__.py +2 -0
- package/src/team_agent/cli/commands.py +4 -0
- package/src/team_agent/cli/parser.py +8 -1
- package/src/team_agent/coordinator/lifecycle.py +15 -0
- package/src/team_agent/lifecycle/paste_buffer_hygiene.py +39 -0
- package/src/team_agent/lifecycle/start.py +3 -0
- package/src/team_agent/messaging/delivery.py +10 -0
- package/src/team_agent/messaging/idle_alerts.py +126 -20
- package/src/team_agent/messaging/owner_bypass.py +29 -0
- package/src/team_agent/messaging/scheduler.py +10 -0
- package/src/team_agent/messaging/send.py +9 -2
- package/src/team_agent/messaging/session_drift.py +94 -0
- package/src/team_agent/runtime.py +18 -10
- package/src/team_agent/state.py +14 -0
package/package.json
CHANGED
|
@@ -56,6 +56,7 @@ from team_agent.cli.commands import (
|
|
|
56
56
|
cmd_remove_agent,
|
|
57
57
|
cmd_stuck_list,
|
|
58
58
|
cmd_stuck_cancel,
|
|
59
|
+
cmd_acknowledge_idle,
|
|
59
60
|
cmd_allow_peer_talk,
|
|
60
61
|
cmd_advanced,
|
|
61
62
|
cmd_install_skill,
|
|
@@ -122,6 +123,7 @@ __all__ = [
|
|
|
122
123
|
'cmd_remove_agent',
|
|
123
124
|
'cmd_stuck_list',
|
|
124
125
|
'cmd_stuck_cancel',
|
|
126
|
+
'cmd_acknowledge_idle',
|
|
125
127
|
'cmd_allow_peer_talk',
|
|
126
128
|
'cmd_advanced',
|
|
127
129
|
'cmd_install_skill',
|
|
@@ -273,6 +273,10 @@ def cmd_stuck_cancel(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
273
273
|
)
|
|
274
274
|
|
|
275
275
|
|
|
276
|
+
def cmd_acknowledge_idle(args: argparse.Namespace) -> dict[str, Any]:
|
|
277
|
+
return runtime.acknowledge_idle(Path(args.workspace).resolve(), team=args.team)
|
|
278
|
+
|
|
279
|
+
|
|
276
280
|
def cmd_allow_peer_talk(args: argparse.Namespace) -> dict[str, Any]:
|
|
277
281
|
return runtime.allow_peer_talk(Path(args.workspace).resolve(), args.agent_a, args.agent_b)
|
|
278
282
|
|
|
@@ -46,6 +46,7 @@ from team_agent.cli.commands import (
|
|
|
46
46
|
cmd_remove_agent,
|
|
47
47
|
cmd_stuck_list,
|
|
48
48
|
cmd_stuck_cancel,
|
|
49
|
+
cmd_acknowledge_idle,
|
|
49
50
|
cmd_allow_peer_talk,
|
|
50
51
|
cmd_advanced,
|
|
51
52
|
cmd_install_skill,
|
|
@@ -372,6 +373,12 @@ def main(argv: list[str] | None = None) -> None:
|
|
|
372
373
|
add_json(p)
|
|
373
374
|
p.set_defaults(func=cmd_stuck_cancel)
|
|
374
375
|
|
|
376
|
+
p = sub.add_parser("acknowledge-idle", help="Suppress idle-fallback reminders for this team for a bounded window (default 30 minutes)")
|
|
377
|
+
p.add_argument("team", nargs="?", help="Explicit team/session target when a workspace has multiple teams")
|
|
378
|
+
p.add_argument("--workspace", default=".")
|
|
379
|
+
add_json(p)
|
|
380
|
+
p.set_defaults(func=cmd_acknowledge_idle)
|
|
381
|
+
|
|
375
382
|
p = sub.add_parser("install-skill", help=argparse.SUPPRESS)
|
|
376
383
|
p.add_argument("--target", choices=["codex", "claude", "all"], default="codex")
|
|
377
384
|
p.add_argument("--dest", help="Explicit destination directory; overrides --target")
|
|
@@ -422,7 +429,7 @@ def main(argv: list[str] | None = None) -> None:
|
|
|
422
429
|
sub._choices_actions = [ # type: ignore[attr-defined]
|
|
423
430
|
action for action in sub._choices_actions if action.help != argparse.SUPPRESS # type: ignore[attr-defined]
|
|
424
431
|
]
|
|
425
|
-
sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,doctor}"
|
|
432
|
+
sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
|
|
426
433
|
|
|
427
434
|
args = parser.parse_args(raw_argv)
|
|
428
435
|
try:
|
|
@@ -265,6 +265,7 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
|
|
|
265
265
|
detect_idle_fallbacks,
|
|
266
266
|
)
|
|
267
267
|
from team_agent.messaging.activity_detector import detect_compaction_degradation
|
|
268
|
+
from team_agent.messaging.session_drift import detect_session_drift
|
|
268
269
|
from team_agent.state import load_runtime_state, save_runtime_state
|
|
269
270
|
state = load_runtime_state(workspace)
|
|
270
271
|
event_log = EventLog(workspace)
|
|
@@ -304,6 +305,19 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
|
|
|
304
305
|
)
|
|
305
306
|
if result.get("event") and result.get("event") != "compaction_threshold_crossed.none":
|
|
306
307
|
compaction_results.append(result)
|
|
308
|
+
drift_results: list[dict[str, Any]] = []
|
|
309
|
+
for agent_id, agent_state in state.get("agents", {}).items():
|
|
310
|
+
if str(agent_state.get("provider") or "") != "codex":
|
|
311
|
+
continue
|
|
312
|
+
scrollback = str((captures.get(agent_id) or {}).get("scrollback") or "")
|
|
313
|
+
if not scrollback:
|
|
314
|
+
continue
|
|
315
|
+
drift = detect_session_drift(
|
|
316
|
+
workspace, state, event_log,
|
|
317
|
+
agent_id=agent_id, agent_state=agent_state, scrollback=scrollback,
|
|
318
|
+
)
|
|
319
|
+
if drift:
|
|
320
|
+
drift_results.append(drift)
|
|
307
321
|
save_runtime_state(workspace, state)
|
|
308
322
|
results = _collect_results_and_notify_watchers(workspace, event_log)
|
|
309
323
|
return {
|
|
@@ -315,5 +329,6 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
|
|
|
315
329
|
"idle_alerts": idle_alerts,
|
|
316
330
|
"deadlock_alerts": deadlock_alerts,
|
|
317
331
|
"compaction": compaction_results,
|
|
332
|
+
"session_drift": drift_results,
|
|
318
333
|
"results": results,
|
|
319
334
|
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from team_agent.events import EventLog
|
|
7
|
+
|
|
8
|
+
_TEAM_AGENT_BUFFER_PREFIXES = ("team-agent-send-", "team-agent-leader-receiver-", "team-agent-")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _is_team_agent_buffer(name: str) -> bool:
|
|
12
|
+
return any(name.startswith(prefix) for prefix in _TEAM_AGENT_BUFFER_PREFIXES)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def cleanup_stale_team_agent_buffers(workspace: Path, event_log: EventLog, *, context: str) -> dict[str, Any]:
|
|
16
|
+
from team_agent.runtime import run_cmd
|
|
17
|
+
proc = run_cmd(["tmux", "list-buffers", "-F", "#{buffer_name}"], timeout=5)
|
|
18
|
+
if proc.returncode != 0:
|
|
19
|
+
event_log.write("paste_buffer_hygiene.list_failed", context=context, stderr=proc.stderr.strip()[:200])
|
|
20
|
+
return {"ok": False, "deleted": [], "reason": "list_buffers_failed"}
|
|
21
|
+
names = [line.strip() for line in proc.stdout.splitlines() if line.strip()]
|
|
22
|
+
targets = [name for name in names if _is_team_agent_buffer(name)]
|
|
23
|
+
deleted: list[str] = []
|
|
24
|
+
for name in targets:
|
|
25
|
+
delete_proc = run_cmd(["tmux", "delete-buffer", "-b", name], timeout=5)
|
|
26
|
+
if delete_proc.returncode == 0:
|
|
27
|
+
deleted.append(name)
|
|
28
|
+
if deleted:
|
|
29
|
+
event_log.write(
|
|
30
|
+
"paste_buffer_hygiene.prevented_resume_injection",
|
|
31
|
+
context=context,
|
|
32
|
+
deleted_buffers=deleted,
|
|
33
|
+
scanned_count=len(names),
|
|
34
|
+
matched_count=len(targets),
|
|
35
|
+
)
|
|
36
|
+
return {"ok": True, "deleted": deleted, "scanned": len(names), "matched": len(targets)}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
__all__ = ["cleanup_stale_team_agent_buffers"]
|
|
@@ -219,6 +219,8 @@ def _start_agent_unlocked(workspace: Path, agent_id: str, force: bool, open_disp
|
|
|
219
219
|
reason="rollout_missing" if start_mode == "fresh_after_missing_rollout" else "session_id_missing",
|
|
220
220
|
)
|
|
221
221
|
|
|
222
|
+
from team_agent.lifecycle.paste_buffer_hygiene import cleanup_stale_team_agent_buffers
|
|
223
|
+
cleanup_stale_team_agent_buffers(workspace, event_log, context=f"start_agent:{agent_id}")
|
|
222
224
|
tmux_cmd, tmux_start_mode = _tmux_start_command_for_agent_window(session_name, agent_id, command)
|
|
223
225
|
event_log.write(
|
|
224
226
|
"start_agent.agent_start",
|
|
@@ -273,6 +275,7 @@ def _start_agent_unlocked(workspace: Path, agent_id: str, force: bool, open_disp
|
|
|
273
275
|
)
|
|
274
276
|
command = shell_command_for_agent(command_agent, workspace, mcp_config)
|
|
275
277
|
start_mode = "fresh_after_missing_rollout" if missing_resume_rollout else "fresh"
|
|
278
|
+
cleanup_stale_team_agent_buffers(workspace, event_log, context=f"start_agent_fallback:{agent_id}")
|
|
276
279
|
tmux_cmd, tmux_start_mode = _tmux_start_command_for_agent_window(session_name, agent_id, command)
|
|
277
280
|
event_log.write(
|
|
278
281
|
"start_agent.agent_start",
|
|
@@ -121,6 +121,16 @@ def _deliver_pending_messages(workspace: Path, state: dict[str, Any], event_log:
|
|
|
121
121
|
for row in store.messages():
|
|
122
122
|
if row["status"] not in {"pending", "accepted"}:
|
|
123
123
|
continue
|
|
124
|
+
agent_state = state.get("agents", {}).get(row["recipient"]) or {}
|
|
125
|
+
if str(agent_state.get("status") or "").lower() == "busy":
|
|
126
|
+
event_log.write(
|
|
127
|
+
"send.deferred_busy",
|
|
128
|
+
message_id=row["message_id"],
|
|
129
|
+
sender=row.get("sender"),
|
|
130
|
+
recipient=row["recipient"],
|
|
131
|
+
reason="recipient_busy",
|
|
132
|
+
)
|
|
133
|
+
continue
|
|
124
134
|
result = _deliver_pending_message(workspace, state, row["message_id"], wait_visible=True, timeout=30.0)
|
|
125
135
|
if result.get("ok"):
|
|
126
136
|
delivered.append(row["message_id"])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from datetime import datetime, timezone
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
@@ -23,33 +23,116 @@ _UNDELIVERED_MESSAGE_STATUSES = {
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
STABLE_IDLE_SECONDS = 120
|
|
27
|
+
FIRE_DEBOUNCE_SECONDS = 300
|
|
28
|
+
OBLIGATION_PENDING_MIN_AGE_SECONDS = 60
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _parse_iso(text: Any) -> datetime | None:
|
|
32
|
+
if not isinstance(text, str) or not text:
|
|
33
|
+
return None
|
|
34
|
+
try:
|
|
35
|
+
dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
|
|
36
|
+
except ValueError:
|
|
37
|
+
return None
|
|
38
|
+
if dt.tzinfo is None:
|
|
39
|
+
dt = dt.replace(tzinfo=timezone.utc)
|
|
40
|
+
return dt
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def record_team_progress(
|
|
44
|
+
state: dict[str, Any],
|
|
45
|
+
now: datetime | None = None,
|
|
46
|
+
*,
|
|
47
|
+
source: str = "",
|
|
48
|
+
owner_team_id: str | None = None,
|
|
49
|
+
) -> None:
|
|
50
|
+
coordinator = state.setdefault("coordinator", {})
|
|
51
|
+
progress = coordinator.setdefault("team_last_progress_at", {})
|
|
52
|
+
key = owner_team_id or team_state_key(state)
|
|
53
|
+
if not key:
|
|
54
|
+
return
|
|
55
|
+
progress[key] = {
|
|
56
|
+
"at": (now or datetime.now(timezone.utc)).isoformat(),
|
|
57
|
+
"source": source,
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _team_last_progress_at(
|
|
62
|
+
state: dict[str, Any],
|
|
63
|
+
store: MessageStore,
|
|
64
|
+
owner_team_id: str,
|
|
65
|
+
) -> datetime | None:
|
|
66
|
+
candidates: list[datetime] = []
|
|
67
|
+
coordinator = state.get("coordinator") or {}
|
|
68
|
+
explicit = (coordinator.get("team_last_progress_at") or {}).get(owner_team_id)
|
|
69
|
+
if isinstance(explicit, dict):
|
|
70
|
+
ts = _parse_iso(explicit.get("at"))
|
|
71
|
+
if ts:
|
|
72
|
+
candidates.append(ts)
|
|
73
|
+
elif isinstance(explicit, str):
|
|
74
|
+
ts = _parse_iso(explicit)
|
|
75
|
+
if ts:
|
|
76
|
+
candidates.append(ts)
|
|
77
|
+
health = store.agent_health(owner_team_id=owner_team_id)
|
|
78
|
+
for row in health.values():
|
|
79
|
+
ts = _parse_iso(row.get("last_output_at"))
|
|
80
|
+
if ts:
|
|
81
|
+
candidates.append(ts)
|
|
82
|
+
return max(candidates) if candidates else None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _team_last_idle_fallback_fire_at(state: dict[str, Any], owner_team_id: str) -> datetime | None:
|
|
86
|
+
coordinator = state.get("coordinator") or {}
|
|
87
|
+
fires = coordinator.get("team_last_idle_fallback_fire_at") or {}
|
|
88
|
+
return _parse_iso(fires.get(owner_team_id))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _record_idle_fallback_fire(state: dict[str, Any], owner_team_id: str, now: datetime) -> None:
|
|
92
|
+
coordinator = state.setdefault("coordinator", {})
|
|
93
|
+
fires = coordinator.setdefault("team_last_idle_fallback_fire_at", {})
|
|
94
|
+
fires[owner_team_id] = now.isoformat()
|
|
95
|
+
|
|
96
|
+
|
|
26
97
|
def _team_undelivered_obligations(
|
|
27
98
|
state: dict[str, Any],
|
|
28
99
|
store: MessageStore,
|
|
29
100
|
owner_team_id: str,
|
|
30
101
|
active_task_statuses: set[str],
|
|
102
|
+
*,
|
|
103
|
+
now: datetime | None = None,
|
|
31
104
|
) -> list[dict[str, Any]]:
|
|
105
|
+
now = now or datetime.now(timezone.utc)
|
|
106
|
+
min_age = timedelta(seconds=OBLIGATION_PENDING_MIN_AGE_SECONDS)
|
|
32
107
|
obligations: list[dict[str, Any]] = []
|
|
33
108
|
for message in store.messages(owner_team_id=owner_team_id):
|
|
34
|
-
if message.get("status") in _UNDELIVERED_MESSAGE_STATUSES:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
109
|
+
if message.get("status") not in _UNDELIVERED_MESSAGE_STATUSES:
|
|
110
|
+
continue
|
|
111
|
+
created_at = _parse_iso(message.get("created_at"))
|
|
112
|
+
if created_at and (now - created_at) < min_age:
|
|
113
|
+
continue
|
|
114
|
+
obligations.append(
|
|
115
|
+
{
|
|
116
|
+
"kind": "undelivered_message",
|
|
117
|
+
"message_id": message.get("message_id"),
|
|
118
|
+
"recipient": message.get("recipient"),
|
|
119
|
+
"status": message.get("status"),
|
|
120
|
+
}
|
|
121
|
+
)
|
|
43
122
|
for watcher in store.retryable_result_watchers():
|
|
44
|
-
if watcher.get("status") in {"pending", "notify_failed"}:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
123
|
+
if watcher.get("status") not in {"pending", "notify_failed"}:
|
|
124
|
+
continue
|
|
125
|
+
created_at = _parse_iso(watcher.get("created_at"))
|
|
126
|
+
if created_at and (now - created_at) < min_age:
|
|
127
|
+
continue
|
|
128
|
+
obligations.append(
|
|
129
|
+
{
|
|
130
|
+
"kind": "pending_result_watcher",
|
|
131
|
+
"watcher_id": watcher.get("watcher_id"),
|
|
132
|
+
"task_id": watcher.get("task_id"),
|
|
133
|
+
"agent_id": watcher.get("agent_id"),
|
|
134
|
+
}
|
|
135
|
+
)
|
|
53
136
|
for task in state.get("tasks", []):
|
|
54
137
|
if task.get("status", "pending") in active_task_statuses and task.get("assignee"):
|
|
55
138
|
obligations.append(
|
|
@@ -118,11 +201,33 @@ def detect_idle_fallbacks(
|
|
|
118
201
|
)
|
|
119
202
|
now = now or datetime.now(timezone.utc)
|
|
120
203
|
owner_team_id = team_state_key(state)
|
|
121
|
-
obligations = _team_undelivered_obligations(state, store, owner_team_id, _ACTIVE_TASK_STATUSES)
|
|
204
|
+
obligations = _team_undelivered_obligations(state, store, owner_team_id, _ACTIVE_TASK_STATUSES, now=now)
|
|
122
205
|
if not obligations:
|
|
123
206
|
return []
|
|
124
207
|
all_idle, idle_workers = _all_workers_idle(state, store, owner_team_id)
|
|
125
208
|
if not all_idle:
|
|
209
|
+
record_team_progress(state, now, source="all_workers_idle:false", owner_team_id=owner_team_id)
|
|
210
|
+
save_runtime_state(workspace, state)
|
|
211
|
+
return []
|
|
212
|
+
last_progress = _team_last_progress_at(state, store, owner_team_id)
|
|
213
|
+
if last_progress and (now - last_progress) < timedelta(seconds=STABLE_IDLE_SECONDS):
|
|
214
|
+
event_log.write(
|
|
215
|
+
"coordinator.idle_fallback_skipped",
|
|
216
|
+
reason="stable_idle_window",
|
|
217
|
+
team=owner_team_id,
|
|
218
|
+
stable_idle_seconds=STABLE_IDLE_SECONDS,
|
|
219
|
+
elapsed_seconds=int((now - last_progress).total_seconds()),
|
|
220
|
+
)
|
|
221
|
+
return []
|
|
222
|
+
last_fire = _team_last_idle_fallback_fire_at(state, owner_team_id)
|
|
223
|
+
if last_fire and (now - last_fire) < timedelta(seconds=FIRE_DEBOUNCE_SECONDS):
|
|
224
|
+
event_log.write(
|
|
225
|
+
"coordinator.idle_fallback_skipped",
|
|
226
|
+
reason="fire_debounce",
|
|
227
|
+
team=owner_team_id,
|
|
228
|
+
fire_debounce_seconds=FIRE_DEBOUNCE_SECONDS,
|
|
229
|
+
elapsed_seconds=int((now - last_fire).total_seconds()),
|
|
230
|
+
)
|
|
126
231
|
return []
|
|
127
232
|
spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
|
|
128
233
|
spec = load_spec(spec_path) if spec_path.exists() else {}
|
|
@@ -137,6 +242,7 @@ def detect_idle_fallbacks(
|
|
|
137
242
|
alerts.append({"agent_id": agent_id, "alert_type": "idle_fallback", "obligations": obligations})
|
|
138
243
|
if not alerts:
|
|
139
244
|
return []
|
|
245
|
+
_record_idle_fallback_fire(state, owner_team_id, now)
|
|
140
246
|
save_runtime_state(workspace, state)
|
|
141
247
|
content = (
|
|
142
248
|
"There is still unfinished work. Continue coordinating, deliver a result, "
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from team_agent.events import EventLog
|
|
6
|
+
from team_agent.state import worker_sender_bypasses_owner_gate
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def apply_worker_sender_bypass(
|
|
10
|
+
state: dict[str, Any],
|
|
11
|
+
sender: str | None,
|
|
12
|
+
target: Any,
|
|
13
|
+
task_id: str | None,
|
|
14
|
+
event_log: EventLog,
|
|
15
|
+
) -> bool:
|
|
16
|
+
via = worker_sender_bypasses_owner_gate(state, sender)
|
|
17
|
+
if not via:
|
|
18
|
+
return False
|
|
19
|
+
event_log.write(
|
|
20
|
+
"send.bypassed_owner_gate_worker_sender",
|
|
21
|
+
sender=sender,
|
|
22
|
+
env_team_agent_id=via,
|
|
23
|
+
target=target if isinstance(target, str) else None,
|
|
24
|
+
task_id=task_id,
|
|
25
|
+
)
|
|
26
|
+
return True
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
__all__ = ["apply_worker_sender_bypass"]
|
|
@@ -311,6 +311,16 @@ def _suppression_clear_reason(
|
|
|
311
311
|
agent_id: str,
|
|
312
312
|
entry: dict[str, Any],
|
|
313
313
|
) -> str | None:
|
|
314
|
+
if entry.get("manual_acknowledge"):
|
|
315
|
+
try:
|
|
316
|
+
expires_at = datetime.fromisoformat(str(entry.get("expires_at")))
|
|
317
|
+
except ValueError:
|
|
318
|
+
return "invalid_suppression_timestamp"
|
|
319
|
+
if expires_at.tzinfo is None:
|
|
320
|
+
expires_at = expires_at.replace(tzinfo=timezone.utc)
|
|
321
|
+
if datetime.now(timezone.utc) < expires_at:
|
|
322
|
+
return None
|
|
323
|
+
return "manual_acknowledge_expired"
|
|
314
324
|
previous = entry.get("snapshot") if isinstance(entry.get("snapshot"), dict) else {}
|
|
315
325
|
current = _agent_alert_snapshot(state, store, agent_id)
|
|
316
326
|
if current.get("assigned_task_ids") != previous.get("assigned_task_ids"):
|
|
@@ -85,11 +85,13 @@ def _send_message_unlocked(
|
|
|
85
85
|
return ambiguous
|
|
86
86
|
state = select_runtime_state(workspace, team)
|
|
87
87
|
gate = check_team_owner(state)
|
|
88
|
-
if gate:
|
|
89
|
-
return gate
|
|
90
88
|
spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
|
|
91
89
|
spec = load_spec(spec_path)
|
|
92
90
|
event_log = EventLog(workspace)
|
|
91
|
+
if gate:
|
|
92
|
+
from team_agent.messaging.owner_bypass import apply_worker_sender_bypass
|
|
93
|
+
if not apply_worker_sender_bypass(state, sender, target, task_id, event_log):
|
|
94
|
+
return gate
|
|
93
95
|
owner_team_id = team_state_key(state)
|
|
94
96
|
leader_id = _leader_id(state, spec)
|
|
95
97
|
|
|
@@ -174,6 +176,11 @@ def _send_single_message_unlocked(
|
|
|
174
176
|
if _is_leader_target(target, leader_id) and not _is_leader_sender(sender, leader_id):
|
|
175
177
|
return _send_to_leader_receiver(workspace, state, leader_id, content, task_id, sender, requires_ack, event_log)
|
|
176
178
|
|
|
179
|
+
from team_agent.messaging.session_drift import session_drift_refusal
|
|
180
|
+
drift = session_drift_refusal(state, target, leader_id, sender, task_id, event_log)
|
|
181
|
+
if drift:
|
|
182
|
+
return drift
|
|
183
|
+
|
|
177
184
|
if task_id and route_task_id:
|
|
178
185
|
task = _find_task(state.get("tasks", []), task_id)
|
|
179
186
|
if task.get("human_confirmation") and not task.get("human_confirmed"):
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from team_agent.events import EventLog
|
|
9
|
+
|
|
10
|
+
_UUID = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
|
|
11
|
+
_RESUME_THREAD_RE = re.compile(
|
|
12
|
+
rf"(?:Switched to thread|resume|thread)\s+({_UUID})",
|
|
13
|
+
re.IGNORECASE,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def extract_thread_id_from_scrollback(scrollback: str) -> str | None:
|
|
18
|
+
if not scrollback:
|
|
19
|
+
return None
|
|
20
|
+
matches = _RESUME_THREAD_RE.findall(scrollback)
|
|
21
|
+
if not matches:
|
|
22
|
+
return None
|
|
23
|
+
return matches[-1].lower()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def detect_session_drift(
|
|
27
|
+
workspace: Path,
|
|
28
|
+
state: dict[str, Any],
|
|
29
|
+
event_log: EventLog,
|
|
30
|
+
*,
|
|
31
|
+
agent_id: str,
|
|
32
|
+
agent_state: dict[str, Any],
|
|
33
|
+
scrollback: str,
|
|
34
|
+
) -> dict[str, Any] | None:
|
|
35
|
+
provider = str(agent_state.get("provider") or "").lower()
|
|
36
|
+
if provider != "codex":
|
|
37
|
+
return None
|
|
38
|
+
stored = str(agent_state.get("session_id") or "").strip()
|
|
39
|
+
if not stored:
|
|
40
|
+
return None
|
|
41
|
+
if str(agent_state.get("status") or "").lower() == "session_drift":
|
|
42
|
+
return None
|
|
43
|
+
actual = extract_thread_id_from_scrollback(scrollback)
|
|
44
|
+
if not actual:
|
|
45
|
+
return None
|
|
46
|
+
if actual.lower() == stored.lower():
|
|
47
|
+
return None
|
|
48
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
49
|
+
event = event_log.write(
|
|
50
|
+
"coordinator.session_drift_detected",
|
|
51
|
+
agent_id=agent_id,
|
|
52
|
+
stored_session_id=stored,
|
|
53
|
+
actual_thread_id=actual,
|
|
54
|
+
status="session_drift",
|
|
55
|
+
provider=provider,
|
|
56
|
+
ts=now,
|
|
57
|
+
remediation="team-agent reset-agent --discard-session <agent>",
|
|
58
|
+
)
|
|
59
|
+
agent_state["status"] = "session_drift"
|
|
60
|
+
agent_state["session_drift"] = {
|
|
61
|
+
"stored_session_id": stored,
|
|
62
|
+
"actual_thread_id": actual,
|
|
63
|
+
"detected_at": now,
|
|
64
|
+
"remediation": "team-agent reset-agent --discard-session <agent>",
|
|
65
|
+
}
|
|
66
|
+
return event
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def session_drift_refusal(state, target, leader_id, sender, task_id, event_log):
|
|
70
|
+
if not target or target == leader_id or target == "*":
|
|
71
|
+
return None
|
|
72
|
+
rs = (state.get("agents") or {}).get(target) or {}
|
|
73
|
+
if str(rs.get("status") or "").lower() != "session_drift":
|
|
74
|
+
return None
|
|
75
|
+
info = rs.get("session_drift") or {}
|
|
76
|
+
event_log.write(
|
|
77
|
+
"send.refused_session_drift",
|
|
78
|
+
target=target,
|
|
79
|
+
sender=sender,
|
|
80
|
+
task_id=task_id,
|
|
81
|
+
stored_session_id=info.get("stored_session_id"),
|
|
82
|
+
actual_thread_id=info.get("actual_thread_id"),
|
|
83
|
+
)
|
|
84
|
+
return {
|
|
85
|
+
"ok": False,
|
|
86
|
+
"status": "refused",
|
|
87
|
+
"reason": "session_drift",
|
|
88
|
+
"to": target,
|
|
89
|
+
"action": f"team-agent reset-agent --discard-session {target}",
|
|
90
|
+
"session_drift": info,
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
__all__ = ["detect_session_drift", "extract_thread_id_from_scrollback", "session_drift_refusal"]
|
|
@@ -220,6 +220,7 @@ from team_agent.state import (
|
|
|
220
220
|
save_runtime_state,
|
|
221
221
|
save_team_scoped_state,
|
|
222
222
|
select_runtime_state,
|
|
223
|
+
team_state_key,
|
|
223
224
|
write_spec,
|
|
224
225
|
write_team_state,
|
|
225
226
|
)
|
|
@@ -578,20 +579,27 @@ def remove_agent(
|
|
|
578
579
|
return lifecycle_remove_agent(workspace, agent_id, from_spec=from_spec, confirm=confirm, force=force, team=team)
|
|
579
580
|
|
|
580
581
|
|
|
581
|
-
def acknowledge_idle(workspace: Path, agent_id: str) -> dict[str, Any]:
|
|
582
|
+
def acknowledge_idle(workspace: Path, agent_id: str | None = None, *, team: str | None = None) -> dict[str, Any]:
|
|
582
583
|
with _runtime_lock(workspace, "acknowledge-idle"):
|
|
583
|
-
|
|
584
|
+
try:
|
|
585
|
+
state = select_runtime_state(workspace, team)
|
|
586
|
+
except Exception as exc:
|
|
587
|
+
return {"ok": False, "status": "refused", "reason": "team_target_unresolved", "team": team, "error": str(exc)}
|
|
584
588
|
gate = check_team_owner(state)
|
|
585
589
|
if gate:
|
|
586
590
|
return gate
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
591
|
+
now_dt = datetime.now(timezone.utc); now = now_dt.isoformat()
|
|
592
|
+
ttl_seconds = 1800
|
|
593
|
+
expires_at = (now_dt + timedelta(seconds=ttl_seconds)).isoformat()
|
|
594
|
+
owner_team_id = team_state_key(state); coordinator = state.setdefault("coordinator", {})
|
|
595
|
+
coordinator.setdefault("idle_acknowledged", {})[owner_team_id] = {"acknowledged_at": now, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
|
|
596
|
+
team_suppressions = coordinator.setdefault("suppressed_idle_alerts", {}).setdefault(owner_team_id, {})
|
|
597
|
+
entry = {"suppressed_at": now, "suppressed_by": "manual_acknowledge", "manual_acknowledge": True, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
|
|
598
|
+
for worker_id in state.get("agents", {}):
|
|
599
|
+
team_suppressions.setdefault(worker_id, {})["idle_fallback"] = dict(entry)
|
|
600
|
+
save_team_scoped_state(workspace, state)
|
|
601
|
+
EventLog(workspace).write("coordinator.idle_acknowledged", agent_id=agent_id, team=owner_team_id, acknowledged_at=now, expires_at=expires_at, ttl_seconds=ttl_seconds)
|
|
602
|
+
return {"ok": True, "team": owner_team_id, "agent_id": agent_id, "acknowledged_at": now, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
|
|
595
603
|
|
|
596
604
|
def takeover(workspace: Path, team: str | None = None, confirm: bool = False) -> dict[str, Any]:
|
|
597
605
|
if not confirm:
|
package/src/team_agent/state.py
CHANGED
|
@@ -193,6 +193,20 @@ def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
|
|
|
193
193
|
}
|
|
194
194
|
|
|
195
195
|
|
|
196
|
+
def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None) -> str | None:
|
|
197
|
+
if not sender:
|
|
198
|
+
return None
|
|
199
|
+
leader_id = (state.get("leader") or {}).get("id") or "leader"
|
|
200
|
+
if sender == leader_id or sender in {"leader", "Leader"}:
|
|
201
|
+
return None
|
|
202
|
+
if sender not in (state.get("agents") or {}):
|
|
203
|
+
return None
|
|
204
|
+
env_agent_id = os.environ.get("TEAM_AGENT_ID") or ""
|
|
205
|
+
if env_agent_id and env_agent_id != sender:
|
|
206
|
+
return None
|
|
207
|
+
return env_agent_id or sender
|
|
208
|
+
|
|
209
|
+
|
|
196
210
|
def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
|
|
197
211
|
if state.get("team_owner"):
|
|
198
212
|
return state["team_owner"]
|