@team-agent/installer 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@team-agent/installer",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "npx installer for Team Agent",
5
5
  "keywords": [
6
6
  "codex",
@@ -56,6 +56,7 @@ from team_agent.cli.commands import (
56
56
  cmd_remove_agent,
57
57
  cmd_stuck_list,
58
58
  cmd_stuck_cancel,
59
+ cmd_acknowledge_idle,
59
60
  cmd_allow_peer_talk,
60
61
  cmd_advanced,
61
62
  cmd_install_skill,
@@ -122,6 +123,7 @@ __all__ = [
122
123
  'cmd_remove_agent',
123
124
  'cmd_stuck_list',
124
125
  'cmd_stuck_cancel',
126
+ 'cmd_acknowledge_idle',
125
127
  'cmd_allow_peer_talk',
126
128
  'cmd_advanced',
127
129
  'cmd_install_skill',
@@ -273,6 +273,10 @@ def cmd_stuck_cancel(args: argparse.Namespace) -> dict[str, Any]:
273
273
  )
274
274
 
275
275
 
276
def cmd_acknowledge_idle(args: argparse.Namespace) -> dict[str, Any]:
    """Handle the ``acknowledge-idle`` subcommand.

    Resolves ``args.workspace`` to an absolute path and forwards it, together
    with the optional ``args.team`` target, to ``runtime.acknowledge_idle``.
    The runtime call's result is returned unchanged.
    """
    workspace_root = Path(args.workspace).resolve()
    return runtime.acknowledge_idle(workspace_root, team=args.team)
278
+
279
+
276
280
def cmd_allow_peer_talk(args: argparse.Namespace) -> dict[str, Any]:
    """Handle the ``allow-peer-talk`` subcommand.

    Resolves ``args.workspace`` to an absolute path and passes it with the two
    agent identifiers (``args.agent_a``, ``args.agent_b``) to
    ``runtime.allow_peer_talk``, returning that call's result unchanged.
    """
    workspace_root = Path(args.workspace).resolve()
    return runtime.allow_peer_talk(workspace_root, args.agent_a, args.agent_b)
278
282
 
@@ -46,6 +46,7 @@ from team_agent.cli.commands import (
46
46
  cmd_remove_agent,
47
47
  cmd_stuck_list,
48
48
  cmd_stuck_cancel,
49
+ cmd_acknowledge_idle,
49
50
  cmd_allow_peer_talk,
50
51
  cmd_advanced,
51
52
  cmd_install_skill,
@@ -372,6 +373,12 @@ def main(argv: list[str] | None = None) -> None:
372
373
  add_json(p)
373
374
  p.set_defaults(func=cmd_stuck_cancel)
374
375
 
376
+ p = sub.add_parser("acknowledge-idle", help="Suppress idle-fallback reminders for this team for a bounded window (default 30 minutes)")
377
+ p.add_argument("team", nargs="?", help="Explicit team/session target when a workspace has multiple teams")
378
+ p.add_argument("--workspace", default=".")
379
+ add_json(p)
380
+ p.set_defaults(func=cmd_acknowledge_idle)
381
+
375
382
  p = sub.add_parser("install-skill", help=argparse.SUPPRESS)
376
383
  p.add_argument("--target", choices=["codex", "claude", "all"], default="codex")
377
384
  p.add_argument("--dest", help="Explicit destination directory; overrides --target")
@@ -422,7 +429,7 @@ def main(argv: list[str] | None = None) -> None:
422
429
  sub._choices_actions = [ # type: ignore[attr-defined]
423
430
  action for action in sub._choices_actions if action.help != argparse.SUPPRESS # type: ignore[attr-defined]
424
431
  ]
425
- sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,doctor}"
432
+ sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
426
433
 
427
434
  args = parser.parse_args(raw_argv)
428
435
  try:
@@ -265,6 +265,7 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
265
265
  detect_idle_fallbacks,
266
266
  )
267
267
  from team_agent.messaging.activity_detector import detect_compaction_degradation
268
+ from team_agent.messaging.session_drift import detect_session_drift
268
269
  from team_agent.state import load_runtime_state, save_runtime_state
269
270
  state = load_runtime_state(workspace)
270
271
  event_log = EventLog(workspace)
@@ -304,6 +305,19 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
304
305
  )
305
306
  if result.get("event") and result.get("event") != "compaction_threshold_crossed.none":
306
307
  compaction_results.append(result)
308
+ drift_results: list[dict[str, Any]] = []
309
+ for agent_id, agent_state in state.get("agents", {}).items():
310
+ if str(agent_state.get("provider") or "") != "codex":
311
+ continue
312
+ scrollback = str((captures.get(agent_id) or {}).get("scrollback") or "")
313
+ if not scrollback:
314
+ continue
315
+ drift = detect_session_drift(
316
+ workspace, state, event_log,
317
+ agent_id=agent_id, agent_state=agent_state, scrollback=scrollback,
318
+ )
319
+ if drift:
320
+ drift_results.append(drift)
307
321
  save_runtime_state(workspace, state)
308
322
  results = _collect_results_and_notify_watchers(workspace, event_log)
309
323
  return {
@@ -315,5 +329,6 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
315
329
  "idle_alerts": idle_alerts,
316
330
  "deadlock_alerts": deadlock_alerts,
317
331
  "compaction": compaction_results,
332
+ "session_drift": drift_results,
318
333
  "results": results,
319
334
  }
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from team_agent.events import EventLog
7
+
8
# Name prefixes that mark a tmux paste buffer as one created by Team Agent.
# The last entry is a prefix of the first two, so it alone decides the match;
# the longer ones are kept as written.
_TEAM_AGENT_BUFFER_PREFIXES = ("team-agent-send-", "team-agent-leader-receiver-", "team-agent-")


def _is_team_agent_buffer(name: str) -> bool:
    """Return True when *name* starts with one of the Team Agent buffer prefixes."""
    # str.startswith accepts a tuple and tests every prefix in one call.
    return name.startswith(_TEAM_AGENT_BUFFER_PREFIXES)
13
+
14
+
15
def cleanup_stale_team_agent_buffers(workspace: Path, event_log: EventLog, *, context: str) -> dict[str, Any]:
    """Delete tmux paste buffers whose names carry a Team Agent prefix.

    Buffer names are listed with ``tmux list-buffers``; every name accepted by
    ``_is_team_agent_buffer`` is removed with ``tmux delete-buffer``.

    Args:
        workspace: Accepted for call-site symmetry; this function does not read it.
        event_log: Receives the ``paste_buffer_hygiene.*`` events written below.
        context: Caller-supplied tag recorded on every event.

    Returns:
        ``{"ok": False, "deleted": [], "reason": "list_buffers_failed"}`` when
        listing fails. Otherwise ``{"ok": True, "deleted": [...], "failed":
        [...], "scanned": int, "matched": int}``, where ``failed`` names the
        buffers whose ``delete-buffer`` call returned non-zero.
    """
    # Function-scope import, kept from the original.
    # NOTE(review): presumably avoids an import cycle with team_agent.runtime - confirm.
    from team_agent.runtime import run_cmd

    listing = run_cmd(["tmux", "list-buffers", "-F", "#{buffer_name}"], timeout=5)
    if listing.returncode != 0:
        event_log.write(
            "paste_buffer_hygiene.list_failed",
            context=context,
            # Guard against a runner that reports "no output" as None.
            stderr=(listing.stderr or "").strip()[:200],
        )
        return {"ok": False, "deleted": [], "reason": "list_buffers_failed"}

    names = [line.strip() for line in (listing.stdout or "").splitlines() if line.strip()]
    targets = [name for name in names if _is_team_agent_buffer(name)]

    deleted: list[str] = []
    failed: list[str] = []
    for name in targets:
        outcome = run_cmd(["tmux", "delete-buffer", "-b", name], timeout=5)
        (deleted if outcome.returncode == 0 else failed).append(name)

    if deleted:
        event_log.write(
            "paste_buffer_hygiene.prevented_resume_injection",
            context=context,
            deleted_buffers=deleted,
            scanned_count=len(names),
            matched_count=len(targets),
        )
    if failed:
        # Previously a failed delete left no trace; record it so a buffer that
        # survives cleanup can be diagnosed from the event log.
        event_log.write(
            "paste_buffer_hygiene.delete_failed",
            context=context,
            failed_buffers=failed,
        )
    return {
        "ok": True,
        "deleted": deleted,
        "failed": failed,
        "scanned": len(names),
        "matched": len(targets),
    }
37
+
38
+
39
+ __all__ = ["cleanup_stale_team_agent_buffers"]
@@ -219,6 +219,8 @@ def _start_agent_unlocked(workspace: Path, agent_id: str, force: bool, open_disp
219
219
  reason="rollout_missing" if start_mode == "fresh_after_missing_rollout" else "session_id_missing",
220
220
  )
221
221
 
222
+ from team_agent.lifecycle.paste_buffer_hygiene import cleanup_stale_team_agent_buffers
223
+ cleanup_stale_team_agent_buffers(workspace, event_log, context=f"start_agent:{agent_id}")
222
224
  tmux_cmd, tmux_start_mode = _tmux_start_command_for_agent_window(session_name, agent_id, command)
223
225
  event_log.write(
224
226
  "start_agent.agent_start",
@@ -273,6 +275,7 @@ def _start_agent_unlocked(workspace: Path, agent_id: str, force: bool, open_disp
273
275
  )
274
276
  command = shell_command_for_agent(command_agent, workspace, mcp_config)
275
277
  start_mode = "fresh_after_missing_rollout" if missing_resume_rollout else "fresh"
278
+ cleanup_stale_team_agent_buffers(workspace, event_log, context=f"start_agent_fallback:{agent_id}")
276
279
  tmux_cmd, tmux_start_mode = _tmux_start_command_for_agent_window(session_name, agent_id, command)
277
280
  event_log.write(
278
281
  "start_agent.agent_start",
@@ -121,6 +121,16 @@ def _deliver_pending_messages(workspace: Path, state: dict[str, Any], event_log:
121
121
  for row in store.messages():
122
122
  if row["status"] not in {"pending", "accepted"}:
123
123
  continue
124
+ agent_state = state.get("agents", {}).get(row["recipient"]) or {}
125
+ if str(agent_state.get("status") or "").lower() == "busy":
126
+ event_log.write(
127
+ "send.deferred_busy",
128
+ message_id=row["message_id"],
129
+ sender=row.get("sender"),
130
+ recipient=row["recipient"],
131
+ reason="recipient_busy",
132
+ )
133
+ continue
124
134
  result = _deliver_pending_message(workspace, state, row["message_id"], wait_visible=True, timeout=30.0)
125
135
  if result.get("ok"):
126
136
  delivered.append(row["message_id"])
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from datetime import datetime, timezone
3
+ from datetime import datetime, timedelta, timezone
4
4
  from pathlib import Path
5
5
  from typing import Any
6
6
 
@@ -23,33 +23,116 @@ _UNDELIVERED_MESSAGE_STATUSES = {
23
23
  }
24
24
 
25
25
 
26
+ STABLE_IDLE_SECONDS = 120
27
+ FIRE_DEBOUNCE_SECONDS = 300
28
+ OBLIGATION_PENDING_MIN_AGE_SECONDS = 60
29
+
30
+
31
def _parse_iso(text: Any) -> datetime | None:
    """Parse an ISO-8601 string into a timezone-aware ``datetime``.

    Returns ``None`` for non-string or empty input and for text that
    ``datetime.fromisoformat`` rejects. A trailing ``Z`` is rewritten to
    ``+00:00`` before parsing, and a naive result is tagged as UTC.
    """
    if isinstance(text, str) and text:
        try:
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is not None:
            return parsed
        return parsed.replace(tzinfo=timezone.utc)
    return None
41
+
42
+
43
def record_team_progress(
    state: dict[str, Any],
    now: datetime | None = None,
    *,
    source: str = "",
    owner_team_id: str | None = None,
) -> None:
    """Stamp the latest progress time for a team inside ``state``.

    The record is stored at ``state["coordinator"]["team_last_progress_at"]``
    under ``owner_team_id`` (or ``team_state_key(state)`` when that is falsy)
    as ``{"at": <ISO timestamp>, "source": source}``. ``now`` defaults to the
    current UTC time. Nothing is recorded when no team key can be resolved,
    although the container dicts are still created.
    """
    # The containers are created before the key is resolved, as in the original.
    progress_by_team = state.setdefault("coordinator", {}).setdefault("team_last_progress_at", {})
    team_key = owner_team_id or team_state_key(state)
    if team_key:
        stamp = now or datetime.now(timezone.utc)
        progress_by_team[team_key] = {"at": stamp.isoformat(), "source": source}
59
+
60
+
61
def _team_last_progress_at(
    state: dict[str, Any],
    store: MessageStore,
    owner_team_id: str,
) -> datetime | None:
    """Return the most recent progress timestamp known for a team, if any.

    Candidates are the explicit record under
    ``state["coordinator"]["team_last_progress_at"][owner_team_id]`` (either a
    dict with an ``"at"`` field or a bare string) and the ``last_output_at``
    of every row returned by ``store.agent_health(owner_team_id=...)``.
    Values that ``_parse_iso`` cannot parse are ignored.
    """
    progress_by_team = (state.get("coordinator") or {}).get("team_last_progress_at") or {}
    explicit = progress_by_team.get(owner_team_id)

    raw_stamps: list[Any] = []
    if isinstance(explicit, dict):
        raw_stamps.append(explicit.get("at"))
    elif isinstance(explicit, str):
        raw_stamps.append(explicit)
    raw_stamps.extend(
        row.get("last_output_at")
        for row in store.agent_health(owner_team_id=owner_team_id).values()
    )

    parsed = [stamp for stamp in map(_parse_iso, raw_stamps) if stamp]
    return max(parsed, default=None)
83
+
84
+
85
def _team_last_idle_fallback_fire_at(state: dict[str, Any], owner_team_id: str) -> datetime | None:
    """Return when the idle fallback last fired for a team.

    Reads ``state["coordinator"]["team_last_idle_fallback_fire_at"][owner_team_id]``
    and parses it with ``_parse_iso``; a missing or unparseable entry yields ``None``.
    """
    fires_by_team = (state.get("coordinator") or {}).get("team_last_idle_fallback_fire_at") or {}
    recorded = fires_by_team.get(owner_team_id)
    return _parse_iso(recorded)
89
+
90
+
91
def _record_idle_fallback_fire(state: dict[str, Any], owner_team_id: str, now: datetime) -> None:
    """Store ``now`` (ISO-formatted) as the team's latest idle-fallback fire time.

    Writes to ``state["coordinator"]["team_last_idle_fallback_fire_at"]``,
    creating the intermediate dicts when they are absent.
    """
    fires_by_team = state.setdefault("coordinator", {}).setdefault("team_last_idle_fallback_fire_at", {})
    fires_by_team[owner_team_id] = now.isoformat()
95
+
96
+
26
97
  def _team_undelivered_obligations(
27
98
  state: dict[str, Any],
28
99
  store: MessageStore,
29
100
  owner_team_id: str,
30
101
  active_task_statuses: set[str],
102
+ *,
103
+ now: datetime | None = None,
31
104
  ) -> list[dict[str, Any]]:
105
+ now = now or datetime.now(timezone.utc)
106
+ min_age = timedelta(seconds=OBLIGATION_PENDING_MIN_AGE_SECONDS)
32
107
  obligations: list[dict[str, Any]] = []
33
108
  for message in store.messages(owner_team_id=owner_team_id):
34
- if message.get("status") in _UNDELIVERED_MESSAGE_STATUSES:
35
- obligations.append(
36
- {
37
- "kind": "undelivered_message",
38
- "message_id": message.get("message_id"),
39
- "recipient": message.get("recipient"),
40
- "status": message.get("status"),
41
- }
42
- )
109
+ if message.get("status") not in _UNDELIVERED_MESSAGE_STATUSES:
110
+ continue
111
+ created_at = _parse_iso(message.get("created_at"))
112
+ if created_at and (now - created_at) < min_age:
113
+ continue
114
+ obligations.append(
115
+ {
116
+ "kind": "undelivered_message",
117
+ "message_id": message.get("message_id"),
118
+ "recipient": message.get("recipient"),
119
+ "status": message.get("status"),
120
+ }
121
+ )
43
122
  for watcher in store.retryable_result_watchers():
44
- if watcher.get("status") in {"pending", "notify_failed"}:
45
- obligations.append(
46
- {
47
- "kind": "pending_result_watcher",
48
- "watcher_id": watcher.get("watcher_id"),
49
- "task_id": watcher.get("task_id"),
50
- "agent_id": watcher.get("agent_id"),
51
- }
52
- )
123
+ if watcher.get("status") not in {"pending", "notify_failed"}:
124
+ continue
125
+ created_at = _parse_iso(watcher.get("created_at"))
126
+ if created_at and (now - created_at) < min_age:
127
+ continue
128
+ obligations.append(
129
+ {
130
+ "kind": "pending_result_watcher",
131
+ "watcher_id": watcher.get("watcher_id"),
132
+ "task_id": watcher.get("task_id"),
133
+ "agent_id": watcher.get("agent_id"),
134
+ }
135
+ )
53
136
  for task in state.get("tasks", []):
54
137
  if task.get("status", "pending") in active_task_statuses and task.get("assignee"):
55
138
  obligations.append(
@@ -118,11 +201,33 @@ def detect_idle_fallbacks(
118
201
  )
119
202
  now = now or datetime.now(timezone.utc)
120
203
  owner_team_id = team_state_key(state)
121
- obligations = _team_undelivered_obligations(state, store, owner_team_id, _ACTIVE_TASK_STATUSES)
204
+ obligations = _team_undelivered_obligations(state, store, owner_team_id, _ACTIVE_TASK_STATUSES, now=now)
122
205
  if not obligations:
123
206
  return []
124
207
  all_idle, idle_workers = _all_workers_idle(state, store, owner_team_id)
125
208
  if not all_idle:
209
+ record_team_progress(state, now, source="all_workers_idle:false", owner_team_id=owner_team_id)
210
+ save_runtime_state(workspace, state)
211
+ return []
212
+ last_progress = _team_last_progress_at(state, store, owner_team_id)
213
+ if last_progress and (now - last_progress) < timedelta(seconds=STABLE_IDLE_SECONDS):
214
+ event_log.write(
215
+ "coordinator.idle_fallback_skipped",
216
+ reason="stable_idle_window",
217
+ team=owner_team_id,
218
+ stable_idle_seconds=STABLE_IDLE_SECONDS,
219
+ elapsed_seconds=int((now - last_progress).total_seconds()),
220
+ )
221
+ return []
222
+ last_fire = _team_last_idle_fallback_fire_at(state, owner_team_id)
223
+ if last_fire and (now - last_fire) < timedelta(seconds=FIRE_DEBOUNCE_SECONDS):
224
+ event_log.write(
225
+ "coordinator.idle_fallback_skipped",
226
+ reason="fire_debounce",
227
+ team=owner_team_id,
228
+ fire_debounce_seconds=FIRE_DEBOUNCE_SECONDS,
229
+ elapsed_seconds=int((now - last_fire).total_seconds()),
230
+ )
126
231
  return []
127
232
  spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
128
233
  spec = load_spec(spec_path) if spec_path.exists() else {}
@@ -137,6 +242,7 @@ def detect_idle_fallbacks(
137
242
  alerts.append({"agent_id": agent_id, "alert_type": "idle_fallback", "obligations": obligations})
138
243
  if not alerts:
139
244
  return []
245
+ _record_idle_fallback_fire(state, owner_team_id, now)
140
246
  save_runtime_state(workspace, state)
141
247
  content = (
142
248
  "There is still unfinished work. Continue coordinating, deliver a result, "
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from team_agent.events import EventLog
6
+ from team_agent.state import worker_sender_bypasses_owner_gate
7
+
8
+
9
def apply_worker_sender_bypass(
    state: dict[str, Any],
    sender: str | None,
    target: Any,
    task_id: str | None,
    event_log: EventLog,
) -> bool:
    """Decide whether a send may skip the team-owner gate, logging when it does.

    Delegates the decision to ``worker_sender_bypasses_owner_gate(state, sender)``.
    When that returns a truthy identity, a
    ``send.bypassed_owner_gate_worker_sender`` event is written and ``True`` is
    returned; otherwise nothing is logged and ``False`` is returned.
    """
    bypass_identity = worker_sender_bypasses_owner_gate(state, sender)
    if bypass_identity:
        event_log.write(
            "send.bypassed_owner_gate_worker_sender",
            sender=sender,
            env_team_agent_id=bypass_identity,
            # Only a plain string target is recorded; any other type is logged as None.
            target=target if isinstance(target, str) else None,
            task_id=task_id,
        )
        return True
    return False
27
+
28
+
29
+ __all__ = ["apply_worker_sender_bypass"]
@@ -311,6 +311,16 @@ def _suppression_clear_reason(
311
311
  agent_id: str,
312
312
  entry: dict[str, Any],
313
313
  ) -> str | None:
314
+ if entry.get("manual_acknowledge"):
315
+ try:
316
+ expires_at = datetime.fromisoformat(str(entry.get("expires_at")))
317
+ except ValueError:
318
+ return "invalid_suppression_timestamp"
319
+ if expires_at.tzinfo is None:
320
+ expires_at = expires_at.replace(tzinfo=timezone.utc)
321
+ if datetime.now(timezone.utc) < expires_at:
322
+ return None
323
+ return "manual_acknowledge_expired"
314
324
  previous = entry.get("snapshot") if isinstance(entry.get("snapshot"), dict) else {}
315
325
  current = _agent_alert_snapshot(state, store, agent_id)
316
326
  if current.get("assigned_task_ids") != previous.get("assigned_task_ids"):
@@ -85,11 +85,13 @@ def _send_message_unlocked(
85
85
  return ambiguous
86
86
  state = select_runtime_state(workspace, team)
87
87
  gate = check_team_owner(state)
88
- if gate:
89
- return gate
90
88
  spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
91
89
  spec = load_spec(spec_path)
92
90
  event_log = EventLog(workspace)
91
+ if gate:
92
+ from team_agent.messaging.owner_bypass import apply_worker_sender_bypass
93
+ if not apply_worker_sender_bypass(state, sender, target, task_id, event_log):
94
+ return gate
93
95
  owner_team_id = team_state_key(state)
94
96
  leader_id = _leader_id(state, spec)
95
97
 
@@ -174,6 +176,11 @@ def _send_single_message_unlocked(
174
176
  if _is_leader_target(target, leader_id) and not _is_leader_sender(sender, leader_id):
175
177
  return _send_to_leader_receiver(workspace, state, leader_id, content, task_id, sender, requires_ack, event_log)
176
178
 
179
+ from team_agent.messaging.session_drift import session_drift_refusal
180
+ drift = session_drift_refusal(state, target, leader_id, sender, task_id, event_log)
181
+ if drift:
182
+ return drift
183
+
177
184
  if task_id and route_task_id:
178
185
  task = _find_task(state.get("tasks", []), task_id)
179
186
  if task.get("human_confirmation") and not task.get("human_confirmed"):
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from team_agent.events import EventLog
9
+
10
# Canonical 8-4-4-4-12 hexadecimal UUID layout.
_UUID = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
# A thread id is recognised only when it follows one of the keywords as a
# whole word and is itself a complete token:
#   * the leading \b stops "thread" matching inside e.g. "multithread";
#   * the trailing lookahead rejects a UUID-shaped prefix of a longer
#     hex/identifier run, which would otherwise be reported as a thread id.
_RESUME_THREAD_RE = re.compile(
    rf"\b(?:Switched to thread|resume|thread)\s+({_UUID})(?![\w-])",
    re.IGNORECASE,
)


def extract_thread_id_from_scrollback(scrollback: str) -> str | None:
    """Return the last thread id mentioned in *scrollback*, lower-cased.

    A mention is a UUID preceded by "Switched to thread", "resume" or
    "thread" (case-insensitive) and whitespace. Returns ``None`` when
    *scrollback* is empty or contains no such mention.
    """
    if not scrollback:
        return None
    matches = _RESUME_THREAD_RE.findall(scrollback)
    if not matches:
        return None
    # The most recent mention is the last one in the text.
    return matches[-1].lower()
24
+
25
+
26
def detect_session_drift(
    workspace: Path,
    state: dict[str, Any],
    event_log: EventLog,
    *,
    agent_id: str,
    agent_state: dict[str, Any],
    scrollback: str,
) -> dict[str, Any] | None:
    """Flag an agent whose scrollback shows a different thread than the stored session.

    Only agents with provider ``codex``, a non-empty stored ``session_id`` and
    a status other than ``session_drift`` are examined. When the thread id
    extracted from *scrollback* differs (case-insensitively) from the stored
    one, a ``coordinator.session_drift_detected`` event is written,
    ``agent_state`` is updated in place (``status`` and ``session_drift``), and
    the value returned by ``event_log.write`` is returned. In every other case
    the function returns ``None`` and changes nothing.

    ``workspace`` and ``state`` are accepted but not read here.
    """
    provider = str(agent_state.get("provider") or "").lower()
    stored = str(agent_state.get("session_id") or "").strip()
    already_flagged = str(agent_state.get("status") or "").lower() == "session_drift"
    if provider != "codex" or not stored or already_flagged:
        return None

    actual = extract_thread_id_from_scrollback(scrollback)
    if not actual or actual.lower() == stored.lower():
        return None

    remediation = "team-agent reset-agent --discard-session <agent>"
    detected_at = datetime.now(timezone.utc).isoformat()
    # The event is written before agent_state is mutated, as in the original.
    event = event_log.write(
        "coordinator.session_drift_detected",
        agent_id=agent_id,
        stored_session_id=stored,
        actual_thread_id=actual,
        status="session_drift",
        provider=provider,
        ts=detected_at,
        remediation=remediation,
    )
    agent_state["status"] = "session_drift"
    agent_state["session_drift"] = {
        "stored_session_id": stored,
        "actual_thread_id": actual,
        "detected_at": detected_at,
        "remediation": remediation,
    }
    return event
67
+
68
+
69
def session_drift_refusal(
    state: dict[str, Any],
    target: Any,
    leader_id: str | None,
    sender: str | None,
    task_id: str | None,
    event_log: EventLog,
) -> dict[str, Any] | None:
    """Refuse a send aimed at an agent currently marked ``session_drift``.

    Args:
        state: Runtime state; ``state["agents"][target]`` is consulted.
        target: Recipient. Only a single agent id (a string other than the
            leader id or ``"*"``) is checked; anything else is passed through.
        leader_id: The leader's id, which is never refused here.
        sender: Recorded on the refusal event.
        task_id: Recorded on the refusal event.
        event_log: Receives ``send.refused_session_drift`` on refusal.

    Returns:
        ``None`` when the send may proceed, otherwise a refusal dict with
        ``ok=False``, ``status="refused"``, ``reason="session_drift"``, the
        target, a suggested reset command and the stored drift details.
    """
    # A non-string target (e.g. a list of recipients) cannot be a key of the
    # agents mapping; treat it like the broadcast case instead of raising
    # TypeError on an unhashable lookup.
    if not target or not isinstance(target, str) or target == leader_id or target == "*":
        return None
    recipient_state = (state.get("agents") or {}).get(target) or {}
    if str(recipient_state.get("status") or "").lower() != "session_drift":
        return None
    info = recipient_state.get("session_drift") or {}
    event_log.write(
        "send.refused_session_drift",
        target=target,
        sender=sender,
        task_id=task_id,
        stored_session_id=info.get("stored_session_id"),
        actual_thread_id=info.get("actual_thread_id"),
    )
    return {
        "ok": False,
        "status": "refused",
        "reason": "session_drift",
        "to": target,
        "action": f"team-agent reset-agent --discard-session {target}",
        "session_drift": info,
    }
92
+
93
+
94
+ __all__ = ["detect_session_drift", "extract_thread_id_from_scrollback", "session_drift_refusal"]
@@ -220,6 +220,7 @@ from team_agent.state import (
220
220
  save_runtime_state,
221
221
  save_team_scoped_state,
222
222
  select_runtime_state,
223
+ team_state_key,
223
224
  write_spec,
224
225
  write_team_state,
225
226
  )
@@ -578,20 +579,27 @@ def remove_agent(
578
579
  return lifecycle_remove_agent(workspace, agent_id, from_spec=from_spec, confirm=confirm, force=force, team=team)
579
580
 
580
581
 
581
- def acknowledge_idle(workspace: Path, agent_id: str) -> dict[str, Any]:
582
+ def acknowledge_idle(workspace: Path, agent_id: str | None = None, *, team: str | None = None) -> dict[str, Any]:
582
583
  with _runtime_lock(workspace, "acknowledge-idle"):
583
- state = load_runtime_state(workspace)
584
+ try:
585
+ state = select_runtime_state(workspace, team)
586
+ except Exception as exc:
587
+ return {"ok": False, "status": "refused", "reason": "team_target_unresolved", "team": team, "error": str(exc)}
584
588
  gate = check_team_owner(state)
585
589
  if gate:
586
590
  return gate
587
- now = datetime.now(timezone.utc).isoformat()
588
- coordinator = state.setdefault("coordinator", {})
589
- ack = coordinator.setdefault("idle_acknowledged", {})
590
- ack[agent_id] = {"acknowledged_at": now}
591
- save_runtime_state(workspace, state)
592
- EventLog(workspace).write("coordinator.idle_acknowledged", agent_id=agent_id, acknowledged_at=now)
593
- return {"ok": True, "agent_id": agent_id, "acknowledged_at": now}
594
-
591
+ now_dt = datetime.now(timezone.utc); now = now_dt.isoformat()
592
+ ttl_seconds = 1800
593
+ expires_at = (now_dt + timedelta(seconds=ttl_seconds)).isoformat()
594
+ owner_team_id = team_state_key(state); coordinator = state.setdefault("coordinator", {})
595
+ coordinator.setdefault("idle_acknowledged", {})[owner_team_id] = {"acknowledged_at": now, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
596
+ team_suppressions = coordinator.setdefault("suppressed_idle_alerts", {}).setdefault(owner_team_id, {})
597
+ entry = {"suppressed_at": now, "suppressed_by": "manual_acknowledge", "manual_acknowledge": True, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
598
+ for worker_id in state.get("agents", {}):
599
+ team_suppressions.setdefault(worker_id, {})["idle_fallback"] = dict(entry)
600
+ save_team_scoped_state(workspace, state)
601
+ EventLog(workspace).write("coordinator.idle_acknowledged", agent_id=agent_id, team=owner_team_id, acknowledged_at=now, expires_at=expires_at, ttl_seconds=ttl_seconds)
602
+ return {"ok": True, "team": owner_team_id, "agent_id": agent_id, "acknowledged_at": now, "expires_at": expires_at, "ttl_seconds": ttl_seconds}
595
603
 
596
604
  def takeover(workspace: Path, team: str | None = None, confirm: bool = False) -> dict[str, Any]:
597
605
  if not confirm:
@@ -193,6 +193,20 @@ def check_team_owner(state: dict[str, Any]) -> dict[str, Any] | None:
193
193
  }
194
194
 
195
195
 
196
def worker_sender_bypasses_owner_gate(state: dict[str, Any], sender: str | None) -> str | None:
    """Return the identity under which a worker sender may skip the owner gate.

    The result is ``None`` (no bypass) when *sender* is empty, is the leader
    (``state["leader"]["id"]``, ``"leader"`` or ``"Leader"``), is not a key of
    ``state["agents"]``, or disagrees with a non-empty ``TEAM_AGENT_ID``
    environment variable. Otherwise the environment value is returned, falling
    back to *sender* when the variable is unset or empty.
    """
    if not sender:
        return None
    leader_id = (state.get("leader") or {}).get("id") or "leader"
    sender_is_leader = sender in (leader_id, "leader", "Leader")
    if sender_is_leader or sender not in (state.get("agents") or {}):
        return None
    claimed_id = os.environ.get("TEAM_AGENT_ID") or ""
    # NOTE(review): with TEAM_AGENT_ID unset, any caller naming a known worker
    # as sender is granted the bypass - confirm this is intended.
    if claimed_id and claimed_id != sender:
        return None
    return claimed_id or sender
208
+
209
+
196
210
  def populate_team_owner_from_env(state: dict[str, Any], source: str = "autopopulate") -> dict[str, Any] | None:
197
211
  if state.get("team_owner"):
198
212
  return state["team_owner"]