@team-agent/installer 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/package.json +1 -1
  2. package/schemas/team.schema.json +6 -0
  3. package/src/team_agent/approvals/runtime_prompts.py +1 -1
  4. package/src/team_agent/cli/commands.py +122 -6
  5. package/src/team_agent/cli/parser.py +42 -1
  6. package/src/team_agent/coordinator/__main__.py +21 -2
  7. package/src/team_agent/coordinator/lifecycle.py +11 -0
  8. package/src/team_agent/diagnose/orphan_cleanup.py +364 -0
  9. package/src/team_agent/events.py +47 -0
  10. package/src/team_agent/launch/core.py +2 -1
  11. package/src/team_agent/leader/__init__.py +273 -60
  12. package/src/team_agent/lifecycle/agents.py +54 -2
  13. package/src/team_agent/lifecycle/operations.py +87 -9
  14. package/src/team_agent/lifecycle/start.py +1 -1
  15. package/src/team_agent/message_store/core.py +8 -7
  16. package/src/team_agent/message_store/leader_notification_log.py +132 -0
  17. package/src/team_agent/message_store/result_watchers.py +144 -1
  18. package/src/team_agent/message_store/schema.py +31 -2
  19. package/src/team_agent/messaging/delivery.py +293 -1
  20. package/src/team_agent/messaging/idle_alerts.py +109 -9
  21. package/src/team_agent/messaging/leader.py +179 -10
  22. package/src/team_agent/messaging/leader_api_errors.py +216 -0
  23. package/src/team_agent/messaging/leader_panes.py +393 -23
  24. package/src/team_agent/messaging/result_delivery.py +219 -4
  25. package/src/team_agent/messaging/results.py +12 -21
  26. package/src/team_agent/messaging/scheduler.py +24 -2
  27. package/src/team_agent/messaging/send.py +21 -26
  28. package/src/team_agent/messaging/tmux_io.py +153 -23
  29. package/src/team_agent/messaging/tmux_prompt.py +87 -0
  30. package/src/team_agent/messaging/trust_auto_answer.py +44 -0
  31. package/src/team_agent/restart/orchestration.py +207 -4
  32. package/src/team_agent/runtime.py +7 -7
  33. package/src/team_agent/rust_core.py +157 -3
  34. package/src/team_agent/sessions/capture.py +65 -15
  35. package/src/team_agent/spec.py +59 -0
  36. package/src/team_agent/state.py +153 -10
  37. package/src/team_agent/status/inbox.py +33 -3
  38. package/src/team_agent/status/queries.py +32 -1
  39. package/src/team_agent/watch/__init__.py +145 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@team-agent/installer",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "npx installer for Team Agent",
5
5
  "keywords": [
6
6
  "codex",
@@ -72,6 +72,12 @@
72
72
  "startup_order": {
73
73
  "type": "array",
74
74
  "items": { "type": "string" }
75
+ },
76
+ "auto_trust_own_workspace": {
77
+ "type": "boolean",
78
+ "default": false,
79
+ "deprecated": true,
80
+ "description": "DEPRECATED: use env TEAM_AGENT_AUTO_TRUST_OWN_WORKSPACE per session. Will be removed in 0.3.0."
75
81
  }
76
82
  }
77
83
  },
@@ -64,7 +64,7 @@ def handle_provider_startup_prompts(workspace: Path, state: dict[str, Any], even
64
64
  continue
65
65
  agent_state["startup_prompt_check_count"] = check_count + 1
66
66
  adapter = get_adapter(agent_state["provider"])
67
- for prompt_event in adapter.handle_startup_prompts(session_name, window, checks=1, sleep_s=0.0):
67
+ for prompt_event in adapter.handle_startup_prompts(session_name, window, checks=20, sleep_s=0.5):
68
68
  event_log.write(
69
69
  "runtime.startup_prompt_handled",
70
70
  agent_id=agent_id,
@@ -88,9 +88,25 @@ def cmd_settle(args: argparse.Namespace) -> dict[str, Any]:
88
88
 
89
89
 
90
90
def cmd_status(args: argparse.Namespace) -> dict[str, Any] | str:
    """Handle the ``status`` subcommand.

    Output modes, checked in this order:

    * ``--summary``: the text built by ``_format_status_summary`` from the
      non-compact ``runtime.status`` payload. It cannot be combined with
      ``--json`` or with a positional agent.
    * ``--json``: the payload returned by ``runtime.status``; compact unless
      ``--detail`` is set.
    * otherwise: whatever ``runtime.format_status`` returns for the
      workspace and the optional agent.

    Flags are read with ``getattr(..., default) is True`` so that only a
    literal ``True`` on the namespace enables a mode; a missing attribute
    counts as "not set".

    Raises:
        TeamAgentError: when ``--summary`` is combined with ``--json`` or
            with an agent argument.
    """
    want_summary = getattr(args, "summary", False) is True
    want_json = getattr(args, "json", False) is True
    agent = getattr(args, "agent", None)

    if want_summary:
        # Validate the flag combination before touching the workspace.
        if want_json:
            raise TeamAgentError("--summary and --json are mutually exclusive")
        if agent:
            raise TeamAgentError("status --summary does not accept an agent argument")
        data = runtime.status(Path(args.workspace).resolve(), as_json=True, compact=False)
        return _format_status_summary(data)

    workspace = Path(args.workspace).resolve()
    if want_json:
        detail = getattr(args, "detail", False) is True
        return runtime.status(workspace, as_json=True, compact=not detail)
    return runtime.format_status(workspace, agent)
101
+
102
+
103
def cmd_watch(args: argparse.Namespace) -> None:
    """Handle the ``watch`` subcommand.

    Runs ``run_watch`` for the resolved workspace (optionally scoped to
    ``args.team``) and then always terminates the process with exit status
    0, whether the watcher returned on its own or was interrupted with
    Ctrl-C.

    Raises:
        SystemExit: always, with code 0.
    """
    # Function-scope import: the watch module is only loaded for this command.
    from team_agent.watch import run_watch

    try:
        run_watch(Path(args.workspace).resolve(), team=getattr(args, "team", None))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop watching; fall through to a clean exit.
        pass
    raise SystemExit(0)
94
110
 
95
111
 
96
112
  def cmd_approvals(args: argparse.Namespace) -> dict[str, Any]:
@@ -119,9 +135,10 @@ def cmd_peek(args: argparse.Namespace) -> dict[str, Any]:
119
135
 
120
136
 
121
137
def cmd_inbox(args: argparse.Namespace) -> dict[str, Any]:
    """Handle the ``inbox`` subcommand.

    Dispatches to ``runtime.inbox`` when ``--json`` is set and to
    ``runtime.format_inbox`` otherwise. Both receive the resolved workspace,
    the agent, the ``--limit`` value and the optional ``--since`` value
    (``None`` when the namespace has no ``since`` attribute).
    """
    since = getattr(args, "since", None)
    render = runtime.inbox if args.json else runtime.format_inbox
    workspace = Path(args.workspace).resolve()
    return render(workspace, args.agent, limit=args.limit, since=since)
125
142
 
126
143
 
127
144
  def cmd_sessions(args: argparse.Namespace) -> dict[str, Any]:
@@ -136,6 +153,14 @@ def cmd_takeover(args: argparse.Namespace) -> dict[str, Any]:
136
153
  return runtime.takeover(Path(args.workspace).resolve(), team=args.team, confirm=args.confirm)
137
154
 
138
155
 
156
def cmd_claim_leader(args: argparse.Namespace) -> dict[str, Any]:
    """Handle the ``claim-leader`` subcommand.

    Forwards the resolved workspace, the ``--team`` selector and the
    ``--confirm`` flag to ``runtime.claim_leader`` and returns its result.
    """
    workspace = Path(args.workspace).resolve()
    return runtime.claim_leader(workspace, team=args.team, confirm=args.confirm)
158
+
159
+
160
def cmd_identity(args: argparse.Namespace) -> dict[str, Any]:
    """Handle the ``identity`` subcommand.

    Forwards the resolved workspace and the ``--team`` selector to
    ``runtime.leader_identity`` and returns its result.
    """
    workspace = Path(args.workspace).resolve()
    return runtime.leader_identity(workspace, team=args.team)
162
+
163
+
139
164
  def cmd_send(args: argparse.Namespace) -> dict[str, Any]:
140
165
  target = _send_target(args)
141
166
  return runtime.send_message(
@@ -190,11 +215,102 @@ def cmd_validate_result(args: argparse.Namespace) -> dict[str, Any]:
190
215
  return {"ok": True, "task_id": envelope["task_id"], "agent_id": envelope["agent_id"], "status": envelope["status"]}
191
216
 
192
217
 
193
def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
    """Handle the ``doctor`` subcommand.

    Modes, checked in this order:

    1. ``--gate <name>``: only ``orphans`` is accepted; returns the result
       of ``orphan_gate(fix=..., confirm=...)``.
    2. ``--cleanup-orphans``: runs ``cleanup_orphan_coordinators`` and
       returns its raw result under ``--json``, otherwise the text from
       ``format_cleanup_orphans``.
    3. default: ``runtime.doctor`` on the optional spec path.

    Raises:
        TeamAgentError: when ``--fix`` is given without ``--gate``, or the
            gate name is not recognised.
    """
    gate = getattr(args, "gate", None)
    fix_flag = getattr(args, "fix", False)
    if fix_flag is True and not gate:
        raise TeamAgentError("--fix requires --gate")

    if isinstance(gate, str) and gate:
        from team_agent.diagnose.orphan_cleanup import orphan_gate

        if gate != "orphans":
            raise TeamAgentError(f"unknown doctor gate: {gate}")
        return orphan_gate(fix=bool(fix_flag), confirm=bool(getattr(args, "confirm", False)))

    if getattr(args, "cleanup_orphans", False):
        from team_agent.diagnose.orphan_cleanup import cleanup_orphan_coordinators, format_cleanup_orphans

        outcome = cleanup_orphan_coordinators(confirm=bool(getattr(args, "confirm", False)))
        return outcome if args.json else format_cleanup_orphans(outcome)

    spec_path = Path(args.spec).resolve() if args.spec else None
    return runtime.doctor(spec_path)
196
235
 
197
236
 
237
def _format_status_summary(data: dict[str, Any]) -> str:
    """Render a status payload as the five-line ``status --summary`` text.

    The lines are, in order: coordinator, receiver, agents, queued, latest
    result. Missing or falsy sections of ``data`` are treated as empty.

    Args:
        data: the status payload; the keys read here are ``coordinator``,
            ``leader_receiver``, ``agents``, ``agent_health``,
            ``latest_results``, ``tmux_session_present`` and
            ``queued_messages``.

    Returns:
        The five lines joined with ``"\\n"`` (no trailing newline).
    """
    coordinator = data.get("coordinator") or {}
    receiver = data.get("leader_receiver") or {}
    agents = data.get("agents") or {}
    health = data.get("agent_health") or {}

    # Only the first entry of latest_results is shown; None when there is none.
    latest_results = data.get("latest_results")
    latest = latest_results[0] if latest_results else None

    counts = _agent_summary_counts(agents, health)
    agents_line = (
        f"agents: {len(agents)} — running={counts['running']} busy={counts['busy']} "
        f"idle={counts['idle']} stopped={counts['stopped']} failed={counts['failed']} "
        f"unknown={counts['unknown']}"
    )
    # C3 (cr verdict, 2026-05-27): append a "(N interacted, M never)" marker
    # only when at least one agent counts as interacted. When N is zero the
    # agents line stays byte-identical to the pre-Route-B output, so the
    # Gap 18a triage contract (strict five-line shape with an exact line[2]
    # string) remains unchanged.
    interacted_count, never_count = _interaction_counts(agents)
    if interacted_count > 0:
        agents_line = f"{agents_line} ({interacted_count} interacted, {never_count} never)"

    coordinator_line = (
        f"coordinator: {coordinator.get('status') or 'stopped'} "
        f"schema_ok={bool(coordinator.get('schema_ok'))} "
        f"tmux={bool(data.get('tmux_session_present'))}"
    )
    receiver_line = (
        f"receiver: {receiver.get('pane_id') or '-'} "
        f"cmd={receiver.get('pane_current_command') or receiver.get('current_command') or '-'}"
    )
    queued_line = f"queued: {len(data.get('queued_messages') or [])} mailbox messages awaiting delivery"

    return "\n".join([
        coordinator_line,
        receiver_line,
        agents_line,
        queued_line,
        _latest_result_line(latest),
    ])
264
+
265
+
266
def _interaction_counts(agents: dict[str, Any]) -> tuple[int, int]:
    """Return ``(interacted, never_interacted)`` over the agents dict.

    An agent counts as interacted when its entry is a dict whose
    ``interacted`` field is a non-empty string other than the literal
    ``"never"``. Every other entry (non-dict, missing field, non-string or
    empty value) counts as never interacted. The summary deliberately reads
    this pre-computed field instead of re-parsing first_send_at, so it stays
    a derived view.
    """

    def has_interacted(entry: Any) -> bool:
        if not isinstance(entry, dict):
            return False
        marker = entry.get("interacted")
        return isinstance(marker, str) and marker not in ("", "never")

    interacted = sum(1 for entry in agents.values() if has_interacted(entry))
    return interacted, len(agents) - interacted
281
+
282
+
283
def _agent_summary_counts(agents: dict[str, Any], health: dict[str, Any]) -> dict[str, int]:
    """Bucket every agent into one of six summary categories.

    Each agent's own ``status`` (``raw``) and its ``status`` in ``health``
    (``hstatus``) are lower-cased and matched against these rules; the first
    match wins:

    1. failed  - either status is ``failed`` / ``error``
    2. stopped - raw is ``stopped`` / ``done`` or hstatus is ``done``
    3. busy    - raw is ``busy`` or hstatus is ``running`` / ``working``
    4. idle    - hstatus is ``idle``
    5. unknown - either status is blocked, awaiting_approval, interrupted,
       missing, stuck or uncertain
    6. running - raw is ``running``
    7. unknown - anything else

    Agent or health entries that are not dicts (``None``, strings, ...) are
    treated as having no status rather than raising, mirroring the
    ``isinstance`` guard used by ``_interaction_counts``.

    Returns:
        A dict with the keys running, busy, idle, stopped, failed, unknown.
    """
    failed_states = {"failed", "error"}
    uncertain_states = {"blocked", "awaiting_approval", "interrupted", "missing", "stuck", "uncertain"}

    def status_of(entry: Any) -> str:
        # Non-dict entries carry no usable status.
        if not isinstance(entry, dict):
            return ""
        return str(entry.get("status") or "").lower()

    counts = dict.fromkeys(("running", "busy", "idle", "stopped", "failed", "unknown"), 0)
    for agent_id, agent in agents.items():
        raw = status_of(agent)
        hstatus = status_of(health.get(agent_id))
        if raw in failed_states or hstatus in failed_states:
            bucket = "failed"
        elif raw in {"stopped", "done"} or hstatus == "done":
            bucket = "stopped"
        elif raw == "busy" or hstatus in {"running", "working"}:
            bucket = "busy"
        elif hstatus == "idle":
            bucket = "idle"
        elif raw in uncertain_states or hstatus in uncertain_states:
            bucket = "unknown"
        elif raw == "running":
            bucket = "running"
        else:
            bucket = "unknown"
        counts[bucket] += 1
    return counts
305
+
306
+
307
+ def _latest_result_line(result: dict[str, Any] | None) -> str:
308
+ if not result:
309
+ return "latest result: none"
310
+ summary = str(result.get("summary") or "").replace("\n", " ")[:80]
311
+ return f"latest result: {result.get('agent_id') or '-'} -> {summary or '-'} @ {runtime._age_text(result.get('created_at'))}"
312
+
313
+
198
314
  def cmd_shutdown(args: argparse.Namespace) -> dict[str, Any]:
199
315
  return runtime.shutdown(Path(args.workspace).resolve(), keep_logs=args.keep_logs, team=args.team)
200
316
 
@@ -24,12 +24,15 @@ from team_agent.cli.commands import (
24
24
  cmd_wait_ready,
25
25
  cmd_settle,
26
26
  cmd_status,
27
+ cmd_watch,
27
28
  cmd_approvals,
28
29
  cmd_peek,
29
30
  cmd_inbox,
30
31
  cmd_sessions,
31
32
  cmd_attach_leader,
32
33
  cmd_takeover,
34
+ cmd_claim_leader,
35
+ cmd_identity,
33
36
  cmd_send,
34
37
  cmd_collect,
35
38
  cmd_diagnose,
@@ -180,9 +183,15 @@ def main(argv: list[str] | None = None) -> None:
180
183
  p.add_argument("agent", nargs="?")
181
184
  p.add_argument("--workspace", default=".")
182
185
  p.add_argument("--detail", action="store_true", help="Include full raw runtime state in --json output")
186
+ p.add_argument("--summary", action="store_true", help="Emit five-line human-readable triage summary")
183
187
  add_json(p)
184
188
  p.set_defaults(func=cmd_status)
185
189
 
190
+ p = sub.add_parser("watch", help="Watch leader-visible team events")
191
+ p.add_argument("--workspace", default=".")
192
+ p.add_argument("--team", help="Explicit team/session selector when a workspace has multiple teams")
193
+ p.set_defaults(func=cmd_watch)
194
+
186
195
  p = sub.add_parser("approvals", help="Show structured pending worker approval prompts")
187
196
  p.add_argument("agent", nargs="?")
188
197
  p.add_argument("--workspace", default=".")
@@ -209,6 +218,12 @@ def main(argv: list[str] | None = None) -> None:
209
218
  p.add_argument("agent")
210
219
  p.add_argument("--workspace", default=".")
211
220
  p.add_argument("--limit", type=int, default=20)
221
+ p.add_argument(
222
+ "--since",
223
+ help="ISO 8601 timestamp; only show messages created at-or-after this time. "
224
+ "Use the timestamp from claim-leader's inbox_hint to retrieve messages "
225
+ "missed during a prior ambiguous-leader state.",
226
+ )
212
227
  add_json(p)
213
228
  p.set_defaults(func=cmd_inbox)
214
229
 
@@ -231,6 +246,19 @@ def main(argv: list[str] | None = None) -> None:
231
246
  add_json(p)
232
247
  p.set_defaults(func=cmd_takeover)
233
248
 
249
+ p = sub.add_parser("claim-leader", help="Claim this pane as leader after ambiguous leader recovery")
250
+ p.add_argument("--workspace", default=".")
251
+ p.add_argument("--team", help="Explicit team/session selector when a workspace has multiple teams")
252
+ p.add_argument("--confirm", action="store_true", help="Apply the claim; without this, show a dry-run summary")
253
+ add_json(p)
254
+ p.set_defaults(func=cmd_claim_leader)
255
+
256
+ p = sub.add_parser("identity", help="Show leader identity diagnostics")
257
+ p.add_argument("--workspace", default=".")
258
+ p.add_argument("--team", help="Explicit team/session selector when a workspace has multiple teams")
259
+ add_json(p)
260
+ p.set_defaults(func=cmd_identity)
261
+
234
262
  p = sub.add_parser(
235
263
  "send",
236
264
  help="Send a message to an agent, task assignee, or attached leader",
@@ -289,6 +317,19 @@ def main(argv: list[str] | None = None) -> None:
289
317
 
290
318
  p = sub.add_parser("doctor", help="Check local dependencies, providers, auth hints, tmux, and MCP")
291
319
  p.add_argument("spec", nargs="?")
320
+ p.add_argument("--gate", choices=["orphans"], help="Run a CI-friendly doctor gate")
321
+ p.add_argument("--fix", action="store_true", help="With --gate orphans: apply the gate fix")
322
+ p.add_argument(
323
+ "--cleanup-orphans",
324
+ action="store_true",
325
+ help="Scan for orphan team_agent.coordinator processes pointing at non-existent or "
326
+ "ephemeral-tempdir workspaces (dry-run unless --confirm is also passed).",
327
+ )
328
+ p.add_argument(
329
+ "--confirm",
330
+ action="store_true",
331
+ help="With --cleanup-orphans: send SIGTERM to each orphan (default is dry-run).",
332
+ )
292
333
  add_json(p)
293
334
  p.set_defaults(func=cmd_doctor)
294
335
 
@@ -429,7 +470,7 @@ def main(argv: list[str] | None = None) -> None:
429
470
  sub._choices_actions = [ # type: ignore[attr-defined]
430
471
  action for action in sub._choices_actions if action.help != argparse.SUPPRESS # type: ignore[attr-defined]
431
472
  ]
432
- sub.metavar = "{codex,claude,quick-start,send,status,approvals,inbox,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
473
+ sub.metavar = "{codex,claude,quick-start,send,status,watch,approvals,inbox,takeover,claim-leader,identity,shutdown,restart,start-agent,stop-agent,reset-agent,add-agent,fork-agent,remove-agent,stuck-list,stuck-cancel,acknowledge-idle,doctor}"
433
474
 
434
475
  args = parser.parse_args(raw_argv)
435
476
  try:
@@ -38,7 +38,23 @@ def main(argv: list[str] | None = None) -> None:
38
38
  signal.signal(signal.SIGINT, _stop)
39
39
 
40
40
  interval = args.tick_interval if args.tick_interval is not None else _tick_interval(workspace)
41
+ initial_ppid = os.getppid()
41
42
  while not STOP:
43
+ # Stage 14 (Gap 37b) — orphan self-detection. If our original parent (test harness,
44
+ # shell, or supervisor) died, our ppid is reparented to 1 (or to a launchd shim on
45
+ # macOS). When that happens AND the workspace no longer exists on disk, we are an
46
+ # orphan from a torn-down test environment and must self-terminate so we don't
47
+ # accumulate (today's evidence: 35 orphans pointing at /var/folders/...team-agent-
48
+ # watcher-dedupe-* paths long since cleaned up).
49
+ current_ppid = os.getppid()
50
+ if current_ppid != initial_ppid and current_ppid == 1 and not workspace.exists():
51
+ event_log.write(
52
+ "coordinator.orphan_self_terminate",
53
+ initial_ppid=initial_ppid,
54
+ current_ppid=current_ppid,
55
+ workspace=str(workspace),
56
+ )
57
+ break
42
58
  result = runtime.coordinator_tick(workspace)
43
59
  if result.get("stop") or args.once:
44
60
  break
@@ -46,18 +62,21 @@ def main(argv: list[str] | None = None) -> None:
46
62
  event_log.write("coordinator.exit", stop=STOP)
47
63
 
48
64
 
65
+ DEFAULT_TICK_INTERVAL_SEC = 5.0 # Stage 14 (Gap 36c) — bumped from 2.0 (2.5x less CPU)
66
+
67
+
49
68
def _tick_interval(workspace: Path) -> float:
    """Return the coordinator tick interval in seconds for ``workspace``.

    The value comes from ``runtime.tick_interval_sec`` in the team spec; the
    spec path is taken from the runtime state's ``spec_path`` and defaults
    to ``<workspace>/team.spec.yaml``. If the spec file does not exist, or
    loading/reading it raises, ``DEFAULT_TICK_INTERVAL_SEC`` is returned
    instead.
    """
    runtime_state = load_runtime_state(workspace)
    spec_file = Path(runtime_state.get("spec_path", workspace / "team.spec.yaml"))
    if spec_file.exists():
        try:
            runtime_section = load_spec(spec_file).get("runtime", {})
            configured = runtime_section.get("tick_interval_sec", DEFAULT_TICK_INTERVAL_SEC)
            return float(configured)
        except Exception:
            # Best effort: an unreadable or malformed spec falls back to the default.
            pass
    # Ensure schema exists even before launch; this makes doctor/tick diagnostics deterministic.
    MessageStore(workspace)
    return DEFAULT_TICK_INTERVAL_SEC
61
80
 
62
81
 
63
82
  if __name__ == "__main__":
@@ -265,6 +265,7 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
265
265
  detect_idle_fallbacks,
266
266
  )
267
267
  from team_agent.messaging.activity_detector import detect_compaction_degradation
268
+ from team_agent.messaging.leader_api_errors import detect_leader_api_errors
268
269
  from team_agent.messaging.session_drift import detect_session_drift
269
270
  from team_agent.state import load_runtime_state, save_runtime_state
270
271
  state = load_runtime_state(workspace)
@@ -318,8 +319,17 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
318
319
  )
319
320
  if drift:
320
321
  drift_results.append(drift)
322
+ api_errors = detect_leader_api_errors(workspace, state, store, event_log)
321
323
  save_runtime_state(workspace, state)
322
324
  results = _collect_results_and_notify_watchers(workspace, event_log)
325
+ # Stage 12: prune the dedupe log every tick — cheap O(n) delete bounded by 24h window.
326
+ from team_agent.message_store.leader_notification_log import prune_leader_notification_log
327
+ try:
328
+ pruned = prune_leader_notification_log(store, max_age_hours=24)
329
+ if pruned:
330
+ event_log.write("leader_notification.log_pruned", removed=pruned)
331
+ except Exception as exc:
332
+ event_log.write("leader_notification.prune_failed", error=str(exc))
323
333
  return {
324
334
  "ok": True,
325
335
  "stop": False,
@@ -330,5 +340,6 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
330
340
  "deadlock_alerts": deadlock_alerts,
331
341
  "compaction": compaction_results,
332
342
  "session_drift": drift_results,
343
+ "api_errors": api_errors,
333
344
  "results": results,
334
345
  }