@team-agent/installer 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/package.json +1 -1
  2. package/src/team_agent/abnormal_track.py +253 -0
  3. package/src/team_agent/cli/commands.py +17 -1
  4. package/src/team_agent/cli/parser.py +2 -2
  5. package/src/team_agent/compiler.py +1 -1
  6. package/src/team_agent/coordinator/lifecycle.py +20 -2
  7. package/src/team_agent/display/__init__.py +31 -0
  8. package/src/team_agent/display/adaptive.py +425 -0
  9. package/src/team_agent/display/backend.py +46 -0
  10. package/src/team_agent/display/close.py +6 -0
  11. package/src/team_agent/display/rebuild.py +102 -0
  12. package/src/team_agent/display/tiling.py +156 -0
  13. package/src/team_agent/display/worker_window.py +4 -0
  14. package/src/team_agent/display/workspace.py +36 -127
  15. package/src/team_agent/idle_predicate.py +200 -0
  16. package/src/team_agent/idle_takeover.py +59 -0
  17. package/src/team_agent/idle_takeover_wiring.py +111 -0
  18. package/src/team_agent/launch/core.py +13 -4
  19. package/src/team_agent/leader/__init__.py +444 -61
  20. package/src/team_agent/message_store/agent_health.py +6 -2
  21. package/src/team_agent/message_store/core.py +51 -18
  22. package/src/team_agent/message_store/leader_notification_log.py +63 -38
  23. package/src/team_agent/message_store/result_watchers.py +17 -11
  24. package/src/team_agent/message_store/schema.py +19 -2
  25. package/src/team_agent/message_store/schema_migration.py +386 -0
  26. package/src/team_agent/messaging/delivery.py +45 -2
  27. package/src/team_agent/messaging/leader_panes.py +115 -21
  28. package/src/team_agent/messaging/send.py +33 -0
  29. package/src/team_agent/messaging/tmux_io.py +49 -10
  30. package/src/team_agent/messaging/trust_auto_answer.py +11 -3
  31. package/src/team_agent/provider_state/README.md +78 -0
  32. package/src/team_agent/provider_state/__init__.py +86 -0
  33. package/src/team_agent/provider_state/claude.py +86 -0
  34. package/src/team_agent/provider_state/codex.py +84 -0
  35. package/src/team_agent/provider_state/common.py +207 -0
  36. package/src/team_agent/provider_state/registry.py +118 -0
  37. package/src/team_agent/restart/orchestration.py +9 -9
  38. package/src/team_agent/runtime.py +62 -12
  39. package/src/team_agent/spec.py +4 -3
  40. package/src/team_agent/wake.py +58 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@team-agent/installer",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "npx installer for Team Agent",
5
5
  "keywords": [
6
6
  "codex",
@@ -0,0 +1,253 @@
1
+ """Provider-neutral abnormal-state track (Gap 32 §4).
2
+
3
+ Reads structured fault records + process identity; never reads a screen and
4
+ never names a provider. Catch-bias for structured error/failed-class records
5
+ (C9), dedup by (signature, turn) (C8), and coordinator-independent whole-team
6
+ disappearance with clean-shutdown vs unexpected distinction (C10).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+
14
def process_abnormal_records(
    records: list[dict[str, Any]],
    *,
    registry: Any,
    notification_state: dict[str, Any] | None,
    event_sink: Any = None,
) -> dict[str, Any]:
    """Turn raw provider session records into deduplicated fault notifications.

    The provider is resolved from ``registry`` (either ``{"provider": name}``
    or a mapping that also carries the error lists). The provider reader
    converts ``records`` into structured fault facts, so this function never
    names a provider itself. Every fact is classified against the whitelist /
    blacklist and logged; facts that are not skipped are deduplicated by
    (signature, turn) before a notification is produced.

    Args:
        records: Raw session records to inspect.
        registry: Provider descriptor used to pick the reader and error lists.
        notification_state: Previous state; its ``"seen"`` list holds the
            dedupe keys already notified. ``None`` means a fresh state.
        event_sink: Optional callable that receives an ``abnormal.notify``
            event for every new notification.

    Returns:
        A dict with ``notifications``, ``discovery_log``, ``diagnostics`` and
        the updated ``notification_state`` (a copy; the input is not mutated).
    """
    from team_agent.provider_state import read_fault_facts
    from team_agent.provider_state.registry import get_provider_registry

    next_state = dict(notification_state or {})
    already_notified = set(next_state.get("seen") or [])
    notifications: list[dict[str, Any]] = []
    discovery_log: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []

    provider = _provider_of(registry)
    whitelist, blacklist = _lists_for(provider, registry, get_provider_registry)

    if provider:
        faults = read_fault_facts(provider, records or [])
    else:
        faults = []

    if not faults and records:
        # C9: records that yield no structured fault are never default-notify
        # candidates; they are only counted as a diagnostic.
        diagnostics.append({"kind": "no_structured_fault", "count": len(records)})

    for fact in faults:
        signature = str(fact.get("signature") or fact.get("reason") or "fault")
        turn_id = fact.get("turn_id")
        parts = (signature, fact.get("reason"), _raw_message(fact))
        haystack = " ".join(str(part) for part in parts if part).lower()
        decision = _classify(haystack, signature, whitelist, blacklist)
        kind = fact.get("kind")

        discovery_log.append({
            "signature": signature,
            "turn_id": turn_id,
            "decision": decision,
            "kind": kind,
            "provider": provider,
        })
        if decision == "skip":
            continue

        # C8: a retry loop inside one turn folds into a single notification.
        # Without a turn id, fall back to a content fingerprint so that
        # different faults stay distinct while exact duplicates still fold.
        if turn_id is not None:
            bucket = turn_id
        else:
            bucket = f"norow:{_record_fingerprint(fact)}"
        seen_key = f"{signature}\x00{bucket}"
        if seen_key in already_notified:
            continue
        already_notified.add(seen_key)

        payload = fact.get("raw", fact)
        notifications.append({
            "signature": signature,
            "turn_id": turn_id,
            "dedupe_key": (signature, bucket),
            "state": "blocked_on_human" if kind == "approval" else "abnormal",
            "decision": decision,
            "provider": provider,
            "raw": payload,
            "raw_record": payload,
        })
        _emit(event_sink, "abnormal.notify", signature=signature, turn_id=turn_id, decision=decision)

    next_state["seen"] = sorted(already_notified)
    return {
        "notifications": notifications,
        "discovery_log": discovery_log,
        "diagnostics": diagnostics,
        "notification_state": next_state,
    }
89
+
90
+
91
def detect_whole_team_gone(
    snapshot: dict[str, Any],
    *,
    marker_store: Any,
    event_sink: Any = None,
) -> dict[str, Any]:
    """Coordinator-independent whole-team-gone detection (C10/C13).

    The team counts as gone only when the coordinator, the leader, every
    provider process and every tmux session in ``snapshot`` are absent. A
    disappearance flagged as ``clean_shutdown`` or ``restart_in_progress`` is
    reported silently. Any other disappearance writes a ``whole_team_gone``
    marker, emits an ``abnormal.whole_team_gone`` event and asks for user
    escalation on the next leader command.

    Args:
        snapshot: Process/session facts. Worker entries are read from
            ``provider_processes``, falling back to ``nodes`` then ``agents``.
            The collection may be a sequence of entries or a mapping keyed by
            id whose values are the entries.
        marker_store: Destination for the durable marker (see ``_marker_set``).
        event_sink: Optional callable that receives the emitted event.

    Returns:
        A dict with ``state``, ``whole_team_gone``, ``classification``,
        ``notify``, ``escalate_user_on_next_leader_command`` and
        ``marker_written``.
    """
    coordinator = snapshot.get("coordinator") or {}
    leader = snapshot.get("leader") or {}
    provider_processes = snapshot.get("provider_processes")
    if provider_processes is None:
        provider_processes = snapshot.get("nodes") or snapshot.get("agents") or []
    if isinstance(provider_processes, dict):
        # Iterating a mapping yields its keys, and a non-empty id string is
        # always judged alive by _alive (bool(entry)), which would make the
        # team impossible to detect as gone. Inspect the entries instead.
        provider_processes = list(provider_processes.values())
    tmux_sessions = snapshot.get("tmux_sessions") or []

    coord_alive = _alive(coordinator)
    leader_alive = _alive(leader)
    any_worker_alive = any(_alive(p) for p in provider_processes)
    sessions_present = bool(tmux_sessions)

    whole_gone = not (coord_alive or leader_alive or any_worker_alive or sessions_present)

    if not whole_gone:
        return {
            "state": "alive",
            "whole_team_gone": False,
            "classification": "alive",
            "notify": False,
            "escalate_user_on_next_leader_command": False,
            "marker_written": False,
        }

    # Expected disappearances stay silent.
    if snapshot.get("clean_shutdown"):
        return _silent_gone("clean_shutdown")
    if snapshot.get("restart_in_progress"):
        return _silent_gone("restart_in_progress")

    # Unexpected disappearance (crash / abrupt exit): durable marker plus
    # deferred escalation.
    marker_written = _marker_set(marker_store, "whole_team_gone", {
        "classification": "unexpected_exit",
        "provider_processes": len(provider_processes),
    })
    _emit(event_sink, "abnormal.whole_team_gone", classification="unexpected_exit")
    return {
        "state": "whole_team_gone",
        "whole_team_gone": True,
        "classification": "unexpected_exit",
        "notify": True,
        "escalate_user_on_next_leader_command": True,
        "marker_written": bool(marker_written),
    }
148
+
149
+
150
def _silent_gone(classification: str) -> dict[str, Any]:
    """Build the result for an expected, non-notifying team disappearance.

    ``classification`` is used both as the ``state`` and the
    ``classification`` value; every notify/escalate/marker flag is False.
    """
    verdict: dict[str, Any] = {
        "state": classification,
        "whole_team_gone": True,
        "classification": classification,
    }
    for quiet_flag in ("notify", "escalate_user_on_next_leader_command", "marker_written"):
        verdict[quiet_flag] = False
    return verdict
159
+
160
+
161
def _alive(entry: Any) -> bool:
    """Report whether a snapshot entry describes something still running.

    Non-dict entries are judged by truthiness. A dict with an ``alive`` key is
    alive only when that value is exactly ``True``. Otherwise the ``process``
    value (when the key is present) or the dict itself is handed to
    ``process_is_live`` and the first element of its result is returned.
    """
    from team_agent.provider_state.common import process_is_live

    if not isinstance(entry, dict):
        return bool(entry)
    if "alive" in entry:
        return entry.get("alive") is True
    target = entry.get("process") if "process" in entry else entry
    live, _reason, _detail = process_is_live(target)
    return live
173
+
174
+
175
def _provider_of(registry: Any) -> str | None:
    """Extract the provider name from a registry descriptor.

    Returns the first string found under ``provider`` then ``kind``; returns
    ``None`` when ``registry`` is not a dict or neither key holds a string.
    """
    if not isinstance(registry, dict):
        return None
    for field in ("provider", "kind"):
        candidate = registry.get(field)
        if isinstance(candidate, str):
            return candidate
    return None
182
+
183
+
184
def _lists_for(provider: str | None, registry: Any, get_provider_registry: Any) -> tuple[list[str], list[str]]:
    """Resolve the lower-cased error whitelist and blacklist for a provider.

    ``registry`` is used directly when it carries the lists itself, either as
    flat ``error_whitelist`` / ``error_blacklist`` keys or as a nested
    ``error_lists`` dict. Otherwise the entry is looked up with
    ``get_provider_registry(provider)``. Within the chosen entry the nested
    ``error_lists`` values win over the flat keys.

    Args:
        provider: Provider name, or ``None`` when it could not be resolved.
        registry: Descriptor passed by the caller; may embed the lists.
        get_provider_registry: Callable mapping a provider name to its entry.

    Returns:
        ``(whitelist, blacklist)``; both empty when no dict entry is found.
    """
    entry: Any = None
    # Accept the nested form here as well: the reader below already prefers
    # ``error_lists``, so a mapping that only carries that key must not be
    # discarded in favour of a registry lookup.
    carries_lists = isinstance(registry, dict) and (
        "error_whitelist" in registry
        or "error_blacklist" in registry
        or isinstance(registry.get("error_lists"), dict)
    )
    if carries_lists:
        entry = registry
    elif provider is not None:
        entry = get_provider_registry(provider)
    if not isinstance(entry, dict):
        return [], []
    lists = entry.get("error_lists") if isinstance(entry.get("error_lists"), dict) else {}
    white = [str(x).lower() for x in (lists.get("whitelist") or entry.get("error_whitelist") or [])]
    black = [str(x).lower() for x in (lists.get("blacklist") or entry.get("error_blacklist") or [])]
    return white, black
196
+
197
+
198
def _classify(text: str, signature: str, white: list[str], black: list[str]) -> str:
    """Decide how a fault is handled: whitelist > blacklist > default.

    A pattern matches when it is non-empty and occurs in ``text`` or in the
    lower-cased ``signature``. Returns ``"skip"`` on a whitelist match,
    ``"notify_blacklist"`` on a blacklist match, else ``"notify_default"``.
    """
    lowered_signature = signature.lower()

    def _matches_any(patterns: list[str]) -> bool:
        for pattern in patterns:
            if pattern and (pattern in text or pattern in lowered_signature):
                return True
        return False

    if _matches_any(white):
        return "skip"
    if _matches_any(black):
        return "notify_blacklist"
    # C9 catch-bias: a structured fault that matches nothing still notifies.
    return "notify_default"
205
+
206
+
207
def _record_fingerprint(fact: dict[str, Any]) -> str:
    """Return a 16-hex-character content fingerprint for a fault fact.

    The fingerprint is the truncated SHA-256 of the fact's ``raw`` payload
    (or of the fact itself when ``raw`` is absent), serialized as key-sorted
    JSON. Payloads that JSON cannot serialize fall back to ``repr``.
    """
    import hashlib
    import json

    payload = fact.get("raw", fact)
    try:
        serialized = json.dumps(payload, sort_keys=True, default=str)
    except (TypeError, ValueError):
        serialized = repr(payload)
    encoded = serialized.encode("utf-8", errors="ignore")
    digest = hashlib.sha256(encoded).hexdigest()
    return digest[:16]
217
+
218
+
219
def _raw_message(fact: dict[str, Any]) -> str:
    """Return the ``message`` of the fact's raw record as a string.

    Yields an empty string when ``raw`` is not a dict or its ``message`` is
    missing or falsy.
    """
    payload = fact.get("raw")
    if not isinstance(payload, dict):
        return ""
    message = payload.get("message")
    return str(message or "")
224
+
225
+
226
def _marker_set(marker_store: Any, name: str, value: Any) -> bool:
    """Store ``value`` under ``name`` in the marker store, best effort.

    A dict store is assigned directly. Any other store is written through its
    ``set`` attribute, or ``write`` when ``set`` is missing or falsy.

    Returns:
        ``True`` when the marker was stored; ``False`` when the store is
        ``None``, exposes no callable writer, or the writer raised.
    """
    if marker_store is None:
        return False
    if isinstance(marker_store, dict):
        marker_store[name] = value
        return True

    writer = getattr(marker_store, "set", None)
    if not writer:
        writer = getattr(marker_store, "write", None)
    if not callable(writer):
        return False
    try:
        writer(name, value)
    except Exception:
        # Marker persistence is advisory; the caller only needs the outcome.
        return False
    return True
240
+
241
+
242
def _emit(event_sink: Any, name: str, **fields: Any) -> None:
    """Send an event to ``event_sink`` without ever raising ``Exception``.

    The sink is first called as ``event_sink(name, fields)``. If that raises
    ``TypeError`` it is retried with a single dict
    ``{"event": name, **fields}``. Any other ``Exception`` from either call
    is swallowed. A ``None`` sink is a no-op.
    """
    if event_sink is None:
        return

    try:
        event_sink(name, fields)
        return
    except TypeError:
        # Fall through to the single-argument calling convention below.
        pass
    except Exception:
        return

    try:
        event_sink({"event": name, **fields})
    except Exception:
        pass
@@ -224,6 +224,20 @@ def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
224
224
  if gate != "orphans":
225
225
  raise TeamAgentError(f"unknown doctor gate: {gate}")
226
226
  return orphan_gate(fix=bool(getattr(args, "fix", False)), confirm=bool(getattr(args, "confirm", False)))
227
+ from team_agent.message_store.schema import SCHEMA_VERSION
228
+ from team_agent.message_store.schema_migration import fix_schema_layout, schema_diagnosis
229
+ if getattr(args, "fix_schema", False) is True:
230
+ return fix_schema_layout(Path(args.workspace).resolve(), schema_version=SCHEMA_VERSION)
231
+ schema = schema_diagnosis(Path(args.workspace).resolve(), schema_version=SCHEMA_VERSION)
232
+ if schema.get("layout_diffs"):
233
+ return {
234
+ "ok": True,
235
+ "schema": schema,
236
+ "coordinator": {
237
+ "schema_ok": False,
238
+ "schema_error": "team.db physical layout drift detected",
239
+ },
240
+ }
227
241
  if getattr(args, "cleanup_orphans", False):
228
242
  from team_agent.diagnose.orphan_cleanup import cleanup_orphan_coordinators, format_cleanup_orphans
229
243
  result = cleanup_orphan_coordinators(confirm=bool(getattr(args, "confirm", False)))
@@ -231,7 +245,9 @@ def cmd_doctor(args: argparse.Namespace) -> dict[str, Any] | str:
231
245
  return result
232
246
  return format_cleanup_orphans(result)
233
247
  spec = Path(args.spec).resolve() if args.spec else None
234
- return runtime.doctor(spec)
248
+ result = runtime.doctor(spec)
249
+ result["schema"] = schema
250
+ return result
235
251
 
236
252
 
237
253
  def _format_status_summary(data: dict[str, Any]) -> str:
@@ -54,7 +54,6 @@ from team_agent.cli.commands import (
54
54
  cmd_advanced,
55
55
  cmd_install_skill,
56
56
  cmd_run_overnight,
57
-
58
57
  )
59
58
  from team_agent.cli.e2e import cmd_e2e
60
59
  from team_agent.cli.helpers import (
@@ -65,7 +64,6 @@ from team_agent.cli.helpers import (
65
64
  emit,
66
65
  )
67
66
 
68
-
69
67
  SEND_ORDER_HINT = (
70
68
  "options must appear before target/message. Use: "
71
69
  "team-agent send --task <task_id> --json \"<message>\" or "
@@ -317,8 +315,10 @@ def main(argv: list[str] | None = None) -> None:
317
315
 
318
316
  p = sub.add_parser("doctor", help="Check local dependencies, providers, auth hints, tmux, and MCP")
319
317
  p.add_argument("spec", nargs="?")
318
+ p.add_argument("--workspace", default=".", help="Workspace whose team.db schema should be diagnosed")
320
319
  p.add_argument("--gate", choices=["orphans"], help="Run a CI-friendly doctor gate")
321
320
  p.add_argument("--fix", action="store_true", help="With --gate orphans: apply the gate fix")
321
+ p.add_argument("--fix-schema", action="store_true", help="Rebuild drifted team.db table layouts after writing a backup")
322
322
  p.add_argument(
323
323
  "--cleanup-orphans",
324
324
  action="store_true",
@@ -93,7 +93,7 @@ def compile_team(team_dir: Path, out_path: Path | None = None) -> dict[str, Any]
93
93
  },
94
94
  "runtime": {
95
95
  "backend": "tmux",
96
- "display_backend": str(team_meta.get("display_backend") or "ghostty_window"),
96
+ "display_backend": str(team_meta.get("display_backend") or "adaptive"),
97
97
  "session_name": str(team_meta.get("session_name") or f"team-{_slug(team_name)}"),
98
98
  "auto_launch": True,
99
99
  "require_user_approval_before_launch": True,
@@ -262,8 +262,10 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
262
262
  )
263
263
  from team_agent.messaging.idle_alerts import (
264
264
  detect_cross_worker_deadlocks,
265
- detect_idle_fallbacks,
266
265
  )
266
+ from team_agent.idle_predicate import evaluate_takeover_reminder
267
+ from team_agent.idle_takeover_wiring import build_idle_nodes, push_idle_reminder, IDLE_DEBOUNCE_SECONDS
268
+ import time as _time
267
269
  from team_agent.messaging.activity_detector import detect_compaction_degradation
268
270
  from team_agent.messaging.leader_api_errors import detect_leader_api_errors
269
271
  from team_agent.messaging.session_drift import detect_session_drift
@@ -283,7 +285,23 @@ def coordinator_tick(workspace: Path) -> dict[str, Any]:
283
285
  delivered = _deliver_pending_messages(workspace, state, event_log)
284
286
  fired = _fire_due_scheduled_events(workspace, store, event_log)
285
287
  stuck = _detect_stuck_agents(workspace, state, store, event_log)
286
- idle_alerts = detect_idle_fallbacks(workspace, state, store, event_log)
288
+ # Gap 32: the take-over reminder is driven by file-fact turn-state via the
289
+ # idle_takeover predicate (the legacy screen-scrape obligation path is retired).
290
+ _coord_meta = state.setdefault("coordinator", {})
291
+ idle_eval = evaluate_takeover_reminder(
292
+ build_idle_nodes(state),
293
+ monitor_state=_coord_meta.get("idle_takeover_monitor"),
294
+ now_monotonic=_time.monotonic(),
295
+ debounce_seconds=IDLE_DEBOUNCE_SECONDS,
296
+ )
297
+ _coord_meta["idle_takeover_monitor"] = idle_eval.get("monitor_state")
298
+ push_idle_reminder(workspace, state, event_log, idle_eval)
299
+ idle_alerts = (
300
+ [{"alert_type": "idle_takeover", "message": idle_eval.get("message"),
301
+ "reason": idle_eval.get("reason"), "interrupted": idle_eval.get("interrupted_nodes")}]
302
+ if idle_eval.get("should_ping")
303
+ else []
304
+ )
287
305
  deadlock_alerts = detect_cross_worker_deadlocks(workspace, state, store, event_log)
288
306
  compaction_results: list[dict[str, Any]] = []
289
307
  for agent_id, agent_state in state.get("agents", {}).items():
@@ -1,9 +1,27 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from team_agent.display.adaptive import (
4
+ ADAPTIVE_BLOCK_REASONS,
5
+ adaptive_blocked,
6
+ close_adaptive_display,
7
+ close_adaptive_windows,
8
+ open_adaptive_display,
9
+ prepare_adaptive_windows,
10
+ probe_display_capabilities,
11
+ )
12
+ from team_agent.display.backend import (
13
+ ADAPTIVE_DISPLAY_BACKEND,
14
+ DISPLAY_BACKENDS_WITH_WORKER_VIEWS,
15
+ GHOSTTY_DISPLAY_BACKENDS,
16
+ VALID_DISPLAY_BACKENDS,
17
+ resolve_display_backend,
18
+ )
3
19
  from team_agent.display.close import (
20
+ close_adaptive_display,
4
21
  close_ghostty_display,
5
22
  close_ghostty_workspace,
6
23
  close_ghostty_workspace_slot,
24
+ close_team_display_backends,
7
25
  )
8
26
  from team_agent.display.ghostty import (
9
27
  ghostty_app_exists,
@@ -34,10 +52,19 @@ from team_agent.display.workspace import (
34
52
  )
35
53
 
36
54
  __all__ = [
55
+ "ADAPTIVE_BLOCK_REASONS",
56
+ "ADAPTIVE_DISPLAY_BACKEND",
57
+ "DISPLAY_BACKENDS_WITH_WORKER_VIEWS",
37
58
  "GHOSTTY_WORKSPACE_PANES_PER_WINDOW",
59
+ "GHOSTTY_DISPLAY_BACKENDS",
60
+ "VALID_DISPLAY_BACKENDS",
61
+ "adaptive_blocked",
62
+ "close_adaptive_display",
63
+ "close_adaptive_windows",
38
64
  "close_ghostty_display",
39
65
  "close_ghostty_workspace",
40
66
  "close_ghostty_workspace_slot",
67
+ "close_team_display_backends",
41
68
  "ghostty_app_exists",
42
69
  "ghostty_attach_args",
43
70
  "ghostty_command",
@@ -51,11 +78,15 @@ __all__ = [
51
78
  "ghostty_workspace_window_name",
52
79
  "kill_ghostty_workspace_linked_sessions",
53
80
  "open_ghostty_worker_window",
81
+ "open_adaptive_display",
54
82
  "open_ghostty_workspace",
55
83
  "open_ghostty_workspace_agent_display",
56
84
  "open_worker_displays",
85
+ "prepare_adaptive_windows",
57
86
  "prepare_ghostty_display_session",
58
87
  "prepare_ghostty_workspace_aggregator",
59
88
  "prepare_ghostty_workspace_linked_sessions",
89
+ "probe_display_capabilities",
90
+ "resolve_display_backend",
60
91
  "set_ghostty_workspace_pane_title",
61
92
  ]