@team-agent/installer 0.1.11 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/crates/team-agent-core/src/lib.rs +50 -5
  2. package/package.json +1 -1
  3. package/schemas/team.schema.json +1 -0
  4. package/src/team_agent/approvals/__init__.py +65 -0
  5. package/src/team_agent/approvals/constants.py +6 -0
  6. package/src/team_agent/approvals/parsing.py +176 -0
  7. package/src/team_agent/approvals/runtime_prompts.py +171 -0
  8. package/src/team_agent/approvals/status.py +165 -0
  9. package/src/team_agent/cli/__init__.py +137 -0
  10. package/src/team_agent/cli/commands.py +339 -0
  11. package/src/team_agent/cli/e2e.py +202 -0
  12. package/src/team_agent/cli/helpers.py +137 -0
  13. package/src/team_agent/cli/parser.py +477 -0
  14. package/src/team_agent/compiler.py +98 -33
  15. package/src/team_agent/coordinator/__init__.py +53 -0
  16. package/src/team_agent/{coordinator.py → coordinator/__main__.py} +3 -1
  17. package/src/team_agent/coordinator/lifecycle.py +334 -0
  18. package/src/team_agent/coordinator/metadata.py +61 -0
  19. package/src/team_agent/coordinator/paths.py +17 -0
  20. package/src/team_agent/diagnose/__init__.py +48 -0
  21. package/src/team_agent/diagnose/checks.py +101 -0
  22. package/src/team_agent/diagnose/health.py +241 -0
  23. package/src/team_agent/diagnose/preflight.py +194 -0
  24. package/src/team_agent/diagnose/quick_start.py +233 -0
  25. package/src/team_agent/display/__init__.py +61 -0
  26. package/src/team_agent/display/close.py +147 -0
  27. package/src/team_agent/display/ghostty.py +77 -0
  28. package/src/team_agent/display/worker_window.py +110 -0
  29. package/src/team_agent/display/workspace.py +473 -0
  30. package/src/team_agent/launch/__init__.py +41 -0
  31. package/src/team_agent/launch/bootstrap.py +85 -0
  32. package/src/team_agent/launch/config.py +106 -0
  33. package/src/team_agent/launch/core.py +291 -0
  34. package/src/team_agent/launch/requirements.py +57 -0
  35. package/src/team_agent/leader/__init__.py +320 -0
  36. package/src/team_agent/lifecycle/__init__.py +5 -0
  37. package/src/team_agent/lifecycle/agents.py +226 -0
  38. package/src/team_agent/lifecycle/operations.py +321 -0
  39. package/src/team_agent/lifecycle/paste_buffer_hygiene.py +39 -0
  40. package/src/team_agent/lifecycle/start.py +363 -0
  41. package/src/team_agent/mcp_server/__init__.py +42 -0
  42. package/src/team_agent/mcp_server/__main__.py +7 -0
  43. package/src/team_agent/mcp_server/contracts.py +148 -0
  44. package/src/team_agent/mcp_server/normalize.py +257 -0
  45. package/src/team_agent/mcp_server/server.py +150 -0
  46. package/src/team_agent/mcp_server/tools.py +205 -0
  47. package/src/team_agent/message_store/__init__.py +23 -0
  48. package/src/team_agent/message_store/agent_health.py +109 -0
  49. package/src/team_agent/{message_store.py → message_store/core.py} +188 -245
  50. package/src/team_agent/message_store/result_watchers.py +102 -0
  51. package/src/team_agent/message_store/schema.py +266 -0
  52. package/src/team_agent/messaging/__init__.py +1 -0
  53. package/src/team_agent/messaging/activity_detector.py +190 -0
  54. package/src/team_agent/messaging/delivery.py +138 -0
  55. package/src/team_agent/messaging/deps.py +263 -0
  56. package/src/team_agent/messaging/idle_alerts.py +323 -0
  57. package/src/team_agent/messaging/internal_delivery.py +46 -0
  58. package/src/team_agent/messaging/leader.py +317 -0
  59. package/src/team_agent/messaging/leader_panes.py +343 -0
  60. package/src/team_agent/messaging/owner_bypass.py +29 -0
  61. package/src/team_agent/messaging/result_delivery.py +300 -0
  62. package/src/team_agent/messaging/results.py +456 -0
  63. package/src/team_agent/messaging/scheduler.py +428 -0
  64. package/src/team_agent/messaging/send.py +500 -0
  65. package/src/team_agent/messaging/session_drift.py +94 -0
  66. package/src/team_agent/messaging/tmux_io.py +337 -0
  67. package/src/team_agent/messaging/tmux_prompt.py +229 -0
  68. package/src/team_agent/orchestrator/__init__.py +376 -0
  69. package/src/team_agent/orchestrator/plan.py +122 -0
  70. package/src/team_agent/orchestrator/state.py +128 -0
  71. package/src/team_agent/profiles/__init__.py +82 -0
  72. package/src/team_agent/profiles/constants.py +19 -0
  73. package/src/team_agent/profiles/core.py +407 -0
  74. package/src/team_agent/profiles/helpers.py +69 -0
  75. package/src/team_agent/profiles/provider_env.py +188 -0
  76. package/src/team_agent/profiles/smoke.py +201 -0
  77. package/src/team_agent/provider_cli/__init__.py +43 -0
  78. package/src/team_agent/provider_cli/adapter.py +167 -0
  79. package/src/team_agent/provider_cli/base.py +48 -0
  80. package/src/team_agent/provider_cli/claude.py +457 -0
  81. package/src/team_agent/provider_cli/codex.py +319 -0
  82. package/src/team_agent/provider_cli/copilot.py +8 -0
  83. package/src/team_agent/provider_cli/fake.py +39 -0
  84. package/src/team_agent/provider_cli/gemini.py +95 -0
  85. package/src/team_agent/provider_cli/opencode.py +8 -0
  86. package/src/team_agent/provider_cli/prompt.py +62 -0
  87. package/src/team_agent/provider_cli/registry.py +18 -0
  88. package/src/team_agent/provider_cli/unsupported.py +32 -0
  89. package/src/team_agent/providers.py +67 -949
  90. package/src/team_agent/quality_gates.py +104 -0
  91. package/src/team_agent/restart/__init__.py +34 -0
  92. package/src/team_agent/restart/orchestration.py +328 -0
  93. package/src/team_agent/restart/selection.py +89 -0
  94. package/src/team_agent/restart/snapshot.py +70 -0
  95. package/src/team_agent/runtime.py +809 -5892
  96. package/src/team_agent/rust_core.py +22 -5
  97. package/src/team_agent/sessions/__init__.py +25 -0
  98. package/src/team_agent/sessions/capture.py +93 -0
  99. package/src/team_agent/sessions/inventory.py +44 -0
  100. package/src/team_agent/sessions/resume.py +135 -0
  101. package/src/team_agent/spec.py +3 -1
  102. package/src/team_agent/state.py +218 -4
  103. package/src/team_agent/status/__init__.py +63 -0
  104. package/src/team_agent/status/approvals.py +52 -0
  105. package/src/team_agent/status/compact.py +158 -0
  106. package/src/team_agent/status/constants.py +18 -0
  107. package/src/team_agent/status/inbox.py +28 -0
  108. package/src/team_agent/status/peek.py +117 -0
  109. package/src/team_agent/status/queries.py +168 -0
  110. package/src/team_agent/terminal.py +57 -0
  111. package/src/team_agent/cli.py +0 -858
  112. package/src/team_agent/mcp_server.py +0 -579
  113. package/src/team_agent/profiles.py +0 -882
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  from pathlib import Path
4
5
  from typing import Any
5
6
 
@@ -39,40 +40,19 @@ def compile_team(team_dir: Path, out_path: Path | None = None) -> dict[str, Any]
39
40
  routing_rules = []
40
41
  startup_order = []
41
42
  for role_doc in sorted(agents_dir.glob("*.md")):
42
- meta, body = _read_front_matter(role_doc)
43
- if "auth_mode" not in meta and default_auth_mode is not None:
44
- meta["auth_mode"] = default_auth_mode
45
- if "profile" not in meta and default_profile is not None:
46
- meta["profile"] = default_profile
47
- profile_model = _profile_model(workspace, meta.get("profile"), team_dir / "profiles")
48
- if (
49
- "model" not in meta
50
- and not (meta.get("auth_mode") == "compatible_api" and profile_model)
51
- ):
52
- meta["model"] = _default_model_for_provider(meta.get("provider"), provider_models, default_model)
53
- _validate_role_doc(role_doc, meta, body, profile_names, profile_model)
43
+ meta, body = _role_doc_meta_for_team(
44
+ role_doc,
45
+ team_meta,
46
+ workspace,
47
+ team_dir,
48
+ profile_names,
49
+ default_auth_mode,
50
+ default_profile,
51
+ provider_models,
52
+ default_model,
53
+ )
54
54
  agent_id = str(meta["name"])
55
- tools = _normalize_tools(list(meta["tools"] or []))
56
- agent = {
57
- "id": agent_id,
58
- "role": str(meta["role"]),
59
- "provider": str(meta["provider"]),
60
- "model": str(meta["model"]) if meta.get("model") is not None else None,
61
- "auth_mode": str(meta["auth_mode"]),
62
- "working_directory": str(workspace),
63
- "system_prompt": {"inline": body.strip() or str(meta["role"]), "file": None},
64
- "tools": tools,
65
- "permission_mode": "restricted",
66
- "preferred_for": [agent_id, str(meta["role"])],
67
- "avoid_for": [],
68
- "output_contract": {
69
- "format": "result_envelope_v1",
70
- "required_fields": ["task_id", "status", "summary", "artifacts"],
71
- },
72
- }
73
- if meta.get("profile"):
74
- agent["profile"] = str(meta["profile"])
75
- agent["credential_ref"] = f"profile:{meta['profile']}"
55
+ agent = _agent_from_role_doc(meta, body, workspace, agent_id)
76
56
  agents.append(agent)
77
57
  routing_rules.append({"id": f"route-{agent_id}", "match": {"assignee": [agent_id]}, "assign_to": agent_id, "priority": 10})
78
58
  startup_order.append(agent_id)
@@ -155,6 +135,41 @@ def compile_team(team_dir: Path, out_path: Path | None = None) -> dict[str, Any]
155
135
  return {"ok": True, "team_dir": str(team_dir), "out": str(out_path) if out_path else None, "spec": spec}
156
136
 
157
137
 
138
def compile_role_doc_agent(role_doc: Path, team_dir: Path, agent_id: str | None = None) -> dict[str, Any]:
    """Compile one role document on disk into an agent entry.

    The role document path is resolved, its front matter and body are read,
    and both are handed to ``compile_role_entry_agent`` together with the
    team directory and the optional ``agent_id`` override.
    """
    resolved_doc = role_doc.resolve()
    front_matter, text = _read_front_matter(resolved_doc)
    return compile_role_entry_agent(resolved_doc, team_dir, front_matter, text, agent_id)
141
+
142
+
143
def compile_role_entry_agent(
    role_doc: Path,
    team_dir: Path,
    meta: dict[str, Any],
    body: str,
    agent_id: str | None = None,
) -> dict[str, Any]:
    """Compile already-parsed role metadata into an agent entry for a team.

    Team-level defaults are read from ``TEAM.md`` inside ``team_dir`` and
    applied to the role metadata before the agent dict is built.

    Raises:
        ValidationError: if ``team_dir`` does not contain a ``TEAM.md``.
    """
    team_dir = team_dir.resolve()
    workspace = team_workspace(team_dir)
    team_doc = team_dir / "TEAM.md"
    if not team_doc.exists():
        raise ValidationError(f"{team_doc}: missing TEAM.md")

    profile_names = known_profiles(team_dir)
    team_meta, _unused_team_body = _read_front_matter(team_doc)

    # Team-wide fallbacks; the auth mode falls back to "subscription" when
    # the team document does not set one.
    fallback_auth_mode = team_meta.get("default_auth_mode") or "subscription"
    fallback_profile = team_meta.get("default_profile")
    provider_models = _provider_model_defaults(team_meta)
    fallback_model = team_meta.get("default_model") or team_meta.get("model")

    resolved_meta, resolved_body = _role_doc_meta_for_team(
        role_doc,
        team_meta,
        workspace,
        team_dir,
        profile_names,
        fallback_auth_mode,
        fallback_profile,
        provider_models,
        fallback_model,
        role_meta=meta,
        role_body=body,
    )
    # An explicit agent_id wins; otherwise the role's own name is used.
    final_id = str(agent_id or resolved_meta["name"])
    return _agent_from_role_doc(resolved_meta, resolved_body, workspace, final_id)
171
+
172
+
158
173
  def _read_front_matter(path: Path) -> tuple[dict[str, Any], str]:
159
174
  text = path.read_text(encoding="utf-8")
160
175
  if not text.startswith("---\n"):
@@ -170,6 +185,56 @@ def _read_front_matter(path: Path) -> tuple[dict[str, Any], str]:
170
185
  return data, body
171
186
 
172
187
 
188
def _role_doc_meta_for_team(
    role_doc: Path,
    team_meta: dict[str, Any],
    workspace: Path,
    team_dir: Path,
    profile_names: set[str],
    default_auth_mode: Any,
    default_profile: Any,
    provider_models: dict[str, str],
    default_model: Any,
    role_meta: dict[str, Any] | None = None,
    role_body: str | None = None,
) -> tuple[dict[str, Any], str]:
    """Return validated role metadata (with team defaults applied) and the body.

    Caller-supplied ``role_meta``/``role_body`` are used only when BOTH are
    given; otherwise the front matter is read from ``role_doc``. The metadata
    is deep-copied before defaults are filled in, so the caller's dict is not
    mutated.
    """
    if role_meta is not None and role_body is not None:
        source_meta, body = role_meta, role_body
    else:
        source_meta, body = _read_front_matter(role_doc)
    meta = copy.deepcopy(source_meta)

    if default_auth_mode is not None:
        meta.setdefault("auth_mode", default_auth_mode)
    if default_profile is not None:
        meta.setdefault("profile", default_profile)

    profile_model = _profile_model(workspace, meta.get("profile"), team_dir / "profiles")
    # A compatible_api role whose profile already supplies a model keeps
    # "model" unset; every other role without a model gets a default.
    profile_supplies_model = meta.get("auth_mode") == "compatible_api" and bool(profile_model)
    if "model" not in meta and not profile_supplies_model:
        meta["model"] = _default_model_for_provider(meta.get("provider"), provider_models, default_model)

    _validate_role_doc(role_doc, meta, body, profile_names, profile_model)
    return meta, body
212
+
213
+
214
def _agent_from_role_doc(meta: dict[str, Any], body: str, workspace: Path, agent_id: str) -> dict[str, Any]:
    """Build the agent dict for one role from its metadata and prompt body.

    ``meta`` must contain ``role``, ``provider``, ``auth_mode`` and ``tools``;
    ``model`` and ``profile`` are optional. When a profile is present the
    agent also carries ``profile`` and a ``credential_ref`` pointing at it.
    """
    role = str(meta["role"])
    provider = str(meta["provider"])
    raw_model = meta.get("model")
    model = None if raw_model is None else str(raw_model)
    auth_mode = str(meta["auth_mode"])
    # An empty body falls back to the role name as the inline prompt.
    prompt_text = body.strip() or role
    tools = _normalize_tools(list(meta["tools"] or []))

    agent = {
        "id": agent_id,
        "role": role,
        "provider": provider,
        "model": model,
        "auth_mode": auth_mode,
        "working_directory": str(workspace),
        "system_prompt": {"inline": prompt_text, "file": None},
        "tools": tools,
        "permission_mode": "restricted",
        "preferred_for": [agent_id, role],
        "avoid_for": [],
        "output_contract": {
            "format": "result_envelope_v1",
            "required_fields": ["task_id", "status", "summary", "artifacts"],
        },
    }

    profile = meta.get("profile")
    if profile:
        agent["profile"] = str(profile)
        agent["credential_ref"] = f"profile:{profile}"
    return agent
236
+
237
+
173
238
  def _validate_role_doc(
174
239
  path: Path,
175
240
  meta: dict[str, Any],
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from team_agent.coordinator.lifecycle import (
6
+ coordinator_health,
7
+ coordinator_tick,
8
+ message_store_schema_health,
9
+ start_coordinator,
10
+ stop_coordinator,
11
+ )
12
+ from team_agent.coordinator.metadata import (
13
+ COORDINATOR_PROTOCOL_VERSION,
14
+ coordinator_metadata_ok,
15
+ pid_is_running,
16
+ read_coordinator_metadata,
17
+ write_coordinator_metadata,
18
+ )
19
+ from team_agent.coordinator.paths import (
20
+ coordinator_log_path,
21
+ coordinator_meta_path,
22
+ coordinator_pid_path,
23
+ )
24
+
25
# Public names of the coordinator package. Every entry except "main" is
# imported eagerly at the top of this module; "main" is resolved on first
# access by the module-level __getattr__ defined in this file.
__all__ = [
    "COORDINATOR_PROTOCOL_VERSION",
    "coordinator_health",
    "coordinator_log_path",
    "coordinator_meta_path",
    "coordinator_metadata_ok",
    "coordinator_pid_path",
    "coordinator_tick",
    "main",
    "message_store_schema_health",
    "pid_is_running",
    "read_coordinator_metadata",
    "start_coordinator",
    "stop_coordinator",
    "write_coordinator_metadata",
]
41
+
42
+
43
def __getattr__(name: str) -> Any:
    """Resolve ``main`` lazily; any other missing attribute raises AttributeError.

    The daemon entry point is re-exported on demand so the pyproject
    console_script `team-agent-coordinator = "team_agent.coordinator:main"`
    keeps resolving after the package split. An eager top-level
    `from team_agent.coordinator.__main__ import main` would trigger the
    runtime <-> coordinator import cycle (runtime imports
    coordinator/__init__ at module load).
    """
    if name != "main":
        raise AttributeError(f"module 'team_agent.coordinator' has no attribute {name!r}")
    from team_agent.coordinator.__main__ import main as daemon_entry

    return daemon_entry
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import argparse
4
+ import os
4
5
  import signal
5
6
  import sys
6
7
  import time
@@ -29,7 +30,8 @@ def main(argv: list[str] | None = None) -> None:
29
30
  args = parser.parse_args(argv)
30
31
  workspace = Path(args.workspace).resolve()
31
32
  runtime.ensure_workspace_dirs(workspace)
32
- runtime.coordinator_pid_path(workspace).write_text(str(__import__("os").getpid()), encoding="utf-8")
33
+ runtime.coordinator_pid_path(workspace).write_text(str(os.getpid()), encoding="utf-8")
34
+ runtime.write_coordinator_metadata(workspace, os.getpid(), source="boot")
33
35
  event_log = EventLog(workspace)
34
36
  event_log.write("coordinator.boot", workspace=str(workspace), once=args.once)
35
37
  signal.signal(signal.SIGTERM, _stop)
@@ -0,0 +1,334 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import signal
5
+ import subprocess
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from team_agent.coordinator.metadata import (
11
+ COORDINATOR_PROTOCOL_VERSION,
12
+ coordinator_metadata_ok,
13
+ pid_is_running,
14
+ read_coordinator_metadata,
15
+ write_coordinator_metadata,
16
+ )
17
+ from team_agent.coordinator.paths import (
18
+ coordinator_log_path,
19
+ coordinator_meta_path,
20
+ coordinator_pid_path,
21
+ )
22
+ from team_agent.events import EventLog
23
+ from team_agent.message_store import MessageStore
24
+
25
+
26
def coordinator_health(workspace: Path) -> dict[str, Any]:
    """Report whether a compatible coordinator process is alive for ``workspace``.

    The result always carries ``ok``, ``status``, ``pid``, ``metadata`` and
    ``metadata_ok`` plus every key returned by
    ``message_store_schema_health``. ``status`` is one of:

    * ``missing``     - no pid file exists
    * ``invalid_pid`` - the pid file does not hold a positive integer
    * ``running``     - the recorded pid is alive
    * ``stale``       - the recorded pid is no longer alive

    ``ok`` is True only when the process is running, its metadata matches
    this code's protocol/schema versions and the schema check passed.
    """
    schema = message_store_schema_health(workspace)
    pid_path = coordinator_pid_path(workspace)
    if not pid_path.exists():
        return {"ok": False, "status": "missing", "pid": None, "metadata": None, "metadata_ok": False, **schema}
    try:
        pid = int(pid_path.read_text(encoding="utf-8").strip())
        if pid <= 0:
            # pid 0 and negative pids address process groups in kill(2), not a
            # single process; a probe with signal 0 would succeed and make the
            # file look like a live coordinator. Treat them as corrupt.
            raise ValueError(f"non-positive pid {pid}")
    except ValueError:
        return {"ok": False, "status": "invalid_pid", "pid": None, "metadata": None, "metadata_ok": False, **schema}
    running = pid_is_running(pid)
    metadata = read_coordinator_metadata(workspace)
    metadata_ok = coordinator_metadata_ok(metadata, pid)
    ok = running and metadata_ok and bool(schema.get("schema_ok"))
    return {
        "ok": ok,
        "status": "running" if running else "stale",
        "pid": pid,
        "metadata": metadata,
        "metadata_ok": metadata_ok,
        **schema,
    }
47
+
48
+
49
def start_coordinator(workspace: Path) -> dict[str, Any]:
    """Ensure a compatible coordinator process is running for ``workspace``.

    Returns a result dict whose ``status`` is one of:

    * ``already_running`` - a healthy coordinator exists; nothing was spawned
    * ``restart_incompatible_stop_failed`` - a running coordinator with
      mismatched metadata could not be stopped
    * ``schema_incompatible`` - the message-store schema check failed
    * ``started`` - a new coordinator process was spawned

    ``ok`` is True only for ``already_running`` and ``started``.
    """
    # Imported lazily, like the other team_agent.runtime imports in this module.
    from team_agent.runtime import ensure_workspace_dirs

    ensure_workspace_dirs(workspace)
    health = coordinator_health(workspace)
    if health["ok"]:
        return {"ok": True, "pid": health["pid"], "status": "already_running", "log": str(coordinator_log_path(workspace))}

    if health["status"] == "running" and not health.get("metadata_ok"):
        # A live process whose metadata does not match this code's
        # protocol/schema versions: stop it before starting a replacement.
        EventLog(workspace).write(
            "coordinator.restart_incompatible",
            pid=health.get("pid"),
            metadata=health.get("metadata"),
            expected_protocol=COORDINATOR_PROTOCOL_VERSION,
            expected_schema=MessageStore.SCHEMA_VERSION,
        )
        stopped = stop_coordinator(workspace)
        if not stopped.get("ok"):
            EventLog(workspace).write(
                "coordinator.restart_incompatible_stop_failed",
                pid=health.get("pid"),
                stop_result=stopped,
            )
            return {
                "ok": False,
                "pid": health.get("pid"),
                "status": "restart_incompatible_stop_failed",
                "error": stopped.get("error") or stopped.get("status"),
                "stop_result": stopped,
            }

    if not health.get("schema_ok", False):
        EventLog(workspace).write(
            "coordinator.schema_incompatible",
            error=health.get("schema_error"),
            schema=health.get("schema"),
            reason=health.get("reason"),
            table=health.get("table"),
            missing_columns=health.get("missing_columns"),
        )
        return {
            "ok": False,
            "pid": None,
            "status": "schema_incompatible",
            "error": health.get("schema_error"),
            "schema": health.get("schema"),
            "action": health.get("action", _SCHEMA_ACTION_HINT),
            "reason": health.get("reason"),
            "table": health.get("table"),
            "expected_columns": health.get("expected_columns"),
            "actual_columns": health.get("actual_columns"),
            "missing_columns": health.get("missing_columns"),
        }

    if health["status"] in {"stale", "invalid_pid"}:
        # Left-over files from a dead or corrupt record; clear them first.
        coordinator_pid_path(workspace).unlink(missing_ok=True)
        coordinator_meta_path(workspace).unlink(missing_ok=True)

    log_path = coordinator_log_path(workspace)
    log_path.parent.mkdir(parents=True, exist_ok=True)
    env = dict(os.environ)
    # Two levels above this file's directory is prepended to PYTHONPATH so the
    # child's `-m team_agent.coordinator` can import the package.
    repo_src = str(Path(__file__).resolve().parents[2])
    env["PYTHONPATH"] = repo_src + (os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "")

    # The context manager closes the parent's log handle even if Popen raises;
    # the child keeps its own copy of the descriptor for stdout/stderr.
    with log_path.open("a", encoding="utf-8") as log:
        proc = subprocess.Popen(
            [sys.executable, "-m", "team_agent.coordinator", "--workspace", str(workspace)],
            cwd=str(workspace),
            stdin=subprocess.DEVNULL,
            stdout=log,
            stderr=log,
            env=env,
            start_new_session=True,
        )

    coordinator_pid_path(workspace).write_text(str(proc.pid), encoding="utf-8")
    write_coordinator_metadata(workspace, proc.pid, source="start")
    EventLog(workspace).write("coordinator.started", pid=proc.pid, log=str(log_path))
    return {"ok": True, "pid": proc.pid, "status": "started", "log": str(log_path)}
122
+
123
+
124
# Cache of the expected column names per table. Starts empty and is filled
# on first use by _load_expected_schema_columns().
_SCHEMA_EXPECTED_COLUMNS: dict[str, set[str]] = {}
# Columns that _diagnose_schema_mismatch() does not report as missing when
# it is called with ignore_migratable=True.
_SCHEMA_MIGRATABLE_COLUMNS: dict[str, set[str]] = {
    "messages": {"delivery_attempts", "owner_team_id"},
    "scheduled_events": {"owner_team_id"},
    "agent_health": {"owner_team_id"},
    "result_watchers": {"owner_team_id"},
}
# Remediation text returned under the "action" key of schema-failure results.
_SCHEMA_ACTION_HINT = (
    "use team-agent advanced repair-state --schema to re-run column migrations; "
    "if that fails, back up .team/runtime/team.db, then delete it and rerun team-agent launch "
    "(in-flight messages will be lost)"
)
136
+
137
+
138
def _load_expected_schema_columns() -> dict[str, set[str]]:
    """Return the table -> expected-column-set mapping, building it on first call.

    The mapping is stored in the module-level ``_SCHEMA_EXPECTED_COLUMNS``
    dict; later calls return that same dict without re-importing.
    """
    if not _SCHEMA_EXPECTED_COLUMNS:
        from team_agent.message_store.schema import (
            AGENT_HEALTH_COLUMNS,
            DELIVERY_TOKEN_COLUMNS,
            MESSAGE_COLUMNS,
            PEER_ALLOWLIST_COLUMNS,
            RESULT_COLUMNS,
            RESULT_WATCHER_COLUMNS,
            SCHEDULED_EVENT_COLUMNS,
        )

        # Insertion order is kept as-is: it is the order in which tables are
        # inspected by callers iterating over this mapping.
        table_columns = (
            ("messages", MESSAGE_COLUMNS),
            ("results", RESULT_COLUMNS),
            ("scheduled_events", SCHEDULED_EVENT_COLUMNS),
            ("delivery_tokens", DELIVERY_TOKEN_COLUMNS),
            ("agent_health", AGENT_HEALTH_COLUMNS),
            ("peer_allowlist", PEER_ALLOWLIST_COLUMNS),
            ("result_watchers", RESULT_WATCHER_COLUMNS),
        )
        _SCHEMA_EXPECTED_COLUMNS.update({table: set(columns) for table, columns in table_columns})
    return _SCHEMA_EXPECTED_COLUMNS
162
+
163
+
164
def _diagnose_schema_mismatch(workspace: Path, *, ignore_migratable: bool = False) -> dict[str, Any] | None:
    """Describe the first existing table in ``team.db`` that lacks expected columns.

    Returns ``None`` when the database file does not exist or no inspected
    table is missing a column. Tables that are absent from the database are
    skipped. With ``ignore_migratable=True`` the columns listed in
    ``_SCHEMA_MIGRATABLE_COLUMNS`` are not counted as missing.
    """
    import sqlite3
    from team_agent.paths import runtime_dir

    database = runtime_dir(workspace) / "team.db"
    if not database.exists():
        return None

    connection = sqlite3.connect(database)
    try:
        for table, expected in _load_expected_schema_columns().items():
            table_row = connection.execute(
                "select name from sqlite_master where type='table' and name=?",
                (table,),
            ).fetchone()
            if table_row is None:
                continue
            # Index 1 of each pragma table_info row is the column name.
            column_rows = connection.execute(f"pragma table_info({table})").fetchall()
            actual = {column[1] for column in column_rows}
            missing = expected - actual
            if ignore_migratable:
                missing = missing.difference(_SCHEMA_MIGRATABLE_COLUMNS.get(table, set()))
            if not missing:
                continue
            return {
                "reason": "schema_mismatch",
                "table": table,
                "expected_columns": sorted(expected),
                "actual_columns": sorted(actual),
                "missing_columns": sorted(missing),
            }
    finally:
        connection.close()
    return None
195
+
196
+
197
def message_store_schema_health(workspace: Path) -> dict[str, Any]:
    """Check that the workspace message store can be opened with a usable schema.

    Always returns ``schema_ok``, ``schema_error`` and ``schema``. Failure
    results additionally carry ``action`` and, when a column mismatch was
    identified, the keys produced by ``_diagnose_schema_mismatch``.
    """
    version_info = {"message_store_schema_version": MessageStore.SCHEMA_VERSION}

    # First pass: columns outside the migratable set that are missing are
    # reported before the store is opened at all.
    blocking = _diagnose_schema_mismatch(workspace, ignore_migratable=True)
    if blocking is not None:
        error_text = (
            f"team.db table {blocking['table']} is missing required column(s): "
            + ", ".join(blocking["missing_columns"])
        )
        return {
            "schema_ok": False,
            "schema_error": error_text,
            "schema": version_info,
            "action": _SCHEMA_ACTION_HINT,
            **blocking,
        }

    try:
        MessageStore(workspace)
    except Exception as exc:
        # Opening the store failed; attach any column mismatch that is still
        # present to the error report.
        details = _diagnose_schema_mismatch(workspace) or {}
        return {
            "schema_ok": False,
            "schema_error": str(exc),
            "schema": version_info,
            "action": _SCHEMA_ACTION_HINT,
            **details,
        }

    return {
        "schema_ok": True,
        "schema_error": None,
        "schema": version_info,
    }
227
+
228
+
229
def stop_coordinator(workspace: Path) -> dict[str, Any]:
    """Send SIGTERM to the recorded coordinator and remove its pid/metadata files.

    Returns a dict with ``ok`` and ``status``:

    * ``missing``             - no pid file; nothing to do (``ok`` True)
    * ``invalid_pid_removed`` - the pid file did not hold a positive integer
      and was deleted along with the metadata file (``ok`` True)
    * ``kill_failed``         - signalling the process raised ``OSError``;
      the files are left in place (``ok`` False, with ``pid`` and ``error``)
    * ``stopped``             - the files were removed; SIGTERM was sent
      first if the process was still alive (``ok`` True, with ``pid``)

    The function does not wait for the process to exit.
    """
    pid_path = coordinator_pid_path(workspace)
    if not pid_path.exists():
        return {"ok": True, "status": "missing"}
    try:
        pid = int(pid_path.read_text(encoding="utf-8").strip())
        if pid <= 0:
            # kill(2) gives pid 0 and negative pids group-wide meaning (own
            # process group, every signalable process, or a process group),
            # so SIGTERM must never be sent to a value read from a bad file.
            raise ValueError(f"non-positive pid {pid}")
    except ValueError:
        pid_path.unlink(missing_ok=True)
        coordinator_meta_path(workspace).unlink(missing_ok=True)
        return {"ok": True, "status": "invalid_pid_removed"}
    if pid_is_running(pid):
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError as exc:
            return {"ok": False, "status": "kill_failed", "pid": pid, "error": str(exc)}
    pid_path.unlink(missing_ok=True)
    coordinator_meta_path(workspace).unlink(missing_ok=True)
    EventLog(workspace).write("coordinator.stopped", pid=pid)
    return {"ok": True, "status": "stopped", "pid": pid}
248
+
249
+
250
def coordinator_tick(workspace: Path) -> dict[str, Any]:
    """Run one coordinator pass over ``workspace`` and summarise what happened.

    If the runtime state names a tmux session that no longer exists, an event
    is logged and ``{"ok": False, "stop": True, ...}`` is returned without
    doing any other work. Otherwise the maintenance steps run in a fixed
    order, the runtime state is saved, results are collected, and a summary
    dict with ``ok`` True and ``stop`` False is returned.
    """
    # All runtime/messaging/state helpers are imported lazily inside the function.
    from team_agent.runtime import (
        _capture_missing_sessions,
        _deliver_pending_messages,
        _detect_stuck_agents,
        _fire_due_scheduled_events,
        _handle_provider_runtime_prompts,
        _handle_provider_startup_prompts,
        _refresh_agent_runtime_statuses,
        _sync_agent_health,
        _tmux_session_exists,
        _collect_results_and_notify_watchers,
    )
    from team_agent.messaging.idle_alerts import (
        detect_cross_worker_deadlocks,
        detect_idle_fallbacks,
    )
    from team_agent.messaging.activity_detector import detect_compaction_degradation
    from team_agent.messaging.session_drift import detect_session_drift
    from team_agent.state import load_runtime_state, save_runtime_state

    runtime_state = load_runtime_state(workspace)
    events = EventLog(workspace)
    store = MessageStore(workspace)

    tmux_session = runtime_state.get("session_name")
    if tmux_session and not _tmux_session_exists(tmux_session):
        events.write("coordinator.session_missing", session=tmux_session)
        return {"ok": False, "stop": True, "reason": "tmux_session_missing"}

    _capture_missing_sessions(workspace, runtime_state, events, timeout_s=0.0, log_miss=False)
    _refresh_agent_runtime_statuses(workspace, runtime_state, events)
    _handle_provider_startup_prompts(workspace, runtime_state, events)
    _handle_provider_runtime_prompts(workspace, runtime_state, events)
    captures = _sync_agent_health(workspace, runtime_state, store) or {}
    delivered = _deliver_pending_messages(workspace, runtime_state, events)
    fired = _fire_due_scheduled_events(workspace, store, events)
    stuck = _detect_stuck_agents(workspace, runtime_state, store, events)
    idle_alerts = detect_idle_fallbacks(workspace, runtime_state, store, events)
    deadlock_alerts = detect_cross_worker_deadlocks(workspace, runtime_state, store, events)

    def _codex_scrollback(agent_id: str, agent_state: dict[str, Any]) -> str:
        """Return the captured scrollback of a codex agent, or "" to skip it."""
        if str(agent_state.get("provider") or "") != "codex":
            return ""
        return str((captures.get(agent_id) or {}).get("scrollback") or "")

    # Pass 1: compaction checks for every codex agent that has scrollback.
    compaction_results: list[dict[str, Any]] = []
    for agent_id, agent_state in runtime_state.get("agents", {}).items():
        scrollback = _codex_scrollback(agent_id, agent_state)
        if not scrollback:
            continue
        outcome = detect_compaction_degradation(
            workspace,
            runtime_state,
            events,
            agent_id=agent_id,
            provider="codex",
            scrollback=scrollback,
            stuck_loop=agent_id in (stuck or []),
        )
        event_name = outcome.get("event")
        if event_name and event_name != "compaction_threshold_crossed.none":
            compaction_results.append(outcome)

    # Pass 2: session-drift checks over the same kind of agents. Kept as a
    # separate loop so all compaction checks finish before any drift check.
    drift_results: list[dict[str, Any]] = []
    for agent_id, agent_state in runtime_state.get("agents", {}).items():
        scrollback = _codex_scrollback(agent_id, agent_state)
        if not scrollback:
            continue
        drift = detect_session_drift(
            workspace,
            runtime_state,
            events,
            agent_id=agent_id,
            agent_state=agent_state,
            scrollback=scrollback,
        )
        if drift:
            drift_results.append(drift)

    save_runtime_state(workspace, runtime_state)
    results = _collect_results_and_notify_watchers(workspace, events)
    return {
        "ok": True,
        "stop": False,
        "delivered": delivered,
        "scheduled": fired,
        "stuck": stuck,
        "idle_alerts": idle_alerts,
        "deadlock_alerts": deadlock_alerts,
        "compaction": compaction_results,
        "session_drift": drift_results,
        "results": results,
    }
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from team_agent.coordinator.paths import coordinator_meta_path
10
+ from team_agent.message_store import MessageStore
11
+
12
+
13
+ COORDINATOR_PROTOCOL_VERSION = 2
14
+
15
+
16
def pid_is_running(pid: int) -> bool:
    """Return True when ``pid`` names a signalable, non-zombie process.

    Non-positive pids are never reported as running: in kill(2) they address
    process groups rather than one process, so the signal-0 probe would
    succeed for pid 0 even though no such coordinator exists. A pid too large
    for the platform's pid type is likewise reported as not running.
    """
    if pid <= 0:
        return False
    try:
        # Signal 0 performs the existence/permission check without
        # delivering a signal.
        os.kill(pid, 0)
    except (OSError, OverflowError):
        return False
    # Imported lazily, and only once the cheap checks above have passed.
    from team_agent.runtime import run_cmd

    # A process whose ps state starts with "Z" (zombie) still answers the
    # signal-0 probe, so it is filtered out explicitly.
    proc = run_cmd(["ps", "-p", str(pid), "-o", "stat="], timeout=5)
    if proc.returncode == 0 and proc.stdout.strip().upper().startswith("Z"):
        return False
    return True
26
+
27
+
28
def read_coordinator_metadata(workspace: Path) -> dict[str, Any] | None:
    """Load the coordinator metadata file for ``workspace``.

    Returns the parsed JSON object, or ``None`` when the file cannot be read,
    is not valid UTF-8, is not valid JSON, or does not contain a JSON object.
    """
    path = coordinator_meta_path(workspace)
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a file that is not valid UTF-8.
        return None
    return raw if isinstance(raw, dict) else None
35
+
36
+
37
def coordinator_metadata_ok(metadata: dict[str, Any] | None, pid: int) -> bool:
    """Return True when ``metadata`` describes ``pid`` at the current versions.

    Missing or empty metadata is never OK. Otherwise the recorded pid,
    protocol version and message-store schema version must all match.
    """
    if not metadata:
        return False
    if metadata.get("pid") != pid:
        return False
    if metadata.get("protocol_version") != COORDINATOR_PROTOCOL_VERSION:
        return False
    return bool(metadata.get("message_store_schema_version") == MessageStore.SCHEMA_VERSION)
44
+
45
+
46
def write_coordinator_metadata(workspace: Path, pid: int, source: str) -> None:
    """Write the coordinator metadata file for ``workspace`` as indented JSON.

    The record holds the pid, the current protocol and message-store schema
    versions, the ``source`` label supplied by the caller, and a UTC
    ISO-8601 ``updated_at`` timestamp. The parent directory is created if
    it does not exist.
    """
    target = coordinator_meta_path(workspace)
    target.parent.mkdir(parents=True, exist_ok=True)
    record = {
        "pid": pid,
        "protocol_version": COORDINATOR_PROTOCOL_VERSION,
        "message_store_schema_version": MessageStore.SCHEMA_VERSION,
        "source": source,
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    serialized = json.dumps(record, indent=2)
    target.write_text(serialized, encoding="utf-8")
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from team_agent.paths import runtime_dir
6
+
7
+
8
def coordinator_pid_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.pid`` in the workspace runtime directory."""
    base = runtime_dir(workspace)
    return base.joinpath("coordinator.pid")
10
+
11
+
12
def coordinator_meta_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.json`` in the workspace runtime directory."""
    base = runtime_dir(workspace)
    return base.joinpath("coordinator.json")
14
+
15
+
16
def coordinator_log_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.log`` in the workspace runtime directory."""
    base = runtime_dir(workspace)
    return base.joinpath("coordinator.log")