@team-agent/installer 0.1.11 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crates/team-agent-core/src/lib.rs +50 -5
- package/package.json +1 -1
- package/schemas/team.schema.json +1 -0
- package/src/team_agent/approvals/__init__.py +65 -0
- package/src/team_agent/approvals/constants.py +6 -0
- package/src/team_agent/approvals/parsing.py +176 -0
- package/src/team_agent/approvals/runtime_prompts.py +171 -0
- package/src/team_agent/approvals/status.py +165 -0
- package/src/team_agent/cli/__init__.py +137 -0
- package/src/team_agent/cli/commands.py +339 -0
- package/src/team_agent/cli/e2e.py +202 -0
- package/src/team_agent/cli/helpers.py +137 -0
- package/src/team_agent/cli/parser.py +477 -0
- package/src/team_agent/compiler.py +98 -33
- package/src/team_agent/coordinator/__init__.py +53 -0
- package/src/team_agent/{coordinator.py → coordinator/__main__.py} +3 -1
- package/src/team_agent/coordinator/lifecycle.py +334 -0
- package/src/team_agent/coordinator/metadata.py +61 -0
- package/src/team_agent/coordinator/paths.py +17 -0
- package/src/team_agent/diagnose/__init__.py +48 -0
- package/src/team_agent/diagnose/checks.py +101 -0
- package/src/team_agent/diagnose/health.py +241 -0
- package/src/team_agent/diagnose/preflight.py +194 -0
- package/src/team_agent/diagnose/quick_start.py +233 -0
- package/src/team_agent/display/__init__.py +61 -0
- package/src/team_agent/display/close.py +147 -0
- package/src/team_agent/display/ghostty.py +77 -0
- package/src/team_agent/display/worker_window.py +110 -0
- package/src/team_agent/display/workspace.py +473 -0
- package/src/team_agent/launch/__init__.py +41 -0
- package/src/team_agent/launch/bootstrap.py +85 -0
- package/src/team_agent/launch/config.py +106 -0
- package/src/team_agent/launch/core.py +291 -0
- package/src/team_agent/launch/requirements.py +57 -0
- package/src/team_agent/leader/__init__.py +320 -0
- package/src/team_agent/lifecycle/__init__.py +5 -0
- package/src/team_agent/lifecycle/agents.py +226 -0
- package/src/team_agent/lifecycle/operations.py +321 -0
- package/src/team_agent/lifecycle/paste_buffer_hygiene.py +39 -0
- package/src/team_agent/lifecycle/start.py +363 -0
- package/src/team_agent/mcp_server/__init__.py +42 -0
- package/src/team_agent/mcp_server/__main__.py +7 -0
- package/src/team_agent/mcp_server/contracts.py +148 -0
- package/src/team_agent/mcp_server/normalize.py +257 -0
- package/src/team_agent/mcp_server/server.py +150 -0
- package/src/team_agent/mcp_server/tools.py +205 -0
- package/src/team_agent/message_store/__init__.py +23 -0
- package/src/team_agent/message_store/agent_health.py +109 -0
- package/src/team_agent/{message_store.py → message_store/core.py} +188 -245
- package/src/team_agent/message_store/result_watchers.py +102 -0
- package/src/team_agent/message_store/schema.py +266 -0
- package/src/team_agent/messaging/__init__.py +1 -0
- package/src/team_agent/messaging/activity_detector.py +190 -0
- package/src/team_agent/messaging/delivery.py +138 -0
- package/src/team_agent/messaging/deps.py +263 -0
- package/src/team_agent/messaging/idle_alerts.py +323 -0
- package/src/team_agent/messaging/internal_delivery.py +46 -0
- package/src/team_agent/messaging/leader.py +317 -0
- package/src/team_agent/messaging/leader_panes.py +343 -0
- package/src/team_agent/messaging/owner_bypass.py +29 -0
- package/src/team_agent/messaging/result_delivery.py +300 -0
- package/src/team_agent/messaging/results.py +456 -0
- package/src/team_agent/messaging/scheduler.py +428 -0
- package/src/team_agent/messaging/send.py +500 -0
- package/src/team_agent/messaging/session_drift.py +94 -0
- package/src/team_agent/messaging/tmux_io.py +337 -0
- package/src/team_agent/messaging/tmux_prompt.py +229 -0
- package/src/team_agent/orchestrator/__init__.py +376 -0
- package/src/team_agent/orchestrator/plan.py +122 -0
- package/src/team_agent/orchestrator/state.py +128 -0
- package/src/team_agent/profiles/__init__.py +82 -0
- package/src/team_agent/profiles/constants.py +19 -0
- package/src/team_agent/profiles/core.py +407 -0
- package/src/team_agent/profiles/helpers.py +69 -0
- package/src/team_agent/profiles/provider_env.py +188 -0
- package/src/team_agent/profiles/smoke.py +201 -0
- package/src/team_agent/provider_cli/__init__.py +43 -0
- package/src/team_agent/provider_cli/adapter.py +167 -0
- package/src/team_agent/provider_cli/base.py +48 -0
- package/src/team_agent/provider_cli/claude.py +457 -0
- package/src/team_agent/provider_cli/codex.py +319 -0
- package/src/team_agent/provider_cli/copilot.py +8 -0
- package/src/team_agent/provider_cli/fake.py +39 -0
- package/src/team_agent/provider_cli/gemini.py +95 -0
- package/src/team_agent/provider_cli/opencode.py +8 -0
- package/src/team_agent/provider_cli/prompt.py +62 -0
- package/src/team_agent/provider_cli/registry.py +18 -0
- package/src/team_agent/provider_cli/unsupported.py +32 -0
- package/src/team_agent/providers.py +67 -949
- package/src/team_agent/quality_gates.py +104 -0
- package/src/team_agent/restart/__init__.py +34 -0
- package/src/team_agent/restart/orchestration.py +328 -0
- package/src/team_agent/restart/selection.py +89 -0
- package/src/team_agent/restart/snapshot.py +70 -0
- package/src/team_agent/runtime.py +809 -5892
- package/src/team_agent/rust_core.py +22 -5
- package/src/team_agent/sessions/__init__.py +25 -0
- package/src/team_agent/sessions/capture.py +93 -0
- package/src/team_agent/sessions/inventory.py +44 -0
- package/src/team_agent/sessions/resume.py +135 -0
- package/src/team_agent/spec.py +3 -1
- package/src/team_agent/state.py +218 -4
- package/src/team_agent/status/__init__.py +63 -0
- package/src/team_agent/status/approvals.py +52 -0
- package/src/team_agent/status/compact.py +158 -0
- package/src/team_agent/status/constants.py +18 -0
- package/src/team_agent/status/inbox.py +28 -0
- package/src/team_agent/status/peek.py +117 -0
- package/src/team_agent/status/queries.py +168 -0
- package/src/team_agent/terminal.py +57 -0
- package/src/team_agent/cli.py +0 -858
- package/src/team_agent/mcp_server.py +0 -579
- package/src/team_agent/profiles.py +0 -882
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
@@ -39,40 +40,19 @@ def compile_team(team_dir: Path, out_path: Path | None = None) -> dict[str, Any]
|
|
|
39
40
|
routing_rules = []
|
|
40
41
|
startup_order = []
|
|
41
42
|
for role_doc in sorted(agents_dir.glob("*.md")):
|
|
42
|
-
meta, body =
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
_validate_role_doc(role_doc, meta, body, profile_names, profile_model)
|
|
43
|
+
meta, body = _role_doc_meta_for_team(
|
|
44
|
+
role_doc,
|
|
45
|
+
team_meta,
|
|
46
|
+
workspace,
|
|
47
|
+
team_dir,
|
|
48
|
+
profile_names,
|
|
49
|
+
default_auth_mode,
|
|
50
|
+
default_profile,
|
|
51
|
+
provider_models,
|
|
52
|
+
default_model,
|
|
53
|
+
)
|
|
54
54
|
agent_id = str(meta["name"])
|
|
55
|
-
|
|
56
|
-
agent = {
|
|
57
|
-
"id": agent_id,
|
|
58
|
-
"role": str(meta["role"]),
|
|
59
|
-
"provider": str(meta["provider"]),
|
|
60
|
-
"model": str(meta["model"]) if meta.get("model") is not None else None,
|
|
61
|
-
"auth_mode": str(meta["auth_mode"]),
|
|
62
|
-
"working_directory": str(workspace),
|
|
63
|
-
"system_prompt": {"inline": body.strip() or str(meta["role"]), "file": None},
|
|
64
|
-
"tools": tools,
|
|
65
|
-
"permission_mode": "restricted",
|
|
66
|
-
"preferred_for": [agent_id, str(meta["role"])],
|
|
67
|
-
"avoid_for": [],
|
|
68
|
-
"output_contract": {
|
|
69
|
-
"format": "result_envelope_v1",
|
|
70
|
-
"required_fields": ["task_id", "status", "summary", "artifacts"],
|
|
71
|
-
},
|
|
72
|
-
}
|
|
73
|
-
if meta.get("profile"):
|
|
74
|
-
agent["profile"] = str(meta["profile"])
|
|
75
|
-
agent["credential_ref"] = f"profile:{meta['profile']}"
|
|
55
|
+
agent = _agent_from_role_doc(meta, body, workspace, agent_id)
|
|
76
56
|
agents.append(agent)
|
|
77
57
|
routing_rules.append({"id": f"route-{agent_id}", "match": {"assignee": [agent_id]}, "assign_to": agent_id, "priority": 10})
|
|
78
58
|
startup_order.append(agent_id)
|
|
@@ -155,6 +135,41 @@ def compile_team(team_dir: Path, out_path: Path | None = None) -> dict[str, Any]
|
|
|
155
135
|
return {"ok": True, "team_dir": str(team_dir), "out": str(out_path) if out_path else None, "spec": spec}
|
|
156
136
|
|
|
157
137
|
|
|
138
|
+
def compile_role_doc_agent(role_doc: Path, team_dir: Path, agent_id: str | None = None) -> dict[str, Any]:
    """Compile a single role document on disk into an agent entry.

    The role document path is resolved, its front matter and body are read,
    and the rest of the work is delegated to ``compile_role_entry_agent``.
    """
    resolved_doc = role_doc.resolve()
    front_matter, prompt_body = _read_front_matter(resolved_doc)
    return compile_role_entry_agent(resolved_doc, team_dir, front_matter, prompt_body, agent_id)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def compile_role_entry_agent(
    role_doc: Path,
    team_dir: Path,
    meta: dict[str, Any],
    body: str,
    agent_id: str | None = None,
) -> dict[str, Any]:
    """Compile already-parsed role metadata into an agent entry for a team.

    Team-level defaults are read from ``TEAM.md`` inside ``team_dir`` and
    applied to the role metadata before the agent dict is built.

    Raises:
        ValidationError: if ``team_dir`` does not contain a ``TEAM.md``.
    """
    team_dir = team_dir.resolve()
    workspace = team_workspace(team_dir)

    team_doc = team_dir / "TEAM.md"
    if not team_doc.exists():
        raise ValidationError(f"{team_doc}: missing TEAM.md")

    profile_names = known_profiles(team_dir)
    team_meta, _unused_team_body = _read_front_matter(team_doc)

    # Team-wide fallbacks, evaluated in the same order the positional
    # arguments are passed below.
    fallback_auth_mode = team_meta.get("default_auth_mode") or "subscription"
    fallback_profile = team_meta.get("default_profile")
    provider_models = _provider_model_defaults(team_meta)
    fallback_model = team_meta.get("default_model") or team_meta.get("model")

    meta, body = _role_doc_meta_for_team(
        role_doc,
        team_meta,
        workspace,
        team_dir,
        profile_names,
        fallback_auth_mode,
        fallback_profile,
        provider_models,
        fallback_model,
        role_meta=meta,
        role_body=body,
    )
    # An explicit agent_id wins; otherwise the role's own name is used.
    effective_id = str(agent_id or meta["name"])
    return _agent_from_role_doc(meta, body, workspace, effective_id)
|
|
171
|
+
|
|
172
|
+
|
|
158
173
|
def _read_front_matter(path: Path) -> tuple[dict[str, Any], str]:
|
|
159
174
|
text = path.read_text(encoding="utf-8")
|
|
160
175
|
if not text.startswith("---\n"):
|
|
@@ -170,6 +185,56 @@ def _read_front_matter(path: Path) -> tuple[dict[str, Any], str]:
|
|
|
170
185
|
return data, body
|
|
171
186
|
|
|
172
187
|
|
|
188
|
+
def _role_doc_meta_for_team(
    role_doc: Path,
    team_meta: dict[str, Any],
    workspace: Path,
    team_dir: Path,
    profile_names: set[str],
    default_auth_mode: Any,
    default_profile: Any,
    provider_models: dict[str, str],
    default_model: Any,
    role_meta: dict[str, Any] | None = None,
    role_body: str | None = None,
) -> tuple[dict[str, Any], str]:
    """Return role metadata with team defaults applied, plus the role body.

    When both ``role_meta`` and ``role_body`` are supplied they are used
    directly; otherwise the front matter is read from ``role_doc``. The
    metadata is deep-copied before any default is filled in, so the caller's
    dict is never mutated. The result is validated before being returned.
    """
    if role_meta is not None and role_body is not None:
        meta, body = role_meta, role_body
    else:
        meta, body = _read_front_matter(role_doc)
    meta = copy.deepcopy(meta)

    # Only fill in defaults for keys the role document did not set itself.
    if default_auth_mode is not None:
        meta.setdefault("auth_mode", default_auth_mode)
    if default_profile is not None:
        meta.setdefault("profile", default_profile)

    profile_model = _profile_model(workspace, meta.get("profile"), team_dir / "profiles")

    if "model" not in meta:
        # A compatible_api role whose profile already yields a model keeps
        # "model" unset; every other role gets a provider/team default.
        model_from_profile = meta.get("auth_mode") == "compatible_api" and profile_model
        if not model_from_profile:
            meta["model"] = _default_model_for_provider(meta.get("provider"), provider_models, default_model)

    _validate_role_doc(role_doc, meta, body, profile_names, profile_model)
    return meta, body
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _agent_from_role_doc(meta: dict[str, Any], body: str, workspace: Path, agent_id: str) -> dict[str, Any]:
    """Build the agent dict for one role from its metadata and prompt body.

    ``meta`` must already contain ``role``, ``provider``, ``auth_mode`` and
    ``tools``; ``model`` and ``profile`` are optional. The inline system
    prompt falls back to the role name when the body is blank.
    """
    role = str(meta["role"])
    model = meta.get("model")
    profile = meta.get("profile")

    agent: dict[str, Any] = {
        "id": agent_id,
        "role": role,
        "provider": str(meta["provider"]),
        "model": None if model is None else str(model),
        "auth_mode": str(meta["auth_mode"]),
        "working_directory": str(workspace),
        "system_prompt": {"inline": body.strip() or role, "file": None},
        "tools": _normalize_tools(list(meta["tools"] or [])),
        "permission_mode": "restricted",
        "preferred_for": [agent_id, role],
        "avoid_for": [],
        "output_contract": {
            "format": "result_envelope_v1",
            "required_fields": ["task_id", "status", "summary", "artifacts"],
        },
    }
    if profile:
        # A profile also determines which credential reference the agent uses.
        agent["profile"] = str(profile)
        agent["credential_ref"] = f"profile:{profile}"
    return agent
|
|
236
|
+
|
|
237
|
+
|
|
173
238
|
def _validate_role_doc(
|
|
174
239
|
path: Path,
|
|
175
240
|
meta: dict[str, Any],
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from team_agent.coordinator.lifecycle import (
|
|
6
|
+
coordinator_health,
|
|
7
|
+
coordinator_tick,
|
|
8
|
+
message_store_schema_health,
|
|
9
|
+
start_coordinator,
|
|
10
|
+
stop_coordinator,
|
|
11
|
+
)
|
|
12
|
+
from team_agent.coordinator.metadata import (
|
|
13
|
+
COORDINATOR_PROTOCOL_VERSION,
|
|
14
|
+
coordinator_metadata_ok,
|
|
15
|
+
pid_is_running,
|
|
16
|
+
read_coordinator_metadata,
|
|
17
|
+
write_coordinator_metadata,
|
|
18
|
+
)
|
|
19
|
+
from team_agent.coordinator.paths import (
|
|
20
|
+
coordinator_log_path,
|
|
21
|
+
coordinator_meta_path,
|
|
22
|
+
coordinator_pid_path,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"COORDINATOR_PROTOCOL_VERSION",
|
|
27
|
+
"coordinator_health",
|
|
28
|
+
"coordinator_log_path",
|
|
29
|
+
"coordinator_meta_path",
|
|
30
|
+
"coordinator_metadata_ok",
|
|
31
|
+
"coordinator_pid_path",
|
|
32
|
+
"coordinator_tick",
|
|
33
|
+
"main",
|
|
34
|
+
"message_store_schema_health",
|
|
35
|
+
"pid_is_running",
|
|
36
|
+
"read_coordinator_metadata",
|
|
37
|
+
"start_coordinator",
|
|
38
|
+
"stop_coordinator",
|
|
39
|
+
"write_coordinator_metadata",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def __getattr__(name: str) -> Any:
    """Resolve ``main`` lazily; every other unknown attribute is an error."""
    if name != "main":
        raise AttributeError(f"module 'team_agent.coordinator' has no attribute {name!r}")
    # The daemon entry point is re-exported lazily so that the pyproject
    # console_script `team-agent-coordinator = "team_agent.coordinator:main"`
    # keeps resolving after the package split. An eager top-level
    # `from team_agent.coordinator.__main__ import main` would trigger the
    # runtime <-> coordinator import cycle (runtime imports
    # coordinator/__init__ at module load).
    from team_agent.coordinator.__main__ import main as daemon_main

    return daemon_main
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import argparse
|
|
4
|
+
import os
|
|
4
5
|
import signal
|
|
5
6
|
import sys
|
|
6
7
|
import time
|
|
@@ -29,7 +30,8 @@ def main(argv: list[str] | None = None) -> None:
|
|
|
29
30
|
args = parser.parse_args(argv)
|
|
30
31
|
workspace = Path(args.workspace).resolve()
|
|
31
32
|
runtime.ensure_workspace_dirs(workspace)
|
|
32
|
-
runtime.coordinator_pid_path(workspace).write_text(str(
|
|
33
|
+
runtime.coordinator_pid_path(workspace).write_text(str(os.getpid()), encoding="utf-8")
|
|
34
|
+
runtime.write_coordinator_metadata(workspace, os.getpid(), source="boot")
|
|
33
35
|
event_log = EventLog(workspace)
|
|
34
36
|
event_log.write("coordinator.boot", workspace=str(workspace), once=args.once)
|
|
35
37
|
signal.signal(signal.SIGTERM, _stop)
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import signal
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from team_agent.coordinator.metadata import (
|
|
11
|
+
COORDINATOR_PROTOCOL_VERSION,
|
|
12
|
+
coordinator_metadata_ok,
|
|
13
|
+
pid_is_running,
|
|
14
|
+
read_coordinator_metadata,
|
|
15
|
+
write_coordinator_metadata,
|
|
16
|
+
)
|
|
17
|
+
from team_agent.coordinator.paths import (
|
|
18
|
+
coordinator_log_path,
|
|
19
|
+
coordinator_meta_path,
|
|
20
|
+
coordinator_pid_path,
|
|
21
|
+
)
|
|
22
|
+
from team_agent.events import EventLog
|
|
23
|
+
from team_agent.message_store import MessageStore
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def coordinator_health(workspace: Path) -> dict[str, Any]:
    """Report whether the coordinator for ``workspace`` is alive and compatible.

    The result always carries ``ok``, ``status``, ``pid``, ``metadata`` and
    ``metadata_ok``, merged with the keys returned by
    ``message_store_schema_health``. ``status`` is one of ``missing``,
    ``invalid_pid``, ``running`` or ``stale``.
    """
    schema = message_store_schema_health(workspace)

    def _no_coordinator(status: str) -> dict[str, Any]:
        # Shared shape for the cases where no usable PID could be read.
        return {"ok": False, "status": status, "pid": None, "metadata": None, "metadata_ok": False, **schema}

    pid_file = coordinator_pid_path(workspace)
    if not pid_file.exists():
        return _no_coordinator("missing")

    raw_pid = pid_file.read_text(encoding="utf-8").strip()
    try:
        pid = int(raw_pid)
    except ValueError:
        return _no_coordinator("invalid_pid")

    running = pid_is_running(pid)
    metadata = read_coordinator_metadata(workspace)
    metadata_ok = coordinator_metadata_ok(metadata, pid)
    schema_ok = bool(schema.get("schema_ok"))

    report: dict[str, Any] = {
        # Healthy only when the process is alive, its metadata matches this
        # code's versions, and the message store schema is usable.
        "ok": running and metadata_ok and schema_ok,
        "status": "running" if running else "stale",
        "pid": pid,
        "metadata": metadata,
        "metadata_ok": metadata_ok,
    }
    report.update(schema)
    return report
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def start_coordinator(workspace: Path) -> dict[str, Any]:
    """Start the coordinator daemon for ``workspace`` unless one is healthy.

    Steps, in order:

    1. If ``coordinator_health`` reports ``ok``, return ``already_running``.
    2. If a process is running but its metadata does not match this code,
       log the incompatibility and stop it; a failed stop aborts the start.
    3. If the message store schema is not usable, log it and return
       ``schema_incompatible`` with the diagnostic fields from the health
       report.
    4. Remove stale/invalid PID and metadata files, spawn
       ``python -m team_agent.coordinator`` detached in its own session with
       stdout/stderr appended to the coordinator log, then record the PID
       and metadata.

    Returns:
        A dict with at least ``ok``, ``pid`` and ``status``.
    """
    from team_agent.runtime import ensure_workspace_dirs

    ensure_workspace_dirs(workspace)
    health = coordinator_health(workspace)
    if health["ok"]:
        return {"ok": True, "pid": health["pid"], "status": "already_running", "log": str(coordinator_log_path(workspace))}

    if health["status"] == "running" and not health.get("metadata_ok"):
        # A live coordinator with mismatching metadata must be replaced.
        EventLog(workspace).write(
            "coordinator.restart_incompatible",
            pid=health.get("pid"),
            metadata=health.get("metadata"),
            expected_protocol=COORDINATOR_PROTOCOL_VERSION,
            expected_schema=MessageStore.SCHEMA_VERSION,
        )
        stopped = stop_coordinator(workspace)
        if not stopped.get("ok"):
            EventLog(workspace).write(
                "coordinator.restart_incompatible_stop_failed",
                pid=health.get("pid"),
                stop_result=stopped,
            )
            return {
                "ok": False,
                "pid": health.get("pid"),
                "status": "restart_incompatible_stop_failed",
                "error": stopped.get("error") or stopped.get("status"),
                "stop_result": stopped,
            }

    if not health.get("schema_ok", False):
        EventLog(workspace).write(
            "coordinator.schema_incompatible",
            error=health.get("schema_error"),
            schema=health.get("schema"),
            reason=health.get("reason"),
            table=health.get("table"),
            missing_columns=health.get("missing_columns"),
        )
        return {
            "ok": False,
            "pid": None,
            "status": "schema_incompatible",
            "error": health.get("schema_error"),
            "schema": health.get("schema"),
            "action": health.get("action", _SCHEMA_ACTION_HINT),
            "reason": health.get("reason"),
            "table": health.get("table"),
            "expected_columns": health.get("expected_columns"),
            "actual_columns": health.get("actual_columns"),
            "missing_columns": health.get("missing_columns"),
        }

    if health["status"] in {"stale", "invalid_pid"}:
        # Leftovers from a previous coordinator; clear them before starting.
        coordinator_pid_path(workspace).unlink(missing_ok=True)
        coordinator_meta_path(workspace).unlink(missing_ok=True)

    log_path = coordinator_log_path(workspace)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Put the directory two levels above this file first on PYTHONPATH so the
    # child process can import the same team_agent package.
    env = dict(os.environ)
    repo_src = str(Path(__file__).resolve().parents[2])
    env["PYTHONPATH"] = repo_src + (os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "")

    # The child inherits its own copy of the descriptor, so the parent's
    # handle is closed as soon as the spawn returns -- and, via the context
    # manager, also when Popen raises, which previously leaked the handle.
    with log_path.open("a", encoding="utf-8") as log:
        proc = subprocess.Popen(
            [sys.executable, "-m", "team_agent.coordinator", "--workspace", str(workspace)],
            cwd=str(workspace),
            stdin=subprocess.DEVNULL,
            stdout=log,
            stderr=log,
            env=env,
            start_new_session=True,
        )

    coordinator_pid_path(workspace).write_text(str(proc.pid), encoding="utf-8")
    write_coordinator_metadata(workspace, proc.pid, source="start")
    EventLog(workspace).write("coordinator.started", pid=proc.pid, log=str(log_path))
    return {"ok": True, "pid": proc.pid, "status": "started", "log": str(log_path)}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Cache of table name -> expected column names. Starts empty and is filled on
# first use by _load_expected_schema_columns().
_SCHEMA_EXPECTED_COLUMNS: dict[str, set[str]] = {}

# Columns that _diagnose_schema_mismatch() leaves out of the "missing" set
# when it is called with ignore_migratable=True.
_SCHEMA_MIGRATABLE_COLUMNS: dict[str, set[str]] = {
    "messages": {"delivery_attempts", "owner_team_id"},
    "scheduled_events": {"owner_team_id"},
    "agent_health": {"owner_team_id"},
    "result_watchers": {"owner_team_id"},
}

# Remediation hint attached to schema-incompatibility reports.
_SCHEMA_ACTION_HINT = (
    "use team-agent advanced repair-state --schema to re-run column migrations; "
    "if that fails, back up .team/runtime/team.db, then delete it and rerun team-agent launch "
    "(in-flight messages will be lost)"
)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _load_expected_schema_columns() -> dict[str, set[str]]:
    """Return the expected columns per table, filling the module cache once.

    The column constants are imported inside the function on the first call;
    later calls return the already populated ``_SCHEMA_EXPECTED_COLUMNS``.
    """
    if not _SCHEMA_EXPECTED_COLUMNS:
        from team_agent.message_store.schema import (
            AGENT_HEALTH_COLUMNS,
            DELIVERY_TOKEN_COLUMNS,
            MESSAGE_COLUMNS,
            PEER_ALLOWLIST_COLUMNS,
            RESULT_COLUMNS,
            RESULT_WATCHER_COLUMNS,
            SCHEDULED_EVENT_COLUMNS,
        )

        columns_by_table = (
            ("messages", MESSAGE_COLUMNS),
            ("results", RESULT_COLUMNS),
            ("scheduled_events", SCHEDULED_EVENT_COLUMNS),
            ("delivery_tokens", DELIVERY_TOKEN_COLUMNS),
            ("agent_health", AGENT_HEALTH_COLUMNS),
            ("peer_allowlist", PEER_ALLOWLIST_COLUMNS),
            ("result_watchers", RESULT_WATCHER_COLUMNS),
        )
        # Mutate the shared dict in place so every holder of the cache sees it.
        for table_name, column_names in columns_by_table:
            _SCHEMA_EXPECTED_COLUMNS[table_name] = set(column_names)
    return _SCHEMA_EXPECTED_COLUMNS
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _diagnose_schema_mismatch(workspace: Path, *, ignore_migratable: bool = False) -> dict[str, Any] | None:
    """Find the first existing table in ``team.db`` that lacks expected columns.

    Tables that do not exist yet are skipped. With ``ignore_migratable`` set,
    columns listed in ``_SCHEMA_MIGRATABLE_COLUMNS`` are not counted as
    missing.

    Returns:
        ``None`` when the database file is absent or nothing is missing;
        otherwise a dict with ``reason``, ``table``, ``expected_columns``,
        ``actual_columns`` and ``missing_columns`` (all column lists sorted).
    """
    import sqlite3
    from team_agent.paths import runtime_dir

    db_path = runtime_dir(workspace) / "team.db"
    if not db_path.exists():
        return None

    connection = sqlite3.connect(db_path)
    try:
        for table, expected in _load_expected_schema_columns().items():
            table_row = connection.execute(
                "select name from sqlite_master where type='table' and name=?",
                (table,),
            ).fetchone()
            if table_row is None:
                continue

            # Index 1 of each table_info row is the column name.
            column_rows = connection.execute(f"pragma table_info({table})").fetchall()
            actual = {column_row[1] for column_row in column_rows}

            missing = expected - actual
            if ignore_migratable:
                missing = missing - _SCHEMA_MIGRATABLE_COLUMNS.get(table, set())
            if not missing:
                continue

            return {
                "reason": "schema_mismatch",
                "table": table,
                "expected_columns": sorted(expected),
                "actual_columns": sorted(actual),
                "missing_columns": sorted(missing),
            }
    finally:
        connection.close()
    return None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def message_store_schema_health(workspace: Path) -> dict[str, Any]:
    """Check that the message store for ``workspace`` can be opened.

    The database is first inspected for missing non-migratable columns; if
    that passes, a ``MessageStore`` is constructed and any exception is
    reported as a schema failure.

    Returns:
        A dict with ``schema_ok``, ``schema_error`` and ``schema``. Failure
        reports also carry ``action`` plus any mismatch details found.
    """
    version_info = {"message_store_schema_version": MessageStore.SCHEMA_VERSION}

    blocking = _diagnose_schema_mismatch(workspace, ignore_migratable=True)
    if blocking is not None:
        missing_list = ", ".join(blocking["missing_columns"])
        report: dict[str, Any] = {
            "schema_ok": False,
            "schema_error": f"team.db table {blocking['table']} is missing required column(s): {missing_list}",
            "schema": version_info,
            "action": _SCHEMA_ACTION_HINT,
        }
        report.update(blocking)
        return report

    try:
        MessageStore(workspace)
    except Exception as exc:
        # Deliberately broad: any failure to open the store is reported, with
        # whatever column mismatch remains after the open attempt.
        report = {
            "schema_ok": False,
            "schema_error": str(exc),
            "schema": version_info,
            "action": _SCHEMA_ACTION_HINT,
        }
        report.update(_diagnose_schema_mismatch(workspace) or {})
        return report

    return {"schema_ok": True, "schema_error": None, "schema": version_info}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def stop_coordinator(workspace: Path) -> dict[str, Any]:
    """Send SIGTERM to the recorded coordinator and remove its PID/metadata files.

    Returns:
        ``{"ok": True, "status": "missing"}`` when there is no PID file;
        ``{"ok": True, "status": "invalid_pid_removed"}`` when the PID file
        is not an integer (both files are removed);
        ``{"ok": False, "status": "kill_failed", ...}`` when signalling a
        live process fails;
        ``{"ok": True, "status": "stopped", "pid": pid}`` otherwise.
    """
    pid_path = coordinator_pid_path(workspace)
    if not pid_path.exists():
        return {"ok": True, "status": "missing"}
    try:
        pid = int(pid_path.read_text(encoding="utf-8").strip())
    except ValueError:
        pid_path.unlink(missing_ok=True)
        coordinator_meta_path(workspace).unlink(missing_ok=True)
        return {"ok": True, "status": "invalid_pid_removed"}
    if pid_is_running(pid):
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # The process exited between the liveness probe and the signal.
            # There is nothing left to stop, so fall through to the cleanup
            # instead of reporting a spurious kill failure.
            pass
        except OSError as exc:
            return {"ok": False, "status": "kill_failed", "pid": pid, "error": str(exc)}
    pid_path.unlink(missing_ok=True)
    coordinator_meta_path(workspace).unlink(missing_ok=True)
    EventLog(workspace).write("coordinator.stopped", pid=pid)
    return {"ok": True, "status": "stopped", "pid": pid}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def coordinator_tick(workspace: Path) -> dict[str, Any]:
    """Run one coordinator pass over ``workspace`` and summarise what happened.

    If the recorded tmux session no longer exists the tick returns early with
    ``stop`` set. Otherwise it runs session capture, status refresh, prompt
    handling, health sync, message delivery, scheduled events and the stuck /
    idle / deadlock detectors, then the compaction and session-drift checks
    for codex agents that have scrollback, saves the runtime state and
    collects results.
    """
    from team_agent.runtime import (
        _capture_missing_sessions,
        _deliver_pending_messages,
        _detect_stuck_agents,
        _fire_due_scheduled_events,
        _handle_provider_runtime_prompts,
        _handle_provider_startup_prompts,
        _refresh_agent_runtime_statuses,
        _sync_agent_health,
        _tmux_session_exists,
        _collect_results_and_notify_watchers,
    )
    from team_agent.messaging.idle_alerts import (
        detect_cross_worker_deadlocks,
        detect_idle_fallbacks,
    )
    from team_agent.messaging.activity_detector import detect_compaction_degradation
    from team_agent.messaging.session_drift import detect_session_drift
    from team_agent.state import load_runtime_state, save_runtime_state

    state = load_runtime_state(workspace)
    event_log = EventLog(workspace)
    store = MessageStore(workspace)

    tmux_session = state.get("session_name")
    if tmux_session and not _tmux_session_exists(tmux_session):
        event_log.write("coordinator.session_missing", session=tmux_session)
        return {"ok": False, "stop": True, "reason": "tmux_session_missing"}

    # The order of the steps below is part of the tick's behaviour.
    _capture_missing_sessions(workspace, state, event_log, timeout_s=0.0, log_miss=False)
    _refresh_agent_runtime_statuses(workspace, state, event_log)
    _handle_provider_startup_prompts(workspace, state, event_log)
    _handle_provider_runtime_prompts(workspace, state, event_log)
    captures = _sync_agent_health(workspace, state, store) or {}
    delivered = _deliver_pending_messages(workspace, state, event_log)
    fired = _fire_due_scheduled_events(workspace, store, event_log)
    stuck = _detect_stuck_agents(workspace, state, store, event_log)
    idle_alerts = detect_idle_fallbacks(workspace, state, store, event_log)
    deadlock_alerts = detect_cross_worker_deadlocks(workspace, state, store, event_log)

    # First pass: compaction degradation, codex agents with scrollback only.
    compaction_results: list[dict[str, Any]] = []
    for agent_id, agent_state in state.get("agents", {}).items():
        provider = str(agent_state.get("provider") or "")
        if provider != "codex":
            continue
        pane_text = str((captures.get(agent_id) or {}).get("scrollback") or "")
        if not pane_text:
            continue
        outcome = detect_compaction_degradation(
            workspace,
            state,
            event_log,
            agent_id=agent_id,
            provider=provider,
            scrollback=pane_text,
            stuck_loop=agent_id in (stuck or []),
        )
        event_name = outcome.get("event")
        if event_name and event_name != "compaction_threshold_crossed.none":
            compaction_results.append(outcome)

    # Second pass: session drift, same agent filter, run after all
    # compaction checks have finished.
    drift_results: list[dict[str, Any]] = []
    for agent_id, agent_state in state.get("agents", {}).items():
        if str(agent_state.get("provider") or "") != "codex":
            continue
        pane_text = str((captures.get(agent_id) or {}).get("scrollback") or "")
        if not pane_text:
            continue
        drift = detect_session_drift(
            workspace,
            state,
            event_log,
            agent_id=agent_id,
            agent_state=agent_state,
            scrollback=pane_text,
        )
        if drift:
            drift_results.append(drift)

    save_runtime_state(workspace, state)
    results = _collect_results_and_notify_watchers(workspace, event_log)
    return {
        "ok": True,
        "stop": False,
        "delivered": delivered,
        "scheduled": fired,
        "stuck": stuck,
        "idle_alerts": idle_alerts,
        "deadlock_alerts": deadlock_alerts,
        "compaction": compaction_results,
        "session_drift": drift_results,
        "results": results,
    }
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from team_agent.coordinator.paths import coordinator_meta_path
|
|
10
|
+
from team_agent.message_store import MessageStore
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
COORDINATOR_PROTOCOL_VERSION = 2
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def pid_is_running(pid: int) -> bool:
    """Return True when ``pid`` can be signalled and ``ps`` does not show it as a zombie.

    Any ``OSError`` from the signal-0 probe counts as "not running".
    """
    from team_agent.runtime import run_cmd

    try:
        os.kill(pid, 0)  # signal 0 probes the process without delivering anything
    except OSError:
        return False

    # A process whose `ps` state starts with "Z" is treated as not running.
    ps_result = run_cmd(["ps", "-p", str(pid), "-o", "stat="], timeout=5)
    is_zombie = ps_result.returncode == 0 and ps_result.stdout.strip().upper().startswith("Z")
    return not is_zombie
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def read_coordinator_metadata(workspace: Path) -> dict[str, Any] | None:
    """Load the coordinator metadata JSON for ``workspace``.

    Returns:
        The parsed object when the file exists, is readable, decodes as
        UTF-8, parses as JSON and is a JSON object; otherwise ``None``.
    """
    path = coordinator_meta_path(workspace)
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError,
        # so a metadata file holding invalid UTF-8 is treated as unreadable
        # instead of crashing the caller.
        return None
    return raw if isinstance(raw, dict) else None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def coordinator_metadata_ok(metadata: dict[str, Any] | None, pid: int) -> bool:
    """Return True when ``metadata`` matches ``pid`` and this code's versions.

    Missing or empty metadata is never OK. Otherwise the recorded PID,
    protocol version and message store schema version must all match.
    """
    if not metadata:
        return False
    if metadata.get("pid") != pid:
        return False
    if metadata.get("protocol_version") != COORDINATOR_PROTOCOL_VERSION:
        return False
    return metadata.get("message_store_schema_version") == MessageStore.SCHEMA_VERSION
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def write_coordinator_metadata(workspace: Path, pid: int, source: str) -> None:
    """Write the coordinator metadata JSON for ``workspace``.

    The file records ``pid``, the current protocol and message store schema
    versions, the caller-supplied ``source`` label and a UTC ISO-8601
    ``updated_at`` timestamp. Parent directories are created as needed.
    """
    meta_file = coordinator_meta_path(workspace)
    meta_file.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        "pid": pid,
        "protocol_version": COORDINATOR_PROTOCOL_VERSION,
        "message_store_schema_version": MessageStore.SCHEMA_VERSION,
        "source": source,
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    serialized = json.dumps(payload, indent=2)
    meta_file.write_text(serialized, encoding="utf-8")
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from team_agent.paths import runtime_dir
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def coordinator_pid_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.pid`` in the workspace runtime directory."""
    runtime_root = runtime_dir(workspace)
    return runtime_root / "coordinator.pid"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def coordinator_meta_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.json`` in the workspace runtime directory."""
    runtime_root = runtime_dir(workspace)
    return runtime_root / "coordinator.json"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def coordinator_log_path(workspace: Path) -> Path:
    """Return the path of ``coordinator.log`` in the workspace runtime directory."""
    runtime_root = runtime_dir(workspace)
    return runtime_root / "coordinator.log"
|