@team-agent/installer 0.1.11 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crates/team-agent-core/src/lib.rs +50 -5
- package/package.json +1 -1
- package/schemas/team.schema.json +1 -0
- package/src/team_agent/approvals/__init__.py +65 -0
- package/src/team_agent/approvals/constants.py +6 -0
- package/src/team_agent/approvals/parsing.py +176 -0
- package/src/team_agent/approvals/runtime_prompts.py +171 -0
- package/src/team_agent/approvals/status.py +165 -0
- package/src/team_agent/cli/__init__.py +135 -0
- package/src/team_agent/cli/commands.py +335 -0
- package/src/team_agent/cli/e2e.py +202 -0
- package/src/team_agent/cli/helpers.py +137 -0
- package/src/team_agent/cli/parser.py +470 -0
- package/src/team_agent/compiler.py +98 -33
- package/src/team_agent/coordinator/__init__.py +53 -0
- package/src/team_agent/{coordinator.py → coordinator/__main__.py} +3 -1
- package/src/team_agent/coordinator/lifecycle.py +319 -0
- package/src/team_agent/coordinator/metadata.py +61 -0
- package/src/team_agent/coordinator/paths.py +17 -0
- package/src/team_agent/diagnose/__init__.py +48 -0
- package/src/team_agent/diagnose/checks.py +101 -0
- package/src/team_agent/diagnose/health.py +241 -0
- package/src/team_agent/diagnose/preflight.py +194 -0
- package/src/team_agent/diagnose/quick_start.py +233 -0
- package/src/team_agent/display/__init__.py +61 -0
- package/src/team_agent/display/close.py +147 -0
- package/src/team_agent/display/ghostty.py +77 -0
- package/src/team_agent/display/worker_window.py +110 -0
- package/src/team_agent/display/workspace.py +473 -0
- package/src/team_agent/launch/__init__.py +41 -0
- package/src/team_agent/launch/bootstrap.py +85 -0
- package/src/team_agent/launch/config.py +106 -0
- package/src/team_agent/launch/core.py +291 -0
- package/src/team_agent/launch/requirements.py +57 -0
- package/src/team_agent/leader/__init__.py +320 -0
- package/src/team_agent/lifecycle/__init__.py +5 -0
- package/src/team_agent/lifecycle/agents.py +226 -0
- package/src/team_agent/lifecycle/operations.py +321 -0
- package/src/team_agent/lifecycle/start.py +360 -0
- package/src/team_agent/mcp_server/__init__.py +42 -0
- package/src/team_agent/mcp_server/__main__.py +7 -0
- package/src/team_agent/mcp_server/contracts.py +148 -0
- package/src/team_agent/mcp_server/normalize.py +257 -0
- package/src/team_agent/mcp_server/server.py +150 -0
- package/src/team_agent/mcp_server/tools.py +205 -0
- package/src/team_agent/message_store/__init__.py +23 -0
- package/src/team_agent/message_store/agent_health.py +109 -0
- package/src/team_agent/{message_store.py → message_store/core.py} +188 -245
- package/src/team_agent/message_store/result_watchers.py +102 -0
- package/src/team_agent/message_store/schema.py +266 -0
- package/src/team_agent/messaging/__init__.py +1 -0
- package/src/team_agent/messaging/activity_detector.py +190 -0
- package/src/team_agent/messaging/delivery.py +128 -0
- package/src/team_agent/messaging/deps.py +263 -0
- package/src/team_agent/messaging/idle_alerts.py +217 -0
- package/src/team_agent/messaging/internal_delivery.py +46 -0
- package/src/team_agent/messaging/leader.py +317 -0
- package/src/team_agent/messaging/leader_panes.py +343 -0
- package/src/team_agent/messaging/result_delivery.py +300 -0
- package/src/team_agent/messaging/results.py +456 -0
- package/src/team_agent/messaging/scheduler.py +418 -0
- package/src/team_agent/messaging/send.py +493 -0
- package/src/team_agent/messaging/tmux_io.py +337 -0
- package/src/team_agent/messaging/tmux_prompt.py +229 -0
- package/src/team_agent/orchestrator/__init__.py +376 -0
- package/src/team_agent/orchestrator/plan.py +122 -0
- package/src/team_agent/orchestrator/state.py +128 -0
- package/src/team_agent/profiles/__init__.py +82 -0
- package/src/team_agent/profiles/constants.py +19 -0
- package/src/team_agent/profiles/core.py +407 -0
- package/src/team_agent/profiles/helpers.py +69 -0
- package/src/team_agent/profiles/provider_env.py +188 -0
- package/src/team_agent/profiles/smoke.py +201 -0
- package/src/team_agent/provider_cli/__init__.py +43 -0
- package/src/team_agent/provider_cli/adapter.py +167 -0
- package/src/team_agent/provider_cli/base.py +48 -0
- package/src/team_agent/provider_cli/claude.py +457 -0
- package/src/team_agent/provider_cli/codex.py +319 -0
- package/src/team_agent/provider_cli/copilot.py +8 -0
- package/src/team_agent/provider_cli/fake.py +39 -0
- package/src/team_agent/provider_cli/gemini.py +95 -0
- package/src/team_agent/provider_cli/opencode.py +8 -0
- package/src/team_agent/provider_cli/prompt.py +62 -0
- package/src/team_agent/provider_cli/registry.py +18 -0
- package/src/team_agent/provider_cli/unsupported.py +32 -0
- package/src/team_agent/providers.py +67 -949
- package/src/team_agent/quality_gates.py +104 -0
- package/src/team_agent/restart/__init__.py +34 -0
- package/src/team_agent/restart/orchestration.py +328 -0
- package/src/team_agent/restart/selection.py +89 -0
- package/src/team_agent/restart/snapshot.py +70 -0
- package/src/team_agent/runtime.py +802 -5893
- package/src/team_agent/rust_core.py +22 -5
- package/src/team_agent/sessions/__init__.py +25 -0
- package/src/team_agent/sessions/capture.py +93 -0
- package/src/team_agent/sessions/inventory.py +44 -0
- package/src/team_agent/sessions/resume.py +135 -0
- package/src/team_agent/spec.py +3 -1
- package/src/team_agent/state.py +204 -4
- package/src/team_agent/status/__init__.py +63 -0
- package/src/team_agent/status/approvals.py +52 -0
- package/src/team_agent/status/compact.py +158 -0
- package/src/team_agent/status/constants.py +18 -0
- package/src/team_agent/status/inbox.py +28 -0
- package/src/team_agent/status/peek.py +117 -0
- package/src/team_agent/status/queries.py +168 -0
- package/src/team_agent/terminal.py +57 -0
- package/src/team_agent/cli.py +0 -858
- package/src/team_agent/mcp_server.py +0 -579
- package/src/team_agent/profiles.py +0 -882
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Iterable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class LineCountResult:
    """Outcome of the line-count check for one Python file."""

    # Path of the checked file relative to the scanned root, with POSIX separators.
    path: str
    # Number of lines counted in the file.
    lines: int
    # True when the file is within the limit that applies to it.
    allowed: bool
    # Reason text taken from the allowlist entry; only set for files over the
    # default limit that have an approved exception.
    reason: str | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# The only top-level keys a line-count allowlist document may contain.
ALLOWLIST_KEYS = {"approved_exceptions", "temporary_debt"}


def parse_line_count_allowlist_payload(payload: object, *, source: str = "line-count allowlist") -> dict[str, dict[str, Any]]:
    """Validate a decoded line-count allowlist and return its two sections.

    Args:
        payload: The decoded allowlist document; must be a dict.
        source: Label prefixed to every error message (typically the file path).

    Returns:
        A dict with exactly the keys ``approved_exceptions`` and
        ``temporary_debt``; a section missing from *payload* becomes ``{}``.

    Raises:
        ValueError: If *payload* is not a dict, has unexpected top-level keys,
            a section is not a dict, or an approved exception is not a dict
            declaring a positive integer ``max_lines``.
    """
    if not isinstance(payload, dict):
        raise ValueError(f"{source}: line-count allowlist must be a JSON object")
    unexpected = sorted(set(payload) - ALLOWLIST_KEYS)
    if unexpected:
        keys = ", ".join(unexpected)
        expected = ", ".join(sorted(ALLOWLIST_KEYS))
        raise ValueError(f"{source}: unexpected top-level key(s): {keys}; expected new schema keys: {expected}")
    approved = payload.get("approved_exceptions", {})
    temporary = payload.get("temporary_debt", {})
    if not isinstance(approved, dict):
        raise ValueError(f"{source}: approved_exceptions must be an object")
    if not isinstance(temporary, dict):
        raise ValueError(f"{source}: temporary_debt must be an object")
    for file_path, entry in approved.items():
        if not isinstance(entry, dict):
            raise ValueError(f"{source}: approved exception for {file_path} must be an object")
        max_lines = entry.get("max_lines")
        # bool is a subclass of int, so JSON ``true`` would otherwise be
        # accepted as a limit of 1; reject it explicitly.
        if isinstance(max_lines, bool) or not isinstance(max_lines, int) or max_lines < 1:
            raise ValueError(f"{source}: approved exception for {file_path} must declare positive integer max_lines")
    return {"approved_exceptions": approved, "temporary_debt": temporary}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_line_count_allowlist(path: Path) -> dict[str, dict[str, Any]]:
    """Read the allowlist file at *path* and return its validated sections.

    A file that is empty or contains only whitespace yields an allowlist with
    two empty sections. Otherwise the text is decoded as JSON and handed to
    ``parse_line_count_allowlist_payload`` with the path as the error label.

    Raises:
        ValueError: If the file content is not valid JSON (validation errors
            from the payload parser propagate as well).
    """
    stripped = path.read_text(encoding="utf-8").strip()
    if stripped == "":
        return {"approved_exceptions": {}, "temporary_debt": {}}
    try:
        decoded = json.loads(stripped)
    except json.JSONDecodeError as exc:
        raise ValueError(f"{path}: invalid JSON in line-count allowlist: {exc}") from exc
    return parse_line_count_allowlist_payload(decoded, source=str(path))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def check_python_file_line_counts(
    root: Path,
    *,
    allowlist_path: Path,
    include_roots: Iterable[str] = ("src/team_agent", "tests"),
    max_lines: int = 500,
) -> list[LineCountResult]:
    """Check every Python file under the include roots against a line limit.

    Args:
        root: Directory the include roots and reported paths are relative to.
        allowlist_path: Allowlist file whose ``approved_exceptions`` section
            may grant individual files a higher limit.
        include_roots: Sub-directories of *root* to scan.
        max_lines: Default limit applied to every file.

    Returns:
        One ``LineCountResult`` per file. A file within *max_lines* is allowed;
        a longer file is allowed only if its approved exception's
        ``max_lines`` covers it, and then carries the exception's reason.
    """
    exceptions = load_line_count_allowlist(allowlist_path)["approved_exceptions"]
    outcomes: list[LineCountResult] = []
    for rel in _iter_python_files(root, include_roots):
        count = _line_count(root / rel)
        exception = exceptions.get(rel)
        if count <= max_lines:
            outcome = LineCountResult(rel, count, allowed=True)
        elif exception:
            ceiling = int(exception["max_lines"])
            outcome = LineCountResult(
                rel,
                count,
                allowed=count <= ceiling,
                reason=str(exception.get("reason") or ""),
            )
        else:
            # Over the default limit with no approved exception.
            outcome = LineCountResult(rel, count, allowed=False)
        outcomes.append(outcome)
    return outcomes
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def line_count_failures(results: Iterable[LineCountResult]) -> list[LineCountResult]:
    """Return, in their original order, the results whose ``allowed`` flag is falsy."""
    failures: list[LineCountResult] = []
    for outcome in results:
        if outcome.allowed:
            continue
        failures.append(outcome)
    return failures
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _iter_python_files(root: Path, include_roots: Iterable[str]) -> list[str]:
    """Collect the ``*.py`` files found under each include root.

    Args:
        root: Directory the include roots are resolved against.
        include_roots: Sub-paths of *root* to search recursively; roots that
            do not exist are skipped.

    Returns:
        A sorted list of unique paths relative to *root*, using POSIX
        separators. Anything with ``__pycache__`` among its path parts is
        excluded.
    """
    # A set keeps overlapping include roots (e.g. "src" and "src/pkg") from
    # reporting the same file twice.
    found: set[str] = set()
    for include_root in include_roots:
        base = root / include_root
        if not base.exists():
            continue
        for path in base.rglob("*.py"):
            if "__pycache__" in path.parts:
                continue
            # rglob matches on name only, so a directory called "x.py" would
            # otherwise be returned and fail when its lines are counted.
            if not path.is_file():
                continue
            found.add(path.relative_to(root).as_posix())
    return sorted(found)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _line_count(path: Path) -> int:
    """Return how many lines ``str.splitlines`` finds in *path* read as UTF-8."""
    content = path.read_text(encoding="utf-8")
    rows = content.splitlines()
    return len(rows)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from team_agent.restart.orchestration import restart, rollback_restart_session
|
|
4
|
+
from team_agent.restart.selection import (
|
|
5
|
+
format_restart_candidates,
|
|
6
|
+
quick_start_existing_context,
|
|
7
|
+
restart_candidate_from_state,
|
|
8
|
+
restart_candidates,
|
|
9
|
+
select_restart_state,
|
|
10
|
+
state_has_restart_context,
|
|
11
|
+
)
|
|
12
|
+
from team_agent.restart.snapshot import (
|
|
13
|
+
load_snapshot_state,
|
|
14
|
+
safe_snapshot_name,
|
|
15
|
+
save_team_runtime_snapshot,
|
|
16
|
+
state_team_name,
|
|
17
|
+
team_runtime_snapshot_dir,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"format_restart_candidates",
|
|
22
|
+
"load_snapshot_state",
|
|
23
|
+
"quick_start_existing_context",
|
|
24
|
+
"restart",
|
|
25
|
+
"restart_candidate_from_state",
|
|
26
|
+
"restart_candidates",
|
|
27
|
+
"rollback_restart_session",
|
|
28
|
+
"safe_snapshot_name",
|
|
29
|
+
"save_team_runtime_snapshot",
|
|
30
|
+
"select_restart_state",
|
|
31
|
+
"state_has_restart_context",
|
|
32
|
+
"state_team_name",
|
|
33
|
+
"team_runtime_snapshot_dir",
|
|
34
|
+
]
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from team_agent.events import EventLog
|
|
9
|
+
from team_agent.message_store import MessageStore
|
|
10
|
+
from team_agent.permissions import resolve_permissions
|
|
11
|
+
from team_agent.restart.selection import select_restart_state
|
|
12
|
+
from team_agent.restart.snapshot import save_team_runtime_snapshot
|
|
13
|
+
from team_agent.spec import load_spec
|
|
14
|
+
from team_agent.state import (
|
|
15
|
+
check_team_owner,
|
|
16
|
+
load_runtime_state,
|
|
17
|
+
populate_team_owner_from_env,
|
|
18
|
+
save_runtime_state,
|
|
19
|
+
write_team_state,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def restart(workspace: Path, allow_fresh: bool = False, team: str | None = None) -> dict[str, Any]:
    """Restart a team's agents in a new tmux session from saved runtime state.

    Args:
        workspace: Workspace directory holding the runtime state and event log.
        allow_fresh: When true, an agent whose previous session cannot be
            resumed is started fresh instead of aborting the restart.
        team: Optional selector forwarded to ``select_restart_state``.

    Returns:
        The truthy result of ``check_team_owner`` if there is one; otherwise a
        dict with ``ok``, ``session_name``, ``agents`` (one entry per started
        agent) and ``coordinator``.

    Raises:
        RuntimeError: (the class imported from ``team_agent.runtime``) when the
            spec is missing, the tmux session already exists, a provider
            command is not installed, an agent cannot be resumed and
            ``allow_fresh`` is false, tmux fails to start a window, or a window
            has disappeared once all agents were started.
    """
    # Lazy-import everything from team_agent.runtime so existing tests that
    # patch runtime.shell_resume_command_for_agent / runtime.run_cmd /
    # runtime.start_coordinator / runtime.get_adapter continue to take effect
    # at call time. Runtime re-exports the provider helpers, so this also
    # routes through the providers module without binding it directly.
    from team_agent.runtime import (
        GHOSTTY_DISPLAY_BACKENDS,
        ResumeUnavailable,
        RuntimeError,
        _attach_profile_resume_root,
        _attach_team_profile_dirs,
        _capture_agent_session,
        _clear_session_capture_fields,
        _close_ghostty_display,
        _close_ghostty_workspace,
        _compile_team_dir_spec,
        _effective_runtime_config,
        _ensure_agent_start_requirements,
        _handle_startup_prompts_and_verify_window,
        _is_team_doc_dir,
        _open_worker_displays,
        _prepare_resume_state,
        _spec_team_dir,
        _tmux_session_conflict_error,
        _tmux_session_exists,
        _tmux_start_command_for_agent_window,
        _tmux_window_exists,
        ensure_workspace_dirs,
        get_adapter,
        run_cmd,
        shell_command_for_agent,
        shell_resume_command_for_agent,
        start_coordinator,
    )
    state = select_restart_state(workspace, team)
    # A truthy gate result is returned to the caller unchanged.
    gate = check_team_owner(state)
    if gate:
        return gate
    spec_path = Path(state.get("spec_path", workspace / "team.spec.yaml"))
    team_dir = Path(str(state.get("team_dir"))) if state.get("team_dir") else _spec_team_dir(spec_path, workspace)
    if _is_team_doc_dir(team_dir):
        # Team-doc directories are recompiled; the spec path is re-pointed at the team dir.
        compiled = _compile_team_dir_spec(team_dir, workspace)
        spec = compiled["spec"]
        spec_path = team_dir / "team.spec.yaml"
        state["spec_path"] = str(spec_path)
    else:
        if not spec_path.exists():
            raise RuntimeError(f"missing spec for restart: {spec_path}")
        spec = load_spec(spec_path)
    # NOTE(review): nesting reconstructed from a flattened listing; confirm this
    # call applies to both branches above rather than only the else branch.
    _attach_team_profile_dirs(spec, spec_path, workspace, team_dir)
    ensure_workspace_dirs(workspace)
    event_log = EventLog(workspace)
    # Saved session name wins, then the spec's runtime.session_name, then "team-<name>".
    session_name = state.get("session_name") or spec.get("runtime", {}).get("session_name") or f"team-{spec['team']['name']}"
    state.setdefault("team_dir", str(team_dir))
    if _tmux_session_exists(session_name):
        # Restart never reuses or kills an existing session of the same name.
        event_log.write(
            "restart.session_conflict",
            session=session_name,
            action="use a different team name or runtime.session_name; do not terminate existing tmux sessions from restart",
        )
        raise RuntimeError(_tmux_session_conflict_error(session_name))
    runtime_cfg = _effective_runtime_config(spec.get("runtime", {}))
    display_backend = spec.get("runtime", {}).get("display_backend", state.get("display_backend", "none"))
    # Close displays recorded in the previous state before starting new windows.
    _close_ghostty_workspace(state, event_log)
    for agent_id, agent_state in state.get("agents", {}).items():
        _close_ghostty_display(agent_id, agent_state, event_log)
    state["display_backend"] = display_backend
    # Agents paused in the saved state or in the spec are not restarted.
    restart_agents = [
        agent
        for agent in spec.get("agents", [])
        if state.get("agents", {}).get(agent["id"], {}).get("status") != "paused" and not agent.get("paused")
    ]
    _ensure_agent_start_requirements(workspace, restart_agents, event_log, "restart")
    # The first started agent creates the tmux session; later ones add windows.
    first = True
    restarted: list[dict[str, Any]] = []
    new_agents: dict[str, Any] = {}
    display_jobs: list[tuple[str, dict[str, Any]]] = []
    for agent in spec.get("agents", []):
        previous = state.get("agents", {}).get(agent["id"], {})
        if previous.get("status") == "paused" or agent.get("paused"):
            # Carry the paused agent's state forward without starting it.
            new_agents[agent["id"]] = dict(previous or {"status": "paused", "provider": agent["provider"]})
            new_agents[agent["id"]]["status"] = "paused"
            continue
        adapter = get_adapter(agent["provider"])
        if not adapter.is_installed():
            event_log.write(
                "restart.provider_missing",
                agent_id=agent["id"],
                provider=agent["provider"],
                command=adapter.command_name,
            )
            raise RuntimeError(
                f"Provider {agent['provider']} command {adapter.command_name!r} not found for agent {agent['id']}"
            )
        mcp_config = adapter.mcp_config(workspace, agent["id"])
        mcp_path = adapter.install_mcp(workspace, agent["id"], mcp_config)
        # Work on a copy so the runtime config is not written into the spec's agent dict.
        command_agent = copy.deepcopy(agent)
        command_agent["_runtime"] = runtime_cfg
        previous = _attach_profile_resume_root(workspace, command_agent, previous)
        # Session ids already held by other agents (saved state plus agents handled so far).
        known_session_ids = {
            str(item.get("session_id"))
            for aid, item in {**state.get("agents", {}), **new_agents}.items()
            if aid != agent["id"] and item.get("session_id")
        }
        try:
            previous = _prepare_resume_state(
                workspace,
                agent["id"],
                previous,
                adapter,
                event_log,
                known_session_ids,
                allow_fresh_on_resume_failure=allow_fresh,
            )
        except ResumeUnavailable as exc:
            # Best-effort removal of the MCP config installed above; a cleanup
            # failure is logged and the resume error is still raised.
            try:
                adapter.cleanup_mcp(workspace, agent["id"], mcp_path)
            except Exception as cleanup_exc:
                event_log.write(
                    "restart.mcp_cleanup_failed",
                    agent_id=agent["id"],
                    provider=agent["provider"],
                    mcp_config=str(mcp_path),
                    error=str(cleanup_exc),
                )
            raise RuntimeError(str(exc)) from exc
        # A session id left in the prepared state means a resume is attempted.
        restart_mode = "resumed" if previous.get("session_id") else "fresh"
        if restart_mode == "resumed":
            try:
                command = shell_resume_command_for_agent(command_agent, previous, workspace, mcp_config)
            except ResumeUnavailable as exc:
                event_log.write("restart.resume_unavailable", agent_id=agent["id"], error=str(exc))
                if not allow_fresh:
                    try:
                        adapter.cleanup_mcp(workspace, agent["id"], mcp_path)
                    except Exception as cleanup_exc:
                        event_log.write(
                            "restart.mcp_cleanup_failed",
                            agent_id=agent["id"],
                            provider=agent["provider"],
                            mcp_config=str(mcp_path),
                            error=str(cleanup_exc),
                        )
                    raise RuntimeError(
                        f"Cannot resume agent {agent['id']}: {exc}. "
                        "Use team-agent restart --allow-fresh only if losing that worker context is acceptable."
                    ) from exc
                # allow_fresh: fall back to a fresh start command.
                command = shell_command_for_agent(command_agent, workspace, mcp_config)
                restart_mode = "fresh"
        else:
            command = shell_command_for_agent(command_agent, workspace, mcp_config)
            event_log.write("restart.fresh_spawn", agent_id=agent["id"], provider=agent["provider"], reason="session_id_missing")
        event_log.write(
            "restart.agent_start",
            agent_id=agent["id"],
            provider=agent["provider"],
            restart_mode=restart_mode,
            session_id=previous.get("session_id"),
            session=session_name,
            window=agent["id"],
            tmux_start_mode="new-session" if first else "new-window",
            command=command,
            mcp_config=str(mcp_path),
        )
        if first:
            proc = run_cmd(["tmux", "new-session", "-d", "-s", session_name, "-n", agent["id"], "sh", "-lc", command])
            first = False
        else:
            proc = run_cmd(["tmux", "new-window", "-t", session_name, "-n", agent["id"], "sh", "-lc", command])
        if proc.returncode != 0:
            raise RuntimeError(f"Failed to restart agent {agent['id']}: {proc.stderr.strip()}")
        if not _handle_startup_prompts_and_verify_window(
            adapter, event_log, "restart", agent["id"], agent["provider"], session_name, restart_mode
        ):
            # The window did not survive startup. A fresh start has no fallback.
            if restart_mode != "resumed":
                raise RuntimeError(f"Failed to restart agent {agent['id']}: tmux window exited after start")
            if not allow_fresh:
                try:
                    adapter.cleanup_mcp(workspace, agent["id"], mcp_path)
                except Exception as cleanup_exc:
                    event_log.write(
                        "restart.mcp_cleanup_failed",
                        agent_id=agent["id"],
                        provider=agent["provider"],
                        mcp_config=str(mcp_path),
                        error=str(cleanup_exc),
                    )
                raise RuntimeError(
                    f"Cannot resume agent {agent['id']}: resume window exited or did not become visible. "
                    "Use team-agent restart --allow-fresh only if losing that worker context is acceptable."
                )
            # Resume failed but allow_fresh is set: start the agent again, fresh.
            event_log.write(
                "restart.resume_window_missing_fallback_fresh",
                agent_id=agent["id"],
                provider=agent["provider"],
                session_id=previous.get("session_id"),
            )
            command = shell_command_for_agent(command_agent, workspace, mcp_config)
            restart_mode = "fresh"
            tmux_cmd, tmux_start_mode = _tmux_start_command_for_agent_window(session_name, agent["id"], command)
            event_log.write(
                "restart.agent_start",
                agent_id=agent["id"],
                provider=agent["provider"],
                restart_mode=restart_mode,
                session_id=None,
                session=session_name,
                window=agent["id"],
                tmux_start_mode=tmux_start_mode,
                command=command,
                mcp_config=str(mcp_path),
            )
            proc = run_cmd(tmux_cmd)
            if proc.returncode != 0:
                raise RuntimeError(f"Failed to restart agent {agent['id']} fresh after resume exit: {proc.stderr.strip()}")
            if not _handle_startup_prompts_and_verify_window(
                adapter, event_log, "restart", agent["id"], agent["provider"], session_name, restart_mode
            ):
                raise RuntimeError(f"Failed to restart agent {agent['id']} fresh: tmux window exited after start")
        spawn_time = datetime.now(timezone.utc)
        # Start from the previous state and overwrite the fields describing this spawn.
        agent_state = dict(previous)
        agent_state.update(
            {
                "status": "running",
                "provider": agent["provider"],
                "agent_id": agent["id"],
                "model": agent.get("model"),
                "auth_mode": agent.get("auth_mode"),
                "profile": agent.get("profile"),
                "window": agent["id"],
                "mcp_config": str(mcp_path),
                "permissions": resolve_permissions(agent),
                "spawn_cwd": str(workspace),
                "spawned_at": spawn_time.isoformat(),
            }
        )
        profile_launch = command_agent.get("_provider_profile") or {}
        if profile_launch.get("claude_projects_root"):
            agent_state["claude_projects_root"] = profile_launch["claude_projects_root"]
        if restart_mode == "fresh":
            _clear_session_capture_fields(agent_state)
        # NOTE(review): nesting reconstructed from a flattened listing; confirm
        # whether this check belongs inside the "fresh" branch above.
        if command_agent.get("_session_id"):
            agent_state["_pending_session_id"] = command_agent["_session_id"]
        _capture_agent_session(
            workspace,
            agent["id"],
            agent_state,
            event_log,
            timeout_s=1.5,
            exclude_session_ids=known_session_ids,
        )
        if display_backend in GHOSTTY_DISPLAY_BACKENDS:
            display_jobs.append((agent["id"], agent))
        new_agents[agent["id"]] = agent_state
        restarted.append(
            {
                "agent_id": agent["id"],
                "restart_mode": restart_mode,
                "session_id": agent_state.get("session_id"),
                "display_target": None,
            }
        )
    # Displays are opened after every agent has started, then recorded in both
    # the persisted agent state and the returned summary.
    display_results = _open_worker_displays(workspace, session_name, display_jobs, event_log, display_backend)
    for agent_id, display in display_results.items():
        if agent_id in new_agents:
            new_agents[agent_id]["display"] = display
    for item in restarted:
        agent_id = item["agent_id"]
        if agent_id in display_results:
            item["display_target"] = display_results[agent_id]
    # Final check: every started agent must still have its tmux window.
    missing_after_start = [item["agent_id"] for item in restarted if not _tmux_window_exists(session_name, item["agent_id"])]
    if missing_after_start:
        for agent_id in missing_after_start:
            event_log.write("restart.agent_missing_after_start", agent_id=agent_id, target=f"{session_name}:{agent_id}")
        # Kill the session that was just created before reporting the failure.
        rollback = rollback_restart_session(session_name, event_log)
        raise RuntimeError(
            f"Failed to restart agent {missing_after_start[0]}: tmux window exited after start; "
            f"rollback_session_ok={rollback.get('ok')}"
        )
    state["session_name"] = session_name
    state["agents"] = new_agents
    populate_team_owner_from_env(state, source="restart")
    save_runtime_state(workspace, state)
    save_team_runtime_snapshot(workspace, state)
    # Constructed for its side effects only; the instance is discarded.
    # NOTE(review): presumably this initialises the message store — confirm.
    MessageStore(workspace)
    write_team_state(workspace, spec, state)
    from team_agent.leader import autobind_leader_receiver_from_env
    leader_provider = str(spec.get("leader", {}).get("provider") or "codex")
    autobind_leader_receiver_from_env(workspace, leader_provider, source="restart")
    coordinator = start_coordinator(workspace)
    event_log.write("restart.complete", session=session_name, agents=restarted, coordinator=coordinator)
    return {"ok": True, "session_name": session_name, "agents": restarted, "coordinator": coordinator}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def rollback_restart_session(session_name: str, event_log: EventLog) -> dict[str, Any]:
    """Kill the tmux session *session_name* and log the outcome.

    Returns:
        A dict with ``ok`` (true when tmux exited with code 0), ``session``,
        and the stripped ``stdout`` / ``stderr`` of the kill command. The same
        fields are written to *event_log* as ``restart.rollback_session``.
    """
    # Imported at call time, like the other runtime helpers in this module.
    from team_agent.runtime import run_cmd

    killed = run_cmd(["tmux", "kill-session", "-t", session_name], timeout=10)
    outcome = dict(
        ok=killed.returncode == 0,
        session=session_name,
        stdout=killed.stdout.strip(),
        stderr=killed.stderr.strip(),
    )
    event_log.write("restart.rollback_session", **outcome)
    return outcome
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from team_agent.paths import runtime_dir
|
|
8
|
+
from team_agent.state import load_runtime_state, runtime_state_path
|
|
9
|
+
from team_agent.restart.snapshot import load_snapshot_state, state_team_name
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def restart_candidates(workspace: Path) -> list[dict[str, Any]]:
    """List restart candidates for *workspace*, one per session name.

    Candidates come from every ``teams/*/state.json`` snapshot under the
    runtime directory and from the active runtime state. Entries are keyed by
    session name, so a later snapshot replaces an earlier one and the active
    state replaces any snapshot with the same name. The result is sorted by
    session name.
    """
    candidates_by_session: dict[str, dict[str, Any]] = {}
    teams_root = runtime_dir(workspace) / "teams"
    snapshot_files = sorted(teams_root.glob("*/state.json")) if teams_root.exists() else []
    for snapshot_file in snapshot_files:
        snapshot = load_snapshot_state(snapshot_file)
        # Unreadable snapshots and snapshots without a session name are ignored.
        if snapshot and snapshot.get("session_name"):
            key = str(snapshot["session_name"])
            candidates_by_session[key] = restart_candidate_from_state(snapshot, snapshot_file)
    live = load_runtime_state(workspace)
    if live.get("session_name"):
        live_key = str(live["session_name"])
        candidates_by_session[live_key] = restart_candidate_from_state(live, runtime_state_path(workspace))
    return sorted(candidates_by_session.values(), key=lambda candidate: candidate.get("session_name") or "")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def restart_candidate_from_state(state: dict[str, Any], state_path: Path) -> dict[str, Any]:
    """Summarise *state* (loaded from *state_path*) as a restart candidate.

    The candidate carries the session name, team name, state and spec paths,
    the sorted agent ids, the ``has_context`` flag and the state itself.
    """
    candidate: dict[str, Any] = {"session_name": str(state.get("session_name") or "")}
    candidate["team_name"] = state_team_name(state)
    candidate["state_path"] = str(state_path)
    candidate["spec_path"] = state.get("spec_path")
    candidate["agents"] = sorted(state.get("agents", {}).keys())
    candidate["has_context"] = state_has_restart_context(state)
    # The state object is referenced, not copied.
    candidate["state"] = state
    return candidate
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def state_has_restart_context(state: dict[str, Any]) -> bool:
    """Tell whether *state* holds anything a restart could work from.

    True as soon as an agent entry (a dict) has a ``session_id``,
    ``rollout_path`` or ``captured_at``; otherwise true exactly when the
    state's ``agents`` value is non-empty.
    """
    agent_entries = state.get("agents", {}).values()
    has_captured_session = any(
        isinstance(entry, dict)
        and (entry.get("session_id") or entry.get("rollout_path") or entry.get("captured_at"))
        for entry in agent_entries
    )
    if has_captured_session:
        return True
    return bool(state.get("agents"))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def select_restart_state(workspace: Path, team: str | None = None) -> dict[str, Any]:
    """Pick the runtime state that a restart should operate on.

    Only candidates whose ``has_context`` flag is set are considered. With a
    *team* selector, exactly one candidate must match it by session name, team
    name or the name of the directory containing its state file. Without a
    selector, a single candidate is used as-is and no candidate at all falls
    back to the active runtime state.

    Returns:
        A deep copy of the chosen candidate's state, or the active runtime
        state when there are no candidates and no selector.

    Raises:
        RuntimeError: (from ``team_agent.runtime``) when the selector matches
            no candidate or several, or when several candidates exist and no
            selector was given.
    """
    from team_agent.runtime import RuntimeError

    restartable = [candidate for candidate in restart_candidates(workspace) if candidate.get("has_context")]

    if not team:
        if not restartable:
            return load_runtime_state(workspace)
        if len(restartable) > 1:
            raise RuntimeError(
                "multiple restartable teams found in this workspace; pass --team <session_name> to choose. "
                + format_restart_candidates(restartable)
            )
        return copy.deepcopy(restartable[0]["state"])

    def selector_names(candidate: dict[str, Any]) -> set[Any]:
        # Names a candidate can be selected by.
        return {
            candidate.get("session_name"),
            candidate.get("team_name"),
            Path(str(candidate.get("state_path"))).parent.name,
        }

    selected = [candidate for candidate in restartable if team in selector_names(candidate)]
    if not selected:
        raise RuntimeError(f"restart team {team!r} not found. " + format_restart_candidates(restartable))
    if len(selected) > 1:
        raise RuntimeError("restart team selector is ambiguous. " + format_restart_candidates(selected))
    return copy.deepcopy(selected[0]["state"])
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def format_restart_candidates(candidates: list[dict[str, Any]]) -> str:
    """Render *candidates* as a one-line, human-readable summary.

    Each candidate is shown as ``<session> team=<team> agents=<a,b,...>``, with
    ``-`` standing in for a missing team name or an empty agent list; entries
    are joined with ``"; "``. An empty list yields a fixed "nothing found"
    sentence.
    """
    if not candidates:
        return "No restartable team state was found."

    def describe(candidate: dict[str, Any]) -> str:
        session = candidate.get("session_name")
        team_label = candidate.get("team_name") or "-"
        agents_label = ",".join(candidate.get("agents") or []) or "-"
        return f"{session} team={team_label} agents={agents_label}"

    return "Candidates: " + "; ".join(describe(candidate) for candidate in candidates)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def quick_start_existing_context(workspace: Path, session_name: str) -> dict[str, Any] | None:
    """Return the first restart candidate named *session_name* that has context.

    Returns ``None`` when no candidate in *workspace* has both that session
    name and a truthy ``has_context`` flag.
    """
    matching = (
        candidate
        for candidate in restart_candidates(workspace)
        if candidate.get("session_name") == session_name and candidate.get("has_context")
    )
    return next(matching, None)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from team_agent.paths import runtime_dir
|
|
13
|
+
from team_agent.spec import load_spec
|
|
14
|
+
from team_agent.state import normalize_agent_session_state
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def save_team_runtime_snapshot(workspace: Path, state: dict[str, Any]) -> Path | None:
    """Write a per-session snapshot of *state* under the runtime ``teams`` directory.

    The snapshot is a deep copy of *state*. When the state's spec file exists
    it is copied next to the snapshot and the snapshot's ``spec_path`` is
    pointed at that copy; a missing ``team_dir`` is filled in from the spec
    location. A ``team_snapshot`` section records the session name, team name,
    snapshot directory and a UTC timestamp.

    Returns:
        The path of the written ``state.json``, or ``None`` when *state* has no
        session name (nothing is written in that case).
    """
    from team_agent.runtime import _spec_team_dir

    session_name = state.get("session_name")
    if not session_name:
        return None

    target_dir = team_runtime_snapshot_dir(workspace, str(session_name))
    target_dir.mkdir(parents=True, exist_ok=True)
    snapshot = copy.deepcopy(state)

    source_spec = Path(str(state.get("spec_path") or ""))
    if source_spec.is_file():
        if not snapshot.get("team_dir"):
            snapshot["team_dir"] = str(_spec_team_dir(source_spec, workspace))
        copied_spec = target_dir / "team.spec.yaml"
        # Skip the copy when the spec already is the snapshot's own copy.
        if source_spec.resolve() != copied_spec.resolve():
            shutil.copy2(source_spec, copied_spec)
        snapshot["spec_path"] = str(copied_spec)

    # The team name is looked up after spec_path may have been re-pointed.
    team_name = state_team_name(snapshot)
    snapshot["team_snapshot"] = {
        "session_name": session_name,
        "team_name": team_name,
        "snapshot_dir": str(target_dir),
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }

    final_path = target_dir / "state.json"
    staging_path = final_path.with_suffix(".json.tmp")
    serialized = json.dumps(snapshot, indent=2, ensure_ascii=False)
    # Write to a sibling temp file, then replace, so state.json is never half-written.
    staging_path.write_text(serialized, encoding="utf-8")
    os.replace(staging_path, final_path)
    return final_path
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def team_runtime_snapshot_dir(workspace: Path, session_name: str) -> Path:
    """Return the snapshot directory for *session_name* under the runtime ``teams`` folder."""
    teams_root = runtime_dir(workspace) / "teams"
    return teams_root / safe_snapshot_name(session_name)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def safe_snapshot_name(value: str) -> str:
    """Turn *value* into a name usable as a snapshot directory.

    Every character other than ASCII letters, digits, ``_``, ``.`` and ``-``
    becomes ``_``; leading and trailing ``.``, ``_`` and ``-`` are then
    stripped. If nothing is left, ``"team"`` is returned.
    """
    sanitized = re.sub(r"[^A-Za-z0-9_.-]", "_", value)
    trimmed = sanitized.strip("._-")
    if trimmed:
        return trimmed
    return "team"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def state_team_name(state: dict[str, Any]) -> str | None:
    """Look up the team name in the spec file that *state* points to.

    Returns:
        ``None`` when the state has no ``spec_path`` or the spec cannot be
        loaded for any reason; otherwise the spec's ``team.name`` as a string
        (an empty string when the name is absent or falsy).
    """
    spec_path = state.get("spec_path")
    if not spec_path:
        return None
    try:
        spec = load_spec(Path(str(spec_path)))
        team_section = spec.get("team", {})
        return str(team_section.get("name") or "")
    except Exception:
        # Best effort: an unreadable or malformed spec means "name unknown".
        return None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def load_snapshot_state(path: Path) -> dict[str, Any] | None:
    """Load a snapshot ``state.json`` file, tolerating damaged files.

    Args:
        path: Location of the snapshot state file.

    Returns:
        The decoded state after ``normalize_agent_session_state`` has been
        applied to it, or ``None`` when the file cannot be read, is not valid
        UTF-8 or JSON, or does not hold a JSON object.
    """
    try:
        state = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so one corrupt snapshot cannot abort candidate discovery.
        return None
    # Callers use the result as a mapping; any other JSON value is unusable.
    if not isinstance(state, dict):
        return None
    normalize_agent_session_state(state)
    return state
|