agentworks-cli 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. agentworks/__init__.py +1 -0
  2. agentworks/agents/__init__.py +0 -0
  3. agentworks/agents/manager.py +1095 -0
  4. agentworks/agents/templates.py +145 -0
  5. agentworks/catalog.py +264 -0
  6. agentworks/catalog.toml +131 -0
  7. agentworks/cli.py +1462 -0
  8. agentworks/completions/__init__.py +33 -0
  9. agentworks/completions/bash.py +179 -0
  10. agentworks/completions/install.py +122 -0
  11. agentworks/completions/powershell.py +270 -0
  12. agentworks/completions/spec.py +216 -0
  13. agentworks/completions/zsh.py +256 -0
  14. agentworks/config.py +894 -0
  15. agentworks/db.py +1083 -0
  16. agentworks/doctor.py +430 -0
  17. agentworks/git_credentials/__init__.py +0 -0
  18. agentworks/git_credentials/azdo.py +29 -0
  19. agentworks/git_credentials/base.py +71 -0
  20. agentworks/git_credentials/github.py +22 -0
  21. agentworks/nerf-config.yaml +16 -0
  22. agentworks/output.py +296 -0
  23. agentworks/remote_exec.py +286 -0
  24. agentworks/sample-config.toml +289 -0
  25. agentworks/sessions/__init__.py +0 -0
  26. agentworks/sessions/console.py +164 -0
  27. agentworks/sessions/manager.py +1297 -0
  28. agentworks/sessions/templates.py +101 -0
  29. agentworks/sessions/tmux.py +503 -0
  30. agentworks/sources.py +303 -0
  31. agentworks/ssh.py +759 -0
  32. agentworks/ssh_config.py +255 -0
  33. agentworks/vm_hosts/__init__.py +0 -0
  34. agentworks/vm_hosts/manager.py +86 -0
  35. agentworks/vms/__init__.py +0 -0
  36. agentworks/vms/backup.py +409 -0
  37. agentworks/vms/base.py +56 -0
  38. agentworks/vms/bootstrap_script.py +185 -0
  39. agentworks/vms/cloud_init.py +55 -0
  40. agentworks/vms/initializer.py +1523 -0
  41. agentworks/vms/manager.py +1122 -0
  42. agentworks/vms/provisioners/__init__.py +0 -0
  43. agentworks/vms/provisioners/azure.py +602 -0
  44. agentworks/vms/provisioners/lima.py +295 -0
  45. agentworks/vms/provisioners/proxmox.py +279 -0
  46. agentworks/vms/provisioners/proxmox_api.py +261 -0
  47. agentworks/vms/provisioners/wsl2.py +340 -0
  48. agentworks/vms/templates.py +152 -0
  49. agentworks/workspaces/__init__.py +0 -0
  50. agentworks/workspaces/backends/__init__.py +0 -0
  51. agentworks/workspaces/backends/local.py +119 -0
  52. agentworks/workspaces/backends/vm.py +175 -0
  53. agentworks/workspaces/manager.py +1080 -0
  54. agentworks/workspaces/templates.py +76 -0
  55. agentworks/workspaces/tmuxinator.py +80 -0
  56. agentworks_cli-0.2.1.dist-info/METADATA +635 -0
  57. agentworks_cli-0.2.1.dist-info/RECORD +59 -0
  58. agentworks_cli-0.2.1.dist-info/WHEEL +4 -0
  59. agentworks_cli-0.2.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1297 @@
1
+ """Session lifecycle orchestration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import re
7
+ import shlex
8
+ import sys
9
+ from functools import partial
10
+ from typing import TYPE_CHECKING
11
+
12
+ import typer
13
+
14
+ from agentworks import output
15
+ from agentworks.db import PID_STOPPED, SessionMode, SessionStatus
16
+ from agentworks.sessions.tmux import AGENT_SOCKET_ROOT
17
+ from agentworks.ssh import SSH_TRANSPORT_ERROR, admin_exec_target
18
+
19
+ _ENV_KEY_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
20
+
21
+ # Template variable substitution -- uses {{var}} syntax consistent with nerftools.
22
+ _TEMPLATE_VAR_RE = re.compile(r"\{\{(\w+)\}\}")
23
+ _KNOWN_TEMPLATE_VARS = {"session_name", "workspace_name"}
24
+
25
+ if TYPE_CHECKING:
26
+ from agentworks.config import Config
27
+ from agentworks.db import Database, SessionRow, VMRow, WorkspaceRow
28
+ from agentworks.sessions.templates import ResolvedSessionTemplate
29
+ from agentworks.sessions.tmux import RunCommand
30
+ from agentworks.ssh import ExecTarget, SSHLogger
31
+
32
+
33
+ # -- Helpers ---------------------------------------------------------------
34
+
35
+ # Grace period (seconds) to wait after sending C-c before killing a session
36
+ _STOP_GRACE_SECONDS = 5
37
+
38
+
39
def _resolve_session_linux_user(db: Database, session: SessionRow, vm: VMRow) -> str:
    """Return the Linux account a session runs as.

    A session that names an agent runs as that agent's ``linux_user``; the
    agent is looked up in the database by name. Any other session runs as
    the VM's admin user.

    Raises:
        output.AgentError: the session names an agent that is not in the DB.
    """
    agent_name = session.agent_name
    if not agent_name:
        # Admin-mode session: no agent attached.
        return vm.admin_username

    agent_row = db.get_agent(agent_name)
    if agent_row is not None:
        return agent_row.linux_user
    raise output.AgentError(
        f"agent '{agent_name}' not found "
        f"(referenced by session '{session.name}')"
    )
53
+
54
+
55
+
56
def _kill_session(
    session_name: str,
    *,
    run_command: RunCommand,
    socket_path: str | None,
) -> bool:
    """Ask tmux to kill ``session_name`` on the server at ``socket_path``.

    Thin wrapper around ``agentworks.sessions.tmux.kill_session``; the import
    is done lazily inside the function. Returns that function's result
    (True when the kill succeeded).
    """
    from agentworks.sessions.tmux import kill_session as tmux_kill_session

    killed = tmux_kill_session(session_name, run_command=run_command, socket_path=socket_path)
    return killed
66
+
67
+
68
def _repair_session_pid(
    session: SessionRow,
    *,
    target: ExecTarget,
    db: Database,
) -> bool:
    """Re-derive the stored PID / boot ID of one session from the live VM.

    Returns True once the DB row has been updated (every non-raising path
    updates it).

    Raises:
        output.SessionError: the VM cannot be reached over SSH, the session
            is alive but its PID or boot ID could not be read, or the socket
            has a live tmux server that only answers under sudo.
    """
    from agentworks.sessions.tmux import get_tmux_server_pid, tmux_cmd

    name = session.name
    socket = session.socket_path

    # Primary liveness probe: does tmux know this session?
    liveness_cmd = tmux_cmd(f"has-session -t {shlex.quote(name)}", socket) + " 2>/dev/null"
    liveness = target.run(liveness_cmd, check=False)
    if liveness.returncode == SSH_TRANSPORT_ERROR:
        raise output.SessionError(f"cannot reach VM for session '{name}' (SSH connection failed)")

    if liveness.ok:
        # Alive: both the server PID and the current boot ID are required.
        server_pid = get_tmux_server_pid(target=target, socket_path=socket)
        current_boot = None if server_pid is None else _get_boot_id(target)
        if server_pid is None or current_boot is None:
            raise output.SessionError(
                f"session '{name}' is alive but PID/boot ID recovery failed. "
                "Investigate the tmux server manually."
            )
        db.update_session_pid(name, server_pid, boot_id=current_boot)
        output.warn(f"Recovered PID {server_pid} for session '{name}'")
        return True

    # has-session failed. If a socket file exists, a sudo probe tells a stale
    # socket (dead server) apart from a live server we merely cannot reach.
    if socket:
        socket_present = target.run(f"test -e {shlex.quote(socket)}", sudo=True, check=False).ok
        if socket_present:
            sudo_probe = tmux_cmd("list-sessions", socket, sudo=True) + " 2>/dev/null"
            if target.run(sudo_probe, check=False).ok:
                raise output.SessionError(
                    f"session '{name}' has a live tmux server but it is unreachable. "
                    "This may indicate a permissions issue. Investigate manually."
                )

    # Stale socket, no socket at all, or an admin session: genuinely stopped.
    db.update_session_pid(name, PID_STOPPED)
    output.warn(f"Session '{name}' is not running, marked stopped")
    return True
120
+
121
+
122
def _needs_repair(session: SessionRow) -> bool:
    """Tell whether a session row lacks the PID or boot ID needed for status checks.

    A row explicitly marked ``PID_STOPPED`` never needs repair.
    """
    marked_stopped = session.pid == PID_STOPPED
    incomplete = session.pid is None or session.boot_id is None
    return incomplete and not marked_stopped
127
+
128
+
129
def _ensure_pid(session: SessionRow, *, target: ExecTarget, db: Database) -> SessionRow:
    """Auto-recover PID + boot ID for a session missing either.

    Strict gate: the returned row is either marked ``PID_STOPPED`` or carries
    both a PID and a boot ID. Rows that already satisfy this are returned
    unchanged without touching the VM.

    Raises:
        output.SessionError: the repair failed (see ``_repair_session_pid``),
            or the session row is no longer in the database when re-read
            after the repair.
    """
    if not _needs_repair(session):
        return session
    _repair_session_pid(session, target=target, db=db)  # raises on failure
    refreshed = db.get_session(session.name)
    if refreshed is None:
        # Explicit raise rather than ``assert``: asserts are stripped under
        # ``python -O`` and callers dereference the returned row immediately.
        raise output.SessionError(f"session '{session.name}' not found")
    return refreshed
142
+
143
+
144
def ensure_pids_batch(sessions: list[SessionRow], *, db: Database, config: Config) -> list[SessionRow]:
    """Repair every session in ``sessions`` that lacks a PID or boot ID.

    Best effort: sessions whose workspace is not a VM workspace, whose VM is
    unknown or has no Tailscale host, or whose repair raises are left as-is
    (a warning is printed for failures). Returns a list in the original
    order where repaired rows are replaced by a fresh read from the DB; if
    nothing was repaired the input list itself is returned.
    """
    pending = [row for row in sessions if _needs_repair(row)]
    if not pending:
        return sessions

    # One ExecTarget per VM (sessions are grouped by VM, not by workspace).
    targets: dict[str, ExecTarget] = {}
    grouped: dict[str, list[SessionRow]] = {}
    for row in pending:
        workspace = db.get_workspace(row.workspace_name)
        if not (workspace and workspace.type == "vm" and workspace.vm_name):
            continue
        vm_name = workspace.vm_name
        if vm_name not in targets:
            vm_row = db.get_vm(vm_name)
            if not (vm_row and vm_row.tailscale_host):
                continue
            try:
                targets[vm_name] = admin_exec_target(vm_row, config)
            except Exception as exc:
                output.warn(f"Cannot reach VM '{vm_name}': {exc}")
                continue
        grouped.setdefault(vm_name, []).append(row)

    refreshed_names: set[str] = set()
    for vm_name, rows in grouped.items():
        target = targets[vm_name]
        for row in rows:
            try:
                updated = _repair_session_pid(row, target=target, db=db)
            except output.SessionError as exc:
                output.warn(str(exc))
            except Exception as exc:
                output.warn(f"Failed to repair session '{row.name}': {exc}")
            else:
                if updated:
                    refreshed_names.add(row.name)

    if not refreshed_names:
        return sessions
    # Keep the stale row if the refreshed lookup unexpectedly comes back empty.
    return [
        (db.get_session(row.name) or row) if row.name in refreshed_names else row
        for row in sessions
    ]
191
+
192
+
193
def _require_workspace(db: Database, name: str) -> WorkspaceRow:
    """Fetch workspace ``name`` from the DB or raise ``output.WorkspaceError``."""
    row = db.get_workspace(name)
    if row is not None:
        return row
    raise output.WorkspaceError(f"workspace '{name}' not found")
198
+
199
+
200
def _require_vm_for_workspace(db: Database, ws: WorkspaceRow) -> VMRow:
    """Return the VM row backing ``ws``.

    Raises:
        output.SessionError: ``ws`` is not a VM-type workspace.
        output.VMError: the workspace's VM is not in the database.
    """
    if ws.type != "vm":
        raise output.SessionError("sessions are only supported on VM workspaces")
    vm_row = db.get_vm(ws.vm_name)  # type: ignore[arg-type]
    if vm_row is not None:
        return vm_row
    raise output.VMError(f"VM '{ws.vm_name}' not found")
207
+
208
+
209
def _prepare_vm(
    db: Database, config: Config, workspace_name: str, *, operation: str | None = None
) -> tuple[WorkspaceRow, VMRow, RunCommand, RunCommand, ExecTarget]:
    """Validate the workspace and its VM, make sure the VM is running, and open an admin target.

    Returns ``(ws, vm, run_command, run_as_root, target)``. ``run_command``
    is the target's ``run`` method and ``run_as_root`` is the same method
    pre-bound with ``sudo=True``, for callers that take RunCommand callables.

    When ``operation`` is given, an ``SSHLogger`` for that operation is
    created and passed to the ExecTarget.

    Raises:
        output.VMError: the VM has no Tailscale address (plus whatever the
            workspace / VM lookups raise).
    """
    from agentworks.ssh import SSHLogger

    workspace = _require_workspace(db, workspace_name)
    vm_row = _require_vm_for_workspace(db, workspace)

    from agentworks.workspaces.manager import _ensure_vm_running

    _ensure_vm_running(db, config, vm_row)

    if vm_row.tailscale_host is None:
        raise output.VMError(f"VM '{vm_row.name}' has no Tailscale address")

    ssh_logger = None
    if operation:
        ssh_logger = SSHLogger(vm_row.name, operation)
    exec_target = admin_exec_target(vm_row, config, logger=ssh_logger)

    as_admin: RunCommand = exec_target.run
    as_root: RunCommand = partial(exec_target.run, sudo=True)
    return workspace, vm_row, as_admin, as_root, exec_target
235
+
236
+
237
def _require_session(db: Database, name: str) -> SessionRow:
    """Fetch session ``name`` from the DB or raise ``output.SessionError``."""
    row = db.get_session(name)
    if row is not None:
        return row
    raise output.SessionError(f"session '{name}' not found")
242
+
243
+
244
def _regenerate_tmuxinator(
    db: Database,
    config: Config,
    vm: VMRow,
    ws: WorkspaceRow,
    *,
    logger: SSHLogger | None = None,
) -> None:
    """Rewrite ``<workspace_path>/.tmuxinator.yml`` from the sessions currently in the DB.

    The config is generated from every session of the workspace together
    with a name -> socket path map, then written to the VM through a fresh
    admin ExecTarget.
    """
    from agentworks.ssh import write_file
    from agentworks.workspaces.tmuxinator import generate_config

    rows = db.list_sessions(workspace_name=ws.name)
    # Socket path per session name (admin sessions have NULL, agent sessions always set).
    sockets_by_name = {}
    for row in rows:
        sockets_by_name[row.name] = row.socket_path

    rendered = generate_config(ws.name, ws.workspace_path, sessions=rows, socket_paths=sockets_by_name)
    exec_target = admin_exec_target(vm, config)
    destination = f"{ws.workspace_path}/.tmuxinator.yml"
    write_file(exec_target, destination, rendered, logger=logger)
262
+
263
+
264
def filter_sessions(
    db: Database,
    *,
    workspace_name: str | None = None,
    vm_name: str | None = None,
) -> list[SessionRow]:
    """List sessions, optionally narrowed to one workspace and/or one VM.

    ``workspace_name`` is passed straight to ``db.list_sessions``. When
    ``vm_name`` is given, only sessions whose workspace belongs to that VM
    (per ``db.list_workspaces(vm_name=...)``) are kept.
    """
    rows = db.list_sessions(workspace_name=workspace_name)
    if vm_name is None:
        return rows
    allowed = {workspace.name for workspace in db.list_workspaces(vm_name=vm_name)}
    return [row for row in rows if row.workspace_name in allowed]
276
+
277
+
278
def _resolve_template(config: Config, template_name: str | None) -> ResolvedSessionTemplate:
    """Resolve a session template by name via ``sessions.templates.resolve_template``.

    A ``ValueError`` from the resolver is re-raised as ``output.SessionError``
    carrying the same message, with the original traceback context suppressed.
    """
    from agentworks.sessions.templates import resolve_template

    try:
        resolved = resolve_template(config, template_name)
    except ValueError as err:
        raise output.SessionError(str(err)) from None
    return resolved
286
+
287
+
288
def _substitute_template_vars(text: str, variables: dict[str, str]) -> str:
    """Expand every ``{{var}}`` placeholder in ``text`` from ``variables``.

    Raises:
        output.SessionError: a placeholder names a variable that is not in
            ``_KNOWN_TEMPLATE_VARS``.
    """

    def _expand(match: re.Match[str]) -> str:
        var = match.group(1)
        if var in _KNOWN_TEMPLATE_VARS:
            return variables[var]
        raise output.SessionError(f"unknown template variable '{{{{{var}}}}}'")

    return _TEMPLATE_VAR_RE.sub(_expand, text)
298
+
299
+
300
def _build_session_command(
    template: ResolvedSessionTemplate,
    *,
    session_name: str,
    workspace_name: str,
    restart: bool = False,
) -> str:
    """Render the shell line a session runs, from its template.

    The result is the template's env vars as ``export K=V`` statements
    (values shell-quoted) followed by ``exec <command>``, all joined with
    ``&&``. ``{{session_name}}`` and ``{{workspace_name}}`` are expanded in
    the command and in env values. With ``restart=True`` the template's
    ``restart_command`` is used when it is set. An empty string comes back
    when there is neither a command nor any env var.

    Raises:
        output.SessionError: an env var name is not a valid shell identifier,
            or a placeholder is unknown.
    """
    substitutions = {
        "session_name": session_name,
        "workspace_name": workspace_name,
    }

    if restart and template.restart_command:
        source_command = template.restart_command
    else:
        source_command = template.command
    expanded_command = _substitute_template_vars(source_command, substitutions)

    segments = []
    for env_name, env_value in template.env.items():
        if _ENV_KEY_RE.match(env_name) is None:
            raise output.SessionError(f"invalid env var name {env_name!r} in template '{template.name}'")
        rendered_value = _substitute_template_vars(env_value, substitutions)
        segments.append(f"export {env_name}={shlex.quote(rendered_value)}")

    if expanded_command:
        # exec replaces the shell so the command becomes the pane's process.
        segments.append(f"exec {expanded_command}")

    return " && ".join(segments)
331
+
332
+
333
+ # -- Liveness checks -------------------------------------------------------
334
+
335
+
336
def _pid_alive(pid: int, *, target: ExecTarget) -> bool:
    """Report whether ``/proc/<pid>`` exists on the target (i.e. the process is alive)."""
    probe = target.run(f"test -d /proc/{pid}", check=False)
    return probe.ok
339
+
340
+
341
def _get_boot_id(target: ExecTarget) -> str | None:
    """Read ``/proc/sys/kernel/random/boot_id`` on the target.

    Returns the stripped value, or None when the command produced no output
    (or the result object has no ``stdout``).
    """
    result = target.run("cat /proc/sys/kernel/random/boot_id", check=False)
    raw = getattr(result, "stdout", "")
    text = raw.strip() if raw else ""
    return text if text else None
346
+
347
+
348
def check_session_status(
    session: SessionRow,
    *,
    target: ExecTarget,
) -> SessionStatus:
    """Work out a session's status, dispatching on its mode / socket combination.

    Rows marked ``PID_STOPPED`` are STOPPED and rows without a PID or boot
    ID are UNKNOWN, both without contacting the VM. Agent sessions with a
    socket and admin sessions without one are probed on the target.
    No DB writes happen here.

    Raises:
        RuntimeError: the mode / socket_path combination is neither of the
            two supported shapes.
    """
    if session.pid == PID_STOPPED:
        return SessionStatus.STOPPED
    if session.pid is None or session.boot_id is None:
        return SessionStatus.UNKNOWN

    has_socket = session.socket_path is not None
    if has_socket and session.mode == SessionMode.AGENT.value:
        return _check_dedicated_agent_session(session, target=target)
    if not has_socket and session.mode == SessionMode.ADMIN.value:
        return _check_shared_admin_session(session, target=target)
    raise RuntimeError(f"unexpected session config: mode={session.mode}, socket_path={session.socket_path}")
367
+
368
+
369
def _check_dedicated_agent_session(session: SessionRow, *, target: ExecTarget) -> SessionStatus:
    """Status of an agent session that has its own tmux server and socket.

    OK when ``tmux has-session`` succeeds. Otherwise: STOPPED if the VM has
    rebooted since the PID was recorded or the PID is gone, BROKEN if the
    PID is still alive on the same boot, and UNKNOWN on an SSH transport
    failure or when the current boot ID cannot be read.
    """
    from agentworks.sessions.tmux import tmux_cmd

    has_session = tmux_cmd(f"has-session -t {shlex.quote(session.name)}", session.socket_path) + " 2>/dev/null"
    outcome = target.run(has_session, check=False)
    if outcome.returncode == SSH_TRANSPORT_ERROR:
        return SessionStatus.UNKNOWN  # SSH transport failure, not a session state
    if outcome.ok:
        return SessionStatus.OK

    # has-session failed -- decide between STOPPED and BROKEN.
    assert session.pid is not None and session.pid > 0
    boot_now = _get_boot_id(target)
    if boot_now is None:
        return SessionStatus.UNKNOWN  # can't verify boot cycle, unsafe to offer --force

    # A PID recorded under a different boot is meaningless.
    same_boot = session.boot_id is None or session.boot_id == boot_now
    if same_boot and _pid_alive(session.pid, target=target):
        return SessionStatus.BROKEN  # same boot, process alive, socket unreachable
    return SessionStatus.STOPPED
391
+
392
+
393
def _check_shared_admin_session(session: SessionRow, *, target: ExecTarget) -> SessionStatus:
    """Status of an admin session on the default tmux server.

    OK or STOPPED according to ``tmux has-session``; UNKNOWN on an SSH
    transport failure. BROKEN is never returned for admin sessions.
    """
    from agentworks.sessions.tmux import tmux_cmd

    has_session = tmux_cmd(f"has-session -t {shlex.quote(session.name)}") + " 2>/dev/null"
    outcome = target.run(has_session, check=False)
    if outcome.returncode == SSH_TRANSPORT_ERROR:
        return SessionStatus.UNKNOWN  # SSH transport failure, not a session state
    return SessionStatus.OK if outcome.ok else SessionStatus.STOPPED
405
+
406
+
407
def batch_check_status(
    sessions: list[SessionRow],
    *,
    target: ExecTarget,
) -> dict[str, SessionStatus]:
    """Check many sessions on one target with a single compound shell command.

    Only sessions with a positive PID and a boot ID are probed. The result
    maps session name -> status; sessions that were not probed, or whose
    result line is missing or has an empty boot ID, are simply absent
    (callers treat absence as unknown).

    Each probe prints one line:
      * ``S:<name>:0``                      has-session succeeded
      * ``S:<name>:1:<boot_id>:<pid_exit>`` agent session, has-session failed
      * ``S:<name>:<exit>``                 admin session

    Raises:
        RuntimeError: a probed session has an unsupported mode / socket_path
            combination.
    """
    from agentworks.sessions.tmux import tmux_cmd

    probed = [row for row in sessions if row.pid is not None and row.pid > 0 and row.boot_id is not None]
    if not probed:
        return {}

    snippets = []
    for row in probed:
        # The name is quoted for tmux's -t argument but echoed raw in the
        # output line (names are validated, no shell-special chars).
        has_cmd = tmux_cmd(f"has-session -t {shlex.quote(row.name)}", row.socket_path)
        is_agent = row.mode == SessionMode.AGENT.value and row.socket_path is not None
        is_admin = row.mode == SessionMode.ADMIN.value and row.socket_path is None
        if is_agent:
            # On failure also report the current boot ID and whether the PID exists.
            snippets.append(
                f"{has_cmd} 2>/dev/null; "
                "if [ $? -ne 0 ]; then "
                "BOOT=$(cat /proc/sys/kernel/random/boot_id); "
                f"test -d /proc/{row.pid}; "
                f'echo "S:{row.name}:1:$BOOT:$?"; '
                f'else echo "S:{row.name}:0"; fi'
            )
        elif is_admin:
            snippets.append(f'{has_cmd} 2>/dev/null; echo "S:{row.name}:$?"')
        else:
            raise RuntimeError(f"unexpected session config: mode={row.mode}, socket_path={row.socket_path}")

    result = target.run("; ".join(snippets), check=False)
    output_text = getattr(result, "stdout", "") or ""

    recorded_boots = {row.name: row.boot_id for row in probed}
    verdicts: dict[str, SessionStatus] = {}

    for raw_line in output_text.strip().splitlines():
        if not raw_line.startswith("S:"):
            continue
        tokens = raw_line.split(":", maxsplit=4)
        if len(tokens) < 3:
            continue
        _, reported_name, has_exit, *extra = tokens

        if has_exit == "0":
            verdicts[reported_name] = SessionStatus.OK
            continue
        if len(extra) != 2:
            # Admin session failure (no boot / PID fields).
            verdicts[reported_name] = SessionStatus.STOPPED
            continue

        live_boot, pid_exit = extra
        if not live_boot:
            # Boot ID read failed -- STOPPED vs BROKEN can't be decided; leave it out.
            continue
        recorded = recorded_boots.get(reported_name)
        boot_changed = bool(recorded) and recorded != live_boot
        if not boot_changed and pid_exit == "0":
            verdicts[reported_name] = SessionStatus.BROKEN  # PID alive, socket unreachable
        else:
            verdicts[reported_name] = SessionStatus.STOPPED  # stale boot or dead PID

    return verdicts
484
+
485
+
486
+ # -- Public API ------------------------------------------------------------
487
+
488
+
489
def create_session(
    db: Database,
    config: Config,
    *,
    name: str,
    workspace_name: str,
    template_name: str | None = None,
    agent_name: str | None = None,
    created_workspace: bool = False,
) -> None:
    """Create and start a session.

    With ``agent_name`` the session runs in agent mode as that agent's Linux
    user (the agent must live on the workspace's VM); otherwise it runs in
    admin mode as the VM admin user.

    Ordering matters here: everything that can fail without touching state
    (name validation, agent checks, template resolution, command rendering)
    runs *before* the implicit workspace grant and the DB insert, so a bad
    template or env var name cannot leave an orphaned grant or session row.
    Once the session row exists, any failure while deploying the tmux config
    or starting tmux rolls the row (and the implicit grant) back.

    Raises:
        output.SessionError: the session already exists, the agent is on a
            different VM, or the template / command is invalid.
        output.AgentError: ``agent_name`` is not a known agent.
    """
    from agentworks.config import validate_name
    from agentworks.sessions.tmux import (
        create_session as create_tmux_session,
    )
    from agentworks.sessions.tmux import (
        deploy_restricted_config,
    )

    validate_name(name)
    ws, vm, run_command, run_as_root, target = _prepare_vm(db, config, workspace_name, operation="session-create")

    if db.get_session(name) is not None:
        raise output.SessionError(f"session '{name}' already exists")

    # Resolve mode and linux user (validation only, no side effects yet)
    resolved_agent_name: str | None = None
    if agent_name is not None:
        mode = SessionMode.AGENT
        agent = db.get_agent(agent_name)
        if agent is None:
            raise output.AgentError(f"agent '{agent_name}' not found")
        if agent.vm_name != vm.name:
            raise output.SessionError(
                f"agent '{agent_name}' is on VM '{agent.vm_name}', "
                f"but workspace '{workspace_name}' is on VM '{vm.name}'"
            )
        linux_user = agent.linux_user
        resolved_agent_name = agent_name
    else:
        mode = SessionMode.ADMIN
        linux_user = vm.admin_username

    # Validate the template and render the command before mutating anything.
    template = _resolve_template(config, template_name)
    command = _build_session_command(template, session_name=name, workspace_name=workspace_name)

    # Auto-grant implicit workspace access if needed
    if resolved_agent_name is not None and not db.has_any_grant(resolved_agent_name, workspace_name):
        from agentworks.agents.manager import _add_to_workspace_group

        _add_to_workspace_group(vm, config, linux_user, workspace_name)
        db.insert_agent_grant(resolved_agent_name, workspace_name, "implicit", session_name=name)

    # Compute socket path up front (deterministic from linux_user + session name).
    # Needed for the DB insert since the CHECK constraint requires agent sessions
    # to have a socket_path.
    expected_socket: str | None = None
    if mode == SessionMode.AGENT:
        from agentworks.sessions.tmux import agent_socket_path

        expected_socket = agent_socket_path(linux_user, name)

    # Insert DB record first to avoid orphaned tmux sessions on crash
    db.insert_session(
        name,
        workspace_name,
        template.name,
        mode,
        agent_name=resolved_agent_name,
        created_workspace=created_workspace,
        socket_path=expected_socket,
    )

    try:
        # Both remote steps are covered by the rollback: a failed config
        # deploy must not leave a session row without a tmux session.
        deploy_restricted_config(run_command, history_limit=config.session.history_limit)
        sock, pid = create_tmux_session(
            name,
            ws.workspace_path,
            command,
            linux_user,
            run_command=run_command,
            target=target,
            run_as_root=run_as_root,
            admin_username=vm.admin_username,
            is_admin=(mode == SessionMode.ADMIN),
        )
    except Exception:
        db.delete_session(name)
        if resolved_agent_name:
            db.delete_agent_grant(resolved_agent_name, workspace_name, "implicit", session_name=name)
        raise

    # Persist socket path, PID, and boot ID
    if sock:
        db.update_session_socket_path(name, sock)
    if pid is not None:
        boot_id = _get_boot_id(target)
        if boot_id is not None:
            db.update_session_pid(name, pid, boot_id=boot_id)
        else:
            output.warn(f"Could not read boot ID for session '{name}', PID not stored")
    else:
        output.warn(f"Could not capture PID for session '{name}', will auto-repair on next access")

    mode_label = f"agent: {resolved_agent_name}" if resolved_agent_name else "admin"
    output.info(f"Session '{name}' started ({mode_label}, template: {template.name})")

    # Update tmuxinator config and add to console if it exists
    _regenerate_tmuxinator(db, config, vm, ws)
    from agentworks.sessions.console import add_session_to_console

    add_session_to_console(name, run_command=run_command, socket_path=sock)
602
+
603
+
604
def _execute_stop(
    targets: list[tuple[SessionRow, ExecTarget]],
    *,
    db: Database,
    force: bool = False,
) -> list[tuple[str, str]]:
    """Stop one or many sessions: C-c everything, wait once, then kill what survived.

    ``targets`` pairs each session with the ExecTarget of its VM. Sessions
    that end up stopped are marked ``PID_STOPPED`` in the DB. Returns a list
    of ``(session_name, error)`` for sessions that could not be stopped or
    verified; an empty list means full success.

    With ``force=True`` an agent session (one with a socket and a positive
    PID) whose tmux kill fails is escalated to a PID kill of its tmux server.
    """
    import time

    from agentworks.sessions.tmux import force_kill_tmux_server, send_keys

    if not targets:
        return []

    # Phase 1: best-effort C-c so programs that handle SIGINT can clean up
    # before the session is killed. Errors here are deliberately ignored.
    output.detail("Sending C-c to stop any running commands...")
    for row, conn in targets:
        socket = row.socket_path
        with contextlib.suppress(Exception):
            send_keys(row.name, "C-c", run_command=conn.run, socket_path=socket)

    # Phase 2: one shared grace period for the whole batch.
    output.detail(f"Waiting {_STOP_GRACE_SECONDS}s for graceful exit...")
    time.sleep(_STOP_GRACE_SECONDS)

    # Phase 3: one status probe per distinct ExecTarget object.
    groups: dict[int, tuple[ExecTarget, list[SessionRow]]] = {}
    for row, conn in targets:
        groups.setdefault(id(conn), (conn, []))[1].append(row)

    observed: dict[str, SessionStatus] = {}
    for conn, rows in groups.values():
        observed.update(batch_check_status(rows, target=conn))

    failures: list[tuple[str, str]] = []

    for row, conn in targets:
        state = observed.get(row.name)
        if state is None:
            # Status check failed (SSH error or parse issue) -- don't assume stopped
            failures.append((row.name, "could not verify session status after stop"))
            output.warn(f"Could not verify status of '{row.name}', not marking as stopped")
            continue

        if state in (SessionStatus.OK, SessionStatus.BROKEN):
            output.detail(f"Killing session '{row.name}'")
            if not _kill_session(row.name, run_command=conn.run, socket_path=row.socket_path):
                # The session may have exited between the probe and the kill;
                # only a session that is still not STOPPED counts as a problem.
                if check_session_status(row, target=conn) != SessionStatus.STOPPED:
                    # PID kill is only meaningful for agent sessions (admin shares PID).
                    pid_killable = row.socket_path is not None and row.pid and row.pid > 0
                    if force and pid_killable:
                        output.detail(f"tmux kill failed for '{row.name}', force-killing PID {row.pid}")
                        if not force_kill_tmux_server(
                            row.pid, target=conn, socket_path=row.socket_path, log=output.detail,
                        ):
                            failures.append((row.name, f"PID {row.pid} survived force-kill"))
                            continue
                    else:
                        failures.append((row.name, f"tmux kill-session failed for '{row.name}'"))
                        if pid_killable:
                            output.warn(f"Failed to stop '{row.name}' (tmux unreachable, use --force)")
                        else:
                            output.warn(f"Failed to stop '{row.name}' (tmux unreachable)")
                        continue

        # Remove the agent socket only once its server process is confirmed dead.
        socket = row.socket_path
        if (
            socket
            and socket.startswith(AGENT_SOCKET_ROOT + "/")
            and row.pid
            and row.pid > 0
            and not _pid_alive(row.pid, target=conn)
        ):
            conn.run(f"rm -f {shlex.quote(socket)}", sudo=True, check=False)

        db.update_session_pid(row.name, PID_STOPPED)
        output.info(f"Session '{row.name}' stopped")

    return failures
698
+
699
+
700
def stop_session(
    db: Database,
    config: Config,
    *,
    name: str,
    force: bool = False,
) -> None:
    """Stop a running session.

    An already-stopped session is a no-op. A BROKEN session (PID alive but
    tmux unreachable) is only stopped with ``force=True``, by killing the
    tmux server PID. Anything else goes through ``_execute_stop`` (C-c,
    grace period, then kill).

    Raises:
        output.BrokenSessionError: the session is BROKEN and ``force`` is False.
        output.SessionError: the session does not exist, the force-kill
            failed, or the regular stop reported a failure.
    """
    from agentworks.sessions.tmux import force_kill_tmux_server

    row = _require_session(db, name)
    *_unused, conn = _prepare_vm(db, config, row.workspace_name, operation="session-stop")
    row = _ensure_pid(row, target=conn, db=db)
    state = check_session_status(row, target=conn)

    if state == SessionStatus.STOPPED:
        output.info(f"Session '{name}' is already stopped")
        return

    if state != SessionStatus.BROKEN:
        # Regular path -- delegate to the shared stop logic.
        failures = _execute_stop([(row, conn)], db=db, force=force)
        if failures:
            raise output.SessionError(f"failed to stop session '{name}': {failures[0][1]}")
        return

    # BROKEN: tmux cannot be asked to stop, so only a PID kill remains.
    if not force:
        raise output.BrokenSessionError(
            f"session '{name}' is broken (PID alive but tmux unreachable). Use --force to kill the process."
        )
    output.warn(f"Session '{name}' is broken (tmux unreachable), force-killing via PID")
    assert row.pid is not None
    killed = force_kill_tmux_server(row.pid, target=conn, socket_path=row.socket_path, log=output.detail)
    if not killed:
        raise output.SessionError(f"failed to kill PID {row.pid} for session '{name}'")
    db.update_session_pid(name, PID_STOPPED)
    output.info(f"Session '{name}' force-stopped")
736
+
737
+
738
def restart_session(
    db: Database,
    config: Config,
    *,
    name: str,
    force: bool = False,
    yes: bool = False,
) -> None:
    """Restart the session called *name*.

    A BROKEN session is only restarted when ``force`` is set (its tmux server
    is killed via PID first). A running session is killed after the user
    confirms, or immediately when ``yes`` is set. A new tmux session is then
    created from the session's template, the socket path / PID are persisted,
    the tmuxinator config is regenerated and the session is added to the
    console.

    Raises:
        output.BrokenSessionError: the session is BROKEN and ``force`` is False.
        output.UserAbort: the user declined the restart prompt.
        output.SessionError: killing the old session failed, or tmux reported
            an already-active server when creating the new one.
        RuntimeError: any other failure from tmux session creation.
    """
    from agentworks.sessions.tmux import create_session as create_tmux_session
    from agentworks.sessions.tmux import deploy_restricted_config

    session = _require_session(db, name)
    ws, vm, run_command, run_as_root, target = _prepare_vm(
        db, config, session.workspace_name, operation="session-restart",
    )
    session = _ensure_pid(session, target=target, db=db)
    status = check_session_status(session, target=target)

    # UNKNOWN is impossible here -- _ensure_pid raises on unresolvable sessions
    if status == SessionStatus.BROKEN:
        if not force:
            raise output.BrokenSessionError(
                f"session '{name}' is broken (PID alive but tmux unreachable). Use --force to restart."
            )
        from agentworks.sessions.tmux import force_kill_tmux_server

        output.warn(f"Session '{name}' is broken (tmux unreachable), force-killing via PID")
        assert session.pid is not None
        killed = force_kill_tmux_server(
            session.pid,
            target=target,
            socket_path=session.socket_path,
            log=output.detail,
        )
        if not killed:
            raise output.SessionError(f"failed to kill PID {session.pid} for session '{name}'")
    elif status == SessionStatus.OK:
        # Only prompt when the caller has not pre-approved the restart.
        if not (yes or output.confirm(f"Session '{name}' is running. Restart?")):
            raise output.UserAbort("restart cancelled")
        if not _kill_session(name, run_command=run_command, socket_path=session.socket_path):
            raise output.SessionError(f"failed to stop session '{name}' for restart")

    template = _resolve_template(config, session.template)
    deploy_restricted_config(run_command, history_limit=config.session.history_limit)

    # restart=True asks the builder for the restart command; the original
    # comment says it falls back to the regular command when none is defined.
    command = _build_session_command(
        template,
        session_name=name,
        workspace_name=session.workspace_name,
        restart=True,
    )
    is_admin = session.mode == SessionMode.ADMIN.value
    linux_user = _resolve_session_linux_user(db, session, vm)

    try:
        new_sock, pid = create_tmux_session(
            name,
            ws.workspace_path,
            command,
            linux_user,
            run_command=run_command,
            target=target,
            run_as_root=run_as_root,
            admin_username=vm.admin_username,
            is_admin=is_admin,
        )
    except RuntimeError as exc:
        if "already has an active tmux server" not in str(exc):
            raise
        raise output.SessionError(
            f"session '{name}' has an active tmux server that was not detected by the status check. "
            "Use 'session stop --force' to kill it, then retry."
        ) from exc

    # Persist socket path if it differs from what's stored.
    if new_sock != session.socket_path:
        db.update_session_socket_path(name, new_sock)

    # NOTE(review): when the PID or boot ID is unavailable the stored PID is
    # left as-is (possibly PID_STOPPED or the old PID). Confirm that the
    # auto-repair mentioned in the warning below also covers that state.
    if pid is None:
        output.warn(f"Could not capture PID for session '{name}', will auto-repair on next access")
    else:
        boot_id = _get_boot_id(target)
        if boot_id is None:
            output.warn(f"Could not read boot ID for session '{name}', PID not stored")
        else:
            db.update_session_pid(name, pid, boot_id=boot_id)

    output.info(f"Session '{name}' restarted")

    _regenerate_tmuxinator(db, config, vm, ws)
    from agentworks.sessions.console import add_session_to_console

    add_session_to_console(name, run_command=run_command, socket_path=new_sock)
831
+
832
+
833
def stop_all_sessions(
    db: Database,
    config: Config,
    *,
    vm_name: str | None = None,
    workspace_name: str | None = None,
    force: bool = False,
) -> None:
    """Stop all running sessions, optionally filtered by VM or workspace.

    Sessions with a missing PID are auto-repaired and all sessions are
    status-checked in batch. BROKEN sessions are skipped with a warning
    unless ``force`` is set, in which case they are stopped as well.

    Raises:
        output.SessionError: some session still has an unknown status after
            auto-repair, or at least one session could not be stopped
            (including sessions whose VM target could not be resolved).
    """
    sessions = filter_sessions(db, workspace_name=workspace_name, vm_name=vm_name)

    # Auto-repair NULL-PID sessions, then batch check
    sessions = ensure_pids_batch(sessions, db=db, config=config)
    status_map = batch_check_all_sessions(sessions, db=db, config=config)

    # Error if any sessions are still unknown after auto-repair.
    # PID_STOPPED sessions are known-stopped (excluded from status_map by design).
    unknown = [
        s for s in sessions
        if s.pid != PID_STOPPED
        and (s.pid is None or s.boot_id is None or s.name not in status_map)
    ]
    if unknown:
        names = ", ".join(s.name for s in unknown)
        raise output.SessionError(
            f"{len(unknown)} session(s) have unknown status after auto-repair ({names}). "
            "Resolve manually before retrying."
        )

    broken = [s for s in sessions if status_map.get(s.name) == SessionStatus.BROKEN]
    if broken and not force:
        names = ", ".join(s.name for s in broken)
        output.warn(f"Skipping {len(broken)} broken session(s) ({names}). Use --force to kill.")

    stoppable = {SessionStatus.OK, SessionStatus.BROKEN} if force else {SessionStatus.OK}
    alive_sessions = [s for s in sessions if status_map.get(s.name) in stoppable]

    if not alive_sessions:
        output.info("No running sessions to stop.")
        return

    output.info(f"Stopping {len(alive_sessions)} session(s)...")

    # Resolve each session's VM target in a single pass, reusing the target
    # for sessions on the same VM. A session whose target cannot be resolved
    # is reported instead of being silently dropped from the batch.
    vm_targets: dict[str, ExecTarget] = {}
    stop_targets: list[tuple[SessionRow, ExecTarget]] = []
    unresolved: list[str] = []
    for s in alive_sessions:
        ws = db.get_workspace(s.workspace_name)
        host_vm_name = ws.vm_name if ws else None
        if host_vm_name and host_vm_name not in vm_targets:
            vm = db.get_vm(host_vm_name)
            if vm and vm.tailscale_host:
                vm_targets[host_vm_name] = admin_exec_target(vm, config)
        if host_vm_name and host_vm_name in vm_targets:
            stop_targets.append((s, vm_targets[host_vm_name]))
        else:
            unresolved.append(s.name)
            output.warn(f"Could not resolve a VM target for session '{s.name}', not stopped")

    failed = _execute_stop(stop_targets, db=db, force=force)
    failure_count = len(failed) + len(unresolved)
    if failure_count:
        raise output.SessionError(f"{failure_count} session(s) failed to stop.")
897
+
898
+
899
def restart_all_sessions(
    db: Database,
    config: Config,
    *,
    vm_name: str | None = None,
    workspace_name: str | None = None,
    include_running: bool = False,
    force: bool = False,
) -> None:
    """Restart sessions, optionally filtered by VM or workspace.

    With ``include_running=False`` (--all-stopped) only stopped sessions are
    restarted. With ``include_running=True`` (--all) every matching session
    is targeted and ``restart_session`` is called with ``yes=True``, so
    running sessions are restarted without a per-session prompt; any
    confirmation has to happen in the caller.

    Each session is restarted independently: a failure is recorded and the
    loop moves on to the next session.

    Raises:
        output.SessionError: some session still has an unknown status after
            auto-repair, or at least one restart failed.
    """
    sessions = filter_sessions(db, workspace_name=workspace_name, vm_name=vm_name)

    # Auto-repair NULL-PID sessions, then batch check
    sessions = ensure_pids_batch(sessions, db=db, config=config)
    status_map = batch_check_all_sessions(sessions, db=db, config=config)

    def _is_unresolved(row: SessionRow) -> bool:
        # PID_STOPPED sessions are known-stopped (excluded from status_map by design).
        if row.pid == PID_STOPPED:
            return False
        return row.pid is None or row.boot_id is None or row.name not in status_map

    # Error if any sessions are still unknown after auto-repair.
    unknown = [s for s in sessions if _is_unresolved(s)]
    if unknown:
        names = ", ".join(s.name for s in unknown)
        raise output.SessionError(
            f"{len(unknown)} session(s) have unknown status after auto-repair ({names}). "
            "Resolve manually before retrying."
        )

    if not include_running:
        # Only stopped sessions
        sessions = [
            s
            for s in sessions
            if s.pid == PID_STOPPED
            or status_map.get(s.name) == SessionStatus.STOPPED
        ]

    if not sessions:
        output.info("No matching sessions to restart.")
        return

    output.info(f"Restarting {len(sessions)} session(s)...")
    failed: list[tuple[str, str]] = []
    for session in sessions:
        try:
            restart_session(db, config, name=session.name, force=force, yes=include_running)
        except output.BrokenSessionError as exc:
            if force:
                failed.append((session.name, str(exc)))
                output.warn(f"Error restarting '{session.name}': {exc}")
            else:
                # Without --force a broken session is skipped, not counted as a failure.
                output.warn(f"Skipping '{session.name}': {exc}")
        except (output.SessionError, Exception) as exc:
            # SessionError is listed explicitly so this handler does not rely
            # on its base class; everything else is caught so one bad session
            # does not abort the batch.
            failed.append((session.name, str(exc)))
            output.warn(f"Error restarting '{session.name}': {exc}")

    if failed:
        raise output.SessionError(f"{len(failed)} session(s) failed to restart.")
967
+
968
+
969
def delete_session(
    db: Database,
    config: Config,
    *,
    name: str,
    force: bool = False,
    yes: bool = False,
) -> None:
    """Delete the session called *name*.

    Asks for confirmation unless ``yes`` is set. A running session is killed
    first; a BROKEN session is only deleted with ``force`` (its tmux server is
    killed via PID). Afterwards the socket is removed if the server is gone,
    the DB row and the session's implicit agent grant are deleted, and the
    tmuxinator config is regenerated. If the session created its workspace and
    no other session uses it, the workspace is deleted too (after a prompt,
    or directly when ``yes`` is set).

    Raises:
        output.BrokenSessionError: the session is BROKEN and ``force`` is False.
        output.UserAbort: the user declined the delete prompt.
        output.SessionError: the running/broken session could not be killed.
    """
    session = _require_session(db, name)
    ws, vm, run_command, _, target = _prepare_vm(db, config, session.workspace_name, operation="session-delete")
    session = _ensure_pid(session, target=target, db=db)
    status = check_session_status(session, target=target)

    # UNKNOWN is impossible here -- _ensure_pid raises on unresolvable sessions
    if status == SessionStatus.BROKEN and not force:
        raise output.BrokenSessionError(
            f"session '{name}' is broken (PID alive but tmux unreachable). Use --force to delete."
        )

    # Confirm before any destructive action
    if not (yes or output.confirm(f"Delete session '{name}'?")):
        raise output.UserAbort("delete cancelled")

    # Now kill if needed
    if status == SessionStatus.OK:
        killed = _kill_session(name, run_command=run_command, socket_path=session.socket_path)
        # Race: session may have exited between check and kill. Recheck.
        if not killed and check_session_status(session, target=target) != SessionStatus.STOPPED:
            raise output.SessionError(f"failed to stop session '{name}' for deletion")
    elif status == SessionStatus.BROKEN:
        from agentworks.sessions.tmux import force_kill_tmux_server

        output.warn(f"Session '{name}' is broken (tmux unreachable), force-killing via PID")
        assert session.pid is not None
        if not force_kill_tmux_server(session.pid, target=target, socket_path=session.socket_path, log=output.detail):
            raise output.SessionError(f"failed to kill PID {session.pid} for session '{name}'")

    # Clean up socket if the server is dead (don't remove a live socket)
    sock = session.socket_path
    if sock and sock.startswith(AGENT_SOCKET_ROOT + "/"):
        post_status = check_session_status(session, target=target)
        if post_status != SessionStatus.STOPPED:
            output.warn(f"Session '{name}' status is {post_status.value} after delete, socket preserved at {sock}")
        else:
            target.run(f"rm -f {shlex.quote(sock)}", sudo=True, check=False)

    db.delete_session(name)

    # Clean up implicit grant for this session
    if session.agent_name:
        db.delete_agent_grant(session.agent_name, session.workspace_name, "implicit", session_name=name)
        # If no grants remain, remove from workspace group
        if not db.has_any_grant(session.agent_name, session.workspace_name):
            from agentworks.agents.manager import _remove_from_workspace_group

            agent = db.get_agent(session.agent_name)
            if agent:
                _remove_from_workspace_group(vm, config, agent.linux_user, session.workspace_name)

    _regenerate_tmuxinator(db, config, vm, ws)
    output.info(f"Session '{name}' deleted")

    # If this session created its workspace, offer to delete it
    if not session.created_workspace:
        return

    remaining = db.list_sessions(workspace_name=session.workspace_name)
    if remaining:
        output.detail(
            f"Workspace '{session.workspace_name}' was created with this session but has "
            f"{len(remaining)} other session(s), not offering to delete."
        )
        return

    if yes:
        # --yes: no prompt, the workspace is removed together with the session.
        from agentworks.workspaces.manager import delete_workspace

        output.detail(f"Deleting workspace '{session.workspace_name}' (created with this session)...")
        delete_workspace(db, config, session.workspace_name, yes=True)
    elif output.confirm(
        f"Workspace '{session.workspace_name}' was created with this session "
        f"and has no other sessions. Delete it?",
    ):
        from agentworks.workspaces.manager import delete_workspace

        delete_workspace(db, config, session.workspace_name, yes=True)
1055
+
1056
+
1057
def describe_session(
    db: Database,
    config: Config,
    *,
    name: str,
) -> None:
    """Print name, workspace, VM, template, mode, status and timestamps of a session.

    The status line includes the PID when the session is running or broken
    and a positive PID is stored.
    """
    session = _require_session(db, name)
    _, vm, _, _, target = _prepare_vm(db, config, session.workspace_name, operation=None)
    session = _ensure_pid(session, target=target, db=db)

    status = check_session_status(session, target=target)

    # Build status label with PID if running and current boot
    has_positive_pid = bool(session.pid and session.pid > 0)
    if has_positive_pid and status == SessionStatus.OK:
        status_label = f"running (PID {session.pid})"
    elif has_positive_pid and status == SessionStatus.BROKEN:
        status_label = f"broken (PID {session.pid} alive, tmux unreachable)"
    else:
        plain_labels = {
            SessionStatus.OK: "running",
            SessionStatus.STOPPED: "stopped",
            SessionStatus.BROKEN: "broken",
            SessionStatus.UNKNOWN: "unknown",
        }
        status_label = plain_labels[status]

    if session.agent_name:
        mode_label = f"agent ({session.agent_name})"
    else:
        mode_label = "admin"

    report = (
        f"Name: {session.name}",
        f"Workspace: {session.workspace_name}",
        f"VM: {vm.name}",
        f"Template: {session.template}",
        f"Mode: {mode_label}",
        f"Status: {status_label}",
        f"Created: {session.created_at}",
        f"Updated: {session.updated_at}",
    )
    for line in report:
        output.info(line)
1094
+
1095
def batch_check_all_sessions(
    sessions: list[SessionRow],
    *,
    db: Database,
    config: Config,
) -> dict[str, SessionStatus]:
    """Check session statuses VM by VM, running the per-VM checks in parallel.

    Sessions are grouped by the VM of their workspace; each group is passed
    to ``batch_check_status`` on a thread pool of at most 8 workers.

    Returns:
        ``{session_name: SessionStatus}``. Sessions whose workspace is not a
        VM workspace, or whose VM is missing or has no ``tailscale_host``, are
        left out; so are all sessions of a VM whose check raised (a warning
        is emitted). Per the original docstring, sessions with pid=None or
        PID_STOPPED are also excluded from the result.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Resolve each session's VM and group
    targets_by_vm: dict[str, ExecTarget] = {}
    sessions_by_vm: dict[str, list[SessionRow]] = {}

    for row in sessions:
        ws = db.get_workspace(row.workspace_name)
        if not (ws and ws.type == "vm" and ws.vm_name):
            continue
        host = ws.vm_name
        if host not in targets_by_vm:
            vm = db.get_vm(host)
            if not (vm and vm.tailscale_host):
                continue
            targets_by_vm[host] = admin_exec_target(vm, config)
        sessions_by_vm.setdefault(host, []).append(row)

    if not sessions_by_vm:
        return {}

    statuses: dict[str, SessionStatus] = {}
    worker_count = min(8, len(sessions_by_vm))

    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = {
            pool.submit(batch_check_status, sessions_by_vm[host], target=targets_by_vm[host]): host
            for host in sessions_by_vm
        }
        for finished in as_completed(pending):
            vm_name = pending[finished]
            try:
                statuses.update(finished.result())
            except Exception as exc:
                # One failing VM must not hide the results of the others.
                output.warn(f"Failed to check sessions on VM '{vm_name}': {exc}")

    return statuses
1141
+
1142
+
1143
def list_sessions(
    db: Database,
    config: Config,
    *,
    workspace_name: str | None = None,
    no_status: bool = False,
) -> None:
    """Print a table of sessions, grouped by workspace.

    Unless ``no_status`` is set, sessions with missing PIDs are auto-repaired
    and statuses are fetched with ``batch_check_all_sessions``. The STATUS
    column shows "-" when status checks are disabled or no result is
    available for a session. Broken and unknown sessions are summarised in
    warnings below the table.
    """
    sessions = db.list_sessions(workspace_name=workspace_name)
    if not sessions:
        output.info("No sessions found.")
        return

    # Auto-repair sessions with missing PIDs, then batch check
    status_map: dict[str, SessionStatus] = {}
    if not no_status:
        sessions = ensure_pids_batch(sessions, db=db, config=config)
        status_map = batch_check_all_sessions(sessions, db=db, config=config)

    status_labels = {
        SessionStatus.OK: "running",
        SessionStatus.STOPPED: "stopped",
        SessionStatus.BROKEN: "broken",
        SessionStatus.UNKNOWN: "unknown",
    }

    def _status_text(row: SessionRow) -> str:
        if no_status:
            return "-"
        if row.pid == PID_STOPPED:
            return "stopped"
        if row.pid is None or row.boot_id is None:
            return "unknown"
        if row.name in status_map:
            return status_labels[status_map[row.name]]
        # No status available (VM unreachable or SSH failure during batch check)
        return "-"

    # Build table rows grouped by workspace
    by_workspace: dict[str, list[SessionRow]] = {}
    for session in sessions:
        by_workspace.setdefault(session.workspace_name, []).append(session)

    rows: list[tuple[str, str, str, str, str, str]] = []
    for ws_name in sorted(by_workspace):
        ws = db.get_workspace(ws_name)
        vm_name = (ws.vm_name or "-") if ws else "-"

        for session in by_workspace[ws_name]:
            status = _status_text(session)
            mode_label = f"agent ({session.agent_name})" if session.agent_name else "admin"
            rows.append((session.name, ws_name, vm_name, session.template, mode_label, status))

    if not rows:
        output.info("No sessions found.")
        return

    # Each column is as wide as its heading or its longest cell, whichever is larger.
    headings = ("NAME", "WORKSPACE", "VM", "TEMPLATE", "MODE")
    name_w, ws_w, vm_w, tpl_w, mode_w = (
        max(len(heading), max(len(row[col]) for row in rows))
        for col, heading in enumerate(headings)
    )

    header = (
        f"{'NAME':<{name_w}} {'WORKSPACE':<{ws_w}} {'VM':<{vm_w}} {'TEMPLATE':<{tpl_w}} {'MODE':<{mode_w}} STATUS"
    )
    output.info(header)
    output.info("-" * len(header))
    for sname, ws_name, vm_col, tpl, mode, status in rows:
        output.info(
            f"{sname:<{name_w}} {ws_name:<{ws_w}} {vm_col:<{vm_w}} {tpl:<{tpl_w}} {mode:<{mode_w}} {status}"
        )

    broken_names = [row[0] for row in rows if row[5] == "broken"]
    unknown_names = [row[0] for row in rows if row[5] == "unknown"]

    if broken_names or unknown_names:
        output.info("")
    if broken_names:
        output.warn(
            f"{len(broken_names)} session(s) are broken (tmux unreachable): "
            f"{', '.join(broken_names)}. Use restart/stop/delete --force."
        )
    if unknown_names:
        output.warn(
            f"{len(unknown_names)} session(s) have unknown status: "
            f"{', '.join(unknown_names)}. Status could not be determined."
        )
1236
+
1237
+
1238
def attach_session(
    db: Database,
    config: Config,
    *,
    name: str,
) -> None:
    """Attach interactively to the tmux session *name*.

    Does not return normally: the process exits with the value returned by
    the interactive call.

    Raises:
        output.SessionError: the session is stopped or broken.
    """
    from agentworks.sessions.tmux import tmux_cmd
    from agentworks.ssh import interactive

    session = _require_session(db, name)
    _, _, _, _, target = _prepare_vm(db, config, session.workspace_name, operation="session-attach")
    session = _ensure_pid(session, target=target, db=db)
    status = check_session_status(session, target=target)

    # Statuses that cannot be attached to, with the error reported for each.
    refusals = {
        SessionStatus.STOPPED: f"session '{name}' is not running",
        SessionStatus.BROKEN: f"session '{name}' is broken (PID alive but tmux unreachable).",
    }
    if status in refusals:
        raise output.SessionError(refusals[status])

    q_session = shlex.quote(name)
    attach_command = tmux_cmd(f"attach -t {q_session}", session.socket_path)
    exit_code = interactive(target, attach_command)
    sys.exit(exit_code)
1262
+
1263
+
1264
def session_logs(
    db: Database,
    config: Config,
    *,
    name: str,
    lines: int | None = None,
) -> None:
    """Print the captured scrollback of session *name*.

    ``lines`` limits how much is captured; when it is None (or 0) the
    configured ``config.session.history_limit`` is used instead.

    Raises:
        output.SessionError: the session is stopped or broken.
    """
    from agentworks.sessions.tmux import capture_output

    session = _require_session(db, name)
    _, _, run_command, _, target = _prepare_vm(db, config, session.workspace_name, operation="session-logs")
    session = _ensure_pid(session, target=target, db=db)
    status = check_session_status(session, target=target)

    if status == SessionStatus.STOPPED:
        raise output.SessionError(f"session '{name}' is not running")
    if status == SessionStatus.BROKEN:
        raise output.SessionError(
            f"session '{name}' is broken (PID alive but tmux unreachable)."
        )

    line_count = lines or config.session.history_limit
    captured = capture_output(
        name,
        run_command=run_command,
        lines=line_count,
        socket_path=session.socket_path,
    )
    # Raw data pipe (opaque tmux capture-pane output), not a structured message.
    # Intentionally not routed through the output handler.
    typer.echo(captured, nl=False)
1296
+
1297
+