agentworks-cli 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentworks/__init__.py +1 -0
- agentworks/agents/__init__.py +0 -0
- agentworks/agents/manager.py +1095 -0
- agentworks/agents/templates.py +145 -0
- agentworks/catalog.py +264 -0
- agentworks/catalog.toml +131 -0
- agentworks/cli.py +1462 -0
- agentworks/completions/__init__.py +33 -0
- agentworks/completions/bash.py +179 -0
- agentworks/completions/install.py +122 -0
- agentworks/completions/powershell.py +270 -0
- agentworks/completions/spec.py +216 -0
- agentworks/completions/zsh.py +256 -0
- agentworks/config.py +894 -0
- agentworks/db.py +1083 -0
- agentworks/doctor.py +430 -0
- agentworks/git_credentials/__init__.py +0 -0
- agentworks/git_credentials/azdo.py +29 -0
- agentworks/git_credentials/base.py +71 -0
- agentworks/git_credentials/github.py +22 -0
- agentworks/nerf-config.yaml +16 -0
- agentworks/output.py +296 -0
- agentworks/remote_exec.py +286 -0
- agentworks/sample-config.toml +289 -0
- agentworks/sessions/__init__.py +0 -0
- agentworks/sessions/console.py +164 -0
- agentworks/sessions/manager.py +1297 -0
- agentworks/sessions/templates.py +101 -0
- agentworks/sessions/tmux.py +503 -0
- agentworks/sources.py +303 -0
- agentworks/ssh.py +759 -0
- agentworks/ssh_config.py +255 -0
- agentworks/vm_hosts/__init__.py +0 -0
- agentworks/vm_hosts/manager.py +86 -0
- agentworks/vms/__init__.py +0 -0
- agentworks/vms/backup.py +409 -0
- agentworks/vms/base.py +56 -0
- agentworks/vms/bootstrap_script.py +185 -0
- agentworks/vms/cloud_init.py +55 -0
- agentworks/vms/initializer.py +1523 -0
- agentworks/vms/manager.py +1122 -0
- agentworks/vms/provisioners/__init__.py +0 -0
- agentworks/vms/provisioners/azure.py +602 -0
- agentworks/vms/provisioners/lima.py +295 -0
- agentworks/vms/provisioners/proxmox.py +279 -0
- agentworks/vms/provisioners/proxmox_api.py +261 -0
- agentworks/vms/provisioners/wsl2.py +340 -0
- agentworks/vms/templates.py +152 -0
- agentworks/workspaces/__init__.py +0 -0
- agentworks/workspaces/backends/__init__.py +0 -0
- agentworks/workspaces/backends/local.py +119 -0
- agentworks/workspaces/backends/vm.py +175 -0
- agentworks/workspaces/manager.py +1080 -0
- agentworks/workspaces/templates.py +76 -0
- agentworks/workspaces/tmuxinator.py +80 -0
- agentworks_cli-0.2.1.dist-info/METADATA +635 -0
- agentworks_cli-0.2.1.dist-info/RECORD +59 -0
- agentworks_cli-0.2.1.dist-info/WHEEL +4 -0
- agentworks_cli-0.2.1.dist-info/entry_points.txt +2 -0
agentworks/vms/backup.py
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""VM backup -- export all metadata and workspace files to a local archive."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import shlex
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import asdict
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from agentworks import output
|
|
14
|
+
from agentworks.output import BackupError, VMError
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from agentworks.config import Config
|
|
20
|
+
from agentworks.db import Database, WorkspaceRow
|
|
21
|
+
from agentworks.ssh import ExecTarget, SSHTarget
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def backup_vm(
    db: Database,
    config: Config,
    vm_name: str,
) -> Path:
    """Create a full backup of a VM: metadata + workspace files.

    A new directory ``<vm_name>-<UTC timestamp>`` is created under
    ``config.paths.backups`` and filled with ``vm.json``, ``events.json``,
    ``agents.json``, ``workspaces.json``, ``sessions.json``,
    ``manifest.json`` and the SSH log ``backup.log``.  When the VM has
    workspaces of type ``"vm"``, their files are archived on the VM and
    downloaded as ``workspaces.tar.zst``.

    Args:
        db: Database that rows are read from and backup events are written to.
        config: Loaded configuration (backup root, SSH settings).
        vm_name: Name of the VM to back up.

    Returns:
        The path to the backup directory (not to a single archive file).

    Raises:
        VMError: If the VM is unknown or has no Tailscale address.
        Exception: Any failure after the ``backup_started`` event is recorded
            as a ``backup_failed`` event and then re-raised unchanged.
    """
    from agentworks.ssh import SSHError, SSHLogger, _unwrap_ssh, admin_exec_target
    from agentworks.workspaces.manager import _ensure_vm_running

    vm = db.get_vm(vm_name)
    if vm is None:
        raise VMError(f"VM '{vm_name}' not found")
    _ensure_vm_running(db, config, vm)

    if vm.tailscale_host is None:
        raise VMError(f"VM '{vm_name}' has no Tailscale address")

    # Create backup directory first so the log goes inside it
    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
    backup_name = f"{vm_name}-{timestamp}"
    backup_dir = config.paths.backups / backup_name
    backup_dir.mkdir(parents=True, exist_ok=True)

    ssh_logger = SSHLogger(vm_name, "vm-backup")
    ssh_logger.path = backup_dir / "backup.log"
    try:
        target = admin_exec_target(vm, config, logger=ssh_logger)

        # Log the backup event
        db.insert_vm_event(vm_name, "backup_started")

        try:
            output.info(f"Backing up VM '{vm_name}' to {backup_dir}...")

            # Snapshot all DB data in a single transaction for consistency
            output.detail("Reading database (consistent snapshot)...")
            _vm, agents, workspaces, sessions, events, grants_by_agent = db.snapshot_vm_backup_data(vm_name)

            # 1. VM metadata
            output.detail("Exporting VM metadata...")
            _write_json(backup_dir / "vm.json", asdict(vm))

            # 2. Events
            output.detail(f"Exporting {len(events)} VM events...")
            _write_json(backup_dir / "events.json", [asdict(e) for e in events])

            # 3. Agents with grants and live UID verification
            output.detail(f"Exporting {len(agents)} agents...")
            agents_data = []
            for agent in agents:
                agent_data = asdict(agent)

                try:
                    result = target.run(f"id -u {shlex.quote(agent.linux_user)}", check=False)
                except SSHError as exc:
                    # Best effort: the export continues, but say why the UID is missing.
                    agent_data["live_uid"] = None
                    output.warn(f"could not verify user '{agent.linux_user}' on VM: {exc}")
                else:
                    if result.ok:
                        agent_data["live_uid"] = result.stdout.strip()
                    else:
                        agent_data["live_uid"] = None
                        output.warn(f"user '{agent.linux_user}' not found on VM")

                agent_data["grants"] = [asdict(g) for g in grants_by_agent.get(agent.name, [])]
                agents_data.append(agent_data)
            _write_json(backup_dir / "agents.json", agents_data)

            # 4. Workspaces with live GID verification
            output.detail(f"Exporting {len(workspaces)} workspaces...")
            ws_data = []
            for ws in workspaces:
                ws_entry = asdict(ws)
                ws_group = f"ws--{ws.name}"

                try:
                    result = target.run(f"getent group {shlex.quote(ws_group)}", check=False)
                except SSHError as exc:
                    # Best effort, same as the UID probe above.
                    ws_entry["live_gid"] = None
                    output.warn(f"could not verify group '{ws_group}' on VM: {exc}")
                else:
                    if result.ok:
                        # getent prints "name:passwd:gid:members"; the GID is field 3.
                        parts = result.stdout.strip().split(":")
                        ws_entry["live_gid"] = parts[2] if len(parts) > 2 else None
                    else:
                        ws_entry["live_gid"] = None
                        output.warn(f"group '{ws_group}' not found on VM")

                ws_data.append(ws_entry)
            _write_json(backup_dir / "workspaces.json", ws_data)

            # 5. Sessions
            output.detail(f"Exporting {len(sessions)} sessions...")
            _write_json(backup_dir / "sessions.json", [asdict(s) for s in sessions])

            # 6. Workspace files -- single archive of all workspace paths
            vm_workspaces = [ws for ws in workspaces if ws.type == "vm"]

            archived_paths: list[str] = []
            skipped_paths: list[str] = []
            if vm_workspaces:
                local_archive = backup_dir / "workspaces.tar.zst"
                archived_paths, skipped_paths = _archive_workspaces(
                    target, _unwrap_ssh(target), vm_workspaces, local_archive,
                )
            else:
                output.detail("No VM workspaces to archive.")

            # 7. Manifest
            manifest = {
                "version": 2,
                "vm_name": vm_name,
                "timestamp": timestamp,
                "agent_count": len(agents_data),
                "workspace_count": len(ws_data),
                "session_count": len(sessions),
                "event_count": len(events),
                "archived_paths": archived_paths,
                "skipped_paths": skipped_paths,
            }
            _write_json(backup_dir / "manifest.json", manifest)

            db.insert_vm_event(vm_name, "backup_completed", detail=str(backup_dir))
        except Exception:
            # Every failure after "backup_started" gets a terminal event, not
            # only failures of the archive step.
            db.insert_vm_event(vm_name, "backup_failed")
            raise
    finally:
        # Close the SSH log on success and on failure alike.
        ssh_logger.close()

    output.info(f"\nBackup complete: {backup_dir}")

    return backup_dir
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _archive_workspaces(
    target: ExecTarget,
    target_ssh: SSHTarget,
    vm_workspaces: list[WorkspaceRow],
    local_archive: Path,
) -> tuple[list[str], list[str]]:
    """Create a single zstd-compressed tar of all workspace paths and transfer locally.

    The tar command is started through ``run_detached`` on a background
    thread, so this function can poll and report the growing archive size
    while it runs.  On Ctrl-C the remote process group is sent SIGTERM and
    ``UserAbort`` is raised.

    The archive is created in a root-owned temp directory to avoid symlink
    attacks and collisions in /tmp.  On success both remote temp directories
    are removed; on failure the archive temp directory is kept for debugging.

    Args:
        target: Exec target used to run commands on the VM.
        target_ssh: SSH target used for file transfers to and from the VM.
        vm_workspaces: Workspace rows whose ``workspace_path`` should be archived.
        local_archive: Local destination of the downloaded archive.

    Returns:
        ``(archived_paths, skipped_paths)`` -- paths that were actually
        included, and paths skipped because they do not exist on the VM.

    Raises:
        BackupError: If no path exists, zstd is missing, or tar fails.
        UserAbort: If the user interrupts while tar is running.
    """

    # Create a secure temp directory (root-owned, mode 0700)
    tmp_dir = target.run("mktemp -d /tmp/agentworks-backup-XXXXXX", sudo=True).stdout.strip()
    q_tmp = shlex.quote(tmp_dir)
    archive = f"{tmp_dir}/workspaces.tar.zst"
    q_archive = shlex.quote(archive)

    try:
        # Verify workspace paths exist on the VM
        valid: list[WorkspaceRow] = []
        skipped: list[str] = []
        for ws in vm_workspaces:
            if target.run(f"test -d {shlex.quote(ws.workspace_path)}", sudo=True, check=False).ok:
                valid.append(ws)
            else:
                output.warn(f"path not found, skipping: {ws.workspace_path}")
                skipped.append(ws.workspace_path)

        if not valid:
            raise BackupError("no workspace paths exist on the VM")

        # Verify zstd is available
        if not target.run("command -v zstd >/dev/null 2>&1", check=False).ok:
            raise BackupError(
                "zstd is not installed on the VM. Run 'agentworks vm reinit' to install it."
            )

        # Calculate total uncompressed size (informational only)
        du_paths = " ".join(shlex.quote(ws.workspace_path) for ws in valid)
        du_result = target.run(f"du -sb {du_paths} | awk '{{s+=$1}} END {{print s}}'", sudo=True, check=False)
        if du_result.ok and du_result.stdout.strip().isdigit():
            total_size = int(du_result.stdout.strip())
            output.detail(f"Total workspace size: {_fmt_size(total_size)} (uncompressed)")

        # Use zstd at level 15 for high compression (trades CPU for smaller archive,
        # which is worthwhile since cross-workspace deduplication benefits from it).
        output.detail(f"Archiving {len(valid)} workspace(s) with zstd (this may take a while)...")
        output.detail(f"Remote archive: {archive}", indent=2)
        output.detail(f"Local archive:  {local_archive}", indent=2)

        # Write paths file via scp to avoid shell escaping issues.
        # Paths are made relative because tar runs with "-C /".
        paths_file = f"{tmp_dir}/paths.txt"
        q_paths_file = shlex.quote(paths_file)
        path_content = "\n".join(ws.workspace_path.lstrip("/") for ws in valid) + "\n"

        from agentworks.ssh import write_file as ssh_write_file

        # Admin can't write to root-owned temp dir, so stage via a securely
        # created temp file (mktemp creates with mode 0600), then move as root.
        staging_paths = target.run("mktemp /tmp/_aw_paths_XXXXXX.txt").stdout.strip()
        q_staging = shlex.quote(staging_paths)
        ssh_write_file(target_ssh, staging_paths, path_content)
        target.run(f"mv {q_staging} {q_paths_file}", sudo=True)

        # Use run_detached in a background thread so we can poll archive size.
        # run_detached handles nohup reliably via scp'd wrapper script.
        tar_cmd = f"ZSTD_CLEVEL=15 tar --zstd -cf {q_archive} -C / -T {q_paths_file}"

        # Create a secure admin-owned directory (mktemp -d creates mode 0700)
        # for run_detached's files. Can't use the root-owned tmp_dir because
        # run_detached writes its wrapper script via scp (as admin). Using
        # mktemp -d (not -u) avoids the race/symlink risks of mktemp -u.
        detached_dir = target.run("mktemp -d /tmp/_aw_detached_XXXXXX").stdout.strip()
        detached_base = f"{detached_dir}/run"

        import threading

        from agentworks.remote_exec import DetachedResult, run_detached

        # The worker thread reports back through these lists.
        result_holder: list[DetachedResult] = []
        error_holder: list[Exception] = []

        def _run_tar() -> None:
            try:
                r = run_detached(
                    target,
                    tar_cmd,
                    label="Archive",
                    base_path=detached_base,
                    poll_interval=5,
                    quiet_timeout=300,
                    as_root=True,
                    quiet=True,
                )
                result_holder.append(r)
            except Exception as e:
                error_holder.append(e)

        thread = threading.Thread(target=_run_tar, daemon=True)
        thread.start()

        # Poll archive size while tar runs
        try:
            last_report = time.monotonic()
            while thread.is_alive():
                thread.join(timeout=15)
                if thread.is_alive() and time.monotonic() - last_report >= 30:
                    _report_size(target, archive)
                    last_report = time.monotonic()
        except KeyboardInterrupt:
            output.warn("Interrupted. Killing remote tar and cleaning up...")
            # Read the PID that run_detached's wrapper wrote, kill the process group
            pid_result = target.run(f"cat {shlex.quote(detached_base)}.pid", sudo=True, check=False)
            pid = pid_result.stdout.strip() if pid_result.ok else ""
            if pid.isdigit():
                # Kill the wrapper shell's process group (tar + wrapper)
                target.run(f"kill -TERM -{pid} 2>/dev/null", sudo=True, check=False)
            from agentworks.output import UserAbort

            raise UserAbort("backup interrupted") from None

        if error_holder:
            raise error_holder[0]
        if not result_holder:
            raise BackupError("tar did not produce a result")

        result = result_holder[0]
        # Normalise once: the captured output may be empty or missing.
        tar_output = (result.output or "").strip()
        if result.exit_code != 0:
            detail = f"Command: {tar_cmd}"
            if tar_output:
                detail += f"\nOutput:\n{tar_output}"
            raise BackupError(f"tar failed (exit {result.exit_code})\n{detail}")

        _report_size(target, archive)

        if tar_output:
            output.warn("tar warnings:")
            for line in tar_output.splitlines()[-10:]:
                output.detail(line, indent=2)

        # Transfer to local. Chown the temp dir and archive to the admin
        # user so scp can read it (avoids making it world-readable).
        admin = shlex.quote(target_ssh.user or "agentworks")
        target.run(f"chown {admin} {q_tmp} {q_archive}", sudo=True)

        # Get remote archive size for progress reporting.  The size is only
        # cosmetic, so unexpected stat output falls back to 0 (unknown)
        # rather than aborting a backup whose archive is already complete.
        size_result = target.run(f"stat -c %s {q_archive}", sudo=True, check=False)
        size_text = size_result.stdout.strip() if size_result.ok else ""
        remote_size = int(size_text) if size_text.isdigit() else 0

        output.detail("Transferring remote archive to local...")
        _transfer_with_progress(target_ssh, archive, local_archive, remote_size)

    except Exception:
        output.warn(f"Remote temp dir preserved for debugging: {tmp_dir}")
        raise
    else:
        target.run(f"rm -rf {q_tmp}", sudo=True, check=False)
        target.run(f"rm -rf {shlex.quote(detached_dir)}", check=False)

    return [ws.workspace_path for ws in valid], skipped
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def _transfer_with_progress(
    target_ssh: SSHTarget,
    remote_path: str,
    local_path: Path,
    remote_size: int,
) -> None:
    """Transfer a file via scp with progress reporting based on local file size.

    Uses Popen so the process can be terminated on Ctrl-C and the partially
    downloaded file cleaned up.

    Args:
        target_ssh: SSH target to copy from.
        remote_path: Path of the file on the remote host.
        local_path: Local destination path.
        remote_size: Expected size in bytes; ``0`` means unknown, in which
            case progress is reported without a percentage.

    Raises:
        SSHError: If scp exits with a non-zero status.
    """
    from agentworks.ssh import SSHError, scp_base_args

    args = scp_base_args(target_ssh)
    if target_ssh.user:
        src = f"{target_ssh.user}@{target_ssh.host}:{remote_path}"
    else:
        src = f"{target_ssh.host}:{remote_path}"
    args.append(src)
    args.append(str(local_path))

    # stdout is discarded instead of piped: nothing ever reads it, and an
    # unread pipe can stall the child once the pipe buffer fills.
    proc = subprocess.Popen(
        args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
    )
    try:
        last_report = time.monotonic()
        stderr_bytes: bytes | None = None
        while True:
            try:
                # communicate() drains stderr while waiting and returns as soon
                # as scp exits, so short transfers do not pay a fixed 15 s
                # sleep.  Retrying after TimeoutExpired loses no output.
                _, stderr_bytes = proc.communicate(timeout=15)
                break
            except subprocess.TimeoutExpired:
                pass

            if time.monotonic() - last_report >= 30:
                try:
                    local_size = local_path.stat().st_size
                    if remote_size > 0:
                        pct = local_size / remote_size * 100
                        output.detail(
                            f"Transfer: {_fmt_size(local_size)} / "
                            f"{_fmt_size(remote_size)} ({pct:.0f}%)"
                        )
                    else:
                        output.detail(f"Transfer: {_fmt_size(local_size)}")
                except FileNotFoundError:
                    # scp has not created the destination file yet.
                    pass
                last_report = time.monotonic()

        if proc.returncode != 0:
            stderr = (stderr_bytes or b"").decode("utf-8", errors="replace").strip()
            raise SSHError(f"scp failed: {stderr}")

        output.detail(f"Saved: {local_path} ({_fmt_size(local_path.stat().st_size)})")

    except (KeyboardInterrupt, Exception):
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        # Release the stderr pipe if communicate() did not get to close it.
        if proc.stderr is not None and not proc.stderr.closed:
            proc.stderr.close()
        # Clean up partial download
        local_path.unlink(missing_ok=True)
        raise
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _fmt_size(size_bytes: int) -> str:
|
|
388
|
+
"""Format a byte count as a human-readable string."""
|
|
389
|
+
if size_bytes >= 1024 * 1024 * 1024:
|
|
390
|
+
return f"{size_bytes / (1024**3):.1f} GB"
|
|
391
|
+
if size_bytes >= 1024 * 1024:
|
|
392
|
+
return f"{size_bytes / (1024**2):.1f} MB"
|
|
393
|
+
if size_bytes >= 1024:
|
|
394
|
+
return f"{size_bytes / 1024:.1f} KB"
|
|
395
|
+
return f"{size_bytes} B"
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _report_size(target: ExecTarget, remote_path: str) -> None:
|
|
399
|
+
"""Print the size of a remote file."""
|
|
400
|
+
try:
|
|
401
|
+
result = target.run(f"stat -c %s {shlex.quote(remote_path)}", sudo=True, check=False)
|
|
402
|
+
if result.ok:
|
|
403
|
+
output.detail(f"Archive size: {_fmt_size(int(result.stdout.strip()))}")
|
|
404
|
+
except Exception:
|
|
405
|
+
pass
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _write_json(path: Path, data: object) -> None:
|
|
409
|
+
path.write_text(json.dumps(data, indent=2, default=str) + "\n")
|
agentworks/vms/base.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Base interface for VM provisioners."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from agentworks.config import Config
|
|
11
|
+
from agentworks.db import VMRow, VMStatus
|
|
12
|
+
from agentworks.ssh import ExecTarget
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ProvisionResult:
    """Outcome of provisioning a VM: an exec target plus platform metadata.

    Only ``admin_exec_target`` is required.  The platform-specific
    identifiers default to ``None``.
    """

    # Exec target for the admin user on the newly provisioned VM.
    admin_exec_target: ExecTarget
    # Platform identifiers; presumably each is set only by its own provisioner.
    azure_resource_id: str | None = None
    wsl_distro_name: str | None = None
    proxmox_vmid: str | None = None
    # Presumably True when the provisioner already ran bootstrap -- TODO confirm.
    bootstrap_complete: bool = False
    # Tailscale address of the VM, when the provisioner already knows it.
    tailscale_ip: str | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class VMProvisioner(ABC):
    """Abstract contract every platform provisioner has to fulfil.

    All methods are abstract; a concrete provisioner must implement each one
    before it can be instantiated.
    """

    @abstractmethod
    def create(self, vm_name: str, config: Config) -> ProvisionResult:
        """Create a raw VM named *vm_name* and return the provisioning result for the initializer."""

    @abstractmethod
    def start(self, vm: VMRow) -> None:
        """Start the given VM, which is expected to be stopped."""

    @abstractmethod
    def stop(self, vm: VMRow) -> None:
        """Stop the given VM, which is expected to be running."""

    @abstractmethod
    def delete(self, vm: VMRow) -> None:
        """Delete the VM and clean up its platform resources."""

    @abstractmethod
    def status(self, vm: VMRow) -> VMStatus:
        """Query and return the live runtime status of the VM."""

    @abstractmethod
    def admin_exec_target(self, vm: VMRow, *, config: object | None = None) -> ExecTarget:
        """Return an admin-user ExecTarget for a running VM (provisioning transport).

        ``config`` is keyword-only and optional.  Azure needs it for the SSH
        identity file when connecting via public IP (e.g., during Tailscale
        logout on delete).
        """
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Phase A bootstrap script generation and output parsing.
|
|
2
|
+
|
|
3
|
+
Generates a self-contained bash script that runs all Phase A (bootstrap)
|
|
4
|
+
steps on a fresh VM. The script uses structured markers in stdout so the
|
|
5
|
+
Python side can drive logging and console output.
|
|
6
|
+
|
|
7
|
+
Markers:
|
|
8
|
+
##STEP## <name> - step boundary
|
|
9
|
+
##SUCCESS## <msg> - step succeeded
|
|
10
|
+
##WARN## <msg> - non-fatal warning
|
|
11
|
+
##ERROR## <msg> - fatal error
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import shlex
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
|
|
19
|
+
# Bash template for the Phase A bootstrap script.  It is rendered with
# str.format(), so literal shell braces are doubled ("${{VAR}}").  Every
# placeholder except {swap} is expected to arrive already shell-quoted,
# because the values are substituted into plain variable assignments.
#
# The script is written to be safe to re-run: existing users, swap files and
# Tailscale installs are detected, the SSH key is appended only when it is
# not already authorised, and the sudoers drop-in is rewritten in place.
SCRIPT_TEMPLATE = """\
#!/bin/bash
set -euo pipefail

VM_USER={admin_username}
SSH_PUBLIC_KEY={ssh_public_key}
PROVISIONING_PACKAGES={provisioning_packages}
TAILSCALE_AUTH_KEY={tailscale_auth_key}
VM_HOSTNAME={vm_hostname}
TS_EXTRA_FLAGS={ts_extra_flags}
SWAP_GB={swap}

# -- Step 1: Ensure user --
echo "##STEP## Ensure user"
if id "$VM_USER" >/dev/null 2>&1; then
    echo "##SUCCESS## user $VM_USER already exists"
else
    useradd -m -s /bin/bash "$VM_USER"
    echo "##SUCCESS## user $VM_USER created"
fi
usermod -aG sudo "$VM_USER"
echo "$VM_USER ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$VM_USER"
chmod 0440 "/etc/sudoers.d/$VM_USER"

# -- Step 2: Provisioning packages --
echo "##STEP## Provisioning packages"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
timeout 600 apt-get dist-upgrade -y -qq -o Dpkg::Options::="--force-confnew"
# shellcheck disable=SC2086
apt-get install -y -qq -o Dpkg::Options::="--force-confnew" $PROVISIONING_PACKAGES
echo "##SUCCESS## provisioning packages installed"

# -- Step 2b: Preserve SSH host keys across reboots --
# By default, cloud-init may delete and regenerate SSH host keys on certain
# boot events (e.g., VM stop/start). This causes SSH clients to reject the
# connection due to a changed host key. Tell cloud-init to preserve existing keys.
echo "##STEP## Preserve SSH host keys"
mkdir -p /etc/cloud/cloud.cfg.d
cat > /etc/cloud/cloud.cfg.d/99-preserve-ssh-keys.cfg <<'CLOUDCFG'
ssh_deletekeys: false
ssh_genkeytypes: []
CLOUDCFG
echo "##SUCCESS## SSH host key preservation configured"

# -- Step 3: SSH public key --
echo "##STEP## SSH public key"
HOME_DIR="/home/$VM_USER"
mkdir -p "$HOME_DIR/.ssh"
# Append only when the exact key line is not present yet, so re-running the
# script does not pile up duplicate entries.
if ! grep -qxF -- "$SSH_PUBLIC_KEY" "$HOME_DIR/.ssh/authorized_keys" 2>/dev/null; then
    echo "$SSH_PUBLIC_KEY" >> "$HOME_DIR/.ssh/authorized_keys"
fi
chown -R "$VM_USER:$VM_USER" "$HOME_DIR/.ssh"
chmod 700 "$HOME_DIR/.ssh"
chmod 600 "$HOME_DIR/.ssh/authorized_keys"
echo "##SUCCESS## SSH key installed"

# -- Step 4: Swap file --
echo "##STEP## Swap file"
if [ "$SWAP_GB" -gt 0 ]; then
    if [ -f /swapfile ]; then
        echo "##SUCCESS## swap file already exists"
    else
        SWAP_MB=$((SWAP_GB * 1024))
        fallocate -l "${{SWAP_MB}}M" /swapfile
        chmod 600 /swapfile
        mkswap /swapfile
        swapon /swapfile
        echo '/swapfile none swap sw 0 0' >> /etc/fstab
        echo "##SUCCESS## ${{SWAP_GB}} GiB swap file created"
    fi
else
    echo "##SUCCESS## swap disabled"
fi

# -- Step 5: Set hostname --
echo "##STEP## Hostname"
hostnamectl set-hostname "$VM_HOSTNAME" 2>/dev/null || hostname "$VM_HOSTNAME"
echo "##SUCCESS## hostname set to $VM_HOSTNAME"

# -- Step 6: Install Tailscale --
echo "##STEP## Tailscale install"
if command -v tailscale >/dev/null 2>&1; then
    echo "##SUCCESS## tailscale already installed"
else
    curl -fsSL https://tailscale.com/install.sh | sh
    echo "##SUCCESS## tailscale installed"
fi

# -- Step 7: Join Tailscale --
echo "##STEP## Tailscale join"
# shellcheck disable=SC2086
tailscale up --auth-key "$TAILSCALE_AUTH_KEY" $TS_EXTRA_FLAGS
TS_IP=$(tailscale ip -4)
echo "##SUCCESS## tailscale-ip=$TS_IP"
"""
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def vm_hostname(platform: str, vm_name: str) -> str:
    """Return the canonical hostname for a VM, ``<platform>--<vm_name>``."""
    return "{}--{}".format(platform, vm_name)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def generate_bootstrap_script(
    *,
    admin_username: str,
    ssh_public_key: str,
    provisioning_packages: list[str],
    tailscale_auth_key: str,
    hostname: str,
    swap: int = 0,
    is_wsl2: bool = False,
) -> str:
    """Render the Phase A bootstrap script with all parameters baked in.

    Every string parameter is shell-quoted before it is substituted into
    ``SCRIPT_TEMPLATE``.  The package list is joined with spaces and quoted
    as one value.  ``swap`` is inserted as-is.  When ``is_wsl2`` is true,
    ``--userspace-networking`` is passed to ``tailscale up``.

    Returns:
        The complete bash script as a string.
    """
    if is_wsl2:
        extra_flags = "--userspace-networking"
    else:
        extra_flags = ""

    # Raw values keyed by template placeholder; all are quoted in one pass.
    raw_values = {
        "admin_username": admin_username,
        "ssh_public_key": ssh_public_key,
        "provisioning_packages": " ".join(provisioning_packages),
        "tailscale_auth_key": tailscale_auth_key,
        "vm_hostname": hostname,
        "ts_extra_flags": extra_flags,
    }
    quoted_values = {key: shlex.quote(raw) for key, raw in raw_values.items()}

    return SCRIPT_TEMPLATE.format(swap=swap, **quoted_values)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class StepResult:
    """What the bootstrap output reported for one ``##STEP##`` section."""

    # Text that followed the "##STEP## " marker.
    name: str
    # Message of the last "##SUCCESS## " line seen in this step, if any.
    success_msg: str | None = None
    # Messages of all "##WARN## " lines seen in this step, in order.
    warnings: list[str] = field(default_factory=list)
    # Message of the last "##ERROR## " line seen in this step, if any.
    error: str | None = None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass
class BootstrapResult:
    """Parsed outcome of one bootstrap script run."""

    # Exit status of the script.
    exit_code: int
    # Address taken from the "tailscale-ip=" success message, if one was seen.
    tailscale_ip: str | None = None
    # One entry per "##STEP##" marker, in output order.
    steps: list[StepResult] = field(default_factory=list)
    # The unparsed stdout the result was built from.
    raw_output: str = ""

    @property
    def ok(self) -> bool:
        """Whether the script exited with 0 and a Tailscale IP was captured."""
        if self.exit_code != 0:
            return False
        return self.tailscale_ip is not None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def parse_bootstrap_output(stdout: str, exit_code: int) -> BootstrapResult:
    """Parse structured markers from bootstrap script output.

    Recognised line prefixes are ``##STEP## ``, ``##SUCCESS## ``,
    ``##WARN## `` and ``##ERROR## ``.  Success, warning and error messages
    are attached to the most recent step; lines without a marker are ignored
    (they remain available via ``raw_output``).  A success message of the
    form ``tailscale-ip=<addr>`` also sets ``tailscale_ip``.

    An empty address after ``tailscale-ip=`` is treated as missing
    (``None``), so that ``BootstrapResult.ok`` cannot report success without
    a usable address.

    Args:
        stdout: Captured standard output of the bootstrap script.
        exit_code: Exit status of the script.

    Returns:
        The populated ``BootstrapResult``.
    """
    result = BootstrapResult(exit_code=exit_code, raw_output=stdout)
    current_step: StepResult | None = None

    for line in stdout.splitlines():
        # Markers contain no spaces, so the first space separates marker and payload.
        marker, sep, payload = line.partition(" ")
        if not sep:
            continue

        if marker == "##STEP##":
            current_step = StepResult(name=payload)
            result.steps.append(current_step)
        elif marker == "##SUCCESS##":
            if current_step is not None:
                current_step.success_msg = payload
            if payload.startswith("tailscale-ip="):
                address = payload.split("=", 1)[1].strip()
                # An empty value means the script produced no address.
                result.tailscale_ip = address or None
        elif marker == "##WARN##":
            if current_step is not None:
                current_step.warnings.append(payload)
        elif marker == "##ERROR##":
            if current_step is not None:
                current_step.error = payload

    return result
|