agentworks-cli 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. agentworks/__init__.py +1 -0
  2. agentworks/agents/__init__.py +0 -0
  3. agentworks/agents/manager.py +1095 -0
  4. agentworks/agents/templates.py +145 -0
  5. agentworks/catalog.py +264 -0
  6. agentworks/catalog.toml +131 -0
  7. agentworks/cli.py +1462 -0
  8. agentworks/completions/__init__.py +33 -0
  9. agentworks/completions/bash.py +179 -0
  10. agentworks/completions/install.py +122 -0
  11. agentworks/completions/powershell.py +270 -0
  12. agentworks/completions/spec.py +216 -0
  13. agentworks/completions/zsh.py +256 -0
  14. agentworks/config.py +894 -0
  15. agentworks/db.py +1083 -0
  16. agentworks/doctor.py +430 -0
  17. agentworks/git_credentials/__init__.py +0 -0
  18. agentworks/git_credentials/azdo.py +29 -0
  19. agentworks/git_credentials/base.py +71 -0
  20. agentworks/git_credentials/github.py +22 -0
  21. agentworks/nerf-config.yaml +16 -0
  22. agentworks/output.py +296 -0
  23. agentworks/remote_exec.py +286 -0
  24. agentworks/sample-config.toml +289 -0
  25. agentworks/sessions/__init__.py +0 -0
  26. agentworks/sessions/console.py +164 -0
  27. agentworks/sessions/manager.py +1297 -0
  28. agentworks/sessions/templates.py +101 -0
  29. agentworks/sessions/tmux.py +503 -0
  30. agentworks/sources.py +303 -0
  31. agentworks/ssh.py +759 -0
  32. agentworks/ssh_config.py +255 -0
  33. agentworks/vm_hosts/__init__.py +0 -0
  34. agentworks/vm_hosts/manager.py +86 -0
  35. agentworks/vms/__init__.py +0 -0
  36. agentworks/vms/backup.py +409 -0
  37. agentworks/vms/base.py +56 -0
  38. agentworks/vms/bootstrap_script.py +185 -0
  39. agentworks/vms/cloud_init.py +55 -0
  40. agentworks/vms/initializer.py +1523 -0
  41. agentworks/vms/manager.py +1122 -0
  42. agentworks/vms/provisioners/__init__.py +0 -0
  43. agentworks/vms/provisioners/azure.py +602 -0
  44. agentworks/vms/provisioners/lima.py +295 -0
  45. agentworks/vms/provisioners/proxmox.py +279 -0
  46. agentworks/vms/provisioners/proxmox_api.py +261 -0
  47. agentworks/vms/provisioners/wsl2.py +340 -0
  48. agentworks/vms/templates.py +152 -0
  49. agentworks/workspaces/__init__.py +0 -0
  50. agentworks/workspaces/backends/__init__.py +0 -0
  51. agentworks/workspaces/backends/local.py +119 -0
  52. agentworks/workspaces/backends/vm.py +175 -0
  53. agentworks/workspaces/manager.py +1080 -0
  54. agentworks/workspaces/templates.py +76 -0
  55. agentworks/workspaces/tmuxinator.py +80 -0
  56. agentworks_cli-0.2.1.dist-info/METADATA +635 -0
  57. agentworks_cli-0.2.1.dist-info/RECORD +59 -0
  58. agentworks_cli-0.2.1.dist-info/WHEEL +4 -0
  59. agentworks_cli-0.2.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,409 @@
1
+ """VM backup -- export all metadata and workspace files to a local archive."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import shlex
7
+ import subprocess
8
+ import time
9
+ from dataclasses import asdict
10
+ from datetime import UTC, datetime
11
+ from typing import TYPE_CHECKING
12
+
13
+ from agentworks import output
14
+ from agentworks.output import BackupError, VMError
15
+
16
+ if TYPE_CHECKING:
17
+ from pathlib import Path
18
+
19
+ from agentworks.config import Config
20
+ from agentworks.db import Database, WorkspaceRow
21
+ from agentworks.ssh import ExecTarget, SSHTarget
22
+
23
+
24
def backup_vm(
    db: Database,
    config: Config,
    vm_name: str,
) -> Path:
    """Create a full backup of a VM: metadata + workspace files.

    Writes ``vm.json``, ``events.json``, ``agents.json``, ``workspaces.json``,
    ``sessions.json``, ``manifest.json`` and (when the VM has workspaces of
    type ``"vm"``) ``workspaces.tar.zst`` into a timestamped directory under
    ``config.paths.backups``. The SSH log for the run is written to
    ``backup.log`` in the same directory.

    Args:
        db: Database used to look up the VM, snapshot its rows and record
            backup events.
        config: Application configuration (backup location, SSH settings).
        vm_name: Name of the VM to back up.

    Returns:
        The path to the backup directory.

    Raises:
        VMError: If the VM is unknown or has no Tailscale address.
        BackupError: If the workspace archive cannot be created.
    """
    from agentworks.ssh import SSHError, SSHLogger, _unwrap_ssh, admin_exec_target
    from agentworks.workspaces.manager import _ensure_vm_running

    vm = db.get_vm(vm_name)
    if vm is None:
        raise VMError(f"VM '{vm_name}' not found")
    _ensure_vm_running(db, config, vm)

    if vm.tailscale_host is None:
        raise VMError(f"VM '{vm_name}' has no Tailscale address")

    # Create backup directory first so the log goes inside it
    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
    backup_name = f"{vm_name}-{timestamp}"
    backup_dir = config.paths.backups / backup_name
    backup_dir.mkdir(parents=True, exist_ok=True)

    ssh_logger = SSHLogger(vm_name, "vm-backup")
    ssh_logger.path = backup_dir / "backup.log"
    try:
        target = admin_exec_target(vm, config, logger=ssh_logger)

        # Log the backup event
        db.insert_vm_event(vm_name, "backup_started")

        try:
            output.info(f"Backing up VM '{vm_name}' to {backup_dir}...")

            # Snapshot all DB data in a single transaction for consistency
            output.detail("Reading database (consistent snapshot)...")
            _vm, agents, workspaces, sessions, events, grants_by_agent = db.snapshot_vm_backup_data(vm_name)

            # 1. VM metadata
            # NOTE(review): this exports the row fetched by get_vm() above, not the
            # `_vm` row returned by the snapshot -- confirm that is intentional.
            output.detail("Exporting VM metadata...")
            _write_json(backup_dir / "vm.json", asdict(vm))

            # 2. Events
            output.detail(f"Exporting {len(events)} VM events...")
            _write_json(backup_dir / "events.json", [asdict(e) for e in events])

            # 3. Agents with grants and live UID verification
            output.detail(f"Exporting {len(agents)} agents...")
            agents_data = []
            for agent in agents:
                agent_data = asdict(agent)

                try:
                    result = target.run(f"id -u {shlex.quote(agent.linux_user)}", check=False)
                    if result.ok:
                        agent_data["live_uid"] = result.stdout.strip()
                    else:
                        agent_data["live_uid"] = None
                        output.warn(f"user '{agent.linux_user}' not found on VM")
                except SSHError:
                    # Best effort: an unreachable VM must not abort the metadata export.
                    agent_data["live_uid"] = None

                agent_data["grants"] = [asdict(g) for g in grants_by_agent.get(agent.name, [])]
                agents_data.append(agent_data)
            _write_json(backup_dir / "agents.json", agents_data)

            # 4. Workspaces with live GID verification
            output.detail(f"Exporting {len(workspaces)} workspaces...")
            ws_data = []
            for ws in workspaces:
                ws_entry = asdict(ws)
                ws_group = f"ws--{ws.name}"

                try:
                    result = target.run(f"getent group {shlex.quote(ws_group)}", check=False)
                    if result.ok:
                        # getent prints "name:passwd:gid:members"; the GID is field 3.
                        parts = result.stdout.strip().split(":")
                        ws_entry["live_gid"] = parts[2] if len(parts) > 2 else None
                    else:
                        ws_entry["live_gid"] = None
                        output.warn(f"group '{ws_group}' not found on VM")
                except SSHError:
                    ws_entry["live_gid"] = None

                ws_data.append(ws_entry)
            _write_json(backup_dir / "workspaces.json", ws_data)

            # 5. Sessions
            output.detail(f"Exporting {len(sessions)} sessions...")
            _write_json(backup_dir / "sessions.json", [asdict(s) for s in sessions])

            # 6. Workspace files -- single archive of all workspace paths
            vm_workspaces = [ws for ws in workspaces if ws.type == "vm"]

            archived_paths: list[str] = []
            skipped_paths: list[str] = []
            if vm_workspaces:
                local_archive = backup_dir / "workspaces.tar.zst"
                archived_paths, skipped_paths = _archive_workspaces(
                    target, _unwrap_ssh(target), vm_workspaces, local_archive,
                )
            else:
                output.detail("No VM workspaces to archive.")

            # 7. Manifest
            manifest = {
                "version": 2,
                "vm_name": vm_name,
                "timestamp": timestamp,
                "agent_count": len(agents_data),
                "workspace_count": len(ws_data),
                "session_count": len(sessions),
                "event_count": len(events),
                "archived_paths": archived_paths,
                "skipped_paths": skipped_paths,
            }
            _write_json(backup_dir / "manifest.json", manifest)
        except Exception:
            # Any failure after "backup_started" gets a matching "backup_failed"
            # event, not only failures of the archive step.
            db.insert_vm_event(vm_name, "backup_failed")
            raise

        db.insert_vm_event(vm_name, "backup_completed", detail=str(backup_dir))
    finally:
        # Close the log on every exit path so the handle is not leaked on errors.
        ssh_logger.close()

    output.info(f"\nBackup complete: {backup_dir}")

    return backup_dir
153
+
154
+
155
def _archive_workspaces(
    target: ExecTarget,
    target_ssh: SSHTarget,
    vm_workspaces: list[WorkspaceRow],
    local_archive: Path,
) -> tuple[list[str], list[str]]:
    """Create a single zstd-compressed tar of all workspace paths and transfer locally.

    The tar command is started through ``run_detached`` in a background
    thread (per the inline notes, so it survives SSH disconnects) while this
    function polls and reports the remote archive size periodically.

    The archive is created in a temp directory made with ``sudo mktemp -d``
    to avoid symlink attacks and collisions in /tmp.

    Args:
        target: Exec target used to run commands on the VM.
        target_ssh: SSH target used for file transfer (paths file upload and
            archive download).
        vm_workspaces: Workspaces whose ``workspace_path`` should be archived.
        local_archive: Local destination path for the downloaded archive.

    Returns:
        (archived_paths, skipped_paths) -- paths that were actually included
        and paths that were skipped because they didn't exist on the VM.

    Raises:
        BackupError: If no workspace path exists, zstd is missing on the VM,
            or tar fails / yields no result.
        UserAbort: If the wait for tar is interrupted with Ctrl-C.
    """

    # Create a secure temp directory (root-owned, mode 0700)
    tmp_dir = target.run("mktemp -d /tmp/agentworks-backup-XXXXXX", sudo=True).stdout.strip()
    q_tmp = shlex.quote(tmp_dir)
    archive = f"{tmp_dir}/workspaces.tar.zst"
    q_archive = shlex.quote(archive)

    try:
        # Verify workspace paths exist on the VM
        valid: list[WorkspaceRow] = []
        skipped: list[str] = []
        for ws in vm_workspaces:
            if target.run(f"test -d {shlex.quote(ws.workspace_path)}", sudo=True, check=False).ok:
                valid.append(ws)
            else:
                output.warn(f"path not found, skipping: {ws.workspace_path}")
                skipped.append(ws.workspace_path)

        if not valid:
            raise BackupError("no workspace paths exist on the VM")

        # Verify zstd is available
        if not target.run("command -v zstd >/dev/null 2>&1", check=False).ok:
            raise BackupError(
                "zstd is not installed on the VM. Run 'agentworks vm reinit' to install it."
            )

        # Calculate total uncompressed size (informational only; a failed or
        # non-numeric result is silently ignored)
        du_paths = " ".join(shlex.quote(ws.workspace_path) for ws in valid)
        du_result = target.run(f"du -sb {du_paths} | awk '{{s+=$1}} END {{print s}}'", sudo=True, check=False)
        if du_result.ok and du_result.stdout.strip().isdigit():
            total_size = int(du_result.stdout.strip())
            output.detail(f"Total workspace size: {_fmt_size(total_size)} (uncompressed)")

        # Use zstd at level 15 for high compression (trades CPU for smaller archive,
        # which is worthwhile since cross-workspace deduplication benefits from it).
        output.detail(f"Archiving {len(valid)} workspace(s) with zstd (this may take a while)...")
        output.detail(f"Remote archive: {archive}", indent=2)
        output.detail(f"Local archive: {local_archive}", indent=2)

        # Write paths file via scp to avoid shell escaping issues.
        # Leading slashes are stripped because tar runs with `-C /` and reads
        # the entries as paths relative to the root.
        paths_file = f"{tmp_dir}/paths.txt"
        q_paths_file = shlex.quote(paths_file)
        path_content = "\n".join(ws.workspace_path.lstrip("/") for ws in valid) + "\n"

        from agentworks.ssh import write_file as ssh_write_file

        # Admin can't write to root-owned temp dir, so stage via a securely
        # created temp file (mktemp creates with mode 0600), then move as root.
        staging_paths = target.run("mktemp /tmp/_aw_paths_XXXXXX.txt").stdout.strip()
        q_staging = shlex.quote(staging_paths)
        ssh_write_file(target_ssh, staging_paths, path_content)
        target.run(f"mv {q_staging} {q_paths_file}", sudo=True)

        # Use run_detached in a background thread so we can poll archive size.
        # run_detached handles nohup reliably via scp'd wrapper script.
        tar_cmd = f"ZSTD_CLEVEL=15 tar --zstd -cf {q_archive} -C / -T {q_paths_file}"

        # Create a secure admin-owned directory (mktemp -d creates mode 0700)
        # for run_detached's files. Can't use the root-owned tmp_dir because
        # run_detached writes its wrapper script via scp (as admin). Using
        # mktemp -d (not -u) avoids the race/symlink risks of mktemp -u.
        detached_dir = target.run("mktemp -d /tmp/_aw_detached_XXXXXX").stdout.strip()
        detached_base = f"{detached_dir}/run"

        import threading

        from agentworks.remote_exec import DetachedResult, run_detached

        # The worker thread cannot return a value or raise into this thread,
        # so its outcome is handed back through these two lists.
        result_holder: list[DetachedResult] = []
        error_holder: list[Exception] = []

        def _run_tar() -> None:
            """Run the tar command via run_detached and record its result or error."""
            try:
                r = run_detached(
                    target,
                    tar_cmd,
                    label="Archive",
                    base_path=detached_base,
                    poll_interval=5,
                    quiet_timeout=300,
                    as_root=True,
                    quiet=True,
                )
                result_holder.append(r)
            except Exception as e:
                error_holder.append(e)

        # daemon=True so an abandoned worker does not block interpreter exit.
        thread = threading.Thread(target=_run_tar, daemon=True)
        thread.start()

        # Poll archive size while tar runs
        try:
            last_report = time.monotonic()
            while thread.is_alive():
                # Wake every 15 s, but only report once at least 30 s have passed.
                thread.join(timeout=15)
                if thread.is_alive() and time.monotonic() - last_report >= 30:
                    _report_size(target, archive)
                    last_report = time.monotonic()
        except KeyboardInterrupt:
            output.warn("Interrupted. Killing remote tar and cleaning up...")
            # Read the PID that run_detached's wrapper wrote, kill the process group
            pid_result = target.run(f"cat {shlex.quote(detached_base)}.pid", sudo=True, check=False)
            pid = pid_result.stdout.strip() if pid_result.ok else ""
            if pid.isdigit():
                # Kill the wrapper shell's process group (tar + wrapper)
                target.run(f"kill -TERM -{pid} 2>/dev/null", sudo=True, check=False)
            from agentworks.output import UserAbort

            raise UserAbort("backup interrupted") from None

        # Surface the worker's outcome: its exception first, then a missing result.
        if error_holder:
            raise error_holder[0]
        if not result_holder:
            raise BackupError("tar did not produce a result")

        result = result_holder[0]
        if result.exit_code != 0:
            detail = f"Command: {tar_cmd}"
            if result.output:
                detail += f"\nOutput:\n{result.output.strip()}"
            raise BackupError(f"tar failed (exit {result.exit_code})\n{detail}")

        _report_size(target, archive)

        # tar exited 0 but still printed something: show only the last 10 lines.
        if result.output.strip():
            output.warn("tar warnings:")
            for line in result.output.strip().splitlines()[-10:]:
                output.detail(line, indent=2)

        # Transfer to local. Chown the temp dir and archive to the admin
        # user so scp can read it (avoids making it world-readable).
        admin = shlex.quote(target_ssh.user or "agentworks")
        target.run(f"chown {admin} {q_tmp} {q_archive}", sudo=True)

        # Get remote archive size for progress reporting
        # (0 means "unknown"; the transfer then reports bytes without a percentage)
        size_result = target.run(f"stat -c %s {q_archive}", sudo=True, check=False)
        remote_size = int(size_result.stdout.strip()) if size_result.ok else 0

        output.detail("Transferring remote archive to local...")
        _transfer_with_progress(target_ssh, archive, local_archive, remote_size)

    except Exception:
        # On failure the remote temp dir is deliberately left in place.
        output.warn(f"Remote temp dir preserved for debugging: {tmp_dir}")
        raise
    else:
        # Success: remove both remote temp dirs (best effort, errors ignored).
        target.run(f"rm -rf {q_tmp}", sudo=True, check=False)
        target.run(f"rm -rf {shlex.quote(detached_dir)}", check=False)

    return [ws.workspace_path for ws in valid], skipped
322
+
323
+
324
def _transfer_with_progress(
    target_ssh: SSHTarget,
    remote_path: str,
    local_path: Path,
    remote_size: int,
) -> None:
    """Transfer a file via scp with progress reporting based on local file size.

    Uses Popen so the process can be terminated on Ctrl-C and the partially
    downloaded file cleaned up.

    Args:
        target_ssh: SSH target to copy from.
        remote_path: Path of the file on the remote host.
        local_path: Destination path on the local machine.
        remote_size: Size of the remote file in bytes; 0 (or less) means
            unknown, in which case progress is reported without a percentage.

    Raises:
        SSHError: If scp exits with a non-zero status.
    """
    from agentworks.ssh import SSHError, scp_base_args

    args = scp_base_args(target_ssh)
    if target_ssh.user:
        src = f"{target_ssh.user}@{target_ssh.host}:{remote_path}"
    else:
        src = f"{target_ssh.host}:{remote_path}"
    args.append(src)
    args.append(str(local_path))

    # stdout is discarded rather than piped: nothing ever reads it, and an
    # unread pipe can block the child once its buffer fills.
    proc = subprocess.Popen(
        args,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    try:
        last_report = time.monotonic()
        stderr_bytes: bytes | None = None
        while True:
            try:
                # communicate() drains stderr while waiting and returns as soon
                # as scp exits; retrying after a timeout loses no output.
                _, stderr_bytes = proc.communicate(timeout=15)
                break
            except subprocess.TimeoutExpired:
                pass

            if time.monotonic() - last_report >= 30:
                try:
                    local_size = local_path.stat().st_size
                    if remote_size > 0:
                        pct = local_size / remote_size * 100
                        output.detail(
                            f"Transfer: {_fmt_size(local_size)} / "
                            f"{_fmt_size(remote_size)} ({pct:.0f}%)"
                        )
                    else:
                        output.detail(f"Transfer: {_fmt_size(local_size)}")
                except FileNotFoundError:
                    # scp has not created the destination file yet.
                    pass
                last_report = time.monotonic()

        if proc.returncode != 0:
            stderr = (stderr_bytes or b"").decode("utf-8", errors="replace").strip()
            raise SSHError(f"scp failed: {stderr}")

        output.detail(f"Saved: {local_path} ({_fmt_size(local_path.stat().st_size)})")

    except (KeyboardInterrupt, Exception):
        # Stop scp (escalating to SIGKILL if it ignores SIGTERM for 5 s).
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        if proc.stderr is not None:
            proc.stderr.close()
        # Clean up partial download
        local_path.unlink(missing_ok=True)
        raise
385
+
386
+
387
def _fmt_size(size_bytes: int) -> str:
    """Render a byte count in 1024-based units (B, KB, MB, GB).

    Values of at least 1024 bytes are shown with one decimal place in the
    largest unit that fits; smaller values are shown as whole bytes.
    """
    # Largest unit first so the first threshold that fits wins.
    for exponent, unit in ((3, "GB"), (2, "MB"), (1, "KB")):
        threshold = 1024**exponent
        if size_bytes >= threshold:
            return f"{size_bytes / threshold:.1f} {unit}"
    return f"{size_bytes} B"
396
+
397
+
398
def _report_size(target: ExecTarget, remote_path: str) -> None:
    """Print the current size of *remote_path* on the VM, best effort.

    The size is read with ``stat -c %s`` under sudo. Any failure (non-zero
    exit, unparsable output, transport error) is ignored so that progress
    reporting can never abort the caller.
    """
    try:
        stat_cmd = f"stat -c %s {shlex.quote(remote_path)}"
        stat_result = target.run(stat_cmd, sudo=True, check=False)
        if not stat_result.ok:
            return
        size_in_bytes = int(stat_result.stdout.strip())
        output.detail(f"Archive size: {_fmt_size(size_in_bytes)}")
    except Exception:
        return
406
+
407
+
408
def _write_json(path: Path, data: object) -> None:
    """Write *data* to *path* as 2-space-indented JSON with a trailing newline.

    Values the JSON encoder cannot serialize natively are converted with
    ``str()``.
    """
    serialized = json.dumps(data, default=str, indent=2)
    path.write_text(f"{serialized}\n")
agentworks/vms/base.py ADDED
@@ -0,0 +1,56 @@
1
+ """Base interface for VM provisioners."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from agentworks.config import Config
11
+ from agentworks.db import VMRow, VMStatus
12
+ from agentworks.ssh import ExecTarget
13
+
14
+
15
@dataclass
class ProvisionResult:
    """Result of VM provisioning -- exec target plus platform metadata.

    Only ``admin_exec_target`` is required; every other field has a default
    so a provisioner can fill in just the values that apply to it.
    """

    # Exec target for the admin user on the provisioned VM.
    admin_exec_target: ExecTarget
    # Platform-specific identifiers. Presumably each is set only by the
    # matching provisioner (Azure / WSL2 / Proxmox) -- TODO confirm.
    azure_resource_id: str | None = None
    wsl_distro_name: str | None = None
    proxmox_vmid: str | None = None
    # NOTE(review): looks like this flags that the bootstrap phase already ran
    # during provisioning -- confirm against the initializer.
    bootstrap_complete: bool = False
    # Tailscale IP of the VM when already known at provisioning time.
    tailscale_ip: str | None = None
25
+
26
+
27
class VMProvisioner(ABC):
    """Interface that each platform provisioner must implement.

    All methods are abstract; a concrete provisioner must override every one
    of them before it can be instantiated.
    """

    @abstractmethod
    def create(self, vm_name: str, config: Config) -> ProvisionResult:
        """Create a raw VM and return provisioning result for the initializer.

        Args:
            vm_name: Name of the VM to create.
            config: Application configuration.

        Returns:
            A ProvisionResult carrying the admin exec target and any
            platform metadata.
        """

    @abstractmethod
    def start(self, vm: VMRow) -> None:
        """Start a stopped VM.

        Args:
            vm: Database row describing the VM.
        """

    @abstractmethod
    def stop(self, vm: VMRow) -> None:
        """Stop a running VM.

        Args:
            vm: Database row describing the VM.
        """

    @abstractmethod
    def delete(self, vm: VMRow) -> None:
        """Delete a VM and clean up platform resources.

        Args:
            vm: Database row describing the VM.
        """

    @abstractmethod
    def status(self, vm: VMRow) -> VMStatus:
        """Query the live runtime status of a VM.

        Args:
            vm: Database row describing the VM.

        Returns:
            The VM's current status as reported by the platform.
        """

    @abstractmethod
    def admin_exec_target(self, vm: VMRow, *, config: object | None = None) -> ExecTarget:
        """Return an ExecTarget for the admin user for a running VM (provisioning transport).

        config is optional; Azure needs it for the SSH identity file when
        connecting via public IP (e.g., during Tailscale logout on delete).

        Args:
            vm: Database row describing the VM.
            config: Optional, keyword-only configuration object.
        """
@@ -0,0 +1,185 @@
1
+ """Phase A bootstrap script generation and output parsing.
2
+
3
+ Generates a self-contained bash script that runs all Phase A (bootstrap)
4
+ steps on a fresh VM. The script uses structured markers in stdout so the
5
+ Python side can drive logging and console output.
6
+
7
+ Markers:
8
+ ##STEP## <name> - step boundary
9
+ ##SUCCESS## <msg> - step succeeded
10
+ ##WARN## <msg> - non-fatal warning
11
+ ##ERROR## <msg> - fatal error
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import shlex
17
+ from dataclasses import dataclass, field
18
+
19
# Bash source for the Phase A bootstrap, rendered with str.format().
# Placeholders: admin_username, ssh_public_key, provisioning_packages,
# tailscale_auth_key, vm_hostname, ts_extra_flags, swap. String values are
# substituted verbatim into shell assignments, so they must already be
# shell-quoted. Literal bash braces are doubled ("${{VAR}}") to survive format().
SCRIPT_TEMPLATE = """\
#!/bin/bash
set -euo pipefail

VM_USER={admin_username}
SSH_PUBLIC_KEY={ssh_public_key}
PROVISIONING_PACKAGES={provisioning_packages}
TAILSCALE_AUTH_KEY={tailscale_auth_key}
VM_HOSTNAME={vm_hostname}
TS_EXTRA_FLAGS={ts_extra_flags}
SWAP_GB={swap}

# -- Step 1: Ensure user --
echo "##STEP## Ensure user"
if id "$VM_USER" >/dev/null 2>&1; then
    echo "##SUCCESS## user $VM_USER already exists"
else
    useradd -m -s /bin/bash "$VM_USER"
    echo "##SUCCESS## user $VM_USER created"
fi
usermod -aG sudo "$VM_USER"
echo "$VM_USER ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$VM_USER"

# -- Step 2: Provisioning packages --
echo "##STEP## Provisioning packages"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
timeout 600 apt-get dist-upgrade -y -qq -o Dpkg::Options::="--force-confnew"
# shellcheck disable=SC2086
apt-get install -y -qq -o Dpkg::Options::="--force-confnew" $PROVISIONING_PACKAGES
echo "##SUCCESS## provisioning packages installed"

# -- Step 2b: Preserve SSH host keys across reboots --
# By default, cloud-init may delete and regenerate SSH host keys on certain
# boot events (e.g., VM stop/start). This causes SSH clients to reject the
# connection due to a changed host key. Tell cloud-init to preserve existing keys.
echo "##STEP## Preserve SSH host keys"
mkdir -p /etc/cloud/cloud.cfg.d
cat > /etc/cloud/cloud.cfg.d/99-preserve-ssh-keys.cfg <<'CLOUDCFG'
ssh_deletekeys: false
ssh_genkeytypes: []
CLOUDCFG
echo "##SUCCESS## SSH host key preservation configured"

# -- Step 3: SSH public key --
echo "##STEP## SSH public key"
HOME_DIR="/home/$VM_USER"
mkdir -p "$HOME_DIR/.ssh"
# Append only when the exact key line is missing, so re-running the script
# does not pile up duplicate authorized_keys entries.
if ! grep -qxF -- "$SSH_PUBLIC_KEY" "$HOME_DIR/.ssh/authorized_keys" 2>/dev/null; then
    echo "$SSH_PUBLIC_KEY" >> "$HOME_DIR/.ssh/authorized_keys"
fi
chown -R "$VM_USER:$VM_USER" "$HOME_DIR/.ssh"
chmod 700 "$HOME_DIR/.ssh"
chmod 600 "$HOME_DIR/.ssh/authorized_keys"
echo "##SUCCESS## SSH key installed"

# -- Step 4: Swap file --
echo "##STEP## Swap file"
if [ "$SWAP_GB" -gt 0 ]; then
    if [ -f /swapfile ]; then
        echo "##SUCCESS## swap file already exists"
    else
        SWAP_MB=$((SWAP_GB * 1024))
        fallocate -l "${{SWAP_MB}}M" /swapfile
        chmod 600 /swapfile
        mkswap /swapfile
        swapon /swapfile
        echo '/swapfile none swap sw 0 0' >> /etc/fstab
        echo "##SUCCESS## ${{SWAP_GB}} GiB swap file created"
    fi
else
    echo "##SUCCESS## swap disabled"
fi

# -- Step 5: Set hostname --
echo "##STEP## Hostname"
hostnamectl set-hostname "$VM_HOSTNAME" 2>/dev/null || hostname "$VM_HOSTNAME"
echo "##SUCCESS## hostname set to $VM_HOSTNAME"

# -- Step 6: Install Tailscale --
echo "##STEP## Tailscale install"
if command -v tailscale >/dev/null 2>&1; then
    echo "##SUCCESS## tailscale already installed"
else
    curl -fsSL https://tailscale.com/install.sh | sh
    echo "##SUCCESS## tailscale installed"
fi

# -- Step 7: Join Tailscale --
echo "##STEP## Tailscale join"
# shellcheck disable=SC2086
tailscale up --auth-key "$TAILSCALE_AUTH_KEY" $TS_EXTRA_FLAGS
TS_IP=$(tailscale ip -4)
echo "##SUCCESS## tailscale-ip=$TS_IP"
"""
112
+
113
+
114
def vm_hostname(platform: str, vm_name: str) -> str:
    """Return the canonical hostname for a VM.

    The platform and the VM name are joined with a double dash:
    ``<platform>--<vm_name>``.
    """
    hostname = "{}--{}".format(platform, vm_name)
    return hostname
117
+
118
+
119
def generate_bootstrap_script(
    *,
    admin_username: str,
    ssh_public_key: str,
    provisioning_packages: list[str],
    tailscale_auth_key: str,
    hostname: str,
    swap: int = 0,
    is_wsl2: bool = False,
) -> str:
    """Render SCRIPT_TEMPLATE into a ready-to-run Phase A bootstrap script.

    Every string parameter is shell-quoted before substitution. The package
    list is joined with spaces and quoted as one value. ``swap`` is inserted
    as-is. When ``is_wsl2`` is true the Tailscale extra flags are
    ``--userspace-networking``; otherwise they are empty.
    """
    raw_values = {
        "admin_username": admin_username,
        "ssh_public_key": ssh_public_key,
        "provisioning_packages": " ".join(provisioning_packages),
        "tailscale_auth_key": tailscale_auth_key,
        "vm_hostname": hostname,
        "ts_extra_flags": "--userspace-networking" if is_wsl2 else "",
    }
    # Quote once, in one place, so no string value reaches the template unquoted.
    quoted_values = {key: shlex.quote(value) for key, value in raw_values.items()}
    return SCRIPT_TEMPLATE.format(swap=swap, **quoted_values)
141
+
142
+
143
@dataclass
class StepResult:
    """Outcome of one bootstrap step, collected from the script's markers."""

    # Step name taken from the "##STEP## <name>" marker.
    name: str
    # Text of the most recent "##SUCCESS## <msg>" marker seen within the step.
    success_msg: str | None = None
    # Texts of all "##WARN## <msg>" markers seen within the step, in order.
    warnings: list[str] = field(default_factory=list)
    # Text of the most recent "##ERROR## <msg>" marker seen within the step.
    error: str | None = None


@dataclass
class BootstrapResult:
    """Parsed result of a whole bootstrap script run."""

    # Exit code of the script as passed to the parser.
    exit_code: int
    # Address extracted from a "##SUCCESS## tailscale-ip=<ip>" marker, if any.
    tailscale_ip: str | None = None
    # One entry per "##STEP##" marker, in output order.
    steps: list[StepResult] = field(default_factory=list)
    # The unmodified stdout the result was parsed from.
    raw_output: str = ""

    @property
    def ok(self) -> bool:
        """True only when the script exited 0 and a Tailscale IP was reported."""
        if self.exit_code != 0:
            return False
        return self.tailscale_ip is not None


def parse_bootstrap_output(stdout: str, exit_code: int) -> BootstrapResult:
    """Turn the bootstrap script's marker lines into a BootstrapResult.

    Only lines that begin with a marker followed by a single space are
    interpreted; everything else is ignored. Warning and error markers that
    appear before the first step are dropped. A success message starting with
    ``tailscale-ip=`` also sets the result's ``tailscale_ip``, whether or not
    a step is open.
    """
    parsed = BootstrapResult(exit_code=exit_code, raw_output=stdout)
    active: StepResult | None = None

    for text in stdout.splitlines():
        # Markers contain no spaces, so the first space separates tag from payload.
        tag, separator, payload = text.partition(" ")
        if not separator:
            continue

        if tag == "##STEP##":
            active = StepResult(name=payload)
            parsed.steps.append(active)
            continue

        if tag == "##SUCCESS##":
            if active is not None:
                active.success_msg = payload
            if payload.startswith("tailscale-ip="):
                parsed.tailscale_ip = payload.split("=", 1)[1].strip()
            continue

        # The remaining markers only make sense inside a step.
        if active is None:
            continue
        if tag == "##WARN##":
            active.warnings.append(payload)
        elif tag == "##ERROR##":
            active.error = payload

    return parsed