agentworks-cli 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. agentworks/__init__.py +1 -0
  2. agentworks/agents/__init__.py +0 -0
  3. agentworks/agents/manager.py +1095 -0
  4. agentworks/agents/templates.py +145 -0
  5. agentworks/catalog.py +264 -0
  6. agentworks/catalog.toml +131 -0
  7. agentworks/cli.py +1462 -0
  8. agentworks/completions/__init__.py +33 -0
  9. agentworks/completions/bash.py +179 -0
  10. agentworks/completions/install.py +122 -0
  11. agentworks/completions/powershell.py +270 -0
  12. agentworks/completions/spec.py +216 -0
  13. agentworks/completions/zsh.py +256 -0
  14. agentworks/config.py +894 -0
  15. agentworks/db.py +1083 -0
  16. agentworks/doctor.py +430 -0
  17. agentworks/git_credentials/__init__.py +0 -0
  18. agentworks/git_credentials/azdo.py +29 -0
  19. agentworks/git_credentials/base.py +71 -0
  20. agentworks/git_credentials/github.py +22 -0
  21. agentworks/nerf-config.yaml +16 -0
  22. agentworks/output.py +296 -0
  23. agentworks/remote_exec.py +286 -0
  24. agentworks/sample-config.toml +289 -0
  25. agentworks/sessions/__init__.py +0 -0
  26. agentworks/sessions/console.py +164 -0
  27. agentworks/sessions/manager.py +1297 -0
  28. agentworks/sessions/templates.py +101 -0
  29. agentworks/sessions/tmux.py +503 -0
  30. agentworks/sources.py +303 -0
  31. agentworks/ssh.py +759 -0
  32. agentworks/ssh_config.py +255 -0
  33. agentworks/vm_hosts/__init__.py +0 -0
  34. agentworks/vm_hosts/manager.py +86 -0
  35. agentworks/vms/__init__.py +0 -0
  36. agentworks/vms/backup.py +409 -0
  37. agentworks/vms/base.py +56 -0
  38. agentworks/vms/bootstrap_script.py +185 -0
  39. agentworks/vms/cloud_init.py +55 -0
  40. agentworks/vms/initializer.py +1523 -0
  41. agentworks/vms/manager.py +1122 -0
  42. agentworks/vms/provisioners/__init__.py +0 -0
  43. agentworks/vms/provisioners/azure.py +602 -0
  44. agentworks/vms/provisioners/lima.py +295 -0
  45. agentworks/vms/provisioners/proxmox.py +279 -0
  46. agentworks/vms/provisioners/proxmox_api.py +261 -0
  47. agentworks/vms/provisioners/wsl2.py +340 -0
  48. agentworks/vms/templates.py +152 -0
  49. agentworks/workspaces/__init__.py +0 -0
  50. agentworks/workspaces/backends/__init__.py +0 -0
  51. agentworks/workspaces/backends/local.py +119 -0
  52. agentworks/workspaces/backends/vm.py +175 -0
  53. agentworks/workspaces/manager.py +1080 -0
  54. agentworks/workspaces/templates.py +76 -0
  55. agentworks/workspaces/tmuxinator.py +80 -0
  56. agentworks_cli-0.2.1.dist-info/METADATA +635 -0
  57. agentworks_cli-0.2.1.dist-info/RECORD +59 -0
  58. agentworks_cli-0.2.1.dist-info/WHEEL +4 -0
  59. agentworks_cli-0.2.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1122 @@
1
+ """VM lifecycle management -- create, list, start, stop, delete."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from agentworks import output
8
+ from agentworks.config import VALID_PLATFORMS, validate_admin_username, validate_name
9
+ from agentworks.db import InitStatus, ProvisioningStatus, VMStatus
10
+ from agentworks.output import VMError
11
+ from agentworks.vms.initializer import (
12
+ initialize_vm,
13
+ rejoin_tailscale,
14
+ resolve_git_credential_providers,
15
+ run_initialization,
16
+ verify_git_credential_auth,
17
+ verify_tailscale_available,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from agentworks.config import Config
22
+ from agentworks.db import Database, VMRow
23
+ from agentworks.git_credentials.base import GitCredentialProvider
24
+ from agentworks.vms.base import VMProvisioner
25
+
26
+
27
def get_provisioner(platform: str, vm_host_ssh: str | None = None) -> VMProvisioner:
    """Return a provisioner instance for *platform*.

    Provisioner modules are imported lazily so that only the backend that is
    actually requested gets loaded.

    Args:
        platform: One of "lima", "azure" or "wsl2".
        vm_host_ssh: Forwarded to the Lima provisioner only; the other
            provisioners built here take no arguments.

    Raises:
        ValueError: For "proxmox" (its provisioner needs config, so it cannot
            be built from this factory) and for any unrecognised platform.
    """
    if platform == "proxmox":
        # ProxmoxProvisioner requires config; caller must use create_vm flow
        raise ValueError("Use create_vm for proxmox provisioning")

    if platform == "lima":
        from agentworks.vms.provisioners.lima import LimaProvisioner

        return LimaProvisioner(vm_host_ssh=vm_host_ssh)

    if platform == "azure":
        from agentworks.vms.provisioners.azure import AzureProvisioner

        return AzureProvisioner()

    if platform == "wsl2":
        from agentworks.vms.provisioners.wsl2 import WSL2Provisioner

        return WSL2Provisioner()

    raise ValueError(f"Unknown platform: {platform}")
48
+
49
+
50
def create_vm(
    db: Database,
    config: Config,
    *,
    name: str,
    template: str | None = None,
    platform: str | None = None,
    vm_host: str | None = None,
    cpus: int | None = None,
    memory: int | None = None,
    disk: int | None = None,
    azure_vm_size: str | None = None,
    admin_username: str | None = None,
) -> None:
    """Create a new VM: provision + initialize.

    The flow is: resolve template and defaults, validate inputs, run
    pre-flight checks, collect secrets, insert the DB row, provision on the
    chosen platform, record platform metadata, initialize, then sync the SSH
    config (best-effort).

    Args:
        db: Database used for the VM record and lookups.
        config: Loaded configuration. When ``template`` is given, a copy with
            ``vm`` replaced by the resolved template is used from then on.
        name: Name of the VM to create; must not already exist in ``db``.
        template: VM template name passed to ``resolve_template``.
        platform: Target platform. Falls back to ``config.defaults.platform``
            and finally to "lima".
        vm_host: VM host name (only consulted for the lima platform). Falls
            back to ``config.defaults.vm_host``.
        cpus: Override for the template's ``cpus``.
        memory: Override for the template's ``memory``.
        disk: Override for the template's ``disk``.
        azure_vm_size: Override for the template's ``azure_vm_size``.
        admin_username: Override for ``config.admin.username``.

    Raises:
        VMError: On an invalid platform, a duplicate VM name, an unknown VM
            host, a missing ``[azure]``/``[proxmox]`` config section, a
            provisioning failure (the DB record is deleted first), or an
            initialization failure (the DB record is kept).
    """
    from dataclasses import replace as _replace

    from agentworks.vms.templates import resolve_template

    vm_tmpl = resolve_template(config, template)

    # Replace config.vm with the resolved template so downstream code
    # (initializer, provisioners) uses the right template values.
    if template is not None:
        config = _replace(config, vm=vm_tmpl)

    # Resolve defaults
    platform = platform or config.defaults.platform or "lima"
    if platform not in VALID_PLATFORMS:
        raise VMError(f"invalid platform '{platform}'")

    vm_name = name
    validate_name(vm_name)

    if db.get_vm(vm_name) is not None:
        raise VMError(f"VM '{vm_name}' already exists")

    # Resolve VM host for Lima
    vm_host_ssh: str | None = None
    vm_host_name: str | None = None
    if platform == "lima":
        vm_host_name = vm_host or config.defaults.vm_host
        if vm_host_name:
            host_row = db.get_vm_host(vm_host_name)
            if host_row is None:
                raise VMError(f"VM host '{vm_host_name}' not found")
            vm_host_ssh = host_row.ssh_host

    # Azure config validation
    if platform == "azure" and config.azure is None:
        raise VMError("[azure] config section required for azure platform")

    # Proxmox config validation
    if platform == "proxmox" and config.proxmox is None:
        raise VMError("[proxmox] config section required for proxmox platform")

    # Resolve resource settings: CLI flag > template > built-in default
    resolved_cpus = cpus if cpus is not None else vm_tmpl.cpus
    resolved_memory = memory if memory is not None else vm_tmpl.memory
    resolved_disk = disk if disk is not None else vm_tmpl.disk
    resolved_azure_size = azure_vm_size or vm_tmpl.azure_vm_size
    resolved_admin_username = admin_username or config.admin.username
    validate_admin_username(resolved_admin_username)

    # Pre-flight checks
    verify_tailscale_available()
    providers = resolve_git_credential_providers(config, config.admin.git_credentials)
    verify_git_credential_auth(providers)

    # Collect secrets upfront so the user isn't interrupted mid-provisioning
    tailscale_auth_key, git_tokens = _collect_secrets(providers, vm_name)

    # Create DB record with as-provisioned resource values
    db.insert_vm(
        vm_name,
        platform=platform,
        vm_host_name=vm_host_name,
        template=vm_tmpl.name,
        cpus=resolved_cpus,
        memory_gib=resolved_memory,
        disk_gib=resolved_disk,
        swap_gib=vm_tmpl.swap,
        admin_username=resolved_admin_username,
    )

    # -- Provisioning --
    # If this fails, nothing was created on the remote host (or the remote
    # couldn't be reached), so we clean up the DB record.
    # NOTE(review): a provisioner that fails part-way could still leave remote
    # resources behind; nothing in this function cleans those up.
    try:
        if platform == "lima":
            from agentworks.vms.provisioners.lima import LimaProvisioner

            lima = LimaProvisioner(vm_host_ssh=vm_host_ssh)
            result = lima.create(
                vm_name,
                config,
                cpus=resolved_cpus,
                memory=resolved_memory,
                disk=resolved_disk,
                tailscale_auth_key=tailscale_auth_key,
            )
        elif platform == "azure":
            from agentworks.vms.provisioners.azure import AzureProvisioner

            azure = AzureProvisioner()
            result = azure.create(
                vm_name,
                config,
                azure_vm_size=resolved_azure_size,
                disk=resolved_disk,
                admin_username=resolved_admin_username,
                tailscale_auth_key=tailscale_auth_key,
            )
        elif platform == "wsl2":
            from agentworks.vms.provisioners.wsl2 import WSL2Provisioner

            wsl2 = WSL2Provisioner()
            result = wsl2.create(
                vm_name,
                config,
                admin_username=resolved_admin_username,
            )
        elif platform == "proxmox":
            from agentworks.vms.provisioners.proxmox import ProxmoxProvisioner

            # config.proxmox was checked for None above, hence the ignore.
            proxmox = ProxmoxProvisioner(config.proxmox)  # type: ignore[arg-type]
            result = proxmox.create(
                vm_name,
                config,
                cpus=resolved_cpus,
                memory=resolved_memory,
                disk=resolved_disk,
                admin_username=resolved_admin_username,
                tailscale_auth_key=tailscale_auth_key,
            )
        else:
            # Only reachable if VALID_PLATFORMS lists a platform that has no
            # branch above.
            msg = f"Unknown platform: {platform}"
            raise ValueError(msg)
    except Exception as e:
        # Roll back the DB row inserted above before reporting the failure.
        db.delete_vm(vm_name)
        raise VMError(f"provisioning failed: {e}") from e

    # Update DB with platform-specific metadata
    if result.azure_resource_id:
        db.update_vm_azure_resource_id(vm_name, result.azure_resource_id)
    if result.wsl_distro_name:
        db.update_vm_wsl_distro_name(vm_name, result.wsl_distro_name)
    if result.proxmox_vmid:
        db.update_vm_proxmox_vmid(vm_name, result.proxmox_vmid)

    # -- Initialization --
    # If this fails, the VM exists on the remote host and may be debuggable.
    # Keep the DB record so the user can reinit or delete.
    # Build a callback to detach the Azure public IP once Tailscale is up
    # (before Phase B starts). This minimizes the window where the VM has
    # a public IP exposed to the internet.
    def _on_tailscale_ready() -> None:
        if platform == "azure":
            from agentworks.vms.provisioners.azure import AzureProvisioner as _AP

            # Re-read the row so it includes the metadata written above.
            _created_vm = db.get_vm(vm_name)
            assert _created_vm is not None
            _AP().detach_public_ip(_created_vm)

    try:
        initialize_vm(
            db,
            config,
            vm_name,
            exec_target=result.admin_exec_target,
            providers=providers,
            is_wsl2=(platform == "wsl2"),
            admin_username=resolved_admin_username,
            tailscale_auth_key=tailscale_auth_key,
            git_tokens=git_tokens,
            bootstrap_complete=result.bootstrap_complete,
            tailscale_ip=result.tailscale_ip,
            on_tailscale_ready=_on_tailscale_ready,
        )
    except Exception as e:
        from agentworks.ssh import LOG_DIR

        # Point the user at the newest matching create log, if there is one.
        log_hint = ""
        logs = sorted(LOG_DIR.glob(f"{vm_name}-*-vm-create.log"), reverse=True)
        if logs:
            log_hint = f"\nDetails: {logs[0]}"

        # The stored provisioning status decides which message is shown:
        # FAILED means delete is the only way forward, anything else means
        # the VM may be recoverable with 'vm reinit'.
        vm = db.get_vm(vm_name)
        if vm is not None and vm.provisioning_status == ProvisioningStatus.FAILED.value:
            raise VMError(
                f"provisioning failed: {e}{log_hint}\n"
                f"VM '{vm_name}' is in a failed state. Use 'vm delete {vm_name}' to clean up."
            ) from e
        else:
            raise VMError(
                f"initialization failed: {e}{log_hint}\n"
                f"VM '{vm_name}' may still be usable. Use 'vm reinit {vm_name}' to retry."
            ) from e

    # -- Post-init: SSH config --
    # Best-effort: a sync failure is reported as a warning, not an error.
    try:
        from agentworks.ssh_config import sync_ssh_config

        sync_ssh_config(config, db)
    except Exception as e:
        output.warn(f"SSH config sync failed: {e}")
        output.detail("VM is likely still usable.")

    # Final status is set by initialize_vm (COMPLETE or PARTIAL)
    vm = db.get_vm(vm_name)
    assert vm is not None
    if vm.init_status == InitStatus.PARTIAL.value:
        output.info(f"VM '{vm_name}' is ready (with warnings -- see above)")
    else:
        output.info(f"VM '{vm_name}' is ready!")
265
+
266
+
267
def list_vms(db: Database) -> None:
    """Print a fixed-width table of all registered VMs.

    Each row shows the VM's platform, template, host, provisioning and init
    status, the workspace/agent/session counts, the Tailscale host and the
    creation timestamp. Prints a short notice instead when no VM is
    registered.
    """
    registered = db.list_vms()
    if not registered:
        output.info("No VMs registered.")
        return

    # Widths for every column but the last, which is printed unpadded.
    widths = (20, 10, 12, 15, 12, 12, 10, 20)

    def _render(*cells):
        *padded, tail = cells
        left = [f"{cell:<{width}}" for cell, width in zip(padded, widths)]
        return " ".join([*left, f"{tail}"])

    header = _render(
        "NAME", "PLATFORM", "TEMPLATE", "HOST", "PROV", "INIT", "WS/AG/TS", "TAILSCALE", "CREATED"
    )
    output.info(header)
    output.info("-" * len(header))

    for vm in registered:
        ws = db.count_workspaces_on_vm(vm.name)
        ag = db.count_agents_on_vm(vm.name)
        ts = db.count_sessions_on_vm(vm.name)
        output.info(
            _render(
                vm.name,
                vm.platform,
                vm.template or "-",
                vm.vm_host_name or "-",
                vm.provisioning_status,
                vm.init_status,
                f"{ws}/{ag}/{ts}",
                vm.tailscale_host or "-",
                vm.created_at,
            )
        )
290
+
291
+
292
def describe_vm(db: Database, config: Config, name: str) -> None:
    """Print a detailed report for one VM.

    The report contains the stored VM fields, a resources table (provisioned
    values next to live values when the VM has a Tailscale host), platform
    identifiers, and the agents, workspaces (with their sessions) and events
    recorded for the VM.
    """
    vm = _require_vm(db, name)

    # VM details
    summary = (
        ("Name", vm.name),
        ("Created", vm.created_at),
        ("Platform", vm.platform),
        ("Template", vm.template or "-"),
        ("VM Host", vm.vm_host_name or "-"),
        ("Admin User", vm.admin_username),
        ("Provisioning", vm.provisioning_status),
        ("Initialization", vm.init_status),
        ("Tailscale IP", vm.tailscale_host or "-"),
    )
    for label, value in summary:
        output.info(f"{label}: {value}")

    # Resources table: Initial / Current / Used (Used%)
    live = _query_live_resources(vm, config) if vm.tailscale_host is not None else None

    if vm.cpus is not None or live is not None:

        def _resource_row(label, provisioned, current, used):
            output.detail(f"{label:<16}{provisioned:<14}{current:<14}{used}")

        output.info(f"\n{'Resources':<16}{'Provisioned':<14}{'Current':<14}{'Used'}")

        cpu_provisioned = str(vm.cpus) if vm.cpus else "-"
        if live:
            _resource_row("CPU", cpu_provisioned, live["cpus"], "load " + live["load_avg"])
        else:
            _resource_row("CPU", cpu_provisioned, "-", "-")

        sized = (
            ("Memory", vm.memory_gib, "mem"),
            ("Swap", vm.swap_gib, "swap"),
            ("Disk", vm.disk_gib, "disk"),
        )
        for label, gib, key in sized:
            provisioned = str(gib) + "G" if gib else "-"
            if live:
                current = live[key + "_total"]
                used = live[key + "_used"] + " (" + live[key + "_pct"] + ")"
            else:
                current = used = "-"
            _resource_row(label, provisioned, current, used)

    # Optional identifiers: printed only when they are set.
    optional = (
        ("Azure ID", vm.azure_resource_id),
        ("WSL Distro", vm.wsl_distro_name),
        ("Proxmox VMID", vm.proxmox_vmid),
        ("Last Seen", vm.last_seen_at),
    )
    for label, value in optional:
        if value:
            output.info(f"{label}: {value}")

    # Agents on this VM
    agents = db.list_agents(vm_name=name)
    output.info(f"\nAgents ({len(agents)}):")
    if not agents:
        output.detail("(none)")
    for agent in agents:
        grant_count = db.count_agent_grants(agent.name)
        grant_label = "all" if agent.grant_all else str(grant_count)
        output.detail(f"{agent.name} (user: {agent.linux_user}, grants: {grant_label})")

    # Workspaces with sessions
    workspaces = db.list_workspaces(vm_name=name)
    output.info(f"\nWorkspaces ({len(workspaces)}):")
    if not workspaces:
        output.detail("(none)")
    for ws in workspaces:
        output.detail(f"{ws.name} ({ws.workspace_path})")

        sessions = db.list_sessions(workspace_name=ws.name)
        if not sessions:
            output.detail("(no sessions)", indent=2)
            continue
        output.detail(f"Sessions ({len(sessions)}):", indent=2)
        for s in sessions:
            mode_label = f"agent:{s.agent_name}" if s.agent_name else "admin"
            output.detail(f"{s.name} [{s.template}] {mode_label}", indent=3)

    # Events
    events = db.list_vm_events(name)
    output.info(f"\nEvents ({len(events)}):")
    if not events:
        output.detail("(none)")
    for event in events:
        evt_detail = f" {event.detail}" if event.detail else ""
        output.detail(f"{event.created_at} {event.event}{evt_detail}")
386
+
387
+
388
def shell_vm(db: Database, config: Config, name: str) -> None:
    """Open an interactive SSH session to the VM as its admin user.

    The process exits with ssh's return code, so this function does not
    return normally.

    Raises:
        VMError: If the VM has no Tailscale host recorded (in addition to
            whatever ``_require_vm`` / ``_guard_failed_vm`` raise).
    """
    import subprocess
    import sys

    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    if vm.tailscale_host is None:
        raise VMError(f"VM '{name}' has no Tailscale IP (init may not be complete)")

    # -t forces a TTY for the interactive shell.
    identity = config.operator.ssh_private_key
    identity_args = ["-i", str(identity)] if identity else []
    destination = f"{vm.admin_username}@{vm.tailscale_host}"

    exit_code = subprocess.call(["ssh", "-t", *identity_args, destination])
    sys.exit(exit_code)
404
+
405
+
406
def exec_vm(db: Database, config: Config, name: str, command: list[str]) -> int:
    """Run *command* on the VM through a plain ``ssh`` subprocess.

    stdio is inherited from the current process, so output streams through
    unbuffered. A single-element ``command`` is sent verbatim (letting the
    caller pass a ready-made shell string); anything else is joined with
    ``shlex.join`` so that each argument is quoted for the remote shell.

    Returns:
        The exit code of the ssh process.

    Raises:
        VMError: If the VM has no Tailscale host recorded.
    """
    import shlex
    import subprocess

    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    if vm.tailscale_host is None:
        raise VMError(f"VM '{name}' has no Tailscale IP (init may not be complete)")

    if len(command) == 1:
        remote_command = command[0]
    else:
        remote_command = shlex.join(command)

    # -T: no TTY; BatchMode: never prompt; accept-new: trust first-seen hosts.
    argv = ["ssh", "-T", "-o", "StrictHostKeyChecking=accept-new", "-o", "BatchMode=yes"]
    identity = config.operator.ssh_private_key
    if identity:
        argv += ["-i", str(identity)]
    argv += [f"{vm.admin_username}@{vm.tailscale_host}", remote_command]

    return subprocess.call(argv)
427
+
428
+
429
def add_git_credential(db: Database, config: Config, name: str, credential_name: str) -> None:
    """Install or refresh one git credential in the VM's ``~/.git-credentials``.

    Existing entries that point at the same host/path as a new entry are
    dropped, every other entry is kept, and the new entries are appended.
    The file is rewritten with mode 600 and git's ``store`` credential helper
    is enabled globally.

    Raises:
        VMError: If the VM has no Tailscale host recorded or
            ``credential_name`` is not present in ``config.git_credentials``.
    """
    from agentworks.ssh import admin_exec_target

    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    if vm.tailscale_host is None:
        raise VMError(f"VM '{name}' has no Tailscale IP (init may not be complete)")

    if config.git_credentials.get(credential_name) is None:
        raise VMError(f"git credential '{credential_name}' not found in config")

    provider = resolve_git_credential_providers(config, [credential_name])[credential_name]
    token = provider.obtain_token(name)
    new_lines = provider.credential_lines(token)

    target = admin_exec_target(vm, config)

    # A missing file reads as empty output thanks to the "|| true" fallback.
    current = target.run("cat ~/.git-credentials 2>/dev/null || true")
    existing = current.stdout.strip().splitlines()

    # "https://user:tok@host/path" -> "host/path" (text after the first "@")
    replaced = {line.partition("@")[2] for line in new_lines if "@" in line}

    # Keep entries without an "@" and entries for a different host/path.
    kept = []
    for entry in existing:
        _, at, hostpath = entry.partition("@")
        if at and hostpath in replaced:
            continue
        kept.append(entry)

    cred_content = "\n".join(kept + new_lines) + "\n"
    target.write_file("~/.git-credentials", cred_content, mode="600")
    target.run("git config --global credential.helper store")

    output.info(f"Git credential '{credential_name}' configured on VM '{name}'")
467
+
468
+
469
def start_vm(db: Database, config: Config, name: str) -> None:
    """Start a VM (if it is not already running) and ensure Tailscale is up.

    Args:
        db: Database holding the VM record.
        config: Loaded configuration; handed to the provisioner lookup and to
            ``_ensure_tailscale``.
        name: Name of the VM to start.
    """
    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    # Pass config to the lookup, as rekey_vm does: get_provisioner() refuses
    # to build a Proxmox provisioner without it.
    # NOTE(review): _get_provisioner_for_vm is defined outside this view;
    # confirm that its third parameter is the config.
    provisioner = _get_provisioner_for_vm(db, vm, config)
    status = provisioner.status(vm)
    if status == VMStatus.RUNNING:
        output.info(f"VM '{name}' is already running")
    else:
        provisioner.start(vm)

    # Runs in both cases: an already-running VM is still passed through the
    # Tailscale check.
    _ensure_tailscale(db, config, vm, provisioner)
    output.info(f"VM '{name}' is ready")
482
+
483
+
484
def stop_vm(db: Database, config: Config, name: str) -> None:
    """Stop a running VM; a no-op if it is already stopped or deallocated.

    Args:
        db: Database holding the VM record.
        config: Loaded configuration; handed to the provisioner lookup.
        name: Name of the VM to stop.
    """
    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    # Pass config to the lookup, as rekey_vm does: get_provisioner() refuses
    # to build a Proxmox provisioner without it, and config was otherwise
    # unused in this function.
    # NOTE(review): _get_provisioner_for_vm is defined outside this view;
    # confirm that its third parameter is the config.
    provisioner = _get_provisioner_for_vm(db, vm, config)
    status = provisioner.status(vm)
    if status in (VMStatus.STOPPED, VMStatus.DEALLOCATED):
        output.info(f"VM '{name}' is already stopped")
        return
    provisioner.stop(vm)
    output.info(f"VM '{name}' stopped")
495
+
496
+
497
def rekey_vm(
    db: Database,
    config: Config,
    name: str,
    *,
    wait_for_share: bool = False,
    ignore_env: bool = False,
) -> None:
    """Assign a new Tailscale auth key to a VM (logout + rejoin).

    Useful for rotating keys, switching tailnets, or recovering from
    expired ephemeral keys. Uses the provisioner's admin_exec_target
    (out-of-band transport) since Tailscale connectivity drops during
    the operation.

    Args:
        db: Database holding the VM record; updated with the new IP.
        config: Loaded configuration.
        name: Name of the VM to rekey. It must be running.
        wait_for_share: When true, pause for operator confirmation before
            the final SSH connectivity check.
        ignore_env: When true, skip the ``TAILSCALE_AUTH_KEY`` environment
            variable and always prompt for the key.

    Raises:
        VMError: If the VM is not running.
        SSHError: If the provisioning transport stays unreachable after six
            attempts, or ``tailscale ip -4`` does not return a valid IPv4
            address.
    """
    import ipaddress
    import os
    import shlex
    import time

    from agentworks.ssh import SSHError, admin_exec_target, wait_for_reconnect
    from agentworks.ssh_config import sync_ssh_config
    from agentworks.vms.provisioners.azure import AzureProvisioner

    vm = _require_vm(db, name)
    _guard_failed_vm(vm)

    provisioner = _get_provisioner_for_vm(db, vm, config)
    status = provisioner.status(vm)
    if status != VMStatus.RUNNING:
        raise VMError(f"VM '{name}' is not running (status: {status.value})")

    # Collect new auth key
    ts_auth_key = os.environ.get("TAILSCALE_AUTH_KEY") if not ignore_env else None
    if ts_auth_key:
        output.detail("Tailscale auth key found in environment")
    else:
        ts_auth_key = output.prompt_secret(
            "Tailscale auth key",
            hint="Generate a key at https://login.tailscale.com/admin/settings/keys",
        )

    output.info(f"Rekeying '{name}'...")

    # For Azure, attach a temporary public IP for out-of-band access
    azure_provisioner = provisioner if isinstance(provisioner, AzureProvisioner) else None
    if azure_provisioner is not None:
        azure_provisioner.attach_public_ip(vm)

    # Everything from here on runs under try/finally so the Azure public IP
    # is detached again whether the rekey succeeds or fails.
    try:
        exec_target = provisioner.admin_exec_target(vm, config=config)

        # Wait for the provisioning transport to be reachable
        # (up to 6 attempts, 5 seconds apart; the last failure propagates).
        output.detail("Waiting for provisioning transport...")
        for attempt in range(6):
            try:
                exec_target.run("echo ok", timeout=10)
                break
            except SSHError:
                if attempt == 5:
                    raise
                output.detail(f"Attempt {attempt + 1} failed, retrying...")
                time.sleep(5)
        output.detail("Connected.")

        # Restart, logout, login, restart. The initial restart clears any
        # stale daemon state (a previous interrupted rekey can leave the
        # daemon in a state where `tailscale logout` hangs waiting for a
        # control plane response that never comes). The final restart
        # fixes a Tailscale bug where the node registers but peers can't
        # reach it after rekeying to a different tailnet.
        # Restart command varies by platform. WSL2 may not have systemd.
        is_wsl2 = vm.platform == "wsl2"
        restart_cmd = "service tailscaled restart" if is_wsl2 else "systemctl restart tailscaled"
        stabilize_secs = 15  # pause between steps for daemon/network stability

        output.detail("Restarting Tailscale daemon...")
        exec_target.run(restart_cmd, sudo=True, timeout=15)
        time.sleep(stabilize_secs)

        output.detail("Logging out of current tailnet...")
        exec_target.run("tailscale logout", sudo=True, timeout=30)
        time.sleep(stabilize_secs)

        output.detail("Joining new tailnet...")
        # Quote the key: it is interpolated into a shell command line.
        quoted_key = shlex.quote(ts_auth_key)
        ts_up_cmd = f"tailscale up --auth-key {quoted_key}"
        if is_wsl2:
            ts_up_cmd += " --userspace-networking"
        exec_target.run(ts_up_cmd, sudo=True, timeout=30)
        time.sleep(stabilize_secs)

        output.detail("Restarting Tailscale daemon...")
        exec_target.run(restart_cmd, sudo=True, timeout=15)
        time.sleep(stabilize_secs)

        output.detail("Reading new Tailscale IP...")
        result = exec_target.run("tailscale ip -4", sudo=True, timeout=15)
        raw_ip = result.stdout.strip()
        # Only the first output line is used; it must parse as an IPv4 address.
        new_ip = raw_ip.splitlines()[0].strip() if raw_ip else ""
        try:
            ipaddress.IPv4Address(new_ip)
        except ValueError:
            raise SSHError(
                f"tailscale ip -4 returned invalid address: {new_ip!r}\nfull output: {raw_ip}"
            ) from None
        output.detail(f"Tailscale IP: {new_ip}")

        # Update DB and SSH config with the new IP (correct regardless of
        # reachability -- the old IP is definitely dead after logout)
        db.update_vm_tailscale(name, new_ip)
        sync_ssh_config(config, db)
        db.insert_vm_event(name, "rekey", f"new_ip={new_ip}")

        # If the operator needs to share the VM back, pause before connectivity check
        if wait_for_share:
            output.pause(
                "Share the VM back to your tailnet, then press Enter to verify connectivity..."
            )

        # Always verify Tailscale SSH connectivity to the new IP
        output.detail(f"Verifying SSH to {new_ip}...")
        from dataclasses import replace

        # `vm` was loaded before the rekey, so the target built from it is
        # explicitly re-pointed at the new IP.
        ts_target = admin_exec_target(vm, config)
        assert ts_target.ssh is not None
        ts_target = replace(ts_target, ssh=replace(ts_target.ssh, host=new_ip))
        if wait_for_reconnect(ts_target):
            output.info(f"VM '{name}' rekeyed successfully. Tailscale IP: {new_ip}")
        else:
            output.warn(
                f"VM '{name}' rekeyed but {new_ip} is not reachable via SSH. "
                "Check tailnet sharing/ACLs. Run 'vm rekey' again to retry."
            )

    finally:
        if azure_provisioner is not None:
            azure_provisioner.detach_public_ip(vm)
635
+
636
+
637
def delete_vm(
    db: Database,
    config: Config,
    name: str,
    *,
    force: bool = False,
    yes: bool = False,
) -> None:
    """Delete a VM, cleaning up all associated resources.

    Platform cleanup (Tailscale logout + provisioner delete) is best-effort:
    a failure is reported as a warning and the local cleanup (logs, DB
    record, SSH config) still runs.

    Args:
        db: Database holding the VM record.
        config: Loaded configuration.
        name: Name of the VM to delete.
        force: Allow deleting a VM that still has workspaces. Also skips the
            confirmation prompt.
        yes: Skip the confirmation prompt.

    Raises:
        VMError: If the VM still has workspaces and ``force`` is false.
        output.UserAbort: If the user declines the confirmation prompt.
    """
    vm = _require_vm(db, name)

    # Check for workspaces (which contain agents and sessions)
    ws_count = db.count_workspaces_on_vm(name)
    ag_count = db.count_agents_on_vm(name)
    ts_count = db.count_sessions_on_vm(name)
    has_children = ws_count > 0

    if has_children and not force:
        parts = [f"{ws_count} workspace(s)"]
        if ag_count > 0:
            parts.append(f"{ag_count} agent(s)")
        if ts_count > 0:
            parts.append(f"{ts_count} session(s)")
        raise VMError(f"VM '{name}' has {', '.join(parts)}. Delete them first, or use --force.")

    # Reaching this prompt means force is false, so the check above has
    # already established that the VM has no workspaces. The original code
    # also built a "... will also be deleted" suffix here, but that branch
    # could never run and has been dropped.
    # NOTE(review): if --force was meant to still ask for confirmation, this
    # condition should be `not yes` instead; left unchanged to keep behavior.
    if not yes and not force and not output.confirm(f"Delete VM '{name}'?"):
        raise output.UserAbort("delete cancelled")

    # Platform-specific cleanup (also handles Tailscale logout)
    try:
        # Pass config to the lookup, as rekey_vm does: get_provisioner()
        # refuses to build a Proxmox provisioner without it, and a failure
        # here is only warned about before the DB record is removed.
        # NOTE(review): _get_provisioner_for_vm is defined outside this view;
        # confirm that its third parameter is the config.
        provisioner = _get_provisioner_for_vm(db, vm, config)

        # Tailscale logout (best-effort, via provisioning transport)
        if vm.tailscale_host:
            _tailscale_logout(provisioner, vm, config)

        provisioner.delete(vm)
    except Exception as e:
        output.warn(f"platform cleanup failed: {e}")

    # Clean up logs
    from agentworks.ssh import LOG_DIR

    vm_logs = list(LOG_DIR.glob(f"{name}-*.log")) if LOG_DIR.exists() else []
    for log in vm_logs:
        log.unlink(missing_ok=True)
    if vm_logs:
        output.info(f"Cleaned up {len(vm_logs)} log(s)")

    # Remove from DB (cascades workspaces and agents), then rebuild SSH config
    db.delete_vm(name)

    from agentworks.ssh_config import sync_ssh_config

    sync_ssh_config(config, db)
    output.info(f"VM '{name}' deleted")
702
+
703
+
704
def reinit_vm(
    db: Database,
    config: Config,
    name: str,
) -> None:
    """Re-run initialization on a VM that has already been provisioned.

    Requires provisioning_status == complete and a valid Tailscale connection.

    Args:
        db: Database holding the VM record.
        config: Loaded configuration. If the VM was created from a template
            other than "default", a copy with ``vm`` replaced by that
            template is used.
        name: Name of the VM to reinitialize.

    Raises:
        VMError: If provisioning is not complete or the VM has no Tailscale
            host recorded.
        Exception: Whatever initialization raises is re-raised after the log
            path has been reported.
    """
    from agentworks.ssh import SSHLogger, admin_exec_target

    vm = _require_vm(db, name)

    # Resolve the VM's template so init uses the right values
    if vm.template and vm.template != "default":
        from dataclasses import replace as _replace

        from agentworks.vms.templates import resolve_template

        config = _replace(config, vm=resolve_template(config, vm.template))

    if vm.provisioning_status != ProvisioningStatus.COMPLETE.value:
        raise VMError(
            f"VM '{name}' provisioning is '{vm.provisioning_status}', not 'complete'. Cannot reinitialize."
        )

    if vm.tailscale_host is None:
        raise VMError(f"VM '{name}' has no Tailscale IP")

    # Pre-flight checks
    verify_tailscale_available()
    providers = resolve_git_credential_providers(config, config.admin.git_credentials)
    verify_git_credential_auth(providers)

    # Collect git tokens upfront
    git_tokens: dict[str, str] = {
        cred_name: provider.obtain_token(name) for cred_name, provider in providers.items()
    }

    home = f"/home/{vm.admin_username}"

    # Build Tailscale SSH target with logging. Everything that happens after
    # the logger is created runs under try/finally, so the logger is closed
    # even when building the exec target fails (the original only closed it
    # once run_initialization had been reached).
    logger = SSHLogger(name, "vm-reinit")
    try:
        # Keep the collected tokens out of the log.
        for token in git_tokens.values():
            logger.add_redaction(token)
        ts_target = admin_exec_target(vm, config, default_timeout=60, logger=logger)

        run_initialization(
            db,
            config,
            name,
            ts_target,
            providers,
            home,
            vm.admin_username,
            logger,
            git_tokens=git_tokens,
        )
    except Exception:
        output.warn(f"Log: {logger.path}")
        raise
    finally:
        logger.close()

    refreshed_vm = db.get_vm(name)
    assert refreshed_vm is not None
    if refreshed_vm.init_status == InitStatus.PARTIAL.value:
        output.info(f"VM '{name}' reinitialized (with warnings -- see above)")
        output.detail(f"Log: {logger.path}")
    else:
        output.info(f"VM '{name}' reinitialized successfully!")
779
+
780
+
781
def _tailscale_logout(provisioner: VMProvisioner, vm: VMRow, config: Config) -> None:
    """Best-effort: deregister from Tailscale via the provisioning transport.

    Uses the provisioner's admin_exec_target (not Tailscale SSH) because we
    can't ask Tailscale to tear itself down over the connection it provides.
    For Azure VMs, a public IP is attached first for SSH access; this
    function does not detach it again.

    Never raises: any failure, including a provisioner that cannot supply an
    exec target, is reported as a warning.
    """
    import time

    from agentworks.vms.provisioners.azure import AzureProvisioner

    output.info("Deregistering from Tailscale...")
    try:
        if isinstance(provisioner, AzureProvisioner):
            provisioner.attach_public_ip(vm)
        exec_target = provisioner.admin_exec_target(vm, config=config)

        # Wait for SSH to be reachable (public IP may have just been attached).
        # Up to 6 attempts, 5 seconds apart; the last failure propagates to
        # the outer handler. The original caught `(_SSHError, Exception)`,
        # which is the same as catching Exception alone.
        for attempt in range(6):
            try:
                exec_target.run("echo ok", timeout=10)
                break
            except Exception:
                if attempt == 5:
                    raise
                time.sleep(5)

        # Fire and forget: tailscale down + logout can disrupt networking
        # on the VM, killing SSH-based transports before they get a response.
        # Lima/WSL2 use local transports and are unaffected, but the nohup
        # approach works universally.
        exec_target.run(
            "nohup sh -c 'tailscale down && tailscale logout' >/dev/null 2>&1 &",
            sudo=True,
            timeout=10,
        )
        # The command runs in the background, so this reports that the logout
        # was launched, not that it finished.
        output.info("Tailscale node deregistered")
    except Exception as e:
        output.warn(f"Tailscale logout failed (node may remain in admin console): {e}")
823
+
824
+
825
def _init_log_hint(vm_name: str) -> str:
    """Build a ' See log: <path>' suffix for *vm_name*, or '' if no log exists.

    Searches LOG_DIR for files matching ``<vm_name>-*.log`` and uses the one
    whose path sorts last.
    """
    from agentworks.ssh import LOG_DIR

    if not LOG_DIR.exists():
        return ""

    # The greatest path is the same entry a reverse sort would put first.
    top = max(LOG_DIR.glob(f"{vm_name}-*.log"), default=None)
    if top is None:
        return ""
    return f" See log: {top}"
833
+
834
+
835
def _guard_failed_vm(vm: VMRow) -> None:
    """Refuse to operate on a VM whose provisioning or initialization failed.

    Raises:
        VMError: if ``vm.provisioning_status`` or ``vm.init_status`` equals the
            corresponding FAILED value. The message ends with a log hint when
            one is available.
    """
    provisioning_failed = vm.provisioning_status == ProvisioningStatus.FAILED.value
    if provisioning_failed:
        hint = _init_log_hint(vm.name)
        raise VMError(f"VM '{vm.name}' has failed provisioning. Only 'vm delete' is supported.{hint}")

    init_failed = vm.init_status == InitStatus.FAILED.value
    if init_failed:
        hint = _init_log_hint(vm.name)
        message = (
            f"VM '{vm.name}' has failed initialization. "
            f"Use 'vm reinit' to retry or 'vm delete' to remove.{hint}"
        )
        raise VMError(message)
846
+
847
+
848
def _collect_secrets(
    providers: dict[str, GitCredentialProvider],
    vm_name: str,
) -> tuple[str | None, dict[str, str]]:
    """Gather every secret needed for provisioning before any work starts.

    The Tailscale auth key is read from the ``TAILSCALE_AUTH_KEY`` environment
    variable, falling back to an interactive prompt when it is unset or empty.
    One git token is obtained from each provider in *providers*.

    Returns (tailscale_auth_key, git_tokens), where git_tokens is keyed by the
    same names as *providers*.
    """
    import os

    output.info("Collecting credentials...")

    # Tailscale: environment first, prompt as a fallback.
    ts_auth_key = os.environ.get("TAILSCALE_AUTH_KEY")
    if not ts_auth_key:
        ts_auth_key = output.prompt_secret(
            " Tailscale auth key",
            hint="Generate a key at https://login.tailscale.com/admin/settings/keys",
        )
    else:
        output.detail("Tailscale auth key found in environment")

    # Git credentials: one token per configured provider, in provider order.
    git_tokens: dict[str, str] = {
        cred_name: provider.obtain_token(vm_name) for cred_name, provider in providers.items()
    }

    return ts_auth_key, git_tokens
877
+
878
+
879
def _query_live_resources(vm: VMRow, config: Config) -> dict[str, str] | None:
    """Fetch CPU, load, memory, swap and root-disk usage from a VM over SSH.

    Runs a single shell pipeline on the VM and parses its five output lines.
    Returns None if the command raises, exits unsuccessfully, prints fewer
    than five lines, or prints something that cannot be parsed.
    """
    from agentworks.ssh import admin_exec_target, run

    def _percent(used: int, total: int) -> str:
        # Integer percentage; a zero total is reported as 0% rather than dividing.
        return f"{used * 100 // total}%" if total > 0 else "0%"

    # One output line per probe, in this order: cpus, load, mem, swap, disk.
    probes = [
        "nproc",
        "uptime | grep -oP 'load average: \\K[^,]+'",
        "free -b | awk '/^Mem:/{print $2,$3} /^Swap:/{print $2,$3}'",
        "df -h / | awk 'NR==2{print $2,$3,$5}'",
    ]
    cmd = " && ".join(probes)

    target = admin_exec_target(vm, config)
    try:
        result = run(target, cmd, check=False, retries=3)
    except Exception:
        return None

    if not result.ok:
        return None

    rows = result.stdout.strip().splitlines()
    if len(rows) < 5:
        return None

    try:
        cpu_row, load_row, mem_row, swap_row, disk_row = rows[:5]
        mem_fields = mem_row.split()
        swap_fields = swap_row.split()
        disk_fields = disk_row.split()

        mem_total_b, mem_used_b = int(mem_fields[0]), int(mem_fields[1])
        swap_total_b, swap_used_b = int(swap_fields[0]), int(swap_fields[1])

        return {
            "cpus": cpu_row.strip(),
            "load_avg": load_row.strip(),
            "mem_total": _human_bytes(mem_total_b),
            "mem_used": _human_bytes(mem_used_b),
            "mem_pct": _percent(mem_used_b, mem_total_b),
            "swap_total": _human_bytes(swap_total_b),
            "swap_used": _human_bytes(swap_used_b),
            "swap_pct": _percent(swap_used_b, swap_total_b),
            "disk_total": disk_fields[0],
            "disk_used": disk_fields[1],
            "disk_pct": disk_fields[2],
        }
    except (IndexError, ValueError):
        # Missing or non-numeric fields: treat the whole sample as unavailable.
        return None
933
+
934
+
935
def _human_bytes(b: int) -> str:
    """Format a byte count as a short human-readable string.

    Values below 1024 are printed as whole bytes (``"512B"``). Larger values
    are scaled by powers of 1024 to K, M, G or T and printed with two decimals
    when the scaled value is below 10 (``"1.50K"``, ``"8.00G"``) and with one
    decimal otherwise (``"494.0M"``). T is the largest unit, so very large
    values stay in T (``"2048.0T"``).
    """
    if b < 1024:
        return f"{b}B"

    # Scale without truncating between steps so rounding uses the full value.
    scaled: float = b
    for unit in ("K", "M", "G", "T"):
        scaled = scaled / 1024
        if scaled < 1024 or unit == "T":
            break
    return f"{scaled:.1f}{unit}" if scaled >= 10 else f"{scaled:.2f}{unit}"
945
+
946
+
947
def _require_vm(db: Database, name: str) -> VMRow:
    """Return the VM row called *name* from *db*.

    Raises:
        VMError: if ``db.get_vm(name)`` returns None.
    """
    row = db.get_vm(name)
    if row is not None:
        return row
    raise VMError(f"VM '{name}' not found")
952
+
953
+
954
def _get_provisioner_for_vm(db: Database, vm: VMRow, config: Config | None = None) -> VMProvisioner:
    """Return the provisioner that manages *vm*.

    Proxmox VMs get a ProxmoxProvisioner built from ``config.proxmox``; the
    config is loaded on demand when *config* is None. Every other platform is
    resolved through ``get_provisioner``, passing the SSH host of the VM's
    recorded VM host when one is found in *db*.
    """
    if vm.platform != "proxmox":
        host_ssh: str | None = None
        if vm.vm_host_name:
            host_row = db.get_vm_host(vm.vm_host_name)
            if host_row:
                host_ssh = host_row.ssh_host
        return get_provisioner(vm.platform, host_ssh)

    from agentworks.vms.provisioners.proxmox import ProxmoxProvisioner

    if config is None:
        from agentworks.config import load_config

        config = load_config()
    return ProxmoxProvisioner(config.proxmox)  # type: ignore[arg-type]
969
+
970
+
971
def _is_tailscale_reachable(tailscale_host: str) -> bool:
    """Report whether a single ``tailscale ping`` to *tailscale_host* succeeds.

    Returns True only when the command exits with status 0. Returns False when
    it exits non-zero, when the subprocess exceeds its 10-second timeout, or
    when the ``tailscale`` executable cannot be found.
    """
    import subprocess

    ping_cmd = ["tailscale", "ping", "--timeout=5s", "-c=1", tailscale_host]
    try:
        completed = subprocess.run(
            ping_cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return False
    return completed.returncode == 0
987
+
988
+
989
def _parse_port_spec(spec: str) -> tuple[int, int]:
    """Parse one ``[LOCAL:]REMOTE`` port spec into ``(local_port, remote_port)``.

    Raises:
        VMError: if the spec has more than two ``:``-separated fields or a
            field is not an integer.
    """
    fields = spec.split(":")
    if len(fields) > 2:
        raise VMError(f"invalid port spec '{spec}' (expected [LOCAL:]REMOTE)")
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        # Two distinct messages: a bare port vs a LOCAL:REMOTE pair.
        what = "port" if len(fields) == 1 else "port spec"
        raise VMError(f"invalid {what} '{spec}'") from None
    # A single field means the local port mirrors the remote port.
    return numbers[0], numbers[-1]


def _format_bind_address(address: str) -> str:
    """Wrap a bare IPv6 literal in square brackets so ``ssh -L`` can parse it."""
    if ":" in address and not address.startswith("["):
        return f"[{address}]"
    return address


def port_forward_vm(
    db: Database,
    config: Config,
    name: str,
    ports: list[str],
    address: str = "localhost",
    verbose: bool = False,
) -> None:
    """Forward one or more local ports to a VM via SSH tunnels.

    Each port spec is either REMOTE_PORT (local defaults to same) or
    LOCAL_PORT:REMOTE_PORT, matching kubectl port-forward syntax.

    *address* is the local bind address; an IPv6 literal such as ``::1`` is
    bracketed automatically. With *verbose*, ``-v`` is passed to ssh.

    The ssh process runs in the foreground; SIGINT/SIGTERM are forwarded to it.
    When ssh exits, this function calls ``sys.exit`` with ssh's return code, so
    it does not return normally.

    Raises:
        VMError: if the VM is missing, failed, or has no Tailscale IP; if a
            port spec is malformed or out of range; or if ssh cannot be started.
    """
    import signal
    import subprocess
    import sys

    vm = _require_vm(db, name)
    _guard_failed_vm(vm)
    if vm.tailscale_host is None:
        raise VMError(f"VM '{name}' has no Tailscale IP (init may not be complete)")

    # Parse every spec first so syntax errors are reported before range errors.
    forwards: list[tuple[int, int]] = [_parse_port_spec(spec) for spec in ports]

    # Validate port ranges
    for local_port, remote_port in forwards:
        for label, port in (("local", local_port), ("remote", remote_port)):
            if not 1 <= port <= 65535:
                raise VMError(f"{label} port {port} out of range (1-65535)")

    bind_address = _format_bind_address(address)

    # Build SSH command with -L flags for each forward
    ssh_cmd = ["ssh", "-N", "-o", "StrictHostKeyChecking=accept-new"]
    if config.operator.ssh_private_key:
        ssh_cmd.extend(["-i", str(config.operator.ssh_private_key)])
    for local_port, remote_port in forwards:
        ssh_cmd.extend(["-L", f"{bind_address}:{local_port}:localhost:{remote_port}"])
    if verbose:
        ssh_cmd.append("-v")
    ssh_cmd.append(f"{vm.admin_username}@{vm.tailscale_host}")

    # Print forwarding info
    for local_port, remote_port in forwards:
        output.info(f"Forwarding {bind_address}:{local_port} -> {vm.tailscale_host}:{remote_port}")
    if not verbose:
        output.info("Use --verbose for detailed SSH output.")

    # Run in foreground until interrupted
    try:
        proc = subprocess.Popen(ssh_cmd)

        # Forward SIGINT/SIGTERM to the SSH process for clean shutdown
        def _handle_signal(sig: int, _frame: object) -> None:
            proc.terminate()

        signal.signal(signal.SIGINT, _handle_signal)
        signal.signal(signal.SIGTERM, _handle_signal)

        rc = proc.wait()
        sys.exit(rc)
    except OSError as e:
        raise VMError(f"failed to start SSH: {e}") from e
1068
+
1069
+
1070
def _ensure_tailscale(
    db: Database,
    config: Config,
    vm: VMRow,
    provisioner: VMProvisioner,
) -> None:
    """After starting a VM, verify Tailscale connectivity and rejoin if needed.

    If the VM has a recorded Tailscale host and it reconnects, nothing else is
    done. Otherwise the recorded host is cleared, the VM rejoins Tailscale via
    the provisioner's own transport (with a public IP attached for the
    duration on Azure), and the SSH config is re-synced.
    """
    from agentworks.ssh import admin_exec_target, wait_for_reconnect

    # Refresh VM row in case tailscale_host was cleared on stop
    vm = _require_vm(db, vm.name)

    # If we have a known Tailscale host, wait for it to reconnect after boot.
    # This avoids unnecessarily attaching a public IP on Azure.
    known_host = vm.tailscale_host
    if known_host:
        reconnected = wait_for_reconnect(admin_exec_target(vm, config))
        if reconnected:
            return

        # Tailscale didn't reconnect (ephemeral key expired, etc.)
        output.info(f"Tailscale node {known_host} did not reconnect, rejoining...")
        db.clear_vm_tailscale(vm.name)

    # For Azure, attach a temporary public IP for the rejoin
    from agentworks.vms.provisioners.azure import AzureProvisioner

    azure = provisioner if isinstance(provisioner, AzureProvisioner) else None
    if azure is not None:
        azure.attach_public_ip(vm)

    try:
        verify_tailscale_available()
        rejoin_target = provisioner.admin_exec_target(vm, config=config)
        rejoin_tailscale(
            db,
            vm.name,
            rejoin_target,
            is_wsl2=(vm.platform == "wsl2"),
        )
    finally:
        # Always drop the temporary public IP, even if the rejoin failed.
        if azure is not None:
            azure.detach_public_ip(vm)

    # Wait for Tailscale SSH to reconnect after IP change
    current = db.get_vm(vm.name)
    if current and current.tailscale_host:
        wait_for_reconnect(admin_exec_target(current, config))

    # Update SSH config in case the Tailscale IP changed
    from agentworks.ssh_config import sync_ssh_config

    sync_ssh_config(config, db)