plato-sdk-v2 2.8.6__py3-none-any.whl → 2.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plato/cli/compose.py ADDED
@@ -0,0 +1,1379 @@
+ """Plato Compose - Run docker compose across multiple Firecracker VMs.
+
+ This module provides CLI commands to:
+ 1. Parse docker-compose files with x-plato annotations
+ 2. Validate VM groupings and configurations
+ 3. Create VMs for each service group
+ 4. Run docker compose services on the appropriate VMs
+ 5. Set up internal networking between VMs
+ 6. Sync local build contexts and volumes via rsync
+ 7. Save/restore VM snapshots for fast iteration
+ """
+
+ import asyncio
+ import logging
+ import os
+ import re
+ import subprocess
+ import tempfile
+ import time
+ from pathlib import Path
+ from typing import Annotated
+
+ import typer
+ import yaml
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+ from rich.table import Table
+
+ from plato.v2 import AsyncPlato, Env
+ from plato.v2.async_.environment import Environment
+ from plato.v2.types import SimConfigCompute
+
+ GATEWAY_HOST = os.getenv("PLATO_GATEWAY_HOST", "gateway.plato.so")
+
+
+ def parse_gitignore(gitignore_path: Path) -> list[str]:
+     """Parse .gitignore file and return list of patterns for rsync --exclude."""
+     if not gitignore_path.exists():
+         return [".git"]  # Always exclude .git
+
+     patterns = [".git"]  # Always exclude .git
+     with open(gitignore_path) as f:
+         for line in f:
+             line = line.strip()
+             # Skip empty lines and comments
+             if not line or line.startswith("#"):
+                 continue
+             # Skip negation patterns (rsync doesn't handle these the same way)
+             if line.startswith("!"):
+                 continue
+             patterns.append(line)
+     return patterns
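+
+ # For example, a hypothetical .gitignore containing "node_modules/", comment
+ # lines, and a "!keep.log" negation yields [".git", "node_modules/"]:
+ # comments and negation patterns are skipped.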
+
+
+ # Suppress httpx logging
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
+
+ logger = logging.getLogger(__name__)
+ console = Console()
+
+ app = typer.Typer(help="Run docker compose across multiple Firecracker VMs")
+
+
+ # =============================================================================
+ # Models
+ # =============================================================================
+
+
+ class ValidationError(BaseModel):
+     """A validation error or warning."""
+
+     level: str  # "error" or "warning"
+     service: str
+     message: str
+
+
+ class VMConfig(BaseModel):
+     """Configuration for a VM group."""
+
+     name: str
+     memory: int  # in MB
+     cpus: int = 1
+     snapshot: str = "disk"  # "disk" or "memory"
+     services: list[str] = Field(default_factory=list)
+
+
+ class PlatoComposeConfig(BaseModel):
+     """Parsed x-plato configuration from docker-compose file."""
+
+     vms: dict[str, VMConfig] = Field(default_factory=dict)
+     compose_file: Path
+     compose_content: dict
+     errors: list[ValidationError] = Field(default_factory=list)
+     warnings: list[ValidationError] = Field(default_factory=list)
+     ignore_paths: list[str] = Field(default_factory=list)  # Paths to exclude from rsync
+     name: str = "compose"  # Stack name used for simulator naming in snapshots
+
+
+ # =============================================================================
+ # Parsing
+ # =============================================================================
+
+
+ def parse_memory(memory_str: str) -> int:
+     """Parse memory string like '2GB' or '512MB' to MB."""
+     memory_str = memory_str.strip().upper()
+     match = re.match(r"^(\d+(?:\.\d+)?)\s*(GB|MB|G|M)?$", memory_str)
+     if not match:
+         raise ValueError(f"Invalid memory format: {memory_str}")
+
+     value = float(match.group(1))
+     unit = match.group(2) or "MB"
+
+     if unit in ("GB", "G"):
+         return int(value * 1024)
+     return int(value)
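+
+ # A few worked cases (derived from the rules above): parse_memory("2GB") == 2048,
+ # parse_memory("512") == 512 (MB is the default unit), parse_memory("1.5G") == 1536.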
+
+
+ def parse_compose_file(compose_path: Path) -> PlatoComposeConfig:
+     """Parse a docker-compose file and extract x-plato configuration."""
+
+     if not compose_path.exists():
+         raise FileNotFoundError(f"Compose file not found: {compose_path}")
+
+     with open(compose_path) as f:
+         compose = yaml.safe_load(f)
+
+     if not compose:
+         raise ValueError("Empty compose file")
+
+     services = compose.get("services", {})
+     if not services:
+         raise ValueError("No services defined in compose file")
+
+     # Get global x-plato config
+     global_plato = compose.get("x-plato", {})
+     vm_configs_raw = global_plato.get("services", {})
+     ignore_paths = global_plato.get("ignore", [])
+     stack_name = global_plato.get("name", "compose")  # Stack name for simulator naming
+
+     # Build VM configs
+     vms: dict[str, VMConfig] = {}
+
+     # First, create VM configs from global x-plato.services
+     for vm_name, vm_config in vm_configs_raw.items():
+         memory = parse_memory(vm_config.get("memory", "2GB"))
+         cpus = vm_config.get("cpus", 1)
+         snapshot = vm_config.get("snapshot", "disk")
+
+         if snapshot not in ("disk", "memory"):
+             raise ValueError(f"Invalid snapshot type for {vm_name}: {snapshot}")
+
+         vms[vm_name] = VMConfig(
+             name=vm_name,
+             memory=memory,
+             cpus=cpus,
+             snapshot=snapshot,
+         )
+
+     # Now assign services to VMs
+     for service_name, service_config in services.items():
+         plato_config = service_config.get("x-plato", {})
+         vm_name = plato_config.get("service")
+
+         if not vm_name:
+             raise ValueError(f"Service '{service_name}' missing x-plato.service annotation")
+
+         # Create VM config if not defined in global x-plato.services
+         if vm_name not in vms:
+             vms[vm_name] = VMConfig(
+                 name=vm_name,
+                 memory=2048,  # 2GB default
+                 cpus=1,
+                 snapshot="disk",
+             )
+
+         vms[vm_name].services.append(service_name)
+
+     config = PlatoComposeConfig(
+         vms=vms,
+         compose_file=compose_path,
+         compose_content=compose,
+         ignore_paths=ignore_paths,
+         name=stack_name,
+     )
+
+     # Run validation
+     validate_config(config, services)
+
+     return config
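+
+ # Applied to the example annotations sketched near the top of this module, this
+ # would produce vms == {"db": VMConfig(name="db", memory=4096, cpus=2,
+ # snapshot="memory", services=["postgres"])} (illustrative values only).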
+
+
+ def validate_config(config: PlatoComposeConfig, services: dict) -> None:
+     """Validate the parsed configuration for common issues."""
+
+     errors = config.errors
+     warnings = config.warnings
+
+     # Build service -> VM mapping
+     service_to_vm: dict[str, str] = {}
+     for vm_name, vm_config in config.vms.items():
+         for svc in vm_config.services:
+             service_to_vm[svc] = vm_name
+
+     # Track volumes used by each VM
+     vm_volumes: dict[str, set[str]] = {vm: set() for vm in config.vms}
+
+     # If only one VM, skip cross-VM validation
+     single_vm = len(config.vms) == 1
+
+     for service_name, service_config in services.items():
+         vm_name = service_to_vm.get(service_name)
+         if not vm_name:
+             continue
+
+         # ---------------------------------------------------------------------
+         # Check 1: Port exposure
+         # ---------------------------------------------------------------------
+         exposed_ports = set()
+
+         # Collect from 'ports'
+         for port in service_config.get("ports", []):
+             if isinstance(port, str):
+                 # Format: "host:container" or "container"
+                 parts = str(port).replace("/tcp", "").replace("/udp", "").split(":")
+                 container_port = int(parts[-1].split("-")[0])
+                 exposed_ports.add(container_port)
+             elif isinstance(port, dict):
+                 exposed_ports.add(port.get("target", 0))
+             elif isinstance(port, int):
+                 exposed_ports.add(port)
+
+         # Collect from 'expose'
+         for port in service_config.get("expose", []):
+             exposed_ports.add(int(str(port).replace("/tcp", "").replace("/udp", "")))
+
+         # Only warn if NO ports are exposed at all (not about specific missing ports).
+         # Services often use non-standard ports, so we shouldn't be prescriptive.
+         if not exposed_ports and not service_config.get("network_mode") and not service_config.get("build"):
+             # Only warn for image-based services with no ports;
+             # build-based services might expose ports dynamically.
+             warnings.append(
+                 ValidationError(
+                     level="warning",
+                     service=service_name,
+                     message="No ports exposed. Service won't be accessible from other VMs via .plato.internal",
+                 )
+             )
+
+         # ---------------------------------------------------------------------
+         # Check 2: Volume sharing across VMs
+         # ---------------------------------------------------------------------
+         for volume in service_config.get("volumes", []):
+             vol_str = str(volume) if isinstance(volume, str) else volume.get("source", "")
+
+             # Named volume (doesn't start with / or .)
+             if vol_str and not vol_str.startswith("/") and not vol_str.startswith("."):
+                 vol_name = vol_str.split(":")[0]
+                 vm_volumes[vm_name].add(vol_name)
+
+         # Bind mounts are synced via rsync, so no warning is needed:
+         # they'll be available on the VM after sync.
+
+         # Check 3 is implicit: cross-VM depends_on is handled by plato compose,
+         # which waits for healthchecks on dependent VMs before starting services.
+
+         # ---------------------------------------------------------------------
+         # Check 4: network_mode issues (only if multiple VMs)
+         # ---------------------------------------------------------------------
+         if not single_vm:
+             network_mode = service_config.get("network_mode", "")
+             if network_mode.startswith("service:"):
+                 ref_service = network_mode.replace("service:", "")
+                 ref_vm = service_to_vm.get(ref_service)
+                 if ref_vm and ref_vm != vm_name:
+                     errors.append(
+                         ValidationError(
+                             level="error",
+                             service=service_name,
+                             message=f"network_mode 'service:{ref_service}' references a service in a different VM ({ref_vm}). "
+                             "This won't work. Services sharing network_mode must be in the same VM.",
+                         )
+                     )
+
+             # -----------------------------------------------------------------
+             # Check 5: links (deprecated but still used)
+             # -----------------------------------------------------------------
+             for link in service_config.get("links", []):
+                 link_service = link.split(":")[0]
+                 link_vm = service_to_vm.get(link_service)
+                 if link_vm and link_vm != vm_name:
+                     errors.append(
+                         ValidationError(
+                             level="error",
+                             service=service_name,
+                             message=f"links to '{link_service}' which is in a different VM ({link_vm}). "
+                             "Links don't work across VMs. Use {service}.plato.internal instead.",
+                         )
+                     )
+
+     # ---------------------------------------------------------------------
+     # Check 6: Named volumes shared across VMs (only if multiple VMs)
+     # ---------------------------------------------------------------------
+     if not single_vm:
+         all_volumes: dict[str, list[str]] = {}
+         for vm_name, volumes in vm_volumes.items():
+             for vol in volumes:
+                 if vol not in all_volumes:
+                     all_volumes[vol] = []
+                 all_volumes[vol].append(vm_name)
+
+         for vol_name, vms_using in all_volumes.items():
+             if len(vms_using) > 1:
+                 errors.append(
+                     ValidationError(
+                         level="error",
+                         service="(global)",
+                         message=f"Named volume '{vol_name}' is used by services in multiple VMs: {', '.join(vms_using)}. "
+                         "Volumes cannot be shared across VMs. Use separate volumes or a shared storage service.",
+                     )
+                 )
+
+         # -----------------------------------------------------------------
+         # Check 7: External networks (only if multiple VMs)
+         # -----------------------------------------------------------------
+         if "networks" in config.compose_content:
+             for network_name, network_config in config.compose_content.get("networks", {}).items():
+                 if network_config and network_config.get("external"):
+                     errors.append(
+                         ValidationError(
+                             level="error",
+                             service="(global)",
+                             message=f"External network '{network_name}' won't exist across VMs. "
+                             "Remove external networks or use .plato.internal DNS for cross-VM communication.",
+                         )
+                     )
+
+
+ def collect_local_paths(config: PlatoComposeConfig) -> dict[str, list[tuple[str, str]]]:
+     """Collect local paths that need to be synced for each VM.
+
+     Finds the common ancestor of all build contexts and syncs that.
+
+     Returns:
+         Dict mapping vm_name -> list of (local_path, remote_path) tuples
+     """
+     services = config.compose_content.get("services", {})
+     compose_dir = config.compose_file.parent.resolve()
+
+     # Collect all build context paths
+     context_paths: list[Path] = [compose_dir]  # Always include compose dir
+
+     for service_config in services.values():
+         build_config = service_config.get("build")
+         if build_config:
+             if isinstance(build_config, str):
+                 context_path = build_config
+             elif isinstance(build_config, dict):
+                 context_path = build_config.get("context", ".")
+             else:
+                 continue
+
+             # Resolve relative to compose file
+             resolved = (compose_dir / context_path).resolve()
+             context_paths.append(resolved)
+
+     # Find common ancestor of all paths
+     if len(context_paths) == 1:
+         sync_dir = context_paths[0]
+     else:
+         # Get the common prefix of all paths
+         sync_dir = Path(os.path.commonpath([str(p) for p in context_paths]))
+
+     sync_dir_str = str(sync_dir)
+
+     vm_paths: dict[str, list[tuple[str, str]]] = {}
+     for vm_name in config.vms:
+         # Sync to /app on VM
+         vm_paths[vm_name] = [(sync_dir_str, "/app")]
+
+     return vm_paths
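+
+ # For instance, with the compose file at /repo/deploy/docker-compose.yml and a
+ # service building from context "../app", the common ancestor /repo is synced,
+ # so every VM receives [("/repo", "/app")] (hypothetical paths).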
+
+
+ def generate_vm_compose(config: PlatoComposeConfig, vm_name: str) -> str:
+     """Generate a docker-compose file for a specific VM with only its services."""
+
+     vm_config = config.vms[vm_name]
+     original = config.compose_content.copy()
+
+     # Build set of services on this VM for filtering depends_on
+     services_on_this_vm = set(vm_config.services)
+
+     # Filter services to only those for this VM
+     filtered_services = {}
+     for service_name in vm_config.services:
+         if service_name in original.get("services", {}):
+             service = original["services"][service_name].copy()
+             # Remove x-plato annotation from output
+             service.pop("x-plato", None)
+
+             # Remove depends_on entries that reference services on other VMs
+             depends_on = service.get("depends_on")
+             if depends_on:
+                 if isinstance(depends_on, list):
+                     # Simple list format
+                     filtered_deps = [d for d in depends_on if d in services_on_this_vm]
+                     if filtered_deps:
+                         service["depends_on"] = filtered_deps
+                     else:
+                         del service["depends_on"]
+                 elif isinstance(depends_on, dict):
+                     # Extended format with conditions
+                     filtered_deps = {k: v for k, v in depends_on.items() if k in services_on_this_vm}
+                     if filtered_deps:
+                         service["depends_on"] = filtered_deps
+                     else:
+                         del service["depends_on"]
+
+             filtered_services[service_name] = service
+
+     # Build new compose file (no version - it's obsolete)
+     new_compose = {
+         "services": filtered_services,
+     }
+
+     # Include networks if defined
+     if "networks" in original:
+         new_compose["networks"] = original["networks"]
+
+     # Include volumes if defined
+     if "volumes" in original:
+         new_compose["volumes"] = original["volumes"]
+
+     return yaml.dump(new_compose, default_flow_style=False)
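+
+ # For example, if VM "app" holds only the "web" service and web declares
+ # depends_on: [db] with db assigned to another VM, the generated file contains
+ # just web, with the cross-VM depends_on entry pruned (hypothetical services).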
+
+
+ # =============================================================================
+ # Dependency Graph
+ # =============================================================================
+
+
+ def build_vm_dependency_order(config: PlatoComposeConfig) -> list[list[str]]:
+     """Build VM startup order based on service depends_on.
+
+     Returns:
+         List of VM groups to start in order. VMs in the same group can start in parallel.
+     """
+     services = config.compose_content.get("services", {})
+
+     # Build service -> VM mapping
+     service_to_vm: dict[str, str] = {}
+     for vm_name, vm_config in config.vms.items():
+         for svc in vm_config.services:
+             service_to_vm[svc] = vm_name
+
+     # Build VM -> VM dependencies
+     vm_deps: dict[str, set[str]] = {vm: set() for vm in config.vms}
+
+     for service_name, service_config in services.items():
+         service_vm = service_to_vm.get(service_name)
+         if not service_vm:
+             continue
+
+         depends_on = service_config.get("depends_on", [])
+         if isinstance(depends_on, dict):
+             depends_on = list(depends_on.keys())
+
+         for dep in depends_on:
+             dep_vm = service_to_vm.get(dep)
+             if dep_vm and dep_vm != service_vm:
+                 vm_deps[service_vm].add(dep_vm)
+
+     # Topological sort into levels
+     levels: list[list[str]] = []
+     remaining = set(config.vms.keys())
+     satisfied: set[str] = set()
+
+     while remaining:
+         # Find VMs with all dependencies satisfied
+         level = [vm for vm in remaining if vm_deps[vm].issubset(satisfied)]
+
+         if not level:
+             # Circular dependency - just add remaining
+             level = list(remaining)
+
+         levels.append(level)
+         satisfied.update(level)
+         remaining -= set(level)
+
+     return levels
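+
+ # Sketch of the resulting levels (hypothetical stack): if services on VM "web"
+ # depend on services on VM "db", and VM "worker" has no cross-VM dependencies,
+ # this returns [["db", "worker"], ["web"]]: db and worker boot in parallel first.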
+
+
+ # =============================================================================
+ # SSH Helpers
+ # =============================================================================
+
+
+ def generate_ssh_keypair(key_dir: Path) -> tuple[str, str]:
+     """Generate an ED25519 SSH keypair.
+
+     Returns:
+         (private_key_path, public_key)
+     """
+     private_key_path = key_dir / "id_ed25519"
+     public_key_path = key_dir / "id_ed25519.pub"
+
+     subprocess.run(
+         ["ssh-keygen", "-t", "ed25519", "-f", str(private_key_path), "-N", "", "-q"],
+         check=True,
+     )
+
+     public_key = public_key_path.read_text().strip()
+     return str(private_key_path), public_key
+
+
+ # =============================================================================
+ # FsWatch + Rsync Hot Reload
+ # =============================================================================
+
+
+ def check_fswatch() -> bool:
+     """Check if fswatch is installed."""
+     try:
+         subprocess.run(["fswatch", "--version"], capture_output=True, check=True)
+         return True
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return False
+
+
+ class FsWatchSync:
+     """Watch local directories and rsync changes to VMs.
+
+     Uses fswatch to detect file changes and rsync to sync them to VMs.
+     Supports syncing to multiple VMs in parallel.
+     """
+
+     def __init__(
+         self,
+         local_paths: list[str],
+         vm_syncs: list[tuple[Environment, str, str]],  # (env, remote_path, private_key_path)
+         excludes: list[str],
+         verbose: bool = False,
+     ):
+         """Initialize the watcher.
+
+         Args:
+             local_paths: Local directories to watch
+             vm_syncs: List of (env, remote_path, private_key_path) for each VM sync target
+             excludes: Patterns to exclude (from .gitignore)
+             verbose: Print sync status
+         """
+         self.local_paths = local_paths
+         self.vm_syncs = vm_syncs
+         self.excludes = excludes
+         self.verbose = verbose
+         self._fswatch_proc: subprocess.Popen | None = None
+         self._sync_task: asyncio.Task | None = None
+         self._running = False
+
+     def _build_rsync_cmd(
+         self,
+         local_path: str,
+         remote_path: str,
+         job_id: str,
+         private_key_path: str,
+     ) -> list[str]:
+         """Build rsync command for a sync."""
+         sni = f"{job_id}--22.{GATEWAY_HOST}"
+         proxy_cmd = f"openssl s_client -quiet -connect {GATEWAY_HOST}:443 -servername {sni} 2>/dev/null"
+         ssh_cmd = f"ssh -i {private_key_path} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o 'ProxyCommand={proxy_cmd}'"
+
+         cmd = [
+             "rsync",
+             "-az",
+             "--delete",
+         ]
+         # Add excludes from .gitignore
+         for pattern in self.excludes:
+             cmd.extend(["--exclude", pattern])
+
+         cmd.extend(
+             [
+                 "-e",
+                 ssh_cmd,
+                 f"{local_path}/",
+                 f"root@{job_id}.plato:{remote_path}/",
+             ]
+         )
+         return cmd
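+
+     # The assembled command looks roughly like (illustrative values):
+     #   rsync -az --delete --exclude .git \
+     #     -e "ssh -i /tmp/key ... -o 'ProxyCommand=openssl s_client ...'" \
+     #     ./project/ root@<job_id>.plato:/app/
+     # i.e. SSH is tunneled through a TLS connection to the gateway, with the
+     # job ID and port encoded in the SNI ("<job_id>--22").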
+
+     def _sync_path(self, local_path: str, env: Environment, remote_path: str, private_key_path: str) -> bool:
+         """Sync a single path to a single VM."""
+         try:
+             result = subprocess.run(
+                 self._build_rsync_cmd(local_path, remote_path, env.job_id, private_key_path),
+                 capture_output=True,
+                 text=True,
+                 timeout=120,
+             )
+             return result.returncode == 0
+         except Exception:
+             return False
+
+     def sync_all(self) -> int:
+         """Sync all paths to all VMs. Returns number of successful syncs."""
+         success = 0
+         for local_path in self.local_paths:
+             for env, remote_path, private_key_path in self.vm_syncs:
+                 if self._sync_path(local_path, env, remote_path, private_key_path):
+                     success += 1
+         return success
+
+     async def _watch_and_sync(self):
+         """Watch for changes and sync."""
+         # Build fswatch command
+         fswatch_cmd = [
+             "fswatch",
+             "-o",  # Output number of events (batch mode)
+             "--latency",
+             "0.5",
+             "-r",  # Recursive
+         ]
+         # Add exclude patterns from .gitignore
+         for pattern in self.excludes:
+             fswatch_cmd.extend(["-e", pattern])
+
+         fswatch_cmd.extend(self.local_paths)
+
+         self._fswatch_proc = subprocess.Popen(
+             fswatch_cmd,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.DEVNULL,
+         )
+
+         loop = asyncio.get_event_loop()
+
+         while self._running:
+             try:
+                 stdout = self._fswatch_proc.stdout
+                 if stdout is None:
+                     break
+                 line = await loop.run_in_executor(None, stdout.readline)
+                 if line:
+                     # Changes detected, sync all
+                     count = await loop.run_in_executor(None, self.sync_all)
+                     if self.verbose:
+                         console.print(f"  [dim]synced to {count} VM(s)[/dim]")
+             except Exception:
+                 break
+
+     def start(self):
+         """Start watching and syncing."""
+         self._running = True
+         self._sync_task = asyncio.create_task(self._watch_and_sync())
+
+     def stop(self):
+         """Stop watching."""
+         self._running = False
+         if self._fswatch_proc:
+             self._fswatch_proc.terminate()
+             self._fswatch_proc = None
+         if self._sync_task:
+             self._sync_task.cancel()
+             self._sync_task = None
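+
+ # Usage sketch (hypothetical values): inside a running event loop, construct
+ # watcher = FsWatchSync(["/repo"], [(env, "/app", "/tmp/plato_ssh/id_ed25519")],
+ # excludes=[".git"]), call watcher.start() to begin watching, and watcher.stop()
+ # on teardown; sync_all() can also be invoked directly for a one-off sync.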
+
+
+ # =============================================================================
+ # Async Helpers
+ # =============================================================================
+
+
+ async def deploy_to_vm(
+     env: Environment,
+     vm_name: str,
+     vm_config: VMConfig,
+     compose_content: str,
+     compose_dir: str,
+ ) -> tuple[str, bool, str]:
+     """Deploy docker compose to a single VM.
+
+     Args:
+         env: The VM environment
+         vm_name: Name of the VM
+         vm_config: VM configuration
+         compose_content: The docker-compose.yml content for this VM
+         compose_dir: Directory where the original compose file lives (for build contexts)
+
+     Returns:
+         (vm_name, success, message)
+     """
+     try:
+         # Write compose file to the synced compose directory (so build contexts work).
+         # Single quotes are escaped ('\'') so the content survives shell quoting.
+         compose_file = f"{compose_dir}/docker-compose.plato.yml"
+         escaped_compose = compose_content.replace("'", "'\\''")
+         await env.execute(
+             f"echo '{escaped_compose}' > '{compose_file}'",
+             timeout=30,
+         )
+
+         # Start docker compose from the original directory
+         result = await env.execute(
+             f"cd '{compose_dir}' && docker compose -f docker-compose.plato.yml up --build -d 2>&1 | tee -a /tmp/plato-init.log",
+             timeout=300,
+         )
+
+         output = result.stdout.strip()
+
+         # Check for common error patterns in output
+         error_patterns = ["no such file or directory", "error", "failed", "cannot", "not found"]
+         has_error = any(pattern in output.lower() for pattern in error_patterns)
+
+         if result.exit_code != 0 or has_error:
+             # Show the actual error output
+             error_output = output or result.stderr.strip() or "(no output)"
+             return (vm_name, False, f"docker compose failed:\n{error_output}")
+
+         # Verify containers are running
+         ps_result = await env.execute(
+             f"cd '{compose_dir}' && docker compose -f docker-compose.plato.yml ps -q | wc -l", timeout=30
+         )
+         container_count = int(ps_result.stdout.strip() or "0")
+
+         if container_count == 0:
+             return (vm_name, False, "no containers running - check /tmp/plato-init.log")
+
+         return (vm_name, True, f"started {container_count} container(s)")
+
+     except Exception as e:
+         return (vm_name, False, str(e))
+
+
+ async def check_vm_health(env: Environment, vm_name: str) -> tuple[str, bool]:
+     """Check if all containers on a VM are healthy.
+
+     Returns:
+         (vm_name, all_healthy)
+     """
+     try:
+         result = await env.execute(
+             "docker ps --format '{{.Status}}' | grep -v 'Up' | wc -l",
+             timeout=30,
+         )
+         healthy = bool(result.stdout and result.stdout.strip() == "0")
+         return (vm_name, healthy)
+     except Exception:
+         return (vm_name, False)
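+
+ # Note: this counts `docker ps` status lines that don't contain "Up", so a VM
+ # with zero non-Up lines is reported healthy; statuses such as
+ # "Up 10 seconds (unhealthy)" still match "Up" and pass this coarse check.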
+
+
+ # =============================================================================
+ # CLI Commands
+ # =============================================================================
+
+
+ @app.command()
+ def validate(
+     compose_file: Annotated[
+         Path,
+         typer.Argument(help="Path to docker-compose file"),
+     ] = Path("docker-compose.yml"),
+ ):
+     """Validate a docker-compose file with x-plato annotations."""
+
+     try:
+         config = parse_compose_file(compose_file)
+     except Exception as e:
+         console.print(f"[red]Parse error:[/red] {e}")
+         raise typer.Exit(1)
+
+     has_issues = False
+
+     # Show errors
+     if config.errors:
+         console.print(f"\n[red bold]Errors ({len(config.errors)}):[/red bold]")
+         for err in config.errors:
+             console.print(f"  [red]✗[/red] [{err.service}] {err.message}")
+         has_issues = True
+
+     # Show warnings (treated as errors)
+     if config.warnings:
+         console.print(f"\n[red bold]Warnings treated as errors ({len(config.warnings)}):[/red bold]")
+         for warn in config.warnings:
+             console.print(f"  [red]✗[/red] [{warn.service}] {warn.message}")
+         has_issues = True
+
+     if has_issues:
+         total = len(config.errors) + len(config.warnings)
+         console.print(f"\n[red]Validation failed with {total} error(s)[/red]")
+         raise typer.Exit(1)
+
+     # Show success
+     console.print(f"\n[green]✓ Valid![/green] Found {len(config.vms)} VM groups:\n")
+
+     # VM Groups table
+     table = Table(title="VM Groups")
+     table.add_column("VM", style="cyan")
+     table.add_column("Memory", style="green")
+     table.add_column("CPUs", style="green")
+     table.add_column("Snapshot", style="yellow")
+     table.add_column("Services", style="white")
+
+     for vm_name, vm_config in config.vms.items():
+         table.add_row(
+             vm_name,
+             f"{vm_config.memory}MB",
+             str(vm_config.cpus),
+             vm_config.snapshot,
+             ", ".join(vm_config.services),
+         )
+
+     console.print(table)
+
+     # Startup order
+     vm_levels = build_vm_dependency_order(config)
+     console.print("\n[bold]Deploy Order:[/bold]")
+     for i, level in enumerate(vm_levels):
+         vms_str = ", ".join(level)
+         console.print(f"  {i + 1}. {vms_str}")
+
+     # Rsync plans
+     local_paths = collect_local_paths(config)
+     has_local_paths = any(paths for paths in local_paths.values())
+
+     if has_local_paths:
+         console.print("\n[bold]Rsync Plans:[/bold]")
+         for vm_name, paths in local_paths.items():
+             if paths:
+                 console.print(f"  [cyan]{vm_name}[/cyan]:")
+                 for local_p, remote_p in paths:
+                     console.print(f"      {local_p} -> {remote_p}")
+
+
+ async def _up_async(
+     config: PlatoComposeConfig,
+     timeout: int,
+     save_name: str | None,
+     verbose: bool,
+     watch: bool = False,
+     from_snapshot: str | None = None,
+ ):
+     """Async implementation of the up command."""
+
+     # Collect local paths that need syncing
+     local_paths = collect_local_paths(config)
+     has_local_paths = any(paths for paths in local_paths.values())
+
+     # Check for fswatch if hot reload requested
+     use_watch = False
+     if watch:
+         if check_fswatch():
+             use_watch = True
+             console.print("[bold]Watch mode:[/bold] Using rsync + fswatch for hot reload")
+         else:
+             console.print("[yellow]Warning:[/yellow] fswatch not installed")
+             console.print("[dim]Install: brew install fswatch (macOS) or apt install fswatch (Linux)[/dim]")
+             use_watch = False
+
+     if has_local_paths and verbose:
+         console.print("Local paths to sync:")
+         for vm_name, paths in local_paths.items():
+             if paths:
+                 console.print(f"  {vm_name}:")
+                 for local_p, remote_p in paths:
+                     console.print(f"    {local_p} -> {remote_p}")
+         console.print("")
+
+     vm_names = ", ".join(config.vms.keys())
+     if from_snapshot:
+         console.print(f"[bold]Starting {len(config.vms)} VMs from snapshot '{from_snapshot}':[/bold] {vm_names}")
+     else:
+         console.print(f"[bold]Starting {len(config.vms)} VMs:[/bold] {vm_names}")
+
+     plato = AsyncPlato()
+     session = None
+     watcher: FsWatchSync | None = None
+     total_start = time.time()
+
+     try:
+         # Create VMs - either from snapshot (cached) or fresh
+         envs = []
+         for vm_name, vm_config in config.vms.items():
+             if from_snapshot:
+                 # Use disk snapshot as base (warm cache for docker layers, deps, etc.).
+                 # Uses version lookup: {config.name}-{vm_name}:version@base
+                 # restore_memory=False for disk-only snapshots (fresh boot with preserved disk)
+                 envs.append(
+                     Env.simulator(
+                         f"{config.name}-{vm_name}:{from_snapshot}@base",
+                         alias=vm_name,
+                         restore_memory=False,
+                     )
+                 )
+             else:
+                 # Create fresh VMs
+                 envs.append(
+                     Env.resource(
+                         simulator=f"{config.name}-{vm_name}",
+                         sim_config=SimConfigCompute(
+                             cpus=vm_config.cpus,
+                             memory=vm_config.memory,
+                             disk=10240,  # 10GB default
+                         ),
+                         alias=vm_name,
+                     )
+                 )
+
+         step_start = time.time()
+         status_msg = "[bold green]Starting VMs from snapshot..." if from_snapshot else "[bold green]Creating VMs..."
+         with console.status(status_msg, spinner="dots"):
+             session = await plato.sessions.create(
+                 envs=envs,
+                 timeout=timeout,
+                 connect_network=True,
+             )
+             await session.start_heartbeat()
+         console.print(f"[green]✓[/green] VMs ready: {time.time() - step_start:.1f}s")
+
+         # Map alias to Environment object
+         env_map: dict[str, Environment] = {env.alias: env for env in session.envs}
+
+         # Show job IDs immediately after VMs start
+         console.print("")
+         for vm_name in config.vms:
+             env = env_map.get(vm_name)
+             if env:
+                 console.print(f"  [cyan]{vm_name}[/cyan]: {env.job_id}")
+
+         if verbose:
+             console.print("")
+             console.print("[dim]SSH into VMs:[/dim]")
+             for vm_name in config.vms:
+                 env = env_map.get(vm_name)
+                 if env:
+                     console.print(f"  [dim]plato sandbox ssh -J {env.job_id}[/dim]")
+             console.print("")
+             console.print("[dim]Logs stored at /tmp/plato-init.log on each VM[/dim]")
+
+         console.print("")
+
+         # Generate SSH keypair for rsync (if needed)
+         private_key_path = None
+
+         if has_local_paths:
+             key_dir = Path(tempfile.mkdtemp(prefix="plato_ssh_"))
+             private_key_path, public_key = generate_ssh_keypair(key_dir)
+
+             # Set up SSH keys on all VMs using the sessions API (one call adds to all VMs)
+             step_start = time.time()
+             with console.status("[bold green]Setting up SSH keys...", spinner="dots"):
+                 ssh_response = await session.add_ssh_key(public_key)
+
+             if ssh_response.success:
+                 console.print(f"[green]✓[/green] SSH keys ready: {time.time() - step_start:.1f}s")
+             else:
+                 console.print("[red]✗[/red] Failed to set up SSH keys")
+                 has_local_paths = False  # Skip sync
+
+         if has_local_paths:
+             # Show unique sync paths
+             unique_syncs = set()
+             for paths in local_paths.values():
+                 for local_p, remote_p in paths:
+                     unique_syncs.add((local_p, remote_p))
+
+             console.print(f"[bold]Syncing to {len(config.vms)} VMs:[/bold]")
+             for local_p, remote_p in unique_syncs:
+                 console.print(f"  {local_p} -> {remote_p}")
+
+             step_start = time.time()
+             with console.status("[bold green]Installing rsync on VMs...", spinner="dots"):
+                 rsync_install_tasks = [env.ensure_rsync() for env in session.envs]
+                 rsync_install_results = await asyncio.gather(*rsync_install_tasks)
+
+             if all(rsync_install_results):
+                 console.print(f"[green]✓[/green] rsync ready: {time.time() - step_start:.1f}s")
+             else:
+                 console.print("[yellow]![/yellow] Failed to install rsync on some VMs")
+             console.print("")
+         else:
+             console.print("[dim]No local paths to sync[/dim]")
+             console.print("")
+
+         # Build VM startup order based on depends_on
+         vm_levels = build_vm_dependency_order(config)
+
+         if verbose:
+             console.print("Deploy order:")
+             for i, level in enumerate(vm_levels):
+                 console.print(f"  {i + 1}. {', '.join(level)}")
+             console.print("")
+
+         # Start ALL rsync tasks upfront (they run in parallel across all VMs).
+         # Each stage will wait for only its VMs' rsyncs to complete before deploying.
+         vm_rsync_tasks: dict[str, list[asyncio.Task]] = {}
+
+         if has_local_paths and private_key_path:
+             console.print("[bold]Starting rsync for all VMs...[/bold]")
+             for vm_name in config.vms:
+                 paths = local_paths.get(vm_name, [])
+                 env = env_map.get(vm_name)
+                 if not env or not paths:
+                     continue
+
+                 vm_rsync_tasks[vm_name] = []
+                 for local_p, remote_p in paths:
+                     task = asyncio.create_task(
+                         env.rsync(local_p, remote_p, private_key_path, exclude=config.ignore_paths, verbose=verbose)
+                     )
+                     vm_rsync_tasks[vm_name].append(task)
+
+                 if verbose:
+                     console.print(f"  [dim]{vm_name}: {len(paths)} path(s)[/dim]")
+
+             total_tasks = sum(len(tasks) for tasks in vm_rsync_tasks.values())
+             console.print(f"  [dim]{total_tasks} rsync task(s) started in background[/dim]")
+             console.print("")
+
+         # Collect /etc/hosts entries from all VMs and propagate to all
+         console.print("[bold]Setting up /etc/hosts...[/bold]")
+
+         # Get hosts entries from each VM (Plato networking adds entries per-VM)
+         all_hosts_lines: set[str] = set()
+         for env in session.envs:
+             result = await env.execute("grep '.plato.internal' /etc/hosts 2>/dev/null || true", timeout=30)
+             if result.stdout:
+                 for line in result.stdout.strip().split("\n"):
+                     if line.strip():
+                         all_hosts_lines.add(line.strip())
+
+         if all_hosts_lines:
+             # Write all entries to all VMs. The "\\n" keeps a literal backslash-n
+             # in the shell string, which `echo -e` expands back into newlines.
+             hosts_content = "\\n".join(sorted(all_hosts_lines))
+             hosts_tasks = []
+             for env in session.envs:
+                 # Remove existing .plato.internal entries and add complete set
+                 hosts_tasks.append(
+                     env.execute(
+                         f"sed -i '/.plato.internal/d' /etc/hosts && echo -e \"{hosts_content}\" >> /etc/hosts",
+                         timeout=30,
+                     )
+                 )
+             await asyncio.gather(*hosts_tasks)
+             console.print(f"  [green]✓[/green] Synced {len(all_hosts_lines)} host entries across all VMs")
+         else:
+             console.print("  [yellow]![/yellow] No .plato.internal entries found")
+         console.print("")
+
+         # Deploy VMs in dependency order
+         for level_idx, vm_level in enumerate(vm_levels):
+             level_name = ", ".join(vm_level)
+             stage_start = time.time()
+             console.print(f"\n[bold]Stage {level_idx + 1}:[/bold] {level_name}")
+
+             # 1. Wait for rsync tasks for THIS stage's VMs only
+             if has_local_paths and private_key_path:
+                 stage_rsync_tasks = []
+                 for vm_name in vm_level:
+                     stage_rsync_tasks.extend(vm_rsync_tasks.get(vm_name, []))
+
+                 if stage_rsync_tasks:
+                     with console.status(f"[bold green]Syncing {len(stage_rsync_tasks)} path(s)...", spinner="dots"):
+                         rsync_results = await asyncio.gather(*stage_rsync_tasks)
+                     failed_syncs = [r for r in rsync_results if not r.success]
+                     success_count = len(rsync_results) - len(failed_syncs)
+                     if failed_syncs:
+                         for r in failed_syncs:
+                             console.print(f"  [red]✗[/red] rsync {Path(r.local_path).name}: {r.message}")
+                         console.print(f"  [yellow]![/yellow] rsync: {success_count}/{len(rsync_results)} synced")
+                     else:
+                         console.print(f"  [green]✓[/green] rsync: {success_count} path(s) synced")
+
+             # 2. Deploy docker compose for this stage (in parallel)
+             deploy_start = time.time()
+             deploy_tasks = []
+             for vm_name in vm_level:
+                 env = env_map.get(vm_name)
+                 vm_config = config.vms.get(vm_name)
+                 if not env or not vm_config:
+                     continue
+
+                 vm_compose = generate_vm_compose(config, vm_name)
+                 # Compute remote compose dir: /app + relative path from sync root
+                 local_compose_dir = config.compose_file.parent.resolve()
+                 local_sync_dir = Path(list(local_paths.values())[0][0][0])  # Get the sync source dir
+                 relative_compose_dir = local_compose_dir.relative_to(local_sync_dir)
+                 remote_compose_dir = f"/app/{relative_compose_dir}"
+                 deploy_tasks.append(deploy_to_vm(env, vm_name, vm_config, vm_compose, remote_compose_dir))
+
+             with console.status("[bold green]Building and starting containers...", spinner="dots"):
+                 results = await asyncio.gather(*deploy_tasks)
+             deploy_elapsed = time.time() - deploy_start
+
+             for vm_name, success, message in results:
+                 if success:
+                     console.print(f"  [green]✓[/green] {vm_name}: {message}")
+                 else:
+                     console.print(f"  [red]✗[/red] {vm_name}: {message}")
+
+             console.print(f"  [dim]deploy: {deploy_elapsed:.1f}s[/dim]")
+
+             # 3. Wait for healthchecks on this stage before proceeding
+             health_start = time.time()
+             max_wait = 300
+             all_healthy = False
+
+             with console.status("[bold green]Waiting for containers to be healthy...", spinner="dots"):
+                 while time.time() - health_start < max_wait:
+                     health_tasks = [
+                         check_vm_health(env_map[vm_name], vm_name) for vm_name in vm_level if vm_name in env_map
+                     ]
+
+                     health_results = await asyncio.gather(*health_tasks)
+                     all_healthy = all(healthy for _, healthy in health_results)
+
+                     if all_healthy:
+                         break
+
+                     await asyncio.sleep(5)
+
+             stage_elapsed = time.time() - stage_start
+             if all_healthy:
+                 console.print(f"  [green]✓[/green] healthy ({stage_elapsed:.1f}s)")
+             else:
+                 console.print(f"  [yellow]![/yellow] healthcheck timeout ({stage_elapsed:.1f}s)")
+
+         console.print(f"\n[bold]Total time:[/bold] {time.time() - total_start:.1f}s")
+
+         # Print summary
+         console.print("")
+         table = Table(title="Running VMs", show_lines=True)
+         table.add_column("VM", style="cyan", no_wrap=True)
+         table.add_column("Job ID", style="dim", no_wrap=True)
+         table.add_column("Services", style="white")
+         table.add_column("Connect URL", style="blue", no_wrap=True)
+
+         for vm_name, vm_config in config.vms.items():
+             env = env_map.get(vm_name)
+             if env:
+                 table.add_row(
+                     vm_name,
+                     env.job_id,
+                     ", ".join(vm_config.services),
+                     f"https://{env.job_id}.connect.plato.so",
+                 )
+
+         console.print(table)
+
+         # Start fswatch for hot reload if enabled
+         if use_watch and has_local_paths and private_key_path:
+             console.print("")
+             console.print("[bold]Starting hot reload watcher...[/bold]")
+
+             # Parse .gitignore for excludes + x-plato.ignore
+             gitignore_path = config.compose_file.parent / ".gitignore"
+             excludes = parse_gitignore(gitignore_path) + config.ignore_paths
+             if verbose:
+                 console.print(f"  [dim]Excluding {len(excludes)} patterns[/dim]")
+
+             # Collect all unique local paths and their VM sync targets
+             all_local_paths: set[str] = set()
+             vm_syncs: list[tuple[Environment, str, str]] = []
+
+             for vm_name, paths in local_paths.items():
+                 env = env_map.get(vm_name)
+                 if not env:
+                     continue
+                 for local_p, remote_p in paths:
+                     all_local_paths.add(local_p)
+                     vm_syncs.append((env, remote_p, private_key_path))
+
+             watcher = FsWatchSync(
+                 local_paths=list(all_local_paths),
+                 vm_syncs=vm_syncs,
+                 excludes=excludes,
+                 verbose=verbose,
+             )
+             watcher.start()
+             console.print(f"  [green]Watching {len(all_local_paths)} path(s) for changes[/green]")
+
+         console.print("")
+         console.print(f"Session: {session.session_id}")
+         if verbose:
+             console.print("[dim]View logs: cat /tmp/plato-init.log[/dim]")
+         console.print("")
+         if use_watch:
+             console.print("[dim]Hot reload active - file changes will sync automatically[/dim]")
+         if save_name:
+             console.print(f"Press Enter to save snapshot '{save_name}' and close, Ctrl+C to close without saving")
+         else:
+             console.print("Press Enter or Ctrl+C to close")
+
+         # Wait for user input (blocking)
+         try:
+             await asyncio.get_event_loop().run_in_executor(None, input)
+         except EOFError:
+             # No interactive stdin; linger briefly before shutting down
+             await asyncio.sleep(10)
+
+         if save_name:
+             # Snapshot each VM in parallel, each with its own service/version/dataset.
+             # Naming: {config.name}-{vm_name}:{save_name}@base
+             # e.g., plato-preview-db:v1@base, plato-preview-app:v1@base
+             console.print(f"[bold]Saving snapshot '{save_name}'...[/bold]")
+             success_count = 0
+
+             async def snapshot_vm(vm_name: str, env: Environment):
+                 result = await env.disk_snapshot(
+                     override_service=f"{config.name}-{vm_name}",
+                     override_version=save_name,
+                     override_dataset="base",
+                     target="sims.plato.so",
+                 )
+                 return vm_name, result
+
+             step_start = time.time()
+             tasks = [snapshot_vm(vm_name, env) for vm_name, env in env_map.items()]
+             results = await asyncio.gather(*tasks, return_exceptions=True)
+
+             for result in results:
+                 if isinstance(result, BaseException):
+                     console.print(f"  [red]✗[/red] Error: {result}")
+                     continue
+                 vm_name, info = result
+                 if info and hasattr(info, "artifact_id") and info.artifact_id:
+                     success_count += 1
+                     console.print(f"  [green]✓[/green] {vm_name}: {config.name}-{vm_name}:{save_name}")
+                 else:
+                     error = getattr(info, "error", "unknown error") if info else "no result"
+                     console.print(f"  [red]✗[/red] {vm_name}: failed - {error}")
+
+             elapsed = time.time() - step_start
+             if success_count > 0:
+                 console.print(
+                     f"\n[green]✓[/green] Snapshot '{save_name}' saved ({success_count} VM(s), {elapsed:.1f}s)"
+                 )
+                 console.print(f"  [dim]Restore with: plato compose up {config.compose_file} --from {save_name}[/dim]")
+             else:
+                 console.print("\n[red]✗[/red] Failed to save snapshot - no VMs snapshotted")
+
+         console.print("\nClosing session...")
+
+     except asyncio.CancelledError:
+         console.print("\nClosing session...")
+     except Exception as e:
+         console.print(f"[red]Error:[/red] {e}")
+         import traceback
+
+         traceback.print_exc()
+         raise
+     finally:
+         # Clean up watcher
+         if watcher:
+             watcher.stop()
+         if session:
+             await session.close()
+         await plato.close()
+         console.print("Done!")
+
+
+ @app.command()
+ def up(
+     compose_file: Annotated[
+         Path,
+         typer.Argument(help="Path to docker-compose file"),
+     ] = Path("docker-compose.yml"),
+     timeout: Annotated[
+         int,
+         typer.Option("--timeout", "-t", help="Session timeout in seconds"),
+     ] = 7200,
+     save: Annotated[
+         str | None,
+         typer.Option("--save", "-s", help="Save snapshot with this name when exiting"),
+     ] = None,
+     restore: Annotated[
+         str | None,
+         typer.Option("--from", help="Restore from a named snapshot instead of creating fresh VMs"),
+     ] = None,
+     verbose: Annotated[
+         bool,
+         typer.Option("--verbose", "-v", help="Show detailed logs and SSH commands"),
+     ] = False,
+     watch: Annotated[
+         bool,
+         typer.Option("--watch", "-w", help="Enable hot reload - watch for file changes and rsync automatically"),
+     ] = False,
+ ):
+     """Start docker compose services across multiple VMs.
+
+     Uses rsync to sync local bind mount paths to VMs. With --watch, uses fswatch
+     to detect file changes and automatically rsync them to VMs for hot reload.
+
+     Snapshots provide warm caching - docker layers, dependencies, and build artifacts
+     are preserved, making subsequent builds much faster.
+
+     Examples:
+         # Fresh start
+         plato compose up docker-compose.yml
+
+         # Save snapshot when done (for faster future starts)
+         plato compose up docker-compose.yml --save my-stack
+
+         # Start from snapshot (warm cache - faster docker builds)
+         plato compose up docker-compose.yml --from my-stack
+
+     Requirements for --watch:
+         macOS: brew install fswatch
+         Linux: apt install fswatch
+     """
+
+     # Parse compose file
+     try:
+         config = parse_compose_file(compose_file)
+     except Exception as e:
+         console.print(f"[red]Parse error:[/red] {e}")
+         raise typer.Exit(1)
+
+     # Check for validation errors (warnings are also treated as errors)
+     has_issues = False
+
+     if config.errors:
+         console.print(f"[red bold]Errors ({len(config.errors)}):[/red bold]")
+         for err in config.errors:
+             console.print(f"  [red]✗[/red] [{err.service}] {err.message}")
+         has_issues = True
+
+     if config.warnings:
+         console.print(f"\n[red bold]Warnings treated as errors ({len(config.warnings)}):[/red bold]")
+         for warn in config.warnings:
+             console.print(f"  [red]✗[/red] [{warn.service}] {warn.message}")
+         has_issues = True
+
+     if has_issues:
+         total = len(config.errors) + len(config.warnings)
+         console.print(f"\n[red]Validation failed with {total} error(s)[/red]")
+         console.print("Fix errors before running. Use 'plato compose validate' for details.")
+         raise typer.Exit(1)
+
+     # Run async implementation
+     try:
+         asyncio.run(_up_async(config, timeout, save, verbose, watch, restore))
+     except KeyboardInterrupt:
+         console.print("\nInterrupted.")
+         raise typer.Exit(0)
+
+
+ @app.command()
+ def generate(
+     compose_file: Annotated[
+         Path,
+         typer.Argument(help="Path to docker-compose file"),
+     ] = Path("docker-compose.yml"),
+     vm: Annotated[
+         str,
+         typer.Option("--vm", "-v", help="Generate compose for specific VM"),
+     ] = "",
+ ):
+     """Generate per-VM docker-compose files (for debugging)."""
+
+     try:
+         config = parse_compose_file(compose_file)
+     except Exception as e:
+         console.print(f"[red]Parse error:[/red] {e}")
+         raise typer.Exit(1)
+
+     vms_to_generate = [vm] if vm else list(config.vms.keys())
+
+     for vm_name in vms_to_generate:
+         if vm_name not in config.vms:
+             console.print(f"[red]VM not found:[/red] {vm_name}")
+             continue
+
+         console.print(f"[cyan]# {vm_name}[/cyan]")
+         console.print(generate_vm_compose(config, vm_name))
+         console.print("")
+
+
+ if __name__ == "__main__":
+     app()