dayhoff-tools 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1310 @@
1
+ """Engine and Studio management commands for DHT CLI."""
2
+
3
+ import json
4
+ import subprocess
5
+ import sys
6
+ from datetime import datetime, timedelta
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ import boto3
11
+ import requests
12
+ import typer
13
+ from botocore.exceptions import ClientError, NoCredentialsError
14
+ from rich import box
15
+ from rich.console import Console
16
+ from rich.panel import Panel
17
+ from rich.progress import Progress, SpinnerColumn, TextColumn
18
+ from rich.prompt import Confirm, IntPrompt, Prompt
19
+ from rich.table import Table
20
+
21
# Initialize Typer apps: one command group for engines, one for studios.
engine_app = typer.Typer(help="Manage compute engines for development.")
studio_app = typer.Typer(help="Manage persistent development studios.")

# Shared Rich console used for all user-facing output in this module.
console = Console()

# Cost information: hourly price in dollars keyed by engine type. The trailing
# comment on each entry names the instance type the price refers to. Lookups
# elsewhere in this module use `.get(engine_type, 0)`, so unknown types cost 0.
HOURLY_COSTS = {
    "cpu": 0.50,  # r6i.2xlarge
    "cpumax": 1.00,  # r7i.8xlarge
    "t4": 1.00,  # g4dn.2xlarge
    "a10g": 2.00,  # g5.2xlarge
    "a100": 5.00,  # p4d.24xlarge
}

# SSH config management: marker appended to every "Host" line this tool writes
# to ~/.ssh/config, so managed entries can be found again and replaced/removed.
SSH_MANAGED_COMMENT = "# Managed by dh engine"
38
+
39
+
40
def check_aws_sso() -> str:
    """Check AWS SSO status and return the current username.

    Calls STS ``get_caller_identity``. For an assumed-role ARN the username is
    the last ``/``-separated segment of the ARN; otherwise the part of
    ``UserId`` after the last ``:`` is used.

    If the STS call fails with ``NoCredentialsError`` or ``ClientError`` the
    user is offered an interactive ``aws sso login``; on success the check is
    retried.

    Returns:
        The username derived from the caller identity.

    Raises:
        typer.Exit: with code 1 when the user is not logged in and declines to
            log in, or when the login attempt fails.
    """
    try:
        sts = boto3.client("sts")
        identity = sts.get_caller_identity()
    except (NoCredentialsError, ClientError):
        console.print("[red]❌ Not logged in to AWS SSO[/red]")
        console.print("Please run: [cyan]aws sso login[/cyan]")
        if Confirm.ask("Would you like to login now?"):
            try:
                # Output is deliberately NOT captured: `aws sso login` prints
                # the authorization URL and device code the user may need when
                # a browser cannot be opened automatically.
                subprocess.run(["aws", "sso", "login"], check=True)
            except FileNotFoundError:
                # The AWS CLI binary is not installed / not on PATH.
                console.print(
                    "[red]Login failed: the 'aws' command was not found[/red]"
                )
            except subprocess.CalledProcessError as e:
                console.print(f"[red]Login failed: {e}[/red]")
            else:
                console.print("[green]✓ Successfully logged in![/green]")
                return check_aws_sso()
        raise typer.Exit(1)

    # Parse username from assumed role ARN
    # Format: arn:aws:sts::123456789012:assumed-role/AWSReservedSSO_DeveloperAccess_xxxx/username
    arn = identity["Arn"]
    if "assumed-role" in arn:
        return arn.split("/")[-1]
    # Fallback for other auth methods
    return identity["UserId"].split(":")[-1]
71
+
72
+
73
def get_api_url() -> str:
    """Look up the Studio Manager API base URL in SSM Parameter Store.

    Reads the parameter ``/dev/studio-manager/api-url`` in ``us-east-1``.

    Returns:
        The parameter's string value.

    Raises:
        typer.Exit: with code 1 (after printing a message) when the SSM call
            fails with a ``ClientError``.
    """
    client = boto3.client("ssm", region_name="us-east-1")
    try:
        parameter = client.get_parameter(Name="/dev/studio-manager/api-url")
        return parameter["Parameter"]["Value"]
    except ClientError as err:
        missing = err.response["Error"]["Code"] == "ParameterNotFound"
        if not missing:
            console.print(f"[red]❌ Error retrieving API URL: {err}[/red]")
            raise typer.Exit(1)
        # The parameter does not exist at all: point at the deployment.
        console.print(
            "[red]❌ API URL parameter not found in SSM Parameter Store[/red]"
        )
        console.print("Please ensure the Studio Manager infrastructure is deployed.")
        raise typer.Exit(1)
90
+
91
+
92
def make_api_request(
    method: str,
    endpoint: str,
    json_data: Optional[Dict] = None,
    params: Optional[Dict] = None,
    timeout: float = 30.0,
) -> requests.Response:
    """Make a Studio Manager API request with error handling.

    Args:
        method: HTTP method; one of ``"GET"``, ``"POST"`` or ``"DELETE"``.
        endpoint: Path appended verbatim to the API base URL.
        json_data: JSON body, sent for ``POST`` requests only.
        params: Query parameters, sent for ``GET`` requests only.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without this a stalled
            connection would hang the CLI indefinitely.

    Returns:
        The ``requests.Response``; the HTTP status is NOT checked here, callers
        inspect ``status_code`` themselves.

    Raises:
        ValueError: if ``method`` is not supported.
        typer.Exit: with code 1 when the request fails at the transport level
            (connection error, timeout, ...).
    """
    api_url = get_api_url()
    url = f"{api_url}{endpoint}"

    try:
        if method == "GET":
            response = requests.get(url, params=params, timeout=timeout)
        elif method == "POST":
            response = requests.post(url, json=json_data, timeout=timeout)
        elif method == "DELETE":
            response = requests.delete(url, timeout=timeout)
        else:
            raise ValueError(f"Unsupported HTTP method: {method}")

        return response
    except requests.exceptions.RequestException as e:
        console.print(f"[red]❌ API request failed: {e}[/red]")
        raise typer.Exit(1)
113
+
114
+
115
def format_duration(duration: timedelta) -> str:
    """Format a duration as a human-readable string.

    Args:
        duration: The time span to format. Seconds are truncated; negative
            spans (e.g. from clock skew between client and server) are
            treated as zero instead of producing nonsense via floor division.

    Returns:
        ``"<H>h <M>m"`` when the span is at least one hour, else ``"<M>m"``.
        Hours are not folded into days (two days render as ``"48h 0m"``).
    """
    total_seconds = max(int(duration.total_seconds()), 0)
    hours, remainder = divmod(total_seconds, 3600)
    minutes = remainder // 60

    if hours > 0:
        return f"{hours}h {minutes}m"
    return f"{minutes}m"
125
+
126
+
127
def parse_launch_time(launch_time_str: str) -> datetime:
    """Parse a launch time string from the API into a naive UTC datetime.

    The explicit ``Z``-suffixed and space-separated formats are tried first.
    Anything else is handed to ``datetime.fromisoformat`` so that ISO-8601
    strings carrying a UTC offset (e.g. ``2024-01-02T03:04:05+00:00``) are
    understood; offset-aware results are converted to UTC and made naive
    because callers subtract the result from ``datetime.utcnow()``.

    Args:
        launch_time_str: Timestamp text from the API response.

    Returns:
        The parsed time as a naive datetime. When the value cannot be parsed
        (including non-string input) the current UTC time is returned, i.e.
        the launch is assumed to be recent.
    """
    # Local import: the module-level import only brings in datetime/timedelta.
    from datetime import timezone

    # Try different datetime formats
    formats = (
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%d %H:%M:%S",
    )
    for fmt in formats:
        try:
            return datetime.strptime(launch_time_str, fmt)
        except (ValueError, TypeError):
            continue

    try:
        parsed = datetime.fromisoformat(launch_time_str)
    except (ValueError, TypeError):
        parsed = None

    if parsed is not None:
        if parsed.tzinfo is not None:
            # Normalise to naive UTC so arithmetic with utcnow() is valid.
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    # Fallback: assume it's recent
    return datetime.now(timezone.utc).replace(tzinfo=None)
142
+
143
+
144
def format_status(state: str, ready: Optional[bool]) -> str:
    """Render an engine state as Rich markup, including a readiness marker.

    Args:
        state: Engine state; compared case-insensitively.
        ready: For running engines, ``True`` adds a check mark, ``False`` adds
            an "Initializing" warning, and ``None`` shows plain "Running".

    Returns:
        Markup for the known states (running, stopped, stopping, pending);
        any other state is returned unchanged.
    """
    normalized = state.lower()

    if normalized != "running":
        other_states = {
            "stopped": "[dim]Stopped[/dim]",
            "stopping": "[yellow]Stopping...[/yellow]",
            "pending": "[yellow]Starting...[/yellow]",
        }
        # Unknown states fall through as the caller's original text.
        return other_states.get(normalized, state)

    if ready is True:
        return "[green]Running ✓[/green]"
    if ready is False:
        return "[yellow]Running ⚠ (Initializing...)[/yellow]"
    return "[green]Running[/green]"
161
+
162
+
163
def resolve_engine(name_or_id: str, engines: List[Dict]) -> Dict:
    """Pick the engine a user means from a name or instance ID.

    Resolution order: exact instance-ID match, then a unique exact name
    match, then prefix matches on name or instance ID. Several prefix matches
    trigger an interactive numbered prompt.

    Args:
        name_or_id: Full or partial engine name or instance ID.
        engines: Engine dicts; this function reads the ``instance_id``,
            ``name``, ``engine_type`` and ``state`` keys.

    Returns:
        The selected engine dict.

    Raises:
        typer.Exit: with code 1 when nothing matches.
    """
    # Exact ID match wins outright.
    id_hits = list(filter(lambda e: e["instance_id"] == name_or_id, engines))
    if id_hits:
        return id_hits[0]

    # An exact name only counts when it is unambiguous.
    name_hits = list(filter(lambda e: e["name"] == name_or_id, engines))
    if len(name_hits) == 1:
        return name_hits[0]

    # Otherwise fall back to prefix matching on either field.
    candidates = [
        e
        for e in engines
        if e["name"].startswith(name_or_id) or e["instance_id"].startswith(name_or_id)
    ]

    if not candidates:
        console.print(f"[red]❌ No engine found matching '{name_or_id}'[/red]")
        raise typer.Exit(1)
    if len(candidates) == 1:
        return candidates[0]

    # Ambiguous: list the candidates and let the user choose.
    console.print(f"Multiple engines match '{name_or_id}':")
    for number, candidate in enumerate(candidates, start=1):
        cost = HOURLY_COSTS.get(candidate["engine_type"], 0)
        console.print(
            f" {number}. [cyan]{candidate['name']}[/cyan] ({candidate['instance_id']}) "
            f"- {candidate['engine_type']} - {candidate['state']} - ${cost:.2f}/hr"
        )

    valid_choices = [str(n) for n in range(1, len(candidates) + 1)]
    while True:
        try:
            picked = IntPrompt.ask("Select engine", default=1, choices=valid_choices)
            return candidates[picked - 1]
        except (ValueError, IndexError):
            console.print("[red]Invalid selection, please try again[/red]")
205
+
206
+
207
def get_ssh_public_key() -> str:
    """Return the contents of the user's SSH public key.

    Looks in ``~/.ssh`` for ``id_ed25519.pub`` first and ``id_rsa.pub``
    second, returning the first one that exists as a file.

    Returns:
        The key file's text with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: if neither key file exists.
    """
    ssh_dir = Path.home() / ".ssh"

    for filename in ("id_ed25519.pub", "id_rsa.pub"):
        candidate = ssh_dir / filename
        if candidate.is_file():
            return candidate.read_text().strip()

    raise FileNotFoundError(
        "No SSH public key found. Please create one with 'ssh-keygen' first."
    )
219
+
220
+
221
def update_ssh_config_entry(engine_name: str, instance_id: str, username: str):
    """Add or update a single managed entry in ``~/.ssh/config``.

    The entry's ``Host`` line is tagged with ``SSH_MANAGED_COMMENT``. If a
    line equal to that ``Host`` line already exists, the old entry (the Host
    line plus the option lines that follow it) is replaced in place;
    otherwise the entry is appended. The file is created if missing and its
    mode is set to 0600.

    Args:
        engine_name: Host alias written to the config.
        instance_id: Instance ID written as ``HostName`` (it becomes ``%h``
            in the SSM ``ProxyCommand``).
        username: Login user written as ``User``.
    """
    config_path = Path.home() / ".ssh" / "config"
    config_path.parent.mkdir(mode=0o700, exist_ok=True)

    # Touch the file if it doesn't exist
    if not config_path.exists():
        config_path.touch(mode=0o600)

    host_line = f"Host {engine_name} {SSH_MANAGED_COMMENT}"
    entry_lines = [
        host_line,
        f"    HostName {instance_id}",
        f"    User {username}",
        '    ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters \'portNumber=%p\'"',
    ]

    updated = []
    replaced = False
    skipping = False
    for line in config_path.read_text().splitlines():
        stripped = line.strip()
        if stripped == host_line:
            # Replace the first occurrence; later duplicates are just dropped.
            if not replaced:
                updated.extend(entry_lines)
                replaced = True
            skipping = True
            continue
        if skipping:
            # Drop the old entry's option lines. The old block ends at the
            # first blank line or at the next Host/Match section; a fixed
            # line count would risk eating an unrelated line that follows.
            if stripped and not stripped.lower().startswith(("host ", "match ")):
                continue
            skipping = False
        updated.append(line)

    if not replaced:
        # Append new entry, separated from existing content by one blank line.
        while updated and not updated[-1].strip():
            updated.pop()
        if updated:
            updated.append("")
        updated.extend(entry_lines)

    # Write back (with a trailing newline so later appends start cleanly)
    config_path.write_text("\n".join(updated) + "\n")
    config_path.chmod(0o600)
265
+
266
+
267
+ # ==================== ENGINE COMMANDS ====================
268
+
269
+
270
@engine_app.command("launch")
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
):
    """Launch a new engine instance."""
    # Flow: authenticate, validate the engine type, POST /engines, then report
    # the new instance or exit with code 1 on any failure.
    sso_username = check_aws_sso()
    username = user if user else sso_username

    # Reject unknown engine types before calling the API.
    valid_types = ["cpu", "cpumax", "t4", "a10g", "a100"]
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    cost = HOURLY_COSTS.get(engine_type, 0)
    console.print(f"Launching [cyan]{name}[/cyan] ({engine_type}) for ${cost:.2f}/hour...")

    payload = {"name": name, "user": username, "engine_type": engine_type}
    spinner = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    )
    with spinner as progress:
        progress.add_task("Creating engine...", total=None)
        response = make_api_request("POST", "/engines", json_data=payload)

    if response.status_code != 201:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine launched successfully![/green]")
    console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
    console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
    console.print("\nThe engine is initializing. This may take a few minutes.")
    console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
320
+
321
+
322
@engine_app.command("list")
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    all_users: bool = typer.Option(False, "--all", "-a", help="Show all users' engines"),
    running_only: bool = typer.Option(False, "--running", help="Show only running engines"),
    stopped_only: bool = typer.Option(False, "--stopped", help="Show only stopped engines"),
):
    """List all engines."""
    # Fetches engines from the API (filtered to the current user unless --all
    # or --user is given), optionally filters by state, and prints a table
    # with uptime and cost columns. Exits with code 1 if the API call fails.
    current_user = check_aws_sso()

    params = {}
    if user:
        params["user"] = user
    elif not all_users:
        params["user"] = current_user

    response = make_api_request("GET", "/engines", params=params)

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
        # Signal the failure to the shell, like the other commands do.
        raise typer.Exit(1)

    engines = response.json().get("engines", [])

    # Filter by state if requested (--running takes precedence over --stopped)
    if running_only:
        engines = [e for e in engines if e["state"].lower() == "running"]
    elif stopped_only:
        engines = [e for e in engines if e["state"].lower() == "stopped"]

    if not engines:
        console.print("No engines found.")
        return

    # Create table
    table = Table(title="Engines", box=box.ROUNDED)
    table.add_column("Name", style="cyan")
    table.add_column("Instance ID", style="dim")
    table.add_column("Type")
    table.add_column("User")
    table.add_column("Status")
    table.add_column("Uptime/Since")
    table.add_column("$/hour", justify="right")
    table.add_column("Cost Today", justify="right", style="yellow")

    total_cost = 0.0
    for engine in engines:
        launch_time = parse_launch_time(engine["launch_time"])
        uptime = datetime.utcnow() - launch_time
        hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)

        if engine["state"].lower() == "running":
            # "Today" is approximated as uptime capped at 24 hours.
            daily_cost = hourly_cost * min(uptime.total_seconds() / 3600, 24)
            total_cost += daily_cost
            time_str = format_duration(uptime)
        else:
            # Non-running engines show their launch timestamp and no cost.
            daily_cost = 0
            time_str = launch_time.strftime("%Y-%m-%d %H:%M")

        table.add_row(
            engine["name"],
            engine["instance_id"],
            engine["engine_type"],
            engine["user"],
            format_status(engine["state"], engine.get("ready")),
            time_str,
            f"${hourly_cost:.2f}",
            f"${daily_cost:.2f}" if daily_cost > 0 else "-",
        )

    console.print(table)

    if total_cost > 0:
        console.print(f"\n[yellow]Total cost today: ${total_cost:.2f}[/yellow]")
397
+
398
+
399
@engine_app.command("status")
def engine_status(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Show detailed status of an engine."""
    # Resolves the engine from the full engine list, fetches its attached
    # studios, and prints a panel with identity, state, uptime and cost.
    check_aws_sso()

    # The full list is needed so a name or prefix can be resolved.
    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)
    engine = resolve_engine(name_or_id, listing.json().get("engines", []))

    # Attached studios are optional detail: a failed lookup just shows none.
    studios_response = make_api_request(
        "GET", f"/engines/{engine['instance_id']}/attached-studios"
    )
    attached_studios = (
        studios_response.json().get("studios", [])
        if studios_response.status_code == 200
        else []
    )

    # Cost is hourly rate times hours elapsed since launch.
    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.utcnow() - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
    total_cost = hourly_cost * (uptime.total_seconds() / 3600)

    status_lines = [
        f"[bold]Name:[/bold] {engine['name']}",
        f"[bold]Instance:[/bold] {engine['instance_id']}",
        f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
        f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
        f"[bold]User:[/bold] {engine['user']}",
        f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
        f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
        f"[bold]Cost:[/bold] ${hourly_cost:.2f}/hour (${total_cost:.2f} total)",
    ]

    if attached_studios:
        status_lines += ["", "[bold]Attached Studios:[/bold]"]
        status_lines += [
            f" • {studio['user']} ({studio['studio_id']}) - attached {studio.get('attach_time', 'Unknown')}"
            for studio in attached_studios
        ]

    console.print(
        Panel(
            "\n".join(status_lines),
            title="Engine Details",
            border_style="blue",
        )
    )
454
+
455
+
456
@engine_app.command("stop")
def stop_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    force: bool = typer.Option(False, "--force", "-f", help="Force stop and detach all studios"),
):
    """Stop an engine."""
    # Resolves the engine, asks the API to stop it, and - when the API answers
    # 409 because studios are attached - offers to detach them and retry.
    # Exits with code 1 if the stop ultimately fails.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    stop_endpoint = f"/engines/{engine['instance_id']}/stop"

    console.print(f"Stopping engine [cyan]{engine['name']}[/cyan]...")

    # First attempt: studios are detached only when --force was given.
    response = make_api_request(
        "POST",
        stop_endpoint,
        json_data={"detach_studios": force},
    )

    if response.status_code == 409 and not force:
        # Engine has attached studios
        attached_studios = response.json().get("attached_studios", [])

        console.print("\n[yellow]⚠️ This engine has attached studios:[/yellow]")
        for studio in attached_studios:
            console.print(f" • {studio['user']} ({studio['studio_id']})")

        if not Confirm.ask("\nDetach all studios and stop the engine?"):
            console.print("Stop cancelled.")
            return

        # Second attempt, this time detaching the studios.
        response = make_api_request(
            "POST",
            stop_endpoint,
            json_data={"detach_studios": True},
        )

    if response.status_code == 200:
        console.print("[green]✓ Engine stopped successfully![/green]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to stop engine: {error}[/red]")
        # Report the failure through the exit code as well.
        raise typer.Exit(1)
506
+
507
+
508
@engine_app.command("start")
def start_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Start a stopped engine."""
    # Resolves the engine from the full list and POSTs to its /start
    # endpoint. Exits with code 1 if either API call fails.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Starting engine [cyan]{engine['name']}[/cyan]...")

    response = make_api_request("POST", f"/engines/{engine['instance_id']}/start")

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to start engine: {error}[/red]")
        # Report the failure through the exit code as well.
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine started successfully![/green]")
    console.print(f"New public IP: {data.get('public_ip', 'Pending...')}")
535
+
536
+
537
@engine_app.command("terminate")
def terminate_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Permanently terminate an engine."""
    # Resolves the engine, shows the cost accrued since launch, asks for
    # confirmation and then issues DELETE /engines/<id>. Exits with code 1 if
    # an API call fails; declining the confirmation is not an error.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Calculate cost: hourly rate times hours elapsed since launch time.
    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.utcnow() - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
    total_cost = hourly_cost * (uptime.total_seconds() / 3600)

    console.print(f"\n[yellow]⚠️ This will permanently terminate engine '{engine['name']}'[/yellow]")
    console.print(f"Total cost for this session: ${total_cost:.2f}")

    if not Confirm.ask("\nAre you sure you want to terminate this engine?"):
        console.print("Termination cancelled.")
        return

    response = make_api_request("DELETE", f"/engines/{engine['instance_id']}")

    if response.status_code == 200:
        console.print("[green]✓ Engine terminated successfully![/green]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to terminate engine: {error}[/red]")
        # Report the failure through the exit code as well.
        raise typer.Exit(1)
573
+
574
+
575
@engine_app.command("ssh")
def ssh_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Connect to an engine via SSH."""
    # Resolves the engine, refuses unless it is running, refreshes its entry
    # in ~/.ssh/config and then hands over to the `ssh` client.
    username = check_aws_sso()

    # The full list is needed so a name or prefix can be resolved.
    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engine = resolve_engine(name_or_id, listing.json().get("engines", []))
    engine_name = engine["name"]

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    # Make sure the host alias points at the current instance.
    console.print(f"Updating SSH config for [cyan]{engine_name}[/cyan]...")
    update_ssh_config_entry(engine_name, engine["instance_id"], username)

    # Hand the terminal over to ssh, using the alias written above.
    console.print(f"[green]✓ Connecting to {engine_name}...[/green]")
    subprocess.run(["ssh", engine_name])
602
+
603
+
604
@engine_app.command("config-ssh")
def config_ssh(
    clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
):
    """Update SSH config with all available engines."""
    # Rewrites ~/.ssh/config: every block whose Host line carries
    # SSH_MANAGED_COMMENT is removed, then (unless --clean) one block per
    # running engine is appended. Unmanaged content is preserved.
    username = check_aws_sso()

    if clean:
        console.print("Removing all managed SSH entries...")
        # Cleaning needs no engine data, so the API is not contacted.
        running_engines = []
    else:
        console.print("Updating SSH config with all running engines...")

        # Get all engines
        response = make_api_request("GET", "/engines")
        if response.status_code != 200:
            console.print("[red]❌ Failed to fetch engines[/red]")
            raise typer.Exit(1)

        engines = response.json().get("engines", [])
        running_engines = [e for e in engines if e["state"].lower() == "running"]

    # Read existing config
    config_path = Path.home() / ".ssh" / "config"
    config_path.parent.mkdir(mode=0o700, exist_ok=True)
    lines = config_path.read_text().splitlines() if config_path.exists() else []

    # Remove old managed entries
    kept = []
    skipping = False
    for line in lines:
        stripped = line.strip()
        if SSH_MANAGED_COMMENT in line:
            skipping = True
            continue
        if skipping:
            # A managed block ends at the first blank line or at the next
            # Host/Match section, so unrelated content after it survives.
            if stripped and not stripped.lower().startswith(("host ", "match ")):
                continue
            skipping = False
        kept.append(line)

    # Drop trailing blank lines so repeated runs do not grow the file.
    while kept and not kept[-1].strip():
        kept.pop()

    # Add new entries if not cleaning (running_engines is empty when cleaning)
    for engine in running_engines:
        if kept:
            kept.append("")
        kept.extend(
            [
                f"Host {engine['name']} {SSH_MANAGED_COMMENT}",
                f" HostName {engine['instance_id']}",
                f" User {username}",
                f' ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters \'portNumber=%p\'"',
            ]
        )

    # Write back, ending with a newline unless the file is empty
    config_path.write_text(("\n".join(kept) + "\n") if kept else "")
    config_path.chmod(0o600)

    if clean:
        console.print("[green]✓ Removed all managed SSH entries[/green]")
    else:
        console.print(
            f"[green]✓ Updated SSH config with {len(running_engines)} engines[/green]"
        )
        for engine in running_engines:
            console.print(f" • {engine['name']} → {engine['instance_id']}")
675
+
676
+
677
@engine_app.command("keep-awake")
def keep_awake(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 4h30m)"),
):
    """Prevent an engine from auto-shutting down."""
    # Validates the duration, resolves a running engine, then runs
    # `engine keep-alive <duration>` on it through SSM Run Command and polls
    # for the result for about ten seconds. Exits with code 1 on failure.
    import re
    import time

    check_aws_sso()

    # Parse duration. fullmatch (not match) is essential: the string is
    # interpolated into a shell command below, so nothing but digits followed
    # by "h"/"m" may be accepted.
    match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
    if not match or (not match.group(1) and not match.group(2)):
        console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
        console.print("Use format like: 4h, 30m, 2h30m")
        raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    console.print(
        f"Setting keep-awake for [cyan]{engine['name']}[/cyan] for {duration}..."
    )

    # Use SSM to run the engine keep-alive command
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [f"engine keep-alive {duration}"],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Wait for command to complete
        result = {"Status": "Pending"}
        for _ in range(10):
            time.sleep(1)
            try:
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
            except ClientError as e:
                # The invocation may not be registered yet right after
                # send_command; that is not a failure, just poll again.
                if e.response["Error"]["Code"] == "InvocationDoesNotExist":
                    continue
                raise
            if result["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
                break

        if result["Status"] == "Success":
            console.print(f"[green]✓ Engine will stay awake for {duration}[/green]")
            console.print(
                "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
            )
            console.print(
                "[dim]Use keep-awake for nohup operations or other background tasks.[/dim]"
            )
        else:
            console.print(f"[red]❌ Failed to set keep-awake: {result.get('StatusDetails', 'Unknown error')}[/red]")
            raise typer.Exit(1)

    except ClientError as e:
        console.print(f"[red]❌ Failed to set keep-awake: {e}[/red]")
        raise typer.Exit(1)
752
+
753
+
754
@engine_app.command("cancel-keep-awake")
def cancel_keep_awake(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Cancel keep-awake and re-enable auto-shutdown."""
    # Resolves the engine, runs `engine cancel` on it through SSM Run Command
    # and polls for the result for about ten seconds. Exits with code 1 on
    # failure.
    import time

    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Cancelling keep-awake for [cyan]{engine['name']}[/cyan]...")

    # Use SSM to run the engine cancel command
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": ["engine cancel"],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Wait for command to complete
        result = {"Status": "Pending"}
        for _ in range(10):
            time.sleep(1)
            try:
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
            except ClientError as e:
                # The invocation may not be registered yet right after
                # send_command; that is not a failure, just poll again.
                if e.response["Error"]["Code"] == "InvocationDoesNotExist":
                    continue
                raise
            if result["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
                break

        if result["Status"] == "Success":
            console.print("[green]✓ Keep-awake cancelled, auto-shutdown re-enabled[/green]")
        else:
            console.print(f"[red]❌ Failed to cancel keep-awake: {result.get('StatusDetails', 'Unknown error')}[/red]")
            raise typer.Exit(1)

    except ClientError as e:
        console.print(f"[red]❌ Failed to cancel keep-awake: {e}[/red]")
        raise typer.Exit(1)
804
+
805
+
806
@engine_app.command("create-ami")
def create_ami(
    name_or_id: str = typer.Argument(help="Engine name or instance ID to create AMI from"),
):
    """Create a Golden AMI from an engine."""
    # Steps: resolve the engine, confirm with the user, run a cleanup script
    # on the instance via SSM, exclude every non-root volume, then call EC2
    # create_image (with reboot). The image is built asynchronously by AWS;
    # this command returns as soon as the AMI ID is known.
    import time

    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")

    # Get engine type from the engine
    engine_type = engine["engine_type"]

    # Generate AMI name (date-based, so a second AMI for the same engine type
    # on the same day reuses the same name)
    date_str = datetime.now().strftime("%Y%m%d")
    ami_name = f"prewarmed-engine-{engine_type}-{date_str}"
    ami_description = (
        f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled dev container "
        f"image for {engine_type} engines"
    )

    console.print(f"AMI Name: [cyan]{ami_name}[/cyan]")
    console.print(f"Description: {ami_description}")
    console.print("\n[yellow]⚠️ Important: This will reboot the engine to ensure a clean snapshot.[/yellow]")

    if not Confirm.ask("\nContinue with AMI creation?"):
        console.print("AMI creation cancelled.")
        return

    # Create AMI using EC2 client
    ec2 = boto3.client("ec2", region_name="us-east-1")

    try:
        # First, we need to clean up the sentinel file via SSM
        console.print("Cleaning up bootstrap sentinel file...")
        ssm = boto3.client("ssm", region_name="us-east-1")

        cleanup_response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
                    "history -c",
                    "sudo rm -rf /tmp/* /var/log/*",
                ],
                "executionTimeout": ["60"],
            },
        )

        # Wait for cleanup to complete
        command_id = cleanup_response["Command"]["CommandId"]
        result = {"Status": "Pending"}
        for _ in range(10):
            time.sleep(1)
            try:
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
            except ClientError as e:
                # The invocation may not be registered yet right after
                # send_command. Without this guard the outer handler would
                # abort the whole AMI creation on a harmless race.
                if e.response["Error"]["Code"] == "InvocationDoesNotExist":
                    continue
                raise
            if result["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
                break

        # A failed or unfinished cleanup is only a warning; the AMI is still built.
        if result["Status"] != "Success":
            console.print("[yellow]⚠️ Warning: Cleanup command may have failed[/yellow]")

        # Get instance details to find volumes to exclude
        instances = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
        instance = instances["Reservations"][0]["Instances"][0]

        root_device = instance.get("RootDeviceName", "/dev/xvda")
        block_mappings = instance.get("BlockDeviceMappings", [])

        # Build exclusion list for non-root volumes
        block_device_mappings = []
        for mapping in block_mappings:
            device_name = mapping.get("DeviceName", "")
            if device_name != root_device:
                block_device_mappings.append({"DeviceName": device_name, "NoDevice": ""})
                console.print(f" Excluding volume at {device_name}")

        create_params = {
            "InstanceId": engine["instance_id"],
            "Name": ami_name,
            "Description": ami_description,
            "NoReboot": False,  # Important: reboot for clean snapshot
            "TagSpecifications": [
                {
                    "ResourceType": "image",
                    "Tags": [
                        {"Key": "Environment", "Value": "dev"},
                        {"Key": "Type", "Value": "golden-ami"},
                        {"Key": "EngineType", "Value": engine_type},
                        {"Key": "Name", "Value": ami_name},
                    ],
                }
            ],
        }

        if block_device_mappings:
            create_params["BlockDeviceMappings"] = block_device_mappings

        # Create the AMI
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            progress.add_task("Creating AMI (this will take several minutes)...", total=None)
            response = ec2.create_image(**create_params)

        ami_id = response["ImageId"]
        console.print("[green]✓ AMI creation initiated![/green]")
        console.print(f"AMI ID: [cyan]{ami_id}[/cyan]")
        console.print("\n[dim]The AMI creation process will continue in the background.[/dim]")
        console.print("[dim]You can monitor progress in the EC2 Console under 'AMIs'.[/dim]")
        console.print(
            "\nOnce complete, run [cyan]terraform apply[/cyan] in "
            "terraform/environments/dev to use the new AMI."
        )

    except ClientError as e:
        console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
        raise typer.Exit(1)
938
+
939
+
940
+ # ==================== STUDIO COMMANDS ====================
941
+
942
+
943
def get_user_studio(username: str) -> Optional[Dict]:
    """Look up the studio that belongs to ``username``.

    Fetches ``/studios`` through ``make_api_request`` and returns the first
    entry whose ``UserID`` equals ``username``.

    Returns:
        The matching studio dict, or ``None`` when the request does not come
        back with HTTP 200 or when no entry matches.  Callers therefore cannot
        tell "API call failed" apart from "user has no studio".
    """
    reply = make_api_request("GET", "/studios")
    if reply.status_code == 200:
        # Collect every match (not just the first) so each listed studio is
        # inspected, exactly as a full filter over the list would do.
        owned = []
        for candidate in reply.json().get("studios", []):
            if candidate["UserID"] == username:
                owned.append(candidate)
        if owned:
            return owned[0]
    return None
953
+
954
+
955
@studio_app.command("create")
def create_studio(
    size_gb: int = typer.Option(500, "--size", "-s", help="Studio size in GB"),
):
    """Create a new studio for the current user."""
    # Flow: resolve the caller's username, refuse to create a second studio,
    # then POST /studios with {"user", "size_gb"} and report the outcome.
    #
    # size_gb: requested studio size, forwarded unchanged as "size_gb".
    # Raises typer.Exit(1) when the API does not answer with HTTP 201, so
    # scripts wrapping this command can detect the failure.
    username = check_aws_sso()

    # Nothing to do if the user already owns a studio.
    existing = get_user_studio(username)
    if existing:
        console.print(f"[yellow]You already have a studio: {existing['StudioID']}[/yellow]")
        return

    console.print(f"Creating {size_gb}GB studio for user [cyan]{username}[/cyan]...")

    # Spinner is transient: it disappears once the request has returned.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating studio volume...", total=None)

        response = make_api_request(
            "POST",
            "/studios",
            json_data={"user": username, "size_gb": size_gb},
        )

    if response.status_code != 201:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to create studio: {error}[/red]")
        # Previously the command fell through and exited with status 0.
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Studio created successfully![/green]")
    console.print(f"Studio ID: [cyan]{data['studio_id']}[/cyan]")
    console.print(f"Size: {data['size_gb']}GB")
    console.print("\nNext step: [cyan]dh studio attach <engine-name>[/cyan]")
992
+
993
+
994
@studio_app.command("status")
def studio_status():
    """Show status of your studio."""
    # Renders a "Studio Details" panel for the caller's studio.  When the
    # studio lists an attached VM, /engines is queried so the engine's name
    # can be shown next to its instance id.
    owner = check_aws_sso()
    record = get_user_studio(owner)

    if not record:
        console.print("[yellow]You don't have a studio yet.[/yellow]")
        console.print("Create one with: [cyan]dh studio create[/cyan]")
        return

    details = [
        f"[bold]Studio ID:[/bold] {record['StudioID']}",
        f"[bold]User:[/bold] {record['UserID']}",
        f"[bold]Status:[/bold] {record['Status']}",
        f"[bold]Size:[/bold] {record['SizeGB']}GB",
        f"[bold]Created:[/bold] {record['CreationDate']}",
    ]

    vm_id = record.get("AttachedVMID")
    if vm_id:
        details.append(f"[bold]Attached to:[/bold] {vm_id}")

        # Best effort: a non-200 answer simply leaves the engine name out.
        reply = make_api_request("GET", "/engines")
        if reply.status_code == 200:
            for candidate in reply.json().get("engines", []):
                if candidate["instance_id"] == vm_id:
                    # Only the first engine with this instance id is used.
                    if candidate:
                        details.append(f"[bold]Engine Name:[/bold] {candidate['name']}")
                    break

    body = "\n".join(details)
    console.print(Panel(body, title="Studio Details", border_style="blue"))
1034
+
1035
+
1036
@studio_app.command("attach")
def attach_studio(
    engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Attach your studio to an engine."""
    # Steps:
    #   1. Find the caller's studio (offering to create one if missing).
    #   2. If it is already in use, offer to detach it first.
    #   3. Resolve the target engine and, if stopped, offer to start it.
    #   4. POST /studios/<id>/attach with the user's SSH public key.
    #   5. On success, write an SSH config entry for the engine.
    #
    # Every failure path raises typer.Exit(1); declining to create a studio
    # raises typer.Exit(0); declining the detach/reattach prompt returns.
    username = check_aws_sso()

    # Get user's studio
    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio yet.[/yellow]")
        if Confirm.ask("Would you like to create one now?"):
            size = IntPrompt.ask("Studio size (GB)", default=500)
            response = make_api_request(
                "POST",
                "/studios",
                json_data={"user": username, "size_gb": size},
            )
            if response.status_code != 201:
                console.print("[red]❌ Failed to create studio[/red]")
                raise typer.Exit(1)
            studio = response.json()
            # The create response uses "studio_id"; the rest of this function
            # reads "StudioID" as returned by the listing endpoint.
            studio["StudioID"] = studio["studio_id"]
        else:
            raise typer.Exit(0)

    # Check if already attached
    if studio.get("Status") == "in-use":
        console.print(
            f"[yellow]Studio is already attached to {studio.get('AttachedVMID')}[/yellow]"
        )
        if not Confirm.ask("Detach and reattach to new engine?"):
            return
        # Detach first
        response = make_api_request("POST", f"/studios/{studio['StudioID']}/detach")
        if response.status_code != 200:
            console.print("[red]❌ Failed to detach studio[/red]")
            raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(engine_name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[yellow]⚠️ Engine is {engine['state']}[/yellow]")
        if engine["state"].lower() == "stopped" and Confirm.ask("Start the engine first?"):
            response = make_api_request("POST", f"/engines/{engine['instance_id']}/start")
            if response.status_code != 200:
                console.print("[red]❌ Failed to start engine[/red]")
                raise typer.Exit(1)
            console.print("[green]✓ Engine started[/green]")
            console.print("Waiting for engine to be ready...")
            import time

            # Fixed grace period; the engine state is not polled here.
            time.sleep(10)
        else:
            # Any state other than "stopped", or the user declined to start.
            raise typer.Exit(1)

    # Get SSH key
    try:
        public_key = get_ssh_public_key()
    except FileNotFoundError as e:
        console.print(f"[red]❌ {e}[/red]")
        raise typer.Exit(1)

    console.print(f"Attaching studio to engine [cyan]{engine['name']}[/cyan]...")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        task = progress.add_task("Attaching studio...", total=100)

        response = make_api_request(
            "POST",
            f"/studios/{studio['StudioID']}/attach",
            json_data={
                "vm_id": engine["instance_id"],
                "user": username,
                "public_key": public_key,
            },
        )

        progress.update(task, completed=100)

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to attach studio: {error}[/red]")
        # Match the earlier failure paths: previously this branch fell
        # through and the command exited with status 0.
        raise typer.Exit(1)

    console.print("[green]✓ Studio attached successfully![/green]")

    # Update SSH config
    update_ssh_config_entry(engine["name"], engine["instance_id"], username)
    console.print("[green]✓ SSH config updated[/green]")
    console.print(f"\nConnect with: [cyan]ssh {engine['name']}[/cyan]")
    console.print(f"Your files are at: [cyan]/studios/{username}[/cyan]")
1137
+
1138
+
1139
@studio_app.command("detach")
def detach_studio():
    """Detach your studio from its current engine."""
    # Returns quietly when the user has no studio or the studio's Status is
    # not "in-use".  Otherwise POSTs /studios/<id>/detach and raises
    # typer.Exit(1) if the API does not answer with HTTP 200.
    username = check_aws_sso()

    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio.[/yellow]")
        return

    if studio.get("Status") != "in-use":
        console.print("[yellow]Your studio is not attached to any engine.[/yellow]")
        return

    console.print(f"Detaching studio from {studio.get('AttachedVMID')}...")

    response = make_api_request("POST", f"/studios/{studio['StudioID']}/detach")

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to detach studio: {error}[/red]")
        # Signal the failure to the shell; this used to exit with status 0.
        raise typer.Exit(1)

    console.print("[green]✓ Studio detached successfully![/green]")
1162
+
1163
+
1164
@studio_app.command("delete")
def delete_studio():
    """Delete your studio permanently."""
    # Destructive: requires two yes/no confirmations plus typing the literal
    # word DELETE before DELETE /studios/<id> is sent.  Any declined prompt
    # cancels and returns.  Raises typer.Exit(1) when the API does not answer
    # with HTTP 200.
    username = check_aws_sso()

    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio to delete.[/yellow]")
        return

    console.print("[red]⚠️ WARNING: This will permanently delete your studio and all data![/red]")
    console.print(f"Studio ID: {studio['StudioID']}")
    console.print(f"Size: {studio['SizeGB']}GB")

    # Multiple confirmations
    if not Confirm.ask("\nAre you sure you want to delete your studio?"):
        console.print("Deletion cancelled.")
        return

    if not Confirm.ask("[red]This action cannot be undone. Continue?[/red]"):
        console.print("Deletion cancelled.")
        return

    typed_confirm = Prompt.ask('Type "DELETE" to confirm permanent deletion')
    # Exact, case-sensitive match is required.
    if typed_confirm != "DELETE":
        console.print("Deletion cancelled.")
        return

    response = make_api_request("DELETE", f"/studios/{studio['StudioID']}")

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to delete studio: {error}[/red]")
        # Signal the failure to the shell; this used to exit with status 0.
        raise typer.Exit(1)

    console.print("[green]✓ Studio deleted successfully![/green]")
1201
+
1202
+
1203
@studio_app.command("list")
def list_studios(
    all_users: bool = typer.Option(False, "--all", "-a", help="Show all users' studios"),
):
    """List studios."""
    # Fetches /studios and prints them as a table.  Without --all, only rows
    # whose UserID equals the caller's username are shown.
    # Raises typer.Exit(1) when the API does not answer with HTTP 200.
    username = check_aws_sso()

    response = make_api_request("GET", "/studios")

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to list studios: {error}[/red]")
        # Signal the failure to the shell; this used to exit with status 0.
        raise typer.Exit(1)

    studios = response.json().get("studios", [])

    if not all_users:
        studios = [s for s in studios if s["UserID"] == username]

    if not studios:
        console.print("No studios found.")
        return

    # Create table
    table = Table(title="Studios", box=box.ROUNDED)
    table.add_column("Studio ID", style="cyan")
    table.add_column("User")
    table.add_column("Status")
    table.add_column("Size", justify="right")
    table.add_column("Attached To")
    table.add_column("Created")

    for studio in studios:
        status_color = "green" if studio["Status"] == "available" else "yellow"
        table.add_row(
            studio["StudioID"],
            studio["UserID"],
            f"[{status_color}]{studio['Status']}[/{status_color}]",
            f"{studio['SizeGB']}GB",
            # `or "-"` (not a .get default): the key can be present with a
            # null value, e.g. after `studio reset` stores None for it, and a
            # .get default would then yield None instead of the placeholder.
            studio.get("AttachedVMID") or "-",
            studio["CreationDate"],
        )

    console.print(table)
1246
+
1247
+
1248
@studio_app.command("reset")
def reset_studio():
    """Reset a stuck studio (admin operation)."""
    # Bypasses the API: talks to EC2 and to the "dev-studios" DynamoDB table
    # in us-east-1 directly.
    #
    # Steps:
    #   1. Confirm with the user.
    #   2. If EC2 reports the studio volume as attached, offer a force-detach
    #      and wait until the volume is available.  Declining aborts the
    #      reset, so the table is never marked "available" for a volume that
    #      is still attached.
    #   3. Set Status to "available" and null out AttachedVMID and
    #      AttachedDevice on the studio's item.
    #
    # Raises typer.Exit(1) on any AWS client or waiter error.
    from botocore.exceptions import WaiterError

    username = check_aws_sso()

    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio.[/yellow]")
        return

    console.print("[yellow]⚠️ This will force-reset your studio state[/yellow]")
    console.print(f"Current status: {studio['Status']}")
    if studio.get("AttachedVMID"):
        console.print(f"Listed as attached to: {studio['AttachedVMID']}")

    if not Confirm.ask("\nReset studio state?"):
        console.print("Reset cancelled.")
        return

    # Direct DynamoDB update
    console.print("Resetting studio state...")

    dynamodb = boto3.resource("dynamodb", region_name="us-east-1")
    table = dynamodb.Table("dev-studios")

    try:
        # Check if volume is actually attached (StudioID is used as the
        # EC2 volume id here).
        ec2 = boto3.client("ec2", region_name="us-east-1")
        volumes = ec2.describe_volumes(VolumeIds=[studio["StudioID"]])

        if volumes["Volumes"]:
            attachments = volumes["Volumes"][0].get("Attachments", [])
            if attachments:
                instance_id = attachments[0]["InstanceId"]
                console.print(f"[red]Volume is still attached to {instance_id}![/red]")
                if not Confirm.ask("Force-detach the volume?"):
                    # Marking the studio available while the volume is still
                    # attached would leave the table contradicting EC2.
                    console.print(
                        "[yellow]Studio left unchanged: the volume is still attached.[/yellow]"
                    )
                    console.print("Reset cancelled.")
                    return
                ec2.detach_volume(
                    VolumeId=studio["StudioID"],
                    InstanceId=instance_id,
                    Force=True,
                )
                console.print("Waiting for volume to detach...")
                waiter = ec2.get_waiter("volume_available")
                waiter.wait(VolumeIds=[studio["StudioID"]])

        # Reset in DynamoDB ("Status" is aliased because it is a DynamoDB
        # reserved word).
        table.update_item(
            Key={"StudioID": studio["StudioID"]},
            UpdateExpression="SET #status = :status, AttachedVMID = :vm_id, AttachedDevice = :device",
            ExpressionAttributeNames={"#status": "Status"},
            ExpressionAttributeValues={
                ":status": "available",
                ":vm_id": None,
                ":device": None,
            },
        )

        console.print("[green]✓ Studio reset to available state![/green]")

    except (ClientError, WaiterError) as e:
        # WaiterError is not a ClientError; without it a detach timeout
        # would surface as a raw traceback.
        console.print(f"[red]❌ Failed to reset studio: {e}[/red]")
        raise typer.Exit(1)
dayhoff_tools/cli/main.py CHANGED
@@ -4,6 +4,7 @@ import sys
4
4
 
5
5
  import typer
6
6
  from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
7
+ from dayhoff_tools.cli.engine_commands import engine_app, studio_app
7
8
  from dayhoff_tools.cli.utility_commands import (
8
9
  add_to_warehouse_typer,
9
10
  build_and_upload_wheel,
@@ -38,6 +39,10 @@ app.command("wimport")(import_from_warehouse_typer)
38
39
  app.add_typer(gcp_app, name="gcp", help="Manage GCP authentication and impersonation.")
39
40
  app.add_typer(aws_app, name="aws", help="Manage AWS SSO authentication.")
40
41
 
42
+ # Engine and Studio commands
43
+ app.add_typer(engine_app, name="engine", help="Manage compute engines for development.")
44
+ app.add_typer(studio_app, name="studio", help="Manage persistent development studios.")
45
+
41
46
 
42
47
  @app.command("wheel")
43
48
  def build_and_upload_wheel_command(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.3.1
3
+ Version: 1.3.2
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -3,7 +3,8 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
3
3
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
4
4
  dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
6
- dayhoff_tools/cli/main.py,sha256=47EGb28ALaYFc7oAUGlY1D66AIDmc4RZiXxN-gPVrpQ,4519
6
+ dayhoff_tools/cli/engine_commands.py,sha256=zHyisrYz0kgm_EXFIRqSR10jPPXrBmhMpZnN296x86U,46408
7
+ dayhoff_tools/cli/main.py,sha256=rgeEHD9lJ8SBCR34BTLb7gVInHUUdmEBNXAJnq5yEU4,4795
7
8
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
8
9
  dayhoff_tools/cli/utility_commands.py,sha256=qs8vH9TBFHsOPC3X8cU3qZigM3dDn-2Ytq4o_F2WubU,27874
9
10
  dayhoff_tools/deployment/base.py,sha256=mYp560l6hSDFtyY2H42VoM8k9VUzfwuiyh9Knqpgc28,17441
@@ -26,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
26
27
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
27
28
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
28
29
  dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
29
- dayhoff_tools-1.3.1.dist-info/METADATA,sha256=AyP_2vo_5tVylBVzP-EMkI3tPPxJIV-VpBdQxRQFIZU,2842
30
- dayhoff_tools-1.3.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
- dayhoff_tools-1.3.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
32
- dayhoff_tools-1.3.1.dist-info/RECORD,,
30
+ dayhoff_tools-1.3.2.dist-info/METADATA,sha256=i0AQJh4nEsStInWpyTviptfaOltjscM-MObda7o0vlI,2842
31
+ dayhoff_tools-1.3.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ dayhoff_tools-1.3.2.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
+ dayhoff_tools-1.3.2.dist-info/RECORD,,