dayhoff-tools 1.3.1__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/cli/engine_commands.py +1305 -0
- dayhoff_tools/cli/main.py +5 -0
- {dayhoff_tools-1.3.1.dist-info → dayhoff_tools-1.3.3.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.3.1.dist-info → dayhoff_tools-1.3.3.dist-info}/RECORD +6 -5
- {dayhoff_tools-1.3.1.dist-info → dayhoff_tools-1.3.3.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.3.1.dist-info → dayhoff_tools-1.3.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1305 @@
|
|
1
|
+
"""Engine and Studio management commands for DHT CLI."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
import subprocess
|
5
|
+
import sys
|
6
|
+
from datetime import datetime, timedelta
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import Dict, List, Optional, Tuple
|
9
|
+
|
10
|
+
import boto3
|
11
|
+
import requests
|
12
|
+
import typer
|
13
|
+
from botocore.exceptions import ClientError, NoCredentialsError
|
14
|
+
from rich import box
|
15
|
+
from rich.console import Console
|
16
|
+
from rich.panel import Panel
|
17
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
18
|
+
from rich.prompt import Confirm, IntPrompt, Prompt
|
19
|
+
from rich.table import Table
|
20
|
+
|
21
|
+
# Initialize Typer apps: `engine_app` and `studio_app` are mounted as
# sub-commands by the main CLI (see cli/main.py).
engine_app = typer.Typer(help="Manage compute engines for development.")
studio_app = typer.Typer(help="Manage persistent development studios.")

# Shared Rich console used for all CLI output in this module.
console = Console()

# Cost information: approximate USD-per-hour by engine type, used only for
# the cost estimates printed by the CLI (not authoritative billing data).
HOURLY_COSTS = {
    "cpu": 0.50,  # r6i.2xlarge
    "cpumax": 1.00,  # r7i.8xlarge
    "t4": 1.00,  # g4dn.2xlarge
    "a10g": 2.00,  # g5.2xlarge
    "a100": 5.00,  # p4d.24xlarge
}

# SSH config management: marker comment appended to every Host entry this
# tool writes, so managed entries can later be found, replaced, or removed.
SSH_MANAGED_COMMENT = "# Managed by dh engine"
|
38
|
+
|
39
|
+
|
40
|
+
def check_aws_sso() -> str:
    """Check AWS SSO status and return the current username.

    The username is parsed from the STS caller-identity ARN. If no valid
    credentials are found, the user is offered an interactive
    ``aws sso login`` and the check is retried.

    Returns:
        The SSO username (last ARN path segment for assumed roles, or the
        UserId suffix for other auth methods).

    Raises:
        typer.Exit: If the user declines to log in or the login fails.
    """
    try:
        sts = boto3.client("sts")
        identity = sts.get_caller_identity()
        # Parse username from assumed role ARN
        # Format: arn:aws:sts::123456789012:assumed-role/AWSReservedSSO_DeveloperAccess_xxxx/username
        arn = identity["Arn"]
        if "assumed-role" in arn:
            return arn.split("/")[-1]
        # Fallback for other auth methods (e.g. IAM user / federated)
        return identity["UserId"].split(":")[-1]
    except (NoCredentialsError, ClientError):
        console.print("[red]❌ Not logged in to AWS SSO[/red]")
        console.print("Please run: [cyan]aws sso login[/cyan]")
        if Confirm.ask("Would you like to login now?"):
            try:
                # Do NOT capture output: `aws sso login` prints the device
                # code / verification URL the user needs to complete the
                # browser flow. check=True raises on non-zero exit.
                subprocess.run(["aws", "sso", "login"], check=True)
                console.print("[green]✓ Successfully logged in![/green]")
                # Retry the identity lookup with the fresh credentials.
                return check_aws_sso()
            except subprocess.CalledProcessError as e:
                console.print(f"[red]Login failed: {e}[/red]")
                raise typer.Exit(1)
        # Bug fix: previously fell through and implicitly returned None,
        # violating the declared `-> str` contract. Exit explicitly instead.
        raise typer.Exit(1)
|
73
|
+
def get_api_url() -> str:
    """Fetch the Studio Manager API base URL from SSM Parameter Store.

    Exits the CLI with status 1 if the parameter is missing or the
    lookup fails for any other reason.
    """
    ssm_client = boto3.client("ssm", region_name="us-east-1")
    try:
        parameter = ssm_client.get_parameter(Name="/dev/studio-manager/api-url")
    except ClientError as e:
        # Distinguish "not deployed yet" from other AWS-side failures.
        if e.response["Error"]["Code"] == "ParameterNotFound":
            console.print(
                "[red]❌ API URL parameter not found in SSM Parameter Store[/red]"
            )
            console.print(
                "Please ensure the Studio Manager infrastructure is deployed."
            )
        else:
            console.print(f"[red]❌ Error retrieving API URL: {e}[/red]")
        raise typer.Exit(1)
    return parameter["Parameter"]["Value"]
91
|
+
|
92
|
+
def make_api_request(
    method: str,
    endpoint: str,
    json_data: Optional[Dict] = None,
    params: Optional[Dict] = None,
) -> requests.Response:
    """Make an HTTP request to the Studio Manager API.

    Args:
        method: One of "GET", "POST", or "DELETE".
        endpoint: Path appended to the API base URL (e.g. "/engines").
        json_data: JSON body for POST requests.
        params: Query parameters for GET requests.

    Returns:
        The raw `requests.Response`; callers inspect `status_code` themselves.

    Raises:
        ValueError: For an unsupported HTTP method.
        typer.Exit: If the request fails at the transport level.
    """
    api_url = get_api_url()
    url = f"{api_url}{endpoint}"

    try:
        # Bug fix: the original calls had no timeout, so a stalled
        # connection would hang the CLI forever. 30s is generous for this
        # control-plane API.
        if method == "GET":
            response = requests.get(url, params=params, timeout=30)
        elif method == "POST":
            response = requests.post(url, json=json_data, timeout=30)
        elif method == "DELETE":
            response = requests.delete(url, timeout=30)
        else:
            raise ValueError(f"Unsupported HTTP method: {method}")

        return response
    except requests.exceptions.RequestException as e:
        console.print(f"[red]❌ API request failed: {e}[/red]")
        raise typer.Exit(1)
|
114
|
+
|
115
|
+
def format_duration(duration: timedelta) -> str:
    """Render a timedelta as a compact string like '3h 27m' or '45m'."""
    whole_minutes, _ = divmod(int(duration.total_seconds()), 60)
    hours, minutes = divmod(whole_minutes, 60)
    # Omit the hour component entirely when it is zero.
    return f"{hours}h {minutes}m" if hours > 0 else f"{minutes}m"
|
126
|
+
|
127
|
+
def parse_launch_time(launch_time_str: str) -> datetime:
    """Parse an API-supplied launch timestamp.

    Tries the known API datetime formats in order; if none match, falls
    back to "now" so downstream uptime/cost math stays conservative.
    """
    known_formats = (
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%d %H:%M:%S",
    )
    for candidate in known_formats:
        try:
            return datetime.strptime(launch_time_str, candidate)
        except ValueError:
            pass
    # Fallback: assume it's recent.
    return datetime.utcnow()
|
143
|
+
|
144
|
+
def format_status(state: str, ready: Optional[bool]) -> str:
    """Return a Rich-markup label for an engine state.

    For running engines the `ready` flag distinguishes fully initialized
    (True), still bootstrapping (False), and unknown (None). Unrecognized
    states are passed through unchanged.
    """
    normalized = state.lower()

    if normalized == "running":
        if ready is True:
            return "[green]Running ✓[/green]"
        if ready is False:
            return "[yellow]Running ⚠ (Initializing...)[/yellow]"
        return "[green]Running[/green]"

    # Non-running states have fixed labels; anything else is echoed as-is.
    fixed_labels = {
        "stopped": "[dim]Stopped[/dim]",
        "stopping": "[yellow]Stopping...[/yellow]",
        "pending": "[yellow]Starting...[/yellow]",
    }
    return fixed_labels.get(normalized, state)
|
162
|
+
|
163
|
+
def resolve_engine(name_or_id: str, engines: List[Dict]) -> Dict:
    """Resolve an engine dict by instance ID, exact name, or prefix.

    Resolution order: exact instance-ID match, then a unique exact name
    match, then prefix matching on name or instance ID. When several
    engines share the prefix, the user picks one interactively; when none
    match, the CLI exits with an error.
    """
    # Exact instance-ID match wins outright.
    for candidate in engines:
        if candidate["instance_id"] == name_or_id:
            return candidate

    # A unique exact-name match is next.
    named = [e for e in engines if e["name"] == name_or_id]
    if len(named) == 1:
        return named[0]

    # Otherwise fall back to prefix matching on either identifier.
    matches = [
        e
        for e in engines
        if e["name"].startswith(name_or_id)
        or e["instance_id"].startswith(name_or_id)
    ]

    if not matches:
        console.print(f"[red]❌ No engine found matching '{name_or_id}'[/red]")
        raise typer.Exit(1)
    if len(matches) == 1:
        return matches[0]

    # Ambiguous: show a numbered menu and prompt until a valid pick.
    console.print(f"Multiple engines match '{name_or_id}':")
    for i, engine in enumerate(matches, 1):
        cost = HOURLY_COSTS.get(engine["engine_type"], 0)
        console.print(
            f" {i}. [cyan]{engine['name']}[/cyan] ({engine['instance_id']}) "
            f"- {engine['engine_type']} - {engine['state']} - ${cost:.2f}/hr"
        )

    while True:
        try:
            choice = IntPrompt.ask(
                "Select engine",
                default=1,
                choices=[str(i) for i in range(1, len(matches) + 1)],
            )
            return matches[choice - 1]
        except (ValueError, IndexError):
            console.print("[red]Invalid selection, please try again[/red]")
|
206
|
+
|
207
|
+
def get_ssh_public_key() -> str:
    """Return the user's SSH public key contents.

    Prefers ed25519 over RSA. Raises FileNotFoundError when neither key
    file exists under ~/.ssh.
    """
    ssh_dir = Path.home() / ".ssh"
    for filename in ("id_ed25519.pub", "id_rsa.pub"):
        candidate = ssh_dir / filename
        if candidate.is_file():
            return candidate.read_text().strip()

    raise FileNotFoundError(
        "No SSH public key found. Please create one with 'ssh-keygen' first."
    )
|
220
|
+
|
221
|
+
def update_ssh_config_entry(engine_name: str, instance_id: str, username: str):
    """Add or update a single SSH config entry.

    Writes a `Host` block to ~/.ssh/config (created 0600 if absent) that
    tunnels SSH over AWS SSM Session Manager using the instance ID as the
    hostname. Entries are tagged with SSH_MANAGED_COMMENT so they can be
    replaced in place on subsequent calls.
    """
    config_path = Path.home() / ".ssh" / "config"
    # Ensure ~/.ssh exists with owner-only permissions.
    config_path.parent.mkdir(mode=0o700, exist_ok=True)

    # Touch the file if it doesn't exist
    if not config_path.exists():
        config_path.touch(mode=0o600)

    # Read existing config
    content = config_path.read_text()

    # Create new entry: ProxyCommand runs `aws ssm start-session`, so no
    # public IP or open port 22 is needed on the instance.
    new_entry = f"""
Host {engine_name} {SSH_MANAGED_COMMENT}
    HostName {instance_id}
    User {username}
    ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
"""

    # Check if entry already exists
    host_line = f"Host {engine_name} {SSH_MANAGED_COMMENT}"
    if host_line in content:
        # Update existing entry: replace the old block line-by-line.
        lines = content.splitlines()
        new_lines = []
        skip_count = 0
        for line in lines:
            if line.strip() == host_line.strip():
                new_lines.extend(new_entry.strip().splitlines())
                skip_count = 4  # Skip the next 4 lines (old entry)
                # NOTE(review): the old block only has 3 body lines
                # (HostName, User, ProxyCommand); skipping 4 also consumes
                # the following line — likely the blank separator, but this
                # could eat a config line if entries are packed. Confirm.
            elif skip_count > 0:
                skip_count -= 1
                continue
            else:
                new_lines.append(line)
        content = "\n".join(new_lines)
    else:
        # Append new entry at the end of the file.
        content = content.rstrip() + "\n" + new_entry

    # Write back with owner-only permissions (ssh refuses group/world-readable).
    config_path.write_text(content)
    config_path.chmod(0o600)
+
|
266
|
+
|
267
|
+
# ==================== ENGINE COMMANDS ====================
|
268
|
+
|
269
|
+
|
270
|
+
@engine_app.command("launch")
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
):
    """Launch a new engine instance.

    Creates the engine via the Studio Manager API under the caller's SSO
    username (or --user override) and reports the instance ID and hourly
    cost. The instance continues initializing after this command returns.
    """
    username = check_aws_sso()
    if user:
        username = user

    # Validate engine type before hitting the API.
    valid_types = ["cpu", "cpumax", "t4", "a10g", "a100"]
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    cost = HOURLY_COSTS.get(engine_type, 0)
    console.print(f"Launching [cyan]{name}[/cyan] ({engine_type}) for ${cost:.2f}/hour...")

    # Show a transient spinner while the API call is in flight.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating engine...", total=None)

        response = make_api_request(
            "POST",
            "/engines",
            json_data={"name": name, "user": username, "engine_type": engine_type},
        )

    # 201 Created is the API's success status for engine creation.
    if response.status_code == 201:
        data = response.json()
        console.print(f"[green]✓ Engine launched successfully![/green]")
        console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
        console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
        console.print("\nThe engine is initializing. This may take a few minutes.")
        console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        raise typer.Exit(1)
|
321
|
+
|
322
|
+
@engine_app.command("list")
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    all_users: bool = typer.Option(False, "--all", "-a", help="Show all users' engines"),
    running_only: bool = typer.Option(False, "--running", help="Show only running engines"),
    stopped_only: bool = typer.Option(False, "--stopped", help="Show only stopped engines"),
):
    """List all engines.

    Renders a table of engines with per-hour rates and an estimated
    "cost today" for running engines (uptime capped at 24h).
    """
    # NOTE(review): current_user is never used and the --all flag has no
    # effect — without --user the API already returns every user's engines.
    # Presumably the intent was to default-filter to the current user
    # unless --all is given; confirm before changing behavior.
    current_user = check_aws_sso()

    params = {}
    if user:
        params["user"] = user

    response = make_api_request("GET", "/engines", params=params)

    if response.status_code == 200:
        data = response.json()
        engines = data.get("engines", [])

        # Filter by state if requested (--running takes precedence over --stopped).
        if running_only:
            engines = [e for e in engines if e["state"].lower() == "running"]
        elif stopped_only:
            engines = [e for e in engines if e["state"].lower() == "stopped"]

        if not engines:
            console.print("No engines found.")
            return

        # Create table
        table = Table(title="Engines", box=box.ROUNDED)
        table.add_column("Name", style="cyan")
        table.add_column("Instance ID", style="dim")
        table.add_column("Type")
        table.add_column("User")
        table.add_column("Status")
        table.add_column("Uptime/Since")
        table.add_column("$/hour", justify="right")
        table.add_column("Cost Today", justify="right", style="yellow")

        total_cost = 0.0
        for engine in engines:
            launch_time = parse_launch_time(engine["launch_time"])
            uptime = datetime.utcnow() - launch_time
            hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)

            if engine["state"].lower() == "running":
                # Estimated spend: hourly rate x uptime, capped at 24 hours.
                daily_cost = hourly_cost * min(uptime.total_seconds() / 3600, 24)
                total_cost += daily_cost
                time_str = format_duration(uptime)
            else:
                # Stopped engines accrue no cost; show launch timestamp instead.
                daily_cost = 0
                time_str = launch_time.strftime("%Y-%m-%d %H:%M")

            table.add_row(
                engine["name"],
                engine["instance_id"],
                engine["engine_type"],
                engine["user"],
                format_status(engine["state"], engine.get("ready")),
                time_str,
                f"${hourly_cost:.2f}",
                f"${daily_cost:.2f}" if daily_cost > 0 else "-",
            )

        console.print(table)

        if total_cost > 0:
            console.print(f"\n[yellow]Total cost today: ${total_cost:.2f}[/yellow]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
396
|
+
|
397
|
+
@engine_app.command("status")
def engine_status(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Show detailed status of an engine.

    Displays identity, state, uptime, cumulative session cost, and any
    studios currently attached, in a Rich panel.
    """
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Get attached studios info; treated as best-effort — a non-200 here
    # just shows the panel without the studios section.
    response = make_api_request("GET", f"/engines/{engine['instance_id']}/attached-studios")
    attached_studios = []
    if response.status_code == 200:
        attached_studios = response.json().get("studios", [])

    # Calculate costs for the whole session since launch (uncapped,
    # unlike the 24h cap used in `list`).
    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.utcnow() - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
    total_cost = hourly_cost * (uptime.total_seconds() / 3600)

    # Create status panel
    status_lines = [
        f"[bold]Name:[/bold] {engine['name']}",
        f"[bold]Instance:[/bold] {engine['instance_id']}",
        f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
        f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
        f"[bold]User:[/bold] {engine['user']}",
        f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
        f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
        f"[bold]Cost:[/bold] ${hourly_cost:.2f}/hour (${total_cost:.2f} total)",
    ]

    if attached_studios:
        status_lines.append("")
        status_lines.append("[bold]Attached Studios:[/bold]")
        for studio in attached_studios:
            attach_time = studio.get("attach_time", "Unknown")
            status_lines.append(
                f" • {studio['user']} ({studio['studio_id']}) - attached {attach_time}"
            )

    panel = Panel(
        "\n".join(status_lines),
        title="Engine Details",
        border_style="blue",
    )
    console.print(panel)
|
453
|
+
|
454
|
+
@engine_app.command("stop")
def stop_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    force: bool = typer.Option(False, "--force", "-f", help="Force stop and detach all studios"),
):
    """Stop an engine.

    Without --force, a stop that would strand attached studios returns
    HTTP 409 from the API; the user is then shown the studios and asked
    whether to detach them and retry.
    """
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Stopping engine [cyan]{engine['name']}[/cyan]...")

    # First attempt without detaching (unless --force was given).
    response = make_api_request(
        "POST",
        f"/engines/{engine['instance_id']}/stop",
        json_data={"detach_studios": force},
    )

    if response.status_code == 409 and not force:
        # Engine has attached studios — list them and confirm before retrying
        # with detach_studios=True.
        data = response.json()
        attached_studios = data.get("attached_studios", [])

        console.print("\n[yellow]⚠️ This engine has attached studios:[/yellow]")
        for studio in attached_studios:
            console.print(f" • {studio['user']} ({studio['studio_id']})")

        if Confirm.ask("\nDetach all studios and stop the engine?"):
            response = make_api_request(
                "POST",
                f"/engines/{engine['instance_id']}/stop",
                json_data={"detach_studios": True},
            )
        else:
            console.print("Stop cancelled.")
            return

    # `response` is either the first attempt or the detach-and-retry result.
    if response.status_code == 200:
        console.print(f"[green]✓ Engine stopped successfully![/green]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to stop engine: {error}[/red]")
|
505
|
+
|
506
|
+
@engine_app.command("start")
def start_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Start a stopped engine."""
    check_aws_sso()

    # Resolve the requested engine against the full listing.
    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)
    engine = resolve_engine(name_or_id, listing.json().get("engines", []))

    console.print(f"Starting engine [cyan]{engine['name']}[/cyan]...")

    start_response = make_api_request(
        "POST", f"/engines/{engine['instance_id']}/start"
    )

    # Guard-clause style: report failure and bail, otherwise show the new IP.
    if start_response.status_code != 200:
        error = start_response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to start engine: {error}[/red]")
        return

    payload = start_response.json()
    console.print(f"[green]✓ Engine started successfully![/green]")
    console.print(f"New public IP: {payload.get('public_ip', 'Pending...')}")
|
534
|
+
|
535
|
+
@engine_app.command("terminate")
def terminate_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Permanently terminate an engine.

    Shows the estimated session cost and requires interactive
    confirmation before issuing the DELETE. Termination is irreversible.
    """
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Calculate cost for the session since launch, shown in the warning.
    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.utcnow() - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
    total_cost = hourly_cost * (uptime.total_seconds() / 3600)

    console.print(f"\n[yellow]⚠️ This will permanently terminate engine '{engine['name']}'[/yellow]")
    console.print(f"Total cost for this session: ${total_cost:.2f}")

    if not Confirm.ask("\nAre you sure you want to terminate this engine?"):
        console.print("Termination cancelled.")
        return

    response = make_api_request("DELETE", f"/engines/{engine['instance_id']}")

    if response.status_code == 200:
        console.print(f"[green]✓ Engine terminated successfully![/green]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to terminate engine: {error}[/red]")
572
|
+
|
573
|
+
@engine_app.command("ssh")
def ssh_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Connect to an engine via SSH.

    Refreshes the managed ~/.ssh/config entry (SSM-tunneled ProxyCommand)
    for the engine, then execs `ssh <engine-name>` interactively.
    """
    username = check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # SSM sessions require a running instance.
    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    # Update SSH config so `ssh <name>` resolves through SSM.
    console.print(f"Updating SSH config for [cyan]{engine['name']}[/cyan]...")
    update_ssh_config_entry(engine["name"], engine["instance_id"], username)

    # Connect; blocks until the interactive SSH session ends.
    console.print(f"[green]✓ Connecting to {engine['name']}...[/green]")
    subprocess.run(["ssh", engine["name"]])
|
601
|
+
|
602
|
+
@engine_app.command("config-ssh")
def config_ssh(
    clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
):
    """Update SSH config with all available engines.

    Rewrites ~/.ssh/config: strips every block tagged with
    SSH_MANAGED_COMMENT, then (unless --clean) appends a fresh entry for
    each currently running engine. Unmanaged entries are preserved.
    """
    username = check_aws_sso()

    if clean:
        console.print("Removing all managed SSH entries...")
    else:
        console.print("Updating SSH config with all running engines...")

    # Get all engines
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    running_engines = [e for e in engines if e["state"].lower() == "running"]

    # Read existing config
    config_path = Path.home() / ".ssh" / "config"
    config_path.parent.mkdir(mode=0o700, exist_ok=True)

    if config_path.exists():
        content = config_path.read_text()
        lines = content.splitlines()
    else:
        content = ""
        lines = []

    # Remove old managed entries. Small state machine: once a managed
    # "Host" line is seen, drop lines until the next "Host " line, which
    # ends the managed block and is itself kept if unmanaged.
    new_lines = []
    skip_until_next_host = False
    for line in lines:
        if SSH_MANAGED_COMMENT in line:
            skip_until_next_host = True
        elif line.strip().startswith("Host ") and skip_until_next_host:
            skip_until_next_host = False
            # Check if this is a managed host
            # NOTE(review): this inner check is always true — a managed
            # Host line would have matched the first branch. Harmless.
            if SSH_MANAGED_COMMENT not in line:
                new_lines.append(line)
        elif not skip_until_next_host:
            new_lines.append(line)

    # Add new entries if not cleaning
    if not clean:
        for engine in running_engines:
            new_lines.extend(
                [
                    "",
                    f"Host {engine['name']} {SSH_MANAGED_COMMENT}",
                    f" HostName {engine['instance_id']}",
                    f" User {username}",
                    f' ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters \'portNumber=%p\'"',
                ]
            )

    # Write back with owner-only permissions.
    config_path.write_text("\n".join(new_lines))
    config_path.chmod(0o600)

    if clean:
        console.print("[green]✓ Removed all managed SSH entries[/green]")
    else:
        console.print(
            f"[green]✓ Updated SSH config with {len(running_engines)} engines[/green]"
        )
        for engine in running_engines:
            console.print(f" • {engine['name']} → {engine['instance_id']}")
|
674
|
+
|
675
|
+
@engine_app.command("keep-awake")
def keep_awake(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 4h30m)"),
):
    """Prevent an engine from auto-shutting down.

    Runs `engine keep-alive <duration>` on the instance via SSM
    Run Command and polls up to ~10s for the result.
    """
    # NOTE(review): username is never used here; check_aws_sso() is called
    # only for its credential check / login side effect.
    username = check_aws_sso()

    # Parse duration of the form "<H>h", "<M>m", or "<H>h<M>m". The regex
    # matches the empty string, so the group check below rejects inputs
    # with neither component.
    import re
    match = re.match(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
    if not match or (not match.group(1) and not match.group(2)):
        console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
        console.print("Use format like: 4h, 30m, 2h30m")
        raise typer.Exit(1)

    hours = int(match.group(1) or 0)
    minutes = int(match.group(2) or 0)
    # NOTE(review): total_minutes is computed but unused — the raw duration
    # string is forwarded verbatim to the on-instance command below.
    total_minutes = hours * 60 + minutes

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Keep-awake only makes sense on a running instance.
    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    console.print(
        f"Setting keep-awake for [cyan]{engine['name']}[/cyan] for {duration}..."
    )

    # Use SSM to run the engine keep-alive command on the instance itself.
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [f"engine keep-alive {duration}"],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Poll up to 10 times (1s apart) for a terminal status.
        # NOTE(review): get_command_invocation can raise
        # InvocationDoesNotExist if called immediately after send_command;
        # that would surface via the ClientError handler below. Confirm
        # whether a retry is wanted instead.
        import time
        for _ in range(10):
            time.sleep(1)
            result = ssm.get_command_invocation(
                CommandId=command_id,
                InstanceId=engine["instance_id"],
            )
            if result["Status"] in ["Success", "Failed"]:
                break

        if result["Status"] == "Success":
            console.print(f"[green]✓ Engine will stay awake for {duration}[/green]")
            console.print(
                "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
            )
            console.print(
                "[dim]Use keep-awake for nohup operations or other background tasks.[/dim]"
            )
        else:
            console.print(f"[red]❌ Failed to set keep-awake: {result.get('StatusDetails', 'Unknown error')}[/red]")

    except ClientError as e:
        console.print(f"[red]❌ Failed to set keep-awake: {e}[/red]")
|
751
|
+
|
752
|
+
@engine_app.command("cancel-keep-awake")
def cancel_keep_awake(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Cancel keep-awake and re-enable auto-shutdown.

    Runs `engine cancel` on the instance via SSM Run Command and polls up
    to ~10s for the result.
    """
    # NOTE(review): username is never used here; check_aws_sso() is called
    # only for its credential check / login side effect.
    username = check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Cancelling keep-awake for [cyan]{engine['name']}[/cyan]...")

    # Use SSM to run the engine cancel command on the instance itself.
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": ["engine cancel"],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Poll up to 10 times (1s apart) for a terminal status.
        import time
        for _ in range(10):
            time.sleep(1)
            result = ssm.get_command_invocation(
                CommandId=command_id,
                InstanceId=engine["instance_id"],
            )
            if result["Status"] in ["Success", "Failed"]:
                break

        if result["Status"] == "Success":
            console.print("[green]✓ Keep-awake cancelled, auto-shutdown re-enabled[/green]")
        else:
            console.print(f"[red]❌ Failed to cancel keep-awake: {result.get('StatusDetails', 'Unknown error')}[/red]")

    except ClientError as e:
        console.print(f"[red]❌ Failed to cancel keep-awake: {e}[/red]")
|
803
|
+
|
804
|
+
@engine_app.command("create-ami")
def create_ami(
    name_or_id: str = typer.Argument(help="Engine name or instance ID to create AMI from"),
):
    """Create a Golden AMI from an engine.

    Removes the first-boot sentinel and transient state on the instance via
    SSM, then calls EC2 CreateImage with ``NoReboot=False`` (the instance is
    rebooted for a clean snapshot). All non-root volumes — e.g. an attached
    studio — are excluded from the image.
    """
    check_aws_sso()

    # Resolve the engine name or instance ID against the engine list.
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")

    engine_type = engine["engine_type"]

    # Name the AMI after the engine type and today's date.
    date_str = datetime.now().strftime("%Y%m%d")
    ami_name = f"prewarmed-engine-{engine_type}-{date_str}"
    ami_description = (
        f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled dev container "
        f"image for {engine_type} engines"
    )

    console.print(f"AMI Name: [cyan]{ami_name}[/cyan]")
    console.print(f"Description: {ami_description}")
    console.print("\n[yellow]⚠️ Important: This will reboot the engine to ensure a clean snapshot.[/yellow]")

    if not Confirm.ask("\nContinue with AMI creation?"):
        console.print("AMI creation cancelled.")
        return

    ec2 = boto3.client("ec2", region_name="us-east-1")

    try:
        # Remove the sentinel so instances launched from this AMI re-run
        # their first-boot bootstrap; also clear shell history and temp/log
        # files so they are not baked into the image.
        console.print("Cleaning up bootstrap sentinel file...")
        ssm = boto3.client("ssm", region_name="us-east-1")

        cleanup_response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
                    "history -c",
                    "sudo rm -rf /tmp/* /var/log/*",
                ],
                "executionTimeout": ["60"],
            },
        )

        # Poll the cleanup command until it reaches a terminal state (~10s max).
        import time

        command_id = cleanup_response["Command"]["CommandId"]
        result = None
        for _ in range(10):
            time.sleep(1)
            result = ssm.get_command_invocation(
                CommandId=command_id,
                InstanceId=engine["instance_id"],
            )
            # Break on ALL terminal SSM states, not just Success/Failed.
            if result["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
                break

        if result is None or result["Status"] != "Success":
            # Cleanup failure is non-fatal: the AMI is still usable, just dirtier.
            console.print("[yellow]⚠️ Warning: Cleanup command may have failed[/yellow]")

        # Inspect the instance's block devices so non-root volumes can be excluded.
        instances = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
        instance = instances["Reservations"][0]["Instances"][0]

        root_device = instance.get("RootDeviceName", "/dev/xvda")
        block_mappings = instance.get("BlockDeviceMappings", [])

        # "NoDevice": "" in a mapping tells CreateImage to omit that device.
        block_device_mappings = []
        for mapping in block_mappings:
            device_name = mapping.get("DeviceName", "")
            if device_name != root_device:
                block_device_mappings.append({"DeviceName": device_name, "NoDevice": ""})
                console.print(f"  Excluding volume at {device_name}")

        # Create the AMI.
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            progress.add_task("Creating AMI (this will take several minutes)...", total=None)

            create_params = {
                "InstanceId": engine["instance_id"],
                "Name": ami_name,
                "Description": ami_description,
                "NoReboot": False,  # Important: reboot for clean snapshot
                "TagSpecifications": [
                    {
                        "ResourceType": "image",
                        "Tags": [
                            {"Key": "Environment", "Value": "dev"},
                            {"Key": "Type", "Value": "golden-ami"},
                            {"Key": "EngineType", "Value": engine_type},
                            {"Key": "Name", "Value": ami_name},
                        ],
                    }
                ],
            }

            if block_device_mappings:
                create_params["BlockDeviceMappings"] = block_device_mappings

            response = ec2.create_image(**create_params)

        ami_id = response["ImageId"]
        console.print("[green]✓ AMI creation initiated![/green]")
        console.print(f"AMI ID: [cyan]{ami_id}[/cyan]")
        console.print("\n[dim]The AMI creation process will continue in the background.[/dim]")
        console.print("[dim]You can monitor progress in the EC2 Console under 'AMIs'.[/dim]")
        console.print(
            f"\nOnce complete, run [cyan]terraform apply[/cyan] in "
            f"terraform/environments/dev to use the new AMI."
        )

    except ClientError as e:
        console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
        raise typer.Exit(1)
938
|
+
# ==================== STUDIO COMMANDS ====================
|
939
|
+
|
940
|
+
|
941
|
+
def get_user_studio(username: str) -> Optional[Dict]:
    """Return the studio record owned by *username*, or ``None``.

    ``None`` is returned both when the studio listing request fails and
    when the user simply has no studio.
    """
    response = make_api_request("GET", "/studios")
    if response.status_code != 200:
        return None

    # Return the first studio belonging to this user, if any.
    for studio in response.json().get("studios", []):
        if studio["user"] == username:
            return studio
    return None
952
|
+
|
953
|
+
@studio_app.command("create")
def create_studio(
    size_gb: int = typer.Option(500, "--size", "-s", help="Studio size in GB"),
):
    """Create a new studio for the current user."""
    username = check_aws_sso()

    # Each user may own at most one studio; bail out if one already exists.
    existing = get_user_studio(username)
    if existing is not None:
        console.print(f"[yellow]You already have a studio: {existing['studio_id']}[/yellow]")
        return

    console.print(f"Creating {size_gb}GB studio for user [cyan]{username}[/cyan]...")

    spinner = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    )
    with spinner as progress:
        progress.add_task("Creating studio volume...", total=None)
        response = make_api_request(
            "POST",
            "/studios",
            json_data={"user": username, "size_gb": size_gb},
        )

    # 201 Created signals success; anything else carries an error payload.
    if response.status_code != 201:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to create studio: {error}[/red]")
        return

    data = response.json()
    console.print("[green]✓ Studio created successfully![/green]")
    console.print(f"Studio ID: [cyan]{data['studio_id']}[/cyan]")
    console.print(f"Size: {data['size_gb']}GB")
    console.print("\nNext step: [cyan]dh studio attach <engine-name>[/cyan]")
+
|
992
|
+
@studio_app.command("status")
def studio_status():
    """Show status of your studio."""
    username = check_aws_sso()

    studio = get_user_studio(username)
    if studio is None:
        console.print("[yellow]You don't have a studio yet.[/yellow]")
        console.print("Create one with: [cyan]dh studio create[/cyan]")
        return

    # Assemble the panel body line by line.
    details = []
    details.append(f"[bold]Studio ID:[/bold] {studio['studio_id']}")
    details.append(f"[bold]User:[/bold] {studio['user']}")
    details.append(f"[bold]Status:[/bold] {studio['status']}")
    details.append(f"[bold]Size:[/bold] {studio['size_gb']}GB")
    details.append(f"[bold]Created:[/bold] {studio['creation_date']}")

    attached_vm = studio.get("attached_vm_id")
    if attached_vm:
        details.append(f"[bold]Attached to:[/bold] {attached_vm}")

        # Best-effort lookup of the attached engine's friendly name.
        engines_response = make_api_request("GET", "/engines")
        if engines_response.status_code == 200:
            for candidate in engines_response.json().get("engines", []):
                if candidate["instance_id"] == attached_vm:
                    details.append(f"[bold]Engine Name:[/bold] {candidate['name']}")
                    break

    console.print(
        Panel(
            "\n".join(details),
            title="Studio Details",
            border_style="blue",
        )
    )
|
1033
|
+
|
1034
|
+
@studio_app.command("attach")
def attach_studio(
    engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Attach your studio to an engine.

    Flow: ensure the user has a studio (offering to create one), detach it
    from any current engine if the user agrees, resolve and (optionally)
    start the target engine, then attach via the API and update the local
    SSH config so ``ssh <engine-name>`` works.
    """
    username = check_aws_sso()

    # Get the user's studio, offering to create one interactively if absent.
    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio yet.[/yellow]")
        if Confirm.ask("Would you like to create one now?"):
            size = IntPrompt.ask("Studio size (GB)", default=500)
            response = make_api_request(
                "POST",
                "/studios",
                json_data={"user": username, "size_gb": size},
            )
            if response.status_code != 201:
                console.print("[red]❌ Failed to create studio[/red]")
                raise typer.Exit(1)
            # The creation response body is used directly as the studio record.
            # NOTE(review): assumes it carries the same keys ("studio_id",
            # "status", ...) as entries from GET /studios — confirm against API.
            studio = response.json()
        else:
            raise typer.Exit(0)

    # If the studio is already attached somewhere, offer to move it.
    if studio.get("status") == "in-use":
        console.print(
            f"[yellow]Studio is already attached to {studio.get('attached_vm_id')}[/yellow]"
        )
        if not Confirm.ask("Detach and reattach to new engine?"):
            return
        # Detach first
        response = make_api_request("POST", f"/studios/{studio['studio_id']}/detach")
        if response.status_code != 200:
            console.print("[red]❌ Failed to detach studio[/red]")
            raise typer.Exit(1)

    # Resolve the target engine name/ID against the full engine list.
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(engine_name_or_id, engines)

    # The engine must be running to attach; offer to start a stopped one.
    if engine["state"].lower() != "running":
        console.print(f"[yellow]⚠️ Engine is {engine['state']}[/yellow]")
        if engine["state"].lower() == "stopped" and Confirm.ask("Start the engine first?"):
            response = make_api_request("POST", f"/engines/{engine['instance_id']}/start")
            if response.status_code != 200:
                console.print("[red]❌ Failed to start engine[/red]")
                raise typer.Exit(1)
            console.print("[green]✓ Engine started[/green]")
            console.print("Waiting for engine to be ready...")
            import time

            # Fixed grace period for the instance to boot; not a readiness probe.
            time.sleep(10)
        else:
            raise typer.Exit(1)

    # The public key is pushed to the engine so the user can SSH in.
    try:
        public_key = get_ssh_public_key()
    except FileNotFoundError as e:
        console.print(f"[red]❌ {e}[/red]")
        raise typer.Exit(1)

    console.print(f"Attaching studio to engine [cyan]{engine['name']}[/cyan]...")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        task = progress.add_task("Attaching studio...", total=100)

        response = make_api_request(
            "POST",
            f"/studios/{studio['studio_id']}/attach",
            json_data={
                "vm_id": engine["instance_id"],
                "user": username,
                "public_key": public_key,
            },
        )

        progress.update(task, completed=100)

    if response.status_code == 200:
        console.print(f"[green]✓ Studio attached successfully![/green]")

        # Update SSH config so `ssh <engine-name>` resolves to this instance.
        update_ssh_config_entry(engine["name"], engine["instance_id"], username)
        console.print(f"[green]✓ SSH config updated[/green]")
        console.print(f"\nConnect with: [cyan]ssh {engine['name']}[/cyan]")
        console.print(f"Your files are at: [cyan]/studios/{username}[/cyan]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to attach studio: {error}[/red]")
|
1137
|
+
@studio_app.command("detach")
def detach_studio():
    """Detach your studio from its current engine."""
    username = check_aws_sso()

    # Guard clauses: need a studio, and it must currently be attached.
    studio = get_user_studio(username)
    if studio is None:
        console.print("[yellow]You don't have a studio.[/yellow]")
        return
    if studio.get("status") != "in-use":
        console.print("[yellow]Your studio is not attached to any engine.[/yellow]")
        return

    console.print(f"Detaching studio from {studio.get('attached_vm_id')}...")

    detach_response = make_api_request("POST", f"/studios/{studio['studio_id']}/detach")

    if detach_response.status_code == 200:
        console.print("[green]✓ Studio detached successfully![/green]")
    else:
        error = detach_response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to detach studio: {error}[/red]")
+
|
1162
|
+
@studio_app.command("delete")
def delete_studio():
    """Delete your studio permanently."""
    username = check_aws_sso()

    studio = get_user_studio(username)
    if studio is None:
        console.print("[yellow]You don't have a studio to delete.[/yellow]")
        return

    console.print("[red]⚠️ WARNING: This will permanently delete your studio and all data![/red]")
    console.print(f"Studio ID: {studio['studio_id']}")
    console.print(f"Size: {studio['size_gb']}GB")

    # Two yes/no confirmations, asked in order; any "no" aborts.
    confirmation_prompts = (
        "\nAre you sure you want to delete your studio?",
        "[red]This action cannot be undone. Continue?[/red]",
    )
    for prompt_text in confirmation_prompts:
        if not Confirm.ask(prompt_text):
            console.print("Deletion cancelled.")
            return

    # Final typed confirmation: the user must literally type DELETE.
    typed_confirm = Prompt.ask('Type "DELETE" to confirm permanent deletion')
    if typed_confirm != "DELETE":
        console.print("Deletion cancelled.")
        return

    response = make_api_request("DELETE", f"/studios/{studio['studio_id']}")

    if response.status_code == 200:
        console.print("[green]✓ Studio deleted successfully![/green]")
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to delete studio: {error}[/red]")
1201
|
+
@studio_app.command("list")
def list_studios(
    all_users: bool = typer.Option(False, "--all", "-a", help="Show all users' studios"),
):
    """List studios.

    By default only the current user's studios are shown; pass --all/-a
    to list every user's studios.
    """
    username = check_aws_sso()

    response = make_api_request("GET", "/studios")

    if response.status_code == 200:
        studios = response.json().get("studios", [])

        # Bug fix: the --all flag was previously accepted but ignored, so
        # every user's studios were always listed. Honor it now.
        if not all_users:
            studios = [s for s in studios if s.get("user") == username]

        if not studios:
            console.print("No studios found.")
            return

        # Create table
        table = Table(title="Studios", box=box.ROUNDED)
        table.add_column("Studio ID", style="cyan")
        table.add_column("User")
        table.add_column("Status")
        table.add_column("Size", justify="right")
        table.add_column("Attached To")
        table.add_column("Created")

        for studio in studios:
            # Green means free to attach; anything else (e.g. in-use) is yellow.
            status_color = "green" if studio["status"] == "available" else "yellow"
            table.add_row(
                studio["studio_id"],
                studio["user"],
                f"[{status_color}]{studio['status']}[/{status_color}]",
                f"{studio['size_gb']}GB",
                studio.get("attached_vm_id", "-"),
                studio["creation_date"],
            )

        console.print(table)
    else:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to list studios: {error}[/red]")
+
|
1243
|
+
@studio_app.command("reset")
def reset_studio():
    """Reset a stuck studio (admin operation)."""
    username = check_aws_sso()

    studio = get_user_studio(username)
    if not studio:
        console.print("[yellow]You don't have a studio.[/yellow]")
        return

    console.print(f"[yellow]⚠️ This will force-reset your studio state[/yellow]")
    console.print(f"Current status: {studio['status']}")
    if studio.get("attached_vm_id"):
        console.print(f"Listed as attached to: {studio['attached_vm_id']}")

    if not Confirm.ask("\nReset studio state?"):
        console.print("Reset cancelled.")
        return

    # Direct DynamoDB update — bypasses the studio API on purpose, since this
    # command exists to recover from state the API itself has gotten wrong.
    console.print("Resetting studio state...")

    dynamodb = boto3.resource("dynamodb", region_name="us-east-1")
    table = dynamodb.Table("dev-studios")

    try:
        # Check if volume is actually attached. The studio_id is passed to EC2
        # as a VolumeId, so a studio's ID is evidently its EBS volume ID.
        ec2 = boto3.client("ec2", region_name="us-east-1")
        volumes = ec2.describe_volumes(VolumeIds=[studio["studio_id"]])

        if volumes["Volumes"]:
            volume = volumes["Volumes"][0]
            attachments = volume.get("Attachments", [])
            if attachments:
                console.print(
                    f"[red]Volume is still attached to {attachments[0]['InstanceId']}![/red]"
                )
                # Force=True detaches even if the instance hasn't unmounted the
                # volume — data loss is possible, hence the explicit prompt.
                if Confirm.ask("Force-detach the volume?"):
                    ec2.detach_volume(
                        VolumeId=studio["studio_id"],
                        InstanceId=attachments[0]["InstanceId"],
                        Force=True,
                    )
                    console.print("Waiting for volume to detach...")
                    # Block until EC2 reports the volume as "available" again.
                    waiter = ec2.get_waiter("volume_available")
                    waiter.wait(VolumeIds=[studio["studio_id"]])

        # Reset in DynamoDB: mark available and null out the attachment fields.
        # NOTE(review): runs even if the user declined the force-detach above,
        # so the DB can be marked "available" while the volume stays attached —
        # presumably intentional for this admin override; confirm.
        table.update_item(
            Key={"studio_id": studio["studio_id"]},
            UpdateExpression="SET #status = :status, attached_vm_id = :vm_id, attached_device = :device",
            ExpressionAttributeNames={"#status": "status"},
            ExpressionAttributeValues={
                ":status": "available",
                ":vm_id": None,
                ":device": None,
            },
        )

        console.print(f"[green]✓ Studio reset to available state![/green]")

    except ClientError as e:
        console.print(f"[red]❌ Failed to reset studio: {e}[/red]")
dayhoff_tools/cli/main.py
CHANGED
@@ -4,6 +4,7 @@ import sys
|
|
4
4
|
|
5
5
|
import typer
|
6
6
|
from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
|
7
|
+
from dayhoff_tools.cli.engine_commands import engine_app, studio_app
|
7
8
|
from dayhoff_tools.cli.utility_commands import (
|
8
9
|
add_to_warehouse_typer,
|
9
10
|
build_and_upload_wheel,
|
@@ -38,6 +39,10 @@ app.command("wimport")(import_from_warehouse_typer)
|
|
38
39
|
app.add_typer(gcp_app, name="gcp", help="Manage GCP authentication and impersonation.")
|
39
40
|
app.add_typer(aws_app, name="aws", help="Manage AWS SSO authentication.")
|
40
41
|
|
42
|
+
# Engine and Studio commands
|
43
|
+
app.add_typer(engine_app, name="engine", help="Manage compute engines for development.")
|
44
|
+
app.add_typer(studio_app, name="studio", help="Manage persistent development studios.")
|
45
|
+
|
41
46
|
|
42
47
|
@app.command("wheel")
|
43
48
|
def build_and_upload_wheel_command(
|
@@ -3,7 +3,8 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
|
|
3
3
|
dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
|
4
4
|
dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
|
6
|
-
dayhoff_tools/cli/
|
6
|
+
dayhoff_tools/cli/engine_commands.py,sha256=uqS46dGMaF5UGNbIJ3G1oY3QxD38jUNHiBAfYMBpmf4,46268
|
7
|
+
dayhoff_tools/cli/main.py,sha256=rgeEHD9lJ8SBCR34BTLb7gVInHUUdmEBNXAJnq5yEU4,4795
|
7
8
|
dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
|
8
9
|
dayhoff_tools/cli/utility_commands.py,sha256=qs8vH9TBFHsOPC3X8cU3qZigM3dDn-2Ytq4o_F2WubU,27874
|
9
10
|
dayhoff_tools/deployment/base.py,sha256=mYp560l6hSDFtyY2H42VoM8k9VUzfwuiyh9Knqpgc28,17441
|
@@ -26,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
26
27
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
27
28
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
28
29
|
dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
|
29
|
-
dayhoff_tools-1.3.
|
30
|
-
dayhoff_tools-1.3.
|
31
|
-
dayhoff_tools-1.3.
|
32
|
-
dayhoff_tools-1.3.
|
30
|
+
dayhoff_tools-1.3.3.dist-info/METADATA,sha256=mWVT1xFladhd-RuJOeESPAoFxe1FDKsJP-G9tTlO1yk,2842
|
31
|
+
dayhoff_tools-1.3.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
32
|
+
dayhoff_tools-1.3.3.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
33
|
+
dayhoff_tools-1.3.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|