dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/__init__.py +10 -0
- dayhoff_tools/cli/cloud_commands.py +179 -43
- dayhoff_tools/cli/engine1/__init__.py +323 -0
- dayhoff_tools/cli/engine1/engine_core.py +703 -0
- dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
- dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
- dayhoff_tools/cli/engine1/engine_management.py +505 -0
- dayhoff_tools/cli/engine1/shared.py +501 -0
- dayhoff_tools/cli/engine1/studio_commands.py +825 -0
- dayhoff_tools/cli/engines_studios/__init__.py +6 -0
- dayhoff_tools/cli/engines_studios/api_client.py +351 -0
- dayhoff_tools/cli/engines_studios/auth.py +144 -0
- dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
- dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
- dayhoff_tools/cli/engines_studios/progress.py +260 -0
- dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
- dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
- dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
- dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
- dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
- dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
- dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
- dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
- dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
- dayhoff_tools/cli/main.py +106 -7
- dayhoff_tools/cli/utility_commands.py +896 -179
- dayhoff_tools/deployment/base.py +70 -6
- dayhoff_tools/deployment/deploy_aws.py +165 -25
- dayhoff_tools/deployment/deploy_gcp.py +78 -5
- dayhoff_tools/deployment/deploy_utils.py +20 -7
- dayhoff_tools/deployment/job_runner.py +9 -4
- dayhoff_tools/deployment/processors.py +230 -418
- dayhoff_tools/deployment/swarm.py +47 -12
- dayhoff_tools/embedders.py +28 -26
- dayhoff_tools/fasta.py +181 -64
- dayhoff_tools/warehouse.py +268 -1
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
- dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
- dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,1151 @@
|
|
|
1
|
+
"""Engine CLI commands for engines_studios system."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from .api_client import StudioManagerClient
|
|
10
|
+
from .auth import check_aws_auth, detect_aws_environment, get_aws_username
|
|
11
|
+
from .progress import format_idle_state, format_time_ago, wait_with_progress
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _update_ssh_config_silent(client: StudioManagerClient, env: str) -> bool:
    """Rewrite the managed engine section of ~/.ssh/config without any output.

    Lines between the ``# BEGIN DAYHOFF ENGINES`` and ``# END DAYHOFF ENGINES``
    markers are dropped, then a fresh section is appended containing one
    ``Host`` entry for every running engine returned by
    ``client.list_engines()``. Engines whose ``user`` is known and differs from
    the current AWS username are left out; if the username cannot be
    determined, no ownership filtering is applied.

    Args:
        client: API client used to list engines.
        env: Environment name; used to build the ``{env}-devaccess`` AWS
            profile referenced by each entry's ProxyCommand.

    Returns:
        True if the config file was rewritten. False if the API returned no
        engines (the file is left untouched) or if any error occurred; errors
        are swallowed on purpose because callers treat this as best-effort.
    """
    ssh_config_path = os.path.expanduser("~/.ssh/config")
    managed_start = "# BEGIN DAYHOFF ENGINES\n"
    managed_end = "# END DAYHOFF ENGINES\n"

    try:
        # Read existing config
        if os.path.exists(ssh_config_path):
            with open(ssh_config_path, "r") as f:
                lines = f.readlines()
        else:
            lines = []

        # Remove managed entries. Markers are compared without their line
        # terminator so that a CRLF file, or a marker sitting on an
        # unterminated last line, is still recognised.
        start_marker = managed_start.rstrip("\n")
        end_marker = managed_end.rstrip("\n")
        new_lines = []
        skip = False
        for line in lines:
            bare = line.rstrip("\r\n")
            if bare == start_marker:
                skip = True
            elif bare == end_marker:
                skip = False
            elif not skip:
                new_lines.append(line)

        # Get engines
        result = client.list_engines()
        engines = result.get("engines", [])

        if not engines:
            return False

        # Generate new entries
        config_entries = [managed_start]

        try:
            current_user = get_aws_username()
        except RuntimeError:
            # Not authenticated - can't determine user, skip filtering
            current_user = None

        # Map environment to AWS profile (same for every entry)
        profile = f"{env}-devaccess"

        for engine in engines:
            user = engine.get("user", "unknown")

            # Skip engines owned by other users (unless user is unknown or we
            # can't determine current user)
            if current_user and user != "unknown" and user != current_user:
                continue

            # Only add running engines
            if engine.get("state", "unknown") != "running":
                continue

            instance_id = engine.get("instance_id")
            if not instance_id:
                # Without an instance id there is no SSM target to point at;
                # writing "HostName None" would only produce a broken entry.
                continue
            name = engine.get("name", instance_id)

            config_entries.append(f"\nHost {name}\n")
            config_entries.append(f" HostName {instance_id}\n")
            config_entries.append(f" User {user}\n")
            config_entries.append(" ForwardAgent yes\n")
            config_entries.append(
                f" ProxyCommand aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p' --profile {profile}\n"
            )

        config_entries.append(managed_end)

        # If the preserved content does not end with a newline, terminate it
        # first; otherwise the BEGIN marker would be glued onto the user's last
        # line, corrupting that line and hiding the marker from later runs.
        if new_lines and not new_lines[-1].endswith("\n"):
            new_lines[-1] += "\n"

        # Write back
        new_lines.extend(config_entries)

        with open(ssh_config_path, "w") as f:
            f.writelines(new_lines)

        return True

    except Exception:
        # Best-effort helper: callers only need to know it did not succeed.
        return False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Root click group; the engine subcommands in this module register on it
# through @engine_cli.command(...).
@click.group()
def engine_cli():
    """Manage engines."""
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ============================================================================
|
|
104
|
+
# Lifecycle Management
|
|
105
|
+
# ============================================================================
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _echo_error_lines(lines):
    """Write each string in *lines* to stderr, one click.echo call per line."""
    for line in lines:
        click.echo(line, err=True)


@engine_cli.command("launch")
@click.argument("name")
@click.option(
    "--type",
    "engine_type",
    required=True,
    type=click.Choice(
        ["cpu", "cpumax", "t4", "a10g", "a100", "4_t4", "8_t4", "4_a10g", "8_a10g"]
    ),
)
@click.option("--size", "boot_disk_size", type=int, help="Boot disk size in GB")
@click.option(
    "--user",
    default=None,
    help="User to launch engine for (defaults to current user, use for testing/admin)",
)
@click.option(
    "--no-wait", is_flag=True, help="Return immediately without waiting for readiness"
)
@click.option(
    "--skip-ssh-config", is_flag=True, help="Don't automatically update SSH config"
)
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def launch_engine(
    name: str,
    engine_type: str,
    boot_disk_size: Optional[int],
    yes: bool,
    user: Optional[str],
    no_wait: bool,
    skip_ssh_config: bool,
    env: Optional[str],
):
    """Launch a new engine for the current user (or specified user with --user flag)."""
    # NOTE: the docstring above is what click shows as --help text, so the
    # implementation notes live in comments instead.
    #
    # Flow: authenticate -> resolve environment -> confirm for non-dev ->
    # resolve user -> launch -> (unless --no-wait) poll readiness for up to
    # 600 seconds -> refresh SSH config. Known AWS quota/capacity failures are
    # recognised by substring in the error text and get tailored guidance.
    # Every failure path ends in click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to launch in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    # Get user (from flag or current AWS user)
    if user is None:
        try:
            user = get_aws_username()
        except RuntimeError as e:
            click.echo(f"✗ {e}", err=True)
            raise click.Abort()

    click.echo(f"🚀 Launching {engine_type} engine '{name}' for {user}...")

    try:
        # Launch the engine
        engine = client.launch_engine(
            name=name, user=user, engine_type=engine_type, boot_disk_size=boot_disk_size
        )

        engine_id = engine["instance_id"]
        click.echo(f"✓ EC2 instance launched: {engine_id}")

        if no_wait:
            click.echo("\nEngine is initializing. Check status with:")
            click.echo(f" dh engine status {name}")
            return

        # Wait for readiness with progress updates
        click.echo("\n⏳ Waiting for engine to be ready (typically 2-3 minutes)...\n")

        try:
            wait_with_progress(
                status_func=lambda: client.get_engine_readiness(engine_id),
                is_complete_func=lambda s: s.get("ready", False),
                label="Progress",
                timeout_seconds=600,
            )

            click.echo("\n✓ Engine ready!")

            # Update SSH config unless skipped
            if not skip_ssh_config:
                if _update_ssh_config_silent(client, env):
                    click.echo("✓ SSH config updated")

            click.echo("\nConnect with:")
            click.echo(f" dh studio attach {name}")
            click.echo(f" ssh {name}")

        except TimeoutError:
            # Not fatal: the instance exists, it just is not ready yet.
            click.echo("\n⚠ Engine is still initializing. Check status with:")
            click.echo(f" dh engine status {name}")

    except Exception as e:
        error_msg = str(e)

        # Check for quota/limit errors
        if "VcpuLimitExceeded" in error_msg or "vCPU limit" in error_msg:
            _echo_error_lines(
                [
                    "✗ Failed to launch engine: vCPU quota exceeded",
                    "",
                    f"The {env} AWS account has insufficient vCPU quota for {engine_type} instances.",
                    "",
                    "Solutions:",
                    " 1. Use a different instance type (e.g., --type cpu)",
                    " 2. Request a quota increase:",
                    " • AWS Console → Service Quotas → Amazon EC2",
                    " • Find quota for the instance family",
                    " • Request increase (typically approved within 24h)",
                    "",
                    "For testing infrastructure, use CPU instances instead of GPU.",
                ]
            )
            raise click.Abort()

        # Check for insufficient capacity errors
        if "InsufficientInstanceCapacity" in error_msg:
            _echo_error_lines(
                [
                    "✗ Failed to launch engine: insufficient EC2 capacity",
                    "",
                    f"AWS does not have available {engine_type} capacity in your region/AZ.",
                    "",
                    "Solutions:",
                    " 1. Try again in a few minutes (capacity fluctuates)",
                    " 2. Use a different instance type",
                    " 3. Contact AWS support for capacity reservations",
                ]
            )
            raise click.Abort()

        # Check for instance limit errors
        if (
            "InstanceLimitExceeded" in error_msg
            or "instance limit" in error_msg.lower()
        ):
            _echo_error_lines(
                [
                    "✗ Failed to launch engine: instance limit exceeded",
                    "",
                    f"You have reached the maximum number of running instances in {env}.",
                    "",
                    "Solutions:",
                    # This line was a plain string before, so "{env}" was
                    # printed literally instead of the environment name.
                    f" 1. Terminate unused engines: dh engine2 list --env {env}",
                    " 2. Request a limit increase via AWS Service Quotas",
                ]
            )
            raise click.Abort()

        # Generic error
        click.echo(f"✗ Failed to launch engine: {e}", err=True)
        raise click.Abort()
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
@engine_cli.command("start")
@click.argument("name_or_id")
@click.option(
    "--no-wait", is_flag=True, help="Return immediately without waiting for readiness"
)
@click.option(
    "--skip-ssh-config", is_flag=True, help="Don't automatically update SSH config"
)
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def start_engine(
    name_or_id: str, no_wait: bool, skip_ssh_config: bool, yes: bool, env: Optional[str]
):
    """Start a stopped engine."""
    # NOTE: the docstring above is click's --help text; details live here.
    #
    # Resolves NAME_OR_ID by name first and otherwise treats it as an instance
    # id, asks the API to start it, then (unless --no-wait) polls for up to
    # 300 seconds until the instance is running, reports "reachable", and
    # returns an idle_state from the status endpoint.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        result = client.start_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is starting")

        if no_wait:
            click.echo("\nCheck status with:")
            click.echo(f" dh engine status {engine_name}")
            return

        # Wait for engine to be running and fully ready (including status checks)
        click.echo("\n⏳ Waiting for engine to be ready...\n")

        try:

            def check_engine_running():
                """Check if engine is running, status checks passed, and SSM is accessible."""
                # Check EC2 state and status checks
                instance_status = client.check_instance_status(engine_id)
                if "error" in instance_status:
                    return {"ready": False, "progress_percent": 0}

                state = instance_status.get("state", "unknown")
                status_checks_passed = instance_status.get("reachable", False)

                # Check SSM accessibility via idle state
                engine_status = client.get_engine_status(engine_id)
                ssm_working = (
                    "error" not in engine_status
                    and engine_status.get("idle_state") is not None
                )

                # Progress based on state and checks
                if state == "pending":
                    progress = 30
                elif state == "running" and not status_checks_passed:
                    # Running but status checks still initializing
                    progress = 60
                elif state == "running" and not ssm_working:
                    # Status checks passed but SSM not yet responding
                    progress = 85
                elif state == "running":
                    # Fully ready
                    progress = 100
                else:
                    progress = 10

                # Ready when running AND status checks pass AND SSM works
                ready = state == "running" and status_checks_passed and ssm_working

                return {"ready": ready, "progress_percent": progress}

            wait_with_progress(
                status_func=check_engine_running,
                is_complete_func=lambda s: s.get("ready", False),
                label="Starting",
                timeout_seconds=300,
                show_stages=False,
            )

            click.echo("\n✓ Engine ready!")

            # Update SSH config unless skipped
            if not skip_ssh_config:
                if _update_ssh_config_silent(client, env):
                    click.echo("✓ SSH config updated")

            click.echo("\nConnect with:")
            click.echo(f" dh studio attach {engine_name}")
            click.echo(f" ssh {engine_name}")

        except TimeoutError:
            # Not fatal: the start request was accepted, it is just slow.
            click.echo("\n⚠ Engine is still starting. Check status with:")
            click.echo(f" dh engine status {engine_name}")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch the Abort raised above and print
        # a second, empty "✗ Error: " line before aborting again.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
@engine_cli.command("stop")
@click.argument("name_or_id")
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def stop_engine(name_or_id: str, yes: bool, env: Optional[str]):
    """Stop a running engine."""
    # NOTE: the docstring above is click's --help text; details live here.
    #
    # Resolves NAME_OR_ID by name first and otherwise treats it as an instance
    # id, then asks the API to stop it. Does not wait for the stop to finish.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        click.echo(f"Stopping engine '{engine_name}'...")

        result = client.stop_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is stopping")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch the Abort raised above and print
        # a second, empty "✗ Error: " line before aborting again.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
@engine_cli.command("terminate")
@click.argument("name_or_id")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def terminate_engine(name_or_id: str, yes: bool, env: Optional[str]):
    """Terminate an engine."""
    # NOTE: the docstring above is click's --help text; details live here.
    #
    # Resolves NAME_OR_ID by name first and otherwise treats it as an instance
    # id, asks for confirmation (unless --yes), then requests termination.
    # Without --yes, a non-dev environment prompts twice: once for the
    # environment and once for the specific engine.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        # Confirm; declining here is a normal return, not an abort
        if not yes:
            if not click.confirm(f"Terminate engine '{engine_name}' ({engine_id})?"):
                click.echo("Cancelled")
                return

        # Terminate
        result = client.terminate_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is terminating")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch the Abort raised above and print
        # a second, empty "✗ Error: " line before aborting again.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
# ============================================================================
|
|
535
|
+
# Status and Information
|
|
536
|
+
# ============================================================================
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
@engine_cli.command("status")
@click.argument("name_or_id")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def engine_status(name_or_id: str, env: Optional[str]):
    """Show engine status including idle detector state."""
    # NOTE: the docstring above is click's --help text; details live here.
    #
    # Prints name, colour-coded state, account, launch time, type, instance id
    # and public IP. For engines that are not stopped/terminated it then shows
    # initialization progress (while not ready) and either the detailed idle
    # state or, when none is reported yet, the attached studios.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Try to find by name first
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            # Assume it's an instance ID
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]

        # Get full status
        status_data = client.get_engine_status(engine_id)

        if "error" in status_data:
            click.echo(f"✗ Error: {status_data['error']}", err=True)
            raise click.Abort()

        # Display basic info
        engine_name = status_data.get("name", engine_id)
        click.echo(f"Name: \033[34m{engine_name}\033[0m")  # Blue engine name

        # Show state with color coding
        engine_state = status_data.get("state", "unknown")
        state_lower = engine_state.lower()
        if state_lower == "running":
            click.echo(f"State: \033[32m{engine_state}\033[0m")  # Green
        elif state_lower in ["stopped", "terminated"]:
            click.echo(f"State: \033[31m{engine_state}\033[0m")  # Red
        elif state_lower in ["stopping", "starting", "pending"]:
            click.echo(f"State: \033[33m{engine_state}\033[0m")  # Yellow (transitional)
        else:
            click.echo(f"State: {engine_state}")  # No color for unknown states

        # Show account (environment)
        click.echo(f"Account: {env}")

        if status_data.get("launch_time"):
            click.echo(f"Launched: {format_time_ago(status_data['launch_time'])}")

        click.echo(f"Type: {status_data.get('instance_type', 'unknown')}")
        click.echo(f"Instance ID: {engine_id}")

        if status_data.get("public_ip"):
            click.echo(f"Public IP: {status_data['public_ip']}")

        # Check if engine is stopped - don't show idle state or activity sensors
        if state_lower in ["stopped", "stopping", "terminated", "terminating"]:
            return

        # Show readiness if not ready
        readiness = status_data.get("readiness")
        if readiness and not readiness.get("ready"):
            click.echo(
                f"\n⏳ Initialization: {readiness.get('progress_percent', 0)}%"
            )
            click.echo(
                f"Current Stage: {readiness.get('current_stage', 'unknown')}"
            )
            if readiness.get("estimated_time_remaining_seconds"):
                remaining = readiness["estimated_time_remaining_seconds"]
                click.echo(f"Estimated Time Remaining: {remaining}s")

        # Show idle state (only for running engines) - always detailed
        attached_studios = status_data.get("attached_studios", [])
        if status_data.get("idle_state"):
            click.echo(
                f"\n{format_idle_state(status_data['idle_state'], detailed=True, attached_studios=attached_studios)}"
            )
        elif attached_studios:
            # If no idle state yet, still show attached studios
            studio_names = ", ".join(
                [
                    f"\033[35m{s.get('user', 'unknown')}\033[0m"
                    for s in attached_studios
                ]
            )
            click.echo(f"\nAttached Studios: {studio_names}")
        else:
            click.echo("\nAttached Studios: None")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch the Abort raised above and print
        # a second, empty "✗ Error: " line before aborting again.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
@engine_cli.command("list")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def list_engines(env: Optional[str]):
    """List all engines."""
    # NOTE: the docstring above is click's --help text; details live here.
    #
    # Renders every engine returned by the API as a box-drawn table with the
    # columns Name, State, User, Type and Instance ID, followed by a total.

    # Fail early with a clear message when AWS credentials are missing
    check_aws_auth()

    # Fall back to the detected environment when none was given
    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        response = client.list_engines()
        engines = response.get("engines", [])

        # Account header, account name in blue
        click.echo(f"\nEngines for AWS Account \033[34m{env}\033[0m")

        if not engines:
            click.echo("No engines found\n")
            return

        # Name column grows with the longest name (+2 padding); the other
        # columns use fixed widths.
        longest_name = max(
            (len(item.get("name", "unknown")) for item in engines), default=4
        )
        name_width = max(longest_name + 2, len("Name") + 2)
        state_width = 12
        user_width = 12
        type_width = 12
        id_width = 20
        column_widths = (name_width, state_width, user_width, type_width, id_width)

        def rule(left, joint, right):
            # One horizontal border: each column is width + 1 dashes wide to
            # account for the space printed after every "│".
            return left + joint.join("─" * (w + 1) for w in column_widths) + right

        def clip(text, width):
            # Shorten text so it leaves at least one padding cell in its column
            if len(text) > width - 1:
                return text[: width - 1]
            return text

        click.echo(rule("╭", "┬", "╮"))
        click.echo(
            f"│ {'Name':<{name_width}}│ {'State':<{state_width}}│ {'User':<{user_width}}│ {'Type':<{type_width}}│ {'Instance ID':<{id_width}}│"
        )
        click.echo(rule("├", "┼", "┤"))

        for item in engines:
            name = item.get("name", "unknown")
            state = item.get("state", "unknown")
            user = item.get("user", "unknown")
            engine_type = item.get("engine_type", "unknown")
            instance_id = item.get("instance_id", "unknown")

            name = clip(name, name_width)
            user = clip(user, user_width)
            engine_type = clip(engine_type, type_width)

            # Name is always blue
            name_cell = f"\033[34m{name:<{name_width}}\033[0m"

            # State colour: green running, yellow transitional, grey stopped,
            # uncoloured otherwise
            if state == "running":
                state_color = "\033[32m"
            elif state in ["starting", "stopping", "pending"]:
                state_color = "\033[33m"
            elif state == "stopped":
                state_color = "\033[90m"
            else:
                state_color = None

            padded_state = f"{state:<{state_width}}"
            if state_color is None:
                state_cell = padded_state
            else:
                state_cell = f"{state_color}{padded_state}\033[0m"

            # Instance id is always grey
            id_cell = f"\033[90m{instance_id:<{id_width}}\033[0m"

            click.echo(
                f"│ {name_cell}│ {state_cell}│ {user:<{user_width}}│ {engine_type:<{type_width}}│ {id_cell}│"
            )

        click.echo(rule("╰", "┴", "╯"))

        click.echo(f"Total: {len(engines)}\n")

    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
# ============================================================================
|
|
785
|
+
# Access (SSH Config Management)
|
|
786
|
+
# ============================================================================
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
@engine_cli.command("config-ssh")
@click.option("--clean", is_flag=True, help="Remove all managed entries")
@click.option("--all", is_flag=True, help="Include engines from all users")
@click.option(
    "--admin",
    is_flag=True,
    help="Generate entries using ec2-user instead of owner",
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def config_ssh(clean: bool, all: bool, admin: bool, env: Optional[str]):
    """Update SSH config with available engines."""
    # Rewrites the block of ~/.ssh/config delimited by the
    # "# BEGIN DAYHOFF ENGINES" / "# END DAYHOFF ENGINES" marker lines.
    #
    # clean: strip the managed block and write the file back, nothing else.
    # all:   include engines whose "user" differs from the current AWS user.
    # admin: emit "User ec2-user" instead of the engine's owner.
    # env:   environment name; auto-detected unless only cleaning.
    #
    # Any failure inside the try block is printed to stderr and converted
    # into click.Abort.

    # Auto-detect environment if not specified (and not just cleaning)
    if env is None and not clean:
        check_aws_auth()
        env = detect_aws_environment()
    elif env is None:
        env = "dev"  # Default for clean operation

    client = StudioManagerClient(environment=env)
    ssh_config_path = os.path.expanduser("~/.ssh/config")

    try:
        # Read existing config
        if os.path.exists(ssh_config_path):
            with open(ssh_config_path, "r") as f:
                lines = f.readlines()
        else:
            lines = []

        # Remove managed entries
        managed_start = "# BEGIN DAYHOFF ENGINES\n"
        managed_end = "# END DAYHOFF ENGINES\n"

        new_lines = []
        skip = False
        for line in lines:
            # Normalise the line ending so a marker sitting on the very last
            # line of a file without a trailing newline is still recognised.
            normalized = line.rstrip("\n") + "\n"
            if normalized == managed_start:
                skip = True
            elif normalized == managed_end:
                skip = False
            elif not skip:
                new_lines.append(line)

        if clean:
            # Write back without managed section
            with open(ssh_config_path, "w") as f:
                f.writelines(new_lines)
            click.echo("✓ Removed managed engine entries from SSH config")
            return

        # Get engines
        result = client.list_engines()
        engines = result.get("engines", [])

        if not engines:
            click.echo("No engines found")
            return

        try:
            current_user = get_aws_username()
        except RuntimeError:
            # Not authenticated - can't determine user
            current_user = None

        # Map environment to AWS profile (same for every engine, so computed once)
        profile_map = {
            "dev": "dev-devaccess",
            "sand": "sand-devaccess",
            "prod": "prod-devaccess",
        }
        aws_profile = profile_map.get(env, f"{env}-devaccess")

        # Generate new entries
        config_entries = [managed_start]
        written = 0  # number of Host entries actually emitted

        for engine in engines:
            user = engine.get("user", "unknown")

            # Skip if not all and not owned by current user (unless user is
            # unknown or we can't determine current user)
            if not all and current_user and user != "unknown" and user != current_user:
                continue

            instance_id = engine.get("instance_id")
            name = engine.get("name", instance_id)
            state = engine.get("state", "unknown")

            # Only add running engines
            if state != "running":
                continue

            username = "ec2-user" if admin else user

            config_entries.append(f"\nHost {name}\n")
            config_entries.append(f" HostName {instance_id}\n")
            config_entries.append(f" User {username}\n")
            config_entries.append(" ForwardAgent yes\n")
            config_entries.append(
                f" ProxyCommand aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p' --profile {aws_profile}\n"
            )
            written += 1

        config_entries.append(managed_end)

        # If the user's own content does not end with a newline, add one so
        # the BEGIN marker starts on its own line (otherwise it would be glued
        # to the previous directive and never matched on the next run).
        if new_lines and not new_lines[-1].endswith("\n"):
            new_lines[-1] += "\n"

        # Write back
        new_lines.extend(config_entries)

        with open(ssh_config_path, "w") as f:
            f.writelines(new_lines)

        # Report the entries written, not the unfiltered engine count.
        click.echo(f"✓ Updated SSH config with {written} engine(s)")

    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
# ============================================================================
|
|
911
|
+
# Idle Detection Control
|
|
912
|
+
# ============================================================================
|
|
913
|
+
|
|
914
|
+
|
|
915
|
+
@engine_cli.command("coffee")
@click.argument("name_or_id")
@click.argument("duration", required=False)
@click.option("--cancel", is_flag=True, help="Cancel existing coffee lock")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def coffee(name_or_id: str, duration: Optional[str], cancel: bool, env: Optional[str]):
    """Keep engine awake for specified duration (e.g., '4h', '2h30m')."""
    # name_or_id: engine name, or a value used verbatim as the instance ID
    #             when the name lookup returns nothing.
    # duration:   passed through unchanged to client.set_coffee; required
    #             unless --cancel is given.
    # cancel:     cancel the existing lock instead of setting one.
    # env:        environment name; auto-detected when omitted.
    #
    # Errors are printed to stderr once and surfaced as click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance ID
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        if cancel:
            result = client.cancel_coffee(engine_id)
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Coffee lock cancelled for '{engine_name}'")
        else:
            if not duration:
                click.echo("✗ Error: duration required (e.g., '4h', '2h30m')", err=True)
                raise click.Abort()

            result = client.set_coffee(engine_id, duration)
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Coffee lock set for '{engine_name}': {duration}")

    except click.Abort:
        # click.Abort is a RuntimeError subclass, so the generic handler below
        # would catch it and print a second, empty "✗ Error: " line. The real
        # message has already been shown; just propagate.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
@engine_cli.command("idle")
@click.argument("name_or_id")
@click.option("--set", "set_timeout", help="Set new timeout (e.g., '2h30m', '45m')")
@click.option(
    "--slack",
    type=click.Choice(["none", "default", "all"]),
    help="Set Slack notifications",
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def idle_timeout_cmd(
    name_or_id: str,
    set_timeout: Optional[str],
    slack: Optional[str],
    env: Optional[str],
):
    """Show or configure idle detector settings."""
    # name_or_id:  engine name, or a value used verbatim as the instance ID
    #              when the name lookup returns nothing.
    # set_timeout: new timeout string, forwarded unchanged to the client.
    # slack:       one of "none" / "default" / "all", forwarded unchanged.
    # env:         environment name; auto-detected when omitted.
    #
    # With neither --set nor --slack the command only prints the settings.
    # Errors are printed to stderr once and surfaced as click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance ID
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        # Get current settings (also validates that the engine is reachable)
        status = client.get_engine_status(engine_id)

        if "error" in status:
            click.echo(f"✗ Error: {status['error']}", err=True)
            raise click.Abort()

        # Update if requested
        if set_timeout or slack:
            result = client.update_idle_settings(
                engine_id, timeout=set_timeout, slack=slack
            )
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Idle settings updated for '{engine_name}'")

            # Re-read the status so the summary below is not the pre-update
            # snapshot. Best effort: if the refresh reports an error, keep the
            # earlier snapshot rather than failing an update that succeeded.
            # NOTE(review): whether the backend reflects the new timeout
            # immediately is not visible from here - confirm.
            refreshed = client.get_engine_status(engine_id)
            if "error" not in refreshed:
                status = refreshed

        # Show current settings ("or {}" guards against an explicit null)
        idle_state = status.get("idle_state") or {}
        timeout_seconds = int(idle_state.get("timeout_seconds", 1800))
        timeout_minutes = timeout_seconds // 60

        click.echo(f"\nIdle Settings for '{engine_name}':")
        click.echo(f" Timeout: {timeout_minutes} minutes")
        click.echo(
            f" Current State: {'IDLE' if idle_state.get('is_idle') else 'ACTIVE'}"
        )

    except click.Abort:
        # click.Abort is a RuntimeError subclass, so the generic handler below
        # would catch it and print a second, empty "✗ Error: " line. The real
        # message has already been shown; just propagate.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
# ============================================================================
|
|
1038
|
+
# Maintenance
|
|
1039
|
+
# ============================================================================
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
@engine_cli.command("resize")
@click.argument("name_or_id")
@click.option("--size", "-s", required=True, type=int, help="New size in GB")
@click.option(
    "--online",
    is_flag=True,
    help="Resize while running (requires manual filesystem expansion)",
)
@click.option("--force", "-f", is_flag=True, help="Force resize")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def resize_engine(
    name_or_id: str, size: int, online: bool, force: bool, env: Optional[str]
):
    """Resize an engine's boot disk."""
    # name_or_id: engine name, or a value used verbatim as the instance ID
    #             when the name lookup returns nothing.
    # size:       new size in GB, forwarded to client.resize_engine.
    # online:     forwarded to the client; also triggers the reminder that
    #             the filesystem must be expanded manually.
    # force:      skip the interactive confirmation prompt.
    # env:        environment name; auto-detected when omitted.
    #
    # Errors are printed to stderr once and surfaced as click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as an instance ID
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        if not force:
            if not click.confirm(f"Resize boot disk of '{engine_name}' to {size}GB?"):
                click.echo("Cancelled")
                return

        result = client.resize_engine(engine_id, size, online)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Boot disk resize initiated for '{engine_name}'")
        if online:
            click.echo(" Note: Manual filesystem expansion required")

    except click.Abort:
        # click.Abort is a RuntimeError subclass, so the generic handler below
        # would catch it and print a second, empty "✗ Error: " line. This
        # covers both the explicit abort above and an interrupt raised by
        # click.confirm at the prompt; just propagate.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
@engine_cli.command("debug")
@click.argument("name_or_id")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def debug_engine(name_or_id: str, env: Optional[str]):
    """Debug engine bootstrap status and files."""
    # Prints the readiness report returned by the client: the overall ready
    # flag, the current stage, and one numbered line per bootstrap stage with
    # a status icon and, when available, its duration in seconds.
    #
    # name_or_id: engine name, or a value used verbatim as the instance ID
    #             when the name lookup returns nothing.
    # env:        environment name; auto-detected when omitted.

    # Authentication is verified first; the environment is detected only
    # when the caller did not pass one.
    check_aws_auth()
    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Resolve by name, otherwise assume the argument is an instance ID.
        found = client.get_engine_by_name(name_or_id)
        if found:
            engine = found
        else:
            engine = {"instance_id": name_or_id, "name": name_or_id}
        engine_id = engine["instance_id"]

        # Fetch the readiness report and print its header fields.
        readiness = client.get_engine_readiness(engine_id)
        is_ready = readiness.get("ready", False)
        current_stage = readiness.get("current_stage", "unknown")

        click.echo(f"Engine: {engine_id}")
        click.echo(f"Ready: {is_ready}")
        click.echo(f"Current Stage: {current_stage}")
        click.echo("\nBootstrap Stages:")

        # One line per stage, numbered from 1.
        for position, stage in enumerate(readiness.get("stages", []), 1):
            stage_status = stage.get("status", "unknown")
            stage_name = stage.get("name", "unknown")

            # Missing or zero duration_ms means "no duration to show".
            raw_ms = stage.get("duration_ms")
            seconds = raw_ms / 1000 if raw_ms else None

            if stage_status == "completed":
                icon = "✓"
            elif stage_status == "in_progress":
                icon = "⏳"
            else:
                icon = "✗"

            suffix = ""
            if seconds:
                suffix = f" ({seconds:.1f}s)"

            click.echo(f" {icon} {position}. {stage_name}{suffix}")

    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
|