dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. dayhoff_tools/__init__.py +10 -0
  2. dayhoff_tools/cli/cloud_commands.py +179 -43
  3. dayhoff_tools/cli/engine1/__init__.py +323 -0
  4. dayhoff_tools/cli/engine1/engine_core.py +703 -0
  5. dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
  6. dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
  7. dayhoff_tools/cli/engine1/engine_management.py +505 -0
  8. dayhoff_tools/cli/engine1/shared.py +501 -0
  9. dayhoff_tools/cli/engine1/studio_commands.py +825 -0
  10. dayhoff_tools/cli/engines_studios/__init__.py +6 -0
  11. dayhoff_tools/cli/engines_studios/api_client.py +351 -0
  12. dayhoff_tools/cli/engines_studios/auth.py +144 -0
  13. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
  14. dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
  15. dayhoff_tools/cli/engines_studios/progress.py +260 -0
  16. dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
  17. dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
  18. dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
  19. dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
  20. dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
  21. dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
  22. dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
  23. dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
  25. dayhoff_tools/cli/main.py +106 -7
  26. dayhoff_tools/cli/utility_commands.py +896 -179
  27. dayhoff_tools/deployment/base.py +70 -6
  28. dayhoff_tools/deployment/deploy_aws.py +165 -25
  29. dayhoff_tools/deployment/deploy_gcp.py +78 -5
  30. dayhoff_tools/deployment/deploy_utils.py +20 -7
  31. dayhoff_tools/deployment/job_runner.py +9 -4
  32. dayhoff_tools/deployment/processors.py +230 -418
  33. dayhoff_tools/deployment/swarm.py +47 -12
  34. dayhoff_tools/embedders.py +28 -26
  35. dayhoff_tools/fasta.py +181 -64
  36. dayhoff_tools/warehouse.py +268 -1
  37. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
  38. dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
  39. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
  40. dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
  41. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1151 @@
1
+ """Engine CLI commands for engines_studios system."""
2
+
3
+ import os
4
+ import subprocess
5
+ from typing import Optional
6
+
7
+ import click
8
+
9
+ from .api_client import StudioManagerClient
10
+ from .auth import check_aws_auth, detect_aws_environment, get_aws_username
11
+ from .progress import format_idle_state, format_time_ago, wait_with_progress
12
+
13
+
14
def _update_ssh_config_silent(client: StudioManagerClient, env: str) -> bool:
    """Rewrite the managed engine section of ~/.ssh/config without printing.

    Everything between the ``# BEGIN DAYHOFF ENGINES`` and
    ``# END DAYHOFF ENGINES`` marker lines is dropped and regenerated from
    ``client.list_engines()``; every other line is kept as it is. Only engines
    whose state is ``"running"`` get a ``Host`` entry, and engines owned by a
    different user are skipped when the current AWS username can be
    determined.

    Args:
        client: API client used to list engines.
        env: Environment name; the AWS profile written into the ProxyCommand
            is ``f"{env}-devaccess"``.

    Returns:
        True if the config file was rewritten. False if no engines were
        returned (the file is then left untouched) or if any exception was
        raised along the way.
    """
    ssh_config_path = os.path.expanduser("~/.ssh/config")
    managed_start = "# BEGIN DAYHOFF ENGINES\n"
    managed_end = "# END DAYHOFF ENGINES\n"

    try:
        # Read existing config
        if os.path.exists(ssh_config_path):
            with open(ssh_config_path, "r") as f:
                lines = f.readlines()
        else:
            lines = []

        # A file that does not end with a newline would otherwise get the
        # BEGIN marker glued onto its last line (corrupting that line and
        # making the marker undetectable on the next run). Terminating the
        # last line also lets an unterminated END marker match below.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        # Drop the previously managed section, markers included
        new_lines = []
        skip = False
        for line in lines:
            if line == managed_start:
                skip = True
            elif line == managed_end:
                skip = False
            elif not skip:
                new_lines.append(line)

        # Get engines
        result = client.list_engines()
        engines = result.get("engines", [])

        if not engines:
            return False

        try:
            current_user = get_aws_username()
        except RuntimeError:
            # Not authenticated - can't determine user, skip filtering
            current_user = None

        # Map environment to AWS profile (same for every entry)
        profile = f"{env}-devaccess"

        # Generate new entries
        config_entries = [managed_start]
        for engine in engines:
            user = engine.get("user", "unknown")

            # Skip engines owned by other users (unless the owner is unknown
            # or we can't determine the current user)
            if current_user and user != "unknown" and user != current_user:
                continue

            # Only add running engines
            if engine.get("state", "unknown") != "running":
                continue

            instance_id = engine.get("instance_id")
            name = engine.get("name", instance_id)

            config_entries.append(f"\nHost {name}\n")
            config_entries.append(f" HostName {instance_id}\n")
            config_entries.append(f" User {user}\n")
            config_entries.append(" ForwardAgent yes\n")
            config_entries.append(
                f" ProxyCommand aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p' --profile {profile}\n"
            )

        config_entries.append(managed_end)

        # Write back
        new_lines.extend(config_entries)

        with open(ssh_config_path, "w") as f:
            f.writelines(new_lines)

        return True

    except Exception:
        # Deliberately best-effort: callers only print a confirmation on True.
        return False
95
+
96
+
97
@click.group()
def engine_cli():
    """Manage engines."""
    # Container group only: subcommands attach via @engine_cli.command(...).
101
+
102
+
103
+ # ============================================================================
104
+ # Lifecycle Management
105
+ # ============================================================================
106
+
107
+
108
@engine_cli.command("launch")
@click.argument("name")
@click.option(
    "--type",
    "engine_type",
    required=True,
    type=click.Choice(
        ["cpu", "cpumax", "t4", "a10g", "a100", "4_t4", "8_t4", "4_a10g", "8_a10g"]
    ),
)
@click.option("--size", "boot_disk_size", type=int, help="Boot disk size in GB")
@click.option(
    "--user",
    default=None,
    help="User to launch engine for (defaults to current user, use for testing/admin)",
)
@click.option(
    "--no-wait", is_flag=True, help="Return immediately without waiting for readiness"
)
@click.option(
    "--skip-ssh-config", is_flag=True, help="Don't automatically update SSH config"
)
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def launch_engine(
    name: str,
    engine_type: str,
    boot_disk_size: Optional[int],
    yes: bool,
    user: Optional[str],
    no_wait: bool,
    skip_ssh_config: bool,
    env: Optional[str],
):
    """Launch a new engine for the current user (or specified user with --user flag)."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # Flow: check AWS auth -> resolve environment -> confirm when the
    # environment is not "dev" (unless --yes) -> resolve the owning user ->
    # launch -> unless --no-wait, poll readiness for up to 600 seconds and
    # then refresh the SSH config (unless --skip-ssh-config).
    #
    # Every failure path prints to stderr and raises click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to launch in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    # Get user (from flag or current AWS user)
    if user is None:
        try:
            user = get_aws_username()
        except RuntimeError as e:
            click.echo(f"✗ {e}", err=True)
            raise click.Abort()

    click.echo(f"🚀 Launching {engine_type} engine '{name}' for {user}...")

    try:
        # Launch the engine
        engine = client.launch_engine(
            name=name, user=user, engine_type=engine_type, boot_disk_size=boot_disk_size
        )

        engine_id = engine["instance_id"]
        click.echo(f"✓ EC2 instance launched: {engine_id}")

        if no_wait:
            click.echo("\nEngine is initializing. Check status with:")
            click.echo(f" dh engine status {name}")
            return

        # Wait for readiness with progress updates
        click.echo("\n⏳ Waiting for engine to be ready (typically 2-3 minutes)...\n")

        try:
            wait_with_progress(
                status_func=lambda: client.get_engine_readiness(engine_id),
                is_complete_func=lambda s: s.get("ready", False),
                label="Progress",
                timeout_seconds=600,
            )

            click.echo("\n✓ Engine ready!")

            # Update SSH config unless skipped
            if not skip_ssh_config:
                if _update_ssh_config_silent(client, env):
                    click.echo("✓ SSH config updated")

            click.echo("\nConnect with:")
            click.echo(f" dh studio attach {name}")
            click.echo(f" ssh {name}")

        except TimeoutError:
            # A timeout is not a launch failure: the instance exists, it is
            # just not ready yet, so point the user at the status command.
            click.echo("\n⚠ Engine is still initializing. Check status with:")
            click.echo(f" dh engine status {name}")

    except Exception as e:
        error_msg = str(e)

        # Classify well-known EC2 failures by substring so the user gets
        # actionable guidance instead of a raw API error.
        if "VcpuLimitExceeded" in error_msg or "vCPU limit" in error_msg:
            report = [
                "✗ Failed to launch engine: vCPU quota exceeded",
                "",
                f"The {env} AWS account has insufficient vCPU quota for {engine_type} instances.",
                "",
                "Solutions:",
                " 1. Use a different instance type (e.g., --type cpu)",
                " 2. Request a quota increase:",
                " • AWS Console → Service Quotas → Amazon EC2",
                " • Find quota for the instance family",
                " • Request increase (typically approved within 24h)",
                "",
                "For testing infrastructure, use CPU instances instead of GPU.",
            ]
        elif "InsufficientInstanceCapacity" in error_msg:
            report = [
                "✗ Failed to launch engine: insufficient EC2 capacity",
                "",
                f"AWS does not have available {engine_type} capacity in your region/AZ.",
                "",
                "Solutions:",
                " 1. Try again in a few minutes (capacity fluctuates)",
                " 2. Use a different instance type",
                " 3. Contact AWS support for capacity reservations",
            ]
        elif (
            "InstanceLimitExceeded" in error_msg
            or "instance limit" in error_msg.lower()
        ):
            report = [
                "✗ Failed to launch engine: instance limit exceeded",
                "",
                f"You have reached the maximum number of running instances in {env}.",
                "",
                "Solutions:",
                # Fixed: this line was a plain string, so "{env}" was printed
                # literally instead of the environment name.
                # NOTE(review): other hints in this command say "dh engine ...";
                # confirm whether "engine2" is still the right command name.
                f" 1. Terminate unused engines: dh engine2 list --env {env}",
                " 2. Request a limit increase via AWS Service Quotas",
            ]
        else:
            # Generic error
            report = [f"✗ Failed to launch engine: {e}"]

        for line in report:
            click.echo(line, err=True)
        raise click.Abort()
288
+
289
+
290
@engine_cli.command("start")
@click.argument("name_or_id")
@click.option(
    "--no-wait", is_flag=True, help="Return immediately without waiting for readiness"
)
@click.option(
    "--skip-ssh-config", is_flag=True, help="Don't automatically update SSH config"
)
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def start_engine(
    name_or_id: str, no_wait: bool, skip_ssh_config: bool, yes: bool, env: Optional[str]
):
    """Start a stopped engine."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # name_or_id is first looked up as an engine name; if nothing is found it
    # is used verbatim as the instance ID. Unless --no-wait is given, the
    # command polls for up to 300 seconds until the instance is running, its
    # status checks pass and the engine status reports an idle state.
    #
    # Every failure path prints to stderr and raises click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        result = client.start_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is starting")

        if no_wait:
            click.echo("\nCheck status with:")
            click.echo(f" dh engine status {engine_name}")
            return

        # Wait for engine to be running and fully ready (including status checks)
        click.echo("\n⏳ Waiting for engine to be ready...\n")

        try:

            def check_engine_running():
                """Check if engine is running, status checks passed, and SSM is accessible."""
                # Check EC2 state and status checks
                instance_status = client.check_instance_status(engine_id)
                if "error" in instance_status:
                    return {"ready": False, "progress_percent": 0}

                state = instance_status.get("state", "unknown")
                status_checks_passed = instance_status.get("reachable", False)

                # Check SSM accessibility via idle state
                engine_status = client.get_engine_status(engine_id)
                ssm_working = (
                    "error" not in engine_status
                    and engine_status.get("idle_state") is not None
                )

                # Progress based on state and checks
                if state == "pending":
                    progress = 30
                elif state != "running":
                    progress = 10
                elif not status_checks_passed:
                    # Running but status checks still initializing
                    progress = 60
                elif not ssm_working:
                    # Status checks passed but SSM not yet responding
                    progress = 85
                else:
                    # Fully ready
                    progress = 100

                # Ready when running AND status checks pass AND SSM works,
                # which is exactly the 100% case above.
                return {"ready": progress == 100, "progress_percent": progress}

            wait_with_progress(
                status_func=check_engine_running,
                is_complete_func=lambda s: s.get("ready", False),
                label="Starting",
                timeout_seconds=300,
                show_stages=False,
            )

            click.echo("\n✓ Engine ready!")

            # Update SSH config unless skipped
            if not skip_ssh_config:
                if _update_ssh_config_silent(client, env):
                    click.echo("✓ SSH config updated")

            click.echo("\nConnect with:")
            click.echo(f" dh studio attach {engine_name}")
            click.echo(f" ssh {engine_name}")

        except TimeoutError:
            click.echo("\n⚠ Engine is still starting. Check status with:")
            click.echo(f" dh engine status {engine_name}")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch our own abort and print a second,
        # empty "✗ Error: " line after the real message.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
419
+
420
+
421
@engine_cli.command("stop")
@click.argument("name_or_id")
@click.option(
    "--yes", "-y", is_flag=True, help="Skip confirmation for non-dev environments"
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def stop_engine(name_or_id: str, yes: bool, env: Optional[str]):
    """Stop a running engine."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # name_or_id is first looked up as an engine name; if nothing is found it
    # is used verbatim as the instance ID. The command returns as soon as the
    # stop request is accepted. Failures print to stderr and raise click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        click.echo(f"Stopping engine '{engine_name}'...")

        result = client.stop_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is stopping")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch our own abort and print a second,
        # empty "✗ Error: " line after the real message.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
474
+
475
+
476
@engine_cli.command("terminate")
@click.argument("name_or_id")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def terminate_engine(name_or_id: str, yes: bool, env: Optional[str]):
    """Terminate an engine."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # name_or_id is first looked up as an engine name; if nothing is found it
    # is used verbatim as the instance ID. --yes skips both prompts: the
    # non-dev environment prompt and the per-engine terminate prompt.
    # Declining the environment prompt aborts; declining the terminate prompt
    # returns normally. Failures print to stderr and raise click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()
        click.echo(f"🔍 Detected environment: {env}")

    # Require confirmation for non-dev environments
    if env != "dev" and not yes:
        if not click.confirm(
            f"⚠️ You are about to operate in {env.upper()}. Continue?"
        ):
            click.echo("Cancelled")
            raise click.Abort()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        # Confirm
        if not yes:
            if not click.confirm(f"Terminate engine '{engine_name}' ({engine_id})?"):
                click.echo("Cancelled")
                return

        # Terminate
        result = client.terminate_engine(engine_id)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Engine '{engine_name}' is terminating")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch our own abort (including Ctrl-C
        # at the confirmation prompt) and print an empty "✗ Error: " line.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
532
+
533
+
534
+ # ============================================================================
535
+ # Status and Information
536
+ # ============================================================================
537
+
538
+
539
@engine_cli.command("status")
@click.argument("name_or_id")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def engine_status(name_or_id: str, env: Optional[str]):
    """Show engine status including idle detector state."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # name_or_id is first looked up as an engine name; if nothing is found it
    # is used verbatim as the instance ID. Output order: name, state,
    # account, launch time, type, instance ID, public IP; then, for engines
    # that are not stopped/terminated, readiness progress and idle state or
    # attached studios. Failures print to stderr and raise click.Abort.

    # Check AWS auth first to provide clear error messages
    check_aws_auth()

    # Auto-detect environment if not specified
    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Try to find by name first
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            # Assume it's an instance ID
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]

        # Get full status
        status_data = client.get_engine_status(engine_id)

        if "error" in status_data:
            click.echo(f"✗ Error: {status_data['error']}", err=True)
            raise click.Abort()

        # Display basic info; the engine name is shown in blue
        engine_name = status_data.get("name", engine_id)
        click.echo(f"Name: \033[34m{engine_name}\033[0m")

        # Show state with color coding
        engine_state = status_data.get("state", "unknown")
        state_lower = engine_state.lower()
        if state_lower == "running":
            state_color = "\033[32m"  # Green for running
        elif state_lower in ["stopped", "terminated"]:
            state_color = "\033[31m"  # Red for stopped/terminated
        elif state_lower in ["stopping", "starting", "pending"]:
            state_color = "\033[33m"  # Yellow for transitional states
        else:
            state_color = None  # No color for unknown states

        if state_color is None:
            click.echo(f"State: {engine_state}")
        else:
            click.echo(f"State: {state_color}{engine_state}\033[0m")

        # Show account (environment)
        click.echo(f"Account: {env}")

        if status_data.get("launch_time"):
            click.echo(f"Launched: {format_time_ago(status_data['launch_time'])}")

        click.echo(f"Type: {status_data.get('instance_type', 'unknown')}")
        click.echo(f"Instance ID: {engine_id}")

        if status_data.get("public_ip"):
            click.echo(f"Public IP: {status_data['public_ip']}")

        # Check if engine is stopped - don't show idle state or activity sensors
        if state_lower in ["stopped", "stopping", "terminated", "terminating"]:
            return

        # Show readiness if not ready
        readiness = status_data.get("readiness")
        if readiness and not readiness.get("ready"):
            click.echo(f"\n⏳ Initialization: {readiness.get('progress_percent', 0)}%")
            click.echo(f"Current Stage: {readiness.get('current_stage', 'unknown')}")
            if readiness.get("estimated_time_remaining_seconds"):
                remaining = readiness["estimated_time_remaining_seconds"]
                click.echo(f"Estimated Time Remaining: {remaining}s")

        # Show idle state (only for running engines), always in detailed form
        attached_studios = status_data.get("attached_studios", [])
        if status_data.get("idle_state"):
            click.echo(
                f"\n{format_idle_state(status_data['idle_state'], detailed=True, attached_studios=attached_studios)}"
            )
        elif attached_studios:
            # If no idle state yet, still show attached studios
            studio_names = ", ".join(
                f"\033[35m{s.get('user', 'unknown')}\033[0m" for s in attached_studios
            )
            click.echo(f"\nAttached Studios: {studio_names}")
        else:
            click.echo("\nAttached Studios: None")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch our own abort and print a second,
        # empty "✗ Error: " line after the real message.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
648
+
649
+
650
@engine_cli.command("list")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def list_engines(env: Optional[str]):
    """List all engines."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # Prints a box-drawn table with Name, State, User, Type and Instance ID
    # columns followed by a total count. Failures print to stderr and raise
    # click.Abort.

    # Verify credentials up front so auth problems are reported clearly
    check_aws_auth()

    # Fall back to the auto-detected environment
    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        result = client.list_engines()
        engines = result.get("engines", [])

        # Account header, account name in blue
        click.echo(f"\nEngines for AWS Account \033[34m{env}\033[0m")

        if not engines:
            click.echo("No engines found\n")
            return

        # Name column grows with the longest name (+2 padding); the other
        # columns are fixed width.
        longest_name = max(
            (len(item.get("name", "unknown")) for item in engines), default=4
        )
        name_width = max(longest_name + 2, len("Name") + 2)
        state_width = 12
        user_width = 12
        type_width = 12
        id_width = 20
        widths = (name_width, state_width, user_width, type_width, id_width)

        def rule(left, joint, right):
            # One horizontal border; each cell is one wider than its column
            # to account for the space after the vertical bar.
            return left + joint.join("─" * (w + 1) for w in widths) + right

        def row(cells):
            # Cells are already padded (and possibly colored)
            return "│ " + "│ ".join(cells) + "│"

        def clip(text, width):
            # Keep at least one padding character inside the column
            return text[: width - 1] if len(text) > width - 1 else text

        # (state names, ANSI color) pairs: green, yellow, grey (dim)
        palette = (
            (("running",), "\033[32m"),
            (("starting", "stopping", "pending"), "\033[33m"),
            (("stopped",), "\033[90m"),
        )

        titles = ("Name", "State", "User", "Type", "Instance ID")

        click.echo(rule("╭", "┬", "╮"))
        click.echo(row([f"{title:<{w}}" for title, w in zip(titles, widths)]))
        click.echo(rule("├", "┼", "┤"))

        for item in engines:
            name = item.get("name", "unknown")
            state = item.get("state", "unknown")
            user = item.get("user", "unknown")
            engine_type = item.get("engine_type", "unknown")
            instance_id = item.get("instance_id", "unknown")

            # Truncate if needed
            name = clip(name, name_width)
            user = clip(user, user_width)
            engine_type = clip(engine_type, type_width)

            # Name in blue
            name_cell = f"\033[34m{name:<{name_width}}\033[0m"

            # State colored by palette; other states stay uncolored
            tint = next((code for names, code in palette if state in names), None)
            if tint is None:
                state_cell = f"{state:<{state_width}}"
            else:
                state_cell = f"{tint}{state:<{state_width}}\033[0m"

            # Instance ID in grey
            id_cell = f"\033[90m{instance_id:<{id_width}}\033[0m"

            click.echo(
                row(
                    [
                        name_cell,
                        state_cell,
                        f"{user:<{user_width}}",
                        f"{engine_type:<{type_width}}",
                        id_cell,
                    ]
                )
            )

        click.echo(rule("╰", "┴", "╯"))

        click.echo(f"Total: {len(engines)}\n")

    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
782
+
783
+
784
+ # ============================================================================
785
+ # Access (SSH Config Management)
786
+ # ============================================================================
787
+
788
+
789
@engine_cli.command("config-ssh")
@click.option("--clean", is_flag=True, help="Remove all managed entries")
@click.option("--all", is_flag=True, help="Include engines from all users")
@click.option(
    "--admin",
    is_flag=True,
    help="Generate entries using ec2-user instead of owner",
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def config_ssh(clean: bool, all: bool, admin: bool, env: Optional[str]):
    """Update SSH config with available engines."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # Rewrites the section of ~/.ssh/config between the BEGIN/END DAYHOFF
    # ENGINES markers. With --clean the section is only removed. Otherwise a
    # Host entry is generated for every running engine that belongs to the
    # current user (or to anyone with --all). --admin uses "ec2-user" as the
    # SSH user instead of the engine owner.
    #
    # The `all` parameter name comes from the --all option and shadows the
    # builtin inside this function; the builtin is not used here.
    #
    # Failures print to stderr and raise click.Abort.

    # Auto-detect environment if not specified (and not just cleaning)
    if env is None and not clean:
        check_aws_auth()
        env = detect_aws_environment()
    elif env is None:
        env = "dev"  # Default for clean operation

    client = StudioManagerClient(environment=env)
    ssh_config_path = os.path.expanduser("~/.ssh/config")
    managed_start = "# BEGIN DAYHOFF ENGINES\n"
    managed_end = "# END DAYHOFF ENGINES\n"

    try:
        # Read existing config
        if os.path.exists(ssh_config_path):
            with open(ssh_config_path, "r") as f:
                lines = f.readlines()
        else:
            lines = []

        # A file that does not end with a newline would otherwise get the
        # BEGIN marker glued onto its last line (corrupting that line and
        # making the marker undetectable on the next run). Terminating the
        # last line also lets an unterminated END marker match below.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        # Drop the previously managed section, markers included
        new_lines = []
        skip = False
        for line in lines:
            if line == managed_start:
                skip = True
            elif line == managed_end:
                skip = False
            elif not skip:
                new_lines.append(line)

        if clean:
            # Write back without managed section
            with open(ssh_config_path, "w") as f:
                f.writelines(new_lines)
            click.echo("✓ Removed managed engine entries from SSH config")
            return

        # Get engines
        result = client.list_engines()
        engines = result.get("engines", [])

        if not engines:
            click.echo("No engines found")
            return

        try:
            current_user = get_aws_username()
        except RuntimeError:
            # Not authenticated - can't determine user
            current_user = None

        # Map environment to AWS profile (same for every entry)
        profile_map = {
            "dev": "dev-devaccess",
            "sand": "sand-devaccess",
            "prod": "prod-devaccess",
        }
        aws_profile = profile_map.get(env, f"{env}-devaccess")

        # Generate new entries
        config_entries = [managed_start]
        written = 0

        for engine in engines:
            user = engine.get("user", "unknown")

            # Skip if not all and not owned by current user (unless the owner
            # is unknown or we can't determine the current user)
            if not all and current_user and user != "unknown" and user != current_user:
                continue

            # Only add running engines
            if engine.get("state", "unknown") != "running":
                continue

            instance_id = engine.get("instance_id")
            name = engine.get("name", instance_id)
            username = "ec2-user" if admin else user

            config_entries.append(f"\nHost {name}\n")
            config_entries.append(f" HostName {instance_id}\n")
            config_entries.append(f" User {username}\n")
            config_entries.append(" ForwardAgent yes\n")
            config_entries.append(
                f" ProxyCommand aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p' --profile {aws_profile}\n"
            )
            written += 1

        config_entries.append(managed_end)

        # Write back
        new_lines.extend(config_entries)

        with open(ssh_config_path, "w") as f:
            f.writelines(new_lines)

        # Report the entries actually written; the raw engine list also
        # contains stopped engines and engines filtered out by owner.
        click.echo(f"✓ Updated SSH config with {written} engine(s)")

    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
908
+
909
+
910
+ # ============================================================================
911
+ # Idle Detection Control
912
+ # ============================================================================
913
+
914
+
915
@engine_cli.command("coffee")
@click.argument("name_or_id")
@click.argument("duration", required=False)
@click.option("--cancel", is_flag=True, help="Cancel existing coffee lock")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def coffee(name_or_id: str, duration: Optional[str], cancel: bool, env: Optional[str]):
    """Keep engine awake for specified duration (e.g., '4h', '2h30m')."""
    # NOTE: the docstring above is the command's --help text; keep details here.
    #
    # name_or_id is first looked up as an engine name; if nothing is found it
    # is used verbatim as the instance ID. With --cancel the existing lock is
    # removed and `duration` is ignored; otherwise `duration` is required and
    # passed to the API unchanged. Failures print to stderr and raise
    # click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        if cancel:
            result = client.cancel_coffee(engine_id)
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Coffee lock cancelled for '{engine_name}'")
        else:
            if not duration:
                click.echo("✗ Error: duration required (e.g., '4h', '2h30m')", err=True)
                raise click.Abort()

            result = client.set_coffee(engine_id, duration)
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Coffee lock set for '{engine_name}': {duration}")

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would catch our own abort and print a second,
        # empty "✗ Error: " line after the real message.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
964
+
965
+
966
@engine_cli.command("idle")
@click.argument("name_or_id")
@click.option("--set", "set_timeout", help="Set new timeout (e.g., '2h30m', '45m')")
@click.option(
    "--slack",
    type=click.Choice(["none", "default", "all"]),
    help="Set Slack notifications",
)
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def idle_timeout_cmd(
    name_or_id: str,
    set_timeout: Optional[str],
    slack: Optional[str],
    env: Optional[str],
):
    """Show or configure idle detector settings."""
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the longer notes are kept in comments.
    #
    # name_or_id:  engine name; when no engine with that name is found the
    #              value is used directly as the instance id.
    # set_timeout: new timeout string, forwarded unchanged to
    #              client.update_idle_settings(); None leaves it untouched.
    # slack:       one of "none" / "default" / "all"; None leaves it untouched.
    # env:         target environment; detected from AWS when None.
    #
    # Every failure is written to stderr and ends in click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as a raw instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        # Get current settings (also surfaces lookup errors before any update)
        status = client.get_engine_status(engine_id)

        if "error" in status:
            click.echo(f"✗ Error: {status['error']}", err=True)
            raise click.Abort()

        # Update if requested
        if set_timeout or slack:
            result = client.update_idle_settings(
                engine_id, timeout=set_timeout, slack=slack
            )
            if "error" in result:
                click.echo(f"✗ Error: {result['error']}", err=True)
                raise click.Abort()
            click.echo(f"✓ Idle settings updated for '{engine_name}'")

            # Re-read the status: the copy fetched above predates the update,
            # so printing it would show the old settings.
            status = client.get_engine_status(engine_id)
            if "error" in status:
                click.echo(f"✗ Error: {status['error']}", err=True)
                raise click.Abort()

        # Show current settings; tolerate a missing or null idle_state
        idle_state = status.get("idle_state") or {}
        timeout_seconds = int(idle_state.get("timeout_seconds", 1800))
        timeout_minutes = timeout_seconds // 60

        click.echo(f"\nIdle Settings for '{engine_name}':")
        click.echo(f"  Timeout: {timeout_minutes} minutes")
        click.echo(
            f"  Current State: {'IDLE' if idle_state.get('is_idle') else 'ACTIVE'}"
        )

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty "✗ Error: " line
        # for a failure that has already been reported.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
1035
+
1036
+
1037
+ # ============================================================================
1038
+ # Maintenance
1039
+ # ============================================================================
1040
+
1041
+
1042
@engine_cli.command("resize")
@click.argument("name_or_id")
@click.option("--size", "-s", required=True, type=int, help="New size in GB")
@click.option(
    "--online",
    is_flag=True,
    help="Resize while running (requires manual filesystem expansion)",
)
@click.option("--force", "-f", is_flag=True, help="Force resize")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def resize_engine(
    name_or_id: str, size: int, online: bool, force: bool, env: Optional[str]
):
    """Resize an engine's boot disk."""
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the longer notes are kept in comments.
    #
    # name_or_id: engine name; when no engine with that name is found the
    #             value is used directly as the instance id.
    # size:       requested boot disk size in GB; must be positive.
    # online:     forwarded to client.resize_engine(); when set, the user is
    #             reminded that the filesystem must be expanded manually.
    # force:      skip the interactive confirmation prompt.
    # env:        target environment; detected from AWS when None.
    #
    # Every failure is written to stderr and ends in click.Abort.

    # Reject an impossible size before any AWS/API round-trip is made.
    if size <= 0:
        click.echo("✗ Error: size must be a positive number of GB", err=True)
        raise click.Abort()

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    try:
        # Find engine; fall back to treating the argument as a raw instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]
        engine_name = engine.get("name", engine_id)

        # click.confirm raises click.Abort itself on Ctrl-C / EOF
        if not force and not click.confirm(
            f"Resize boot disk of '{engine_name}' to {size}GB?"
        ):
            click.echo("Cancelled")
            return

        result = client.resize_engine(engine_id, size, online)

        if "error" in result:
            click.echo(f"✗ Error: {result['error']}", err=True)
            raise click.Abort()

        click.echo(f"✓ Boot disk resize initiated for '{engine_name}'")
        if online:
            click.echo("  Note: Manual filesystem expansion required")

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it (from the prompt or the error path) and
        # print a spurious, empty "✗ Error: " line.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()
1096
+
1097
+
1098
@engine_cli.command("debug")
@click.argument("name_or_id")
@click.option(
    "--env",
    default=None,
    help="Environment (dev, sand, prod) - auto-detected if not specified",
)
def debug_engine(name_or_id: str, env: Optional[str]):
    """Debug engine bootstrap status and files."""
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the longer notes are kept in comments.
    #
    # name_or_id: engine name; when no engine with that name is found the
    #             value is used directly as the instance id.
    # env:        target environment; detected from AWS when None.
    #
    # Prints the readiness flag, the current stage and one line per bootstrap
    # stage. Every failure is written to stderr and ends in click.Abort.

    # Check AWS auth and auto-detect environment if not specified
    check_aws_auth()

    if env is None:
        env = detect_aws_environment()

    client = StudioManagerClient(environment=env)

    # Stage status -> icon; any other status is rendered as a failure mark.
    status_icons = {"completed": "✓", "in_progress": "⏳"}

    try:
        # Find engine; fall back to treating the argument as a raw instance id
        engine = client.get_engine_by_name(name_or_id)
        if not engine:
            engine = {"instance_id": name_or_id, "name": name_or_id}

        engine_id = engine["instance_id"]

        # Get readiness status
        readiness = client.get_engine_readiness(engine_id)

        # Same error convention as the other engine commands: without this
        # check an error response would be rendered as an empty, misleading
        # "Ready: False" report.
        if "error" in readiness:
            click.echo(f"✗ Error: {readiness['error']}", err=True)
            raise click.Abort()

        click.echo(f"Engine: {engine_id}")
        click.echo(f"Ready: {readiness.get('ready', False)}")
        click.echo(f"Current Stage: {readiness.get('current_stage', 'unknown')}")
        click.echo("\nBootstrap Stages:")

        for index, stage in enumerate(readiness.get("stages", []), 1):
            stage_name = stage.get("name", "unknown")
            icon = status_icons.get(stage.get("status", "unknown"), "✗")

            # A missing or zero duration is simply omitted from the line
            duration_ms = stage.get("duration_ms")
            duration_str = f" ({duration_ms / 1000:.1f}s)" if duration_ms else ""

            click.echo(f"  {icon} {index}. {stage_name}{duration_str}")

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty "✗ Error: " line.
        raise
    except Exception as e:
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort()