dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. dayhoff_tools/__init__.py +10 -0
  2. dayhoff_tools/cli/cloud_commands.py +179 -43
  3. dayhoff_tools/cli/engine1/__init__.py +323 -0
  4. dayhoff_tools/cli/engine1/engine_core.py +703 -0
  5. dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
  6. dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
  7. dayhoff_tools/cli/engine1/engine_management.py +505 -0
  8. dayhoff_tools/cli/engine1/shared.py +501 -0
  9. dayhoff_tools/cli/engine1/studio_commands.py +825 -0
  10. dayhoff_tools/cli/engines_studios/__init__.py +6 -0
  11. dayhoff_tools/cli/engines_studios/api_client.py +351 -0
  12. dayhoff_tools/cli/engines_studios/auth.py +144 -0
  13. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
  14. dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
  15. dayhoff_tools/cli/engines_studios/progress.py +260 -0
  16. dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
  17. dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
  18. dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
  19. dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
  20. dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
  21. dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
  22. dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
  23. dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
  25. dayhoff_tools/cli/main.py +106 -7
  26. dayhoff_tools/cli/utility_commands.py +896 -179
  27. dayhoff_tools/deployment/base.py +70 -6
  28. dayhoff_tools/deployment/deploy_aws.py +165 -25
  29. dayhoff_tools/deployment/deploy_gcp.py +78 -5
  30. dayhoff_tools/deployment/deploy_utils.py +20 -7
  31. dayhoff_tools/deployment/job_runner.py +9 -4
  32. dayhoff_tools/deployment/processors.py +230 -418
  33. dayhoff_tools/deployment/swarm.py +47 -12
  34. dayhoff_tools/embedders.py +28 -26
  35. dayhoff_tools/fasta.py +181 -64
  36. dayhoff_tools/warehouse.py +268 -1
  37. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
  38. dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
  39. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
  40. dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
  41. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,703 @@
1
+ """Core engine commands: launch, list, and status."""
2
+
3
+ import json
4
+ import time
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Dict, Optional
7
+
8
+ import boto3
9
+ import typer
10
+ from rich import box
11
+ from rich.panel import Panel
12
+ from rich.progress import Progress, SpinnerColumn, TextColumn
13
+ from rich.table import Table
14
+
15
+ from .shared import (
16
+ HOURLY_COSTS,
17
+ _fetch_init_stages,
18
+ check_aws_sso,
19
+ console,
20
+ format_duration,
21
+ format_status,
22
+ get_disk_usage_via_ssm,
23
+ make_api_request,
24
+ parse_launch_time,
25
+ resolve_engine,
26
+ )
27
+
28
+
29
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
    boot_disk_size: Optional[int] = typer.Option(
        None,
        "--size",
        "-s",
        help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
    ),
    availability_zone: Optional[str] = typer.Option(
        None,
        "--az",
        help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
    ),
):
    """Launch a new engine instance.

    Validates the requested engine type and boot disk size locally, then
    POSTs the launch request to ``/engines``. A 201 response is treated as
    success; any other status is reported as a failure.

    Args:
        name: Name for the new engine.
        engine_type: One of the supported engine type identifiers.
        user: Username to launch for; overrides the name returned by
            ``check_aws_sso()``.
        boot_disk_size: Boot disk size in GB (20-1000). Only sent to the API
            when given.
        availability_zone: Preferred Availability Zone. Only sent to the API
            when given.

    Raises:
        typer.Exit: With code 1 when validation fails or the API rejects the
            launch, so that shell callers can detect the failure.
    """
    # The SSO check always runs, even when --user overrides the resulting name.
    username = check_aws_sso()
    if user:
        username = user

    # Validate engine type
    valid_types = [
        "cpu",
        "cpumax",
        "t4",
        "a10g",
        "a100",
        "4_t4",
        "8_t4",
        "4_a10g",
        "8_a10g",
    ]
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    # Validate boot disk size
    if boot_disk_size is not None:
        if boot_disk_size < 20:
            console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
            raise typer.Exit(1)
        if boot_disk_size > 1000:
            console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
            raise typer.Exit(1)

    cost = HOURLY_COSTS.get(engine_type, 0)
    disk_info = (
        f" with {boot_disk_size}GB boot disk" if boot_disk_size is not None else ""
    )
    console.print(
        f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
    )

    request_data: Dict[str, Any] = {
        "name": name,
        "user": username,
        "engine_type": engine_type,
    }
    if boot_disk_size is not None:
        request_data["boot_disk_size"] = boot_disk_size
    if availability_zone:
        request_data["availability_zone"] = availability_zone

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating engine...", total=None)
        response = make_api_request("POST", "/engines", json_data=request_data)

    if response.status_code != 201:
        # The error body is not guaranteed to be a JSON object (e.g. a
        # gateway error page); fall back to the bare status code then.
        try:
            error = response.json().get("error", "Unknown error")
        except (ValueError, AttributeError):
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        # Signal the failure through the exit status, not just the message.
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine launched successfully![/green]")
    console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
    console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
    if boot_disk_size is not None:
        console.print(f"Boot disk: {boot_disk_size}GB")
    console.print("\nThe engine is initializing. This may take a few minutes.")
    console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
118
+
119
+
120
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    running_only: bool = typer.Option(
        False, "--running", help="Show only running engines"
    ),
    stopped_only: bool = typer.Option(
        False, "--stopped", help="Show only stopped engines"
    ),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
):
    """List engines (shows all engines by default).

    Fetches engines from ``/engines`` and renders them as a table. With
    ``--detailed`` the API is asked for readiness (``check_ready``) and the
    disk usage of every running engine is queried, which is slower.

    Args:
        user: Only list engines belonging to this user.
        running_only: Keep only engines whose state is ``running``.
        stopped_only: Keep only engines whose state is ``stopped``; ignored
            when ``running_only`` is also set.
        detailed: Add the "Disk Usage" column and request readiness info.

    Raises:
        typer.Exit: With code 1 when the API request fails.
    """
    # Called for its authentication check; the returned username is not needed.
    check_aws_sso()

    params: Dict[str, str] = {}
    if user:
        params["user"] = user
    if detailed:
        params["check_ready"] = "true"

    response = make_api_request("GET", "/engines", params=params)

    if response.status_code != 200:
        # The error body is not guaranteed to be a JSON object.
        try:
            error = response.json().get("error", "Unknown error")
        except (ValueError, AttributeError):
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])

    # Filter by state if requested
    if running_only:
        engines = [e for e in engines if e["state"].lower() == "running"]
    elif stopped_only:
        engines = [e for e in engines if e["state"].lower() == "stopped"]

    if not engines:
        console.print("No engines found.")
        return

    table = Table(title="Engines", box=box.ROUNDED)
    table.add_column("Name", style="cyan")
    table.add_column("Instance ID", style="dim")
    table.add_column("Type")
    table.add_column("User")
    table.add_column("Status")
    if detailed:
        table.add_column("Disk Usage")
    table.add_column("Uptime/Since")
    table.add_column("$/hour", justify="right")

    now = datetime.now(timezone.utc)
    for engine in engines:
        launch_time = parse_launch_time(engine["launch_time"])
        hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
        is_running = engine["state"].lower() == "running"

        # Running engines show elapsed uptime; others show the launch timestamp.
        if is_running:
            time_str = format_duration(now - launch_time)
        else:
            time_str = launch_time.strftime("%Y-%m-%d %H:%M")

        row_data = [
            engine["name"],
            engine["instance_id"],
            engine["engine_type"],
            engine["user"],
            format_status(engine["state"], engine.get("ready")),
        ]
        if detailed:
            # Disk usage is only queried for running engines (slow path).
            if is_running:
                row_data.append(get_disk_usage_via_ssm(engine["instance_id"]) or "-")
            else:
                row_data.append("-")
        row_data.extend([time_str, f"${hourly_cost:.2f}"])

        table.add_row(*row_data)

    console.print(table)
    if not detailed and any(e["state"].lower() == "running" for e in engines):
        console.print(
            "\n[dim]Tip: Use --detailed to see disk usage and bootstrap status (slower)[/dim]"
        )
216
+
217
+
218
def engine_status(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
    show_log: bool = typer.Option(
        False, "--show-log", help="Show bootstrap log (requires --detailed)"
    ),
):
    """Show engine status and information.

    The default (fast) view prints the engine name, run state, idle status
    and activity sensors. With ``--detailed`` the engine details endpoint is
    queried and a fuller panel is printed (instance facts, disk usage, idle
    timeout, health report, Slack notification settings, activity sensors).

    Args:
        name_or_id: Engine name or instance ID, resolved against the engine
            list returned by the API.
        detailed: Show the detailed panel instead of the fast view.
        show_log: Also print the bootstrap log; only honoured together with
            ``detailed``, otherwise a note is printed.

    Raises:
        typer.Exit: With code 1 when the engine list or the engine details
            cannot be fetched.
    """
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Always try to fetch live idle data from the engine for both views
    live_idle_data = _fetch_live_idle_data(engine["instance_id"])

    if not detailed:
        _print_fast_engine_status(engine, live_idle_data)
        # This note must be emitted here: the fast view returns before the
        # log section, so checking the flag later would never be reached.
        if show_log:
            console.print("[yellow]Note: --show-log requires --detailed flag[/yellow]")
        return

    # Get detailed engine status including idle detector info
    response = make_api_request("GET", f"/engines/{engine['instance_id']}")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engine details[/red]")
        raise typer.Exit(1)

    engine_details = response.json()
    engine = engine_details.get("engine", engine)  # Use detailed info if available
    idle_detector = engine_details.get("idle_detector", {}) or {}
    attached_studios = engine_details.get("attached_studios", [])

    # Overlay stale API data with fresh data from the engine
    if live_idle_data:
        # If API didn't indicate availability, replace entirely; otherwise, update.
        if not idle_detector.get("available"):
            idle_detector = live_idle_data
        else:
            idle_detector.update(live_idle_data)
    elif not idle_detector.get("available"):
        # SSM failed and the API has no usable data either.
        idle_detector = {"available": False}

    instance_id = engine["instance_id"]
    running_state = engine["state"].lower()
    is_running = running_state == "running"

    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.now(timezone.utc) - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)

    stage_val = _fetch_init_stages([instance_id]).get(instance_id, "-")

    # Try to fetch actual boot time via SSM (best-effort)
    boot_time_str: Optional[str] = None
    if is_running:
        try:
            boot_out = _ssm_shell_stdout(
                instance_id, "uptime -s || who -b | awk '{print $3\" \"$4}'"
            )
        except Exception:
            # Best-effort only: fall back to the API launch time below.
            boot_out = None
        boot_lines = (boot_out or "").strip().splitlines()
        boot_time_str = boot_lines[0] if boot_lines else None

    started_line = (
        f"[bold]Started:[/bold] {boot_time_str} ({format_duration(uptime)} ago)"
        if boot_time_str
        else f"[bold]Started:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)"
    )

    # ---------------- Front-loaded summary ----------------
    run_disp = _run_state_markup(engine["state"])
    active_disp = _format_idle_status_display(idle_detector, running_state)

    top_lines = [
        f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n",
    ]

    # Studios summary next, with studio name in purple/magenta
    if attached_studios:
        stu_texts = [
            f"[magenta]{s.get('user', 'studio')}[/magenta] ({s.get('studio_id', 'unknown')})"
            for s in attached_studios
        ]
        top_lines.append("Studios: " + ", ".join(stu_texts))

    # Paragraph break
    top_lines.append("")

    # ---------------- Details block (white/default) ----------------
    status_lines = [
        f"Name: {engine['name']}",
        f"Instance: {engine['instance_id']}",
        f"Type: {engine['engine_type']} ({engine['instance_type']})",
        f"Status: {engine['state']}",
        f"User: {engine['user']}",
        f"IP: {engine.get('public_ip', 'N/A')}",
        started_line,
        f"$/hour: ${hourly_cost:.2f}",
    ]

    # Disk usage (like list --detailed)
    if is_running:
        disk_usage = get_disk_usage_via_ssm(instance_id) or "-"
        status_lines.append(f"Disk: {disk_usage}")

    status_lines.append(_idle_timeout_line(idle_detector, instance_id, is_running))

    # Health report (only if bootstrap finished)
    if stage_val == "finished":
        status_lines.extend(_health_report_lines(instance_id))

    status_lines.extend(_slack_notification_lines(instance_id))

    if idle_detector.get("available"):
        status_lines.extend(_activity_sensor_lines(idle_detector))

    # Combine top summary and details
    console.print(
        Panel(
            "\n".join(top_lines + status_lines),
            title="Engine Status",
            border_style="blue",
        )
    )

    if show_log:
        _print_bootstrap_log(instance_id)


# Rich markup for the engine states that get a dedicated rendering.
_RUN_STATE_MARKUP = {
    "running": "[green]Running[/green]",
    "pending": "[yellow]Starting...[/yellow]",
    "stopping": "[yellow]Stopping...[/yellow]",
    "stopped": "[dim]Stopped[/dim]",
}


def _run_state_markup(state: str) -> str:
    """Return the rich markup for an engine state; unknown states are capitalized."""
    return _RUN_STATE_MARKUP.get(state.lower(), state.capitalize())


def _ssm_shell_stdout(
    instance_id: str,
    command: str,
    timeout_secs: Optional[int] = None,
    wait_secs: float = 1.0,
) -> Optional[str]:
    """Run one shell command on the instance via SSM and return its stdout.

    Sends the command, waits ``wait_secs`` and reads the invocation once.
    Returns ``None`` when the invocation has not reached ``Success`` by then.
    boto3 errors are not caught here; callers decide how to degrade.
    """
    ssm = boto3.client("ssm", region_name="us-east-1")
    parameters: Dict[str, Any] = {"commands": [command]}
    if timeout_secs is not None:
        parameters["executionTimeout"] = [str(timeout_secs)]
    resp = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters=parameters,
    )
    cid = resp["Command"]["CommandId"]
    time.sleep(wait_secs)
    inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
    if inv.get("Status") != "Success":
        return None
    return inv.get("StandardOutputContent") or ""


def _print_fast_engine_status(
    engine: Dict[str, Any], live_idle_data: Optional[Dict[str, Any]]
) -> None:
    """Print the minimal status panel: name, run state, idle status and sensors."""
    running_state = engine["state"].lower()
    run_disp = _run_state_markup(engine["state"])
    idle_disp = " " + _format_idle_status_display(live_idle_data, running_state)

    status_lines = [
        f"[blue]{engine['name']}[/blue] {run_disp}{idle_disp}",
    ]

    # Add activity sensors if we have live data
    if live_idle_data and live_idle_data.get("_reasons_raw"):
        status_lines.append("")  # blank line before sensors

        sensor_map = {
            "CoffeeLockSensor": ("♨️ ", "Coffee"),
            "ActiveLoginSensor": ("🐚", "SSH"),
            "IDEConnectionSensor": ("🖥 ", "IDE"),
            "DockerWorkloadSensor": ("🐳", "Docker"),
        }

        for r in live_idle_data.get("_reasons_raw", []):
            if not isinstance(r, dict):
                continue  # skip malformed entries instead of crashing
            sensor = r.get("sensor", "Unknown")
            active = r.get("active", False)
            icon, label = sensor_map.get(sensor, ("?", sensor))
            status_str = "[green]YES[/green]" if active else "[dim]nope[/dim]"
            status_lines.append(f" {icon} {label:6} {status_str}")

    console.print(
        Panel("\n".join(status_lines), title="Engine Status", border_style="blue")
    )


def _idle_timeout_line(
    idle_detector: Dict[str, Any], instance_id: str, is_running: bool
) -> str:
    """Build the "Idle timeout" detail line.

    Prefers ``idle_threshold`` from the idle detector data; for a running
    engine it falls back to reading ``IDLE_TIMEOUT_SECONDS`` from
    ``/etc/engine.env`` via SSM.
    """
    if not idle_detector.get("available"):
        # No idle detector data available
        return "Idle timeout: N/A"

    idle_threshold_secs: Optional[int] = None
    raw_threshold = idle_detector.get("idle_threshold")
    if isinstance(raw_threshold, (int, float)):
        try:
            idle_threshold_secs = int(raw_threshold)
        except (ValueError, OverflowError):
            # NaN / infinity cannot be converted; treat as unknown.
            idle_threshold_secs = None

    if idle_threshold_secs is None and is_running:
        try:
            out = _ssm_shell_stdout(
                instance_id,
                "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env | cut -d'=' -f2 || echo '?'",
                timeout_secs=5,
            )
        except Exception:
            # Best-effort lookup: report "unknown" rather than fail the view.
            out = None
        value = (out or "").strip()
        if value.isdigit():
            idle_threshold_secs = int(value)

    if idle_threshold_secs is None:
        return "Idle timeout: unknown"
    return f"Idle timeout: {idle_threshold_secs//60}m ({idle_threshold_secs}s)"


def _health_report_lines(instance_id: str) -> list:
    """Return the "Health" section lines, or an empty list if it cannot be read."""
    try:
        out = _ssm_shell_stdout(
            instance_id,
            "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || true",
            timeout_secs=10,
        )
        if out is None:
            return []
        # An empty file is rendered as an empty report rather than skipped.
        health = json.loads(out.strip() or "{}")
        idle_stat = health.get("idle_detector_service") or health.get(
            "idle_detector_timer", "unknown"
        )
        return [
            "",
            "[bold]Health:[/bold]",
            f" • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}",
            f" • Idle Detector: {idle_stat}",
        ]
    except Exception:
        # Best-effort section: omit it when SSM or parsing fails.
        return []


def _slack_notification_lines(instance_id: str) -> list:
    """Return the "Slack Notifications" section lines, or an empty list on failure."""
    try:
        out = _ssm_shell_stdout(
            instance_id,
            "grep '^SLACK_NOTIFY_' /etc/engine.env || true",
            timeout_secs=10,
        )
    except Exception:
        # Best-effort section: omit it when SSM is unreachable.
        return []
    if out is None:
        return []

    settings: Dict[str, str] = {}
    for line in out.strip().splitlines():
        if "=" in line:
            key, value = line.split("=", 1)
            settings[key.strip()] = value.strip().lower()

    lines = ["", "[bold]Slack Notifications:[/bold]"]
    for label, key in (
        ("Idle Start", "SLACK_NOTIFY_IDLE_START"),
        ("Idle End", "SLACK_NOTIFY_IDLE_END"),
        ("Warnings", "SLACK_NOTIFY_WARNINGS"),
        ("Shutdown", "SLACK_NOTIFY_SHUTDOWN"),
    ):
        # A setting that is not present counts as off.
        enabled = settings.get(key, "false") == "true"
        status = "[green]on[/green]" if enabled else "[dim]off[/dim]"
        lines.append(f" - {label:15} {status}")
    return lines


def _activity_sensor_lines(idle_detector: Dict[str, Any]) -> list:
    """Return the "Activity Sensors" section lines (every sensor, YES/nope)."""
    reasons_raw = idle_detector.get("_reasons_raw", [])
    if not isinstance(reasons_raw, list):
        reasons_raw = []

    by_sensor: Dict[str, Dict[str, Any]] = {}
    for r in reasons_raw:
        if not isinstance(r, dict):
            continue  # skip malformed entries instead of crashing
        nm = r.get("sensor")
        if nm:
            by_sensor[nm] = r

    lines = ["", "[bold]Activity Sensors:[/bold]"]
    for label, key, emoji in (
        ("Coffee", "CoffeeLockSensor", "♨️ "),
        ("Shell ", "ActiveLoginSensor", "🐚"),
        (" IDE ", "IDEConnectionSensor", "🖥"),
        ("Docker", "DockerWorkloadSensor", "🐳"),
    ):
        r = by_sensor.get(key, {})
        active = bool(r.get("active"))
        reason_txt = r.get("reason") or ("" if not active else "active")
        flag = "[green]YES[/green]" if active else "[dim]nope[/dim]"
        lines.append(
            f" {emoji} {label}: {flag} {('- ' + reason_txt) if reason_txt else ''}"
        )
    return lines


def _print_bootstrap_log(instance_id: str) -> None:
    """Print the contents of /var/log/engine-setup.log fetched via SSM."""
    console.print("\n[bold]Bootstrap Log:[/bold]")
    try:
        out = _ssm_shell_stdout(
            instance_id,
            "cat /var/log/engine-setup.log 2>/dev/null || echo 'No setup log found'",
            timeout_secs=15,
            wait_secs=2,
        )
        if out is None:
            console.print("[red]❌ Could not retrieve bootstrap log[/red]")
            return
        log_content = out.strip()
        if log_content:
            console.print(f"[dim]{log_content}[/dim]")
        else:
            console.print("[yellow]No bootstrap log available[/yellow]")
    except Exception as e:
        console.print(f"[red]❌ Error fetching log: {e}[/red]")
589
+
590
+
591
+ def _format_idle_status_display(
592
+ idle_info: Optional[Dict[str, Any]], running_state: str
593
+ ) -> str:
594
+ """Computes the rich string for active/idle status display."""
595
+ # If we don't have idle info or it's explicitly unavailable, show N/A
596
+ if not idle_info or idle_info.get("available") is False:
597
+ return "[dim]N/A[/dim]"
598
+
599
+ if idle_info.get("status") == "active":
600
+ return "[green]Active[/green]"
601
+ if running_state in ("stopped", "stopping"):
602
+ return "[dim]N/A[/dim]"
603
+
604
+ # If idle, show time/threshold with time remaining if available
605
+ if idle_info.get("status") == "idle":
606
+ idle_seconds_v = idle_info.get("idle_seconds")
607
+ thresh_v = idle_info.get("idle_threshold")
608
+ if isinstance(idle_seconds_v, (int, float)) and isinstance(
609
+ thresh_v, (int, float)
610
+ ):
611
+ remaining = max(0, int(thresh_v) - int(idle_seconds_v))
612
+ remaining_mins = remaining // 60
613
+ remaining_secs = remaining % 60
614
+
615
+ if remaining < 60:
616
+ time_left_str = f"[red]{remaining}s[/red] left"
617
+ else:
618
+ time_left_str = f"[red]{remaining_mins}m {remaining_secs}s[/red] left"
619
+
620
+ return f"[yellow]Idle {int(idle_seconds_v)//60}m/{int(thresh_v)//60}m: {time_left_str}[/yellow]"
621
+ elif isinstance(thresh_v, (int, float)):
622
+ return f"[yellow]Idle ?/{int(thresh_v)//60}m[/yellow]"
623
+ else:
624
+ return "[yellow]Idle ?/?[/yellow]"
625
+
626
+ # Default to N/A if we can't determine status
627
+ return "[dim]N/A[/dim]"
628
+
629
+
630
def _fetch_live_idle_data(instance_id: str) -> Optional[Dict]:
    """
    Fetch and parse the live idle detector state from an engine via SSM.

    Reads `/var/run/idle-detector/last_state.json` on the instance, parses it,
    and transforms it into the schema used by the CLI for display logic:
    `available`, `status` ("idle"/"active"), optional `idle_threshold` and
    `idle_seconds` (seconds), and `_reasons_raw` (per-sensor entries).

    Returns None when the command does not succeed within about 3 seconds,
    when the file is missing or empty, or when anything goes wrong (this
    lookup is best-effort and never raises).
    """
    try:
        ssm = boto3.client("ssm", region_name="us-east-1")
        res = ssm.send_command(
            InstanceIds=[instance_id],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "cat /var/run/idle-detector/last_state.json 2>/dev/null || true",
                ],
                "executionTimeout": ["5"],
            },
        )
        cid = res["Command"]["CommandId"]

        # Wait up to 3 seconds for SSM command to complete
        terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")
        inv: Optional[Dict[str, Any]] = None
        for _ in range(6):  # 6 * 0.5 = 3 seconds
            time.sleep(0.5)
            try:
                inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
            except ssm.exceptions.InvocationDoesNotExist:
                # The invocation may not be registered yet right after
                # send_command; keep polling instead of giving up.
                continue
            if inv["Status"] in terminal_statuses:
                break
        if inv is None or inv["Status"] != "Success":
            return None

        content = inv["StandardOutputContent"].strip()
        if not content:
            return None
        data = json.loads(content)
        if not isinstance(data, dict):
            return None

        # Convert last_state schema (new or old) to idle_detector schema used by CLI output
        idle_info: Dict[str, Any] = {
            "available": True,
            "status": "idle" if data.get("idle", False) else "active",
        }

        # Threshold and elapsed (both in seconds)
        if isinstance(data.get("timeout_sec"), (int, float)):
            idle_info["idle_threshold"] = int(data["timeout_sec"])
        if isinstance(data.get("idle_seconds"), (int, float)):
            idle_info["idle_seconds"] = int(data["idle_seconds"])

        # Keep raw reasons for sensor display when available (new schema)
        if isinstance(data.get("reasons"), list):
            idle_info["_reasons_raw"] = data["reasons"]
        else:
            # Fallback: synthesize reasons from the old forensics layout
            forensics = data.get("forensics", {}) or {}
            synthesized = []
            for sensor_name, key in (
                ("CoffeeLockSensor", "coffee"),
                ("ActiveLoginSensor", "ssh"),
                ("IDEConnectionSensor", "ide"),
                ("DockerWorkloadSensor", "docker"),
            ):
                entry = forensics.get(key, {}) or {}
                synthesized.append(
                    {
                        "sensor": sensor_name,
                        "active": bool(entry.get("active", False)),
                        "reason": entry.get("reason", ""),
                        "forensic": entry.get("forensic", {}),
                    }
                )
            idle_info["_reasons_raw"] = synthesized

        return idle_info
    except Exception:
        # Deliberately broad: any SSM, parsing or schema problem means
        # "no live data", which callers render as N/A.
        return None