dayhoff-tools 1.9.9__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,739 @@
1
+ """Core engine commands: launch, list, and status."""
2
+
3
+ import json
4
+ import time
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Dict, Optional
7
+
8
+ import boto3
9
+ import typer
10
+ from rich import box
11
+ from rich.panel import Panel
12
+ from rich.progress import Progress, SpinnerColumn, TextColumn
13
+ from rich.table import Table
14
+
15
+ from .shared import (
16
+ HOURLY_COSTS,
17
+ _fetch_init_stages,
18
+ check_aws_sso,
19
+ console,
20
+ format_duration,
21
+ format_status,
22
+ get_disk_usage_via_ssm,
23
+ make_api_request,
24
+ parse_launch_time,
25
+ resolve_engine,
26
+ )
27
+
28
+
29
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
    boot_disk_size: Optional[int] = typer.Option(
        None,
        "--size",
        "-s",
        help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
    ),
    availability_zone: Optional[str] = typer.Option(
        None,
        "--az",
        help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
    ),
):
    """Launch a new engine instance."""
    # NOTE: the docstring above is kept to one line because typer shows it as
    # the command's help text.
    #
    # Flow: validate the arguments locally, POST the request to /engines, and
    # report the outcome. Every failure path (invalid type, disk size out of
    # range, non-201 response) exits with status 1 so that shell scripts can
    # detect a failed launch.

    # check_aws_sso() supplies the default username; --user overrides it.
    username = check_aws_sso()
    if user:
        username = user

    # Validate engine type
    valid_types = (
        "cpu",
        "cpumax",
        "t4",
        "a10g",
        "a100",
        "4_t4",
        "8_t4",
        "4_a10g",
        "8_a10g",
    )
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    # Validate boot disk size (only when the caller supplied one)
    if boot_disk_size is not None:
        if boot_disk_size < 20:
            console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
            raise typer.Exit(1)
        if boot_disk_size > 1000:
            console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
            raise typer.Exit(1)

    # Unknown engine types cannot reach this point, but .get() keeps the
    # message printable even if HOURLY_COSTS lacks an entry.
    cost = HOURLY_COSTS.get(engine_type, 0)
    disk_info = f" with {boot_disk_size}GB boot disk" if boot_disk_size else ""
    console.print(
        f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
    )

    # Optional fields are only sent when the user set them, so the service
    # applies its own defaults otherwise.
    request_data: Dict[str, Any] = {
        "name": name,
        "user": username,
        "engine_type": engine_type,
    }
    if boot_disk_size is not None:
        request_data["boot_disk_size"] = boot_disk_size
    if availability_zone:
        request_data["availability_zone"] = availability_zone

    # The spinner is transient: it is removed from the terminal once the
    # request returns, before any result lines are printed.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating engine...", total=None)
        response = make_api_request("POST", "/engines", json_data=request_data)

    if response.status_code != 201:
        # The error body may not be JSON (or not a JSON object); fall back to
        # the HTTP status instead of crashing with a traceback.
        try:
            error = response.json().get("error", "Unknown error")
        except (ValueError, AttributeError):
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine launched successfully![/green]")
    console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
    console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
    if boot_disk_size:
        console.print(f"Boot disk: {boot_disk_size}GB")
    console.print("\nThe engine is initializing. This may take a few minutes.")
    console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
118
+
119
+
120
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    running_only: bool = typer.Option(
        False, "--running", help="Show only running engines"
    ),
    stopped_only: bool = typer.Option(
        False, "--stopped", help="Show only stopped engines"
    ),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
):
    """List engines (shows all engines by default)."""
    # NOTE: the docstring above is kept to one line because typer shows it as
    # the command's help text.
    #
    # Fetches the engine list from the API, optionally filters it by state,
    # and renders a table. With --detailed an extra "Disk Usage" column is
    # filled per running engine via get_disk_usage_via_ssm(), which is slow.
    # If both --running and --stopped are given, --running wins.
    # Exits with status 1 when the API request fails.

    # Called for its authentication check; the returned username is not needed.
    check_aws_sso()

    params: Dict[str, str] = {}
    if user:
        params["user"] = user
    if detailed:
        params["check_ready"] = "true"

    response = make_api_request("GET", "/engines", params=params)

    if response.status_code != 200:
        # The error body may not be JSON (or not a JSON object); fall back to
        # the HTTP status instead of crashing with a traceback.
        try:
            error = response.json().get("error", "Unknown error")
        except (ValueError, AttributeError):
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])

    # Filter by state if requested
    if running_only:
        engines = [e for e in engines if e["state"].lower() == "running"]
    elif stopped_only:
        engines = [e for e in engines if e["state"].lower() == "stopped"]

    if not engines:
        console.print("No engines found.")
        return

    # Create table; the disk column only exists in detailed mode.
    table = Table(title="Engines", box=box.ROUNDED)
    table.add_column("Name", style="cyan")
    table.add_column("Instance ID", style="dim")
    table.add_column("Type")
    table.add_column("User")
    table.add_column("Status")
    if detailed:
        table.add_column("Disk Usage")
    table.add_column("Uptime/Since")
    table.add_column("$/hour", justify="right")

    for engine in engines:
        is_running = engine["state"].lower() == "running"
        launch_time = parse_launch_time(engine["launch_time"])
        hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)

        if is_running:
            # Running engines show elapsed uptime ...
            time_str = format_duration(datetime.now(timezone.utc) - launch_time)
        else:
            # ... everything else shows the launch timestamp.
            time_str = launch_time.strftime("%Y-%m-%d %H:%M")

        row_data = [
            engine["name"],
            engine["instance_id"],
            engine["engine_type"],
            engine["user"],
            format_status(engine["state"], engine.get("ready")),
        ]
        if detailed:
            # Disk usage is only queried for running engines; "-" otherwise
            # or when the query returns nothing.
            if is_running:
                row_data.append(get_disk_usage_via_ssm(engine["instance_id"]) or "-")
            else:
                row_data.append("-")
        row_data.extend([time_str, f"${hourly_cost:.2f}"])

        table.add_row(*row_data)

    console.print(table)
    if not detailed and any(e["state"].lower() == "running" for e in engines):
        console.print(
            "\n[dim]Tip: Use --detailed to see disk usage and bootstrap status (slower)[/dim]"
        )
216
+
217
+
218
def _run_ssm_command(
    instance_id: str,
    command: str,
    *,
    timeout_sec: Optional[int] = None,
    attempts: int = 4,
    delay: float = 0.5,
) -> Optional[str]:
    """Run one shell command on an instance via SSM and return its stdout.

    The invocation is polled up to ``attempts`` times, ``delay`` seconds apart,
    and polling stops as soon as the command reaches a terminal status. The
    standard output is returned only when the final status is ``Success``;
    otherwise ``None`` is returned. boto3 errors from ``send_command`` and
    ``get_command_invocation`` propagate, so each caller decides how
    best-effort it wants to be.
    """
    ssm = boto3.client("ssm", region_name="us-east-1")
    parameters: Dict[str, Any] = {"commands": [command]}
    if timeout_sec is not None:
        parameters["executionTimeout"] = [str(timeout_sec)]
    sent = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters=parameters,
    )
    command_id = sent["Command"]["CommandId"]

    status = None
    stdout = ""
    for _ in range(attempts):
        time.sleep(delay)
        try:
            invocation = ssm.get_command_invocation(
                CommandId=command_id, InstanceId=instance_id
            )
        except ssm.exceptions.InvocationDoesNotExist:
            # The invocation may not be registered yet right after
            # send_command; keep polling instead of giving up.
            continue
        status = invocation.get("Status")
        stdout = invocation.get("StandardOutputContent") or ""
        if status in ("Success", "Failed", "Cancelled", "TimedOut"):
            break
    return stdout if status == "Success" else None


def _fetch_idle_state(
    instance_id: str, *, timeout_sec: int, attempts: int
) -> Optional[Dict[str, Any]]:
    """Read the idle detector's last_state.json from the engine (best-effort).

    Returns the parsed JSON object, or ``None`` when SSM fails, the command
    does not succeed in time, the output is not a JSON object, or the object
    is empty.
    """
    try:
        out = _run_ssm_command(
            instance_id,
            "cat /var/run/idle-detector/last_state.json 2>/dev/null || echo '{}'",
            timeout_sec=timeout_sec,
            attempts=attempts,
        )
        if out is None:
            return None
        data = json.loads(out.strip() or "{}")
    except Exception:
        # Deliberately broad: status display must never fail because SSM,
        # credentials, or the on-engine file are unavailable.
        return None
    return data if isinstance(data, dict) and data else None


def _state_markup(state: str) -> str:
    """Return the Rich markup used to display an engine state."""
    known = {
        "running": "[green]Running[/green]",
        "pending": "[yellow]Starting...[/yellow]",
        "stopping": "[yellow]Stopping...[/yellow]",
        "stopped": "[dim]Stopped[/dim]",
    }
    return known.get(state.lower(), state.capitalize())


def _idle_countdown_markup(idle_seconds: float, threshold: float) -> str:
    """Return 'Idle Xm/Ym: Zm left' markup for an idle engine."""
    elapsed = int(idle_seconds)
    limit = int(threshold)
    remaining_mins = max(0, limit - elapsed) // 60
    left = "<1m" if remaining_mins == 0 else f"{remaining_mins}m"
    return f"[yellow]Idle {elapsed//60}m/{limit//60}m: [red]{left}[/red] left[/yellow]"


def _idle_markup(idle_info: Dict[str, Any], running_state: str) -> str:
    """Return the Active/Idle/N-A header markup for the detailed view."""
    # No idle info, or explicitly unavailable -> N/A
    if not idle_info or idle_info.get("available") is False:
        return "[dim]N/A[/dim]"

    status = idle_info.get("status")
    if status == "active":
        return "[green]Active[/green]"
    if running_state in ("stopped", "stopping"):
        return "[dim]N/A[/dim]"

    if status == "idle":
        idle_seconds = idle_info.get("idle_seconds")
        threshold = idle_info.get("idle_threshold")
        if isinstance(threshold, (int, float)):
            if isinstance(idle_seconds, (int, float)):
                return _idle_countdown_markup(idle_seconds, threshold)
            return f"[yellow]Idle ?/{int(threshold)//60}m[/yellow]"
        return "[yellow]Idle ?/?[/yellow]"

    # Unknown status value
    return "[dim]N/A[/dim]"


def _idle_state_to_detector(data: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a last_state.json object into the idle_detector dict shape.

    Handles both layouts the file may have: a ``reasons`` list, or the older
    ``forensics`` mapping from which an equivalent reasons list is synthesized.
    """
    # Local import: the module imports only datetime/timezone from datetime.
    from datetime import timedelta

    idle_info: Dict[str, Any] = {"available": True}
    idle_info["status"] = "idle" if data.get("idle", False) else "active"

    # Threshold and elapsed idle time, both in seconds
    if isinstance(data.get("timeout_sec"), (int, float)):
        idle_info["idle_threshold"] = int(data["timeout_sec"])
    if isinstance(data.get("idle_seconds"), (int, float)):
        idle_info["idle_seconds"] = int(data["idle_seconds"])

    reasons = data.get("reasons")
    if not isinstance(reasons, list):
        # Old layout: build one reason entry per known sensor from "forensics".
        forensics = data.get("forensics") or {}
        if not isinstance(forensics, dict):
            forensics = {}
        reasons = []
        for sensor_name, key in (
            ("CoffeeLockSensor", "coffee"),
            ("ActiveLoginSensor", "ssh"),
            ("IDEConnectionSensor", "ide"),
            ("DockerWorkloadSensor", "docker"),
        ):
            entry = forensics.get(key) or {}
            reasons.append(
                {
                    "sensor": sensor_name,
                    "active": bool(entry.get("active", False)),
                    "reason": entry.get("reason", ""),
                    "forensic": entry.get("forensic", {}),
                }
            )
    idle_info["_reasons_raw"] = reasons

    # Derive per-sensor details from the active sensors
    for reason in reasons:
        if not isinstance(reason, dict) or not reason.get("active"):
            continue
        sensor = (reason.get("sensor") or "").lower()
        forensic = reason.get("forensic") or {}
        if sensor == "ideconnectionsensor":
            # Prefer unique_pid_count, then matches, then assume one connection
            count = forensic.get("unique_pid_count")
            if not isinstance(count, int):
                count = forensic.get("matches")
            if not isinstance(count, int):
                count = 1
            idle_info["ide_connections"] = {"connection_count": count}
        elif sensor == "coffeelocksensor":
            remaining = forensic.get("remaining_sec")
            if isinstance(remaining, (int, float)) and remaining > 0:
                idle_info["coffee_lock"] = format_duration(
                    timedelta(seconds=int(remaining))
                )
        elif sensor == "activeloginsensor":
            idle_info.setdefault("ssh_sessions", []).append(
                {
                    "tty": forensic.get("tty", "pts/?"),
                    "pid": forensic.get("pid", "?"),
                    "idle_time": forensic.get("idle_sec", 0),
                    "from_ip": forensic.get("remote_addr", "unknown"),
                }
            )
    return idle_info


def _print_fast_status(engine: Dict[str, Any]) -> None:
    """Print the minimal status panel: name, state, idle/active and sensors."""
    running_state = engine["state"].lower()
    is_running = running_state == "running"

    # Idle state is only queried for running engines (up to ~3 seconds).
    idle_data = (
        _fetch_idle_state(engine["instance_id"], timeout_sec=10, attempts=6)
        if is_running
        else None
    )

    idle_disp = ""
    if is_running:
        if not idle_data:
            # SSM failed or the detector has not written any state yet
            idle_disp = " [dim]N/A[/dim]"
        elif idle_data.get("idle", False):
            idle_seconds = idle_data.get("idle_seconds", 0)
            timeout_sec = idle_data.get("timeout_sec")
            if isinstance(timeout_sec, (int, float)) and isinstance(
                idle_seconds, (int, float)
            ):
                idle_disp = " " + _idle_countdown_markup(idle_seconds, timeout_sec)
            else:
                idle_disp = " [yellow]Idle ?/?[/yellow]"
        else:
            idle_disp = " [green]Active[/green]"

    status_lines = [
        f"[blue]{engine['name']}[/blue] {_state_markup(engine['state'])}{idle_disp}",
    ]

    # Add activity sensors if the idle data carries them
    reasons = idle_data.get("reasons") if idle_data else None
    if isinstance(reasons, list) and reasons:
        status_lines.append("")  # blank line before sensors
        sensor_map = {
            "CoffeeLockSensor": ("☕", "Coffee"),
            "ActiveLoginSensor": ("🐚", "SSH"),
            "IDEConnectionSensor": ("🖥 ", "IDE"),
            "DockerWorkloadSensor": ("🐳", "Docker"),
        }
        for reason in reasons:
            if not isinstance(reason, dict):
                continue
            sensor = reason.get("sensor", "Unknown")
            icon, label = sensor_map.get(sensor, ("?", sensor))
            status_str = (
                "[green]YES[/green]" if reason.get("active", False) else "[dim]nope[/dim]"
            )
            status_lines.append(f" {icon} {label:6} {status_str}")

    console.print(
        Panel("\n".join(status_lines), title="Engine Status", border_style="blue")
    )


def _print_bootstrap_log(instance_id: str) -> None:
    """Fetch /var/log/engine-setup.log via SSM and print it (best-effort)."""
    # Local import: only this helper needs to escape arbitrary text for Rich.
    from rich.markup import escape

    console.print("\n[bold]Bootstrap Log:[/bold]")
    try:
        out = _run_ssm_command(
            instance_id,
            "cat /var/log/engine-setup.log 2>/dev/null || echo 'No setup log found'",
            timeout_sec=15,
            attempts=6,
        )
    except Exception as e:
        console.print(f"[red]❌ Error fetching log: {e}[/red]")
        return

    if out is None:
        console.print("[red]❌ Could not retrieve bootstrap log[/red]")
        return

    log_content = out.strip()
    if log_content:
        # Escape so brackets in the log are printed literally rather than
        # being parsed as Rich markup tags.
        console.print(f"[dim]{escape(log_content)}[/dim]")
    else:
        console.print("[yellow]No bootstrap log available[/yellow]")


def engine_status(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    detailed: bool = typer.Option(False, "--detailed", "-d", help="Show detailed status (slower)"),
    show_log: bool = typer.Option(False, "--show-log", help="Show bootstrap log (requires --detailed)"),
):
    """Show engine status and information."""
    # NOTE: the docstring above is kept to one line because typer shows it as
    # the command's help text.
    #
    # Default mode prints a minimal panel (state, idle/active, sensors).
    # --detailed adds engine details, disk usage, idle timeout, a health
    # report and activity sensors; --show-log additionally prints the
    # bootstrap log. Exits with status 1 if an API request fails.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # Fast status display (default)
    if not detailed:
        _print_fast_status(engine)
        if show_log:
            console.print("[yellow]Note: --show-log requires --detailed flag[/yellow]")
        return

    # Detailed engine status, including the API's idle detector info
    response = make_api_request("GET", f"/engines/{engine['instance_id']}")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engine details[/red]")
        raise typer.Exit(1)

    engine_details = response.json()
    engine = engine_details.get("engine", engine)  # Use detailed info if available
    idle_detector = engine_details.get("idle_detector", {}) or {}
    attached_studios = engine_details.get("attached_studios", [])

    instance_id = engine["instance_id"]
    running_state = engine["state"].lower()
    is_running = running_state == "running"

    launch_time = parse_launch_time(engine["launch_time"])
    uptime = datetime.now(timezone.utc) - launch_time
    hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)

    stage_val = _fetch_init_stages([instance_id]).get(instance_id, "-")

    # Try to fetch actual boot time via SSM (best-effort)
    boot_time_str: Optional[str] = None
    if is_running:
        try:
            boot_out = _run_ssm_command(
                instance_id, "uptime -s || who -b | awk '{print $3\" \"$4}'"
            )
        except Exception:
            boot_out = None
        boot_lines = (boot_out or "").strip().splitlines()
        boot_time_str = boot_lines[0] if boot_lines else None

    started_at = boot_time_str or launch_time.strftime("%Y-%m-%d %H:%M:%S")
    started_line = f"[bold]Started:[/bold] {started_at} ({format_duration(uptime)} ago)"

    # Enrich the API's idle-detector info from the on-engine state file. This
    # is merged before anything is rendered so the header, the idle timeout
    # line and the sensors all use the same data.
    overlay: Optional[Dict[str, Any]] = None
    if is_running:
        raw_state = _fetch_idle_state(instance_id, timeout_sec=5, attempts=4)
        if raw_state:
            try:
                overlay = _idle_state_to_detector(raw_state)
            except (AttributeError, TypeError, ValueError, OverflowError):
                # Malformed state file: treat as if no overlay was available
                overlay = None
    if overlay:
        # If API didn't indicate availability, replace entirely; otherwise fill gaps
        if not idle_detector.get("available"):
            idle_detector = overlay
        else:
            for key, value in overlay.items():
                idle_detector.setdefault(key, value)
    elif not idle_detector.get("available"):
        idle_detector = {"available": False}

    # ---------------- Front-loaded summary ----------------
    run_disp = _state_markup(engine["state"])
    active_disp = _idle_markup(idle_detector, running_state)
    top_lines = [
        f"[blue]{engine['name']}[/blue] {run_disp} {active_disp}\n",
    ]

    # Studios summary next
    if attached_studios:
        stu_texts = [
            f"[magenta]{s.get('user', 'studio')}[/magenta] ({s.get('studio_id', 'unknown')})"
            for s in attached_studios
        ]
        top_lines.append("Studios: " + ", ".join(stu_texts))

    # Paragraph break
    top_lines.append("")

    # ---------------- Details block ----------------
    status_lines = [
        f"Name: {engine['name']}",
        f"Instance: {engine['instance_id']}",
        f"Type: {engine['engine_type']} ({engine['instance_type']})",
        f"Status: {engine['state']}",
        f"User: {engine['user']}",
        f"IP: {engine.get('public_ip', 'N/A')}",
        started_line,
        f"$/hour: ${hourly_cost:.2f}",
    ]

    # Disk usage (like list --detailed)
    if is_running:
        disk_usage = get_disk_usage_via_ssm(instance_id) or "-"
        status_lines.append(f"Disk: {disk_usage}")

    # Idle timeout (shown even when not idle) - but only if we have data
    if idle_detector.get("available"):
        idle_threshold_secs: Optional[int] = None
        threshold = idle_detector.get("idle_threshold")
        if isinstance(threshold, (int, float)):
            try:
                idle_threshold_secs = int(threshold)
            except (ValueError, OverflowError):
                idle_threshold_secs = None

        if idle_threshold_secs is None and is_running:
            # Fallback: read /etc/engine.env via SSM
            try:
                env_out = _run_ssm_command(
                    instance_id,
                    "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env | cut -d'=' -f2 || echo '?'",
                    timeout_sec=5,
                )
            except Exception:
                env_out = None
            env_value = (env_out or "").strip()
            if env_value.isdigit():
                idle_threshold_secs = int(env_value)

        if idle_threshold_secs is not None:
            status_lines.append(
                f"Idle timeout: {idle_threshold_secs//60}m ({idle_threshold_secs}s)"
            )
        else:
            status_lines.append("Idle timeout: unknown")
    else:
        # No idle detector data available
        status_lines.append("Idle timeout: N/A")

    # Health report (only if bootstrap finished); skipped silently on failure
    if stage_val == "finished":
        try:
            health_out = _run_ssm_command(
                instance_id,
                "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || true",
                timeout_sec=10,
            )
            health = (
                json.loads(health_out.strip() or "{}") if health_out is not None else None
            )
        except Exception:
            health = None
        if isinstance(health, dict):
            idle_stat = health.get("idle_detector_service") or health.get(
                "idle_detector_timer", "unknown"
            )
            status_lines.extend(
                [
                    "",
                    "[bold]Health:[/bold]",
                    f" • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}",
                    f" • Idle Detector: {idle_stat}",
                ]
            )

    # Activity Sensors (show all with YES/no)
    if idle_detector.get("available"):
        status_lines.append("")
        status_lines.append("[bold]Activity Sensors:[/bold]")
        by_sensor: Dict[str, Dict[str, Any]] = {}
        for reason in idle_detector.get("_reasons_raw", []) or []:
            if isinstance(reason, dict) and reason.get("sensor"):
                by_sensor[reason["sensor"]] = reason

        def _sensor_line(label: str, key: str, emoji: str) -> str:
            entry = by_sensor.get(key, {})
            active = bool(entry.get("active"))
            reason_txt = entry.get("reason") or ("" if not active else "active")
            flag = "[green]YES[/green]" if active else "[dim]nope[/dim]"
            return (
                f" {emoji} {label}: {flag} {('- ' + reason_txt) if reason_txt else ''}"
            )

        status_lines.append(_sensor_line("Coffee", "CoffeeLockSensor", "☕"))
        status_lines.append(_sensor_line("Shell ", "ActiveLoginSensor", "🐚"))
        status_lines.append(_sensor_line(" IDE ", "IDEConnectionSensor", "🖥"))
        status_lines.append(_sensor_line("Docker", "DockerWorkloadSensor", "🐳"))

    # Combine top summary and details
    console.print(
        Panel(
            "\n".join(top_lines + status_lines),
            title="Engine Status",
            border_style="blue",
        )
    )

    if show_log:
        _print_bootstrap_log(instance_id)