dayhoff-tools 1.3.15__py3-none-any.whl → 1.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ from rich.panel import Panel
18
18
  from rich.progress import Progress, SpinnerColumn, TextColumn
19
19
  from rich.prompt import Confirm, IntPrompt, Prompt
20
20
  from rich.table import Table
21
+ import re
21
22
 
22
23
  # Initialize Typer apps
23
24
  engine_app = typer.Typer(help="Manage compute engines for development.")
@@ -37,6 +38,41 @@ HOURLY_COSTS = {
37
38
  # SSH config management
38
39
  SSH_MANAGED_COMMENT = "# Managed by dh engine"
39
40
 
41
+ # --------------------------------------------------------------------------------
42
+ # Bootstrap stage helpers
43
+ # --------------------------------------------------------------------------------
44
+
45
+ def _colour_stage(stage: str) -> str:
46
+ """Return colourised stage name for table output."""
47
+ if not stage:
48
+ return "[dim]-[/dim]"
49
+ low = stage.lower()
50
+ if low.startswith("error"):
51
+ return f"[red]{stage}[/red]"
52
+ if low == "finished":
53
+ return f"[green]{stage}[/green]"
54
+ return f"[yellow]{stage}[/yellow]"
55
+
56
+
57
+ def _fetch_init_stages(instance_ids: List[str]) -> Dict[str, str]:
58
+ """Fetch DayhoffInitStage tag for many instances in one call."""
59
+ if not instance_ids:
60
+ return {}
61
+ ec2 = boto3.client("ec2", region_name="us-east-1")
62
+ stages: Dict[str, str] = {}
63
+ try:
64
+ paginator = ec2.get_paginator("describe_instances")
65
+ for page in paginator.paginate(InstanceIds=instance_ids):
66
+ for res in page["Reservations"]:
67
+ for inst in res["Instances"]:
68
+ iid = inst["InstanceId"]
69
+ tag_val = next((t["Value"] for t in inst.get("Tags", []) if t["Key"] == "DayhoffInitStage"), None)
70
+ if tag_val:
71
+ stages[iid] = tag_val
72
+ except Exception:
73
+ pass # best-effort
74
+ return stages
75
+
40
76
 
41
77
  def check_aws_sso() -> str:
42
78
  """Check AWS SSO status and return username."""
@@ -388,6 +424,12 @@ def launch_engine(
388
424
  help="Engine type: cpu, cpumax, t4, a10g, a100",
389
425
  ),
390
426
  user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
427
+ boot_disk_size: Optional[int] = typer.Option(
428
+ None,
429
+ "--size",
430
+ "-s",
431
+ help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
432
+ ),
391
433
  ):
392
434
  """Launch a new engine instance."""
393
435
  username = check_aws_sso()
@@ -401,9 +443,19 @@ def launch_engine(
401
443
  console.print(f"Valid types: {', '.join(valid_types)}")
402
444
  raise typer.Exit(1)
403
445
 
446
+ # Validate boot disk size
447
+ if boot_disk_size is not None:
448
+ if boot_disk_size < 20:
449
+ console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
450
+ raise typer.Exit(1)
451
+ if boot_disk_size > 1000:
452
+ console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
453
+ raise typer.Exit(1)
454
+
404
455
  cost = HOURLY_COSTS.get(engine_type, 0)
456
+ disk_info = f" with {boot_disk_size}GB boot disk" if boot_disk_size else ""
405
457
  console.print(
406
- f"Launching [cyan]{name}[/cyan] ({engine_type}) for ${cost:.2f}/hour..."
458
+ f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
407
459
  )
408
460
 
409
461
  with Progress(
@@ -413,17 +465,23 @@ def launch_engine(
413
465
  ) as progress:
414
466
  progress.add_task("Creating engine...", total=None)
415
467
 
416
- response = make_api_request(
417
- "POST",
418
- "/engines",
419
- json_data={"name": name, "user": username, "engine_type": engine_type},
420
- )
468
+ request_data = {
469
+ "name": name,
470
+ "user": username,
471
+ "engine_type": engine_type,
472
+ }
473
+ if boot_disk_size is not None:
474
+ request_data["boot_disk_size"] = boot_disk_size
475
+
476
+ response = make_api_request("POST", "/engines", json_data=request_data)
421
477
 
422
478
  if response.status_code == 201:
423
479
  data = response.json()
424
480
  console.print(f"[green]✓ Engine launched successfully![/green]")
425
481
  console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
426
482
  console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
483
+ if boot_disk_size:
484
+ console.print(f"Boot disk: {boot_disk_size}GB")
427
485
  console.print("\nThe engine is initializing. This may take a few minutes.")
428
486
  console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
429
487
  else:
@@ -464,6 +522,9 @@ def list_engines(
464
522
  console.print("No engines found.")
465
523
  return
466
524
 
525
+ # Fetch bootstrap stages once
526
+ stages_map = _fetch_init_stages([e["instance_id"] for e in engines])
527
+
467
528
  # Create table
468
529
  table = Table(title="Engines", box=box.ROUNDED)
469
530
  table.add_column("Name", style="cyan")
@@ -471,6 +532,7 @@ def list_engines(
471
532
  table.add_column("Type")
472
533
  table.add_column("User")
473
534
  table.add_column("Status")
535
+ table.add_column("Stage")
474
536
  table.add_column("Disk Usage")
475
537
  table.add_column("Uptime/Since")
476
538
  table.add_column("$/hour", justify="right")
@@ -493,12 +555,15 @@ def list_engines(
493
555
  time_str = launch_time.strftime("%Y-%m-%d %H:%M")
494
556
  disk_usage = "-"
495
557
 
558
+ stage_display = _colour_stage(stages_map.get(engine["instance_id"], "-"))
559
+
496
560
  table.add_row(
497
561
  engine["name"],
498
562
  engine["instance_id"],
499
563
  engine["engine_type"],
500
564
  engine["user"],
501
565
  format_status(engine["state"], engine.get("ready")),
566
+ stage_display,
502
567
  disk_usage,
503
568
  time_str,
504
569
  f"${hourly_cost:.2f}",
@@ -544,18 +609,43 @@ def engine_status(
544
609
  hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
545
610
  total_cost = hourly_cost * (uptime.total_seconds() / 3600)
546
611
 
547
- # Create status panel
612
+ stages_map = _fetch_init_stages([engine["instance_id"]])
613
+ stage_val = stages_map.get(engine["instance_id"], "-")
614
+
548
615
  status_lines = [
549
616
  f"[bold]Name:[/bold] {engine['name']}",
550
617
  f"[bold]Instance:[/bold] {engine['instance_id']}",
551
618
  f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
552
619
  f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
620
+ f"[bold]Bootstrap:[/bold] {_colour_stage(stage_val)}",
553
621
  f"[bold]User:[/bold] {engine['user']}",
554
622
  f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
555
623
  f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
556
624
  f"[bold]Cost:[/bold] ${hourly_cost:.2f}/hour (${total_cost:.2f} total)",
557
625
  ]
558
626
 
627
+ # Health report (only if bootstrap finished)
628
+ if stage_val == "finished":
629
+ try:
630
+ ssm = boto3.client("ssm", region_name="us-east-1")
631
+ res = ssm.send_command(
632
+ InstanceIds=[engine["instance_id"]],
633
+ DocumentName="AWS-RunShellScript",
634
+ Parameters={"commands": ["cat /var/run/engine-health.json || true"], "executionTimeout": ["10"]},
635
+ )
636
+ cid = res["Command"]["CommandId"]
637
+ time.sleep(1)
638
+ inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
639
+ if inv["Status"] == "Success":
640
+ import json as _json
641
+ health = _json.loads(inv["StandardOutputContent"].strip() or "{}")
642
+ status_lines.append("")
643
+ status_lines.append("[bold]Health:[/bold]")
644
+ status_lines.append(f" • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}")
645
+ status_lines.append(f" • Idle Detector: {health.get('idle_detector_timer', 'unknown')}")
646
+ except Exception:
647
+ pass
648
+
559
649
  if attached_studios:
560
650
  status_lines.append("")
561
651
  status_lines.append("[bold]Attached Studios:[/bold]")
@@ -1887,3 +1977,72 @@ def resize_studio(
1887
1977
 
1888
1978
  console.print("\n[dim]The filesystem will be automatically expanded when you next attach the studio.[/dim]")
1889
1979
  console.print(f"To attach: [cyan]dh studio attach <engine-name>[/cyan]")
1980
+
1981
+ # ================= Idle timeout command =================
1982
+
1983
+
1984
@engine_app.command("idle-timeout")
def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # NOTE: renamed from `set` (shadowed the builtin); the CLI flags
    # "--set"/"-s" are declared explicitly so the interface is unchanged.
    duration: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
):
    """Show or set the engine idle-detector timeout.

    Without --set, reads IDLE_TIMEOUT_SECONDS from /etc/engine.env on the
    engine via SSM and prints it (falls back to the 1800s default when the
    entry is absent). With --set, rewrites that entry and restarts the
    engine-idle-detector timer, then verifies the SSM command succeeded.
    """
    check_aws_sso()

    # Resolve engine from the API inventory.
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    ssm = boto3.client("ssm", region_name="us-east-1")

    if duration is None:
        # ----- show current value -----
        resp = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
                ],
                "executionTimeout": ["10"],
            },
        )
        cid = resp["Command"]["CommandId"]
        time.sleep(1)
        inv = ssm.get_command_invocation(
            CommandId=cid, InstanceId=engine["instance_id"]
        )
        if inv["Status"] == "Success":
            line = inv["StandardOutputContent"].strip()
            try:
                secs = int(line.split("=", 1)[1]) if "=" in line else 1800
            except ValueError:
                # Malformed value in engine.env — report the baked-in default.
                secs = 1800
            console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
        else:
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
        return

    # ----- set new value -----
    m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", duration)
    if not m:
        console.print("[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]")
        raise typer.Exit(1)
    hours = int(m.group(1) or 0)
    minutes = int(m.group(2) or 0)
    seconds = hours * 3600 + minutes * 60
    if seconds == 0:
        # The regex also matches "" — reject zero-length timeouts here.
        console.print("[red]❌ Duration must be greater than zero[/red]")
        raise typer.Exit(1)

    console.print(f"Setting idle timeout to {duration} ({seconds} seconds)…")

    cmd = (
        "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
        f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
        "sudo systemctl restart engine-idle-detector.timer"
    )

    resp = ssm.send_command(
        InstanceIds=[engine["instance_id"]],
        DocumentName="AWS-RunShellScript",
        Parameters={"commands": [cmd], "executionTimeout": ["60"]},
    )
    cid = resp["Command"]["CommandId"]

    # Previously this slept 2s and unconditionally claimed success; poll the
    # invocation until it reaches a terminal state (up to ~30s) instead.
    status = "Pending"
    for _ in range(15):
        time.sleep(2)
        try:
            inv = ssm.get_command_invocation(
                CommandId=cid, InstanceId=engine["instance_id"]
            )
        except Exception:
            continue  # invocation may not be registered yet
        status = inv["Status"]
        if status not in ("Pending", "InProgress", "Delayed"):
            break

    if status == "Success":
        console.print(f"[green]✓ Idle timeout updated to {duration}[/green]")
    else:
        console.print(
            f"[red]❌ Idle timeout update did not complete (SSM status: {status})[/red]"
        )
        raise typer.Exit(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.3.15
3
+ Version: 1.3.17
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
3
3
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
4
4
  dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
6
- dayhoff_tools/cli/engine_commands.py,sha256=DPD0rsctEz4iI4KXyPdMwS5XzlXaTv0DKUiYAtlsT1w,69414
6
+ dayhoff_tools/cli/engine_commands.py,sha256=DlWw8oUzOJqN6uUgsxF5whfY8jfwvBko8hvRm3ZHxVc,75814
7
7
  dayhoff_tools/cli/main.py,sha256=rgeEHD9lJ8SBCR34BTLb7gVInHUUdmEBNXAJnq5yEU4,4795
8
8
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
9
9
  dayhoff_tools/cli/utility_commands.py,sha256=qs8vH9TBFHsOPC3X8cU3qZigM3dDn-2Ytq4o_F2WubU,27874
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
27
27
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
28
28
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
29
29
  dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
30
- dayhoff_tools-1.3.15.dist-info/METADATA,sha256=2JVTqkvXrTEVUfMuItJ9WEr9xQtmd2TLqHadTZlzUgc,2825
31
- dayhoff_tools-1.3.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
- dayhoff_tools-1.3.15.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
- dayhoff_tools-1.3.15.dist-info/RECORD,,
30
+ dayhoff_tools-1.3.17.dist-info/METADATA,sha256=69jNn8FqivgUWrwrLZ9S-ZEherpCtEHUrBABW5dewWs,2825
31
+ dayhoff_tools-1.3.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ dayhoff_tools-1.3.17.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
+ dayhoff_tools-1.3.17.dist-info/RECORD,,