PyPI - dayhoff-tools - Versions diffs - 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl - Mend

dayhoff-tools 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dayhoff-tools might be problematic. Click here for more details.

Files changed (5)

dayhoff_tools/cli/engine_commands.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Engine and Studio management commands for DHT CLI."""
 import json
+import re
 import shutil
 import subprocess
 import sys
@@ -19,7 +20,6 @@ from rich.panel import Panel
 from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
 from rich.prompt import Confirm, IntPrompt, Prompt
 from rich.table import Table
-import re
 # Initialize Typer apps
 engine_app = typer.Typer(help="Manage compute engines for development.")
@@ -34,6 +34,10 @@ HOURLY_COSTS = {
     "t4": 0.75,  # g4dn.2xlarge
     "a10g": 1.50,  # g5.2xlarge
     "a100": 21.96,  # p4d.24xlarge
+    "4_t4": 3.91,  # g4dn.12xlarge
+    "8_t4": 7.83,  # g4dn.metal
+    "4_a10g": 6.24,  # g5.12xlarge
+    "8_a10g": 16.29,  # g5.48xlarge
 }
 # SSH config management
@@ -43,6 +47,7 @@ SSH_MANAGED_COMMENT = "# Managed by dh engine"
 # Bootstrap stage helpers
 # --------------------------------------------------------------------------------
 def _colour_stage(stage: str) -> str:
     """Return colourised stage name for table output."""
     if not stage:
@@ -67,7 +72,14 @@ def _fetch_init_stages(instance_ids: List[str]) -> Dict[str, str]:
             for res in page["Reservations"]:
                 for inst in res["Instances"]:
                     iid = inst["InstanceId"]
-                    tag_val = next((t["Value"] for t in inst.get("Tags", []) if t["Key"] == "DayhoffInitStage"), None)
+                    tag_val = next(
+                        (
+                            t["Value"]
+                            for t in inst.get("Tags", [])
+                            if t["Key"] == "DayhoffInitStage"
+                        ),
+                        None,
+                    )
                     if tag_val:
                         stages[iid] = tag_val
     except Exception:
@@ -167,13 +179,13 @@ def format_duration(duration: timedelta) -> str:
 def get_disk_usage_via_ssm(instance_id: str) -> Optional[str]:
     """Get disk usage for an engine via SSM.
     Returns:
         String like "17/50 GB" or None if failed
     """
     try:
         ssm = boto3.client("ssm", region_name="us-east-1")
         # Run df command to get disk usage
         response = ssm.send_command(
             InstanceIds=[instance_id],
@@ -181,14 +193,14 @@ def get_disk_usage_via_ssm(instance_id: str) -> Optional[str]:
             Parameters={
                 "commands": [
                     # Get root filesystem usage in GB
-                    "df -BG / | tail -1 | awk '{gsub(/G/, \"\", $2); gsub(/G/, \"\", $3); print $3 \"/\" $2 \" GB\"}'"
+                    'df -BG / | tail -1 | awk \'{gsub(/G/, "", $2); gsub(/G/, "", $3); print $3 "/" $2 " GB"}\''
                 ],
                 "executionTimeout": ["10"],
             },
         )
         command_id = response["Command"]["CommandId"]
         # Wait for command to complete (with timeout)
         for _ in range(5):  # 5 second timeout
             time.sleep(1)
@@ -198,13 +210,13 @@ def get_disk_usage_via_ssm(instance_id: str) -> Optional[str]:
             )
             if result["Status"] in ["Success", "Failed"]:
                 break
         if result["Status"] == "Success":
             output = result["StandardOutputContent"].strip()
             return output if output else None
         return None
     except Exception as e:
         # logger.debug(f"Failed to get disk usage for {instance_id}: {e}") # Original code had this line commented out
         return None
@@ -212,13 +224,13 @@ def get_disk_usage_via_ssm(instance_id: str) -> Optional[str]:
 def get_studio_disk_usage_via_ssm(instance_id: str, username: str) -> Optional[str]:
     """Get disk usage for a studio via SSM.
     Returns:
         String like "333/500 GB" or None if failed
     """
     try:
         ssm = boto3.client("ssm", region_name="us-east-1")
         # Run df command to get studio disk usage
         response = ssm.send_command(
             InstanceIds=[instance_id],
@@ -226,14 +238,14 @@ def get_studio_disk_usage_via_ssm(instance_id: str, username: str) -> Optional[s
             Parameters={
                 "commands": [
                     # Get studio filesystem usage in GB
-                    f"df -BG /studios/{username} 2>/dev/null | tail -1 | awk '{{gsub(/G/, \"\", $2); gsub(/G/, \"\", $3); print $3 \"/\" $2 \" GB\"}}'"
+                    f'df -BG /studios/{username} 2>/dev/null | tail -1 | awk \'{{gsub(/G/, "", $2); gsub(/G/, "", $3); print $3 "/" $2 " GB"}}\''
                 ],
                 "executionTimeout": ["10"],
             },
         )
         command_id = response["Command"]["CommandId"]
         # Wait for command to complete (with timeout)
         for _ in range(5):  # 5 second timeout
             time.sleep(1)
@@ -243,13 +255,13 @@ def get_studio_disk_usage_via_ssm(instance_id: str, username: str) -> Optional[s
             )
             if result["Status"] in ["Success", "Failed"]:
                 break
         if result["Status"] == "Success":
             output = result["StandardOutputContent"].strip()
             return output if output else None
         return None
     except Exception:
         return None
@@ -434,7 +446,7 @@ def launch_engine(
         "cpu",
         "--type",
         "-t",
-        help="Engine type: cpu, cpumax, t4, a10g, a100",
+        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
     ),
     user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
     boot_disk_size: Optional[int] = typer.Option(
@@ -455,7 +467,17 @@ def launch_engine(
         username = user
     # Validate engine type
-    valid_types = ["cpu", "cpumax", "t4", "a10g", "a100"]
+    valid_types = [
+        "cpu",
+        "cpumax",
+        "t4",
+        "a10g",
+        "a100",
+        "4_t4",
+        "8_t4",
+        "4_a10g",
+        "8_a10g",
+    ]
     if engine_type not in valid_types:
         console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
         console.print(f"Valid types: {', '.join(valid_types)}")
@@ -489,7 +511,7 @@ def launch_engine(
             "engine_type": engine_type,
         }
         if boot_disk_size is not None:
-            request_data["boot_disk_size"] = boot_disk_size
+            request_data["boot_disk_size"] = str(boot_disk_size)
         if availability_zone:
             request_data["availability_zone"] = availability_zone
@@ -555,22 +577,17 @@ def list_engines(
         table.add_column("Disk Usage")
         table.add_column("Uptime/Since")
         table.add_column("$/hour", justify="right")
-        table.add_column("Cost Today", justify="right", style="yellow")
-        total_cost = 0.0
         for engine in engines:
             launch_time = parse_launch_time(engine["launch_time"])
             uptime = datetime.now(timezone.utc) - launch_time
             hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
             if engine["state"].lower() == "running":
-                daily_cost = hourly_cost * min(uptime.total_seconds() / 3600, 24)
-                total_cost += daily_cost
                 time_str = format_duration(uptime)
                 # Get disk usage for running engines
                 disk_usage = get_disk_usage_via_ssm(engine["instance_id"]) or "-"
             else:
-                daily_cost = 0
                 time_str = launch_time.strftime("%Y-%m-%d %H:%M")
                 disk_usage = "-"
@@ -583,13 +600,9 @@ def list_engines(
                 disk_usage,
                 time_str,
                 f"${hourly_cost:.2f}",
-                f"${daily_cost:.2f}" if daily_cost > 0 else "-",
             )
         console.print(table)
-        if total_cost > 0:
-            console.print(f"\n[yellow]Total cost today: ${total_cost:.2f}[/yellow]")
     else:
         error = response.json().get("error", "Unknown error")
         console.print(f"[red]❌ Failed to list engines: {error}[/red]")
@@ -617,7 +630,7 @@ def engine_status(
     if response.status_code != 200:
         console.print("[red]❌ Failed to fetch engine details[/red]")
         raise typer.Exit(1)
     engine_details = response.json()
     engine = engine_details.get("engine", engine)  # Use detailed info if available
     idle_detector = engine_details.get("idle_detector", {})
@@ -650,26 +663,38 @@ def engine_status(
             res = ssm.send_command(
                 InstanceIds=[engine["instance_id"]],
                 DocumentName="AWS-RunShellScript",
-                Parameters={"commands": ["cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || true"], "executionTimeout": ["10"]},
+                Parameters={
+                    "commands": [
+                        "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || true"
+                    ],
+                    "executionTimeout": ["10"],
+                },
             )
             cid = res["Command"]["CommandId"]
             time.sleep(1)
-            inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
+            inv = ssm.get_command_invocation(
+                CommandId=cid, InstanceId=engine["instance_id"]
+            )
             if inv["Status"] == "Success":
                 import json as _json
                 health = _json.loads(inv["StandardOutputContent"].strip() or "{}")
                 status_lines.append("")
                 status_lines.append("[bold]Health:[/bold]")
-                status_lines.append(f"  • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}")
-                status_lines.append(f"  • Idle Detector: {health.get('idle_detector_timer', 'unknown')}")
+                status_lines.append(
+                    f"  • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}"
+                )
+                status_lines.append(
+                    f"  • Idle Detector: {health.get('idle_detector_timer', 'unknown')}"
+                )
         except Exception:
             pass
     # Idle detector status (from new API endpoint)
     if idle_detector.get("available"):
         status_lines.append("")
         status_lines.append("[bold]Idle Detector:[/bold]")
         # Overall status
         if idle_detector["status"] == "active":
             status_lines.append("  [green]✓ Engine ACTIVE[/green]")
@@ -678,33 +703,41 @@ def engine_status(
             idle_threshold = idle_detector.get("idle_threshold", 1800)
             idle_minutes = idle_seconds // 60
             threshold_minutes = idle_threshold // 60
-            status_lines.append(f"  [yellow]⏱ Engine IDLE ({idle_minutes}/{threshold_minutes} minutes)[/yellow]")
+            status_lines.append(
+                f"  [yellow]⏱ Engine IDLE ({idle_minutes}/{threshold_minutes} minutes)[/yellow]"
+            )
         # Coffee lock
         if idle_detector.get("coffee_lock"):
-            status_lines.append(f"  • [cyan]☕ Caffeinated for another {idle_detector['coffee_lock']}[/cyan]")
+            status_lines.append(
+                f"  • [cyan]☕ Caffeinated for another {idle_detector['coffee_lock']}[/cyan]"
+            )
         # SSH sessions
         ssh_sessions = idle_detector.get("ssh_sessions", [])
         if ssh_sessions:
             status_lines.append(f"  • [blue]SSH Sessions ({len(ssh_sessions)}):[/blue]")
             for session in ssh_sessions:
-                status_lines.append(f"    - {session['tty']} (pid {session['pid']}, idle {session['idle_time']}) from {session['from_ip']}")
+                status_lines.append(
+                    f"    - {session['tty']} (pid {session['pid']}, idle {session['idle_time']}) from {session['from_ip']}"
+                )
         # IDE connections
         ide_conn = idle_detector.get("ide_connections")
         if ide_conn:
-            status_lines.append(f"  • [magenta]🖥  IDE connected ({ide_conn['connection_count']} connections)[/magenta]")
+            status_lines.append(
+                f"  • [magenta]🖥  IDE connected ({ide_conn['connection_count']} connections)[/magenta]"
+            )
     if attached_studios:
         status_lines.append("")
         status_lines.append("[bold]Attached Studios:[/bold]")
         for studio in attached_studios:
-            status_lines.append(
-                f"  • {studio['user']} ({studio['studio_id']})"
-            )
+            status_lines.append(f"  • {studio['user']} ({studio['studio_id']})")
-    console.print(Panel("\n".join(status_lines), title="Engine Status", border_style="blue"))
+    console.print(
+        Panel("\n".join(status_lines), title="Engine Status", border_style="blue")
+    )
     if show_log:
         console.print("\n[bold]Bootstrap Log:[/bold]")
@@ -713,11 +746,18 @@ def engine_status(
             resp = ssm.send_command(
                 InstanceIds=[engine["instance_id"]],
                 DocumentName="AWS-RunShellScript",
-                Parameters={"commands": ["cat /var/log/engine-setup.log 2>/dev/null || echo 'No setup log found'"], "executionTimeout": ["15"]},
+                Parameters={
+                    "commands": [
+                        "cat /var/log/engine-setup.log 2>/dev/null || echo 'No setup log found'"
+                    ],
+                    "executionTimeout": ["15"],
+                },
             )
             cid = resp["Command"]["CommandId"]
             time.sleep(2)
-            inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
+            inv = ssm.get_command_invocation(
+                CommandId=cid, InstanceId=engine["instance_id"]
+            )
             if inv["Status"] == "Success":
                 log_content = inv["StandardOutputContent"].strip()
                 if log_content:
@@ -856,7 +896,9 @@ def terminate_engine(
 @engine_app.command("ssh")
 def ssh_engine(
     name_or_id: str = typer.Argument(help="Engine name or instance ID"),
-    admin: bool = typer.Option(False, "--admin", help="Connect as ec2-user instead of the engine owner user"),
+    admin: bool = typer.Option(
+        False, "--admin", help="Connect as ec2-user instead of the engine owner user"
+    ),
 ):
     """Connect to an engine via SSH.
@@ -864,7 +906,7 @@ def ssh_engine(
     Pass `--admin` to connect with the underlying [`ec2-user`] account for break-glass or debugging.
     """
     username = check_aws_sso()
     # Check for Session Manager Plugin
     if not check_session_manager_plugin():
         raise typer.Exit(1)
@@ -886,7 +928,9 @@ def ssh_engine(
     ssh_user = "ec2-user" if admin else username
     # Update SSH config
-    console.print(f"Updating SSH config for [cyan]{engine['name']}[/cyan] (user: {ssh_user})...")
+    console.print(
+        f"Updating SSH config for [cyan]{engine['name']}[/cyan] (user: {ssh_user})..."
+    )
     update_ssh_config_entry(engine["name"], engine["instance_id"], ssh_user)
     # Connect
@@ -900,7 +944,11 @@ def config_ssh(
     all_engines: bool = typer.Option(
         False, "--all", "-a", help="Include all engines from all users"
     ),
-    admin: bool = typer.Option(False, "--admin", help="Generate entries that use ec2-user instead of per-engine owner user"),
+    admin: bool = typer.Option(
+        False,
+        "--admin",
+        help="Generate entries that use ec2-user instead of per-engine owner user",
+    ),
 ):
     """Update SSH config with available engines."""
     username = check_aws_sso()
@@ -964,7 +1012,7 @@ def config_ssh(
     if not clean:
         for engine in running_engines:
             # Determine ssh user based on --admin flag
-            ssh_user = 'ec2-user' if admin else username
+            ssh_user = "ec2-user" if admin else username
             new_lines.extend(
                 [
                     "",
@@ -998,7 +1046,9 @@ def config_ssh(
 def coffee(
     name_or_id: str = typer.Argument(help="Engine name or instance ID"),
     duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
-    cancel: bool = typer.Option(False, "--cancel", help="Cancel existing coffee lock instead of extending"),
+    cancel: bool = typer.Option(
+        False, "--cancel", help="Cancel existing coffee lock instead of extending"
+    ),
 ):
     """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
     username = check_aws_sso()
@@ -1036,7 +1086,9 @@ def coffee(
     if cancel:
         console.print(f"Cancelling coffee for [cyan]{engine['name']}[/cyan]…")
     else:
-        console.print(f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…")
+        console.print(
+            f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…"
+        )
     # Use SSM to run the engine coffee command
     ssm = boto3.client("ssm", region_name="us-east-1")
@@ -1046,7 +1098,11 @@ def coffee(
             DocumentName="AWS-RunShellScript",
             Parameters={
                 "commands": [
-                    ("/usr/local/bin/engine-coffee --cancel" if cancel else f"/usr/local/bin/engine-coffee {seconds_total}")
+                    (
+                        "/usr/local/bin/engine-coffee --cancel"
+                        if cancel
+                        else f"/usr/local/bin/engine-coffee {seconds_total}"
+                    )
                 ],
                 "executionTimeout": ["60"],
             },
@@ -1066,7 +1122,9 @@ def coffee(
         if result["Status"] == "Success":
             if cancel:
-                console.print("[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]")
+                console.print(
+                    "[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]"
+                )
             else:
                 console.print(f"[green]✓ Coffee poured for {duration}[/green]")
             console.print(
@@ -1089,7 +1147,9 @@ def resize_engine(
     name_or_id: str = typer.Argument(help="Engine name or instance ID"),
     size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
     online: bool = typer.Option(
-        False, "--online", help="Resize while running (requires manual filesystem expansion)"
+        False,
+        "--online",
+        help="Resize while running (requires manual filesystem expansion)",
     ),
     force: bool = typer.Option(
         False, "--force", "-f", help="Force resize and detach all studios"
@@ -1109,59 +1169,65 @@ def resize_engine(
     # Get current volume info to validate size
     ec2 = boto3.client("ec2", region_name="us-east-1")
     try:
         # Get instance details to find root volume
         instance_info = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
         instance = instance_info["Reservations"][0]["Instances"][0]
         # Find root volume
         root_device = instance.get("RootDeviceName", "/dev/xvda")
         root_volume_id = None
         for bdm in instance.get("BlockDeviceMappings", []):
             if bdm["DeviceName"] == root_device:
                 root_volume_id = bdm["Ebs"]["VolumeId"]
                 break
         if not root_volume_id:
             console.print("[red]❌ Could not find root volume[/red]")
             raise typer.Exit(1)
         # Get current volume size
         volumes = ec2.describe_volumes(VolumeIds=[root_volume_id])
         current_size = volumes["Volumes"][0]["Size"]
         if size <= current_size:
-            console.print(f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]")
+            console.print(
+                f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]"
+            )
             raise typer.Exit(1)
-        console.print(f"[yellow]Resizing engine boot disk from {current_size}GB to {size}GB[/yellow]")
+        console.print(
+            f"[yellow]Resizing engine boot disk from {current_size}GB to {size}GB[/yellow]"
+        )
         # Check if we need to stop the instance
         if not online and engine["state"].lower() == "running":
             console.print("Stopping engine for offline resize...")
             stop_response = make_api_request(
-                "POST", f"/engines/{engine['instance_id']}/stop", json_data={"detach_studios": False}
+                "POST",
+                f"/engines/{engine['instance_id']}/stop",
+                json_data={"detach_studios": False},
             )
             if stop_response.status_code != 200:
                 console.print("[red]❌ Failed to stop engine[/red]")
                 raise typer.Exit(1)
             # Wait for instance to stop
             console.print("Waiting for engine to stop...")
             waiter = ec2.get_waiter("instance_stopped")
             waiter.wait(InstanceIds=[engine["instance_id"]])
             console.print("[green]✓ Engine stopped[/green]")
         # Call the resize API
         console.print("Resizing volume...")
         resize_response = make_api_request(
-            "POST",
+            "POST",
             f"/engines/{engine['instance_id']}/resize",
-            json_data={"size": size, "detach_studios": force}
+            json_data={"size": size, "detach_studios": force},
         )
         if resize_response.status_code == 409 and not force:
             # Engine has attached studios
             data = resize_response.json()
@@ -1175,71 +1241,85 @@ def resize_engine(
                 resize_response = make_api_request(
                     "POST",
                     f"/engines/{engine['instance_id']}/resize",
-                    json_data={"size": size, "detach_studios": True}
+                    json_data={"size": size, "detach_studios": True},
                 )
             else:
                 console.print("Resize cancelled.")
                 return
         if resize_response.status_code != 200:
             error = resize_response.json().get("error", "Unknown error")
             console.print(f"[red]❌ Failed to resize engine: {error}[/red]")
             raise typer.Exit(1)
         # Check if studios were detached
         data = resize_response.json()
         detached_studios = data.get("detached_studios", 0)
         if detached_studios > 0:
-            console.print(f"[green]✓ Detached {detached_studios} studio(s) before resize[/green]")
+            console.print(
+                f"[green]✓ Detached {detached_studios} studio(s) before resize[/green]"
+            )
         # Wait for modification to complete
         console.print("Waiting for volume modification to complete...")
         while True:
             mod_state = ec2.describe_volumes_modifications(VolumeIds=[root_volume_id])
             if not mod_state["VolumesModifications"]:
                 break  # Modification complete
             modification = mod_state["VolumesModifications"][0]
             state = modification["ModificationState"]
             progress = modification.get("Progress", 0)
             # Show progress updates only for the resize phase
             if state == "modifying":
                 console.print(f"[yellow]Progress: {progress}%[/yellow]")
             # Exit as soon as optimization starts (resize is complete)
             if state == "optimizing":
                 console.print("[green]✓ Volume resized successfully[/green]")
-                console.print("[dim]AWS is optimizing the volume in the background (no action needed).[/dim]")
+                console.print(
+                    "[dim]AWS is optimizing the volume in the background (no action needed).[/dim]"
+                )
                 break
             if state == "completed":
                 console.print("[green]✓ Volume resized successfully[/green]")
                 break
             elif state == "failed":
                 console.print("[red]❌ Volume modification failed[/red]")
                 raise typer.Exit(1)
             time.sleep(2)  # Check more frequently for better UX
         # If offline resize, start the instance back up
         if not online and engine["state"].lower() == "running":
             console.print("Starting engine back up...")
-            start_response = make_api_request("POST", f"/engines/{engine['instance_id']}/start")
+            start_response = make_api_request(
+                "POST", f"/engines/{engine['instance_id']}/start"
+            )
             if start_response.status_code != 200:
-                console.print("[yellow]⚠️  Failed to restart engine automatically[/yellow]")
-                console.print(f"Please start it manually: [cyan]dh engine start {engine['name']}[/cyan]")
+                console.print(
+                    "[yellow]⚠️  Failed to restart engine automatically[/yellow]"
+                )
+                console.print(
+                    f"Please start it manually: [cyan]dh engine start {engine['name']}[/cyan]"
+                )
             else:
                 console.print("[green]✓ Engine started[/green]")
                 console.print("The filesystem will be automatically expanded on boot.")
         elif online and engine["state"].lower() == "running":
-            console.print("\n[yellow]⚠️  Online resize complete. You must now expand the filesystem:[/yellow]")
+            console.print(
+                "\n[yellow]⚠️  Online resize complete. You must now expand the filesystem:[/yellow]"
+            )
             console.print(f"1. SSH into the engine: [cyan]ssh {engine['name']}[/cyan]")
             console.print("2. Find the root device: [cyan]lsblk[/cyan]")
-            console.print("3. Expand the partition: [cyan]sudo growpart /dev/nvme0n1 1[/cyan] (adjust device name as needed)")
+            console.print(
+                "3. Expand the partition: [cyan]sudo growpart /dev/nvme0n1 1[/cyan] (adjust device name as needed)"
+            )
             console.print("4. Expand the filesystem: [cyan]sudo xfs_growfs /[/cyan]")
     except ClientError as e:
         console.print(f"[red]❌ Failed to resize engine: {e}[/red]")
         raise typer.Exit(1)
@@ -1355,30 +1435,44 @@ def create_ami(
         # If any user studios are still attached we must detach them before the instance reboots
         # for snapshot consistency; otherwise Studio-Manager metadata becomes stale.
-        attached_resp = make_api_request("GET", f"/engines/{engine['instance_id']}/studios")
-        attached_studios = attached_resp.json().get("studios", []) if attached_resp.status_code == 200 else []
+        attached_resp = make_api_request(
+            "GET", f"/engines/{engine['instance_id']}/studios"
+        )
+        attached_studios = (
+            attached_resp.json().get("studios", [])
+            if attached_resp.status_code == 200
+            else []
+        )
         if attached_studios:
-            console.print(f"Detaching {len(attached_studios)} studio(s) from this engine…")
+            console.print(
+                f"Detaching {len(attached_studios)} studio(s) from this engine…"
+            )
             for s in attached_studios:
                 console.print(f"  • {s['user']} ({s['studio_id']})")
             for s in attached_studios:
                 resp = make_api_request("POST", f"/studios/{s['studio_id']}/detach")
                 if resp.status_code != 200:
-                    console.print(f"[red]❌ Failed to detach {s['studio_id']} – aborting.[/red]")
+                    console.print(
+                        f"[red]❌ Failed to detach {s['studio_id']} – aborting.[/red]"
+                    )
                     return
             # Wait briefly for volumes to become available (max 2 min)
             # (time is already imported at module level)
             ec2_wait = boto3.client("ec2", region_name="us-east-1")
-            vol_ids = [s['studio_id'] for s in attached_studios]
+            vol_ids = [s["studio_id"] for s in attached_studios]
             console.print("Waiting for volumes to detach…")
             waiter = ec2_wait.get_waiter("volume_available")
             try:
-                waiter.wait(VolumeIds=vol_ids, WaiterConfig={"Delay": 5, "MaxAttempts": 24})
+                waiter.wait(
+                    VolumeIds=vol_ids, WaiterConfig={"Delay": 5, "MaxAttempts": 24}
+                )
             except Exception:
-                console.print("[yellow]Proceeding even though some volumes may still be detaching.[/yellow]")
+                console.print(
+                    "[yellow]Proceeding even though some volumes may still be detaching.[/yellow]"
+                )
         # Create the AMI
         with Progress(
@@ -1386,7 +1480,9 @@ def create_ami(
             TextColumn("[progress.description]{task.description}"),
             transient=True,
         ) as progress:
-            progress.add_task("Creating AMI (this will take several minutes)...", total=None)
+            progress.add_task(
+                "Creating AMI (this will take several minutes)...", total=None
+            )
             create_params = {
                 "InstanceId": engine["instance_id"],
@@ -1519,18 +1615,22 @@ def create_studio(
 @studio_app.command("status")
 def studio_status(
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Check status for a different user (admin only)"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Check status for a different user (admin only)"
+    ),
 ):
     """Show status of your studio."""
     username = check_aws_sso()
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Add warning when checking another user's studio
     if target_user != username:
-        console.print(f"[yellow]⚠️  Checking studio status for user: {target_user}[/yellow]")
+        console.print(
+            f"[yellow]⚠️  Checking studio status for user: {target_user}[/yellow]"
+        )
     studio = get_user_studio(target_user)
     if not studio:
         if target_user == username:
@@ -1585,18 +1685,20 @@ def studio_status(
 @studio_app.command("attach")
 def attach_studio(
     engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Attach a different user's studio (admin only)"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Attach a different user's studio (admin only)"
+    ),
 ):
     """Attach your studio to an engine."""
     username = check_aws_sso()
     # Check for Session Manager Plugin since we'll update SSH config
     if not check_session_manager_plugin():
         raise typer.Exit(1)
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Add confirmation when attaching another user's studio
     if target_user != username:
         console.print(f"[yellow]⚠️  Managing studio for user: {target_user}[/yellow]")
@@ -1682,7 +1784,7 @@ def attach_studio(
     # Determine retry strategy
     max_attempts = 40 if engine_started_now else 3
-    retry_delay  = 10 if engine_started_now else 3
+    retry_delay = 10 if engine_started_now else 3
     if engine_started_now:
         # Long spinner-based loop while the freshly started engine finishes booting
@@ -1692,17 +1794,24 @@ def attach_studio(
             TextColumn("[progress.description]{task.description}"),
             transient=True,
         ) as prog:
-            task = prog.add_task("Attaching studio (engine is still booting)…", total=None)
+            task = prog.add_task(
+                "Attaching studio (engine is still booting)…", total=None
+            )
             for attempt in range(max_attempts):
-                success, error_msg = _attempt_studio_attach(studio, engine, target_user, public_key)
+                success, error_msg = _attempt_studio_attach(
+                    studio, engine, target_user, public_key
+                )
                 if success:
                     break  # success!
                 # Update spinner every 3rd try to avoid log spam
                 if attempt % 3 == 0:
-                    prog.update(task, description=f"Attaching studio (engine is still booting)… {attempt+1}/{max_attempts}")
+                    prog.update(
+                        task,
+                        description=f"Attaching studio (engine is still booting)… {attempt+1}/{max_attempts}",
+                    )
                 if error_msg:
                     console.print(f"[red]❌ Failed to attach studio: {error_msg}[/red]")
@@ -1711,15 +1820,19 @@ def attach_studio(
                 time.sleep(retry_delay)
             else:
-                console.print("[yellow]Engine is still starting up – please retry in a minute.[/yellow]")
+                console.print(
+                    "[yellow]Engine is still starting up – please retry in a minute.[/yellow]"
+                )
                 return
     else:
         # Give the (already-running) engine a little breathing room – e.g. it may still be mounting EFS
         max_attempts = 10  # ~1 min total
-        retry_delay  = 6
+        retry_delay = 6
         for attempt in range(max_attempts):
-            success, error_msg = _attempt_studio_attach(studio, engine, target_user, public_key)
+            success, error_msg = _attempt_studio_attach(
+                studio, engine, target_user, public_key
+            )
             if success:
                 break  # attached!
@@ -1735,7 +1848,9 @@ def attach_studio(
                 time.sleep(retry_delay)
         else:
-            console.print("[yellow]Engine is busy or still initialising – please retry in about a minute.[/yellow]")
+            console.print(
+                "[yellow]Engine is busy or still initialising – please retry in about a minute.[/yellow]"
+            )
             return
     # Successful attach path
@@ -1799,14 +1914,16 @@ def _attempt_studio_attach(studio, engine, target_user, public_key):
 @studio_app.command("detach")
 def detach_studio(
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Detach a different user's studio (admin only)"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Detach a different user's studio (admin only)"
+    ),
 ):
     """Detach your studio from its current engine."""
     username = check_aws_sso()
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Add confirmation when detaching another user's studio
     if target_user != username:
         console.print(f"[yellow]⚠️  Managing studio for user: {target_user}[/yellow]")
@@ -1826,7 +1943,9 @@ def detach_studio(
         if target_user == username:
             console.print("[yellow]Your studio is not attached to any engine.[/yellow]")
         else:
-            console.print(f"[yellow]{target_user}'s studio is not attached to any engine.[/yellow]")
+            console.print(
+                f"[yellow]{target_user}'s studio is not attached to any engine.[/yellow]"
+            )
         return
     console.print(f"Detaching studio from {studio.get('attached_vm_id')}...")
@@ -1842,24 +1961,30 @@ def detach_studio(
 @studio_app.command("delete")
 def delete_studio(
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Delete a different user's studio (admin only)"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Delete a different user's studio (admin only)"
+    ),
 ):
     """Delete your studio permanently."""
     username = check_aws_sso()
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Extra warning when deleting another user's studio
     if target_user != username:
-        console.print(f"[red]⚠️  ADMIN ACTION: Deleting studio for user: {target_user}[/red]")
+        console.print(
+            f"[red]⚠️  ADMIN ACTION: Deleting studio for user: {target_user}[/red]"
+        )
     studio = get_user_studio(target_user)
     if not studio:
         if target_user == username:
             console.print("[yellow]You don't have a studio to delete.[/yellow]")
         else:
-            console.print(f"[yellow]User {target_user} doesn't have a studio to delete.[/yellow]")
+            console.print(
+                f"[yellow]User {target_user} doesn't have a studio to delete.[/yellow]"
+            )
         return
     console.print(
@@ -1870,7 +1995,11 @@ def delete_studio(
     console.print(f"Size: {studio['size_gb']}GB")
     # Multiple confirmations
-    if not Confirm.ask(f"\nAre you sure you want to delete {target_user}'s studio?" if target_user != username else "\nAre you sure you want to delete your studio?"):
+    if not Confirm.ask(
+        f"\nAre you sure you want to delete {target_user}'s studio?"
+        if target_user != username
+        else "\nAre you sure you want to delete your studio?"
+    ):
         console.print("Deletion cancelled.")
         return
@@ -1942,7 +2071,7 @@ def list_studios(
                 vm_id = studio["attached_vm_id"]
                 engine_name = engines.get(vm_id, "unknown")
                 attached_to = f"{engine_name} ({vm_id})"
                 # Try to get disk usage if attached
                 if studio["status"] == "in-use":
                     usage = get_studio_disk_usage_via_ssm(vm_id, studio["user"])
@@ -1966,14 +2095,16 @@ def list_studios(
 @studio_app.command("reset")
 def reset_studio(
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Reset a different user's studio"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Reset a different user's studio"
+    ),
 ):
     """Reset a stuck studio (admin operation)."""
     username = check_aws_sso()
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Add warning when resetting another user's studio
     if target_user != username:
         console.print(f"[yellow]⚠️  Resetting studio for user: {target_user}[/yellow]")
@@ -2044,14 +2175,16 @@ def reset_studio(
 @studio_app.command("resize")
 def resize_studio(
     size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
-    user: Optional[str] = typer.Option(None, "--user", "-u", help="Resize a different user's studio (admin only)"),
+    user: Optional[str] = typer.Option(
+        None, "--user", "-u", help="Resize a different user's studio (admin only)"
+    ),
 ):
     """Resize your studio volume (requires detachment)."""
     username = check_aws_sso()
     # Use specified user if provided, otherwise use current user
     target_user = user if user else username
     # Add warning when resizing another user's studio
     if target_user != username:
         console.print(f"[yellow]⚠️  Resizing studio for user: {target_user}[/yellow]")
@@ -2065,29 +2198,31 @@ def resize_studio(
         return
     current_size = studio["size_gb"]
     if size <= current_size:
-        console.print(f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]")
+        console.print(
+            f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]"
+        )
         raise typer.Exit(1)
     # Check if studio is attached
     if studio["status"] == "in-use":
         console.print("[yellow]⚠️  Studio must be detached before resizing[/yellow]")
         console.print(f"Currently attached to: {studio.get('attached_vm_id')}")
         if not Confirm.ask("\nDetach studio and proceed with resize?"):
             console.print("Resize cancelled.")
             return
         # Detach the studio
         console.print("Detaching studio...")
         response = make_api_request("POST", f"/studios/{studio['studio_id']}/detach")
         if response.status_code != 200:
             console.print("[red]❌ Failed to detach studio[/red]")
             raise typer.Exit(1)
         console.print("[green]✓ Studio detached[/green]")
         # Wait a moment for detachment to complete
         time.sleep(5)
@@ -2095,68 +2230,79 @@ def resize_studio(
     # Call the resize API
     resize_response = make_api_request(
-        "POST",
-        f"/studios/{studio['studio_id']}/resize",
-        json_data={"size": size}
+        "POST", f"/studios/{studio['studio_id']}/resize", json_data={"size": size}
     )
     if resize_response.status_code != 200:
         error = resize_response.json().get("error", "Unknown error")
         console.print(f"[red]❌ Failed to resize studio: {error}[/red]")
         raise typer.Exit(1)
     # Wait for volume modification to complete
     ec2 = boto3.client("ec2", region_name="us-east-1")
     console.print("Resizing volume...")
     # Track progress
     last_progress = 0
     while True:
         try:
-            mod_state = ec2.describe_volumes_modifications(VolumeIds=[studio["studio_id"]])
+            mod_state = ec2.describe_volumes_modifications(
+                VolumeIds=[studio["studio_id"]]
+            )
             if not mod_state["VolumesModifications"]:
                 break  # Modification complete
             modification = mod_state["VolumesModifications"][0]
             state = modification["ModificationState"]
             progress = modification.get("Progress", 0)
             # Show progress updates only for the resize phase
             if state == "modifying" and progress > last_progress:
                 console.print(f"[yellow]Progress: {progress}%[/yellow]")
                 last_progress = progress
             # Exit as soon as optimization starts (resize is complete)
             if state == "optimizing":
-                console.print(f"[green]✓ Studio resized successfully to {size}GB![/green]")
-                console.print("[dim]AWS is optimizing the volume in the background (no action needed).[/dim]")
+                console.print(
+                    f"[green]✓ Studio resized successfully to {size}GB![/green]"
+                )
+                console.print(
+                    "[dim]AWS is optimizing the volume in the background (no action needed).[/dim]"
+                )
                 break
             if state == "completed":
-                console.print(f"[green]✓ Studio resized successfully to {size}GB![/green]")
+                console.print(
+                    f"[green]✓ Studio resized successfully to {size}GB![/green]"
+                )
                 break
             elif state == "failed":
                 console.print("[red]❌ Volume modification failed[/red]")
                 raise typer.Exit(1)
             time.sleep(2)  # Check more frequently for better UX
         except ClientError:
             # Modification might be complete
             console.print(f"[green]✓ Studio resized successfully to {size}GB![/green]")
             break
-    console.print("\n[dim]The filesystem will be automatically expanded when you next attach the studio.[/dim]")
+    console.print(
+        "\n[dim]The filesystem will be automatically expanded when you next attach the studio.[/dim]"
+    )
     console.print(f"To attach: [cyan]dh studio attach <engine-name>[/cyan]")
 # ================= Idle timeout command =================
 @engine_app.command("idle")
 def idle_timeout_cmd(
     name_or_id: str = typer.Argument(help="Engine name or instance ID"),
-    set: Optional[str] = typer.Option(None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)")
+    set: Optional[str] = typer.Option(
+        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
+    ),
 ):
     """Show or set the engine idle-detector timeout."""
     check_aws_sso()
@@ -2177,11 +2323,18 @@ def idle_timeout_cmd(
         resp = ssm.send_command(
             InstanceIds=[engine["instance_id"]],
             DocumentName="AWS-RunShellScript",
-            Parameters={"commands": ["grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"], "executionTimeout": ["10"]},
+            Parameters={
+                "commands": [
+                    "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
+                ],
+                "executionTimeout": ["10"],
+            },
         )
         cid = resp["Command"]["CommandId"]
         time.sleep(1)
-        inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
+        inv = ssm.get_command_invocation(
+            CommandId=cid, InstanceId=engine["instance_id"]
+        )
         if inv["Status"] == "Success":
             line = inv["StandardOutputContent"].strip()
             secs = int(line.split("=")[1]) if "=" in line else 1800
@@ -2219,8 +2372,10 @@ def idle_timeout_cmd(
     time.sleep(2)
     console.print(f"[green]✓ Idle timeout updated to {set}[/green]")
 # Add this near the end, after the idle-timeout command
 @engine_app.command("debug")
 def debug_engine(
     name_or_id: str = typer.Argument(help="Engine name or instance ID"),
@@ -2240,17 +2395,32 @@ def debug_engine(
     console.print(f"[bold]Debug info for {engine['name']}:[/bold]\n")
     ssm = boto3.client("ssm", region_name="us-east-1")
     # Check multiple files and systemd status
     checks = [
-        ("Stage file", "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'"),
-        ("Health file", "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'"),
-        ("Sentinel file", "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'"),
-        ("Setup service", "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'"),
-        ("Bootstrap log tail", "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'"),
+        (
+            "Stage file",
+            "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'",
+        ),
+        (
+            "Health file",
+            "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'",
+        ),
+        (
+            "Sentinel file",
+            "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'",
+        ),
+        (
+            "Setup service",
+            "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'",
+        ),
+        (
+            "Bootstrap log tail",
+            "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'",
+        ),
         ("Environment file", "cat /etc/engine.env 2>/dev/null || echo 'MISSING'"),
     ]
     for name, cmd in checks:
         try:
             resp = ssm.send_command(
@@ -2260,14 +2430,16 @@ def debug_engine(
             )
             cid = resp["Command"]["CommandId"]
             time.sleep(1)
-            inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
+            inv = ssm.get_command_invocation(
+                CommandId=cid, InstanceId=engine["instance_id"]
+            )
             if inv["Status"] == "Success":
                 output = inv["StandardOutputContent"].strip()
                 console.print(f"[cyan]{name}:[/cyan]")
                 console.print(f"[dim]{output}[/dim]\n")
             else:
                 console.print(f"[cyan]{name}:[/cyan] [red]FAILED[/red]\n")
         except Exception as e:
             console.print(f"[cyan]{name}:[/cyan] [red]ERROR: {e}[/red]\n")

{dayhoff_tools-1.5.2.dist-info → dayhoff_tools-1.5.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dayhoff-tools
-Version: 1.5.2
+Version: 1.5.3
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com

{dayhoff_tools-1.5.2.dist-info → dayhoff_tools-1.5.3.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
 dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
 dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
-dayhoff_tools/cli/engine_commands.py,sha256=FvQTWb43x2Ns5rOW--ryfWkzzqvSzcpExDKyERY53Zk,87320
+dayhoff_tools/cli/engine_commands.py,sha256=8iMAKny8tWEcNtV6l90F8jlW6jx7FPAruJrsuyHFa58,88985
 dayhoff_tools/cli/main.py,sha256=tRN7WCBHg6uyNp6rA54pKTCoVmBntta2i0Yas3bUpZ4,4853
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
 dayhoff_tools/cli/utility_commands.py,sha256=FRZTPrjsG_qmIIqoNxd1Q1vVkS_5w8aY33IrVYVNCLg,18131
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=fV3goH2cH1Y0oLpGERnu4p70P2JfByJHjBh_oMRv9C0,23134
-dayhoff_tools-1.5.2.dist-info/METADATA,sha256=sb8H_nJZYquFb8E_uvrj2xT3stsRS4x_uHdxQk7z15A,2824
-dayhoff_tools-1.5.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-dayhoff_tools-1.5.2.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
-dayhoff_tools-1.5.2.dist-info/RECORD,,
+dayhoff_tools-1.5.3.dist-info/METADATA,sha256=lrN8SkfIylBQrb-w5mEBzOZek3-fV-izpylkRkI-pdU,2824
+dayhoff_tools-1.5.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.5.3.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.5.3.dist-info/RECORD,,

{dayhoff_tools-1.5.2.dist-info → dayhoff_tools-1.5.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{dayhoff_tools-1.5.2.dist-info → dayhoff_tools-1.5.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dayhoff-tools 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl

Potentially problematic release.

dayhoff-tools 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl