dayhoff-tools 1.10.1__py3-none-any.whl → 1.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. dayhoff_tools/cli/engine/__init__.py +323 -1
  2. dayhoff_tools/cli/engine/{status.py → engine_core.py} +201 -6
  3. dayhoff_tools/cli/engine/engine_maintenance.py +431 -0
  4. dayhoff_tools/cli/engine/engine_management.py +505 -0
  5. dayhoff_tools/cli/engine/shared.py +501 -0
  6. dayhoff_tools/cli/engine/studio_commands.py +825 -0
  7. dayhoff_tools/cli/main.py +2 -1
  8. dayhoff_tools/deployment/base.py +10 -2
  9. {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.3.dist-info}/METADATA +3 -2
  10. dayhoff_tools-1.10.3.dist-info/RECORD +39 -0
  11. {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.3.dist-info}/WHEEL +1 -1
  12. dayhoff_tools/cli/engine/coffee.py +0 -110
  13. dayhoff_tools/cli/engine/config_ssh.py +0 -113
  14. dayhoff_tools/cli/engine/debug.py +0 -79
  15. dayhoff_tools/cli/engine/gami.py +0 -160
  16. dayhoff_tools/cli/engine/idle.py +0 -148
  17. dayhoff_tools/cli/engine/launch.py +0 -101
  18. dayhoff_tools/cli/engine/list.py +0 -116
  19. dayhoff_tools/cli/engine/repair.py +0 -128
  20. dayhoff_tools/cli/engine/resize.py +0 -195
  21. dayhoff_tools/cli/engine/ssh.py +0 -62
  22. dayhoff_tools/cli/engine_studio_commands.py +0 -323
  23. dayhoff_tools/cli/engine_studio_utils/__init__.py +0 -1
  24. dayhoff_tools/cli/engine_studio_utils/api_utils.py +0 -47
  25. dayhoff_tools/cli/engine_studio_utils/aws_utils.py +0 -102
  26. dayhoff_tools/cli/engine_studio_utils/constants.py +0 -21
  27. dayhoff_tools/cli/engine_studio_utils/formatting.py +0 -210
  28. dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +0 -141
  29. dayhoff_tools/cli/studio/__init__.py +0 -1
  30. dayhoff_tools/cli/studio/attach.py +0 -314
  31. dayhoff_tools/cli/studio/create.py +0 -48
  32. dayhoff_tools/cli/studio/delete.py +0 -71
  33. dayhoff_tools/cli/studio/detach.py +0 -56
  34. dayhoff_tools/cli/studio/list.py +0 -81
  35. dayhoff_tools/cli/studio/reset.py +0 -90
  36. dayhoff_tools/cli/studio/resize.py +0 -134
  37. dayhoff_tools/cli/studio/status.py +0 -78
  38. dayhoff_tools-1.10.1.dist-info/RECORD +0 -61
  39. /dayhoff_tools/cli/engine/{lifecycle.py → engine_lifecycle.py} +0 -0
  40. {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.3.dist-info}/entry_points.txt +0 -0
@@ -1,148 +0,0 @@
1
- """Engine idle timeout command."""
2
-
3
- import re
4
- import time
5
- from typing import Optional
6
-
7
- import boto3
8
- import typer
9
-
10
- from ..engine_studio_utils.api_utils import make_api_request
11
- from ..engine_studio_utils.aws_utils import check_aws_sso
12
- from ..engine_studio_utils.constants import console
13
- from ..engine_studio_utils.formatting import resolve_engine
14
-
15
-
16
# SSM invocation statuses after which the result will no longer change.
_SSM_TERMINAL_STATUSES = ("Success", "Failed", "Cancelled", "TimedOut")

# Values written to /etc/engine.env for each accepted --slack choice.
_SLACK_NOTIFY_PRESETS = {
    "none": {
        "SLACK_NOTIFY_WARNINGS": "false",
        "SLACK_NOTIFY_IDLE_START": "false",
        "SLACK_NOTIFY_IDLE_END": "false",
        "SLACK_NOTIFY_SHUTDOWN": "false",
    },
    "default": {
        "SLACK_NOTIFY_WARNINGS": "true",
        "SLACK_NOTIFY_IDLE_START": "false",
        "SLACK_NOTIFY_IDLE_END": "false",
        "SLACK_NOTIFY_SHUTDOWN": "true",
    },
    "all": {
        "SLACK_NOTIFY_WARNINGS": "true",
        "SLACK_NOTIFY_IDLE_START": "true",
        "SLACK_NOTIFY_IDLE_END": "true",
        "SLACK_NOTIFY_SHUTDOWN": "true",
    },
}


def _run_ssm_commands(ssm, instance_id: str, commands, execution_timeout: int = 60):
    """Run shell commands on an instance via SSM and wait for the final result.

    Args:
        ssm: boto3 SSM client used to send and poll the command.
        instance_id: ID of the instance the commands run on.
        commands: Shell command lines passed to ``AWS-RunShellScript``.
        execution_timeout: Sent as ``executionTimeout`` (seconds) and also used
            as the local polling budget.

    Returns:
        The last ``get_command_invocation`` response once the command reached
        a terminal status, or ``None`` if it did not within the budget.
    """
    sent = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters={
            "commands": list(commands),
            "executionTimeout": [str(execution_timeout)],
        },
    )
    command_id = sent["Command"]["CommandId"]

    # A few extra seconds on top of the remote timeout so that a command which
    # finishes right at the limit is still observed.
    deadline = time.monotonic() + execution_timeout + 5
    while time.monotonic() < deadline:
        time.sleep(1)
        try:
            invocation = ssm.get_command_invocation(
                CommandId=command_id, InstanceId=instance_id
            )
        except ssm.exceptions.InvocationDoesNotExist:
            # The invocation may not be visible yet right after send_command.
            continue
        if invocation["Status"] in _SSM_TERMINAL_STATUSES:
            return invocation
    return None


def _describe_ssm_failure(invocation) -> str:
    """Return a short reason for an SSM command that did not succeed."""
    if invocation is None:
        return "timed out waiting for the command to finish"
    stderr = (invocation.get("StandardErrorContent") or "").strip()
    return stderr or f"command status {invocation['Status']}"


def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
    slack: Optional[str] = typer.Option(
        None, "--slack", help="Set Slack notifications: none, default, all"
    ),
):
    """Show or set engine idle-detector settings."""
    # NOTE: the docstring above is kept short on purpose; it is presumably
    # rendered as the command's --help text.
    #
    # Behaviour:
    #   --slack  rewrites the SLACK_NOTIFY_* keys in /etc/engine.env and asks
    #            the idle detector to reload (SIGHUP, falling back to restart).
    #   --set    rewrites IDLE_TIMEOUT_SECONDS and restarts the detector.
    #   neither  prints the currently configured timeout.
    # Exits with status 1 on invalid input or when a remote command fails.
    check_aws_sso()

    # Validate every option before touching the engine, so an invalid --set
    # cannot leave a --slack change applied on its own.
    slack_settings = None
    if slack:
        slack = slack.lower()
        slack_settings = _SLACK_NOTIFY_PRESETS.get(slack)
        if slack_settings is None:
            console.print("[red]❌ Invalid slack option. Use: none, default, all[/red]")
            raise typer.Exit(1)

    timeout_seconds = None
    if set is not None:
        m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", set)
        if not m:
            console.print(
                "[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]"
            )
            raise typer.Exit(1)
        timeout_seconds = int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60
        # The pattern also matches an empty string, which ends up here as 0.
        if timeout_seconds == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Handle slack notifications change
    if slack_settings is not None:
        console.print(f"Setting Slack notifications to [bold]{slack}[/bold]...")

        # Replace the key in place when present, otherwise append it.
        commands = [
            f"grep -q '^{key}=' /etc/engine.env && sudo sed -i 's|^{key}=.*|{key}={value}|' /etc/engine.env || echo '{key}={value}' | sudo tee -a /etc/engine.env > /dev/null"
            for key, value in slack_settings.items()
        ]
        # Instead of restarting service, send SIGHUP to reload config
        commands.append(
            "sudo pkill -HUP -f engine-idle-detector.py || sudo systemctl restart engine-idle-detector.service"
        )

        outcome = _run_ssm_commands(ssm, instance_id, commands, execution_timeout=60)
        if outcome is None or outcome["Status"] != "Success":
            console.print(
                f"[red]❌ Failed to update Slack notifications: {_describe_ssm_failure(outcome)}[/red]"
            )
            raise typer.Exit(1)
        console.print(f"[green]✓ Slack notifications updated to '{slack}'[/green]")
        console.print("[dim]Note: Settings updated without resetting idle timer[/dim]")

    # Handle setting new timeout value
    if timeout_seconds is not None:
        console.print(f"Setting idle timeout to {set} ({timeout_seconds} seconds)…")

        cmd = (
            "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
            f"echo 'IDLE_TIMEOUT_SECONDS={timeout_seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
            "sudo systemctl restart engine-idle-detector.service"
        )

        outcome = _run_ssm_commands(ssm, instance_id, [cmd], execution_timeout=60)
        if outcome is None or outcome["Status"] != "Success":
            console.print(
                f"[red]❌ Failed to update idle timeout: {_describe_ssm_failure(outcome)}[/red]"
            )
            raise typer.Exit(1)
        console.print(f"[green]✓ Idle timeout updated to {set}[/green]")

    # If no action was specified, show current timeout
    if set is None and slack is None:
        outcome = _run_ssm_commands(
            ssm,
            instance_id,
            [
                "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
            ],
            execution_timeout=10,
        )
        if outcome is None or outcome["Status"] != "Success":
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            raise typer.Exit(1)

        # grep may print several lines if the key is duplicated; use the last
        # one (presumably the assignment that takes effect - TODO confirm).
        lines = outcome["StandardOutputContent"].strip().splitlines()
        _, sep, raw_value = (lines[-1] if lines else "").partition("=")
        try:
            secs = int(raw_value) if sep else 1800
        except ValueError:
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            raise typer.Exit(1)
        console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
@@ -1,101 +0,0 @@
1
- """Engine launch command."""
2
-
3
- from typing import Any, Dict, Optional
4
-
5
- import typer
6
- from rich.progress import Progress, SpinnerColumn, TextColumn
7
-
8
- from ..engine_studio_utils.api_utils import make_api_request
9
- from ..engine_studio_utils.aws_utils import check_aws_sso
10
- from ..engine_studio_utils.constants import HOURLY_COSTS, console
11
-
12
-
13
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
    boot_disk_size: Optional[int] = typer.Option(
        None,
        "--size",
        "-s",
        help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
    ),
    availability_zone: Optional[str] = typer.Option(
        None,
        "--az",
        help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
    ),
):
    """Launch a new engine instance."""
    # NOTE: the docstring above is kept short on purpose; it is presumably
    # rendered as the command's --help text.
    #
    # Validates the requested type and boot disk size locally, then POSTs the
    # request to /engines. Exits with status 1 on invalid input or when the
    # API does not answer 201.
    username = check_aws_sso()
    if user:
        username = user

    # Validate engine type
    valid_types = (
        "cpu",
        "cpumax",
        "t4",
        "a10g",
        "a100",
        "4_t4",
        "8_t4",
        "4_a10g",
        "8_a10g",
    )
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    # Validate boot disk size
    if boot_disk_size is not None:
        if boot_disk_size < 20:
            console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
            raise typer.Exit(1)
        if boot_disk_size > 1000:
            console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
            raise typer.Exit(1)

    cost = HOURLY_COSTS.get(engine_type, 0)
    disk_info = f" with {boot_disk_size}GB boot disk" if boot_disk_size else ""
    console.print(
        f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
    )

    request_data: Dict[str, Any] = {
        "name": name,
        "user": username,
        "engine_type": engine_type,
    }
    # Optional fields are only sent when the caller supplied them.
    if boot_disk_size is not None:
        request_data["boot_disk_size"] = boot_disk_size
    if availability_zone:
        request_data["availability_zone"] = availability_zone

    # Show a transient spinner while the create request is in flight.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating engine...", total=None)
        response = make_api_request("POST", "/engines", json_data=request_data)

    if response.status_code != 201:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        # Report the failure through the exit status as well, like the other
        # engine commands do, so scripts can detect it.
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine launched successfully![/green]")
    console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
    console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
    if boot_disk_size:
        console.print(f"Boot disk: {boot_disk_size}GB")
    console.print("\nThe engine is initializing. This may take a few minutes.")
    console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
@@ -1,116 +0,0 @@
1
- """Engine list command."""
2
-
3
- from datetime import datetime, timezone
4
- from typing import Optional
5
-
6
- import typer
7
- from rich import box
8
- from rich.table import Table
9
-
10
- from ..engine_studio_utils.api_utils import make_api_request
11
- from ..engine_studio_utils.aws_utils import _fetch_init_stages, check_aws_sso
12
- from ..engine_studio_utils.constants import HOURLY_COSTS, console
13
- from ..engine_studio_utils.formatting import (
14
- format_duration,
15
- format_status,
16
- get_disk_usage_via_ssm,
17
- parse_launch_time,
18
- )
19
-
20
-
21
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    running_only: bool = typer.Option(
        False, "--running", help="Show only running engines"
    ),
    stopped_only: bool = typer.Option(
        False, "--stopped", help="Show only stopped engines"
    ),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
):
    """List engines (shows all engines by default)."""
    # NOTE: the docstring above is kept verbatim; it is presumably rendered as
    # the command's --help text.
    #
    # Fetches engines from the API, optionally narrows them to one state, and
    # prints them as a table. With --detailed an extra "Disk Usage" column is
    # filled in for running engines.
    check_aws_sso()  # return value is not needed here

    query = {}
    if user:
        query["user"] = user
    if detailed:
        query["check_ready"] = "true"

    response = make_api_request("GET", "/engines", params=query)

    if response.status_code != 200:
        error = response.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
        return

    engines = response.json().get("engines", [])

    # --running takes precedence when both state filters are given.
    if running_only:
        wanted_state = "running"
    elif stopped_only:
        wanted_state = "stopped"
    else:
        wanted_state = None
    if wanted_state is not None:
        engines = [e for e in engines if e["state"].lower() == wanted_state]

    if not engines:
        console.print("No engines found.")
        return

    if detailed:
        # NOTE(review): the result of this call is not used below; it is kept
        # only so that the command makes the same calls as before.
        _fetch_init_stages([e["instance_id"] for e in engines])

    # (header, add_column keyword arguments); "Disk Usage" only when detailed.
    columns = [
        ("Name", {"style": "cyan"}),
        ("Instance ID", {"style": "dim"}),
        ("Type", {}),
        ("User", {}),
        ("Status", {}),
    ]
    if detailed:
        columns.append(("Disk Usage", {}))
    columns.append(("Uptime/Since", {}))
    columns.append(("$/hour", {"justify": "right"}))

    table = Table(title="Engines", box=box.ROUNDED)
    for header, options in columns:
        table.add_column(header, **options)

    for engine in engines:
        launched_at = parse_launch_time(engine["launch_time"])
        age = datetime.now(timezone.utc) - launched_at
        price = HOURLY_COSTS.get(engine["engine_type"], 0)

        if engine["state"].lower() == "running":
            # Running engines show how long they have been up.
            when = format_duration(age)
            disk = (get_disk_usage_via_ssm(engine["instance_id"]) or "-") if detailed else None
        else:
            # Anything else shows the launch timestamp instead.
            when = launched_at.strftime("%Y-%m-%d %H:%M")
            disk = "-" if detailed else None

        cells = [
            engine["name"],
            engine["instance_id"],
            engine["engine_type"],
            engine["user"],
            format_status(engine["state"], engine.get("ready")),
        ]
        if detailed:
            cells.append(disk)
        cells += [when, f"${price:.2f}"]

        table.add_row(*cells)

    console.print(table)

    has_running = any(e["state"].lower() == "running" for e in engines)
    if has_running and not detailed:
        console.print(
            "\n[dim]Tip: Use --detailed to see disk usage and bootstrap status (slower)[/dim]"
        )
@@ -1,128 +0,0 @@
1
- """Engine repair command."""
2
-
3
- import time
4
-
5
- import boto3
6
- import typer
7
- from rich.progress import Progress, SpinnerColumn, TextColumn
8
- from rich.prompt import Confirm
9
-
10
- from ..engine_studio_utils.api_utils import make_api_request
11
- from ..engine_studio_utils.aws_utils import check_aws_sso
12
- from ..engine_studio_utils.constants import console
13
- from ..engine_studio_utils.formatting import resolve_engine
14
-
15
-
16
def repair_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
    # NOTE: the docstring above is kept verbatim; it is presumably rendered as
    # the command's --help text.
    #
    # Runs a fixed list of shell commands on the engine through SSM that
    # recreate the bootstrap marker files, re-sync the scripts and restart the
    # SSM agent and the idle detector. Exits with status 1 when the engine is
    # not running (and is not started), or when the repair does not succeed.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(
            f"[yellow]⚠️  Engine is {engine['state']}. Must be running to repair.[/yellow]"
        )
        if engine["state"].lower() == "stopped" and Confirm.ask(
            "Start the engine first?"
        ):
            response = make_api_request(
                "POST", f"/engines/{engine['instance_id']}/start"
            )
            if response.status_code != 200:
                console.print("[red]❌ Failed to start engine[/red]")
                raise typer.Exit(1)
            console.print("[green]✓ Engine started[/green]")
            console.print("Waiting for engine to become ready...")
            time.sleep(30)  # Give it time to boot
        else:
            raise typer.Exit(1)

    console.print(f"[bold]Repairing engine [cyan]{engine['name']}[/cyan][/bold]")
    console.print(
        "[dim]This will restore bootstrap state and ensure all services are running[/dim]\n"
    )

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Repair commands
    repair_commands = [
        # Create necessary directories
        "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state /opt/dayhoff/scripts",
        # Download scripts from S3 if missing
        "source /etc/engine.env && sudo aws s3 sync s3://${VM_SCRIPTS_BUCKET}/ /opt/dayhoff/scripts/ --exclude '*' --include '*.sh' --quiet",
        "sudo chmod +x /opt/dayhoff/scripts/*.sh 2>/dev/null || true",
        # Restore bootstrap state
        "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
        # Ensure SSM agent is running
        "sudo systemctl restart amazon-ssm-agent 2>/dev/null || true",
        # Restart idle detector (service only)
        "sudo systemctl restart engine-idle-detector.service 2>/dev/null || true",
        # Report status
        "echo '=== Repair Complete ===' && echo 'Sentinel: ' && ls -la /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'Stage: ' && cat /opt/dayhoff/state/engine-init.stage",
        "echo 'Scripts: ' && ls /opt/dayhoff/scripts/*.sh 2>/dev/null | wc -l",
    ]

    # Statuses after which the invocation will not change any more.
    terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")
    result = None
    failure = None

    try:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            progress.add_task("Repairing engine...", total=None)

            response = ssm.send_command(
                InstanceIds=[engine["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={
                    "commands": repair_commands,
                    "executionTimeout": ["60"],
                },
            )
            command_id = response["Command"]["CommandId"]

            # Wait for command
            for _ in range(60):
                time.sleep(1)
                try:
                    result = ssm.get_command_invocation(
                        CommandId=command_id,
                        InstanceId=engine["instance_id"],
                    )
                except ssm.exceptions.InvocationDoesNotExist:
                    # The invocation may not be visible yet right after
                    # send_command; keep polling instead of giving up.
                    continue
                if result["Status"] in terminal_statuses:
                    break
    except Exception as e:
        # Remembered and reported below: typer.Exit must not be raised inside
        # this try, because `except Exception` would swallow it.
        failure = e

    if failure is not None:
        console.print(f"[red]❌ Failed to repair engine: {failure}[/red]")
        raise typer.Exit(1)

    if result is not None and result["Status"] == "Success":
        output = result["StandardOutputContent"]
        console.print("[green]✓ Engine repaired successfully![/green]\n")

        # Show repair results
        if "=== Repair Complete ===" in output:
            repair_section = output.split("=== Repair Complete ===")[1].strip()
            console.print("[bold]Repair Results:[/bold]")
            console.print(repair_section)

        console.print(
            "\n[dim]You should now be able to attach studios to this engine.[/dim]"
        )
        return

    if result is None:
        detail = "no status reported by SSM"
    elif result["Status"] not in terminal_statuses:
        detail = f"timed out waiting for the command (last status: {result['Status']})"
    else:
        detail = result.get("StandardErrorContent") or f"command status {result['Status']}"
    console.print(f"[red]❌ Repair failed: {detail}[/red]")
    console.print(
        "\n[yellow]Try running 'dh engine debug' for more information.[/yellow]"
    )
    raise typer.Exit(1)
@@ -1,195 +0,0 @@
1
- """Engine resize command."""
2
-
3
- import time
4
-
5
- import boto3
6
- import typer
7
- from botocore.exceptions import ClientError
8
- from rich.prompt import Confirm
9
-
10
- from ..engine_studio_utils.api_utils import make_api_request
11
- from ..engine_studio_utils.aws_utils import check_aws_sso
12
- from ..engine_studio_utils.constants import console
13
- from ..engine_studio_utils.formatting import resolve_engine
14
-
15
-
16
def resize_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
    online: bool = typer.Option(
        False,
        "--online",
        help="Resize while running (requires manual filesystem expansion)",
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force resize and detach all studios"
    ),
):
    """Resize an engine's boot disk."""
    # NOTE: the docstring above is kept verbatim; it is presumably rendered as
    # the command's --help text.
    #
    # Flow: resolve the engine, look up its root EBS volume, check the new
    # size is larger, stop the engine unless --online, ask the API to resize
    # (offering to detach studios on a 409), poll the volume modification,
    # then restart the engine or print manual filesystem-expansion steps.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engine = resolve_engine(name_or_id, listing.json().get("engines", []))

    ec2 = boto3.client("ec2", region_name="us-east-1")

    try:
        instance_id = engine["instance_id"]
        api_base = f"/engines/{instance_id}"

        described = ec2.describe_instances(InstanceIds=[instance_id])
        instance = described["Reservations"][0]["Instances"][0]

        # The root volume is the block device mapped at the root device name.
        root_device = instance.get("RootDeviceName", "/dev/xvda")
        root_volume_id = next(
            (
                mapping["Ebs"]["VolumeId"]
                for mapping in instance.get("BlockDeviceMappings", [])
                if mapping["DeviceName"] == root_device
            ),
            None,
        )
        if not root_volume_id:
            console.print("[red]❌ Could not find root volume[/red]")
            raise typer.Exit(1)

        current_size = ec2.describe_volumes(VolumeIds=[root_volume_id])["Volumes"][0][
            "Size"
        ]
        if not size > current_size:
            console.print(
                f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]"
            )
            raise typer.Exit(1)

        console.print(
            f"[yellow]Resizing engine boot disk from {current_size}GB to {size}GB[/yellow]"
        )

        # Offline resize of a running engine: stop it first and wait.
        if not online and engine["state"].lower() == "running":
            console.print("Stopping engine for offline resize...")
            stopped = make_api_request(
                "POST",
                f"{api_base}/stop",
                json_data={"detach_studios": False},
            )
            if stopped.status_code != 200:
                console.print("[red]❌ Failed to stop engine[/red]")
                raise typer.Exit(1)

            console.print("Waiting for engine to stop...")
            ec2.get_waiter("instance_stopped").wait(InstanceIds=[instance_id])
            console.print("[green]✓ Engine stopped[/green]")

        def request_resize(detach_studios):
            # Ask the API to resize the boot volume.
            return make_api_request(
                "POST",
                f"{api_base}/resize",
                json_data={"size": size, "detach_studios": detach_studios},
            )

        console.print("Resizing volume...")
        resize_response = request_resize(force)

        # 409 without --force: studios are attached; ask before detaching.
        if resize_response.status_code == 409 and not force:
            conflict = resize_response.json()

            console.print("\n[yellow]⚠️  This engine has attached studios:[/yellow]")
            for studio in conflict.get("attached_studios", []):
                console.print(f"  • {studio['user']} ({studio['studio_id']})")

            if not Confirm.ask("\nDetach all studios and resize the engine?"):
                console.print("Resize cancelled.")
                return
            resize_response = request_resize(True)

        if resize_response.status_code != 200:
            error = resize_response.json().get("error", "Unknown error")
            console.print(f"[red]❌ Failed to resize engine: {error}[/red]")
            raise typer.Exit(1)

        detached_count = resize_response.json().get("detached_studios", 0)
        if detached_count > 0:
            console.print(
                f"[green]✓ Detached {detached_count} studio(s) before resize[/green]"
            )

        console.print("Waiting for volume modification to complete...")
        while True:
            modifications = ec2.describe_volumes_modifications(
                VolumeIds=[root_volume_id]
            )["VolumesModifications"]
            if not modifications:
                # Nothing reported for this volume: treated as complete.
                break

            latest = modifications[0]
            state = latest["ModificationState"]
            percent = latest.get("Progress", 0)

            if state == "modifying":
                # Progress is only shown for the resize phase.
                console.print(f"[yellow]Progress: {percent}%[/yellow]")
            elif state == "optimizing":
                # The resize itself is complete once optimization starts.
                console.print("[green]✓ Volume resized successfully[/green]")
                console.print(
                    "[dim]AWS is optimizing the volume in the background (no action needed).[/dim]"
                )
                break
            elif state == "completed":
                console.print("[green]✓ Volume resized successfully[/green]")
                break
            elif state == "failed":
                console.print("[red]❌ Volume modification failed[/red]")
                raise typer.Exit(1)

            time.sleep(2)  # Check more frequently for better UX

        # Follow-up only applies to engines that were running at the start.
        if engine["state"].lower() == "running":
            if online:
                console.print(
                    "\n[yellow]⚠️  Online resize complete. You must now expand the filesystem:[/yellow]"
                )
                console.print(
                    f"1. SSH into the engine: [cyan]ssh {engine['name']}[/cyan]"
                )
                console.print("2. Find the root device: [cyan]lsblk[/cyan]")
                console.print(
                    "3. Expand the partition: [cyan]sudo growpart /dev/nvme0n1 1[/cyan] (adjust device name as needed)"
                )
                console.print(
                    "4. Expand the filesystem: [cyan]sudo xfs_growfs /[/cyan]"
                )
            else:
                # Offline resize: bring the engine back up.
                console.print("Starting engine back up...")
                started = make_api_request("POST", f"{api_base}/start")
                if started.status_code == 200:
                    console.print("[green]✓ Engine started[/green]")
                    console.print(
                        "The filesystem will be automatically expanded on boot."
                    )
                else:
                    console.print(
                        "[yellow]⚠️  Failed to restart engine automatically[/yellow]"
                    )
                    console.print(
                        f"Please start it manually: [cyan]dh engine start {engine['name']}[/cyan]"
                    )

    except ClientError as e:
        console.print(f"[red]❌ Failed to resize engine: {e}[/red]")
        raise typer.Exit(1)