dayhoff-tools 1.10.1__py3-none-any.whl → 1.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/cli/engine/__init__.py +323 -1
- dayhoff_tools/cli/engine/{status.py → engine_core.py} +201 -6
- dayhoff_tools/cli/engine/engine_maintenance.py +431 -0
- dayhoff_tools/cli/engine/engine_management.py +505 -0
- dayhoff_tools/cli/engine/shared.py +501 -0
- dayhoff_tools/cli/engine/studio_commands.py +825 -0
- dayhoff_tools/cli/main.py +2 -1
- {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.2.dist-info}/METADATA +1 -1
- dayhoff_tools-1.10.2.dist-info/RECORD +39 -0
- dayhoff_tools/cli/engine/coffee.py +0 -110
- dayhoff_tools/cli/engine/config_ssh.py +0 -113
- dayhoff_tools/cli/engine/debug.py +0 -79
- dayhoff_tools/cli/engine/gami.py +0 -160
- dayhoff_tools/cli/engine/idle.py +0 -148
- dayhoff_tools/cli/engine/launch.py +0 -101
- dayhoff_tools/cli/engine/list.py +0 -116
- dayhoff_tools/cli/engine/repair.py +0 -128
- dayhoff_tools/cli/engine/resize.py +0 -195
- dayhoff_tools/cli/engine/ssh.py +0 -62
- dayhoff_tools/cli/engine_studio_commands.py +0 -323
- dayhoff_tools/cli/engine_studio_utils/__init__.py +0 -1
- dayhoff_tools/cli/engine_studio_utils/api_utils.py +0 -47
- dayhoff_tools/cli/engine_studio_utils/aws_utils.py +0 -102
- dayhoff_tools/cli/engine_studio_utils/constants.py +0 -21
- dayhoff_tools/cli/engine_studio_utils/formatting.py +0 -210
- dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +0 -141
- dayhoff_tools/cli/studio/__init__.py +0 -1
- dayhoff_tools/cli/studio/attach.py +0 -314
- dayhoff_tools/cli/studio/create.py +0 -48
- dayhoff_tools/cli/studio/delete.py +0 -71
- dayhoff_tools/cli/studio/detach.py +0 -56
- dayhoff_tools/cli/studio/list.py +0 -81
- dayhoff_tools/cli/studio/reset.py +0 -90
- dayhoff_tools/cli/studio/resize.py +0 -134
- dayhoff_tools/cli/studio/status.py +0 -78
- dayhoff_tools-1.10.1.dist-info/RECORD +0 -61
- /dayhoff_tools/cli/engine/{lifecycle.py → engine_lifecycle.py} +0 -0
- {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.2.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.10.1.dist-info → dayhoff_tools-1.10.2.dist-info}/entry_points.txt +0 -0
@@ -1,101 +0,0 @@
|
|
1
|
-
"""Engine launch command."""
|
2
|
-
|
3
|
-
from typing import Any, Dict, Optional
|
4
|
-
|
5
|
-
import typer
|
6
|
-
from rich.progress import Progress, SpinnerColumn, TextColumn
|
7
|
-
|
8
|
-
from ..engine_studio_utils.api_utils import make_api_request
|
9
|
-
from ..engine_studio_utils.aws_utils import check_aws_sso
|
10
|
-
from ..engine_studio_utils.constants import HOURLY_COSTS, console
|
11
|
-
|
12
|
-
|
13
|
-
def launch_engine(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
    ),
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
    boot_disk_size: Optional[int] = typer.Option(
        None,
        "--size",
        "-s",
        help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
    ),
    availability_zone: Optional[str] = typer.Option(
        None,
        "--az",
        help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
    ),
):
    """Launch a new engine instance."""
    # Flow: validate the arguments locally, POST the request to /engines, then
    # report the outcome. Every failure path exits with status 1 so that shell
    # scripts can detect it (the docstring above is kept short because Typer
    # shows it as the command's --help text).

    # check_aws_sso() is always called, even when --user overrides its result.
    username = check_aws_sso()
    if user:
        username = user

    # Validate engine type (keep in sync with the --type help text above).
    valid_types = (
        "cpu",
        "cpumax",
        "t4",
        "a10g",
        "a100",
        "4_t4",
        "8_t4",
        "4_a10g",
        "8_a10g",
    )
    if engine_type not in valid_types:
        console.print(f"[red]❌ Invalid engine type: {engine_type}[/red]")
        console.print(f"Valid types: {', '.join(valid_types)}")
        raise typer.Exit(1)

    # Validate boot disk size (only when the caller supplied one).
    if boot_disk_size is not None:
        if boot_disk_size < 20:
            console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
            raise typer.Exit(1)
        if boot_disk_size > 1000:
            console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
            raise typer.Exit(1)

    # Unknown engine types fall back to a displayed cost of 0.
    cost = HOURLY_COSTS.get(engine_type, 0)
    disk_info = (
        f" with {boot_disk_size}GB boot disk" if boot_disk_size is not None else ""
    )
    console.print(
        f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
    )

    # Optional fields are only sent when the user provided them.
    request_data: Dict[str, Any] = {
        "name": name,
        "user": username,
        "engine_type": engine_type,
    }
    if boot_disk_size is not None:
        request_data["boot_disk_size"] = boot_disk_size
    if availability_zone:
        request_data["availability_zone"] = availability_zone

    # The spinner wraps only the blocking request; results are printed after
    # it has been torn down so the transient display cannot interleave with
    # the final output.
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        progress.add_task("Creating engine...", total=None)
        response = make_api_request("POST", "/engines", json_data=request_data)

    if response.status_code != 201:
        # The error body may not be JSON (e.g. a gateway error page); fall back
        # to the HTTP status instead of crashing with a decode error.
        try:
            error = response.json().get("error", "Unknown error")
        except ValueError:
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to launch engine: {error}[/red]")
        raise typer.Exit(1)

    data = response.json()
    console.print("[green]✓ Engine launched successfully![/green]")
    console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
    console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
    if boot_disk_size is not None:
        console.print(f"Boot disk: {boot_disk_size}GB")
    console.print("\nThe engine is initializing. This may take a few minutes.")
    console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
|
dayhoff_tools/cli/engine/list.py
DELETED
@@ -1,116 +0,0 @@
|
|
1
|
-
"""Engine list command."""
|
2
|
-
|
3
|
-
from datetime import datetime, timezone
|
4
|
-
from typing import Optional
|
5
|
-
|
6
|
-
import typer
|
7
|
-
from rich import box
|
8
|
-
from rich.table import Table
|
9
|
-
|
10
|
-
from ..engine_studio_utils.api_utils import make_api_request
|
11
|
-
from ..engine_studio_utils.aws_utils import _fetch_init_stages, check_aws_sso
|
12
|
-
from ..engine_studio_utils.constants import HOURLY_COSTS, console
|
13
|
-
from ..engine_studio_utils.formatting import (
|
14
|
-
format_duration,
|
15
|
-
format_status,
|
16
|
-
get_disk_usage_via_ssm,
|
17
|
-
parse_launch_time,
|
18
|
-
)
|
19
|
-
|
20
|
-
|
21
|
-
def list_engines(
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    running_only: bool = typer.Option(
        False, "--running", help="Show only running engines"
    ),
    stopped_only: bool = typer.Option(
        False, "--stopped", help="Show only stopped engines"
    ),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
):
    """List engines (shows all engines by default)."""
    # Fetches engines from GET /engines and renders them as a table. With
    # --detailed an extra "Disk Usage" column is filled in per running engine
    # via get_disk_usage_via_ssm, which is why that mode is slower. Exits with
    # status 1 when the API call fails.

    # Called for its SSO check; the returned username is not needed here.
    check_aws_sso()

    params = {}
    if user:
        params["user"] = user
    if detailed:
        params["check_ready"] = "true"

    response = make_api_request("GET", "/engines", params=params)

    if response.status_code != 200:
        # The error body may not be JSON; fall back to the HTTP status.
        try:
            error = response.json().get("error", "Unknown error")
        except ValueError:
            error = f"HTTP {response.status_code}"
        console.print(f"[red]❌ Failed to list engines: {error}[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])

    # Filter by state if requested. If both flags are passed, --running wins.
    if running_only:
        engines = [e for e in engines if e["state"].lower() == "running"]
    elif stopped_only:
        engines = [e for e in engines if e["state"].lower() == "stopped"]

    if not engines:
        console.print("No engines found.")
        return

    # Create table
    table = Table(title="Engines", box=box.ROUNDED)
    table.add_column("Name", style="cyan")
    table.add_column("Instance ID", style="dim")
    table.add_column("Type")
    table.add_column("User")
    table.add_column("Status")
    if detailed:
        table.add_column("Disk Usage")
    table.add_column("Uptime/Since")
    table.add_column("$/hour", justify="right")

    any_running = False
    for engine in engines:
        launch_time = parse_launch_time(engine["launch_time"])
        hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
        is_running = engine["state"].lower() == "running"
        any_running = any_running or is_running

        if is_running:
            # Running engines show elapsed uptime; others show launch time.
            time_str = format_duration(datetime.now(timezone.utc) - launch_time)
        else:
            time_str = launch_time.strftime("%Y-%m-%d %H:%M")

        row_data = [
            engine["name"],
            engine["instance_id"],
            engine["engine_type"],
            engine["user"],
            format_status(engine["state"], engine.get("ready")),
        ]
        if detailed:
            # Disk usage is only queried for running engines; "-" otherwise
            # or when the query returns nothing.
            disk_usage = "-"
            if is_running:
                disk_usage = get_disk_usage_via_ssm(engine["instance_id"]) or "-"
            row_data.append(disk_usage)
        row_data.extend([time_str, f"${hourly_cost:.2f}"])

        table.add_row(*row_data)

    console.print(table)
    if not detailed and any_running:
        console.print(
            "\n[dim]Tip: Use --detailed to see disk usage and bootstrap status (slower)[/dim]"
        )
|
@@ -1,128 +0,0 @@
|
|
1
|
-
"""Engine repair command."""
|
2
|
-
|
3
|
-
import time
|
4
|
-
|
5
|
-
import boto3
|
6
|
-
import typer
|
7
|
-
from rich.progress import Progress, SpinnerColumn, TextColumn
|
8
|
-
from rich.prompt import Confirm
|
9
|
-
|
10
|
-
from ..engine_studio_utils.api_utils import make_api_request
|
11
|
-
from ..engine_studio_utils.aws_utils import check_aws_sso
|
12
|
-
from ..engine_studio_utils.constants import console
|
13
|
-
from ..engine_studio_utils.formatting import resolve_engine
|
14
|
-
|
15
|
-
|
16
|
-
def repair_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
    # Resolves the engine, optionally starts it, then runs a fixed list of
    # shell commands on it through SSM (AWS-RunShellScript) and reports the
    # result. Exits with status 1 if the engine cannot be resolved/started,
    # the SSM call errors, the command ends unsuccessfully, or it does not
    # finish within the polling window.
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    if engine["state"].lower() != "running":
        console.print(
            f"[yellow]⚠️ Engine is {engine['state']}. Must be running to repair.[/yellow]"
        )
        # Only a stopped engine can be started here; any other state aborts.
        if engine["state"].lower() == "stopped" and Confirm.ask(
            "Start the engine first?"
        ):
            response = make_api_request("POST", f"/engines/{instance_id}/start")
            if response.status_code != 200:
                console.print("[red]❌ Failed to start engine[/red]")
                raise typer.Exit(1)
            console.print("[green]✓ Engine started[/green]")
            console.print("Waiting for engine to become ready...")
            time.sleep(30)  # Give it time to boot
        else:
            raise typer.Exit(1)

    console.print(f"[bold]Repairing engine [cyan]{engine['name']}[/cyan][/bold]")
    console.print(
        "[dim]This will restore bootstrap state and ensure all services are running[/dim]\n"
    )

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Repair commands
    repair_commands = [
        # Create necessary directories
        "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state /opt/dayhoff/scripts",
        # Download scripts from S3 if missing
        "source /etc/engine.env && sudo aws s3 sync s3://${VM_SCRIPTS_BUCKET}/ /opt/dayhoff/scripts/ --exclude '*' --include '*.sh' --quiet",
        "sudo chmod +x /opt/dayhoff/scripts/*.sh 2>/dev/null || true",
        # Restore bootstrap state
        "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
        # Ensure SSM agent is running
        "sudo systemctl restart amazon-ssm-agent 2>/dev/null || true",
        # Restart idle detector (service only)
        "sudo systemctl restart engine-idle-detector.service 2>/dev/null || true",
        # Report status
        "echo '=== Repair Complete ===' && echo 'Sentinel: ' && ls -la /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'Stage: ' && cat /opt/dayhoff/state/engine-init.stage",
        "echo 'Scripts: ' && ls /opt/dayhoff/scripts/*.sh 2>/dev/null | wc -l",
    ]

    # Statuses after which SSM will not change the invocation any further.
    terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")
    result = None

    try:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            progress.add_task("Repairing engine...", total=None)

            response = ssm.send_command(
                InstanceIds=[instance_id],
                DocumentName="AWS-RunShellScript",
                Parameters={
                    "commands": repair_commands,
                    "executionTimeout": ["60"],
                },
            )
            command_id = response["Command"]["CommandId"]

            # Poll once per second for up to 60 seconds.
            for _ in range(60):
                time.sleep(1)
                try:
                    result = ssm.get_command_invocation(
                        CommandId=command_id,
                        InstanceId=instance_id,
                    )
                except ssm.exceptions.InvocationDoesNotExist:
                    # The invocation can lag behind send_command; keep polling.
                    continue
                if result["Status"] in terminal_statuses:
                    break
    except Exception as e:
        # Top-level CLI boundary: report any AWS/SDK error and exit non-zero.
        console.print(f"[red]❌ Failed to repair engine: {e}[/red]")
        raise typer.Exit(1)

    status = result["Status"] if result else None

    if status == "Success":
        output = result["StandardOutputContent"]
        console.print("[green]✓ Engine repaired successfully![/green]\n")

        # Show repair results (everything echoed after the marker line).
        if "=== Repair Complete ===" in output:
            repair_section = output.split("=== Repair Complete ===")[1].strip()
            console.print("[bold]Repair Results:[/bold]")
            console.print(repair_section)

        console.print(
            "\n[dim]You should now be able to attach studios to this engine.[/dim]"
        )
        return

    if status in terminal_statuses:
        # Command finished unsuccessfully; stderr may be empty.
        detail = result.get("StandardErrorContent") or f"Unknown error (status: {status})"
        console.print(f"[red]❌ Repair failed: {detail}[/red]")
    else:
        # Polling window ran out while the command was still pending/running.
        console.print(
            f"[red]❌ Repair failed: command did not finish within 60 seconds (last status: {status or 'unknown'})[/red]"
        )
    console.print(
        "\n[yellow]Try running 'dh engine debug' for more information.[/yellow]"
    )
    raise typer.Exit(1)
|
@@ -1,195 +0,0 @@
|
|
1
|
-
"""Engine resize command."""
|
2
|
-
|
3
|
-
import time
|
4
|
-
|
5
|
-
import boto3
|
6
|
-
import typer
|
7
|
-
from botocore.exceptions import ClientError
|
8
|
-
from rich.prompt import Confirm
|
9
|
-
|
10
|
-
from ..engine_studio_utils.api_utils import make_api_request
|
11
|
-
from ..engine_studio_utils.aws_utils import check_aws_sso
|
12
|
-
from ..engine_studio_utils.constants import console
|
13
|
-
from ..engine_studio_utils.formatting import resolve_engine
|
14
|
-
|
15
|
-
|
16
|
-
def resize_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
    online: bool = typer.Option(
        False,
        "--online",
        help="Resize while running (requires manual filesystem expansion)",
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force resize and detach all studios"
    ),
):
    """Resize an engine's boot disk."""
    # Flow: resolve the engine, look up its root EBS volume and current size,
    # stop the engine unless --online, call the resize API (offering to detach
    # attached studios on HTTP 409), wait for the volume modification, then
    # restart the engine if this command stopped it.
    #
    # If the resize is cancelled or fails after the engine was stopped here,
    # the engine is started again so it is not left stopped as a side effect.
    from botocore.exceptions import WaiterError  # raised by waiter.wait()

    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]
    was_running = engine["state"].lower() == "running"

    # Get current volume info to validate size
    ec2 = boto3.client("ec2", region_name="us-east-1")

    # True once this command has asked the API to stop the engine.
    stopped_for_resize = False

    try:
        # Get instance details to find root volume
        instance_info = ec2.describe_instances(InstanceIds=[instance_id])
        instance = instance_info["Reservations"][0]["Instances"][0]

        # Find root volume
        root_device = instance.get("RootDeviceName", "/dev/xvda")
        root_volume_id = next(
            (
                bdm["Ebs"]["VolumeId"]
                for bdm in instance.get("BlockDeviceMappings", [])
                if bdm["DeviceName"] == root_device
            ),
            None,
        )

        if not root_volume_id:
            console.print("[red]❌ Could not find root volume[/red]")
            raise typer.Exit(1)

        # Get current volume size
        volumes = ec2.describe_volumes(VolumeIds=[root_volume_id])
        current_size = volumes["Volumes"][0]["Size"]

        if size <= current_size:
            console.print(
                f"[red]❌ New size ({size}GB) must be larger than current size ({current_size}GB)[/red]"
            )
            raise typer.Exit(1)

        console.print(
            f"[yellow]Resizing engine boot disk from {current_size}GB to {size}GB[/yellow]"
        )

        # Check if we need to stop the instance
        if not online and was_running:
            console.print("Stopping engine for offline resize...")
            stop_response = make_api_request(
                "POST",
                f"/engines/{instance_id}/stop",
                json_data={"detach_studios": False},
            )
            if stop_response.status_code != 200:
                console.print("[red]❌ Failed to stop engine[/red]")
                raise typer.Exit(1)
            stopped_for_resize = True

            # Wait for instance to stop
            console.print("Waiting for engine to stop...")
            waiter = ec2.get_waiter("instance_stopped")
            waiter.wait(InstanceIds=[instance_id])
            console.print("[green]✓ Engine stopped[/green]")

        # Call the resize API
        console.print("Resizing volume...")
        resize_response = make_api_request(
            "POST",
            f"/engines/{instance_id}/resize",
            json_data={"size": size, "detach_studios": force},
        )

        if resize_response.status_code == 409 and not force:
            # Engine has attached studios
            data = resize_response.json()
            attached_studios = data.get("attached_studios", [])

            console.print("\n[yellow]⚠️ This engine has attached studios:[/yellow]")
            for studio in attached_studios:
                console.print(f" • {studio['user']} ({studio['studio_id']})")

            if not Confirm.ask("\nDetach all studios and resize the engine?"):
                console.print("Resize cancelled.")
                if stopped_for_resize:
                    _restart_engine_after_resize(engine)
                return

            resize_response = make_api_request(
                "POST",
                f"/engines/{instance_id}/resize",
                json_data={"size": size, "detach_studios": True},
            )

        if resize_response.status_code != 200:
            # The error body may not be JSON; fall back to the HTTP status.
            try:
                error = resize_response.json().get("error", "Unknown error")
            except ValueError:
                error = f"HTTP {resize_response.status_code}"
            console.print(f"[red]❌ Failed to resize engine: {error}[/red]")
            if stopped_for_resize:
                _restart_engine_after_resize(engine)
            raise typer.Exit(1)

        # Check if studios were detached
        data = resize_response.json()
        detached_studios = data.get("detached_studios", 0)
        if detached_studios > 0:
            console.print(
                f"[green]✓ Detached {detached_studios} studio(s) before resize[/green]"
            )

        # Wait for modification to complete. The loop has no timeout: it ends
        # when the modification is optimizing, completed, failed, or absent.
        console.print("Waiting for volume modification to complete...")
        last_progress = None
        while True:
            mod_state = ec2.describe_volumes_modifications(VolumeIds=[root_volume_id])
            if not mod_state["VolumesModifications"]:
                break  # Modification complete

            modification = mod_state["VolumesModifications"][0]
            state = modification["ModificationState"]
            progress = modification.get("Progress", 0)

            # Show progress updates only for the resize phase, and only when
            # the percentage actually changed since the previous poll.
            if state == "modifying" and progress != last_progress:
                console.print(f"[yellow]Progress: {progress}%[/yellow]")
                last_progress = progress

            # Exit as soon as optimization starts (resize is complete)
            if state == "optimizing":
                console.print("[green]✓ Volume resized successfully[/green]")
                console.print(
                    "[dim]AWS is optimizing the volume in the background (no action needed).[/dim]"
                )
                break

            if state == "completed":
                console.print("[green]✓ Volume resized successfully[/green]")
                break
            if state == "failed":
                console.print("[red]❌ Volume modification failed[/red]")
                if stopped_for_resize:
                    _restart_engine_after_resize(engine)
                raise typer.Exit(1)

            time.sleep(2)  # Check more frequently for better UX

        # If offline resize, start the instance back up
        if stopped_for_resize:
            if _restart_engine_after_resize(engine):
                console.print("The filesystem will be automatically expanded on boot.")

        elif online and was_running:
            console.print(
                "\n[yellow]⚠️ Online resize complete. You must now expand the filesystem:[/yellow]"
            )
            console.print(f"1. SSH into the engine: [cyan]ssh {engine['name']}[/cyan]")
            console.print("2. Find the root device: [cyan]lsblk[/cyan]")
            console.print(
                "3. Expand the partition: [cyan]sudo growpart /dev/nvme0n1 1[/cyan] (adjust device name as needed)"
            )
            console.print("4. Expand the filesystem: [cyan]sudo xfs_growfs /[/cyan]")

    except (ClientError, WaiterError) as e:
        console.print(f"[red]❌ Failed to resize engine: {e}[/red]")
        if stopped_for_resize:
            # The engine's state is uncertain here, so only point the user at
            # the manual command rather than issuing another API call.
            console.print(
                f"The engine may be stopped. Start it with: [cyan]dh engine start {engine['name']}[/cyan]"
            )
        raise typer.Exit(1)


def _restart_engine_after_resize(engine) -> bool:
    """Start ``engine`` again through the API; return True if it accepted (HTTP 200)."""
    console.print("Starting engine back up...")
    start_response = make_api_request(
        "POST", f"/engines/{engine['instance_id']}/start"
    )
    if start_response.status_code != 200:
        console.print("[yellow]⚠️ Failed to restart engine automatically[/yellow]")
        console.print(
            f"Please start it manually: [cyan]dh engine start {engine['name']}[/cyan]"
        )
        return False
    console.print("[green]✓ Engine started[/green]")
    return True
|
dayhoff_tools/cli/engine/ssh.py
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
"""Engine SSH command."""
|
2
|
-
|
3
|
-
import subprocess
|
4
|
-
|
5
|
-
import typer
|
6
|
-
|
7
|
-
from ..engine_studio_utils.api_utils import make_api_request
|
8
|
-
from ..engine_studio_utils.aws_utils import check_aws_sso
|
9
|
-
from ..engine_studio_utils.constants import console
|
10
|
-
from ..engine_studio_utils.formatting import resolve_engine
|
11
|
-
from ..engine_studio_utils.ssh_utils import check_session_manager_plugin, update_ssh_config_entry
|
12
|
-
|
13
|
-
|
14
|
-
def ssh_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    admin: bool = typer.Option(
        False, "--admin", help="Connect as ec2-user instead of the engine owner user"
    ),
    idle_timeout: int = typer.Option(
        600,
        "--idle-timeout",
        help="Idle timeout (seconds) for the SSM port-forward (0 = disable)",
    ),
):
    """Connect to an engine via SSH.

    By default the CLI connects using the engine's owner username (the same one stored in the `User` tag).
    Pass `--admin` to connect with the underlying [`ec2-user`] account for break-glass or debugging.
    """
    # Steps: SSO check, Session Manager plugin check, resolve the engine from
    # GET /engines, require it to be running, refresh its SSH config entry,
    # then hand the terminal over to `ssh <engine name>`.
    username = check_aws_sso()

    # Without the Session Manager plugin there is nothing to connect through.
    plugin_available = check_session_manager_plugin()
    if not plugin_available:
        raise typer.Exit(1)

    # The full engine list is needed to map a name or instance ID to a record.
    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    if target["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {target['state']})[/red]")
        raise typer.Exit(1)

    # NOTE(review): the non-admin login is the caller's SSO username, not the
    # engine record's "user" field; confirm this matches the docstring's
    # "owner username" wording.
    if admin:
        ssh_user = "ec2-user"
    else:
        ssh_user = username

    host_alias = target["name"]

    console.print(
        f"Updating SSH config for [cyan]{host_alias}[/cyan] (user: {ssh_user})..."
    )
    update_ssh_config_entry(host_alias, target["instance_id"], ssh_user, idle_timeout)

    # ssh is invoked with the engine name as the host argument; its exit
    # status is not inspected.
    console.print(f"[green]✓ Connecting to {host_alias}...[/green]")
    subprocess.run(["ssh", host_alias])
|