dayhoff-tools 1.9.26__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. dayhoff_tools/cli/engine/__init__.py +1 -323
  2. dayhoff_tools/cli/engine/coffee.py +110 -0
  3. dayhoff_tools/cli/engine/config_ssh.py +113 -0
  4. dayhoff_tools/cli/engine/debug.py +79 -0
  5. dayhoff_tools/cli/engine/gami.py +160 -0
  6. dayhoff_tools/cli/engine/idle.py +148 -0
  7. dayhoff_tools/cli/engine/launch.py +101 -0
  8. dayhoff_tools/cli/engine/list.py +116 -0
  9. dayhoff_tools/cli/engine/repair.py +128 -0
  10. dayhoff_tools/cli/engine/resize.py +195 -0
  11. dayhoff_tools/cli/engine/ssh.py +62 -0
  12. dayhoff_tools/cli/engine/{engine_core.py → status.py} +6 -201
  13. dayhoff_tools/cli/engine_studio_commands.py +323 -0
  14. dayhoff_tools/cli/engine_studio_utils/__init__.py +1 -0
  15. dayhoff_tools/cli/engine_studio_utils/api_utils.py +47 -0
  16. dayhoff_tools/cli/engine_studio_utils/aws_utils.py +102 -0
  17. dayhoff_tools/cli/engine_studio_utils/constants.py +21 -0
  18. dayhoff_tools/cli/engine_studio_utils/formatting.py +210 -0
  19. dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +141 -0
  20. dayhoff_tools/cli/main.py +1 -2
  21. dayhoff_tools/cli/studio/__init__.py +1 -0
  22. dayhoff_tools/cli/studio/attach.py +314 -0
  23. dayhoff_tools/cli/studio/create.py +48 -0
  24. dayhoff_tools/cli/studio/delete.py +71 -0
  25. dayhoff_tools/cli/studio/detach.py +56 -0
  26. dayhoff_tools/cli/studio/list.py +81 -0
  27. dayhoff_tools/cli/studio/reset.py +90 -0
  28. dayhoff_tools/cli/studio/resize.py +134 -0
  29. dayhoff_tools/cli/studio/status.py +78 -0
  30. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/METADATA +1 -1
  31. dayhoff_tools-1.10.1.dist-info/RECORD +61 -0
  32. dayhoff_tools/cli/engine/engine_maintenance.py +0 -431
  33. dayhoff_tools/cli/engine/engine_management.py +0 -505
  34. dayhoff_tools/cli/engine/shared.py +0 -501
  35. dayhoff_tools/cli/engine/studio_commands.py +0 -825
  36. dayhoff_tools-1.9.26.dist-info/RECORD +0 -39
  37. /dayhoff_tools/cli/engine/{engine_lifecycle.py → lifecycle.py} +0 -0
  38. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/WHEEL +0 -0
  39. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,323 @@
1
+ """Engine and Studio management commands for DHT CLI."""
2
+
3
+ from typing import Optional
4
+
5
+ import typer
6
+
7
# Initialize Typer apps
# Two separate command groups: one for engines, one for studios. The command
# functions in this module register themselves on these via
# @engine_app.command(...) / @studio_app.command(...).
engine_app = typer.Typer(help="Manage compute engines for development.")
studio_app = typer.Typer(help="Manage persistent development studios.")

# Use lazy loading pattern similar to main.py swarm commands
# Import functions only when commands are actually called
13
+
14
+
15
+ # Engine commands
16
@engine_app.command("launch")
def launch_engine_cmd(
    name: str = typer.Argument(help="Name for the new engine"),
    engine_type: str = typer.Option(
        "cpu",
        "--type",
        "-t",
        help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
    ),
    # The three options below default to None when omitted on the command
    # line, so they are annotated Optional[...] rather than bare str/int.
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
    boot_disk_size: Optional[int] = typer.Option(
        None,
        "--size",
        "-s",
        help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
    ),
    availability_zone: Optional[str] = typer.Option(
        None,
        "--az",
        help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
    ),
):
    """Launch a new engine instance."""
    # NOTE: the docstring above doubles as the CLI help text for this command,
    # so it is kept to a single user-facing sentence.
    #
    # Thin wrapper: all arguments are forwarded positionally, in declaration
    # order, to launch_engine. The size limits quoted in the --size help text
    # are not checked here.
    #
    # The import is deferred to call time so the implementation module is
    # only loaded when this command actually runs.
    from .engine.launch import launch_engine

    return launch_engine(name, engine_type, user, boot_disk_size, availability_zone)
42
+
43
+
44
@engine_app.command("list")
def list_engines_cmd(
    # Defaults to None (no user filter), hence Optional[str].
    user: Optional[str] = typer.Option(None, "--user", "-u", help="Filter by user"),
    running_only: bool = typer.Option(
        False, "--running", help="Show only running engines"
    ),
    stopped_only: bool = typer.Option(
        False, "--stopped", help="Show only stopped engines"
    ),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
):
    """List engines (shows all engines by default)."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: arguments are forwarded positionally to list_engines.
    # --running and --stopped are both passed through as-is; this wrapper does
    # not check whether they were given together.
    #
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.list import list_engines

    return list_engines(user, running_only, stopped_only, detailed)
61
+
62
+
63
@engine_app.command("status")
def engine_status_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    detailed: bool = typer.Option(
        False, "--detailed", "-d", help="Show detailed status (slower)"
    ),
    show_log: bool = typer.Option(
        False, "--show-log", help="Show bootstrap log (requires --detailed)"
    ),
):
    """Show engine status and information."""
    # Registered as the "status" command on engine_app. Thin wrapper that
    # forwards its arguments positionally to engine_status.
    # The "--show-log requires --detailed" rule from the help text is not
    # enforced here; both flags are passed through unchanged.
    #
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.status import engine_status

    return engine_status(name_or_id, detailed, show_log)
77
+
78
+
79
@engine_app.command("start")
def start_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Start a stopped engine."""
    # Registered as the "start" command on engine_app; forwards the single
    # argument to start_engine and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.lifecycle import start_engine

    return start_engine(name_or_id)
87
+
88
+
89
@engine_app.command("stop")
def stop_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force stop and detach all studios"
    ),
):
    """Stop an engine."""
    # Registered as the "stop" command on engine_app; forwards both arguments
    # positionally to stop_engine and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.lifecycle import stop_engine

    return stop_engine(name_or_id, force)
100
+
101
+
102
@engine_app.command("terminate")
def terminate_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Permanently terminate an engine."""
    # Registered as the "terminate" command on engine_app. No confirmation or
    # --force flag is handled at this level; the argument goes straight to
    # terminate_engine.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.lifecycle import terminate_engine

    return terminate_engine(name_or_id)
110
+
111
+
112
@engine_app.command("ssh")
def ssh_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    admin: bool = typer.Option(
        False, "--admin", help="Connect as ec2-user instead of the engine owner user"
    ),
    idle_timeout: int = typer.Option(
        600,
        "--idle-timeout",
        help="Idle timeout (seconds) for the SSM port-forward (0 = disable)",
    ),
):
    """Connect to an engine via SSH."""
    # Registered as the "ssh" command on engine_app; forwards its arguments
    # positionally to ssh_engine. idle_timeout defaults to 600 seconds.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.ssh import ssh_engine

    return ssh_engine(name_or_id, admin, idle_timeout)
128
+
129
+
130
@engine_app.command("config-ssh")
def config_ssh_cmd(
    clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
    all_engines: bool = typer.Option(
        False, "--all", "-a", help="Include all engines from all users"
    ),
    admin: bool = typer.Option(
        False,
        "--admin",
        help="Generate entries that use ec2-user instead of per-engine owner user",
    ),
):
    """Update SSH config with available engines."""
    # Registered as the "config-ssh" command on engine_app; forwards the three
    # flags positionally to config_ssh.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.config_ssh import config_ssh

    return config_ssh(clean, all_engines, admin)
146
+
147
+
148
@engine_app.command("resize")
def resize_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # `...` as the default makes --size a required option.
    size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
    online: bool = typer.Option(
        False,
        "--online",
        help="Resize while running (requires manual filesystem expansion)",
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force resize and detach all studios"
    ),
):
    """Resize an engine's boot disk."""
    # Registered as the "resize" command on engine_app; forwards its arguments
    # positionally to resize_engine.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.resize import resize_engine

    return resize_engine(name_or_id, size, online, force)
165
+
166
+
167
@engine_app.command("gami")
def create_ami_cmd(
    name_or_id: str = typer.Argument(
        help="Engine name or instance ID to create AMI from"
    ),
):
    """Create a 'Golden AMI' from a running engine."""
    # Registered as the "gami" command on engine_app; forwards the single
    # argument to create_ami and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.gami import create_ami

    return create_ami(name_or_id)
177
+
178
+
179
@engine_app.command("coffee")
def coffee_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # Second positional argument; falls back to "4h" when omitted.
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
    cancel: bool = typer.Option(
        False, "--cancel", help="Cancel existing coffee lock instead of extending"
    ),
):
    """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
    # Registered as the "coffee" command on engine_app. The duration string is
    # passed through unparsed; interpreting it is left to coffee().
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.coffee import coffee

    return coffee(name_or_id, duration, cancel)
191
+
192
+
193
@engine_app.command("idle")
def idle_timeout_cmd_wrapper(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # NOTE: the parameter name `set` shadows the builtin inside this function.
    # It is forwarded by keyword below, so the name is part of the call contract.
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
    slack: Optional[str] = typer.Option(
        None, "--slack", help="Set Slack notifications: none, default, all"
    ),
):
    """Show or set engine idle-detector settings."""
    # Registered as the "idle" command on engine_app. Unlike the other
    # wrappers in this module, arguments are forwarded by keyword.
    # Both options default to None when not given on the command line.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.idle import idle_timeout_cmd

    return idle_timeout_cmd(name_or_id=name_or_id, set=set, slack=slack)
207
+
208
+
209
@engine_app.command("debug")
def debug_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files."""
    # Registered as the "debug" command on engine_app; forwards the single
    # argument to debug_engine and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.debug import debug_engine

    return debug_engine(name_or_id)
217
+
218
+
219
@engine_app.command("repair")
def repair_engine_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
    # Registered as the "repair" command on engine_app; forwards the single
    # argument to repair_engine and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .engine.repair import repair_engine

    return repair_engine(name_or_id)
227
+
228
+
229
+ # Studio commands
230
@studio_app.command("create")
def create_studio_cmd(
    size_gb: int = typer.Option(50, "--size", "-s", help="Studio size in GB"),
):
    """Create a new studio for the current user."""
    # Registered as the "create" command on studio_app. Takes no user
    # argument; only the size (default 50) is forwarded to create_studio.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.create import create_studio

    return create_studio(size_gb)
238
+
239
+
240
@studio_app.command("status")
def studio_status_cmd(
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Check status for a different user (admin only)"
    ),
):
    """Show status of your studio."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards `user` (possibly None) to studio_status. The
    # "admin only" restriction from the help text is not checked here.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.status import studio_status

    return studio_status(user)
250
+
251
+
252
@studio_app.command("attach")
def attach_studio_cmd(
    engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Attach a different user's studio (admin only)"
    ),
):
    """Attach your studio to an engine."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards the engine identifier and `user` (possibly None)
    # positionally to attach_studio. The "admin only" restriction from the
    # help text is not checked here.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.attach import attach_studio

    return attach_studio(engine_name_or_id, user)
263
+
264
+
265
@studio_app.command("detach")
def detach_studio_cmd(
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Detach a different user's studio (admin only)"
    ),
):
    """Detach your studio from its current engine."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards `user` (possibly None) to detach_studio. The
    # "admin only" restriction from the help text is not checked here.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.detach import detach_studio

    return detach_studio(user)
275
+
276
+
277
@studio_app.command("delete")
def delete_studio_cmd(
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Delete a different user's studio (admin only)"
    ),
):
    """Delete your studio permanently."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards `user` (possibly None) to delete_studio. No
    # confirmation prompt and no admin check happen at this level.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.delete import delete_studio

    return delete_studio(user)
287
+
288
+
289
@studio_app.command("list")
def list_studios_cmd(
    all_users: bool = typer.Option(
        False, "--all", "-a", help="Show all users' studios"
    ),
):
    """List studios."""
    # Registered as the "list" command on studio_app; forwards the --all flag
    # to list_studios and returns its result.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.list import list_studios

    return list_studios(all_users)
299
+
300
+
301
@studio_app.command("reset")
def reset_studio_cmd(
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Reset a different user's studio"
    ),
):
    """Reset a stuck studio (admin operation)."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards `user` (possibly None) to reset_studio. Nothing
    # at this level verifies that the caller is an admin.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.reset import reset_studio

    return reset_studio(user)
311
+
312
+
313
@studio_app.command("resize")
def resize_studio_cmd(
    # `...` as the default makes --size a required option.
    size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
    # Defaults to None when --user is omitted, hence Optional[str].
    user: Optional[str] = typer.Option(
        None, "--user", "-u", help="Resize a different user's studio (admin only)"
    ),
):
    """Resize your studio volume (requires detachment)."""
    # NOTE: the docstring above doubles as the CLI help text for this command.
    #
    # Thin wrapper: forwards `size` and `user` (possibly None) positionally to
    # resize_studio. Neither the detachment requirement nor the "admin only"
    # restriction is checked here.
    # Deferred import: the implementation is only loaded when the command runs.
    from .studio.resize import resize_studio

    return resize_studio(size, user)
@@ -0,0 +1 @@
1
+ """Utility modules for engine and studio commands."""
@@ -0,0 +1,47 @@
1
+ """API request utilities for engine and studio commands."""
2
+
3
+ from typing import Dict, Optional
4
+
5
+ import requests
6
+ import typer
7
+
8
+ from .aws_utils import get_api_url
9
+ from .constants import console
10
+
11
+
12
def make_api_request(
    method: str,
    endpoint: str,
    json_data: Optional[Dict] = None,
    params: Optional[Dict] = None,
    timeout: Optional[float] = 60.0,
) -> requests.Response:
    """Make an API request with error handling.

    Args:
        method: HTTP verb; one of "GET", "POST" or "DELETE".
        endpoint: Path appended verbatim to the base URL from get_api_url().
        json_data: JSON body. Only sent with POST requests.
        params: Query parameters. Only sent with GET requests.
        timeout: Seconds to wait for the server before giving up. Pass None
            to wait indefinitely (the previous behaviour).

    Returns:
        The response exactly as received. HTTP status codes are NOT checked
        here; callers inspect ``response.status_code`` themselves.

    Raises:
        ValueError: If ``method`` is not one of the supported verbs.
        typer.Exit: With code 1 when the request fails at the transport level
            (connection error, timeout, ...), after printing the error.
    """
    # Reject bad input before doing any network or SSM work.
    if method not in ("GET", "POST", "DELETE"):
        raise ValueError(f"Unsupported HTTP method: {method}")

    api_url = get_api_url()
    url = f"{api_url}{endpoint}"

    try:
        # Without an explicit timeout, requests waits forever on a stalled
        # connection, which would hang the CLI.
        if method == "GET":
            return requests.get(url, params=params, timeout=timeout)
        if method == "POST":
            return requests.post(url, json=json_data, timeout=timeout)
        return requests.delete(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        console.print(f"[red]❌ API request failed: {e}[/red]")
        raise typer.Exit(1) from e
36
+
37
+
38
def get_user_studio(username: str) -> Optional[Dict]:
    """Return the studio belonging to ``username``, or None.

    Fetches the studio list from the ``/studios`` endpoint and returns the
    first entry whose ``"user"`` field equals ``username``.

    Args:
        username: Owner to look for.

    Returns:
        The matching studio dict, or None when the API does not answer with
        HTTP 200 or no entry matches.
    """
    response = make_api_request("GET", "/studios")
    if response.status_code != 200:
        return None

    # `or []` guards against an explicit null "studios" value in the payload.
    studios = response.json().get("studios") or []
    # .get() so an entry without a "user" key is skipped instead of raising
    # KeyError; next() stops at the first match rather than building a list.
    return next((s for s in studios if s.get("user") == username), None)
@@ -0,0 +1,102 @@
1
+ """AWS-specific utilities for engine and studio commands."""
2
+
3
+ import subprocess
4
+ from typing import Dict, List
5
+
6
+ import boto3
7
+ import typer
8
+ from botocore.exceptions import ClientError, NoCredentialsError
9
+ from rich.prompt import Confirm
10
+
11
+ from .constants import console
12
+
13
+
14
def check_aws_sso() -> str:
    """Check AWS SSO status and return username.

    Calls STS ``get_caller_identity``. For an assumed-role ARN the username is
    the last ``/``-separated component of the ARN; otherwise it is the part of
    ``UserId`` after the last ``:``.

    If the identity call fails with NoCredentialsError or ClientError, the
    user is offered an interactive ``aws sso login``. After a successful
    login the check is retried.

    Returns:
        The username derived from the caller identity.

    Raises:
        typer.Exit: With code 1 when the user declines to log in or the
            login attempt fails.
    """
    try:
        sts = boto3.client("sts")
        identity = sts.get_caller_identity()
    except (NoCredentialsError, ClientError):
        console.print("[red]❌ Not logged in to AWS SSO[/red]")
        console.print("Please run: [cyan]aws sso login[/cyan]")
        if Confirm.ask("Would you like to login now?"):
            try:
                # Output is deliberately NOT captured: `aws sso login` prints
                # the verification URL and code the user needs when a browser
                # cannot be opened automatically.
                # check=True turns a non-zero exit into CalledProcessError.
                subprocess.run(["aws", "sso", "login"], check=True)
            except FileNotFoundError:
                # The aws CLI is not installed or not on PATH.
                console.print(
                    "[red]Login failed: the 'aws' command was not found[/red]"
                )
            except subprocess.CalledProcessError as e:
                console.print(f"[red]Login failed: {e}[/red]")
            else:
                console.print("[green]✓ Successfully logged in![/green]")
                # Retry now that credentials should be available.
                return check_aws_sso()
        raise typer.Exit(1)

    # Parse username from assumed role ARN
    # Format: arn:aws:sts::123456789012:assumed-role/AWSReservedSSO_DeveloperAccess_xxxx/username
    arn = identity["Arn"]
    if "assumed-role" in arn:
        return arn.split("/")[-1]
    # Fallback for other auth methods
    return identity["UserId"].split(":")[-1]
45
+
46
+
47
def get_api_url() -> str:
    """Read the Studio Manager API base URL from SSM Parameter Store.

    Looks up the ``/dev/studio-manager/api-url`` parameter in us-east-1.

    Returns:
        The parameter's value.

    Raises:
        typer.Exit: With code 1 when the lookup fails with a ClientError; a
            message is printed first, with a dedicated hint for a missing
            parameter.
    """
    client = boto3.client("ssm", region_name="us-east-1")
    try:
        parameter = client.get_parameter(Name="/dev/studio-manager/api-url")
    except ClientError as e:
        missing = e.response["Error"]["Code"] == "ParameterNotFound"
        if not missing:
            console.print(f"[red]❌ Error retrieving API URL: {e}[/red]")
        else:
            console.print(
                "[red]❌ API URL parameter not found in SSM Parameter Store[/red]"
            )
            console.print(
                "Please ensure the Studio Manager infrastructure is deployed."
            )
        raise typer.Exit(1)
    return parameter["Parameter"]["Value"]
64
+
65
+
66
+ def _colour_stage(stage: str) -> str:
67
+ """Return colourised stage name for table output."""
68
+ if not stage:
69
+ return "[dim]-[/dim]"
70
+ low = stage.lower()
71
+ if low.startswith("error"):
72
+ return f"[red]{stage}[/red]"
73
+ if low == "finished":
74
+ return f"[green]{stage}[/green]"
75
+ return f"[yellow]{stage}[/yellow]"
76
+
77
+
78
+ def _fetch_init_stages(instance_ids: List[str]) -> Dict[str, str]:
79
+ """Fetch DayhoffInitStage tag for many instances in one call."""
80
+ if not instance_ids:
81
+ return {}
82
+ ec2 = boto3.client("ec2", region_name="us-east-1")
83
+ stages: Dict[str, str] = {}
84
+ try:
85
+ paginator = ec2.get_paginator("describe_instances")
86
+ for page in paginator.paginate(InstanceIds=instance_ids):
87
+ for res in page["Reservations"]:
88
+ for inst in res["Instances"]:
89
+ iid = inst["InstanceId"]
90
+ tag_val = next(
91
+ (
92
+ t["Value"]
93
+ for t in inst.get("Tags", [])
94
+ if t["Key"] == "DayhoffInitStage"
95
+ ),
96
+ None,
97
+ )
98
+ if tag_val:
99
+ stages[iid] = tag_val
100
+ except Exception:
101
+ pass # best-effort
102
+ return stages
@@ -0,0 +1,21 @@
1
+ """Constants used across engine and studio commands."""
2
+
3
+ from rich.console import Console
4
+
5
# Single Rich console instance, created once at import time so that modules
# importing it from here share the same object.
console = Console()

# Cost information
# Keyed by engine type name; each value is a cost per hour and the trailing
# comment names the corresponding EC2 instance type.
# NOTE(review): currency and pricing model (presumably USD) are not stated
# here - confirm before displaying these as exact prices.
HOURLY_COSTS = {
    "cpu": 0.50,  # r6i.2xlarge
    "cpumax": 2.02,  # r7i.8xlarge
    "t4": 0.75,  # g4dn.2xlarge
    "a10g": 1.50,  # g5.2xlarge
    "a100": 21.96,  # p4d.24xlarge
    "4_t4": 3.91,  # g4dn.12xlarge
    "8_t4": 7.83,  # g4dn.metal
    "4_a10g": 6.24,  # g5.12xlarge
    "8_a10g": 16.29,  # g5.48xlarge
}

# SSH config management
# Marker comment text; presumably used to recognise SSH config entries written
# by this tool - verify against the config-ssh implementation.
SSH_MANAGED_COMMENT = "# Managed by dh engine"