dayhoff-tools 1.9.26__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. dayhoff_tools/cli/engine/__init__.py +1 -323
  2. dayhoff_tools/cli/engine/coffee.py +110 -0
  3. dayhoff_tools/cli/engine/config_ssh.py +113 -0
  4. dayhoff_tools/cli/engine/debug.py +79 -0
  5. dayhoff_tools/cli/engine/gami.py +160 -0
  6. dayhoff_tools/cli/engine/idle.py +148 -0
  7. dayhoff_tools/cli/engine/launch.py +101 -0
  8. dayhoff_tools/cli/engine/list.py +116 -0
  9. dayhoff_tools/cli/engine/repair.py +128 -0
  10. dayhoff_tools/cli/engine/resize.py +195 -0
  11. dayhoff_tools/cli/engine/ssh.py +62 -0
  12. dayhoff_tools/cli/engine/{engine_core.py → status.py} +6 -201
  13. dayhoff_tools/cli/engine_studio_commands.py +323 -0
  14. dayhoff_tools/cli/engine_studio_utils/__init__.py +1 -0
  15. dayhoff_tools/cli/engine_studio_utils/api_utils.py +47 -0
  16. dayhoff_tools/cli/engine_studio_utils/aws_utils.py +102 -0
  17. dayhoff_tools/cli/engine_studio_utils/constants.py +21 -0
  18. dayhoff_tools/cli/engine_studio_utils/formatting.py +210 -0
  19. dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +141 -0
  20. dayhoff_tools/cli/main.py +1 -2
  21. dayhoff_tools/cli/studio/__init__.py +1 -0
  22. dayhoff_tools/cli/studio/attach.py +314 -0
  23. dayhoff_tools/cli/studio/create.py +48 -0
  24. dayhoff_tools/cli/studio/delete.py +71 -0
  25. dayhoff_tools/cli/studio/detach.py +56 -0
  26. dayhoff_tools/cli/studio/list.py +81 -0
  27. dayhoff_tools/cli/studio/reset.py +90 -0
  28. dayhoff_tools/cli/studio/resize.py +134 -0
  29. dayhoff_tools/cli/studio/status.py +78 -0
  30. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/METADATA +1 -1
  31. dayhoff_tools-1.10.1.dist-info/RECORD +61 -0
  32. dayhoff_tools/cli/engine/engine_maintenance.py +0 -431
  33. dayhoff_tools/cli/engine/engine_management.py +0 -505
  34. dayhoff_tools/cli/engine/shared.py +0 -501
  35. dayhoff_tools/cli/engine/studio_commands.py +0 -825
  36. dayhoff_tools-1.9.26.dist-info/RECORD +0 -39
  37. /dayhoff_tools/cli/engine/{engine_lifecycle.py → lifecycle.py} +0 -0
  38. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/WHEEL +0 -0
  39. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/entry_points.txt +0 -0
@@ -1,323 +1 @@
1
- """Engine and Studio management commands for DHT CLI."""
2
-
3
- from typing import Optional
4
-
5
- import typer
6
-
7
- # Initialize Typer apps
8
- engine_app = typer.Typer(help="Manage compute engines for development.")
9
- studio_app = typer.Typer(help="Manage persistent development studios.")
10
-
11
- # Use lazy loading pattern similar to main.py swarm commands
12
- # Import functions only when commands are actually called
13
-
14
-
15
- # Engine commands
16
- @engine_app.command("launch")
17
- def launch_engine_cmd(
18
- name: str = typer.Argument(help="Name for the new engine"),
19
- engine_type: str = typer.Option(
20
- "cpu",
21
- "--type",
22
- "-t",
23
- help="Engine type: cpu, cpumax, t4, a10g, a100, 4_t4, 8_t4, 4_a10g, 8_a10g",
24
- ),
25
- user: str = typer.Option(None, "--user", "-u", help="Override username"),
26
- boot_disk_size: int = typer.Option(
27
- None,
28
- "--size",
29
- "-s",
30
- help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
31
- ),
32
- availability_zone: str = typer.Option(
33
- None,
34
- "--az",
35
- help="Prefer a specific Availability Zone (e.g., us-east-1b). If omitted the service will try all public subnets.",
36
- ),
37
- ):
38
- """Launch a new engine instance."""
39
- from .engine_core import launch_engine
40
-
41
- return launch_engine(name, engine_type, user, boot_disk_size, availability_zone)
42
-
43
-
44
- @engine_app.command("list")
45
- def list_engines_cmd(
46
- user: str = typer.Option(None, "--user", "-u", help="Filter by user"),
47
- running_only: bool = typer.Option(
48
- False, "--running", help="Show only running engines"
49
- ),
50
- stopped_only: bool = typer.Option(
51
- False, "--stopped", help="Show only stopped engines"
52
- ),
53
- detailed: bool = typer.Option(
54
- False, "--detailed", "-d", help="Show detailed status (slower)"
55
- ),
56
- ):
57
- """List engines (shows all engines by default)."""
58
- from .engine_core import list_engines
59
-
60
- return list_engines(user, running_only, stopped_only, detailed)
61
-
62
-
63
- @engine_app.command("status")
64
- def engine_status_cmd(
65
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
66
- detailed: bool = typer.Option(
67
- False, "--detailed", "-d", help="Show detailed status (slower)"
68
- ),
69
- show_log: bool = typer.Option(
70
- False, "--show-log", help="Show bootstrap log (requires --detailed)"
71
- ),
72
- ):
73
- """Show engine status and information."""
74
- from .engine_core import engine_status
75
-
76
- return engine_status(name_or_id, detailed, show_log)
77
-
78
-
79
- @engine_app.command("start")
80
- def start_engine_cmd(
81
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
82
- ):
83
- """Start a stopped engine."""
84
- from .engine_lifecycle import start_engine
85
-
86
- return start_engine(name_or_id)
87
-
88
-
89
- @engine_app.command("stop")
90
- def stop_engine_cmd(
91
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
92
- force: bool = typer.Option(
93
- False, "--force", "-f", help="Force stop and detach all studios"
94
- ),
95
- ):
96
- """Stop an engine."""
97
- from .engine_lifecycle import stop_engine
98
-
99
- return stop_engine(name_or_id, force)
100
-
101
-
102
- @engine_app.command("terminate")
103
- def terminate_engine_cmd(
104
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
105
- ):
106
- """Permanently terminate an engine."""
107
- from .engine_lifecycle import terminate_engine
108
-
109
- return terminate_engine(name_or_id)
110
-
111
-
112
- @engine_app.command("ssh")
113
- def ssh_engine_cmd(
114
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
115
- admin: bool = typer.Option(
116
- False, "--admin", help="Connect as ec2-user instead of the engine owner user"
117
- ),
118
- idle_timeout: int = typer.Option(
119
- 600,
120
- "--idle-timeout",
121
- help="Idle timeout (seconds) for the SSM port-forward (0 = disable)",
122
- ),
123
- ):
124
- """Connect to an engine via SSH."""
125
- from .engine_management import ssh_engine
126
-
127
- return ssh_engine(name_or_id, admin, idle_timeout)
128
-
129
-
130
- @engine_app.command("config-ssh")
131
- def config_ssh_cmd(
132
- clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
133
- all_engines: bool = typer.Option(
134
- False, "--all", "-a", help="Include all engines from all users"
135
- ),
136
- admin: bool = typer.Option(
137
- False,
138
- "--admin",
139
- help="Generate entries that use ec2-user instead of per-engine owner user",
140
- ),
141
- ):
142
- """Update SSH config with available engines."""
143
- from .engine_management import config_ssh
144
-
145
- return config_ssh(clean, all_engines, admin)
146
-
147
-
148
- @engine_app.command("resize")
149
- def resize_engine_cmd(
150
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
151
- size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
152
- online: bool = typer.Option(
153
- False,
154
- "--online",
155
- help="Resize while running (requires manual filesystem expansion)",
156
- ),
157
- force: bool = typer.Option(
158
- False, "--force", "-f", help="Force resize and detach all studios"
159
- ),
160
- ):
161
- """Resize an engine's boot disk."""
162
- from .engine_management import resize_engine
163
-
164
- return resize_engine(name_or_id, size, online, force)
165
-
166
-
167
- @engine_app.command("gami")
168
- def create_ami_cmd(
169
- name_or_id: str = typer.Argument(
170
- help="Engine name or instance ID to create AMI from"
171
- ),
172
- ):
173
- """Create a 'Golden AMI' from a running engine."""
174
- from .engine_management import create_ami
175
-
176
- return create_ami(name_or_id)
177
-
178
-
179
- @engine_app.command("coffee")
180
- def coffee_cmd(
181
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
182
- duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
183
- cancel: bool = typer.Option(
184
- False, "--cancel", help="Cancel existing coffee lock instead of extending"
185
- ),
186
- ):
187
- """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
188
- from .engine_maintenance import coffee
189
-
190
- return coffee(name_or_id, duration, cancel)
191
-
192
-
193
- @engine_app.command("idle")
194
- def idle_timeout_cmd_wrapper(
195
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
196
- set: Optional[str] = typer.Option(
197
- None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
198
- ),
199
- slack: Optional[str] = typer.Option(
200
- None, "--slack", help="Set Slack notifications: none, default, all"
201
- ),
202
- ):
203
- """Show or set engine idle-detector settings."""
204
- from .engine_maintenance import idle_timeout_cmd
205
-
206
- return idle_timeout_cmd(name_or_id=name_or_id, set=set, slack=slack)
207
-
208
-
209
- @engine_app.command("debug")
210
- def debug_engine_cmd(
211
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
212
- ):
213
- """Debug engine bootstrap status and files."""
214
- from .engine_maintenance import debug_engine
215
-
216
- return debug_engine(name_or_id)
217
-
218
-
219
- @engine_app.command("repair")
220
- def repair_engine_cmd(
221
- name_or_id: str = typer.Argument(help="Engine name or instance ID"),
222
- ):
223
- """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
224
- from .engine_maintenance import repair_engine
225
-
226
- return repair_engine(name_or_id)
227
-
228
-
229
- # Studio commands
230
- @studio_app.command("create")
231
- def create_studio_cmd(
232
- size_gb: int = typer.Option(50, "--size", "-s", help="Studio size in GB"),
233
- ):
234
- """Create a new studio for the current user."""
235
- from .studio_commands import create_studio
236
-
237
- return create_studio(size_gb)
238
-
239
-
240
- @studio_app.command("status")
241
- def studio_status_cmd(
242
- user: str = typer.Option(
243
- None, "--user", "-u", help="Check status for a different user (admin only)"
244
- ),
245
- ):
246
- """Show status of your studio."""
247
- from .studio_commands import studio_status
248
-
249
- return studio_status(user)
250
-
251
-
252
- @studio_app.command("attach")
253
- def attach_studio_cmd(
254
- engine_name_or_id: str = typer.Argument(help="Engine name or instance ID"),
255
- user: str = typer.Option(
256
- None, "--user", "-u", help="Attach a different user's studio (admin only)"
257
- ),
258
- ):
259
- """Attach your studio to an engine."""
260
- from .studio_commands import attach_studio
261
-
262
- return attach_studio(engine_name_or_id, user)
263
-
264
-
265
- @studio_app.command("detach")
266
- def detach_studio_cmd(
267
- user: str = typer.Option(
268
- None, "--user", "-u", help="Detach a different user's studio (admin only)"
269
- ),
270
- ):
271
- """Detach your studio from its current engine."""
272
- from .studio_commands import detach_studio
273
-
274
- return detach_studio(user)
275
-
276
-
277
- @studio_app.command("delete")
278
- def delete_studio_cmd(
279
- user: str = typer.Option(
280
- None, "--user", "-u", help="Delete a different user's studio (admin only)"
281
- ),
282
- ):
283
- """Delete your studio permanently."""
284
- from .studio_commands import delete_studio
285
-
286
- return delete_studio(user)
287
-
288
-
289
- @studio_app.command("list")
290
- def list_studios_cmd(
291
- all_users: bool = typer.Option(
292
- False, "--all", "-a", help="Show all users' studios"
293
- ),
294
- ):
295
- """List studios."""
296
- from .studio_commands import list_studios
297
-
298
- return list_studios(all_users)
299
-
300
-
301
- @studio_app.command("reset")
302
- def reset_studio_cmd(
303
- user: str = typer.Option(
304
- None, "--user", "-u", help="Reset a different user's studio"
305
- ),
306
- ):
307
- """Reset a stuck studio (admin operation)."""
308
- from .studio_commands import reset_studio
309
-
310
- return reset_studio(user)
311
-
312
-
313
- @studio_app.command("resize")
314
- def resize_studio_cmd(
315
- size: int = typer.Option(..., "--size", "-s", help="New size in GB"),
316
- user: str = typer.Option(
317
- None, "--user", "-u", help="Resize a different user's studio (admin only)"
318
- ),
319
- ):
320
- """Resize your studio volume (requires detachment)."""
321
- from .studio_commands import resize_studio
322
-
323
- return resize_studio(size, user)
1
+ """Engine management commands."""
@@ -0,0 +1,110 @@
1
+ """Engine coffee command."""
2
+
3
+ import re
4
+ import time
5
+
6
+ import boto3
7
+ import typer
8
+ from botocore.exceptions import ClientError
9
+
10
+ from ..engine_studio_utils.api_utils import make_api_request
11
+ from ..engine_studio_utils.aws_utils import check_aws_sso
12
+ from ..engine_studio_utils.constants import console
13
+ from ..engine_studio_utils.formatting import resolve_engine
14
+
15
+
16
def coffee(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
    cancel: bool = typer.Option(
        False, "--cancel", help="Cancel existing coffee lock instead of extending"
    ),
):
    """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
    # NOTE(review): the docstring is deliberately left as a single line; Typer
    # presumably surfaces it as the command's --help text - confirm before
    # expanding it.
    #
    # Flow:
    #   1. Validate `duration` (skipped when --cancel is given).
    #   2. Resolve `name_or_id` against the engines returned by GET /engines.
    #   3. Run /usr/local/bin/engine-coffee on the instance through SSM and
    #      poll for the result.
    # Every failure path exits with status 1 so scripts can detect it.

    # Called before any other work; the returned username is not needed here.
    # NOTE(review): presumably this validates the AWS SSO session - confirm.
    check_aws_sso()

    seconds_total = 0
    if not cancel:
        # fullmatch (not match): the pattern must consume the whole string,
        # otherwise inputs such as "2hours" or "2h30" would be accepted and
        # silently truncated to "2h".
        match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
        if not match or (not match.group(1) and not match.group(2)):
            console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
            console.print("Use format like: 4h, 30m, 2h30m")
            raise typer.Exit(1)

        hours = int(match.group(1) or 0)
        minutes = int(match.group(2) or 0)
        seconds_total = (hours * 60 + minutes) * 60
        if seconds_total == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    if cancel:
        console.print(f"Cancelling coffee for [cyan]{engine['name']}[/cyan]…")
        remote_command = "/usr/local/bin/engine-coffee --cancel"
    else:
        console.print(
            f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…"
        )
        remote_command = f"/usr/local/bin/engine-coffee {seconds_total}"

    # Use SSM to run the engine coffee command
    ssm = boto3.client("ssm", region_name="us-east-1")
    # Statuses after which SSM will not change the invocation any further.
    terminal_statuses = {"Success", "Failed", "Cancelled", "TimedOut"}
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [remote_command],
                "executionTimeout": ["60"],
            },
        )
        command_id = response["Command"]["CommandId"]

        # Poll for up to ~10 seconds for the command to finish.
        result = {}
        for _ in range(10):
            time.sleep(1)
            try:
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
            except ssm.exceptions.InvocationDoesNotExist:
                # The invocation may not be registered yet right after
                # send_command; keep polling instead of aborting.
                continue
            if result["Status"] in terminal_statuses:
                break

        if result.get("Status") != "Success":
            console.print(
                f"[red]❌ Failed to manage coffee: {result.get('StatusDetails', 'Unknown error')}[/red]"
            )
            raise typer.Exit(1)

        if cancel:
            console.print(
                "[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]"
            )
        else:
            console.print(f"[green]✓ Coffee poured for {duration}[/green]")
            console.print(
                "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
            )
            console.print(
                "[dim]Use coffee for nohup operations or other background tasks.[/dim]"
            )

    except ClientError as e:
        console.print(f"[red]❌ Failed to manage coffee: {e}[/red]")
        raise typer.Exit(1)
@@ -0,0 +1,113 @@
1
+ """Engine config-ssh command."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+
7
+ from ..engine_studio_utils.api_utils import make_api_request
8
+ from ..engine_studio_utils.aws_utils import check_aws_sso
9
+ from ..engine_studio_utils.constants import SSH_MANAGED_COMMENT, console
10
+ from ..engine_studio_utils.ssh_utils import check_session_manager_plugin
11
+
12
+
13
def config_ssh(
    clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
    all_engines: bool = typer.Option(
        False, "--all", "-a", help="Include all engines from all users"
    ),
    admin: bool = typer.Option(
        False,
        "--admin",
        help="Generate entries that use ec2-user instead of per-engine owner user",
    ),
):
    """Update SSH config with available engines."""
    # NOTE(review): the docstring is deliberately left as a single line; Typer
    # presumably surfaces it as the command's --help text - confirm before
    # expanding it.
    #
    # Rewrites ~/.ssh/config: every block whose "Host" line carries
    # SSH_MANAGED_COMMENT is dropped, and (unless --clean) one fresh block per
    # running engine is appended. Entries not marked as managed are preserved.
    username = check_aws_sso()

    # Only check for Session Manager Plugin if we're not just cleaning
    if not clean and not check_session_manager_plugin():
        raise typer.Exit(1)

    running_engines = []
    if clean:
        console.print("Removing all managed SSH entries...")
    else:
        if all_engines:
            console.print("Updating SSH config with all running engines...")
        else:
            console.print(
                f"Updating SSH config with running engines for [cyan]{username}[/cyan] and [cyan]shared[/cyan]..."
            )

        # The engine list is only needed when entries are (re)generated, so
        # --clean does not depend on the API being reachable.
        response = make_api_request("GET", "/engines")
        if response.status_code != 200:
            console.print("[red]❌ Failed to fetch engines[/red]")
            raise typer.Exit(1)

        engines = response.json().get("engines", [])
        running_engines = [e for e in engines if e["state"].lower() == "running"]

        if not all_engines:
            # Show only current user's engines and shared engines
            running_engines = [
                e for e in running_engines if e["user"] in (username, "shared")
            ]

    # Read existing config
    config_path = Path.home() / ".ssh" / "config"
    config_path.parent.mkdir(mode=0o700, exist_ok=True)
    lines = config_path.read_text().splitlines() if config_path.exists() else []

    # Remove old managed entries. A managed block starts at a line carrying
    # SSH_MANAGED_COMMENT and runs until the next section header. ssh_config
    # sections are opened by "Host" or "Match" (keywords are case-insensitive),
    # so both end the skip - otherwise a user's Match block following managed
    # entries would be deleted along with them.
    new_lines = []
    in_managed_block = False
    for line in lines:
        if SSH_MANAGED_COMMENT in line:
            in_managed_block = True
            continue
        if in_managed_block:
            tokens = line.split()
            if not tokens or tokens[0].lower() not in ("host", "match"):
                continue
            in_managed_block = False
        new_lines.append(line)

    # Drop trailing blank lines so the separator added before each managed
    # entry does not accumulate one extra blank line on every run.
    while new_lines and not new_lines[-1].strip():
        new_lines.pop()

    # Add new entries if not cleaning
    if not clean:
        # Determine ssh user based on --admin flag
        # NOTE(review): the non-admin user is the caller's username for every
        # engine, including other users' engines under --all - confirm intended.
        ssh_user = "ec2-user" if admin else username
        for engine in running_engines:
            new_lines.extend(
                [
                    "",
                    f"Host {engine['name']} {SSH_MANAGED_COMMENT}",
                    f" HostName {engine['instance_id']}",
                    f" User {ssh_user}",
                    " ProxyCommand sh -c \"AWS_SSM_IDLE_TIMEOUT=600 aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'\"",
                ]
            )

    # Write back, terminating the file with a newline when it is non-empty.
    config_text = "\n".join(new_lines)
    if config_text:
        config_text += "\n"
    config_path.write_text(config_text)
    config_path.chmod(0o600)

    if clean:
        console.print("[green]✓ Removed all managed SSH entries[/green]")
    else:
        console.print(
            f"[green]✓ Updated SSH config with {len(running_engines)} engines[/green]"
        )
        for engine in running_engines:
            user_display = (
                f"[dim]({engine['user']})[/dim]" if engine["user"] != username else ""
            )
            console.print(
                f" • {engine['name']} → {engine['instance_id']} {user_display}"
            )
@@ -0,0 +1,79 @@
1
+ """Engine debug command."""
2
+
3
+ import time
4
+
5
+ import boto3
6
+ import typer
7
+
8
+ from ..engine_studio_utils.api_utils import make_api_request
9
+ from ..engine_studio_utils.aws_utils import check_aws_sso
10
+ from ..engine_studio_utils.constants import console
11
+ from ..engine_studio_utils.formatting import resolve_engine
12
+
13
+
14
def debug_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files."""
    # NOTE(review): the docstring is deliberately left as a single line; Typer
    # presumably surfaces it as the command's --help text - confirm before
    # expanding it.
    #
    # Runs a fixed list of read-only shell checks on the engine through SSM
    # and prints each result. Checks are independent: a failing or erroring
    # check is reported and the remaining ones still run.
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    console.print(f"[bold]Debug info for {engine['name']}:[/bold]\n")

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Check multiple files and systemd status
    checks = [
        (
            "Stage file",
            "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Health file",
            "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Sentinel file",
            "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Setup service",
            "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'",
        ),
        (
            "Bootstrap log tail",
            "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'",
        ),
        ("Environment file", "cat /etc/engine.env 2>/dev/null || echo 'MISSING'"),
    ]

    # Statuses after which SSM will not change the invocation any further.
    terminal_statuses = {"Success", "Failed", "Cancelled", "TimedOut"}

    for name, cmd in checks:
        try:
            resp = ssm.send_command(
                InstanceIds=[instance_id],
                DocumentName="AWS-RunShellScript",
                Parameters={"commands": [cmd], "executionTimeout": ["10"]},
            )
            cid = resp["Command"]["CommandId"]

            # Poll instead of sleeping once: after a single second the command
            # is often still Pending/InProgress (which would be misreported as
            # FAILED) or not yet registered (InvocationDoesNotExist).
            inv = {}
            for _ in range(10):
                time.sleep(1)
                try:
                    inv = ssm.get_command_invocation(
                        CommandId=cid, InstanceId=instance_id
                    )
                except ssm.exceptions.InvocationDoesNotExist:
                    continue
                if inv["Status"] in terminal_statuses:
                    break

            if inv.get("Status") == "Success":
                output = inv["StandardOutputContent"].strip()
                console.print(f"[cyan]{name}:[/cyan]")
                console.print(f"[dim]{output}[/dim]\n")
            else:
                console.print(f"[cyan]{name}:[/cyan] [red]FAILED[/red]\n")

        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, so one
            # broken check must not prevent the others from being shown.
            console.print(f"[cyan]{name}:[/cyan] [red]ERROR: {e}[/red]\n")