dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. dayhoff_tools/__init__.py +10 -0
  2. dayhoff_tools/cli/cloud_commands.py +179 -43
  3. dayhoff_tools/cli/engine1/__init__.py +323 -0
  4. dayhoff_tools/cli/engine1/engine_core.py +703 -0
  5. dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
  6. dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
  7. dayhoff_tools/cli/engine1/engine_management.py +505 -0
  8. dayhoff_tools/cli/engine1/shared.py +501 -0
  9. dayhoff_tools/cli/engine1/studio_commands.py +825 -0
  10. dayhoff_tools/cli/engines_studios/__init__.py +6 -0
  11. dayhoff_tools/cli/engines_studios/api_client.py +351 -0
  12. dayhoff_tools/cli/engines_studios/auth.py +144 -0
  13. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
  14. dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
  15. dayhoff_tools/cli/engines_studios/progress.py +260 -0
  16. dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
  17. dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
  18. dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
  19. dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
  20. dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
  21. dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
  22. dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
  23. dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
  25. dayhoff_tools/cli/main.py +106 -7
  26. dayhoff_tools/cli/utility_commands.py +896 -179
  27. dayhoff_tools/deployment/base.py +70 -6
  28. dayhoff_tools/deployment/deploy_aws.py +165 -25
  29. dayhoff_tools/deployment/deploy_gcp.py +78 -5
  30. dayhoff_tools/deployment/deploy_utils.py +20 -7
  31. dayhoff_tools/deployment/job_runner.py +9 -4
  32. dayhoff_tools/deployment/processors.py +230 -418
  33. dayhoff_tools/deployment/swarm.py +47 -12
  34. dayhoff_tools/embedders.py +28 -26
  35. dayhoff_tools/fasta.py +181 -64
  36. dayhoff_tools/warehouse.py +268 -1
  37. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
  38. dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
  39. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
  40. dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
  41. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,136 @@
1
+ """Engine lifecycle commands: start, stop, and terminate."""
2
+
3
+ from datetime import datetime, timezone
4
+
5
+ import typer
6
+ from rich.prompt import Confirm
7
+
8
+ from .shared import (
9
+ HOURLY_COSTS,
10
+ check_aws_sso,
11
+ console,
12
+ format_duration,
13
+ make_api_request,
14
+ parse_launch_time,
15
+ resolve_engine,
16
+ )
17
+
18
+
19
def start_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Start a stopped engine."""
    # Flow: verify AWS SSO, list engines through the API, resolve the
    # requested engine by name or instance ID, then POST the start request.
    # Exits with status 1 (typer.Exit) only when the engine list cannot be
    # fetched; a rejected start request is reported on the console.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    console.print(f"Starting engine [cyan]{target['name']}[/cyan]...")

    start_reply = make_api_request("POST", f"/engines/{target['instance_id']}/start")

    # Guard clause: report the API's error message and stop here.
    if start_reply.status_code != 200:
        reason = start_reply.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to start engine: {reason}[/red]")
        return

    payload = start_reply.json()
    console.print("[green]✓ Engine started successfully![/green]")
    # The API may not have an address yet, hence the placeholder default.
    console.print(f"New public IP: {payload.get('public_ip', 'Pending...')}")
45
+
46
+
47
def stop_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force stop and detach all studios"
    ),
):
    """Stop an engine."""
    # Flow: verify AWS SSO, resolve the engine, then POST a stop request.
    # Without --force, a 409 reply is treated as "studios are attached": the
    # studios are listed and the user is asked whether to detach them and
    # retry. Exits with status 1 only when the engine list cannot be fetched.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    console.print(f"Stopping engine [cyan]{target['name']}[/cyan]...")

    stop_path = f"/engines/{target['instance_id']}/stop"

    # First attempt: detach studios only if the caller passed --force.
    reply = make_api_request(
        "POST",
        stop_path,
        json_data={"detach_studios": force},
    )

    blocked_by_studios = reply.status_code == 409 and not force
    if blocked_by_studios:
        studios = reply.json().get("attached_studios", [])

        console.print("\n[yellow]⚠️ This engine has attached studios:[/yellow]")
        for entry in studios:
            console.print(f" • {entry['user']} ({entry['studio_id']})")

        if not Confirm.ask("\nDetach all studios and stop the engine?"):
            console.print("Stop cancelled.")
            return

        # Second attempt, this time asking the API to detach everything.
        reply = make_api_request(
            "POST",
            stop_path,
            json_data={"detach_studios": True},
        )

    if reply.status_code != 200:
        reason = reply.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to stop engine: {reason}[/red]")
        return

    console.print("[green]✓ Engine stopped successfully![/green]")
98
+
99
+
100
def terminate_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Permanently terminate an engine."""
    # Flow: verify AWS SSO, resolve the engine, show an estimated cost for
    # the time since launch, ask for confirmation, then DELETE the engine.
    # Exits with status 1 only when the engine list cannot be fetched.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    # Estimated spend = hourly rate for the engine type x hours since launch.
    # Engine types missing from HOURLY_COSTS are priced at 0.
    launched_at = parse_launch_time(target["launch_time"])
    elapsed = datetime.now(timezone.utc) - launched_at
    hourly_rate = HOURLY_COSTS.get(target["engine_type"], 0)
    hours_up = elapsed.total_seconds() / 3600
    total_cost = hourly_rate * hours_up

    console.print(
        f"\n[yellow]⚠️ This will permanently terminate engine '{target['name']}'[/yellow]"
    )
    console.print(f"Total cost for this session: ${total_cost:.2f}")

    confirmed = Confirm.ask("\nAre you sure you want to terminate this engine?")
    if not confirmed:
        console.print("Termination cancelled.")
        return

    reply = make_api_request("DELETE", f"/engines/{target['instance_id']}")

    if reply.status_code != 200:
        reason = reply.json().get("error", "Unknown error")
        console.print(f"[red]❌ Failed to terminate engine: {reason}[/red]")
        return

    console.print("[green]✓ Engine terminated successfully![/green]")
@@ -0,0 +1,431 @@
1
+ """Engine maintenance commands: coffee, idle timeout, debug, and repair."""
2
+
3
+ import re
4
+ import subprocess
5
+ import time
6
+ from typing import Optional
7
+
8
+ import boto3
9
+ import typer
10
+ from botocore.exceptions import ClientError
11
+ from rich.progress import Progress, SpinnerColumn, TextColumn
12
+ from rich.prompt import Confirm
13
+
14
+ from .shared import check_aws_sso, console, make_api_request, resolve_engine
15
+
16
+
17
def coffee(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
    cancel: bool = typer.Option(
        False, "--cancel", help="Cancel existing coffee lock instead of extending"
    ),
):
    """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
    # Flow: validate the duration locally, resolve the engine through the
    # /engines API, then run /usr/local/bin/engine-coffee on the instance via
    # SSM and report the outcome.
    #
    # Exits with status 1 (typer.Exit) when the duration is malformed or zero,
    # when the engine list cannot be fetched, or when the engine is not
    # running. SSM failures are reported on the console.
    check_aws_sso()

    seconds_total = 0
    if not cancel:
        # fullmatch (not match) so trailing junk such as "2h30" or "4hours"
        # is rejected instead of being silently truncated to a shorter
        # duration than the user typed.
        match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
        if not match or (not match.group(1) and not match.group(2)):
            console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
            console.print("Use format like: 4h, 30m, 2h30m")
            raise typer.Exit(1)

        hours = int(match.group(1) or 0)
        minutes = int(match.group(2) or 0)
        seconds_total = (hours * 60 + minutes) * 60
        if seconds_total == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    if cancel:
        console.print(f"Cancelling coffee for [cyan]{engine['name']}[/cyan]…")
    else:
        console.print(
            f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…"
        )

    remote_command = (
        "/usr/local/bin/engine-coffee --cancel"
        if cancel
        else f"/usr/local/bin/engine-coffee {seconds_total}"
    )

    # Use SSM to run the engine coffee command
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [remote_command],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Poll for up to ~10 seconds. The invocation record can lag behind
        # send_command, so "InvocationDoesNotExist" means "not visible yet",
        # not "failed"; any other client error is handled by the outer except.
        terminal_states = ("Success", "Failed", "Cancelled", "TimedOut")
        result = None
        for _ in range(10):
            time.sleep(1)
            try:
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
            except ClientError as poll_error:
                code = poll_error.response.get("Error", {}).get("Code")
                if code == "InvocationDoesNotExist":
                    continue
                raise
            if result["Status"] in terminal_states:
                break

        if result is not None and result["Status"] == "Success":
            if cancel:
                console.print(
                    "[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]"
                )
            else:
                console.print(f"[green]✓ Coffee poured for {duration}[/green]")
            console.print(
                "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
            )
            console.print(
                "[dim]Use coffee for nohup operations or other background tasks.[/dim]"
            )
        else:
            # result is None when no invocation record ever became visible.
            detail = (result or {}).get("StatusDetails", "Unknown error")
            console.print(f"[red]❌ Failed to manage coffee: {detail}[/red]")

    except ClientError as e:
        console.print(f"[red]❌ Failed to manage coffee: {e}[/red]")
114
+
115
+
116
def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
    slack: Optional[str] = typer.Option(
        None, "--slack", help="Set Slack notifications: none, default, all"
    ),
):
    """Show or set engine idle-detector settings."""
    # Flow: resolve the engine, then depending on the options
    #   --slack : rewrite the SLACK_NOTIFY_* entries in /etc/engine.env and
    #             signal (or restart) the idle detector;
    #   --set   : rewrite IDLE_TIMEOUT_SECONDS and restart the idle detector;
    #   neither : read IDLE_TIMEOUT_SECONDS back and print it.
    # Both options may be combined; Slack is applied first.
    #
    # Exits with status 1 (typer.Exit) when the engine list cannot be fetched
    # or an option value is invalid. A remote command that does not succeed
    # is reported on the console.
    #
    # NOTE: the parameter name `set` shadows the builtin; it is kept because
    # it is part of the command's existing signature.
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Handle slack notifications change. `is not None` (rather than
    # truthiness) so that an empty --slack value is rejected as invalid
    # instead of silently doing nothing.
    if slack is not None:
        slack = slack.lower()
        slack_profiles = {
            "none": {
                "SLACK_NOTIFY_WARNINGS": "false",
                "SLACK_NOTIFY_IDLE_START": "false",
                "SLACK_NOTIFY_IDLE_END": "false",
                "SLACK_NOTIFY_SHUTDOWN": "false",
            },
            "default": {
                "SLACK_NOTIFY_WARNINGS": "true",
                "SLACK_NOTIFY_IDLE_START": "false",
                "SLACK_NOTIFY_IDLE_END": "false",
                "SLACK_NOTIFY_SHUTDOWN": "true",
            },
            "all": {
                "SLACK_NOTIFY_WARNINGS": "true",
                "SLACK_NOTIFY_IDLE_START": "true",
                "SLACK_NOTIFY_IDLE_END": "true",
                "SLACK_NOTIFY_SHUTDOWN": "true",
            },
        }
        if slack not in slack_profiles:
            console.print("[red]❌ Invalid slack option. Use: none, default, all[/red]")
            raise typer.Exit(1)

        console.print(f"Setting Slack notifications to [bold]{slack}[/bold]...")

        # Replace each key in place if present, otherwise append it.
        commands = [
            f"grep -q '^{key}=' /etc/engine.env && sudo sed -i 's|^{key}=.*|{key}={value}|' /etc/engine.env || echo '{key}={value}' | sudo tee -a /etc/engine.env > /dev/null"
            for key, value in slack_profiles[slack].items()
        ]

        # Instead of restarting service, send SIGHUP to reload config
        commands.append(
            "sudo pkill -HUP -f engine-idle-detector.py || sudo systemctl restart engine-idle-detector.service"
        )

        outcome = _run_idle_ssm_commands(
            ssm, engine["instance_id"], commands, execution_timeout=60, attempts=30
        )
        # Only claim success once SSM reports the script actually succeeded.
        if outcome is not None and outcome["Status"] == "Success":
            console.print(f"[green]✓ Slack notifications updated to '{slack}'[/green]")
            console.print("[dim]Note: Settings updated without resetting idle timer[/dim]")
        else:
            detail = (outcome or {}).get("StatusDetails", "Unknown error")
            console.print(
                f"[red]❌ Failed to update Slack notifications: {detail}[/red]"
            )

    # Handle setting new timeout value
    if set is not None:
        m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", set)
        if not m:
            console.print(
                "[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]"
            )
            raise typer.Exit(1)
        hours = int(m.group(1) or 0)
        minutes = int(m.group(2) or 0)
        seconds = hours * 3600 + minutes * 60
        if seconds == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

        console.print(f"Setting idle timeout to {set} ({seconds} seconds)…")

        # Drop any existing entry, append the new one, restart the detector.
        cmd = (
            "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
            f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
            "sudo systemctl restart engine-idle-detector.service"
        )

        outcome = _run_idle_ssm_commands(
            ssm, engine["instance_id"], [cmd], execution_timeout=60, attempts=30
        )
        if outcome is not None and outcome["Status"] == "Success":
            console.print(f"[green]✓ Idle timeout updated to {set}[/green]")
        else:
            detail = (outcome or {}).get("StatusDetails", "Unknown error")
            console.print(f"[red]❌ Failed to update idle timeout: {detail}[/red]")

    # If no action was specified, show current timeout
    if set is None and slack is None:
        outcome = _run_idle_ssm_commands(
            ssm,
            engine["instance_id"],
            [
                "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
            ],
            execution_timeout=10,
            attempts=10,
        )
        if outcome is None or outcome["Status"] != "Success":
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            return

        # grep prints every matching line; use the last one.
        # NOTE(review): presumably a later duplicate overrides an earlier one
        # when the env file is loaded — confirm against the idle detector.
        lines = outcome["StandardOutputContent"].strip().splitlines()
        line = lines[-1] if lines else ""
        try:
            secs = int(line.split("=", 1)[1]) if "=" in line else 1800
        except ValueError:
            # Non-numeric value in the env file.
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            return
        console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")


def _run_idle_ssm_commands(ssm, instance_id, commands, execution_timeout, attempts):
    """Run shell commands on one instance via SSM and wait for the outcome.

    Sends `commands` with the AWS-RunShellScript document, then polls
    get_command_invocation once per second, at most `attempts` times, until
    the invocation reaches a terminal status.

    Returns the last invocation dict received (its "Status" may still be
    non-terminal if polling ran out), or None if no invocation record became
    visible. ClientError other than "InvocationDoesNotExist" propagates.
    """
    sent = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters={
            "commands": commands,
            "executionTimeout": [str(execution_timeout)],
        },
    )
    command_id = sent["Command"]["CommandId"]

    invocation = None
    for _ in range(attempts):
        time.sleep(1)
        try:
            invocation = ssm.get_command_invocation(
                CommandId=command_id, InstanceId=instance_id
            )
        except ClientError as poll_error:
            # The invocation record can lag behind send_command; retry.
            code = poll_error.response.get("Error", {}).get("Code")
            if code == "InvocationDoesNotExist":
                continue
            raise
        if invocation["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
            break
    return invocation
249
+
250
+
251
def debug_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files."""
    # Flow: resolve the engine, then run a fixed series of read-only shell
    # probes on it through SSM, printing each probe's stdout. Every probe is
    # independent: an exception in one is printed and the next still runs.
    # Exits with status 1 only when the engine list cannot be fetched.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    console.print(f"[bold]Debug info for {target['name']}:[/bold]\n")

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Label -> shell command. Most commands fall back to a marker string so
    # that a missing file still yields readable output.
    probes = {
        "Stage file": "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'",
        "Health file": "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'",
        "Sentinel file": "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'",
        "Setup service": "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'",
        "Bootstrap log tail": "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'",
        "Environment file": "cat /etc/engine.env 2>/dev/null || echo 'MISSING'",
    }

    for label, shell_cmd in probes.items():
        heading = f"[cyan]{label}:[/cyan]"
        try:
            sent = ssm.send_command(
                InstanceIds=[target["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={"commands": [shell_cmd], "executionTimeout": ["10"]},
            )
            command_id = sent["Command"]["CommandId"]
            # Single check after a one-second pause; anything not yet
            # "Success" at that point is shown as FAILED.
            time.sleep(1)
            invocation = ssm.get_command_invocation(
                CommandId=command_id, InstanceId=target["instance_id"]
            )

            if invocation["Status"] != "Success":
                console.print(f"{heading} [red]FAILED[/red]\n")
                continue

            stdout_text = invocation["StandardOutputContent"].strip()
            console.print(heading)
            console.print(f"[dim]{stdout_text}[/dim]\n")

        except Exception as e:
            console.print(f"{heading} [red]ERROR: {e}[/red]\n")
317
+
318
+
319
def repair_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Repair an engine that's stuck in a bad state (e.g., after GAMI creation)."""
    # Flow: resolve the engine, make sure it is running (offering to start a
    # stopped one), then run a fixed repair script on it via SSM and print
    # the script's status section.
    #
    # Exits with status 1 (typer.Exit) when the engine list cannot be
    # fetched, when the engine is not running and is not started here, or
    # when the start request is rejected.
    check_aws_sso()

    listing = make_api_request("GET", "/engines")
    if listing.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    target = resolve_engine(name_or_id, listing.json().get("engines", []))

    if target["state"].lower() != "running":
        console.print(
            f"[yellow]⚠️ Engine is {target['state']}. Must be running to repair.[/yellow]"
        )
        # Only a stopped engine can be started from here; the prompt is
        # skipped for any other state.
        start_now = target["state"].lower() == "stopped" and Confirm.ask(
            "Start the engine first?"
        )
        if not start_now:
            raise typer.Exit(1)

        start_reply = make_api_request(
            "POST", f"/engines/{target['instance_id']}/start"
        )
        if start_reply.status_code != 200:
            console.print("[red]❌ Failed to start engine[/red]")
            raise typer.Exit(1)
        console.print("[green]✓ Engine started[/green]")
        console.print("Waiting for engine to become ready...")
        time.sleep(30)  # fixed pause to let the instance boot

    console.print(f"[bold]Repairing engine [cyan]{target['name']}[/cyan][/bold]")
    console.print(
        "[dim]This will restore bootstrap state and ensure all services are running[/dim]\n"
    )

    ssm = boto3.client("ssm", region_name="us-east-1")

    repair_commands = [
        # Directory layout
        "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state /opt/dayhoff/scripts",
        # Pull the *.sh scripts from the bucket named in /etc/engine.env
        "source /etc/engine.env && sudo aws s3 sync s3://${VM_SCRIPTS_BUCKET}/ /opt/dayhoff/scripts/ --exclude '*' --include '*.sh' --quiet",
        "sudo chmod +x /opt/dayhoff/scripts/*.sh 2>/dev/null || true",
        # Mark bootstrap as complete
        "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
        # Bounce the SSM agent and the idle detector (failures tolerated)
        "sudo systemctl restart amazon-ssm-agent 2>/dev/null || true",
        "sudo systemctl restart engine-idle-detector.service 2>/dev/null || true",
        # Status section; the marker line is used below to slice the output
        "echo '=== Repair Complete ===' && echo 'Sentinel: ' && ls -la /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'Stage: ' && cat /opt/dayhoff/state/engine-init.stage",
        "echo 'Scripts: ' && ls /opt/dayhoff/scripts/*.sh 2>/dev/null | wc -l",
    ]

    marker = "=== Repair Complete ==="

    try:
        spinner = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        )
        with spinner as progress:
            progress.add_task("Repairing engine...", total=None)

            sent = ssm.send_command(
                InstanceIds=[target["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={
                    "commands": repair_commands,
                    "executionTimeout": ["60"],
                },
            )

            command_id = sent["Command"]["CommandId"]

            # Poll once per second, for at most 60 polls.
            for _ in range(60):
                time.sleep(1)
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=target["instance_id"],
                )
                if result["Status"] in ["Success", "Failed"]:
                    break

        if result["Status"] != "Success":
            console.print(
                f"[red]❌ Repair failed: {result.get('StandardErrorContent', 'Unknown error')}[/red]"
            )
            console.print(
                "\n[yellow]Try running 'dh engine debug' for more information.[/yellow]"
            )
        else:
            output = result["StandardOutputContent"]
            console.print("[green]✓ Engine repaired successfully![/green]\n")

            # Show only what the script printed after the marker line.
            if marker in output:
                repair_section = output.split(marker)[1].strip()
                console.print("[bold]Repair Results:[/bold]")
                console.print(repair_section)

            console.print(
                "\n[dim]You should now be able to attach studios to this engine.[/dim]"
            )

    except Exception as e:
        console.print(f"[red]❌ Failed to repair engine: {e}[/red]")