dayhoff-tools 1.9.26__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. dayhoff_tools/cli/engine/__init__.py +1 -323
  2. dayhoff_tools/cli/engine/coffee.py +110 -0
  3. dayhoff_tools/cli/engine/config_ssh.py +113 -0
  4. dayhoff_tools/cli/engine/debug.py +79 -0
  5. dayhoff_tools/cli/engine/gami.py +160 -0
  6. dayhoff_tools/cli/engine/idle.py +148 -0
  7. dayhoff_tools/cli/engine/launch.py +101 -0
  8. dayhoff_tools/cli/engine/list.py +116 -0
  9. dayhoff_tools/cli/engine/repair.py +128 -0
  10. dayhoff_tools/cli/engine/resize.py +195 -0
  11. dayhoff_tools/cli/engine/ssh.py +62 -0
  12. dayhoff_tools/cli/engine/{engine_core.py → status.py} +6 -201
  13. dayhoff_tools/cli/engine_studio_commands.py +323 -0
  14. dayhoff_tools/cli/engine_studio_utils/__init__.py +1 -0
  15. dayhoff_tools/cli/engine_studio_utils/api_utils.py +47 -0
  16. dayhoff_tools/cli/engine_studio_utils/aws_utils.py +102 -0
  17. dayhoff_tools/cli/engine_studio_utils/constants.py +21 -0
  18. dayhoff_tools/cli/engine_studio_utils/formatting.py +210 -0
  19. dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +141 -0
  20. dayhoff_tools/cli/main.py +1 -2
  21. dayhoff_tools/cli/studio/__init__.py +1 -0
  22. dayhoff_tools/cli/studio/attach.py +314 -0
  23. dayhoff_tools/cli/studio/create.py +48 -0
  24. dayhoff_tools/cli/studio/delete.py +71 -0
  25. dayhoff_tools/cli/studio/detach.py +56 -0
  26. dayhoff_tools/cli/studio/list.py +81 -0
  27. dayhoff_tools/cli/studio/reset.py +90 -0
  28. dayhoff_tools/cli/studio/resize.py +134 -0
  29. dayhoff_tools/cli/studio/status.py +78 -0
  30. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/METADATA +1 -1
  31. dayhoff_tools-1.10.1.dist-info/RECORD +61 -0
  32. dayhoff_tools/cli/engine/engine_maintenance.py +0 -431
  33. dayhoff_tools/cli/engine/engine_management.py +0 -505
  34. dayhoff_tools/cli/engine/shared.py +0 -501
  35. dayhoff_tools/cli/engine/studio_commands.py +0 -825
  36. dayhoff_tools-1.9.26.dist-info/RECORD +0 -39
  37. /dayhoff_tools/cli/engine/{engine_lifecycle.py → lifecycle.py} +0 -0
  38. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/WHEEL +0 -0
  39. {dayhoff_tools-1.9.26.dist-info → dayhoff_tools-1.10.1.dist-info}/entry_points.txt +0 -0
@@ -1,431 +0,0 @@
1
- """Engine maintenance commands: coffee, idle timeout, debug, and repair."""
2
-
3
- import re
4
- import subprocess
5
- import time
6
- from typing import Optional
7
-
8
- import boto3
9
- import typer
10
- from botocore.exceptions import ClientError
11
- from rich.progress import Progress, SpinnerColumn, TextColumn
12
- from rich.prompt import Confirm
13
-
14
- from .shared import check_aws_sso, console, make_api_request, resolve_engine
15
-
16
-
17
def coffee(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
    cancel: bool = typer.Option(
        False, "--cancel", help="Cancel existing coffee lock instead of extending"
    ),
):
    """Pour ☕ for an engine: keeps it awake for the given duration (or cancel).

    The engine is looked up via ``GET /engines`` and ``resolve_engine``; the
    request itself is carried out by running ``/usr/local/bin/engine-coffee``
    on the instance through SSM.

    Args:
        name_or_id: Engine name or instance ID.
        duration: Hours and/or minutes, e.g. ``4h``, ``30m``, ``2h30m``.
            Not validated or used when ``cancel`` is set.
        cancel: Cancel the existing coffee lock instead of extending it.

    Raises:
        typer.Exit: Code 1 when the duration is malformed or zero, the engine
            list cannot be fetched, or the engine is not running.
    """
    check_aws_sso()

    seconds_total = 0
    if not cancel:
        # fullmatch (not match) so trailing junk such as "2h30" or "4h later"
        # is rejected instead of being silently truncated to the "2h"/"4h"
        # prefix that happens to match.
        match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
        if not match or (not match.group(1) and not match.group(2)):
            console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
            console.print("Use format like: 4h, 30m, 2h30m")
            raise typer.Exit(1)

        hours = int(match.group(1) or 0)
        minutes = int(match.group(2) or 0)
        seconds_total = (hours * 60 + minutes) * 60
        if seconds_total == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    if cancel:
        console.print(f"Cancelling coffee for [cyan]{engine['name']}[/cyan]…")
        remote_command = "/usr/local/bin/engine-coffee --cancel"
    else:
        console.print(
            f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…"
        )
        remote_command = f"/usr/local/bin/engine-coffee {seconds_total}"

    # Use SSM to run the engine coffee command
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [remote_command],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Poll once per second (at most 10 times) until the invocation reaches
        # a terminal state; Cancelled/TimedOut are final too, so stop on them
        # rather than polling until the attempts run out.
        terminal_states = ("Success", "Failed", "Cancelled", "TimedOut")
        for _ in range(10):
            time.sleep(1)
            result = ssm.get_command_invocation(
                CommandId=command_id,
                InstanceId=engine["instance_id"],
            )
            if result["Status"] in terminal_states:
                break

        if result["Status"] == "Success":
            if cancel:
                console.print(
                    "[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]"
                )
            else:
                console.print(f"[green]✓ Coffee poured for {duration}[/green]")
                console.print(
                    "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
                )
                console.print(
                    "[dim]Use coffee for nohup operations or other background tasks.[/dim]"
                )
        else:
            # `or` so an empty StatusDetails still yields a readable message.
            detail = result.get("StatusDetails") or "Unknown error"
            console.print(f"[red]❌ Failed to manage coffee: {detail}[/red]")

    except ClientError as e:
        console.print(f"[red]❌ Failed to manage coffee: {e}[/red]")
114
-
115
-
116
def _run_engine_env_command(ssm, instance_id, commands, timeout_seconds, attempts=10):
    """Run shell commands on an instance via SSM and return the last invocation.

    Polls ``get_command_invocation`` once per second, up to ``attempts`` times,
    and stops early on a terminal status. The returned dict's ``"Status"`` may
    still be non-terminal if the command did not finish in time.
    """
    resp = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters={
            "commands": commands,
            "executionTimeout": [str(timeout_seconds)],
        },
    )
    command_id = resp["Command"]["CommandId"]

    invocation = {"Status": "Pending"}
    for _ in range(attempts):
        time.sleep(1)
        invocation = ssm.get_command_invocation(
            CommandId=command_id, InstanceId=instance_id
        )
        if invocation["Status"] in ("Success", "Failed", "Cancelled", "TimedOut"):
            break
    return invocation


def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
    slack: Optional[str] = typer.Option(
        None, "--slack", help="Set Slack notifications: none, default, all"
    ),
):
    """Show or set engine idle-detector settings.

    Settings are read from and written to ``/etc/engine.env`` on the instance
    through SSM. With neither ``--set`` nor ``--slack`` the current
    ``IDLE_TIMEOUT_SECONDS`` value is printed.

    Args:
        name_or_id: Engine name or instance ID.
        set: New idle timeout as hours and/or minutes (``2h30m``, ``45m``).
            The name shadows the builtin but is kept for caller compatibility.
        slack: Slack notification preset: ``none``, ``default`` or ``all``
            (case-insensitive).

    Raises:
        typer.Exit: Code 1 when the engine list cannot be fetched, the slack
            preset is unknown, or the duration is malformed or zero.
    """
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Handle slack notifications change.
    # `is not None` so an explicit empty value is rejected as invalid instead
    # of silently doing nothing.
    if slack is not None:
        slack = slack.lower()
        presets = {
            "none": {
                "SLACK_NOTIFY_WARNINGS": "false",
                "SLACK_NOTIFY_IDLE_START": "false",
                "SLACK_NOTIFY_IDLE_END": "false",
                "SLACK_NOTIFY_SHUTDOWN": "false",
            },
            "default": {
                "SLACK_NOTIFY_WARNINGS": "true",
                "SLACK_NOTIFY_IDLE_START": "false",
                "SLACK_NOTIFY_IDLE_END": "false",
                "SLACK_NOTIFY_SHUTDOWN": "true",
            },
            "all": {
                "SLACK_NOTIFY_WARNINGS": "true",
                "SLACK_NOTIFY_IDLE_START": "true",
                "SLACK_NOTIFY_IDLE_END": "true",
                "SLACK_NOTIFY_SHUTDOWN": "true",
            },
        }
        if slack not in presets:
            console.print("[red]❌ Invalid slack option. Use: none, default, all[/red]")
            raise typer.Exit(1)

        console.print(f"Setting Slack notifications to [bold]{slack}[/bold]...")

        # Use a robust sed command that adds the line if it doesn't exist
        commands = [
            f"grep -q '^{key}=' /etc/engine.env && sudo sed -i 's|^{key}=.*|{key}={value}|' /etc/engine.env || echo '{key}={value}' | sudo tee -a /etc/engine.env > /dev/null"
            for key, value in presets[slack].items()
        ]

        # Instead of restarting service, send SIGHUP to reload config
        commands.append(
            "sudo pkill -HUP -f engine-idle-detector.py || sudo systemctl restart engine-idle-detector.service"
        )

        inv = _run_engine_env_command(ssm, instance_id, commands, 60)
        # Only claim success once SSM reports it; previously the message was
        # printed without looking at the invocation result.
        if inv["Status"] == "Success":
            console.print(f"[green]✓ Slack notifications updated to '{slack}'[/green]")
            console.print(
                "[dim]Note: Settings updated without resetting idle timer[/dim]"
            )
        else:
            detail = inv.get("StatusDetails") or inv["Status"]
            console.print(
                f"[red]❌ Failed to update Slack notifications: {detail}[/red]"
            )

    # Handle setting new timeout value
    if set is not None:
        m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", set)
        if not m:
            console.print(
                "[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]"
            )
            raise typer.Exit(1)
        hours = int(m.group(1) or 0)
        minutes = int(m.group(2) or 0)
        seconds = hours * 3600 + minutes * 60
        if seconds == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

        console.print(f"Setting idle timeout to {set} ({seconds} seconds)…")

        # Drop any existing entry, append the new one, then restart the service.
        cmd = (
            "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
            f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
            "sudo systemctl restart engine-idle-detector.service"
        )

        inv = _run_engine_env_command(ssm, instance_id, [cmd], 60)
        if inv["Status"] == "Success":
            console.print(f"[green]✓ Idle timeout updated to {set}[/green]")
        else:
            detail = inv.get("StatusDetails") or inv["Status"]
            console.print(f"[red]❌ Failed to update idle timeout: {detail}[/red]")

    # If no action was specified, show current timeout
    if set is None and slack is None:
        inv = _run_engine_env_command(
            ssm,
            instance_id,
            [
                "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
            ],
            10,
        )
        if inv["Status"] != "Success":
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            return

        output = inv["StandardOutputContent"].strip()
        # grep may print several lines if the file holds duplicate entries;
        # read the last one instead of crashing on the embedded newline.
        last_line = output.splitlines()[-1] if output else ""
        try:
            secs = int(last_line.split("=", 1)[1]) if "=" in last_line else 1800
        except ValueError:
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            return
        console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
249
-
250
-
251
def debug_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files.

    Runs a fixed list of read-only shell checks on the instance through SSM
    (stage/health/sentinel files, the setup service status, the tail of the
    setup log and ``/etc/engine.env``) and prints each result.

    Args:
        name_or_id: Engine name or instance ID.

    Raises:
        typer.Exit: Code 1 when the engine list cannot be fetched.
    """
    # Local import: rich is already a dependency of this module, and escape()
    # is only needed here.
    from rich.markup import escape

    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"[bold]Debug info for {engine['name']}:[/bold]\n")

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Check multiple files and systemd status
    checks = [
        (
            "Stage file",
            "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Health file",
            "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Sentinel file",
            "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Setup service",
            "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'",
        ),
        (
            "Bootstrap log tail",
            "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'",
        ),
        ("Environment file", "cat /etc/engine.env 2>/dev/null || echo 'MISSING'"),
    ]

    terminal_states = ("Success", "Failed", "Cancelled", "TimedOut")

    for name, cmd in checks:
        try:
            resp = ssm.send_command(
                InstanceIds=[engine["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={"commands": [cmd], "executionTimeout": ["10"]},
            )
            cid = resp["Command"]["CommandId"]

            # Poll until the command finishes. A single read after one second
            # reported still-running checks as FAILED.
            for _ in range(15):
                time.sleep(1)
                inv = ssm.get_command_invocation(
                    CommandId=cid, InstanceId=engine["instance_id"]
                )
                if inv["Status"] in terminal_states:
                    break

            if inv["Status"] == "Success":
                output = inv["StandardOutputContent"].strip()
                console.print(f"[cyan]{name}:[/cyan]")
                # Escape remote output so brackets in logs/JSON are shown
                # literally instead of being parsed as Rich markup tags.
                console.print(f"[dim]{escape(output)}[/dim]\n")
            else:
                console.print(f"[cyan]{name}:[/cyan] [red]FAILED[/red]\n")

        except Exception as e:
            # Deliberately broad: one failing check must not stop the
            # remaining diagnostics from running.
            console.print(
                f"[cyan]{name}:[/cyan] [red]ERROR: {escape(str(e))}[/red]\n"
            )
317
-
318
-
319
def repair_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Repair an engine that's stuck in a bad state (e.g., after GAMI creation).

    Through SSM this recreates the ``/opt/dayhoff`` directories, re-syncs the
    ``*.sh`` scripts from the bucket named in ``/etc/engine.env``, rewrites the
    sentinel and stage files, restarts the SSM agent and idle detector, and
    prints a short status report.

    Args:
        name_or_id: Engine name or instance ID.

    Raises:
        typer.Exit: Code 1 when the engine list cannot be fetched, the engine
            is not running and is not (or cannot be) started first.
    """
    check_aws_sso()

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(
            f"[yellow]⚠️ Engine is {engine['state']}. Must be running to repair.[/yellow]"
        )
        # Only a stopped engine can be started here; any other state, or a
        # declined prompt, aborts.
        if engine["state"].lower() == "stopped" and Confirm.ask(
            "Start the engine first?"
        ):
            response = make_api_request(
                "POST", f"/engines/{engine['instance_id']}/start"
            )
            if response.status_code != 200:
                console.print("[red]❌ Failed to start engine[/red]")
                raise typer.Exit(1)
            console.print("[green]✓ Engine started[/green]")
            console.print("Waiting for engine to become ready...")
            time.sleep(30)  # Give it time to boot
        else:
            raise typer.Exit(1)

    console.print(f"[bold]Repairing engine [cyan]{engine['name']}[/cyan][/bold]")
    console.print(
        "[dim]This will restore bootstrap state and ensure all services are running[/dim]\n"
    )

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Repair commands
    repair_commands = [
        # Create necessary directories
        "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state /opt/dayhoff/scripts",
        # Download scripts from S3 if missing
        "source /etc/engine.env && sudo aws s3 sync s3://${VM_SCRIPTS_BUCKET}/ /opt/dayhoff/scripts/ --exclude '*' --include '*.sh' --quiet",
        "sudo chmod +x /opt/dayhoff/scripts/*.sh 2>/dev/null || true",
        # Restore bootstrap state
        "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
        # Ensure SSM agent is running
        "sudo systemctl restart amazon-ssm-agent 2>/dev/null || true",
        # Restart idle detector (service only)
        "sudo systemctl restart engine-idle-detector.service 2>/dev/null || true",
        # Report status
        "echo '=== Repair Complete ===' && echo 'Sentinel: ' && ls -la /opt/dayhoff/first_boot_complete.sentinel",
        "echo 'Stage: ' && cat /opt/dayhoff/state/engine-init.stage",
        "echo 'Scripts: ' && ls /opt/dayhoff/scripts/*.sh 2>/dev/null | wc -l",
    ]

    terminal_states = ("Success", "Failed", "Cancelled", "TimedOut")

    try:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            progress.add_task("Repairing engine...", total=None)

            response = ssm.send_command(
                InstanceIds=[engine["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={
                    "commands": repair_commands,
                    "executionTimeout": ["60"],
                },
            )

            command_id = response["Command"]["CommandId"]

            # Poll once per second (at most 60 times) until the invocation
            # reaches a terminal state, including Cancelled/TimedOut.
            for _ in range(60):
                time.sleep(1)
                result = ssm.get_command_invocation(
                    CommandId=command_id,
                    InstanceId=engine["instance_id"],
                )
                if result["Status"] in terminal_states:
                    break

        # Report after the spinner context has exited so the output is not
        # interleaved with the live progress display.
        if result["Status"] == "Success":
            output = result["StandardOutputContent"]
            console.print("[green]✓ Engine repaired successfully![/green]\n")

            # Show repair results
            if "=== Repair Complete ===" in output:
                repair_section = output.split("=== Repair Complete ===")[1].strip()
                console.print("[bold]Repair Results:[/bold]")
                console.print(repair_section)

            console.print(
                "\n[dim]You should now be able to attach studios to this engine.[/dim]"
            )
        else:
            # `or` chain: an empty stderr would otherwise hide the fallback
            # and print a blank reason.
            reason = (
                result.get("StandardErrorContent")
                or result.get("StatusDetails")
                or "Unknown error"
            )
            console.print(f"[red]❌ Repair failed: {reason}[/red]")
            console.print(
                "\n[yellow]Try running 'dh engine debug' for more information.[/yellow]"
            )

    except Exception as e:
        console.print(f"[red]❌ Failed to repair engine: {e}[/red]")