dayhoff-tools 1.9.26__tar.gz → 1.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/PKG-INFO +1 -1
  2. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/__init__.py +1 -0
  3. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/coffee.py +110 -0
  4. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/config_ssh.py +113 -0
  5. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/debug.py +79 -0
  6. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/gami.py +160 -0
  7. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/idle.py +148 -0
  8. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/launch.py +101 -0
  9. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/list.py +116 -0
  10. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/repair.py +128 -0
  11. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/resize.py +195 -0
  12. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/ssh.py +62 -0
  13. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/engine_core.py → dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/status.py +6 -201
  14. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/__init__.py → dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_commands.py +22 -22
  15. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/__init__.py +1 -0
  16. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/api_utils.py +47 -0
  17. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/aws_utils.py +102 -0
  18. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/constants.py +21 -0
  19. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/formatting.py +210 -0
  20. dayhoff_tools-1.10.1/dayhoff_tools/cli/engine_studio_utils/ssh_utils.py +141 -0
  21. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/cli/main.py +1 -2
  22. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/__init__.py +1 -0
  23. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/attach.py +314 -0
  24. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/create.py +48 -0
  25. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/delete.py +71 -0
  26. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/detach.py +56 -0
  27. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/list.py +81 -0
  28. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/reset.py +90 -0
  29. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/resize.py +134 -0
  30. dayhoff_tools-1.10.1/dayhoff_tools/cli/studio/status.py +78 -0
  31. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/pyproject.toml +1 -1
  32. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/engine_maintenance.py +0 -431
  33. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/engine_management.py +0 -505
  34. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/shared.py +0 -501
  35. dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/studio_commands.py +0 -825
  36. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/README.md +0 -0
  37. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/__init__.py +0 -0
  38. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/chemistry/standardizer.py +0 -0
  39. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/chemistry/utils.py +0 -0
  40. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/cli/__init__.py +0 -0
  41. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/cli/cloud_commands.py +0 -0
  42. /dayhoff_tools-1.9.26/dayhoff_tools/cli/engine/engine_lifecycle.py → /dayhoff_tools-1.10.1/dayhoff_tools/cli/engine/lifecycle.py +0 -0
  43. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/cli/swarm_commands.py +0 -0
  44. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/cli/utility_commands.py +0 -0
  45. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/base.py +0 -0
  46. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  47. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  48. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  49. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/job_runner.py +0 -0
  50. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/processors.py +0 -0
  51. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/deployment/swarm.py +0 -0
  52. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/embedders.py +0 -0
  53. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/fasta.py +0 -0
  54. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/file_ops.py +0 -0
  55. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/h5.py +0 -0
  56. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/gcp.py +0 -0
  57. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/gtdb.py +0 -0
  58. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/kegg.py +0 -0
  59. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/mmseqs.py +0 -0
  60. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/structure.py +0 -0
  61. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/intake/uniprot.py +0 -0
  62. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/logs.py +0 -0
  63. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/sqlite.py +0 -0
  64. {dayhoff_tools-1.9.26 → dayhoff_tools-1.10.1}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.9.26
3
+ Version: 1.10.1
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -0,0 +1 @@
1
+ """Engine management commands."""
@@ -0,0 +1,110 @@
1
+ """Engine coffee command."""
2
+
3
+ import re
4
+ import time
5
+
6
+ import boto3
7
+ import typer
8
+ from botocore.exceptions import ClientError
9
+
10
+ from ..engine_studio_utils.api_utils import make_api_request
11
+ from ..engine_studio_utils.aws_utils import check_aws_sso
12
+ from ..engine_studio_utils.constants import console
13
+ from ..engine_studio_utils.formatting import resolve_engine
14
+
15
+
16
def coffee(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    duration: str = typer.Argument("4h", help="Duration (e.g., 2h, 30m, 2h30m)"),
    cancel: bool = typer.Option(
        False, "--cancel", help="Cancel existing coffee lock instead of extending"
    ),
):
    """Pour ☕ for an engine: keeps it awake for the given duration (or cancel)."""
    # NOTE: the docstring above is kept short because typer displays it as the
    # command's help text; implementation notes live in comments instead.
    #
    # Flow: validate the duration, resolve the engine through the /engines API,
    # then run /usr/local/bin/engine-coffee on the instance via SSM and poll
    # for the result. Exits with code 1 whenever success cannot be confirmed.

    # Called for its side effects only (presumably validating the AWS SSO
    # session); the username it returns is not needed by this command.
    check_aws_sso()

    # Parse duration (skipped entirely when cancelling).
    seconds_total = 0
    if not cancel:
        # fullmatch, not match: an unanchored match would silently accept
        # trailing junk such as "2h30" (read as 2h) or "2hfoo".
        match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", duration)
        if not match or (not match.group(1) and not match.group(2)):
            console.print(f"[red]❌ Invalid duration format: {duration}[/red]")
            console.print("Use format like: 4h, 30m, 2h30m")
            raise typer.Exit(1)

        hours = int(match.group(1) or 0)
        minutes = int(match.group(2) or 0)
        seconds_total = (hours * 60 + minutes) * 60
        if seconds_total == 0:
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

    # Get all engines to resolve name
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine is not running (state: {engine['state']})[/red]")
        raise typer.Exit(1)

    if cancel:
        console.print(f"Cancelling coffee for [cyan]{engine['name']}[/cyan]…")
    else:
        console.print(
            f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…"
        )

    if cancel:
        remote_command = "/usr/local/bin/engine-coffee --cancel"
    else:
        remote_command = f"/usr/local/bin/engine-coffee {seconds_total}"

    # Use SSM to run the engine coffee command
    ssm = boto3.client("ssm", region_name="us-east-1")
    try:
        response = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [remote_command],
                "executionTimeout": ["60"],
            },
        )

        command_id = response["Command"]["CommandId"]

        # Wait (up to ~10 s) for the command to reach a terminal status.
        # Cancelled/TimedOut are terminal too, so stop polling on them as well.
        terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")
        result = {}
        for _ in range(10):
            time.sleep(1)
            result = ssm.get_command_invocation(
                CommandId=command_id,
                InstanceId=engine["instance_id"],
            )
            if result["Status"] in terminal_statuses:
                break

        if result.get("Status") == "Success":
            if cancel:
                console.print(
                    "[green]✓ Coffee cancelled – auto-shutdown re-enabled[/green]"
                )
            else:
                console.print(f"[green]✓ Coffee poured for {duration}[/green]")
                console.print(
                    "\n[dim]Note: Detached Docker containers (except dev containers) will also keep the engine awake.[/dim]"
                )
                console.print(
                    "[dim]Use coffee for nohup operations or other background tasks.[/dim]"
                )
        else:
            console.print(
                f"[red]❌ Failed to manage coffee: {result.get('StatusDetails', 'Unknown error')}[/red]"
            )
            # Report the failure through the exit code so scripts can detect it.
            raise typer.Exit(1)

    except ClientError as e:
        console.print(f"[red]❌ Failed to manage coffee: {e}[/red]")
        raise typer.Exit(1)
@@ -0,0 +1,113 @@
1
+ """Engine config-ssh command."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+
7
+ from ..engine_studio_utils.api_utils import make_api_request
8
+ from ..engine_studio_utils.aws_utils import check_aws_sso
9
+ from ..engine_studio_utils.constants import SSH_MANAGED_COMMENT, console
10
+ from ..engine_studio_utils.ssh_utils import check_session_manager_plugin
11
+
12
+
13
+ def config_ssh(
14
+ clean: bool = typer.Option(False, "--clean", help="Remove all managed entries"),
15
+ all_engines: bool = typer.Option(
16
+ False, "--all", "-a", help="Include all engines from all users"
17
+ ),
18
+ admin: bool = typer.Option(
19
+ False,
20
+ "--admin",
21
+ help="Generate entries that use ec2-user instead of per-engine owner user",
22
+ ),
23
+ ):
24
+ """Update SSH config with available engines."""
25
+ username = check_aws_sso()
26
+
27
+ # Only check for Session Manager Plugin if we're not just cleaning
28
+ if not clean and not check_session_manager_plugin():
29
+ raise typer.Exit(1)
30
+
31
+ if clean:
32
+ console.print("Removing all managed SSH entries...")
33
+ else:
34
+ if all_engines:
35
+ console.print("Updating SSH config with all running engines...")
36
+ else:
37
+ console.print(
38
+ f"Updating SSH config with running engines for [cyan]{username}[/cyan] and [cyan]shared[/cyan]..."
39
+ )
40
+
41
+ # Get all engines
42
+ response = make_api_request("GET", "/engines")
43
+ if response.status_code != 200:
44
+ console.print("[red]❌ Failed to fetch engines[/red]")
45
+ raise typer.Exit(1)
46
+
47
+ engines = response.json().get("engines", [])
48
+ running_engines = [e for e in engines if e["state"].lower() == "running"]
49
+
50
+ # Filter engines based on options
51
+ if not all_engines:
52
+ # Show only current user's engines and shared engines
53
+ running_engines = [
54
+ e for e in running_engines if e["user"] == username or e["user"] == "shared"
55
+ ]
56
+
57
+ # Read existing config
58
+ config_path = Path.home() / ".ssh" / "config"
59
+ config_path.parent.mkdir(mode=0o700, exist_ok=True)
60
+
61
+ if config_path.exists():
62
+ content = config_path.read_text()
63
+ lines = content.splitlines()
64
+ else:
65
+ content = ""
66
+ lines = []
67
+
68
+ # Remove old managed entries
69
+ new_lines = []
70
+ skip_until_next_host = False
71
+ for line in lines:
72
+ if SSH_MANAGED_COMMENT in line:
73
+ skip_until_next_host = True
74
+ elif line.strip().startswith("Host ") and skip_until_next_host:
75
+ skip_until_next_host = False
76
+ # Check if this is a managed host
77
+ if SSH_MANAGED_COMMENT not in line:
78
+ new_lines.append(line)
79
+ elif not skip_until_next_host:
80
+ new_lines.append(line)
81
+
82
+ # Add new entries if not cleaning
83
+ if not clean:
84
+ for engine in running_engines:
85
+ # Determine ssh user based on --admin flag
86
+ ssh_user = "ec2-user" if admin else username
87
+ new_lines.extend(
88
+ [
89
+ "",
90
+ f"Host {engine['name']} {SSH_MANAGED_COMMENT}",
91
+ f" HostName {engine['instance_id']}",
92
+ f" User {ssh_user}",
93
+ f" ProxyCommand sh -c \"AWS_SSM_IDLE_TIMEOUT=600 aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'\"",
94
+ ]
95
+ )
96
+
97
+ # Write back
98
+ config_path.write_text("\n".join(new_lines))
99
+ config_path.chmod(0o600)
100
+
101
+ if clean:
102
+ console.print("[green]✓ Removed all managed SSH entries[/green]")
103
+ else:
104
+ console.print(
105
+ f"[green]✓ Updated SSH config with {len(running_engines)} engines[/green]"
106
+ )
107
+ for engine in running_engines:
108
+ user_display = (
109
+ f"[dim]({engine['user']})[/dim]" if engine["user"] != username else ""
110
+ )
111
+ console.print(
112
+ f" • {engine['name']} → {engine['instance_id']} {user_display}"
113
+ )
@@ -0,0 +1,79 @@
1
+ """Engine debug command."""
2
+
3
+ import time
4
+
5
+ import boto3
6
+ import typer
7
+
8
+ from ..engine_studio_utils.api_utils import make_api_request
9
+ from ..engine_studio_utils.aws_utils import check_aws_sso
10
+ from ..engine_studio_utils.constants import console
11
+ from ..engine_studio_utils.formatting import resolve_engine
12
+
13
+
14
def debug_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files."""
    # NOTE: the docstring above is kept short because typer displays it as the
    # command's help text; implementation notes live in comments instead.
    #
    # Runs a fixed list of read-only shell checks on the engine through SSM
    # and prints each check's stdout. A failing check never aborts the rest.
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    console.print(f"[bold]Debug info for {engine['name']}:[/bold]\n")

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Check multiple files and systemd status: (label, shell command) pairs.
    checks = [
        (
            "Stage file",
            "cat /opt/dayhoff/state/engine-init.stage 2>/dev/null || cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Health file",
            "cat /opt/dayhoff/state/engine-health.json 2>/dev/null || cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Sentinel file",
            "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'",
        ),
        (
            "Setup service",
            "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'",
        ),
        (
            "Bootstrap log tail",
            "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'",
        ),
        ("Environment file", "cat /etc/engine.env 2>/dev/null || echo 'MISSING'"),
    ]

    # Statuses after which the invocation will not change any more.
    terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")

    for name, cmd in checks:
        try:
            resp = ssm.send_command(
                InstanceIds=[engine["instance_id"]],
                DocumentName="AWS-RunShellScript",
                Parameters={"commands": [cmd], "executionTimeout": ["10"]},
            )
            cid = resp["Command"]["CommandId"]

            # Poll instead of reading once after a fixed delay: a command that
            # is still Pending/InProgress after 1 s is not a failure. The
            # budget matches the 10 s execution timeout requested above.
            inv = {}
            for _ in range(10):
                time.sleep(1)
                inv = ssm.get_command_invocation(
                    CommandId=cid, InstanceId=engine["instance_id"]
                )
                if inv["Status"] in terminal_statuses:
                    break

            if inv.get("Status") == "Success":
                output = inv["StandardOutputContent"].strip()
                console.print(f"[cyan]{name}:[/cyan]")
                console.print(f"[dim]{output}[/dim]\n")
            else:
                console.print(f"[cyan]{name}:[/cyan] [red]FAILED[/red]\n")

        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, so one
            # broken check is reported and the remaining checks still run.
            console.print(f"[cyan]{name}:[/cyan] [red]ERROR: {e}[/red]\n")
@@ -0,0 +1,160 @@
1
+ """Engine GAMI (Golden AMI) creation command."""
2
+
3
+ from datetime import datetime
4
+
5
+ import boto3
6
+ import typer
7
+ from botocore.exceptions import ClientError
8
+ from rich.progress import Progress, SpinnerColumn, TextColumn
9
+ from rich.prompt import Confirm
10
+
11
+ from ..engine_studio_utils.api_utils import make_api_request
12
+ from ..engine_studio_utils.aws_utils import check_aws_sso
13
+ from ..engine_studio_utils.constants import console
14
+ from ..engine_studio_utils.formatting import resolve_engine
15
+
16
+
17
def create_ami(
    name_or_id: str = typer.Argument(
        help="Engine name or instance ID to create AMI from"
    ),
):
    """Create a 'Golden AMI' from a running engine.

    This process is for creating a pre-warmed, standardized machine image
    that can be used to launch new engines more quickly.

    IMPORTANT:
    - The engine MUST have all studios detached before running this command.
    - This process will make the source engine unusable. You should
      plan to TERMINATE the engine after the AMI is created.
    """
    # NOTE: typer displays the docstring above as the command's help text, so
    # it is left untouched; implementation notes live in comments instead.
    check_aws_sso()

    # Get all engines to resolve name and check status
    # We pass check_ready=True to get attached studio info
    response = make_api_request("GET", "/engines", params={"check_ready": "true"})
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    # --- Pre-flight checks ---

    # 1. Check if engine is running
    if engine["state"].lower() != "running":
        console.print(f"[red]❌ Engine '{engine['name']}' is not running.[/red]")
        console.print("Please start it before creating an AMI.")
        raise typer.Exit(1)

    # 2. Check for attached studios from the detailed API response
    attached_studios = engine.get("studios", [])
    if attached_studios:
        console.print(
            f"[bold red]❌ Engine '{engine['name']}' has studios attached.[/bold red]"
        )
        console.print("Please detach all studios before creating an AMI:")
        for studio in attached_studios:
            console.print(f" - {studio['user']} ({studio['studio_id']})")
        console.print("\nTo detach, run [bold]dh studio detach[/bold]")
        raise typer.Exit(1)

    # Construct AMI name and description. The name only carries the date, so
    # it is the same for every AMI built from one engine type on one day.
    ami_name = (
        f"prewarmed-engine-{engine['engine_type']}-{datetime.now().strftime('%Y%m%d')}"
    )
    description = (
        "Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled "
        f"dev container image for {engine['engine_type']} engines"
    )

    console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
    console.print(f"[bold]AMI Name:[/] {ami_name}")
    console.print(f"[bold]Description:[/] {description}")

    console.print(
        "\n[bold yellow]⚠️ Important:[/bold yellow]\n"
        "1. This process will run cleanup scripts on the engine.\n"
        "2. The source engine should be [bold]terminated[/bold] after the AMI is created.\n"
    )

    if not Confirm.ask("Continue with AMI creation?"):
        raise typer.Exit()

    # Create AMI using EC2 client directly, as the backend logic is too complex
    ec2 = boto3.client("ec2", region_name="us-east-1")
    ssm = boto3.client("ssm", region_name="us-east-1")

    try:
        # Clean up instance state before snapshotting
        console.print("Cleaning up instance for AMI creation...")
        cleanup_commands = [
            "sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
            "history -c",
            "sudo rm -rf /tmp/* /var/log/messages /var/log/cloud-init.log",
            "sudo rm -rf /var/lib/amazon/ssm/* /etc/amazon/ssm/*",
            "sleep 2 && sudo systemctl stop amazon-ssm-agent &",  # Stop agent last
        ]

        # Fire-and-forget: the response is not inspected because the last
        # cleanup step stops the SSM agent, so a final status may never come.
        # NOTE(review): create_image below is issued without waiting for the
        # cleanup to finish — confirm that ordering is acceptable.
        ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={"commands": cleanup_commands, "executionTimeout": ["120"]},
        )

        # Acknowledge that the SSM command might be in progress as the agent shuts down
        console.print(
            "[dim]ℹ️ Cleanup command sent (status may show 'InProgress' as SSM agent stops)[/dim]"
        )

        # Create the AMI
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            transient=True,
        ) as progress:
            task = progress.add_task(
                "Creating AMI (this will take several minutes)...", total=None
            )

            response = ec2.create_image(
                InstanceId=engine["instance_id"],
                Name=ami_name,
                Description=description,
                NoReboot=False,
                TagSpecifications=[
                    {
                        "ResourceType": "image",
                        "Tags": [
                            {"Key": "Environment", "Value": "dev"},
                            {"Key": "Type", "Value": "golden-ami"},
                            {"Key": "EngineType", "Value": engine["engine_type"]},
                            {"Key": "Name", "Value": ami_name},
                        ],
                    }
                ],
            )

            ami_id = response["ImageId"]
            progress.update(
                task,
                completed=True,
                description="[green]✓ AMI creation initiated![/green]",
            )

        console.print(f" [bold]AMI ID:[/] {ami_id}")
        console.print("\nThe AMI creation process will continue in the background.")
        console.print("You can monitor progress in the EC2 Console under 'AMIs'.")
        console.print(
            "\nOnce complete, update the AMI ID in [bold]terraform/environments/dev/variables.tf[/bold] "
            "and run [bold]terraform apply[/bold]."
        )
        console.print(
            f"\nRemember to [bold red]terminate the source engine '{engine['name']}'[/bold red] to save costs."
        )

    except ClientError as e:
        console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
        raise typer.Exit(1)
@@ -0,0 +1,148 @@
1
+ """Engine idle timeout command."""
2
+
3
+ import re
4
+ import time
5
+ from typing import Optional
6
+
7
+ import boto3
8
+ import typer
9
+
10
+ from ..engine_studio_utils.api_utils import make_api_request
11
+ from ..engine_studio_utils.aws_utils import check_aws_sso
12
+ from ..engine_studio_utils.constants import console
13
+ from ..engine_studio_utils.formatting import resolve_engine
14
+
15
+
16
def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
    slack: Optional[str] = typer.Option(
        None, "--slack", help="Set Slack notifications: none, default, all"
    ),
):
    """Show or set engine idle-detector settings."""
    # NOTE: the docstring above is kept short because typer displays it as the
    # command's help text; implementation notes live in comments instead.
    #
    # `set` shadows the builtin, but it is part of the command's signature and
    # is therefore left unchanged.
    #
    # All settings live in /etc/engine.env on the engine and are read or
    # edited through SSM shell commands. Each change is confirmed by polling
    # the command result before success is reported.
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Handle slack notifications change
    if slack:
        slack = slack.lower()
        if slack not in ["none", "default", "all"]:
            console.print("[red]❌ Invalid slack option. Use: none, default, all[/red]")
            raise typer.Exit(1)

        console.print(f"Setting Slack notifications to [bold]{slack}[/bold]...")

        # (warnings, idle start, idle end, shutdown) flags for each preset.
        presets = {
            "none": ("false", "false", "false", "false"),
            "default": ("true", "false", "false", "true"),
            "all": ("true", "true", "true", "true"),
        }
        keys = (
            "SLACK_NOTIFY_WARNINGS",
            "SLACK_NOTIFY_IDLE_START",
            "SLACK_NOTIFY_IDLE_END",
            "SLACK_NOTIFY_SHUTDOWN",
        )
        settings = dict(zip(keys, presets[slack]))

        commands = []
        for key, value in settings.items():
            # Use a robust sed command that adds the line if it doesn't exist
            commands.append(
                f"grep -q '^{key}=' /etc/engine.env && sudo sed -i 's|^{key}=.*|{key}={value}|' /etc/engine.env || echo '{key}={value}' | sudo tee -a /etc/engine.env > /dev/null"
            )

        # Instead of restarting service, send SIGHUP to reload config
        commands.append(
            "sudo pkill -HUP -f engine-idle-detector.py || sudo systemctl restart engine-idle-detector.service"
        )

        resp = ssm.send_command(
            InstanceIds=[instance_id],
            DocumentName="AWS-RunShellScript",
            Parameters={"commands": commands, "executionTimeout": ["60"]},
        )
        inv = _await_idle_ssm_command(ssm, resp["Command"]["CommandId"], instance_id)
        if not inv or inv["Status"] != "Success":
            status = inv["Status"] if inv else "Unknown"
            console.print(
                f"[red]❌ Failed to update Slack notifications (status: {status})[/red]"
            )
            raise typer.Exit(1)
        console.print(f"[green]✓ Slack notifications updated to '{slack}'[/green]")
        console.print("[dim]Note: Settings updated without resetting idle timer[/dim]")

    # Handle setting new timeout value
    if set is not None:
        m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", set)
        if not m:
            console.print(
                "[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]"
            )
            raise typer.Exit(1)
        hours = int(m.group(1) or 0)
        minutes = int(m.group(2) or 0)
        seconds = hours * 3600 + minutes * 60
        if seconds == 0:
            # Also reached for an empty string, which the pattern matches.
            console.print("[red]❌ Duration must be greater than zero[/red]")
            raise typer.Exit(1)

        console.print(f"Setting idle timeout to {set} ({seconds} seconds)…")

        # Delete any existing entry, append the new one, restart the detector.
        cmd = (
            "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
            f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
            "sudo systemctl restart engine-idle-detector.service"
        )

        resp = ssm.send_command(
            InstanceIds=[instance_id],
            DocumentName="AWS-RunShellScript",
            Parameters={"commands": [cmd], "executionTimeout": ["60"]},
        )
        inv = _await_idle_ssm_command(ssm, resp["Command"]["CommandId"], instance_id)
        if not inv or inv["Status"] != "Success":
            status = inv["Status"] if inv else "Unknown"
            console.print(
                f"[red]❌ Failed to update idle timeout (status: {status})[/red]"
            )
            raise typer.Exit(1)
        console.print(f"[green]✓ Idle timeout updated to {set}[/green]")

    # If no action was specified, show current timeout
    if set is None and slack is None:
        resp = ssm.send_command(
            InstanceIds=[instance_id],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
                ],
                "executionTimeout": ["10"],
            },
        )
        inv = _await_idle_ssm_command(
            ssm, resp["Command"]["CommandId"], instance_id, attempts=10
        )
        if inv and inv["Status"] == "Success":
            secs = _parse_idle_timeout_seconds(inv["StandardOutputContent"])
            console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
        else:
            console.print("[red]❌ Could not retrieve idle timeout[/red]")


def _await_idle_ssm_command(ssm, command_id, instance_id, attempts=30):
    """Poll SSM once per second until the command reaches a terminal status.

    Returns the last invocation response seen, or None if the invocation never
    became visible within ``attempts`` polls. The caller inspects ``Status``.
    """
    terminal_statuses = ("Success", "Failed", "Cancelled", "TimedOut")
    invocation = None
    for _ in range(attempts):
        time.sleep(1)
        try:
            invocation = ssm.get_command_invocation(
                CommandId=command_id, InstanceId=instance_id
            )
        except ssm.exceptions.InvocationDoesNotExist:
            # The invocation can lag slightly behind send_command; keep polling.
            continue
        if invocation["Status"] in terminal_statuses:
            break
    return invocation


def _parse_idle_timeout_seconds(output, default=1800):
    """Extract the IDLE_TIMEOUT_SECONDS value from grep output.

    Tolerates duplicate or malformed lines instead of raising ValueError: the
    last line with a numeric value is used, otherwise ``default`` is returned.
    """
    # NOTE(review): assumes the last assignment in the env file is the
    # effective one — confirm against how the idle detector reads the file.
    for line in reversed(output.strip().splitlines()):
        _, sep, value = line.partition("=")
        value = value.strip()
        if sep and value.isdigit():
            return int(value)
    return default