dayhoff-tools 1.3.16__tar.gz → 1.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/PKG-INFO +1 -1
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/engine_commands.py +217 -8
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/pyproject.toml +1 -1
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/README.md +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/cli/utility_commands.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.3.16 → dayhoff_tools-1.3.18}/dayhoff_tools/warehouse.py +0 -0
@@ -18,6 +18,7 @@ from rich.panel import Panel
|
|
18
18
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
19
19
|
from rich.prompt import Confirm, IntPrompt, Prompt
|
20
20
|
from rich.table import Table
|
21
|
+
import re
|
21
22
|
|
22
23
|
# Initialize Typer apps
|
23
24
|
engine_app = typer.Typer(help="Manage compute engines for development.")
|
@@ -37,6 +38,41 @@ HOURLY_COSTS = {
|
|
37
38
|
# SSH config management
|
38
39
|
SSH_MANAGED_COMMENT = "# Managed by dh engine"
|
39
40
|
|
41
|
+
# --------------------------------------------------------------------------------
|
42
|
+
# Bootstrap stage helpers
|
43
|
+
# --------------------------------------------------------------------------------
|
44
|
+
|
45
|
+
def _colour_stage(stage: str) -> str:
|
46
|
+
"""Return colourised stage name for table output."""
|
47
|
+
if not stage:
|
48
|
+
return "[dim]-[/dim]"
|
49
|
+
low = stage.lower()
|
50
|
+
if low.startswith("error"):
|
51
|
+
return f"[red]{stage}[/red]"
|
52
|
+
if low == "finished":
|
53
|
+
return f"[green]{stage}[/green]"
|
54
|
+
return f"[yellow]{stage}[/yellow]"
|
55
|
+
|
56
|
+
|
57
|
+
def _fetch_init_stages(instance_ids: List[str]) -> Dict[str, str]:
|
58
|
+
"""Fetch DayhoffInitStage tag for many instances in one call."""
|
59
|
+
if not instance_ids:
|
60
|
+
return {}
|
61
|
+
ec2 = boto3.client("ec2", region_name="us-east-1")
|
62
|
+
stages: Dict[str, str] = {}
|
63
|
+
try:
|
64
|
+
paginator = ec2.get_paginator("describe_instances")
|
65
|
+
for page in paginator.paginate(InstanceIds=instance_ids):
|
66
|
+
for res in page["Reservations"]:
|
67
|
+
for inst in res["Instances"]:
|
68
|
+
iid = inst["InstanceId"]
|
69
|
+
tag_val = next((t["Value"] for t in inst.get("Tags", []) if t["Key"] == "DayhoffInitStage"), None)
|
70
|
+
if tag_val:
|
71
|
+
stages[iid] = tag_val
|
72
|
+
except Exception:
|
73
|
+
pass # best-effort
|
74
|
+
return stages
|
75
|
+
|
40
76
|
|
41
77
|
def check_aws_sso() -> str:
|
42
78
|
"""Check AWS SSO status and return username."""
|
@@ -486,6 +522,9 @@ def list_engines(
|
|
486
522
|
console.print("No engines found.")
|
487
523
|
return
|
488
524
|
|
525
|
+
# Fetch bootstrap stages once
|
526
|
+
stages_map = _fetch_init_stages([e["instance_id"] for e in engines])
|
527
|
+
|
489
528
|
# Create table
|
490
529
|
table = Table(title="Engines", box=box.ROUNDED)
|
491
530
|
table.add_column("Name", style="cyan")
|
@@ -493,6 +532,7 @@ def list_engines(
|
|
493
532
|
table.add_column("Type")
|
494
533
|
table.add_column("User")
|
495
534
|
table.add_column("Status")
|
535
|
+
table.add_column("Stage")
|
496
536
|
table.add_column("Disk Usage")
|
497
537
|
table.add_column("Uptime/Since")
|
498
538
|
table.add_column("$/hour", justify="right")
|
@@ -515,12 +555,15 @@ def list_engines(
|
|
515
555
|
time_str = launch_time.strftime("%Y-%m-%d %H:%M")
|
516
556
|
disk_usage = "-"
|
517
557
|
|
558
|
+
stage_display = _colour_stage(stages_map.get(engine["instance_id"], "-"))
|
559
|
+
|
518
560
|
table.add_row(
|
519
561
|
engine["name"],
|
520
562
|
engine["instance_id"],
|
521
563
|
engine["engine_type"],
|
522
564
|
engine["user"],
|
523
565
|
format_status(engine["state"], engine.get("ready")),
|
566
|
+
stage_display,
|
524
567
|
disk_usage,
|
525
568
|
time_str,
|
526
569
|
f"${hourly_cost:.2f}",
|
@@ -539,8 +582,9 @@ def list_engines(
|
|
539
582
|
@engine_app.command("status")
|
540
583
|
def engine_status(
|
541
584
|
name_or_id: str = typer.Argument(help="Engine name or instance ID"),
|
585
|
+
show_log: bool = typer.Option(False, "--show-log", help="Show bootstrap log"),
|
542
586
|
):
|
543
|
-
"""Show detailed status
|
587
|
+
"""Show detailed engine status and information."""
|
544
588
|
check_aws_sso()
|
545
589
|
|
546
590
|
# Get all engines to resolve name
|
@@ -566,18 +610,43 @@ def engine_status(
|
|
566
610
|
hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
|
567
611
|
total_cost = hourly_cost * (uptime.total_seconds() / 3600)
|
568
612
|
|
569
|
-
|
613
|
+
stages_map = _fetch_init_stages([engine["instance_id"]])
|
614
|
+
stage_val = stages_map.get(engine["instance_id"], "-")
|
615
|
+
|
570
616
|
status_lines = [
|
571
617
|
f"[bold]Name:[/bold] {engine['name']}",
|
572
618
|
f"[bold]Instance:[/bold] {engine['instance_id']}",
|
573
619
|
f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
|
574
620
|
f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
|
621
|
+
f"[bold]Bootstrap:[/bold] {_colour_stage(stage_val)}",
|
575
622
|
f"[bold]User:[/bold] {engine['user']}",
|
576
623
|
f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
|
577
624
|
f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
|
578
625
|
f"[bold]Cost:[/bold] ${hourly_cost:.2f}/hour (${total_cost:.2f} total)",
|
579
626
|
]
|
580
627
|
|
628
|
+
# Health report (only if bootstrap finished)
|
629
|
+
if stage_val == "finished":
|
630
|
+
try:
|
631
|
+
ssm = boto3.client("ssm", region_name="us-east-1")
|
632
|
+
res = ssm.send_command(
|
633
|
+
InstanceIds=[engine["instance_id"]],
|
634
|
+
DocumentName="AWS-RunShellScript",
|
635
|
+
Parameters={"commands": ["cat /var/run/engine-health.json || true"], "executionTimeout": ["10"]},
|
636
|
+
)
|
637
|
+
cid = res["Command"]["CommandId"]
|
638
|
+
time.sleep(1)
|
639
|
+
inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
|
640
|
+
if inv["Status"] == "Success":
|
641
|
+
import json as _json
|
642
|
+
health = _json.loads(inv["StandardOutputContent"].strip() or "{}")
|
643
|
+
status_lines.append("")
|
644
|
+
status_lines.append("[bold]Health:[/bold]")
|
645
|
+
status_lines.append(f" • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}")
|
646
|
+
status_lines.append(f" • Idle Detector: {health.get('idle_detector_timer', 'unknown')}")
|
647
|
+
except Exception:
|
648
|
+
pass
|
649
|
+
|
581
650
|
if attached_studios:
|
582
651
|
status_lines.append("")
|
583
652
|
status_lines.append("[bold]Attached Studios:[/bold]")
|
@@ -587,12 +656,30 @@ def engine_status(
|
|
587
656
|
f" • {studio['user']} ({studio['studio_id']}) - attached {attach_time}"
|
588
657
|
)
|
589
658
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
659
|
+
console.print(Panel("\n".join(status_lines), title="Engine Status", border_style="blue"))
|
660
|
+
|
661
|
+
if show_log:
|
662
|
+
console.print("\n[bold]Bootstrap Log:[/bold]")
|
663
|
+
try:
|
664
|
+
ssm = boto3.client("ssm", region_name="us-east-1")
|
665
|
+
resp = ssm.send_command(
|
666
|
+
InstanceIds=[engine["instance_id"]],
|
667
|
+
DocumentName="AWS-RunShellScript",
|
668
|
+
Parameters={"commands": ["cat /var/log/engine-setup.log 2>/dev/null || echo 'No setup log found'"], "executionTimeout": ["15"]},
|
669
|
+
)
|
670
|
+
cid = resp["Command"]["CommandId"]
|
671
|
+
time.sleep(2)
|
672
|
+
inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
|
673
|
+
if inv["Status"] == "Success":
|
674
|
+
log_content = inv["StandardOutputContent"].strip()
|
675
|
+
if log_content:
|
676
|
+
console.print(f"[dim]{log_content}[/dim]")
|
677
|
+
else:
|
678
|
+
console.print("[yellow]No bootstrap log available[/yellow]")
|
679
|
+
else:
|
680
|
+
console.print("[red]❌ Could not retrieve bootstrap log[/red]")
|
681
|
+
except Exception as e:
|
682
|
+
console.print(f"[red]❌ Error fetching log: {e}[/red]")
|
596
683
|
|
597
684
|
|
598
685
|
@engine_app.command("stop")
|
@@ -1909,3 +1996,125 @@ def resize_studio(
|
|
1909
1996
|
|
1910
1997
|
console.print("\n[dim]The filesystem will be automatically expanded when you next attach the studio.[/dim]")
|
1911
1998
|
console.print(f"To attach: [cyan]dh studio attach <engine-name>[/cyan]")
|
1999
|
+
|
2000
|
+
# ================= Idle timeout command =================
|
2001
|
+
|
2002
|
+
|
2003
|
+
def _run_idle_timeout_ssm(ssm, instance_id: str, command: str, timeout_seconds: int) -> dict:
    """Run one shell command via SSM and poll until it leaves the pending states.

    Returns the last invocation record seen; its ``Status`` may still be a
    pending state if the local deadline passed first.
    """
    resp = ssm.send_command(
        InstanceIds=[instance_id],
        DocumentName="AWS-RunShellScript",
        Parameters={"commands": [command], "executionTimeout": [str(timeout_seconds)]},
    )
    cid = resp["Command"]["CommandId"]
    # Small grace period on top of the remote execution timeout.
    deadline = time.monotonic() + timeout_seconds + 5
    inv = {"Status": "Pending"}
    while True:
        time.sleep(1)
        try:
            inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
        except ssm.exceptions.InvocationDoesNotExist:
            # The invocation may not be registered yet right after send_command.
            inv = {"Status": "Pending"}
        if inv["Status"] not in ("Pending", "InProgress", "Delayed"):
            return inv
        if time.monotonic() >= deadline:
            return inv


@engine_app.command("idle-timeout")
def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    # NOTE: ``set`` shadows the builtin inside this function; the name is kept
    # so existing callers of this function are not broken.
    set: Optional[str] = typer.Option(None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"),
):
    """Show or set the engine idle-detector timeout."""
    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    ssm = boto3.client("ssm", region_name="us-east-1")

    if set is None:
        # ----- show current value -----
        try:
            inv = _run_idle_timeout_ssm(
                ssm,
                instance_id,
                "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'",
                10,
            )
        except Exception as e:
            console.print(f"[red]❌ Error querying idle timeout: {e}[/red]")
            raise typer.Exit(1)

        if inv["Status"] != "Success":
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
            return

        # grep may emit several matching lines; use the last one
        # (presumably the one that takes effect — verify against the consumer
        # of /etc/engine.env).
        out_lines = inv.get("StandardOutputContent", "").strip().splitlines()
        line = out_lines[-1].strip() if out_lines else ""
        if "=" in line:
            raw = line.partition("=")[2].strip().strip("'\"")
            try:
                secs = int(raw)
            except ValueError:
                console.print(f"[red]❌ Could not parse idle timeout value: {raw!r}[/red]")
                return
        else:
            secs = 1800
        console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
        return

    # ----- set new value -----
    m = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?", set.strip())
    if not m:
        console.print("[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]")
        raise typer.Exit(1)
    hours = int(m.group(1) or 0)
    minutes = int(m.group(2) or 0)
    seconds = hours * 3600 + minutes * 60
    if seconds == 0:
        # Also covers an empty string, which the pattern accepts.
        console.print("[red]❌ Duration must be greater than zero[/red]")
        raise typer.Exit(1)

    console.print(f"Setting idle timeout to {set} ({seconds} seconds)…")

    cmd = (
        "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
        f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
        "sudo systemctl restart engine-idle-detector.timer"
    )

    try:
        inv = _run_idle_timeout_ssm(ssm, instance_id, cmd, 60)
    except Exception as e:
        console.print(f"[red]❌ Error updating idle timeout: {e}[/red]")
        raise typer.Exit(1)

    # Only claim success once the remote command has actually succeeded.
    if inv["Status"] != "Success":
        detail = inv.get("StandardErrorContent", "").strip()
        console.print(f"[red]❌ Failed to update idle timeout (status: {inv['Status']})[/red]")
        if detail:
            console.print(detail, markup=False)
        raise typer.Exit(1)

    console.print(f"[green]✓ Idle timeout updated to {set}[/green]")
|
2068
|
+
|
2069
|
+
# ================= Debug command =================
|
2070
|
+
|
2071
|
+
@engine_app.command("debug")
def debug_engine(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
):
    """Debug engine bootstrap status and files."""
    # Local import: remote output is arbitrary text and must not be parsed as
    # Rich markup when printed.
    from rich.markup import escape

    check_aws_sso()

    # Resolve engine
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)
    instance_id = engine["instance_id"]

    console.print(f"[bold]Debug info for {engine['name']}:[/bold]\n")

    ssm = boto3.client("ssm", region_name="us-east-1")

    # Each entry is (label, shell command); every command falls back to a
    # marker string so a missing file still yields readable output.
    checks = [
        ("Stage file", "cat /var/run/engine-init.stage 2>/dev/null || echo 'MISSING'"),
        ("Health file", "cat /var/run/engine-health.json 2>/dev/null || echo 'MISSING'"),
        ("Sentinel file", "ls -la /opt/dayhoff/first_boot_complete.sentinel 2>/dev/null || echo 'MISSING'"),
        ("Setup service", "systemctl status setup-aws-vm.service --no-pager || echo 'Service not found'"),
        ("Bootstrap log tail", "tail -20 /var/log/engine-setup.log 2>/dev/null || echo 'No log'"),
        ("Environment file", "cat /etc/engine.env 2>/dev/null || echo 'MISSING'"),
    ]

    pending_states = ("Pending", "InProgress", "Delayed")

    for name, cmd in checks:
        try:
            resp = ssm.send_command(
                InstanceIds=[instance_id],
                DocumentName="AWS-RunShellScript",
                Parameters={"commands": [cmd], "executionTimeout": ["10"]},
            )
            cid = resp["Command"]["CommandId"]

            # Poll instead of sleeping once: after a fixed delay the command
            # may still be running, which would be misreported as a failure.
            inv = {"Status": "Pending"}
            deadline = time.monotonic() + 15
            while inv["Status"] in pending_states and time.monotonic() < deadline:
                time.sleep(1)
                try:
                    inv = ssm.get_command_invocation(CommandId=cid, InstanceId=instance_id)
                except ssm.exceptions.InvocationDoesNotExist:
                    # Invocation not registered yet; keep waiting.
                    continue

            if inv["Status"] == "Success":
                output = inv["StandardOutputContent"].strip()
                console.print(f"[cyan]{name}:[/cyan]")
                console.print(f"[dim]{escape(output)}[/dim]\n")
            else:
                console.print(f"[cyan]{name}:[/cyan] [red]FAILED ({inv['Status']})[/red]")
                stderr = inv.get("StandardErrorContent", "").strip()
                if stderr:
                    console.print(f"[dim]{escape(stderr)}[/dim]")
                console.print("")

        except Exception as e:
            # One failing check must not prevent the remaining checks from running.
            console.print(f"[cyan]{name}:[/cyan] [red]ERROR: {escape(str(e))}[/red]\n")
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
|
5
5
|
|
6
6
|
[project]
|
7
7
|
name = "dayhoff-tools"
|
8
|
-
version = "1.3.16"
|
8
|
+
version = "1.3.18"
|
9
9
|
description = "Common tools for all the repos at Dayhoff Labs"
|
10
10
|
authors = [
|
11
11
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|