dayhoff-tools 1.3.15__tar.gz → 1.3.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/PKG-INFO +1 -1
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/engine_commands.py +166 -7
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/pyproject.toml +1 -1
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/README.md +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/cli/utility_commands.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.3.15 → dayhoff_tools-1.3.17}/dayhoff_tools/warehouse.py +0 -0
@@ -18,6 +18,7 @@ from rich.panel import Panel
|
|
18
18
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
19
19
|
from rich.prompt import Confirm, IntPrompt, Prompt
|
20
20
|
from rich.table import Table
|
21
|
+
import re
|
21
22
|
|
22
23
|
# Initialize Typer apps
|
23
24
|
engine_app = typer.Typer(help="Manage compute engines for development.")
|
@@ -37,6 +38,41 @@ HOURLY_COSTS = {
|
|
37
38
|
# SSH config management
|
38
39
|
SSH_MANAGED_COMMENT = "# Managed by dh engine"
|
39
40
|
|
41
|
+
# --------------------------------------------------------------------------------
|
42
|
+
# Bootstrap stage helpers
|
43
|
+
# --------------------------------------------------------------------------------
|
44
|
+
|
45
|
+
def _colour_stage(stage: str) -> str:
|
46
|
+
"""Return colourised stage name for table output."""
|
47
|
+
if not stage:
|
48
|
+
return "[dim]-[/dim]"
|
49
|
+
low = stage.lower()
|
50
|
+
if low.startswith("error"):
|
51
|
+
return f"[red]{stage}[/red]"
|
52
|
+
if low == "finished":
|
53
|
+
return f"[green]{stage}[/green]"
|
54
|
+
return f"[yellow]{stage}[/yellow]"
|
55
|
+
|
56
|
+
|
57
|
+
def _fetch_init_stages(instance_ids: List[str]) -> Dict[str, str]:
|
58
|
+
"""Fetch DayhoffInitStage tag for many instances in one call."""
|
59
|
+
if not instance_ids:
|
60
|
+
return {}
|
61
|
+
ec2 = boto3.client("ec2", region_name="us-east-1")
|
62
|
+
stages: Dict[str, str] = {}
|
63
|
+
try:
|
64
|
+
paginator = ec2.get_paginator("describe_instances")
|
65
|
+
for page in paginator.paginate(InstanceIds=instance_ids):
|
66
|
+
for res in page["Reservations"]:
|
67
|
+
for inst in res["Instances"]:
|
68
|
+
iid = inst["InstanceId"]
|
69
|
+
tag_val = next((t["Value"] for t in inst.get("Tags", []) if t["Key"] == "DayhoffInitStage"), None)
|
70
|
+
if tag_val:
|
71
|
+
stages[iid] = tag_val
|
72
|
+
except Exception:
|
73
|
+
pass # best-effort
|
74
|
+
return stages
|
75
|
+
|
40
76
|
|
41
77
|
def check_aws_sso() -> str:
|
42
78
|
"""Check AWS SSO status and return username."""
|
@@ -388,6 +424,12 @@ def launch_engine(
|
|
388
424
|
help="Engine type: cpu, cpumax, t4, a10g, a100",
|
389
425
|
),
|
390
426
|
user: Optional[str] = typer.Option(None, "--user", "-u", help="Override username"),
|
427
|
+
boot_disk_size: Optional[int] = typer.Option(
|
428
|
+
None,
|
429
|
+
"--size",
|
430
|
+
"-s",
|
431
|
+
help="Boot disk size in GB (default: 50GB, min: 20GB, max: 1000GB)",
|
432
|
+
),
|
391
433
|
):
|
392
434
|
"""Launch a new engine instance."""
|
393
435
|
username = check_aws_sso()
|
@@ -401,9 +443,19 @@ def launch_engine(
|
|
401
443
|
console.print(f"Valid types: {', '.join(valid_types)}")
|
402
444
|
raise typer.Exit(1)
|
403
445
|
|
446
|
+
# Validate boot disk size
|
447
|
+
if boot_disk_size is not None:
|
448
|
+
if boot_disk_size < 20:
|
449
|
+
console.print("[red]❌ Boot disk size must be at least 20GB[/red]")
|
450
|
+
raise typer.Exit(1)
|
451
|
+
if boot_disk_size > 1000:
|
452
|
+
console.print("[red]❌ Boot disk size cannot exceed 1000GB[/red]")
|
453
|
+
raise typer.Exit(1)
|
454
|
+
|
404
455
|
cost = HOURLY_COSTS.get(engine_type, 0)
|
456
|
+
disk_info = f" with {boot_disk_size}GB boot disk" if boot_disk_size else ""
|
405
457
|
console.print(
|
406
|
-
f"Launching [cyan]{name}[/cyan] ({engine_type}) for ${cost:.2f}/hour..."
|
458
|
+
f"Launching [cyan]{name}[/cyan] ({engine_type}){disk_info} for ${cost:.2f}/hour..."
|
407
459
|
)
|
408
460
|
|
409
461
|
with Progress(
|
@@ -413,17 +465,23 @@ def launch_engine(
|
|
413
465
|
) as progress:
|
414
466
|
progress.add_task("Creating engine...", total=None)
|
415
467
|
|
416
|
-
|
417
|
-
"
|
418
|
-
"
|
419
|
-
|
420
|
-
|
468
|
+
request_data = {
|
469
|
+
"name": name,
|
470
|
+
"user": username,
|
471
|
+
"engine_type": engine_type,
|
472
|
+
}
|
473
|
+
if boot_disk_size is not None:
|
474
|
+
request_data["boot_disk_size"] = boot_disk_size
|
475
|
+
|
476
|
+
response = make_api_request("POST", "/engines", json_data=request_data)
|
421
477
|
|
422
478
|
if response.status_code == 201:
|
423
479
|
data = response.json()
|
424
480
|
console.print(f"[green]✓ Engine launched successfully![/green]")
|
425
481
|
console.print(f"Instance ID: [cyan]{data['instance_id']}[/cyan]")
|
426
482
|
console.print(f"Type: {data['instance_type']} (${cost:.2f}/hour)")
|
483
|
+
if boot_disk_size:
|
484
|
+
console.print(f"Boot disk: {boot_disk_size}GB")
|
427
485
|
console.print("\nThe engine is initializing. This may take a few minutes.")
|
428
486
|
console.print(f"Check status with: [cyan]dh engine status {name}[/cyan]")
|
429
487
|
else:
|
@@ -464,6 +522,9 @@ def list_engines(
|
|
464
522
|
console.print("No engines found.")
|
465
523
|
return
|
466
524
|
|
525
|
+
# Fetch bootstrap stages once
|
526
|
+
stages_map = _fetch_init_stages([e["instance_id"] for e in engines])
|
527
|
+
|
467
528
|
# Create table
|
468
529
|
table = Table(title="Engines", box=box.ROUNDED)
|
469
530
|
table.add_column("Name", style="cyan")
|
@@ -471,6 +532,7 @@ def list_engines(
|
|
471
532
|
table.add_column("Type")
|
472
533
|
table.add_column("User")
|
473
534
|
table.add_column("Status")
|
535
|
+
table.add_column("Stage")
|
474
536
|
table.add_column("Disk Usage")
|
475
537
|
table.add_column("Uptime/Since")
|
476
538
|
table.add_column("$/hour", justify="right")
|
@@ -493,12 +555,15 @@ def list_engines(
|
|
493
555
|
time_str = launch_time.strftime("%Y-%m-%d %H:%M")
|
494
556
|
disk_usage = "-"
|
495
557
|
|
558
|
+
stage_display = _colour_stage(stages_map.get(engine["instance_id"], "-"))
|
559
|
+
|
496
560
|
table.add_row(
|
497
561
|
engine["name"],
|
498
562
|
engine["instance_id"],
|
499
563
|
engine["engine_type"],
|
500
564
|
engine["user"],
|
501
565
|
format_status(engine["state"], engine.get("ready")),
|
566
|
+
stage_display,
|
502
567
|
disk_usage,
|
503
568
|
time_str,
|
504
569
|
f"${hourly_cost:.2f}",
|
@@ -544,18 +609,43 @@ def engine_status(
|
|
544
609
|
hourly_cost = HOURLY_COSTS.get(engine["engine_type"], 0)
|
545
610
|
total_cost = hourly_cost * (uptime.total_seconds() / 3600)
|
546
611
|
|
547
|
-
|
612
|
+
stages_map = _fetch_init_stages([engine["instance_id"]])
|
613
|
+
stage_val = stages_map.get(engine["instance_id"], "-")
|
614
|
+
|
548
615
|
status_lines = [
|
549
616
|
f"[bold]Name:[/bold] {engine['name']}",
|
550
617
|
f"[bold]Instance:[/bold] {engine['instance_id']}",
|
551
618
|
f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
|
552
619
|
f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
|
620
|
+
f"[bold]Bootstrap:[/bold] {_colour_stage(stage_val)}",
|
553
621
|
f"[bold]User:[/bold] {engine['user']}",
|
554
622
|
f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
|
555
623
|
f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
|
556
624
|
f"[bold]Cost:[/bold] ${hourly_cost:.2f}/hour (${total_cost:.2f} total)",
|
557
625
|
]
|
558
626
|
|
627
|
+
# Health report (only if bootstrap finished)
|
628
|
+
if stage_val == "finished":
|
629
|
+
try:
|
630
|
+
ssm = boto3.client("ssm", region_name="us-east-1")
|
631
|
+
res = ssm.send_command(
|
632
|
+
InstanceIds=[engine["instance_id"]],
|
633
|
+
DocumentName="AWS-RunShellScript",
|
634
|
+
Parameters={"commands": ["cat /var/run/engine-health.json || true"], "executionTimeout": ["10"]},
|
635
|
+
)
|
636
|
+
cid = res["Command"]["CommandId"]
|
637
|
+
time.sleep(1)
|
638
|
+
inv = ssm.get_command_invocation(CommandId=cid, InstanceId=engine["instance_id"])
|
639
|
+
if inv["Status"] == "Success":
|
640
|
+
import json as _json
|
641
|
+
health = _json.loads(inv["StandardOutputContent"].strip() or "{}")
|
642
|
+
status_lines.append("")
|
643
|
+
status_lines.append("[bold]Health:[/bold]")
|
644
|
+
status_lines.append(f" • GPU Drivers: {'OK' if health.get('drivers_ok') else 'MISSING'}")
|
645
|
+
status_lines.append(f" • Idle Detector: {health.get('idle_detector_timer', 'unknown')}")
|
646
|
+
except Exception:
|
647
|
+
pass
|
648
|
+
|
559
649
|
if attached_studios:
|
560
650
|
status_lines.append("")
|
561
651
|
status_lines.append("[bold]Attached Studios:[/bold]")
|
@@ -1887,3 +1977,72 @@ def resize_studio(
|
|
1887
1977
|
|
1888
1978
|
console.print("\n[dim]The filesystem will be automatically expanded when you next attach the studio.[/dim]")
|
1889
1979
|
console.print(f"To attach: [cyan]dh studio attach <engine-name>[/cyan]")
|
1980
|
+
|
1981
|
+
# ================= Idle timeout command =================
|
1982
|
+
|
1983
|
+
|
1984
|
+
def _parse_duration_to_seconds(spec: str) -> Optional[int]:
    """Parse a duration like '2h', '45m', or '1h30m' into total seconds.

    Returns None when the format is invalid. Note that the empty string
    parses to 0; the caller rejects zero durations separately.
    """
    m = re.match(r"^(?:(\d+)h)?(?:(\d+)m)?$", spec)
    if m is None:
        return None
    return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60


@engine_app.command("idle-timeout")
def idle_timeout_cmd(
    name_or_id: str = typer.Argument(help="Engine name or instance ID"),
    set: Optional[str] = typer.Option(
        None, "--set", "-s", help="New timeout (e.g., 2h30m, 45m)"
    ),
):
    """Show or set the engine idle-detector timeout.

    Without --set, reads IDLE_TIMEOUT_SECONDS from /etc/engine.env on the
    engine via SSM and prints it (default 1800s when not explicitly set).
    With --set, rewrites that entry and restarts the idle-detector timer.
    """
    check_aws_sso()

    # Resolve the engine from its name or instance ID.
    response = make_api_request("GET", "/engines")
    if response.status_code != 200:
        console.print("[red]❌ Failed to fetch engines[/red]")
        raise typer.Exit(1)

    engines = response.json().get("engines", [])
    engine = resolve_engine(name_or_id, engines)

    ssm = boto3.client("ssm", region_name="us-east-1")

    if set is None:
        # Show current value.
        resp = ssm.send_command(
            InstanceIds=[engine["instance_id"]],
            DocumentName="AWS-RunShellScript",
            Parameters={
                "commands": [
                    "grep -E '^IDLE_TIMEOUT_SECONDS=' /etc/engine.env || echo 'IDLE_TIMEOUT_SECONDS=1800'"
                ],
                "executionTimeout": ["10"],
            },
        )
        cid = resp["Command"]["CommandId"]
        time.sleep(1)
        inv = ssm.get_command_invocation(
            CommandId=cid, InstanceId=engine["instance_id"]
        )
        if inv["Status"] == "Success":
            line = inv["StandardOutputContent"].strip()
            try:
                # split("=", 1) tolerates '=' inside the value part.
                secs = int(line.split("=", 1)[1]) if "=" in line else 1800
            except ValueError:
                # Malformed entry on the engine: fall back to the default
                # instead of crashing the CLI.
                secs = 1800
            console.print(f"Current idle timeout: {secs//60}m ({secs} seconds)")
        else:
            console.print("[red]❌ Could not retrieve idle timeout[/red]")
        return

    # ----- set new value -----
    seconds = _parse_duration_to_seconds(set)
    if seconds is None:
        console.print("[red]❌ Invalid duration format. Use e.g. 2h, 45m, 1h30m[/red]")
        raise typer.Exit(1)
    if seconds == 0:
        console.print("[red]❌ Duration must be greater than zero[/red]")
        raise typer.Exit(1)

    console.print(f"Setting idle timeout to {set} ({seconds} seconds)…")

    cmd = (
        "sudo sed -i '/^IDLE_TIMEOUT_SECONDS=/d' /etc/engine.env && "
        f"echo 'IDLE_TIMEOUT_SECONDS={seconds}' | sudo tee -a /etc/engine.env >/dev/null && "
        "sudo systemctl restart engine-idle-detector.timer"
    )

    resp = ssm.send_command(
        InstanceIds=[engine["instance_id"]],
        DocumentName="AWS-RunShellScript",
        Parameters={"commands": [cmd], "executionTimeout": ["60"]},
    )
    cid = resp["Command"]["CommandId"]
    time.sleep(2)
    # Fix: previously this printed success unconditionally. Check the SSM
    # invocation result so a failed remote command is reported as an error.
    try:
        inv = ssm.get_command_invocation(
            CommandId=cid, InstanceId=engine["instance_id"]
        )
    except Exception:
        # Best-effort verification: the invocation record may lag behind
        # send_command; assume success rather than false-alarm.
        inv = None
    if inv is not None and inv["Status"] not in ("Success", "InProgress", "Pending"):
        console.print(
            f"[red]❌ Failed to update idle timeout (SSM status: {inv['Status']})[/red]"
        )
        raise typer.Exit(1)
    console.print(f"[green]✓ Idle timeout updated to {set}[/green]")
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
|
5
5
|
|
6
6
|
[project]
|
7
7
|
name = "dayhoff-tools"
|
8
|
-
version = "1.3.15"
|
8
|
+
version = "1.3.17"
|
9
9
|
description = "Common tools for all the repos at Dayhoff Labs"
|
10
10
|
authors = [
|
11
11
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|