dayhoff_tools-1.6.1-py3-none-any.whl → dayhoff_tools-1.6.2-py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of dayhoff-tools might be problematic. Click here for more details.
- dayhoff_tools/cli/engine_commands.py +72 -220
- {dayhoff_tools-1.6.1.dist-info → dayhoff_tools-1.6.2.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.6.1.dist-info → dayhoff_tools-1.6.2.dist-info}/RECORD +5 -5
- {dayhoff_tools-1.6.1.dist-info → dayhoff_tools-1.6.2.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.6.1.dist-info → dayhoff_tools-1.6.2.dist-info}/entry_points.txt +0 -0
|
@@ -1357,282 +1357,134 @@ def create_ami(
|
|
|
1357
1357
|
help="Engine name or instance ID to create AMI from"
|
|
1358
1358
|
),
|
|
1359
1359
|
):
|
|
1360
|
-
"""Create a Golden AMI from
|
|
1360
|
+
"""Create a 'Golden AMI' from a running engine.
|
|
1361
|
+
|
|
1362
|
+
This process is for creating a pre-warmed, standardized machine image
|
|
1363
|
+
that can be used to launch new engines more quickly.
|
|
1364
|
+
|
|
1365
|
+
IMPORTANT:
|
|
1366
|
+
- The engine MUST have all studios detached before running this command.
|
|
1367
|
+
- This process will make the source engine unusable. You should
|
|
1368
|
+
plan to TERMINATE the engine after the AMI is created.
|
|
1369
|
+
"""
|
|
1361
1370
|
check_aws_sso()
|
|
1362
1371
|
|
|
1363
|
-
# Get all engines to resolve name
|
|
1364
|
-
|
|
1372
|
+
# Get all engines to resolve name and check status
|
|
1373
|
+
# We pass check_ready=True to get attached studio info
|
|
1374
|
+
response = make_api_request("GET", "/engines", params={"check_ready": "true"})
|
|
1365
1375
|
if response.status_code != 200:
|
|
1366
1376
|
console.print("[red]❌ Failed to fetch engines[/red]")
|
|
1367
1377
|
raise typer.Exit(1)
|
|
1368
|
-
|
|
1378
|
+
|
|
1369
1379
|
engines = response.json().get("engines", [])
|
|
1370
1380
|
engine = resolve_engine(name_or_id, engines)
|
|
1371
1381
|
|
|
1372
|
-
|
|
1382
|
+
# --- Pre-flight checks ---
|
|
1383
|
+
|
|
1384
|
+
# 1. Check if engine is running
|
|
1385
|
+
if engine["state"].lower() != "running":
|
|
1386
|
+
console.print(f"[red]❌ Engine '{engine['name']}' is not running.[/red]")
|
|
1387
|
+
console.print("Please start it before creating an AMI.")
|
|
1388
|
+
raise typer.Exit(1)
|
|
1373
1389
|
|
|
1374
|
-
#
|
|
1375
|
-
|
|
1390
|
+
# 2. Check for attached studios from the detailed API response
|
|
1391
|
+
attached_studios = engine.get("studios", [])
|
|
1392
|
+
if attached_studios:
|
|
1393
|
+
console.print(f"[bold red]❌ Engine '{engine['name']}' has studios attached.[/bold red]")
|
|
1394
|
+
console.print("Please detach all studios before creating an AMI:")
|
|
1395
|
+
for studio in attached_studios:
|
|
1396
|
+
console.print(f" - {studio['user']} ({studio['studio_id']})")
|
|
1397
|
+
console.print("\nTo detach, run [bold]dh studio detach[/bold]")
|
|
1398
|
+
raise typer.Exit(1)
|
|
1376
1399
|
|
|
1377
|
-
#
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
f"
|
|
1382
|
-
f"image for {engine_type} engines"
|
|
1400
|
+
# Construct AMI name and description
|
|
1401
|
+
ami_name = f"prewarmed-engine-{engine['engine_type']}-{datetime.now().strftime('%Y%m%d')}"
|
|
1402
|
+
description = (
|
|
1403
|
+
f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled "
|
|
1404
|
+
f"dev container image for {engine['engine_type']} engines"
|
|
1383
1405
|
)
|
|
1384
1406
|
|
|
1385
|
-
console.print(f"AMI
|
|
1386
|
-
console.print(f"
|
|
1407
|
+
console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
|
|
1408
|
+
console.print(f"[bold]AMI Name:[/] {ami_name}")
|
|
1409
|
+
console.print(f"[bold]Description:[/] {description}")
|
|
1410
|
+
|
|
1387
1411
|
console.print(
|
|
1388
|
-
"\n[yellow]⚠️ Important:
|
|
1412
|
+
"\n[bold yellow]⚠️ Important:[/bold yellow]\n"
|
|
1413
|
+
"1. This process will run cleanup scripts on the engine.\n"
|
|
1414
|
+
"2. The source engine should be [bold]terminated[/bold] after the AMI is created.\n"
|
|
1389
1415
|
)
|
|
1390
1416
|
|
|
1391
|
-
if not Confirm.ask("
|
|
1392
|
-
|
|
1393
|
-
return
|
|
1417
|
+
if not Confirm.ask("Continue with AMI creation?"):
|
|
1418
|
+
raise typer.Exit()
|
|
1394
1419
|
|
|
1395
|
-
# Create AMI using EC2 client
|
|
1420
|
+
# Create AMI using EC2 client directly, as the backend logic is too complex
|
|
1396
1421
|
ec2 = boto3.client("ec2", region_name="us-east-1")
|
|
1422
|
+
ssm = boto3.client("ssm", region_name="us-east-1")
|
|
1397
1423
|
|
|
1398
1424
|
try:
|
|
1399
|
-
#
|
|
1425
|
+
# Clean up instance state before snapshotting
|
|
1400
1426
|
console.print("Cleaning up instance for AMI creation...")
|
|
1401
|
-
ssm = boto3.client("ssm", region_name="us-east-1")
|
|
1402
|
-
|
|
1403
|
-
# Clean up instance state, stopping SSM agent last to allow proper status reporting
|
|
1404
1427
|
cleanup_commands = [
|
|
1405
1428
|
"sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
|
|
1406
1429
|
"history -c",
|
|
1407
1430
|
"sudo rm -rf /tmp/* /var/log/messages /var/log/cloud-init.log",
|
|
1408
1431
|
"sudo rm -rf /var/lib/amazon/ssm/* /etc/amazon/ssm/*",
|
|
1409
|
-
|
|
1410
|
-
"sleep 2 && sudo systemctl stop amazon-ssm-agent &",
|
|
1432
|
+
"sleep 2 && sudo systemctl stop amazon-ssm-agent &", # Stop agent last
|
|
1411
1433
|
]
|
|
1412
1434
|
|
|
1413
1435
|
cleanup_response = ssm.send_command(
|
|
1414
1436
|
InstanceIds=[engine["instance_id"]],
|
|
1415
1437
|
DocumentName="AWS-RunShellScript",
|
|
1416
|
-
Parameters={
|
|
1417
|
-
"commands": cleanup_commands,
|
|
1418
|
-
"executionTimeout": ["120"],
|
|
1419
|
-
},
|
|
1438
|
+
Parameters={"commands": cleanup_commands, "executionTimeout": ["120"]},
|
|
1420
1439
|
)
|
|
1421
1440
|
|
|
1422
|
-
#
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
time.sleep(1)
|
|
1426
|
-
result = ssm.get_command_invocation(
|
|
1427
|
-
CommandId=command_id,
|
|
1428
|
-
InstanceId=engine["instance_id"],
|
|
1429
|
-
)
|
|
1430
|
-
if result["Status"] in ["Success", "Failed"]:
|
|
1431
|
-
break
|
|
1432
|
-
|
|
1433
|
-
# Note: InProgress status is expected when SSM agent stops itself
|
|
1434
|
-
if result["Status"] not in ["Success", "InProgress"]:
|
|
1435
|
-
console.print(
|
|
1436
|
-
f"[yellow]⚠️ Warning: Cleanup command status: {result['Status']}[/yellow]"
|
|
1437
|
-
)
|
|
1438
|
-
elif result["Status"] == "InProgress":
|
|
1439
|
-
console.print(
|
|
1440
|
-
"[dim]ℹ️ Cleanup command still in progress (expected when SSM agent stops itself)[/dim]"
|
|
1441
|
-
)
|
|
1442
|
-
|
|
1443
|
-
# Get instance details to find volumes to exclude
|
|
1444
|
-
instances = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
|
|
1445
|
-
instance = instances["Reservations"][0]["Instances"][0]
|
|
1446
|
-
|
|
1447
|
-
root_device = instance.get("RootDeviceName", "/dev/xvda")
|
|
1448
|
-
block_mappings = instance.get("BlockDeviceMappings", [])
|
|
1449
|
-
|
|
1450
|
-
# Build exclusion list for non-root volumes
|
|
1451
|
-
block_device_mappings = []
|
|
1452
|
-
for mapping in block_mappings:
|
|
1453
|
-
device_name = mapping.get("DeviceName", "")
|
|
1454
|
-
if device_name != root_device:
|
|
1455
|
-
block_device_mappings.append(
|
|
1456
|
-
{"DeviceName": device_name, "NoDevice": ""}
|
|
1457
|
-
)
|
|
1458
|
-
console.print(f" Excluding volume at {device_name}")
|
|
1459
|
-
|
|
1460
|
-
# --- Check & detach attached studios --------------------------------------------------
|
|
1461
|
-
# If any user studios are still attached we must detach them before the instance reboots
|
|
1462
|
-
# for snapshot consistency; otherwise Studio-Manager metadata becomes stale.
|
|
1463
|
-
|
|
1464
|
-
attached_resp = make_api_request(
|
|
1465
|
-
"GET", f"/engines/{engine['instance_id']}/studios"
|
|
1466
|
-
)
|
|
1467
|
-
attached_studios = (
|
|
1468
|
-
attached_resp.json().get("studios", [])
|
|
1469
|
-
if attached_resp.status_code == 200
|
|
1470
|
-
else []
|
|
1471
|
-
)
|
|
1472
|
-
|
|
1473
|
-
if attached_studios:
|
|
1474
|
-
console.print(
|
|
1475
|
-
f"Detaching {len(attached_studios)} studio(s) from this engine…"
|
|
1476
|
-
)
|
|
1477
|
-
for s in attached_studios:
|
|
1478
|
-
console.print(f" • {s['user']} ({s['studio_id']})")
|
|
1479
|
-
|
|
1480
|
-
detach_failed = []
|
|
1481
|
-
for s in attached_studios:
|
|
1482
|
-
resp = make_api_request("POST", f"/studios/{s['studio_id']}/detach")
|
|
1483
|
-
if resp.status_code != 200:
|
|
1484
|
-
# Check if actually detached despite error
|
|
1485
|
-
time.sleep(2)
|
|
1486
|
-
check_resp = make_api_request("GET", f"/studios/{s['studio_id']}")
|
|
1487
|
-
if check_resp.status_code == 200:
|
|
1488
|
-
studio_data = check_resp.json()
|
|
1489
|
-
if studio_data.get("status") == "available":
|
|
1490
|
-
console.print(
|
|
1491
|
-
f" [yellow]⚠ {s['studio_id']} reported error but is detached[/yellow]"
|
|
1492
|
-
)
|
|
1493
|
-
continue # It's actually detached, continue
|
|
1494
|
-
|
|
1495
|
-
console.print(
|
|
1496
|
-
f" [red]❌ Failed to detach {s['studio_id']}[/red]"
|
|
1497
|
-
)
|
|
1498
|
-
detach_failed.append(s['studio_id'])
|
|
1499
|
-
|
|
1500
|
-
if detach_failed:
|
|
1501
|
-
console.print(
|
|
1502
|
-
f"[red]Failed to detach {len(detach_failed)} studio(s). Aborting AMI creation.[/red]"
|
|
1503
|
-
)
|
|
1504
|
-
return
|
|
1505
|
-
|
|
1506
|
-
# Wait briefly for volumes to become available (max 2 min)
|
|
1507
|
-
# (time is already imported at module level)
|
|
1508
|
-
ec2_wait = boto3.client("ec2", region_name="us-east-1")
|
|
1509
|
-
vol_ids = [s["studio_id"] for s in attached_studios]
|
|
1510
|
-
console.print("Waiting for volumes to detach…")
|
|
1511
|
-
|
|
1512
|
-
# Check volume states directly instead of using waiter
|
|
1513
|
-
for attempt in range(24): # Max 2 minutes
|
|
1514
|
-
time.sleep(5)
|
|
1515
|
-
volumes = ec2_wait.describe_volumes(VolumeIds=vol_ids)["Volumes"]
|
|
1516
|
-
all_available = all(v["State"] == "available" for v in volumes)
|
|
1517
|
-
if all_available:
|
|
1518
|
-
console.print("[green]✓ All studios detached[/green]")
|
|
1519
|
-
break
|
|
1520
|
-
else:
|
|
1521
|
-
console.print(
|
|
1522
|
-
"[yellow]⚠ Some volumes may still be detaching, but proceeding...[/yellow]"
|
|
1523
|
-
)
|
|
1524
|
-
|
|
1441
|
+
# Acknowledge that the SSM command might be in progress as the agent shuts down
|
|
1442
|
+
console.print("[dim]ℹ️ Cleanup command sent (status may show 'InProgress' as SSM agent stops)[/dim]")
|
|
1443
|
+
|
|
1525
1444
|
# Create the AMI
|
|
1526
1445
|
with Progress(
|
|
1527
1446
|
SpinnerColumn(),
|
|
1528
1447
|
TextColumn("[progress.description]{task.description}"),
|
|
1529
1448
|
transient=True,
|
|
1530
1449
|
) as progress:
|
|
1531
|
-
progress.add_task(
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
"NoReboot": False, # Important: reboot for clean snapshot
|
|
1540
|
-
"TagSpecifications": [
|
|
1450
|
+
task = progress.add_task("Creating AMI (this will take several minutes)...", total=None)
|
|
1451
|
+
|
|
1452
|
+
response = ec2.create_image(
|
|
1453
|
+
InstanceId=engine["instance_id"],
|
|
1454
|
+
Name=ami_name,
|
|
1455
|
+
Description=description,
|
|
1456
|
+
NoReboot=False,
|
|
1457
|
+
TagSpecifications=[
|
|
1541
1458
|
{
|
|
1542
1459
|
"ResourceType": "image",
|
|
1543
1460
|
"Tags": [
|
|
1544
1461
|
{"Key": "Environment", "Value": "dev"},
|
|
1545
1462
|
{"Key": "Type", "Value": "golden-ami"},
|
|
1546
|
-
{"Key": "EngineType", "Value": engine_type},
|
|
1463
|
+
{"Key": "EngineType", "Value": engine['engine_type']},
|
|
1547
1464
|
{"Key": "Name", "Value": ami_name},
|
|
1548
1465
|
],
|
|
1549
1466
|
}
|
|
1550
1467
|
],
|
|
1551
|
-
}
|
|
1552
|
-
|
|
1553
|
-
if block_device_mappings:
|
|
1554
|
-
create_params["BlockDeviceMappings"] = block_device_mappings
|
|
1555
|
-
|
|
1556
|
-
response = ec2.create_image(**create_params)
|
|
1557
|
-
|
|
1558
|
-
ami_id = response["ImageId"]
|
|
1559
|
-
console.print(f"[green]✓ AMI creation initiated![/green]")
|
|
1560
|
-
console.print(f"AMI ID: [cyan]{ami_id}[/cyan]")
|
|
1561
|
-
|
|
1562
|
-
# Restore the source engine to a normal state
|
|
1563
|
-
console.print("Restoring source engine state...")
|
|
1564
|
-
|
|
1565
|
-
# Wait for instance to come back after reboot (AMI creation reboots by default)
|
|
1566
|
-
console.print("[dim]Waiting for engine to reboot after snapshot...[/dim]")
|
|
1567
|
-
ec2_waiter = ec2.get_waiter('instance_status_ok')
|
|
1568
|
-
try:
|
|
1569
|
-
ec2_waiter.wait(
|
|
1570
|
-
InstanceIds=[engine["instance_id"]],
|
|
1571
|
-
WaiterConfig={'Delay': 10, 'MaxAttempts': 30} # Wait up to 5 minutes
|
|
1572
|
-
)
|
|
1573
|
-
except Exception as e:
|
|
1574
|
-
console.print(f"[yellow]⚠️ Warning: Engine may still be rebooting: {e}[/yellow]")
|
|
1575
|
-
|
|
1576
|
-
# Now restore the sentinel and restart services
|
|
1577
|
-
restore_response = ssm.send_command(
|
|
1578
|
-
InstanceIds=[engine["instance_id"]],
|
|
1579
|
-
DocumentName="AWS-RunShellScript",
|
|
1580
|
-
Parameters={
|
|
1581
|
-
"commands": [
|
|
1582
|
-
# Ensure the directories exist
|
|
1583
|
-
"sudo mkdir -p /opt/dayhoff /opt/dayhoff/state",
|
|
1584
|
-
# Recreate the sentinel file
|
|
1585
|
-
"sudo touch /opt/dayhoff/first_boot_complete.sentinel",
|
|
1586
|
-
# Mark bootstrap as finished
|
|
1587
|
-
"echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
|
|
1588
|
-
# Restart idle detector if it exists
|
|
1589
|
-
"sudo systemctl restart engine-idle-detector.timer 2>/dev/null || true",
|
|
1590
|
-
# Ensure SSM agent is running
|
|
1591
|
-
"sudo systemctl start amazon-ssm-agent 2>/dev/null || true",
|
|
1592
|
-
],
|
|
1593
|
-
"executionTimeout": ["60"],
|
|
1594
|
-
},
|
|
1595
1468
|
)
|
|
1596
1469
|
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
for _ in range(10):
|
|
1600
|
-
time.sleep(2)
|
|
1601
|
-
result = ssm.get_command_invocation(
|
|
1602
|
-
CommandId=restore_command_id,
|
|
1603
|
-
InstanceId=engine["instance_id"],
|
|
1604
|
-
)
|
|
1605
|
-
if result["Status"] in ["Success", "Failed"]:
|
|
1606
|
-
break
|
|
1607
|
-
|
|
1608
|
-
if result["Status"] == "Success":
|
|
1609
|
-
console.print(
|
|
1610
|
-
"[green]✓ Source engine restored to normal operation.[/green]"
|
|
1611
|
-
)
|
|
1612
|
-
else:
|
|
1613
|
-
console.print(
|
|
1614
|
-
"[yellow]⚠️ Warning: Engine state restoration incomplete. You may need to run:[/yellow]"
|
|
1615
|
-
)
|
|
1616
|
-
console.print(
|
|
1617
|
-
f"[dim] dh engine repair {engine['name']}[/dim]"
|
|
1618
|
-
)
|
|
1470
|
+
ami_id = response["ImageId"]
|
|
1471
|
+
progress.update(task, completed=True, description=f"[green]✓ AMI creation initiated![/green]")
|
|
1619
1472
|
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
f"terraform/environments/dev to use the new AMI."
|
|
1629
|
-
)
|
|
1473
|
+
console.print(f" [bold]AMI ID:[/] {ami_id}")
|
|
1474
|
+
console.print("\nThe AMI creation process will continue in the background.")
|
|
1475
|
+
console.print("You can monitor progress in the EC2 Console under 'AMIs'.")
|
|
1476
|
+
console.print(
|
|
1477
|
+
"\nOnce complete, update the AMI ID in [bold]terraform/environments/dev/variables.tf[/bold] "
|
|
1478
|
+
"and run [bold]terraform apply[/bold]."
|
|
1479
|
+
)
|
|
1480
|
+
console.print(f"\nRemember to [bold red]terminate the source engine '{engine['name']}'[/bold red] to save costs.")
|
|
1630
1481
|
|
|
1631
1482
|
except ClientError as e:
|
|
1632
1483
|
console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
|
|
1633
1484
|
raise typer.Exit(1)
|
|
1634
1485
|
|
|
1635
1486
|
|
|
1487
|
+
|
|
1636
1488
|
# ==================== STUDIO COMMANDS ====================
|
|
1637
1489
|
|
|
1638
1490
|
|
|
@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
|
|
|
3
3
|
dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
|
|
4
4
|
dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
|
|
6
|
-
dayhoff_tools/cli/engine_commands.py,sha256=
|
|
6
|
+
dayhoff_tools/cli/engine_commands.py,sha256=Pqh-x2dycEuAE-Ts762KWMdXFZXhClbMmSWTEjsb45o,92065
|
|
7
7
|
dayhoff_tools/cli/main.py,sha256=tRN7WCBHg6uyNp6rA54pKTCoVmBntta2i0Yas3bUpZ4,4853
|
|
8
8
|
dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
|
|
9
9
|
dayhoff_tools/cli/utility_commands.py,sha256=FRZTPrjsG_qmIIqoNxd1Q1vVkS_5w8aY33IrVYVNCLg,18131
|
|
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
|
27
27
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
|
28
28
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
|
29
29
|
dayhoff_tools/warehouse.py,sha256=heaYc64qplgN3_1WVPFmqj53goStioWwY5NqlWc4c0s,24453
|
|
30
|
-
dayhoff_tools-1.6.
|
|
31
|
-
dayhoff_tools-1.6.
|
|
32
|
-
dayhoff_tools-1.6.
|
|
33
|
-
dayhoff_tools-1.6.
|
|
30
|
+
dayhoff_tools-1.6.2.dist-info/METADATA,sha256=wIKq0EVcXH3E0fz12fWus4_LYESTopdNP-5aT2rjkVk,2914
|
|
31
|
+
dayhoff_tools-1.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
32
|
+
dayhoff_tools-1.6.2.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
|
33
|
+
dayhoff_tools-1.6.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|