dayhoff-tools 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dayhoff-tools might be problematic. Click here for more details.

@@ -1357,282 +1357,134 @@ def create_ami(
1357
1357
  help="Engine name or instance ID to create AMI from"
1358
1358
  ),
1359
1359
  ):
1360
- """Create a Golden AMI from an engine."""
1360
+ """Create a 'Golden AMI' from a running engine.
1361
+
1362
+ This process is for creating a pre-warmed, standardized machine image
1363
+ that can be used to launch new engines more quickly.
1364
+
1365
+ IMPORTANT:
1366
+ - The engine MUST have all studios detached before running this command.
1367
+ - This process will make the source engine unusable. You should
1368
+ plan to TERMINATE the engine after the AMI is created.
1369
+ """
1361
1370
  check_aws_sso()
1362
1371
 
1363
- # Get all engines to resolve name
1364
- response = make_api_request("GET", "/engines")
1372
+ # Get all engines to resolve name and check status
1373
+ # We pass check_ready=True to get attached studio info
1374
+ response = make_api_request("GET", "/engines", params={"check_ready": "true"})
1365
1375
  if response.status_code != 200:
1366
1376
  console.print("[red]❌ Failed to fetch engines[/red]")
1367
1377
  raise typer.Exit(1)
1368
-
1378
+
1369
1379
  engines = response.json().get("engines", [])
1370
1380
  engine = resolve_engine(name_or_id, engines)
1371
1381
 
1372
- console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
1382
+ # --- Pre-flight checks ---
1383
+
1384
+ # 1. Check if engine is running
1385
+ if engine["state"].lower() != "running":
1386
+ console.print(f"[red]❌ Engine '{engine['name']}' is not running.[/red]")
1387
+ console.print("Please start it before creating an AMI.")
1388
+ raise typer.Exit(1)
1373
1389
 
1374
- # Get engine type from the engine
1375
- engine_type = engine["engine_type"]
1390
+ # 2. Check for attached studios from the detailed API response
1391
+ attached_studios = engine.get("studios", [])
1392
+ if attached_studios:
1393
+ console.print(f"[bold red]❌ Engine '{engine['name']}' has studios attached.[/bold red]")
1394
+ console.print("Please detach all studios before creating an AMI:")
1395
+ for studio in attached_studios:
1396
+ console.print(f" - {studio['user']} ({studio['studio_id']})")
1397
+ console.print("\nTo detach, run [bold]dh studio detach[/bold]")
1398
+ raise typer.Exit(1)
1376
1399
 
1377
- # Generate AMI name
1378
- date_str = datetime.now().strftime("%Y%m%d")
1379
- ami_name = f"prewarmed-engine-{engine_type}-{date_str}"
1380
- ami_description = (
1381
- f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled dev container "
1382
- f"image for {engine_type} engines"
1400
+ # Construct AMI name and description
1401
+ ami_name = f"prewarmed-engine-{engine['engine_type']}-{datetime.now().strftime('%Y%m%d')}"
1402
+ description = (
1403
+ f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled "
1404
+ f"dev container image for {engine['engine_type']} engines"
1383
1405
  )
1384
1406
 
1385
- console.print(f"AMI Name: [cyan]{ami_name}[/cyan]")
1386
- console.print(f"Description: {ami_description}")
1407
+ console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
1408
+ console.print(f"[bold]AMI Name:[/] {ami_name}")
1409
+ console.print(f"[bold]Description:[/] {description}")
1410
+
1387
1411
  console.print(
1388
- "\n[yellow]⚠️ Important: This will detach all studios and reboot the engine to ensure a clean snapshot.[/yellow]"
1412
+ "\n[bold yellow]⚠️ Important:[/bold yellow]\n"
1413
+ "1. This process will run cleanup scripts on the engine.\n"
1414
+ "2. The source engine should be [bold]terminated[/bold] after the AMI is created.\n"
1389
1415
  )
1390
1416
 
1391
- if not Confirm.ask("\nContinue with AMI creation?"):
1392
- console.print("AMI creation cancelled.")
1393
- return
1417
+ if not Confirm.ask("Continue with AMI creation?"):
1418
+ raise typer.Exit()
1394
1419
 
1395
- # Create AMI using EC2 client
1420
+ # Create AMI using EC2 client directly, as the backend logic is too complex
1396
1421
  ec2 = boto3.client("ec2", region_name="us-east-1")
1422
+ ssm = boto3.client("ssm", region_name="us-east-1")
1397
1423
 
1398
1424
  try:
1399
- # First, we need to clean up the instance before snapshotting
1425
+ # Clean up instance state before snapshotting
1400
1426
  console.print("Cleaning up instance for AMI creation...")
1401
- ssm = boto3.client("ssm", region_name="us-east-1")
1402
-
1403
- # Clean up instance state, stopping SSM agent last to allow proper status reporting
1404
1427
  cleanup_commands = [
1405
1428
  "sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
1406
1429
  "history -c",
1407
1430
  "sudo rm -rf /tmp/* /var/log/messages /var/log/cloud-init.log",
1408
1431
  "sudo rm -rf /var/lib/amazon/ssm/* /etc/amazon/ssm/*",
1409
- # Stop SSM agent last with a delay to allow status reporting
1410
- "sleep 2 && sudo systemctl stop amazon-ssm-agent &",
1432
+ "sleep 2 && sudo systemctl stop amazon-ssm-agent &", # Stop agent last
1411
1433
  ]
1412
1434
 
1413
1435
  cleanup_response = ssm.send_command(
1414
1436
  InstanceIds=[engine["instance_id"]],
1415
1437
  DocumentName="AWS-RunShellScript",
1416
- Parameters={
1417
- "commands": cleanup_commands,
1418
- "executionTimeout": ["120"],
1419
- },
1438
+ Parameters={"commands": cleanup_commands, "executionTimeout": ["120"]},
1420
1439
  )
1421
1440
 
1422
- # Wait for cleanup to complete
1423
- command_id = cleanup_response["Command"]["CommandId"]
1424
- for _ in range(10):
1425
- time.sleep(1)
1426
- result = ssm.get_command_invocation(
1427
- CommandId=command_id,
1428
- InstanceId=engine["instance_id"],
1429
- )
1430
- if result["Status"] in ["Success", "Failed"]:
1431
- break
1432
-
1433
- # Note: InProgress status is expected when SSM agent stops itself
1434
- if result["Status"] not in ["Success", "InProgress"]:
1435
- console.print(
1436
- f"[yellow]⚠️ Warning: Cleanup command status: {result['Status']}[/yellow]"
1437
- )
1438
- elif result["Status"] == "InProgress":
1439
- console.print(
1440
- "[dim]ℹ️ Cleanup command still in progress (expected when SSM agent stops itself)[/dim]"
1441
- )
1442
-
1443
- # Get instance details to find volumes to exclude
1444
- instances = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
1445
- instance = instances["Reservations"][0]["Instances"][0]
1446
-
1447
- root_device = instance.get("RootDeviceName", "/dev/xvda")
1448
- block_mappings = instance.get("BlockDeviceMappings", [])
1449
-
1450
- # Build exclusion list for non-root volumes
1451
- block_device_mappings = []
1452
- for mapping in block_mappings:
1453
- device_name = mapping.get("DeviceName", "")
1454
- if device_name != root_device:
1455
- block_device_mappings.append(
1456
- {"DeviceName": device_name, "NoDevice": ""}
1457
- )
1458
- console.print(f" Excluding volume at {device_name}")
1459
-
1460
- # --- Check & detach attached studios --------------------------------------------------
1461
- # If any user studios are still attached we must detach them before the instance reboots
1462
- # for snapshot consistency; otherwise Studio-Manager metadata becomes stale.
1463
-
1464
- attached_resp = make_api_request(
1465
- "GET", f"/engines/{engine['instance_id']}/studios"
1466
- )
1467
- attached_studios = (
1468
- attached_resp.json().get("studios", [])
1469
- if attached_resp.status_code == 200
1470
- else []
1471
- )
1472
-
1473
- if attached_studios:
1474
- console.print(
1475
- f"Detaching {len(attached_studios)} studio(s) from this engine…"
1476
- )
1477
- for s in attached_studios:
1478
- console.print(f" • {s['user']} ({s['studio_id']})")
1479
-
1480
- detach_failed = []
1481
- for s in attached_studios:
1482
- resp = make_api_request("POST", f"/studios/{s['studio_id']}/detach")
1483
- if resp.status_code != 200:
1484
- # Check if actually detached despite error
1485
- time.sleep(2)
1486
- check_resp = make_api_request("GET", f"/studios/{s['studio_id']}")
1487
- if check_resp.status_code == 200:
1488
- studio_data = check_resp.json()
1489
- if studio_data.get("status") == "available":
1490
- console.print(
1491
- f" [yellow]⚠ {s['studio_id']} reported error but is detached[/yellow]"
1492
- )
1493
- continue # It's actually detached, continue
1494
-
1495
- console.print(
1496
- f" [red]❌ Failed to detach {s['studio_id']}[/red]"
1497
- )
1498
- detach_failed.append(s['studio_id'])
1499
-
1500
- if detach_failed:
1501
- console.print(
1502
- f"[red]Failed to detach {len(detach_failed)} studio(s). Aborting AMI creation.[/red]"
1503
- )
1504
- return
1505
-
1506
- # Wait briefly for volumes to become available (max 2 min)
1507
- # (time is already imported at module level)
1508
- ec2_wait = boto3.client("ec2", region_name="us-east-1")
1509
- vol_ids = [s["studio_id"] for s in attached_studios]
1510
- console.print("Waiting for volumes to detach…")
1511
-
1512
- # Check volume states directly instead of using waiter
1513
- for attempt in range(24): # Max 2 minutes
1514
- time.sleep(5)
1515
- volumes = ec2_wait.describe_volumes(VolumeIds=vol_ids)["Volumes"]
1516
- all_available = all(v["State"] == "available" for v in volumes)
1517
- if all_available:
1518
- console.print("[green]✓ All studios detached[/green]")
1519
- break
1520
- else:
1521
- console.print(
1522
- "[yellow]⚠ Some volumes may still be detaching, but proceeding...[/yellow]"
1523
- )
1524
-
1441
+ # Acknowledge that the SSM command might be in progress as the agent shuts down
1442
+ console.print("[dim]ℹ️ Cleanup command sent (status may show 'InProgress' as SSM agent stops)[/dim]")
1443
+
1525
1444
  # Create the AMI
1526
1445
  with Progress(
1527
1446
  SpinnerColumn(),
1528
1447
  TextColumn("[progress.description]{task.description}"),
1529
1448
  transient=True,
1530
1449
  ) as progress:
1531
- progress.add_task(
1532
- "Creating AMI (this will take several minutes)...", total=None
1533
- )
1534
-
1535
- create_params = {
1536
- "InstanceId": engine["instance_id"],
1537
- "Name": ami_name,
1538
- "Description": ami_description,
1539
- "NoReboot": False, # Important: reboot for clean snapshot
1540
- "TagSpecifications": [
1450
+ task = progress.add_task("Creating AMI (this will take several minutes)...", total=None)
1451
+
1452
+ response = ec2.create_image(
1453
+ InstanceId=engine["instance_id"],
1454
+ Name=ami_name,
1455
+ Description=description,
1456
+ NoReboot=False,
1457
+ TagSpecifications=[
1541
1458
  {
1542
1459
  "ResourceType": "image",
1543
1460
  "Tags": [
1544
1461
  {"Key": "Environment", "Value": "dev"},
1545
1462
  {"Key": "Type", "Value": "golden-ami"},
1546
- {"Key": "EngineType", "Value": engine_type},
1463
+ {"Key": "EngineType", "Value": engine['engine_type']},
1547
1464
  {"Key": "Name", "Value": ami_name},
1548
1465
  ],
1549
1466
  }
1550
1467
  ],
1551
- }
1552
-
1553
- if block_device_mappings:
1554
- create_params["BlockDeviceMappings"] = block_device_mappings
1555
-
1556
- response = ec2.create_image(**create_params)
1557
-
1558
- ami_id = response["ImageId"]
1559
- console.print(f"[green]✓ AMI creation initiated![/green]")
1560
- console.print(f"AMI ID: [cyan]{ami_id}[/cyan]")
1561
-
1562
- # Restore the source engine to a normal state
1563
- console.print("Restoring source engine state...")
1564
-
1565
- # Wait for instance to come back after reboot (AMI creation reboots by default)
1566
- console.print("[dim]Waiting for engine to reboot after snapshot...[/dim]")
1567
- ec2_waiter = ec2.get_waiter('instance_status_ok')
1568
- try:
1569
- ec2_waiter.wait(
1570
- InstanceIds=[engine["instance_id"]],
1571
- WaiterConfig={'Delay': 10, 'MaxAttempts': 30} # Wait up to 5 minutes
1572
- )
1573
- except Exception as e:
1574
- console.print(f"[yellow]⚠️ Warning: Engine may still be rebooting: {e}[/yellow]")
1575
-
1576
- # Now restore the sentinel and restart services
1577
- restore_response = ssm.send_command(
1578
- InstanceIds=[engine["instance_id"]],
1579
- DocumentName="AWS-RunShellScript",
1580
- Parameters={
1581
- "commands": [
1582
- # Ensure the directories exist
1583
- "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state",
1584
- # Recreate the sentinel file
1585
- "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
1586
- # Mark bootstrap as finished
1587
- "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
1588
- # Restart idle detector if it exists
1589
- "sudo systemctl restart engine-idle-detector.timer 2>/dev/null || true",
1590
- # Ensure SSM agent is running
1591
- "sudo systemctl start amazon-ssm-agent 2>/dev/null || true",
1592
- ],
1593
- "executionTimeout": ["60"],
1594
- },
1595
1468
  )
1596
1469
 
1597
- # Wait for restore command to complete
1598
- restore_command_id = restore_response["Command"]["CommandId"]
1599
- for _ in range(10):
1600
- time.sleep(2)
1601
- result = ssm.get_command_invocation(
1602
- CommandId=restore_command_id,
1603
- InstanceId=engine["instance_id"],
1604
- )
1605
- if result["Status"] in ["Success", "Failed"]:
1606
- break
1607
-
1608
- if result["Status"] == "Success":
1609
- console.print(
1610
- "[green]✓ Source engine restored to normal operation.[/green]"
1611
- )
1612
- else:
1613
- console.print(
1614
- "[yellow]⚠️ Warning: Engine state restoration incomplete. You may need to run:[/yellow]"
1615
- )
1616
- console.print(
1617
- f"[dim] dh engine repair {engine['name']}[/dim]"
1618
- )
1470
+ ami_id = response["ImageId"]
1471
+ progress.update(task, completed=True, description=f"[green]✓ AMI creation initiated![/green]")
1619
1472
 
1620
- console.print(
1621
- "\n[dim]The AMI creation process will continue in the background.[/dim]"
1622
- )
1623
- console.print(
1624
- "[dim]You can monitor progress in the EC2 Console under 'AMIs'.[/dim]"
1625
- )
1626
- console.print(
1627
- f"\nOnce complete, run [cyan]terraform apply[/cyan] in "
1628
- f"terraform/environments/dev to use the new AMI."
1629
- )
1473
+ console.print(f" [bold]AMI ID:[/] {ami_id}")
1474
+ console.print("\nThe AMI creation process will continue in the background.")
1475
+ console.print("You can monitor progress in the EC2 Console under 'AMIs'.")
1476
+ console.print(
1477
+ "\nOnce complete, update the AMI ID in [bold]terraform/environments/dev/variables.tf[/bold] "
1478
+ "and run [bold]terraform apply[/bold]."
1479
+ )
1480
+ console.print(f"\nRemember to [bold red]terminate the source engine '{engine['name']}'[/bold red] to save costs.")
1630
1481
 
1631
1482
  except ClientError as e:
1632
1483
  console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
1633
1484
  raise typer.Exit(1)
1634
1485
 
1635
1486
 
1487
+
1636
1488
  # ==================== STUDIO COMMANDS ====================
1637
1489
 
1638
1490
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.6.1
3
+ Version: 1.6.2
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
3
3
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
4
4
  dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
6
- dayhoff_tools/cli/engine_commands.py,sha256=iZdl1eyJqwGVVm0vukY8sbnMv_k7CaesdnGBjE3cF0c,98283
6
+ dayhoff_tools/cli/engine_commands.py,sha256=Pqh-x2dycEuAE-Ts762KWMdXFZXhClbMmSWTEjsb45o,92065
7
7
  dayhoff_tools/cli/main.py,sha256=tRN7WCBHg6uyNp6rA54pKTCoVmBntta2i0Yas3bUpZ4,4853
8
8
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
9
9
  dayhoff_tools/cli/utility_commands.py,sha256=FRZTPrjsG_qmIIqoNxd1Q1vVkS_5w8aY33IrVYVNCLg,18131
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
27
27
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
28
28
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
29
29
  dayhoff_tools/warehouse.py,sha256=heaYc64qplgN3_1WVPFmqj53goStioWwY5NqlWc4c0s,24453
30
- dayhoff_tools-1.6.1.dist-info/METADATA,sha256=HL_pHrcjy_JbaTdP8O0mAP0xUzrEDp7VWpcFfuWwzbM,2914
31
- dayhoff_tools-1.6.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
- dayhoff_tools-1.6.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
- dayhoff_tools-1.6.1.dist-info/RECORD,,
30
+ dayhoff_tools-1.6.2.dist-info/METADATA,sha256=wIKq0EVcXH3E0fz12fWus4_LYESTopdNP-5aT2rjkVk,2914
31
+ dayhoff_tools-1.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ dayhoff_tools-1.6.2.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
+ dayhoff_tools-1.6.2.dist-info/RECORD,,