dayhoff-tools 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of dayhoff-tools might be problematic. Click here for more details.

@@ -1357,260 +1357,134 @@ def create_ami(
1357
1357
  help="Engine name or instance ID to create AMI from"
1358
1358
  ),
1359
1359
  ):
1360
- """Create a Golden AMI from an engine."""
1360
+ """Create a 'Golden AMI' from a running engine.
1361
+
1362
+ This process is for creating a pre-warmed, standardized machine image
1363
+ that can be used to launch new engines more quickly.
1364
+
1365
+ IMPORTANT:
1366
+ - The engine MUST have all studios detached before running this command.
1367
+ - This process will make the source engine unusable. You should
1368
+ plan to TERMINATE the engine after the AMI is created.
1369
+ """
1361
1370
  check_aws_sso()
1362
1371
 
1363
- # Get all engines to resolve name
1364
- response = make_api_request("GET", "/engines")
1372
+ # Get all engines to resolve name and check status
1373
+ # We pass check_ready=True to get attached studio info
1374
+ response = make_api_request("GET", "/engines", params={"check_ready": "true"})
1365
1375
  if response.status_code != 200:
1366
1376
  console.print("[red]❌ Failed to fetch engines[/red]")
1367
1377
  raise typer.Exit(1)
1368
-
1378
+
1369
1379
  engines = response.json().get("engines", [])
1370
1380
  engine = resolve_engine(name_or_id, engines)
1371
1381
 
1372
- console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
1382
+ # --- Pre-flight checks ---
1383
+
1384
+ # 1. Check if engine is running
1385
+ if engine["state"].lower() != "running":
1386
+ console.print(f"[red]❌ Engine '{engine['name']}' is not running.[/red]")
1387
+ console.print("Please start it before creating an AMI.")
1388
+ raise typer.Exit(1)
1373
1389
 
1374
- # Get engine type from the engine
1375
- engine_type = engine["engine_type"]
1390
+ # 2. Check for attached studios from the detailed API response
1391
+ attached_studios = engine.get("studios", [])
1392
+ if attached_studios:
1393
+ console.print(f"[bold red]❌ Engine '{engine['name']}' has studios attached.[/bold red]")
1394
+ console.print("Please detach all studios before creating an AMI:")
1395
+ for studio in attached_studios:
1396
+ console.print(f" - {studio['user']} ({studio['studio_id']})")
1397
+ console.print("\nTo detach, run [bold]dh studio detach[/bold]")
1398
+ raise typer.Exit(1)
1376
1399
 
1377
- # Generate AMI name
1378
- date_str = datetime.now().strftime("%Y%m%d")
1379
- ami_name = f"prewarmed-engine-{engine_type}-{date_str}"
1380
- ami_description = (
1381
- f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled dev container "
1382
- f"image for {engine_type} engines"
1400
+ # Construct AMI name and description
1401
+ ami_name = f"prewarmed-engine-{engine['engine_type']}-{datetime.now().strftime('%Y%m%d')}"
1402
+ description = (
1403
+ f"Amazon Linux 2023 with NVIDIA drivers, Docker, and pre-pulled "
1404
+ f"dev container image for {engine['engine_type']} engines"
1383
1405
  )
1384
1406
 
1385
- console.print(f"AMI Name: [cyan]{ami_name}[/cyan]")
1386
- console.print(f"Description: {ami_description}")
1407
+ console.print(f"Creating AMI from engine [cyan]{engine['name']}[/cyan]...")
1408
+ console.print(f"[bold]AMI Name:[/] {ami_name}")
1409
+ console.print(f"[bold]Description:[/] {description}")
1410
+
1387
1411
  console.print(
1388
- "\n[yellow]⚠️ Important: This will detach all studios and reboot the engine to ensure a clean snapshot.[/yellow]"
1412
+ "\n[bold yellow]⚠️ Important:[/bold yellow]\n"
1413
+ "1. This process will run cleanup scripts on the engine.\n"
1414
+ "2. The source engine should be [bold]terminated[/bold] after the AMI is created.\n"
1389
1415
  )
1390
1416
 
1391
- if not Confirm.ask("\nContinue with AMI creation?"):
1392
- console.print("AMI creation cancelled.")
1393
- return
1417
+ if not Confirm.ask("Continue with AMI creation?"):
1418
+ raise typer.Exit()
1394
1419
 
1395
- # Create AMI using EC2 client
1420
+ # Create AMI using EC2 client directly, as the backend logic is too complex
1396
1421
  ec2 = boto3.client("ec2", region_name="us-east-1")
1422
+ ssm = boto3.client("ssm", region_name="us-east-1")
1397
1423
 
1398
1424
  try:
1399
- # First, we need to clean up the instance before snapshotting
1425
+ # Clean up instance state before snapshotting
1400
1426
  console.print("Cleaning up instance for AMI creation...")
1401
- ssm = boto3.client("ssm", region_name="us-east-1")
1402
-
1403
- # Clean up instance state, stopping SSM agent last to allow proper status reporting
1404
1427
  cleanup_commands = [
1405
1428
  "sudo rm -f /opt/dayhoff/first_boot_complete.sentinel",
1406
1429
  "history -c",
1407
1430
  "sudo rm -rf /tmp/* /var/log/messages /var/log/cloud-init.log",
1408
1431
  "sudo rm -rf /var/lib/amazon/ssm/* /etc/amazon/ssm/*",
1409
- # Stop SSM agent last with a delay to allow status reporting
1410
- "sleep 2 && sudo systemctl stop amazon-ssm-agent &",
1432
+ "sleep 2 && sudo systemctl stop amazon-ssm-agent &", # Stop agent last
1411
1433
  ]
1412
1434
 
1413
1435
  cleanup_response = ssm.send_command(
1414
1436
  InstanceIds=[engine["instance_id"]],
1415
1437
  DocumentName="AWS-RunShellScript",
1416
- Parameters={
1417
- "commands": cleanup_commands,
1418
- "executionTimeout": ["120"],
1419
- },
1420
- )
1421
-
1422
- # Wait for cleanup to complete
1423
- command_id = cleanup_response["Command"]["CommandId"]
1424
- for _ in range(10):
1425
- time.sleep(1)
1426
- result = ssm.get_command_invocation(
1427
- CommandId=command_id,
1428
- InstanceId=engine["instance_id"],
1429
- )
1430
- if result["Status"] in ["Success", "Failed"]:
1431
- break
1432
-
1433
- # Note: InProgress status is expected when SSM agent stops itself
1434
- if result["Status"] not in ["Success", "InProgress"]:
1435
- console.print(
1436
- f"[yellow]⚠️ Warning: Cleanup command status: {result['Status']}[/yellow]"
1437
- )
1438
- elif result["Status"] == "InProgress":
1439
- console.print(
1440
- "[dim]ℹ️ Cleanup command still in progress (expected when SSM agent stops itself)[/dim]"
1441
- )
1442
-
1443
- # Get instance details to find volumes to exclude
1444
- instances = ec2.describe_instances(InstanceIds=[engine["instance_id"]])
1445
- instance = instances["Reservations"][0]["Instances"][0]
1446
-
1447
- root_device = instance.get("RootDeviceName", "/dev/xvda")
1448
- block_mappings = instance.get("BlockDeviceMappings", [])
1449
-
1450
- # Build exclusion list for non-root volumes
1451
- block_device_mappings = []
1452
- for mapping in block_mappings:
1453
- device_name = mapping.get("DeviceName", "")
1454
- if device_name != root_device:
1455
- block_device_mappings.append(
1456
- {"DeviceName": device_name, "NoDevice": ""}
1457
- )
1458
- console.print(f" Excluding volume at {device_name}")
1459
-
1460
- # --- Check & detach attached studios --------------------------------------------------
1461
- # If any user studios are still attached we must detach them before the instance reboots
1462
- # for snapshot consistency; otherwise Studio-Manager metadata becomes stale.
1463
-
1464
- attached_resp = make_api_request(
1465
- "GET", f"/engines/{engine['instance_id']}/studios"
1466
- )
1467
- attached_studios = (
1468
- attached_resp.json().get("studios", [])
1469
- if attached_resp.status_code == 200
1470
- else []
1438
+ Parameters={"commands": cleanup_commands, "executionTimeout": ["120"]},
1471
1439
  )
1472
1440
 
1473
- if attached_studios:
1474
- console.print(
1475
- f"Detaching {len(attached_studios)} studio(s) from this engine…"
1476
- )
1477
- for s in attached_studios:
1478
- console.print(f" • {s['user']} ({s['studio_id']})")
1479
-
1480
- for s in attached_studios:
1481
- resp = make_api_request("POST", f"/studios/{s['studio_id']}/detach")
1482
- if resp.status_code != 200:
1483
- console.print(
1484
- f"[red]❌ Failed to detach {s['studio_id']} – aborting.[/red]"
1485
- )
1486
- return
1487
-
1488
- # Wait briefly for volumes to become available (max 2 min)
1489
- # (time is already imported at module level)
1490
- ec2_wait = boto3.client("ec2", region_name="us-east-1")
1491
- vol_ids = [s["studio_id"] for s in attached_studios]
1492
- console.print("Waiting for volumes to detach…")
1493
- waiter = ec2_wait.get_waiter("volume_available")
1494
- try:
1495
- waiter.wait(
1496
- VolumeIds=vol_ids, WaiterConfig={"Delay": 5, "MaxAttempts": 24}
1497
- )
1498
- except Exception:
1499
- console.print(
1500
- "[yellow]Proceeding even though some volumes may still be detaching.[/yellow]"
1501
- )
1502
-
1441
+ # Acknowledge that the SSM command might be in progress as the agent shuts down
1442
+ console.print("[dim]ℹ️ Cleanup command sent (status may show 'InProgress' as SSM agent stops)[/dim]")
1443
+
1503
1444
  # Create the AMI
1504
1445
  with Progress(
1505
1446
  SpinnerColumn(),
1506
1447
  TextColumn("[progress.description]{task.description}"),
1507
1448
  transient=True,
1508
1449
  ) as progress:
1509
- progress.add_task(
1510
- "Creating AMI (this will take several minutes)...", total=None
1511
- )
1512
-
1513
- create_params = {
1514
- "InstanceId": engine["instance_id"],
1515
- "Name": ami_name,
1516
- "Description": ami_description,
1517
- "NoReboot": False, # Important: reboot for clean snapshot
1518
- "TagSpecifications": [
1450
+ task = progress.add_task("Creating AMI (this will take several minutes)...", total=None)
1451
+
1452
+ response = ec2.create_image(
1453
+ InstanceId=engine["instance_id"],
1454
+ Name=ami_name,
1455
+ Description=description,
1456
+ NoReboot=False,
1457
+ TagSpecifications=[
1519
1458
  {
1520
1459
  "ResourceType": "image",
1521
1460
  "Tags": [
1522
1461
  {"Key": "Environment", "Value": "dev"},
1523
1462
  {"Key": "Type", "Value": "golden-ami"},
1524
- {"Key": "EngineType", "Value": engine_type},
1463
+ {"Key": "EngineType", "Value": engine['engine_type']},
1525
1464
  {"Key": "Name", "Value": ami_name},
1526
1465
  ],
1527
1466
  }
1528
1467
  ],
1529
- }
1530
-
1531
- if block_device_mappings:
1532
- create_params["BlockDeviceMappings"] = block_device_mappings
1533
-
1534
- response = ec2.create_image(**create_params)
1535
-
1536
- ami_id = response["ImageId"]
1537
- console.print(f"[green]✓ AMI creation initiated![/green]")
1538
- console.print(f"AMI ID: [cyan]{ami_id}[/cyan]")
1539
-
1540
- # Restore the source engine to a normal state
1541
- console.print("Restoring source engine state...")
1542
-
1543
- # Wait for instance to come back after reboot (AMI creation reboots by default)
1544
- console.print("[dim]Waiting for engine to reboot after snapshot...[/dim]")
1545
- ec2_waiter = ec2.get_waiter('instance_status_ok')
1546
- try:
1547
- ec2_waiter.wait(
1548
- InstanceIds=[engine["instance_id"]],
1549
- WaiterConfig={'Delay': 10, 'MaxAttempts': 30} # Wait up to 5 minutes
1550
- )
1551
- except Exception as e:
1552
- console.print(f"[yellow]⚠️ Warning: Engine may still be rebooting: {e}[/yellow]")
1553
-
1554
- # Now restore the sentinel and restart services
1555
- restore_response = ssm.send_command(
1556
- InstanceIds=[engine["instance_id"]],
1557
- DocumentName="AWS-RunShellScript",
1558
- Parameters={
1559
- "commands": [
1560
- # Ensure the directories exist
1561
- "sudo mkdir -p /opt/dayhoff /opt/dayhoff/state",
1562
- # Recreate the sentinel file
1563
- "sudo touch /opt/dayhoff/first_boot_complete.sentinel",
1564
- # Mark bootstrap as finished
1565
- "echo 'finished' | sudo tee /opt/dayhoff/state/engine-init.stage > /dev/null",
1566
- # Restart idle detector if it exists
1567
- "sudo systemctl restart engine-idle-detector.timer 2>/dev/null || true",
1568
- # Ensure SSM agent is running
1569
- "sudo systemctl start amazon-ssm-agent 2>/dev/null || true",
1570
- ],
1571
- "executionTimeout": ["60"],
1572
- },
1573
1468
  )
1574
1469
 
1575
- # Wait for restore command to complete
1576
- restore_command_id = restore_response["Command"]["CommandId"]
1577
- for _ in range(10):
1578
- time.sleep(2)
1579
- result = ssm.get_command_invocation(
1580
- CommandId=restore_command_id,
1581
- InstanceId=engine["instance_id"],
1582
- )
1583
- if result["Status"] in ["Success", "Failed"]:
1584
- break
1585
-
1586
- if result["Status"] == "Success":
1587
- console.print(
1588
- "[green]✓ Source engine restored to normal operation.[/green]"
1589
- )
1590
- else:
1591
- console.print(
1592
- "[yellow]⚠️ Warning: Engine state restoration incomplete. You may need to run:[/yellow]"
1593
- )
1594
- console.print(
1595
- f"[dim] dh engine repair {engine['name']}[/dim]"
1596
- )
1470
+ ami_id = response["ImageId"]
1471
+ progress.update(task, completed=True, description=f"[green]✓ AMI creation initiated![/green]")
1597
1472
 
1598
- console.print(
1599
- "\n[dim]The AMI creation process will continue in the background.[/dim]"
1600
- )
1601
- console.print(
1602
- "[dim]You can monitor progress in the EC2 Console under 'AMIs'.[/dim]"
1603
- )
1604
- console.print(
1605
- f"\nOnce complete, run [cyan]terraform apply[/cyan] in "
1606
- f"terraform/environments/dev to use the new AMI."
1607
- )
1473
+ console.print(f" [bold]AMI ID:[/] {ami_id}")
1474
+ console.print("\nThe AMI creation process will continue in the background.")
1475
+ console.print("You can monitor progress in the EC2 Console under 'AMIs'.")
1476
+ console.print(
1477
+ "\nOnce complete, update the AMI ID in [bold]terraform/environments/dev/variables.tf[/bold] "
1478
+ "and run [bold]terraform apply[/bold]."
1479
+ )
1480
+ console.print(f"\nRemember to [bold red]terminate the source engine '{engine['name']}'[/bold red] to save costs.")
1608
1481
 
1609
1482
  except ClientError as e:
1610
1483
  console.print(f"[red]❌ Failed to create AMI: {e}[/red]")
1611
1484
  raise typer.Exit(1)
1612
1485
 
1613
1486
 
1487
+
1614
1488
  # ==================== STUDIO COMMANDS ====================
1615
1489
 
1616
1490
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.6.0
3
+ Version: 1.6.2
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
3
3
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
4
4
  dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
6
- dayhoff_tools/cli/engine_commands.py,sha256=RSbZK2hN3bTBQrZxAgooqMsqSiw0hnhmzTVkL44xdD4,97147
6
+ dayhoff_tools/cli/engine_commands.py,sha256=Pqh-x2dycEuAE-Ts762KWMdXFZXhClbMmSWTEjsb45o,92065
7
7
  dayhoff_tools/cli/main.py,sha256=tRN7WCBHg6uyNp6rA54pKTCoVmBntta2i0Yas3bUpZ4,4853
8
8
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
9
9
  dayhoff_tools/cli/utility_commands.py,sha256=FRZTPrjsG_qmIIqoNxd1Q1vVkS_5w8aY33IrVYVNCLg,18131
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
27
27
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
28
28
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
29
29
  dayhoff_tools/warehouse.py,sha256=heaYc64qplgN3_1WVPFmqj53goStioWwY5NqlWc4c0s,24453
30
- dayhoff_tools-1.6.0.dist-info/METADATA,sha256=aXQi1sUTKyyRPmViY6r7pWBnTaTDV2v8krFQK1wLvNY,2914
31
- dayhoff_tools-1.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
- dayhoff_tools-1.6.0.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
- dayhoff_tools-1.6.0.dist-info/RECORD,,
30
+ dayhoff_tools-1.6.2.dist-info/METADATA,sha256=wIKq0EVcXH3E0fz12fWus4_LYESTopdNP-5aT2rjkVk,2914
31
+ dayhoff_tools-1.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ dayhoff_tools-1.6.2.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
+ dayhoff_tools-1.6.2.dist-info/RECORD,,