dayhoff-tools 1.3.25__py3-none-any.whl → 1.4.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ from botocore.exceptions import ClientError, NoCredentialsError
15
15
  from rich import box
16
16
  from rich.console import Console
17
17
  from rich.panel import Panel
18
- from rich.progress import Progress, SpinnerColumn, TextColumn
18
+ from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
19
19
  from rich.prompt import Confirm, IntPrompt, Prompt
20
20
  from rich.table import Table
21
21
  import re
@@ -382,8 +382,7 @@ def update_ssh_config_entry(engine_name: str, instance_id: str, ssh_user: str):
382
382
  Host {engine_name} {SSH_MANAGED_COMMENT}
383
383
  HostName {instance_id}
384
384
  User {ssh_user}
385
- ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
386
- """
385
+ ProxyCommand sh -c \"AWS_SSM_IDLE_TIMEOUT=600 aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'\"\n"""
387
386
 
388
387
  # Check if entry already exists
389
388
  host_line = f"Host {engine_name} {SSH_MANAGED_COMMENT}"
@@ -592,13 +591,16 @@ def engine_status(
592
591
  engines = response.json().get("engines", [])
593
592
  engine = resolve_engine(name_or_id, engines)
594
593
 
595
- # Get attached studios info
596
- response = make_api_request(
597
- "GET", f"/engines/{engine['instance_id']}/attached-studios"
598
- )
599
- attached_studios = []
600
- if response.status_code == 200:
601
- attached_studios = response.json().get("studios", [])
594
+ # Get detailed engine status including idle detector info
595
+ response = make_api_request("GET", f"/engines/{engine['instance_id']}")
596
+ if response.status_code != 200:
597
+ console.print("[red]❌ Failed to fetch engine details[/red]")
598
+ raise typer.Exit(1)
599
+
600
+ engine_details = response.json()
601
+ engine = engine_details.get("engine", engine) # Use detailed info if available
602
+ idle_detector = engine_details.get("idle_detector", {})
603
+ attached_studios = engine_details.get("attached_studios", [])
602
604
 
603
605
  # Calculate costs
604
606
  launch_time = parse_launch_time(engine["launch_time"])
@@ -614,7 +616,6 @@ def engine_status(
614
616
  f"[bold]Instance:[/bold] {engine['instance_id']}",
615
617
  f"[bold]Type:[/bold] {engine['engine_type']} ({engine['instance_type']})",
616
618
  f"[bold]Status:[/bold] {format_status(engine['state'], engine.get('ready'))}",
617
- f"[bold]Bootstrap:[/bold] {_colour_stage(stage_val)}",
618
619
  f"[bold]User:[/bold] {engine['user']}",
619
620
  f"[bold]IP:[/bold] {engine.get('public_ip', 'N/A')}",
620
621
  f"[bold]Launched:[/bold] {launch_time.strftime('%Y-%m-%d %H:%M:%S')} ({format_duration(uptime)} ago)",
@@ -642,6 +643,37 @@ def engine_status(
642
643
  status_lines.append(f" • Idle Detector: {health.get('idle_detector_timer', 'unknown')}")
643
644
  except Exception:
644
645
  pass
646
+
647
+ # Idle detector status (from new API endpoint)
648
+ if idle_detector.get("available"):
649
+ status_lines.append("")
650
+ status_lines.append("[bold]Idle Detector:[/bold]")
651
+
652
+ # Overall status
653
+ if idle_detector["status"] == "active":
654
+ status_lines.append(" [green]✓ Engine ACTIVE[/green]")
655
+ else:
656
+ idle_seconds = idle_detector.get("idle_seconds", 0)
657
+ idle_threshold = idle_detector.get("idle_threshold", 1800)
658
+ idle_minutes = idle_seconds // 60
659
+ threshold_minutes = idle_threshold // 60
660
+ status_lines.append(f" [yellow]⏱ Engine IDLE ({idle_minutes}/{threshold_minutes} minutes)[/yellow]")
661
+
662
+ # Coffee lock
663
+ if idle_detector.get("coffee_lock"):
664
+ status_lines.append(f" • [cyan]☕ Caffeinated for another {idle_detector['coffee_lock']}[/cyan]")
665
+
666
+ # SSH sessions
667
+ ssh_sessions = idle_detector.get("ssh_sessions", [])
668
+ if ssh_sessions:
669
+ status_lines.append(f" • [blue]SSH Sessions ({len(ssh_sessions)}):[/blue]")
670
+ for session in ssh_sessions:
671
+ status_lines.append(f" - {session['tty']} (pid {session['pid']}, idle {session['idle_time']}) from {session['from_ip']}")
672
+
673
+ # IDE connections
674
+ ide_conn = idle_detector.get("ide_connections")
675
+ if ide_conn:
676
+ status_lines.append(f" • [magenta]🖥 IDE connected ({ide_conn['connection_count']} connections)[/magenta]")
645
677
 
646
678
  if attached_studios:
647
679
  status_lines.append("")
@@ -911,7 +943,7 @@ def config_ssh(
911
943
  f"Host {engine['name']} {SSH_MANAGED_COMMENT}",
912
944
  f" HostName {engine['instance_id']}",
913
945
  f" User {ssh_user}",
914
- f" ProxyCommand sh -c \"aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'\"",
946
+ f" ProxyCommand sh -c \"AWS_SSM_IDLE_TIMEOUT=600 aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'\"",
915
947
  ]
916
948
  )
917
949
 
@@ -978,7 +1010,7 @@ def coffee(
978
1010
  else:
979
1011
  console.print(f"Pouring coffee for [cyan]{engine['name']}[/cyan] for {duration}…")
980
1012
 
981
- # Use SSM to run the engine keep-alive command
1013
+ # Use SSM to run the engine coffee command
982
1014
  ssm = boto3.client("ssm", region_name="us-east-1")
983
1015
  try:
984
1016
  response = ssm.send_command(
@@ -1219,7 +1251,7 @@ def create_ami(
1219
1251
  console.print(f"AMI Name: [cyan]{ami_name}[/cyan]")
1220
1252
  console.print(f"Description: {ami_description}")
1221
1253
  console.print(
1222
- "\n[yellow]⚠️ Important: This will reboot the engine to ensure a clean snapshot.[/yellow]"
1254
+ "\n[yellow]⚠️ Important: This will detach all studios and reboot the engine to ensure a clean snapshot.[/yellow]"
1223
1255
  )
1224
1256
 
1225
1257
  if not Confirm.ask("\nContinue with AMI creation?"):
@@ -1280,15 +1312,42 @@ def create_ami(
1280
1312
  )
1281
1313
  console.print(f" Excluding volume at {device_name}")
1282
1314
 
1315
+ # --- Check & detach attached studios --------------------------------------------------
1316
+ # If any user studios are still attached we must detach them before the instance reboots
1317
+ # for snapshot consistency; otherwise Studio-Manager metadata becomes stale.
1318
+
1319
+ attached_resp = make_api_request("GET", f"/engines/{engine['instance_id']}/studios")
1320
+ attached_studios = attached_resp.json().get("studios", []) if attached_resp.status_code == 200 else []
1321
+
1322
+ if attached_studios:
1323
+ console.print(f"Detaching {len(attached_studios)} studio(s) from this engine…")
1324
+ for s in attached_studios:
1325
+ console.print(f" • {s['user']} ({s['studio_id']})")
1326
+
1327
+ for s in attached_studios:
1328
+ resp = make_api_request("POST", f"/studios/{s['studio_id']}/detach")
1329
+ if resp.status_code != 200:
1330
+ console.print(f"[red]❌ Failed to detach {s['studio_id']} – aborting.[/red]")
1331
+ return
1332
+
1333
+ # Wait briefly for volumes to become available (max 2 min)
1334
+ # (time is already imported at module level)
1335
+ ec2_wait = boto3.client("ec2", region_name="us-east-1")
1336
+ vol_ids = [s['studio_id'] for s in attached_studios]
1337
+ console.print("Waiting for volumes to detach…")
1338
+ waiter = ec2_wait.get_waiter("volume_available")
1339
+ try:
1340
+ waiter.wait(VolumeIds=vol_ids, WaiterConfig={"Delay": 5, "MaxAttempts": 24})
1341
+ except Exception:
1342
+ console.print("[yellow]Proceeding even though some volumes may still be detaching.[/yellow]")
1343
+
1283
1344
  # Create the AMI
1284
1345
  with Progress(
1285
1346
  SpinnerColumn(),
1286
1347
  TextColumn("[progress.description]{task.description}"),
1287
1348
  transient=True,
1288
1349
  ) as progress:
1289
- progress.add_task(
1290
- "Creating AMI (this will take several minutes)...", total=None
1291
- )
1350
+ progress.add_task("Creating AMI (this will take several minutes)...", total=None)
1292
1351
 
1293
1352
  create_params = {
1294
1353
  "InstanceId": engine["instance_id"],
@@ -1559,14 +1618,15 @@ def attach_studio(
1559
1618
  console.print("[red]❌ Failed to start engine[/red]")
1560
1619
  raise typer.Exit(1)
1561
1620
  console.print("[green]✓ Engine started[/green]")
1562
- console.print("Waiting for engine to be ready...")
1563
- import time
1564
-
1565
- time.sleep(10)
1621
+ # No further waiting here – attachment attempts below handle retry logic while the
1622
+ # engine finishes booting.
1566
1623
  else:
1567
1624
  raise typer.Exit(1)
1568
1625
 
1569
- # Get SSH key
1626
+ # Track whether this command just started the engine (affects retry length)
1627
+ engine_started_now = False
1628
+
1629
+ # Retrieve SSH public key (required for authorised_keys provisioning)
1570
1630
  try:
1571
1631
  public_key = get_ssh_public_key()
1572
1632
  except FileNotFoundError as e:
@@ -1577,34 +1637,89 @@ def attach_studio(
1577
1637
 
1578
1638
  with Progress(
1579
1639
  SpinnerColumn(),
1640
+ TimeElapsedColumn(),
1580
1641
  TextColumn("[progress.description]{task.description}"),
1581
1642
  transient=True,
1582
- ) as progress:
1583
- task = progress.add_task("Attaching studio...", total=100)
1643
+ ) as prog:
1644
+ task = prog.add_task("Attaching studio (engine is still booting)…", total=None)
1645
+ ATTEMPT_LIMIT = 40 if engine_started_now else 6 # shorter retries for already-running engines
1646
+ RETRY_DELAY = 10
1647
+ for attempt in range(ATTEMPT_LIMIT):
1648
+ success, error_msg = _attempt_studio_attach(studio, engine, target_user, public_key)
1649
+
1650
+ if success:
1651
+ break # success!
1652
+
1653
+ # Update spinner description with attempt number
1654
+ if attempt % 3 == 0: # update every 3rd attempt to avoid spam
1655
+ prog.update(task, description=f"Attaching studio (engine is still booting)… {attempt+1}/{ATTEMPT_LIMIT}")
1656
+
1657
+ if error_msg:
1658
+ console.print(f"[red]❌ Failed to attach studio: {error_msg}[/red]")
1659
+ return
1584
1660
 
1585
- response = make_api_request(
1586
- "POST",
1587
- f"/studios/{studio['studio_id']}/attach",
1588
- json_data={
1589
- "vm_id": engine["instance_id"],
1590
- "user": target_user, # Use target_user instead of username
1591
- "public_key": public_key,
1592
- },
1593
- )
1661
+ time.sleep(RETRY_DELAY)
1662
+
1663
+ else:
1664
+ console.print("[yellow]Engine is still starting up – please retry in a minute.[/yellow]")
1665
+ return
1666
+
1667
+ # Successful attach path
1668
+ console.print(f"[green]✓ Studio attached successfully![/green]")
1594
1669
 
1595
- progress.update(task, completed=100)
1670
+ # Update SSH config - use target_user for the connection
1671
+ update_ssh_config_entry(engine["name"], engine["instance_id"], target_user)
1672
+ console.print(f"[green]✓ SSH config updated[/green]")
1673
+ console.print(f"\nConnect with: [cyan]ssh {engine['name']}[/cyan]")
1674
+ console.print(f"Files are at: [cyan]/studios/{target_user}[/cyan]")
1675
+
1676
+
1677
+ def _attempt_studio_attach(studio, engine, target_user, public_key):
1678
+ response = make_api_request(
1679
+ "POST",
1680
+ f"/studios/{studio['studio_id']}/attach",
1681
+ json_data={
1682
+ "vm_id": engine["instance_id"],
1683
+ "user": target_user,
1684
+ "public_key": public_key,
1685
+ },
1686
+ )
1596
1687
 
1597
1688
  if response.status_code == 200:
1598
- console.print(f"[green]✓ Studio attached successfully![/green]")
1689
+ return True, None
1599
1690
 
1600
- # Update SSH config - use target_user for the connection
1601
- update_ssh_config_entry(engine["name"], engine["instance_id"], target_user)
1602
- console.print(f"[green]✓ SSH config updated[/green]")
1603
- console.print(f"\nConnect with: [cyan]ssh {engine['name']}[/cyan]")
1604
- console.print(f"Files are at: [cyan]/studios/{target_user}[/cyan]")
1691
+ # --- determine if we should retry ---
1692
+ recoverable = False
1693
+ if response.status_code in (409, 503):
1694
+ recoverable = True
1605
1695
  else:
1696
+ err_msg = response.json().get("error", "").lower()
1697
+ RECOVERABLE_PATTERNS = [
1698
+ "not ready",
1699
+ "still starting",
1700
+ "initializing",
1701
+ "failed to mount",
1702
+ "device busy",
1703
+ "not available",
1704
+ "pending", # VM state pending
1705
+ ]
1706
+ FATAL_PATTERNS = [
1707
+ "in-use",
1708
+ "already attached",
1709
+ "permission",
1710
+ ]
1711
+ if any(p in err_msg for p in FATAL_PATTERNS):
1712
+ recoverable = False
1713
+ elif any(p in err_msg for p in RECOVERABLE_PATTERNS):
1714
+ recoverable = True
1715
+
1716
+ if not recoverable:
1717
+ # fatal – abort immediately and show message
1606
1718
  error = response.json().get("error", "Unknown error")
1607
- console.print(f"[red]❌ Failed to attach studio: {error}[/red]")
1719
+ return False, error
1720
+
1721
+ # otherwise wait and retry
1722
+ return False, None
1608
1723
 
1609
1724
 
1610
1725
  @studio_app.command("detach")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.3.25
3
+ Version: 1.4.10
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -3,7 +3,7 @@ dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf
3
3
  dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
4
4
  dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
6
- dayhoff_tools/cli/engine_commands.py,sha256=XVg03q0MG4DpzMxep3VRbEWZqDU_LHRD3j_xKui4Ync,78722
6
+ dayhoff_tools/cli/engine_commands.py,sha256=ul6hdl3oMLXeKqwG5p9bjZbWJisU_l9siObg1Ls_fKs,84109
7
7
  dayhoff_tools/cli/main.py,sha256=rgeEHD9lJ8SBCR34BTLb7gVInHUUdmEBNXAJnq5yEU4,4795
8
8
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
9
9
  dayhoff_tools/cli/utility_commands.py,sha256=qs8vH9TBFHsOPC3X8cU3qZigM3dDn-2Ytq4o_F2WubU,27874
@@ -27,7 +27,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
27
27
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
28
28
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
29
29
  dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
30
- dayhoff_tools-1.3.25.dist-info/METADATA,sha256=-M4-C9Y0ICSbkfUiaIGTw_Voh9QnrIcYNGmklHYyZUU,2825
31
- dayhoff_tools-1.3.25.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
- dayhoff_tools-1.3.25.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
- dayhoff_tools-1.3.25.dist-info/RECORD,,
30
+ dayhoff_tools-1.4.10.dist-info/METADATA,sha256=Fenvb3Ijq1lKIoXpOaqu2Sb3tS82qQSgHN5iLFd1c9E,2825
31
+ dayhoff_tools-1.4.10.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ dayhoff_tools-1.4.10.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
33
+ dayhoff_tools-1.4.10.dist-info/RECORD,,