dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. dayhoff_tools/__init__.py +10 -0
  2. dayhoff_tools/cli/cloud_commands.py +179 -43
  3. dayhoff_tools/cli/engine1/__init__.py +323 -0
  4. dayhoff_tools/cli/engine1/engine_core.py +703 -0
  5. dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
  6. dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
  7. dayhoff_tools/cli/engine1/engine_management.py +505 -0
  8. dayhoff_tools/cli/engine1/shared.py +501 -0
  9. dayhoff_tools/cli/engine1/studio_commands.py +825 -0
  10. dayhoff_tools/cli/engines_studios/__init__.py +6 -0
  11. dayhoff_tools/cli/engines_studios/api_client.py +351 -0
  12. dayhoff_tools/cli/engines_studios/auth.py +144 -0
  13. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
  14. dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
  15. dayhoff_tools/cli/engines_studios/progress.py +260 -0
  16. dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
  17. dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
  18. dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
  19. dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
  20. dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
  21. dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
  22. dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
  23. dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
  25. dayhoff_tools/cli/main.py +106 -7
  26. dayhoff_tools/cli/utility_commands.py +896 -179
  27. dayhoff_tools/deployment/base.py +70 -6
  28. dayhoff_tools/deployment/deploy_aws.py +165 -25
  29. dayhoff_tools/deployment/deploy_gcp.py +78 -5
  30. dayhoff_tools/deployment/deploy_utils.py +20 -7
  31. dayhoff_tools/deployment/job_runner.py +9 -4
  32. dayhoff_tools/deployment/processors.py +230 -418
  33. dayhoff_tools/deployment/swarm.py +47 -12
  34. dayhoff_tools/embedders.py +28 -26
  35. dayhoff_tools/fasta.py +181 -64
  36. dayhoff_tools/warehouse.py +268 -1
  37. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
  38. dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
  39. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
  40. dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
  41. {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,6 @@
1
+ """Engine and Studio CLI commands."""
2
+
3
+ from .engine_commands import engine_cli
4
+ from .studio_commands import studio_cli
5
+
6
+ __all__ = ["engine_cli", "studio_cli"]
@@ -0,0 +1,351 @@
1
+ """API client for Studio Manager API."""
2
+
3
+ import os
4
+ from typing import Any, Dict, Optional
5
+
6
+ import boto3
7
+ import click
8
+ import requests
9
+ from botocore.exceptions import ClientError, NoCredentialsError, TokenRetrievalError
10
+
11
+
12
class StudioManagerClient:
    """Client for Studio Manager API v2.

    The API base URL is resolved, in order, from the ``api_url`` argument, the
    ``STUDIO_MANAGER_API_URL`` environment variable, and finally the SSM
    parameter ``/{environment}/studio-manager/api-url``.
    """

    # Environment variable that supplies the API URL and skips the SSM lookup.
    # The SSM error messages below tell users to set it, so it must be honoured.
    API_URL_ENV_VAR = "STUDIO_MANAGER_API_URL"

    # (connect, read) timeout in seconds used when the caller passes none, so a
    # stalled connection cannot hang the CLI forever. Override via ``timeout=``
    # in ``_request`` kwargs or by changing this attribute.
    DEFAULT_TIMEOUT = (10, 60)

    def __init__(self, api_url: Optional[str] = None, environment: str = "dev"):
        """Initialize client.

        Args:
            api_url: Optional API URL. When omitted, the STUDIO_MANAGER_API_URL
                environment variable is used if set, otherwise the URL is
                fetched from SSM Parameter Store.
            environment: Environment name (dev, sand, prod)

        Raises:
            click.ClickException: If authentication fails or API URL cannot be fetched
        """
        self.environment = environment
        self.api_url = api_url or os.environ.get(self.API_URL_ENV_VAR)

        if not self.api_url:
            self.api_url = self._fetch_api_url_from_ssm(environment)

    @staticmethod
    def _is_expired_sso_token(error_msg: str) -> bool:
        """Return True when the message is the 'SSO token expired, refresh failed' case."""
        return "Token has expired" in error_msg and "refresh failed" in error_msg

    def _fetch_api_url_from_ssm(self, environment: str) -> str:
        """Read the API URL from SSM, mapping AWS failures to ClickException.

        Args:
            environment: Environment name used to build the parameter path.

        Returns:
            The value of ``/{environment}/studio-manager/api-url``.

        Raises:
            click.ClickException: With an actionable message for missing
                credentials, expired tokens, a missing parameter, or any
                other failure.
        """
        param_name = f"/{environment}/studio-manager/api-url"

        # Shared by the TokenRetrievalError and ClientError paths.
        sso_expired_message = (
            f"✗ AWS SSO token has expired\n\n"
            f"Cannot fetch API URL from {param_name}\n\n"
            f"Please refresh your AWS SSO session:\n"
            f" dh aws login --profile <profile-name>"
        )

        try:
            ssm = boto3.client("ssm")
            param = ssm.get_parameter(Name=param_name)
            return param["Parameter"]["Value"]
        except NoCredentialsError as exc:
            raise click.ClickException(
                f"✗ Not authenticated to AWS\n\n"
                f"Cannot fetch API URL from {param_name}\n\n"
                f"Please authenticate:\n"
                f" dh aws login --profile <profile-name>"
            ) from exc
        except TokenRetrievalError as exc:
            # SSO token retrieval errors - most common case for expired SSO sessions
            error_msg = str(exc)
            if self._is_expired_sso_token(error_msg):
                raise click.ClickException(sso_expired_message) from exc
            # Other token retrieval errors
            raise click.ClickException(
                f"✗ AWS SSO token error\n\n"
                f"Cannot fetch API URL from {param_name}\n\n"
                f"Error: {error_msg}\n\n"
                f"Please refresh your AWS SSO session:\n"
                f" dh aws login --profile <profile-name>"
            ) from exc
        except ClientError as exc:
            error_code = exc.response.get("Error", {}).get("Code", "")
            error_msg = str(exc)

            # SSO token error - checked first because it is more specific than
            # the generic "expired" match below.
            if self._is_expired_sso_token(error_msg):
                raise click.ClickException(sso_expired_message) from exc

            # Auth/token errors (generic)
            if "ExpiredToken" in error_code or "expired" in error_msg.lower():
                raise click.ClickException(
                    f"✗ AWS credentials have expired\n\n"
                    f"Cannot fetch API URL from {param_name}\n\n"
                    f"Please refresh your credentials:\n"
                    f" dh aws login --profile <profile-name>"
                ) from exc

            # Parameter not found
            if error_code == "ParameterNotFound":
                raise click.ClickException(
                    f"✗ API URL parameter not found: {param_name}\n\n"
                    f"This usually means the infrastructure is not deployed in the '{environment}' environment.\n\n"
                    f"Try:\n"
                    f" • Check if the environment name is correct (--env {environment})\n"
                    f" • Verify the infrastructure is deployed\n"
                    f" • Contact your admin if you're unsure"
                ) from exc

            # Generic error
            raise click.ClickException(
                f"✗ Could not fetch API URL from {param_name}\n\n"
                f"Error: {error_msg}\n\n"
                f"Set STUDIO_MANAGER_API_URL environment variable to bypass SSM lookup"
            ) from exc
        except Exception as exc:
            # CLI boundary: convert anything unexpected into a readable error.
            raise click.ClickException(
                f"✗ Unexpected error fetching API URL from {param_name}\n\n"
                f"Error: {exc}\n\n"
                f"Set STUDIO_MANAGER_API_URL environment variable to bypass SSM lookup"
            ) from exc

    def _build_url(self, path: str) -> str:
        """Join the base URL and ``path`` without producing a double slash."""
        base = (self.api_url or "").rstrip("/")
        return f"{base}{path}"

    @staticmethod
    def _extract_error_message(response) -> Any:
        """Return the API's ``error`` field, else the raw body, else ``HTTP <code>``."""
        fallback = response.text or f"HTTP {response.status_code}"
        try:
            error_body = response.json()
        except ValueError:
            # Body is not JSON.
            return fallback
        if isinstance(error_body, dict):
            return error_body.get("error", response.text)
        # Valid JSON but not an object (list, string, ...).
        return fallback

    def _request(self, method: str, path: str, **kwargs) -> Dict[str, Any]:
        """Make HTTP request to API.

        Args:
            method: HTTP method
            path: API path (expected to start with "/")
            **kwargs: Additional arguments for requests. ``timeout`` defaults
                to ``DEFAULT_TIMEOUT`` when not supplied.

        Returns:
            Response JSON, or an empty dict when a successful response has no body

        Raises:
            RuntimeError: If request fails with error message from API
        """
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        response = requests.request(method, self._build_url(path), **kwargs)

        if not response.ok:
            # Raise exception with the actual error message from API
            raise RuntimeError(self._extract_error_message(response))

        # A bodiless success (e.g. 204 No Content) has nothing to decode.
        if not response.content:
            return {}
        return response.json()

    # Engine operations
    def list_engines(self) -> Dict[str, Any]:
        """List all engines."""
        return self._request("GET", "/engines")

    def get_engine_readiness(self, engine_id: str) -> Dict[str, Any]:
        """Get engine readiness status with progress."""
        return self._request("GET", f"/engines/{engine_id}/readiness")

    def get_engine_status(self, engine_id: str) -> Dict[str, Any]:
        """Get comprehensive engine status including idle state."""
        return self._request("GET", f"/engines/{engine_id}")

    def launch_engine(
        self,
        name: str,
        user: str,
        engine_type: str,
        boot_disk_size: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Launch a new engine.

        ``boot_disk_size`` is only sent when it is truthy.
        """
        payload: Dict[str, Any] = {
            "name": name,
            "user": user,
            "engine_type": engine_type,
        }
        if boot_disk_size:
            payload["boot_disk_size"] = boot_disk_size
        return self._request("POST", "/engines", json=payload)

    def terminate_engine(self, engine_id: str) -> Dict[str, Any]:
        """Terminate an engine."""
        return self._request("DELETE", f"/engines/{engine_id}")

    def start_engine(self, engine_id: str) -> Dict[str, Any]:
        """Start a stopped engine."""
        return self._request("POST", f"/engines/{engine_id}/start")

    def stop_engine(self, engine_id: str) -> Dict[str, Any]:
        """Stop a running engine."""
        return self._request("POST", f"/engines/{engine_id}/stop")

    def resize_engine(
        self, engine_id: str, size_gb: int, online: bool = False
    ) -> Dict[str, Any]:
        """Resize engine boot disk."""
        return self._request(
            "POST",
            f"/engines/{engine_id}/resize",
            json={"size_gb": size_gb, "online": online},
        )

    def set_coffee(self, engine_id: str, duration: str) -> Dict[str, Any]:
        """Set coffee lock (keep-alive) for engine."""
        return self._request(
            "POST", f"/engines/{engine_id}/coffee", json={"duration": duration}
        )

    def cancel_coffee(self, engine_id: str) -> Dict[str, Any]:
        """Cancel coffee lock for engine."""
        return self._request("DELETE", f"/engines/{engine_id}/coffee")

    def update_idle_settings(
        self, engine_id: str, timeout: Optional[str] = None, slack: Optional[str] = None
    ) -> Dict[str, Any]:
        """Update idle detector settings.

        Only the settings that are provided (truthy) are included in the request.
        """
        payload: Dict[str, Any] = {}
        if timeout:
            payload["timeout"] = timeout
        if slack:
            payload["slack"] = slack
        return self._request(
            "PATCH", f"/engines/{engine_id}/idle-settings", json=payload
        )

    # Studio operations
    def list_studios(self) -> Dict[str, Any]:
        """List all studios."""
        return self._request("GET", "/studios")

    def get_studio(self, studio_id: str) -> Dict[str, Any]:
        """Get studio information."""
        return self._request("GET", f"/studios/{studio_id}")

    def create_studio(self, user: str, size_gb: int = 100) -> Dict[str, Any]:
        """Create a new studio."""
        return self._request(
            "POST", "/studios", json={"user": user, "size_gb": size_gb}
        )

    def delete_studio(self, studio_id: str) -> Dict[str, Any]:
        """Delete a studio."""
        return self._request("DELETE", f"/studios/{studio_id}")

    def resize_studio(self, studio_id: str, size_gb: int) -> Dict[str, Any]:
        """Resize a studio volume."""
        return self._request(
            "POST", f"/studios/{studio_id}/resize", json={"size_gb": size_gb}
        )

    def reset_studio(self, studio_id: str) -> Dict[str, Any]:
        """Reset a stuck studio to available status."""
        return self._request("POST", f"/studios/{studio_id}/reset")

    # Attachment operations
    def attach_studio(
        self, studio_id: str, engine_id: str, user: str
    ) -> Dict[str, Any]:
        """Initiate studio attachment."""
        return self._request(
            "POST",
            f"/studios/{studio_id}/attach",
            json={"engine_id": engine_id, "user": user},
        )

    def detach_studio(self, studio_id: str) -> Dict[str, Any]:
        """Detach a studio."""
        return self._request("POST", f"/studios/{studio_id}/detach")

    def get_attachment_progress(self, operation_id: str) -> Dict[str, Any]:
        """Get attachment operation progress."""
        return self._request("GET", f"/operations/{operation_id}")

    # Helper methods
    def check_instance_status(self, instance_id: str) -> Dict[str, Any]:
        """Check EC2 instance status including status checks.

        Args:
            instance_id: EC2 instance ID

        Returns:
            Dict with:
            - state: Instance state (pending, running, etc.)
            - instance_status: Instance status check (initializing, ok, impaired)
            - system_status: System status check (initializing, ok, impaired)
            - reachable: True if both status checks passed

            or ``{"error": <message>}`` when the instance is not found or the
            EC2 call raises a ClientError.
        """
        ec2 = boto3.client("ec2")

        try:
            # Get instance state
            instances_resp = ec2.describe_instances(InstanceIds=[instance_id])
            if not instances_resp["Reservations"]:
                return {"error": "Instance not found"}

            instance = instances_resp["Reservations"][0]["Instances"][0]
            state = instance["State"]["Name"]

            # Status checks are only requested for running instances
            if state != "running":
                return {
                    "state": state,
                    "instance_status": None,
                    "system_status": None,
                    "reachable": False,
                }

            # Fetch instance status checks
            status_resp = ec2.describe_instance_status(
                InstanceIds=[instance_id],
                IncludeAllInstances=False,  # Only get running instances
            )

            if not status_resp["InstanceStatuses"]:
                # No status yet - still initializing
                return {
                    "state": state,
                    "instance_status": "initializing",
                    "system_status": "initializing",
                    "reachable": False,
                }

            status = status_resp["InstanceStatuses"][0]
            instance_status = status["InstanceStatus"]["Status"]
            system_status = status["SystemStatus"]["Status"]

            return {
                "state": state,
                "instance_status": instance_status,
                "system_status": system_status,
                "reachable": instance_status == "ok" and system_status == "ok",
            }

        except ClientError as exc:
            return {"error": str(exc)}

    def get_engine_by_name(self, name: str) -> Optional[Dict[str, Any]]:
        """Find engine by name.

        Args:
            name: Engine name

        Returns:
            Engine dict or None if not found
        """
        engines = self.list_engines().get("engines", [])
        # .get() so one record without a "name" key cannot abort the search.
        return next((e for e in engines if e.get("name") == name), None)

    def get_my_studio(self) -> Optional[Dict[str, Any]]:
        """Get current user's studio.

        Returns:
            Studio dict or None if not found

        Raises:
            RuntimeError: If not authenticated to AWS
        """
        from .auth import get_aws_username

        user = get_aws_username()

        studios = self.list_studios().get("studios", [])
        # .get() so one record without a "user" key cannot abort the search.
        return next((s for s in studios if s.get("user") == user), None)
@@ -0,0 +1,144 @@
1
+ """AWS authentication and identity helpers."""
2
+
3
+ import boto3
4
+ import click
5
+ from botocore.exceptions import ClientError, NoCredentialsError, TokenRetrievalError
6
+
7
+
8
def detect_aws_environment() -> str:
    """Resolve the environment name from the caller's AWS account ID.

    Asks STS for the caller identity and looks the account up in a fixed
    account-to-environment table.

    Returns:
        Environment name: "dev", "sand", or "prod"

    Raises:
        click.ClickException: If the identity call fails or the account is
            not one of the known accounts
    """
    # Account IDs for each environment (from aws_config)
    known_accounts = {
        "074735440724": "dev",
        "006207983460": "sand",
        "011117009798": "prod",
    }

    try:
        caller = boto3.client("sts").get_caller_identity()
    except (NoCredentialsError, ClientError, TokenRetrievalError) as auth_error:
        raise click.ClickException(
            "✗ Could not detect AWS environment\n\n"
            "Please authenticate first or specify --env explicitly:\n"
            " dh aws login --profile <profile-name>"
        ) from auth_error

    account_id = caller["Account"]
    environment = known_accounts.get(account_id)
    if environment:
        return environment

    raise click.ClickException(
        f"✗ Unknown AWS account: {account_id}\n\n"
        f"This account is not recognized as dev, sand, or prod.\n"
        f"Please specify --env explicitly."
    )
45
+
46
+
47
def check_aws_auth() -> None:
    """Check AWS authentication status and provide clear error if not authenticated.

    Calls STS ``get_caller_identity`` and converts every credential-related
    failure into a ``click.ClickException`` with instructions, so callers never
    see a raw botocore traceback.

    Raises:
        click.ClickException: If not authenticated to AWS with instructions to fix
    """
    # Imported locally so this function does not depend on the module's import
    # line being extended. BotoCoreError is the base class of
    # NoCredentialsError and TokenRetrievalError.
    from botocore.exceptions import BotoCoreError

    sso_expired_message = (
        "✗ AWS SSO token has expired\n\n"
        "Please refresh your AWS SSO session:\n"
        " dh aws login --profile <profile-name>"
    )

    def is_expired_sso_token(message: str) -> bool:
        # The specific "token expired and could not be refreshed" wording.
        return "Token has expired" in message and "refresh failed" in message

    def generic_auth_error(message: str) -> str:
        return (
            f"✗ AWS authentication error\n\n"
            f"Error: {message}\n\n"
            f"Try refreshing your credentials:\n"
            f" dh aws login --profile <profile-name>"
        )

    try:
        sts = boto3.client("sts")
        sts.get_caller_identity()
    except NoCredentialsError as exc:
        raise click.ClickException(
            "✗ Not authenticated to AWS\n\n"
            "Please authenticate using one of these methods:\n"
            " • dh aws login --profile <profile-name>\n"
            " • aws sso login --profile <profile-name>\n"
            " • export AWS_PROFILE=<profile-name> && aws sso login"
        ) from exc
    except TokenRetrievalError as exc:
        # SSO token retrieval errors - most common case for expired SSO sessions
        error_msg = str(exc)
        if is_expired_sso_token(error_msg):
            raise click.ClickException(sso_expired_message) from exc
        # Other token retrieval errors
        raise click.ClickException(
            f"✗ AWS SSO token error\n\n"
            f"Error: {error_msg}\n\n"
            f"Please refresh your AWS SSO session:\n"
            f" dh aws login --profile <profile-name>"
        ) from exc
    except ClientError as exc:
        error_code = exc.response.get("Error", {}).get("Code", "")
        error_msg = str(exc)

        # SSO token error - checked first because it is more specific than the
        # generic "expired" match below.
        if is_expired_sso_token(error_msg):
            raise click.ClickException(sso_expired_message) from exc

        # Token expired error (generic)
        if "ExpiredToken" in error_code or "expired" in error_msg.lower():
            raise click.ClickException(
                "✗ AWS credentials have expired\n\n"
                "Please refresh your credentials:\n"
                " dh aws login --profile <profile-name>"
            ) from exc

        # Generic auth error
        raise click.ClickException(generic_auth_error(error_msg)) from exc
    except BotoCoreError as exc:
        # Any other botocore failure (for example SSO token load/authorization
        # errors or a missing profile) previously escaped as a traceback.
        # Must stay last: it would otherwise shadow the subclasses above.
        raise click.ClickException(generic_auth_error(str(exc))) from exc
111
+
112
+
113
def get_aws_username() -> str:
    """Derive the current user's name from the AWS STS caller identity.

    For assumed-role identities (AWS SSO) the name is the final "/"-separated
    component of the ARN, e.g.
    arn:aws:sts::123456789012:assumed-role/AWSReservedSSO_DeveloperAccess_xxxx/username.
    For any other identity the final ":"-separated component of ``UserId`` is
    used. This does not rely on $USER, which may be empty when running as root
    in containers.

    Returns:
        Username from AWS identity

    Raises:
        RuntimeError: If not authenticated to AWS
    """
    try:
        caller = boto3.client("sts").get_caller_identity()
        caller_arn = caller["Arn"]

        if "assumed-role" not in caller_arn:
            # Other auth methods - use last part of UserId
            return caller["UserId"].split(":")[-1]

        # SSO auth - username is the last ARN component
        return caller_arn.split("/")[-1]
    except (NoCredentialsError, ClientError, TokenRetrievalError) as auth_error:
        raise RuntimeError(
            "Not authenticated to AWS. Run: dh aws login --profile <profile-name>"
        ) from auth_error