dayhoff-tools 1.1.33__py3-none-any.whl → 1.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/dayhoff_tools/deployment/deploy_aws.py
+++ b/dayhoff_tools/deployment/deploy_aws.py
@@ -286,15 +286,15 @@ def create_or_update_job_definition(
         raise ValueError("docker.container_entrypoint is required in configuration")
 
     # Create linux parameters with devices
-    linux_params: dict[str, Any] = {
-        "devices": [
+    linux_params: dict[str, Any] = {}
+    if compute_specs.get("gpus", 0) > 0:
+        linux_params["devices"] = [
             {
                 "hostPath": "/dev/nvidia0",
                 "containerPath": "/dev/nvidia0",
                 "permissions": ["READ", "WRITE"],
             },
-        ],
-    }
+        ]
 
     # Add shared memory configuration if specified in docker config
     if "shared_memory" in config.get("docker", {}):
@@ -318,6 +318,32 @@ def create_or_update_job_definition(
         linux_params["sharedMemorySize"] = shared_memory_mib
         print(f"Setting shared memory size to {shared_memory_mib} MiB")
 
+    # Prepare containerProperties
+    container_properties = {
+        "image": image_uri,
+        "vcpus": compute_specs["vcpus"],
+        "memory": compute_specs["memory"],
+        "resourceRequirements": gpu_requirements,
+        "executionRoleArn": aws_config["execution_role_arn"],
+        "jobRoleArn": aws_config["job_role_arn"],
+        "privileged": compute_specs.get("gpus", 0) > 0,
+        "command": entrypoint_command,
+    }
+
+    if linux_params:
+        container_properties["linuxParameters"] = linux_params
+
+    # Add volumes and mount points if defined in AWS batch_job config
+    batch_job_config = aws_config.get("batch_job", {})
+    if "volumes" in batch_job_config:
+        container_properties["volumes"] = batch_job_config["volumes"]
+        print(f"Adding volumes to job definition: {batch_job_config['volumes']}")
+    if "mountPoints" in batch_job_config:
+        container_properties["mountPoints"] = batch_job_config["mountPoints"]
+        print(
+            f"Adding mount points to job definition: {batch_job_config['mountPoints']}"
+        )
+
     # Check if job definition already exists using the session client
     try:
         existing = batch.describe_job_definitions(
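The new block reads optional volumes and mountPoints from an aws.batch_job config section and passes them straight into the job definition's containerProperties, so they must already follow the AWS Batch schema. A hedged sketch of what such a section might look like as the Python dict this code would receive (the EFS file-system ID and mount path are placeholders, not values from this package):

# Hypothetical aws.batch_job section consumed by create_or_update_job_definition.
# Keys follow the AWS Batch containerProperties schema; values are placeholders.
aws_batch_job = {
    "volumes": [
        {
            "name": "interproscan-efs-volume",
            "efsVolumeConfiguration": {
                "fileSystemId": "fs-0123456789abcdef0",  # placeholder EFS ID
                "rootDirectory": "/",
            },
        }
    ],
    "mountPoints": [
        {
            "sourceVolume": "interproscan-efs-volume",
            "containerPath": "/mnt/interproscan",  # placeholder mount path
            "readOnly": True,
        }
    ],
}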
@@ -330,31 +356,20 @@ def create_or_update_job_definition(
             print(f"\nCreating new job definition: {job_def_name}")
 
     except batch.exceptions.ClientError as e:
-        # Handle case where the error is specifically 'JobDefinitionNotFoundException'
-        # Boto3 typically includes error codes in the response
-        if (
-            e.response.get("Error", {}).get("Code") == "ClientError"
-        ):  # Simple check, might need refinement
+        if e.response.get("Error", {}).get(
+            "Code"
+        ) == "ClientError" and "JobDefinitionNotFoundException" in str(
+            e
+        ):  # More specific check for not found
             print(f"\nCreating new job definition: {job_def_name}")
         else:
-            # Re-raise unexpected client errors
             raise
 
-    # Prepare job definition properties
+    # Prepare job definition arguments
     job_definition_args = {
         "jobDefinitionName": job_def_name,
         "type": "container",
-        "containerProperties": {
-            "image": image_uri,
-            "vcpus": compute_specs["vcpus"],
-            "memory": compute_specs["memory"],
-            "resourceRequirements": gpu_requirements,
-            "executionRoleArn": aws_config["execution_role_arn"],
-            "jobRoleArn": aws_config["job_role_arn"],
-            "privileged": compute_specs.get("gpus", 0) > 0,
-            "command": entrypoint_command,
-            **({"linuxParameters": linux_params} if linux_params else {}),
-        },
+        "containerProperties": container_properties,
         "platformCapabilities": ["EC2"],
         "timeout": {"attemptDurationSeconds": aws_config.get("timeout_seconds", 86400)},
     }
@@ -403,9 +418,30 @@ def submit_aws_batch_job(
     print(f"- Job Role: {aws_config['job_role_arn']}")
 
     # Get all environment variables, including special ones like WANDB_API_KEY and GCP credentials
-    env_vars = get_container_env_vars(config)
+    env_vars_map = get_container_env_vars(config)  # This returns a dict
+
+    # If EFS is configured for InterProScan, override INTERPROSCAN_INSTALL_DIR
+    # Check based on the conventional volume name used in interp_bulk.yaml
+    efs_interproscan_mount_path = None
+    aws_batch_job_config = aws_config.get("batch_job", {})
+    if "mountPoints" in aws_batch_job_config:
+        for mp in aws_batch_job_config["mountPoints"]:
+            if (
+                mp.get("sourceVolume") == "interproscan-efs-volume"
+            ):  # Convention from YAML
+                efs_interproscan_mount_path = mp.get("containerPath")
+                break
+
+    if efs_interproscan_mount_path:
+        env_vars_map["INTERPROSCAN_INSTALL_DIR"] = efs_interproscan_mount_path
+        print(
+            f"INTERPROSCAN_INSTALL_DIR overridden to EFS mount path: {efs_interproscan_mount_path}"
+        )
 
-    print("Environment Variables:", list(env_vars.keys()))
+    print(
+        "Environment Variables (after potential EFS override):",
+        list(env_vars_map.keys()),
+    )
 
     # Create/Update Job Definition using the config (now implicitly uses the correct session)
     job_definition = create_or_update_job_definition(image_uri, config)
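The override keys off the conventional volume name interproscan-efs-volume (the diff attributes the convention to an interp_bulk.yaml that is not part of this diff). A standalone sketch of that lookup, with a hypothetical helper name and a made-up config, showing how INTERPROSCAN_INSTALL_DIR ends up pointing at the EFS mount:

def find_efs_mount_path(
    batch_job_config: dict, volume_name: str = "interproscan-efs-volume"
) -> str | None:
    # Return the containerPath of the mountPoint whose sourceVolume matches volume_name.
    for mp in batch_job_config.get("mountPoints", []):
        if mp.get("sourceVolume") == volume_name:
            return mp.get("containerPath")
    return None


# Illustrative values only; the default install dir here is a placeholder assumption.
env_vars_map = {"INTERPROSCAN_INSTALL_DIR": "/opt/interproscan"}
mount_path = find_efs_mount_path(
    {
        "mountPoints": [
            {"sourceVolume": "interproscan-efs-volume", "containerPath": "/mnt/interproscan"}
        ]
    }
)
if mount_path:
    env_vars_map["INTERPROSCAN_INSTALL_DIR"] = mount_path  # now "/mnt/interproscan"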
@@ -418,7 +454,8 @@ def submit_aws_batch_job(
         "jobDefinition": job_definition,
         "containerOverrides": {
             "environment": [
-                {"name": key, "value": str(value)} for key, value in env_vars.items()
+                {"name": key, "value": str(value)}
+                for key, value in env_vars_map.items()
             ],
         },
     }
--- a/dayhoff_tools/deployment/deploy_gcp.py
+++ b/dayhoff_tools/deployment/deploy_gcp.py
@@ -50,9 +50,55 @@ def create_batch_job_config(config: dict, image_uri: str) -> dict:
 
     Returns:
         Dictionary containing GCP Batch job configuration
+
+    Raises:
+        ValueError: If the configuration contains unexpected keys.
     """
     gcp_config = config["gcp"]
 
+    # Validate top-level gcp_config keys used for Batch job JSON construction
+    EXPECTED_GCP_CONFIG_KEYS = {
+        "allocation_policy",  # Goes into batch_config.allocationPolicy
+        "logs_policy",  # Goes into batch_config.logsPolicy
+        "batch_job",  # Contains detailed task and resource specs
+        # Keys like job_name, region, registry_uri, repository are used by other functions
+        # or for other purposes, not directly for constructing the core batch_config JSON here.
+    }
+    actual_gcp_keys = set(gcp_config.keys())
+    # Filter out keys not relevant to this function's direct Batch config construction
+    # These keys are used by the calling context or other parts of the deployment.
+    keys_to_ignore_for_this_check = {"job_name", "region", "registry_uri", "repository"}
+    relevant_gcp_keys = {
+        key for key in actual_gcp_keys if key not in keys_to_ignore_for_this_check
+    }
+
+    unhandled_gcp_keys = relevant_gcp_keys - EXPECTED_GCP_CONFIG_KEYS
+    if unhandled_gcp_keys:
+        raise ValueError(
+            f"Unexpected keys in 'gcp' configuration section: {unhandled_gcp_keys}. "
+            f"Expected keys for Batch job JSON construction are: {EXPECTED_GCP_CONFIG_KEYS}"
+        )
+
+    # Validate keys within gcp_config["batch_job"]
+    if "batch_job" not in gcp_config:
+        raise ValueError("Missing 'batch_job' section in 'gcp' configuration.")
+
+    gcp_batch_job_config = gcp_config["batch_job"]
+    EXPECTED_GCP_BATCH_JOB_KEYS = {
+        "taskCount",
+        "parallelism",
+        "computeResource",
+        "instance",  # Contains machineType, accelerators
+        "volumes",
+    }
+    actual_batch_job_keys = set(gcp_batch_job_config.keys())
+    unhandled_batch_job_keys = actual_batch_job_keys - EXPECTED_GCP_BATCH_JOB_KEYS
+    if unhandled_batch_job_keys:
+        raise ValueError(
+            f"Unexpected keys in 'gcp.batch_job' configuration section: {unhandled_batch_job_keys}. "
+            f"Expected keys are: {EXPECTED_GCP_BATCH_JOB_KEYS}"
+        )
+
     # Start with the allocation and logs policies
     batch_config = {
         "allocationPolicy": gcp_config["allocation_policy"],
@@ -92,10 +138,10 @@ def create_batch_job_config(config: dict, image_uri: str) -> dict:
 
     # Build the task group configuration
     task_group = {
-        "taskCount": gcp_config["batch_job"]["taskCount"],
-        "parallelism": gcp_config["batch_job"]["parallelism"],
+        "taskCount": gcp_batch_job_config["taskCount"],
+        "parallelism": gcp_batch_job_config["parallelism"],
         "taskSpec": {
-            "computeResource": gcp_config["batch_job"]["computeResource"],
+            "computeResource": gcp_batch_job_config["computeResource"],
             "runnables": [{"container": container_config}],
         },
     }
@@ -107,8 +153,12 @@ def create_batch_job_config(config: dict, image_uri: str) -> dict:
     if env_vars:
        task_group["taskSpec"]["runnables"][0]["environment"] = {"variables": env_vars}
 
+    # Add volumes to the taskSpec if specified in the config
+    if "volumes" in gcp_batch_job_config and gcp_batch_job_config["volumes"]:
+        task_group["taskSpec"]["volumes"] = gcp_batch_job_config["volumes"]
+
     # Add machine type and optional accelerators from instance config
-    instance_config = gcp_config["batch_job"]["instance"]
+    instance_config = gcp_batch_job_config["instance"]
     if "machineType" in instance_config:
         # Add machine type to the allocation policy
         if "policy" not in batch_config["allocationPolicy"]["instances"]:
--- a/dayhoff_tools-1.1.33.dist-info/METADATA
+++ b/dayhoff_tools-1.1.35.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dayhoff-tools
-Version: 1.1.33
+Version: 1.1.35
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com
--- a/dayhoff_tools-1.1.33.dist-info/RECORD
+++ b/dayhoff_tools-1.1.35.dist-info/RECORD
@@ -7,8 +7,8 @@ dayhoff_tools/cli/main.py,sha256=47EGb28ALaYFc7oAUGlY1D66AIDmc4RZiXxN-gPVrpQ,451
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
 dayhoff_tools/cli/utility_commands.py,sha256=ER4VrJt4hu904MwrcltUXjwBWT4uFrP-aPXjdXyT3F8,24685
 dayhoff_tools/deployment/base.py,sha256=8tXwsPYvRo-zV-aNhHw1c7Rji-KWg8S5xoCCznFnVVI,17412
-dayhoff_tools/deployment/deploy_aws.py,sha256=O0gQxHioSU_sNU8T8MD4wSOPvWc--V8eRRZzlRu035I,16446
-dayhoff_tools/deployment/deploy_gcp.py,sha256=s3nt-VsF0NrEA-J9Ire8G9zsFgtp8A6dHWT7IYQL5uQ,6529
+dayhoff_tools/deployment/deploy_aws.py,sha256=jQyQ0fbm2793jEHFO84lr5tNqiOpdBg6U0S5zCVJr1M,17884
+dayhoff_tools/deployment/deploy_gcp.py,sha256=jiEE_tBVeSavAI8o_6qPDPpaoXKexcaNIa4uXcv3y0M,8839
 dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
 dayhoff_tools/deployment/processors.py,sha256=q2io07xO6f6twEH1iLz9GFoGaKh76qC4kcv519Q2G7g,34583
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.33.dist-info/METADATA,sha256=8v8Kj6e0GdUHhtDx3l1huf7t1YwcnA0vlbbaKWwpBAw,2843
-dayhoff_tools-1.1.33.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-dayhoff_tools-1.1.33.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
-dayhoff_tools-1.1.33.dist-info/RECORD,,
+dayhoff_tools-1.1.35.dist-info/METADATA,sha256=lula1chZ1oJh4kiICRO_J1Lh4nhG49THMge3MBuM428,2843
+dayhoff_tools-1.1.35.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.35.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.35.dist-info/RECORD,,