dayhoff-tools 1.14.4__py3-none-any.whl → 1.14.6__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- dayhoff_tools/batch/workers/base.py +9 -13
- dayhoff_tools/cli/batch/commands/boltz.py +62 -24
- dayhoff_tools/cli/batch/commands/finalize.py +3 -2
- {dayhoff_tools-1.14.4.dist-info → dayhoff_tools-1.14.6.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.14.4.dist-info → dayhoff_tools-1.14.6.dist-info}/RECORD +7 -7
- {dayhoff_tools-1.14.4.dist-info → dayhoff_tools-1.14.6.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.14.4.dist-info → dayhoff_tools-1.14.6.dist-info}/entry_points.txt +0 -0
|
@@ -30,12 +30,10 @@ def get_array_index() -> int:
|
|
|
30
30
|
|
|
31
31
|
For array jobs, reads AWS_BATCH_JOB_ARRAY_INDEX.
|
|
32
32
|
For retry jobs, maps from BATCH_RETRY_INDICES.
|
|
33
|
+
For single jobs (array_size=1), defaults to 0.
|
|
33
34
|
|
|
34
35
|
Returns:
|
|
35
36
|
The array index this worker should process
|
|
36
|
-
|
|
37
|
-
Raises:
|
|
38
|
-
RuntimeError: If no array index can be determined
|
|
39
37
|
"""
|
|
40
38
|
# Check for retry mode first
|
|
41
39
|
retry_indices = os.environ.get("BATCH_RETRY_INDICES")
|
|
@@ -49,15 +47,11 @@ def get_array_index() -> int:
|
|
|
49
47
|
)
|
|
50
48
|
return indices[array_idx]
|
|
51
49
|
|
|
52
|
-
# Standard array job mode
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
raise RuntimeError(
|
|
58
|
-
"Could not determine array index. "
|
|
59
|
-
"Set AWS_BATCH_JOB_ARRAY_INDEX or BATCH_RETRY_INDICES environment variable."
|
|
60
|
-
)
|
|
50
|
+
# Standard array job mode - default to 0 for single jobs
|
|
51
|
+
# Note: When array_size=1, AWS Batch runs a single job (not an array),
|
|
52
|
+
# so AWS_BATCH_JOB_ARRAY_INDEX is not set. Default to 0.
|
|
53
|
+
array_idx = os.environ.get("AWS_BATCH_JOB_ARRAY_INDEX", "0")
|
|
54
|
+
return int(array_idx)
|
|
61
55
|
|
|
62
56
|
|
|
63
57
|
def get_job_dir() -> Path:
|
|
@@ -89,7 +83,9 @@ def get_input_file(index: int, job_dir: Path, prefix: str = "chunk") -> Path:
|
|
|
89
83
|
return job_dir / "input" / f"{prefix}_{index:03d}.fasta"
|
|
90
84
|
|
|
91
85
|
|
|
92
|
-
def get_output_file(
|
|
86
|
+
def get_output_file(
|
|
87
|
+
index: int, job_dir: Path, prefix: str = "embed", suffix: str = ".h5"
|
|
88
|
+
) -> Path:
|
|
93
89
|
"""Get the output file path for a given index.
|
|
94
90
|
|
|
95
91
|
Args:
|
|
@@ -273,46 +273,84 @@ def _submit_batch_job(
|
|
|
273
273
|
|
|
274
274
|
|
|
275
275
|
def _run_local_mode(input_path: Path):
|
|
276
|
-
"""Run Boltz locally
|
|
277
|
-
|
|
276
|
+
"""Run Boltz locally in a Docker container.
|
|
277
|
+
|
|
278
|
+
This runs the boltz container with the normal entrypoint, processing
|
|
279
|
+
the first YAML file (index 0) for testing purposes.
|
|
280
|
+
"""
|
|
281
|
+
import subprocess
|
|
282
|
+
|
|
283
|
+
click.echo("Running Boltz locally in container...")
|
|
278
284
|
click.echo(f"Input directory: {input_path}")
|
|
279
285
|
|
|
280
|
-
# Find
|
|
286
|
+
# Find YAML files
|
|
281
287
|
yaml_files = list(input_path.glob("*.yaml"))
|
|
282
288
|
if not yaml_files:
|
|
283
289
|
click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
|
|
284
290
|
raise SystemExit(1)
|
|
285
291
|
|
|
286
|
-
|
|
287
|
-
click.echo(f"Processing: {input_file.name}")
|
|
288
|
-
click.echo()
|
|
292
|
+
click.echo(f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}")
|
|
289
293
|
|
|
290
|
-
|
|
291
|
-
|
|
294
|
+
# Create a temporary job directory structure
|
|
295
|
+
temp_job_dir = input_path / ".local_boltz_job"
|
|
296
|
+
temp_input_dir = temp_job_dir / "input"
|
|
297
|
+
temp_output_dir = temp_job_dir / "output"
|
|
292
298
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
cache_dir=(
|
|
297
|
-
"/primordial/.cache/boltz" if os.path.exists("/primordial") else None
|
|
298
|
-
),
|
|
299
|
-
)
|
|
299
|
+
# Clean up any previous run
|
|
300
|
+
if temp_job_dir.exists():
|
|
301
|
+
shutil.rmtree(temp_job_dir)
|
|
300
302
|
|
|
301
|
-
|
|
303
|
+
temp_input_dir.mkdir(parents=True)
|
|
304
|
+
temp_output_dir.mkdir(parents=True)
|
|
302
305
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
+
# Copy YAML files to input directory
|
|
307
|
+
for yaml_file in yaml_files:
|
|
308
|
+
shutil.copy2(yaml_file, temp_input_dir / yaml_file.name)
|
|
309
|
+
|
|
310
|
+
click.echo(f"Output will be at: {temp_output_dir}/")
|
|
311
|
+
click.echo()
|
|
306
312
|
|
|
307
|
-
|
|
313
|
+
cmd = [
|
|
314
|
+
"docker", "run", "--rm",
|
|
315
|
+
"--gpus", "all",
|
|
316
|
+
"-v", "/primordial:/primordial",
|
|
317
|
+
"-v", f"{temp_job_dir}:{temp_job_dir}",
|
|
318
|
+
"-e", f"JOB_DIR={temp_job_dir}",
|
|
319
|
+
"-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
|
|
320
|
+
"-e", "BOLTZ_CACHE=/primordial/.cache/boltz",
|
|
321
|
+
"-e", "MSA_DIR=/primordial/.cache/msas",
|
|
322
|
+
DEFAULT_IMAGE_URI,
|
|
323
|
+
]
|
|
324
|
+
|
|
325
|
+
click.echo(f"Running: {' '.join(cmd)}")
|
|
326
|
+
click.echo()
|
|
327
|
+
|
|
328
|
+
try:
|
|
329
|
+
result = subprocess.run(cmd)
|
|
330
|
+
if result.returncode != 0:
|
|
331
|
+
click.echo(
|
|
332
|
+
click.style(f"Container exited with code {result.returncode}", fg="red"),
|
|
333
|
+
err=True,
|
|
334
|
+
)
|
|
335
|
+
raise SystemExit(result.returncode)
|
|
336
|
+
|
|
337
|
+
# Check for output
|
|
338
|
+
output_dirs = list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
|
|
339
|
+
if output_dirs:
|
|
340
|
+
click.echo()
|
|
341
|
+
click.echo(click.style("✓ Prediction complete!", fg="green"))
|
|
342
|
+
click.echo(f"Output directory: {temp_output_dir}")
|
|
343
|
+
for d in output_dirs:
|
|
344
|
+
click.echo(f" - {d.name}")
|
|
345
|
+
else:
|
|
346
|
+
click.echo(click.style("Warning: No output found", fg="yellow"))
|
|
347
|
+
|
|
348
|
+
except FileNotFoundError:
|
|
308
349
|
click.echo(
|
|
309
|
-
click.style(
|
|
350
|
+
click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
|
|
310
351
|
err=True,
|
|
311
352
|
)
|
|
312
353
|
raise SystemExit(1)
|
|
313
|
-
except Exception as e:
|
|
314
|
-
click.echo(click.style(f"Error: {e}", fg="red"), err=True)
|
|
315
|
-
raise SystemExit(1)
|
|
316
354
|
|
|
317
355
|
|
|
318
356
|
def _run_shell_mode(input_path: Path):
|
|
@@ -136,10 +136,11 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
|
|
|
136
136
|
|
|
137
137
|
# Combine H5 files
|
|
138
138
|
click.echo("Combining H5 files...")
|
|
139
|
+
# Get list of h5 file paths as strings
|
|
140
|
+
h5_file_paths = [str(f) for f in h5_files]
|
|
139
141
|
combine_h5_files(
|
|
140
|
-
|
|
142
|
+
input_files=h5_file_paths,
|
|
141
143
|
output_file=str(output_path),
|
|
142
|
-
glob_pattern="embed_*.h5",
|
|
143
144
|
)
|
|
144
145
|
|
|
145
146
|
# Deduplicate
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
dayhoff_tools/__init__.py,sha256=M5zThPyEBRYa5CfwlzKhcqTevWn3OKu62cjV6Zqie2A,469
|
|
2
2
|
dayhoff_tools/batch/__init__.py,sha256=N7v1pUg3rp68W6J7cX2Gssxdfp57Z7G0WeFJh7gKeiM,163
|
|
3
3
|
dayhoff_tools/batch/workers/__init__.py,sha256=DJTtiBSE5k9J6qwG_MsXDn8hgz_lvEdaMwqKa6YyqXo,369
|
|
4
|
-
dayhoff_tools/batch/workers/base.py,sha256=
|
|
4
|
+
dayhoff_tools/batch/workers/base.py,sha256=Jie5ScJrPSRkmLQdcM2hPJTmk3pIeLL0oA8Y9pkP0HA,4269
|
|
5
5
|
dayhoff_tools/batch/workers/boltz.py,sha256=QTIqMWuV_o3v1tYaZhvjyCl5F65ZlPcc0-TXtitMaFQ,14663
|
|
6
6
|
dayhoff_tools/batch/workers/embed_t5.py,sha256=A5WqsQa1WZ7_la5X5wt0XUP-VwOglH04WyEINhwipeY,2750
|
|
7
7
|
dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf2ElfZDXEpY,11188
|
|
@@ -10,10 +10,10 @@ dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
10
10
|
dayhoff_tools/cli/batch/__init__.py,sha256=gEuiDPWio8ihrIfF_Q6-5hKOnliPTxDKQrKN8-4Y3Ac,2320
|
|
11
11
|
dayhoff_tools/cli/batch/aws_batch.py,sha256=L6lNpUaxyhMvLcM3dF8yEi2yCgTNdviaNMmG75bpBIc,12719
|
|
12
12
|
dayhoff_tools/cli/batch/commands/__init__.py,sha256=1xRzzL_mc1hz1Pv0OWNr-g6fkL5XbEsOTGHzrqddLCA,458
|
|
13
|
-
dayhoff_tools/cli/batch/commands/boltz.py,sha256=
|
|
13
|
+
dayhoff_tools/cli/batch/commands/boltz.py,sha256=WARoklK5QTo71echUkm-anyj2AlEI9AEtWaiD_ds-EU,11759
|
|
14
14
|
dayhoff_tools/cli/batch/commands/cancel.py,sha256=5FHLRoq8nvOr8HuKxXUZqAyAdjQP8seaNSaeAdhOoE8,2890
|
|
15
15
|
dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=UtF8ulJcz15sdd1QzvPVPeioN_AMks1DZ7c6vKtwhDE,11052
|
|
16
|
-
dayhoff_tools/cli/batch/commands/finalize.py,sha256=
|
|
16
|
+
dayhoff_tools/cli/batch/commands/finalize.py,sha256=i9In09vrzc9jQlCA6F82ceuZB9mHUXeOaYYyRUrHQCc,7254
|
|
17
17
|
dayhoff_tools/cli/batch/commands/list_jobs.py,sha256=y90_XqFI2qcvV2XUC01P3P2aIdtl5hXP3cBDEofsRtU,2432
|
|
18
18
|
dayhoff_tools/cli/batch/commands/local.py,sha256=vOTojTAp4YZXIMLKNqc76oIE0oWMjB1mtTe6r19LrEc,3075
|
|
19
19
|
dayhoff_tools/cli/batch/commands/logs.py,sha256=FOYpANQCsQTJKigq77k1xbhgOqtRPw9hyHkljILWlmo,5241
|
|
@@ -70,7 +70,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
|
70
70
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
|
71
71
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
|
72
72
|
dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
|
|
73
|
-
dayhoff_tools-1.14.
|
|
74
|
-
dayhoff_tools-1.14.
|
|
75
|
-
dayhoff_tools-1.14.
|
|
76
|
-
dayhoff_tools-1.14.
|
|
73
|
+
dayhoff_tools-1.14.6.dist-info/METADATA,sha256=3rnOAUIQQN2JYIy6MtfSgAraglhmDY64Aq7fQWs-iRo,3184
|
|
74
|
+
dayhoff_tools-1.14.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
75
|
+
dayhoff_tools-1.14.6.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
|
76
|
+
dayhoff_tools-1.14.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|