dayhoff-tools 1.14.8__py3-none-any.whl → 1.14.9__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- dayhoff_tools/cli/batch/commands/clean.py +3 -3
- dayhoff_tools/cli/batch/commands/finalize.py +97 -13
- {dayhoff_tools-1.14.8.dist-info → dayhoff_tools-1.14.9.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.14.8.dist-info → dayhoff_tools-1.14.9.dist-info}/RECORD +6 -6
- {dayhoff_tools-1.14.8.dist-info → dayhoff_tools-1.14.9.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.14.8.dist-info → dayhoff_tools-1.14.9.dist-info}/entry_points.txt +0 -0
|
@@ -80,9 +80,9 @@ def clean(user, older_than, dry_run, force, base_path):
|
|
|
80
80
|
client = BatchClient()
|
|
81
81
|
live_statuses = client.get_job_statuses_batch(batch_job_ids)
|
|
82
82
|
except BatchError as e:
|
|
83
|
-
click.echo(f"Error: Could not fetch status from AWS Batch: {e}")
|
|
84
|
-
click.echo("Cannot safely clean jobs without knowing their status.")
|
|
85
|
-
|
|
83
|
+
click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
|
|
84
|
+
click.echo("Cannot safely clean jobs without knowing their status.", err=True)
|
|
85
|
+
raise SystemExit(1)
|
|
86
86
|
|
|
87
87
|
# Find jobs that are safe to clean (SUCCEEDED or FAILED)
|
|
88
88
|
safe_to_clean = []
|
|
@@ -29,17 +29,30 @@ from ..manifest import (
|
|
|
29
29
|
is_flag=True,
|
|
30
30
|
help="Don't delete job directory after finalizing",
|
|
31
31
|
)
|
|
32
|
+
@click.option(
|
|
33
|
+
"--full-output",
|
|
34
|
+
is_flag=True,
|
|
35
|
+
help="For Boltz: copy entire output directory (default: only essential files)",
|
|
36
|
+
)
|
|
32
37
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
33
|
-
def finalize(job_id, output, force, keep_intermediates, base_path):
|
|
38
|
+
def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
|
|
34
39
|
"""Combine results and clean up job intermediates.
|
|
35
40
|
|
|
36
41
|
For embedding jobs, combines H5 files into a single output file.
|
|
37
|
-
For
|
|
42
|
+
For Boltz jobs, extracts essential files (CIF structures and confidence JSON).
|
|
38
43
|
|
|
39
44
|
\b
|
|
40
45
|
Examples:
|
|
46
|
+
# Embedding job - combine H5 files
|
|
41
47
|
dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5
|
|
42
|
-
|
|
48
|
+
|
|
49
|
+
# Boltz job - extract essential files only (default)
|
|
50
|
+
dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/
|
|
51
|
+
|
|
52
|
+
# Boltz job - copy all output files
|
|
53
|
+
dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/ --full-output
|
|
54
|
+
|
|
55
|
+
# Keep job directory after finalizing
|
|
43
56
|
dh batch finalize dma-embed-20260109-a3f2 --output /primordial/out.h5 --keep-intermediates
|
|
44
57
|
"""
|
|
45
58
|
# Load manifest
|
|
@@ -81,7 +94,7 @@ def finalize(job_id, output, force, keep_intermediates, base_path):
|
|
|
81
94
|
if manifest.pipeline in ("embed-t5", "embed"):
|
|
82
95
|
_finalize_embeddings(output_dir, output_path)
|
|
83
96
|
elif manifest.pipeline == "boltz":
|
|
84
|
-
_finalize_boltz(output_dir, output_path)
|
|
97
|
+
_finalize_boltz(output_dir, output_path, full_output=full_output)
|
|
85
98
|
else:
|
|
86
99
|
_finalize_generic(output_dir, output_path)
|
|
87
100
|
|
|
@@ -196,29 +209,100 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
|
|
|
196
209
|
shutil.copy2(h5_files[0], output_path)
|
|
197
210
|
|
|
198
211
|
|
|
199
|
-
def _finalize_boltz(output_dir: Path, output_path: Path):
|
|
200
|
-
"""Move Boltz output
|
|
212
|
+
def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
|
|
213
|
+
"""Move Boltz output to destination.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
output_dir: Source directory containing boltz_results_* folders
|
|
217
|
+
output_path: Destination directory for outputs
|
|
218
|
+
full_output: If True, copy entire output directories. If False (default),
|
|
219
|
+
extract only essential files (CIF structures and confidence JSON).
|
|
220
|
+
"""
|
|
201
221
|
# Find all output directories (one per complex)
|
|
202
|
-
complex_dirs = [d for d in output_dir.iterdir() if d.is_dir()]
|
|
222
|
+
complex_dirs = [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith("boltz_results_")]
|
|
203
223
|
|
|
204
224
|
if not complex_dirs:
|
|
205
225
|
click.echo("No output directories found.", err=True)
|
|
206
226
|
raise SystemExit(1)
|
|
207
227
|
|
|
208
|
-
click.echo(f"Found {len(complex_dirs)} structure predictions
|
|
228
|
+
click.echo(f"Found {len(complex_dirs)} structure predictions")
|
|
229
|
+
|
|
230
|
+
if full_output:
|
|
231
|
+
click.echo("Mode: Copying full output (all files)")
|
|
232
|
+
else:
|
|
233
|
+
click.echo("Mode: Extracting essential files only (CIF + confidence JSON)")
|
|
234
|
+
click.echo(" Use --full-output to copy all files")
|
|
235
|
+
|
|
236
|
+
# Confirm before proceeding
|
|
237
|
+
click.echo()
|
|
238
|
+
if not click.confirm(f"Copy results to {output_path}?"):
|
|
239
|
+
click.echo("Cancelled.")
|
|
240
|
+
raise SystemExit(0)
|
|
209
241
|
|
|
210
242
|
# Ensure output directory exists
|
|
211
243
|
output_path.mkdir(parents=True, exist_ok=True)
|
|
212
244
|
|
|
245
|
+
copied_count = 0
|
|
246
|
+
skipped_count = 0
|
|
247
|
+
|
|
213
248
|
for complex_dir in complex_dirs:
|
|
214
|
-
|
|
249
|
+
complex_name = complex_dir.name.replace("boltz_results_", "")
|
|
250
|
+
dest = output_path / complex_name
|
|
251
|
+
|
|
215
252
|
if dest.exists():
|
|
216
|
-
click.echo(f" Skipping {
|
|
253
|
+
click.echo(f" Skipping {complex_name} (already exists)")
|
|
254
|
+
skipped_count += 1
|
|
217
255
|
continue
|
|
218
|
-
|
|
219
|
-
|
|
256
|
+
|
|
257
|
+
if full_output:
|
|
258
|
+
# Copy entire directory
|
|
259
|
+
shutil.copytree(complex_dir, dest)
|
|
260
|
+
click.echo(f" Copied {complex_name} (full output)")
|
|
261
|
+
else:
|
|
262
|
+
# Extract only essential files
|
|
263
|
+
_extract_essential_boltz_files(complex_dir, dest, complex_name)
|
|
264
|
+
click.echo(f" Extracted {complex_name} (essential files)")
|
|
265
|
+
|
|
266
|
+
copied_count += 1
|
|
220
267
|
|
|
221
|
-
click.echo(
|
|
268
|
+
click.echo()
|
|
269
|
+
if skipped_count > 0:
|
|
270
|
+
click.echo(f"Copied {copied_count} predictions, skipped {skipped_count} existing")
|
|
271
|
+
else:
|
|
272
|
+
click.echo(click.style(f"✓ Copied {copied_count} structure predictions successfully", fg="green"))
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_name: str):
|
|
276
|
+
"""Extract only essential files from Boltz output.
|
|
277
|
+
|
|
278
|
+
Essential files are:
|
|
279
|
+
- predictions/*/*.cif (structure files)
|
|
280
|
+
- predictions/*/confidence_*.json (confidence metrics)
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
source_dir: Source boltz_results_* directory
|
|
284
|
+
dest_dir: Destination directory to create
|
|
285
|
+
complex_name: Name of the complex (for better error messages)
|
|
286
|
+
"""
|
|
287
|
+
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
288
|
+
|
|
289
|
+
predictions_dir = source_dir / "predictions"
|
|
290
|
+
if not predictions_dir.exists():
|
|
291
|
+
click.echo(f" Warning: No predictions directory found for {complex_name}", err=True)
|
|
292
|
+
return
|
|
293
|
+
|
|
294
|
+
# Find all subdirectories in predictions/ (usually just one named after the complex)
|
|
295
|
+
for pred_subdir in predictions_dir.iterdir():
|
|
296
|
+
if not pred_subdir.is_dir():
|
|
297
|
+
continue
|
|
298
|
+
|
|
299
|
+
# Copy CIF files (structures)
|
|
300
|
+
for cif_file in pred_subdir.glob("*.cif"):
|
|
301
|
+
shutil.copy2(cif_file, dest_dir / cif_file.name)
|
|
302
|
+
|
|
303
|
+
# Copy confidence JSON files
|
|
304
|
+
for json_file in pred_subdir.glob("confidence_*.json"):
|
|
305
|
+
shutil.copy2(json_file, dest_dir / json_file.name)
|
|
222
306
|
|
|
223
307
|
|
|
224
308
|
def _finalize_generic(output_dir: Path, output_path: Path):
|
|
@@ -12,9 +12,9 @@ dayhoff_tools/cli/batch/aws_batch.py,sha256=DOp8KvmTrie15O61DP1HT13PSr3s5imxM-VZ
|
|
|
12
12
|
dayhoff_tools/cli/batch/commands/__init__.py,sha256=1xRzzL_mc1hz1Pv0OWNr-g6fkL5XbEsOTGHzrqddLCA,458
|
|
13
13
|
dayhoff_tools/cli/batch/commands/boltz.py,sha256=KbBxeF0HS_ysgd7MyeJgrkcGTAXChJLQxOhvJ14SxjY,11989
|
|
14
14
|
dayhoff_tools/cli/batch/commands/cancel.py,sha256=ZnHAJVzMGC0_1EQGpMSdYUlzm9yi-E9NxRJKBsetYW8,3111
|
|
15
|
-
dayhoff_tools/cli/batch/commands/clean.py,sha256=
|
|
15
|
+
dayhoff_tools/cli/batch/commands/clean.py,sha256=nWOKbVM2nDuLMpyC038Q9aylOQxk2bq4N0JF65qJg-s,4570
|
|
16
16
|
dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=QXFydAw0wndevdzXF1cxikxMmvn1BuQ5p9lwutQFajU,11453
|
|
17
|
-
dayhoff_tools/cli/batch/commands/finalize.py,sha256=
|
|
17
|
+
dayhoff_tools/cli/batch/commands/finalize.py,sha256=Ir8XeA62lk_nUcWCLjjsYeqS58BBM1LTaEorEmQxOhA,11118
|
|
18
18
|
dayhoff_tools/cli/batch/commands/list_jobs.py,sha256=COfxZddDVUAHeTayNAB3ruYNhgrE3osgFxY2qzf33cg,4284
|
|
19
19
|
dayhoff_tools/cli/batch/commands/local.py,sha256=dZeKhNakaM1jS-EoByAwg1nWspRRoOmYzcwzjEKBaIA,3226
|
|
20
20
|
dayhoff_tools/cli/batch/commands/logs.py,sha256=ctgJksdzFmqBdD18ePPsZe2BpuJYtHz2xAaMPnUplmQ,5293
|
|
@@ -71,7 +71,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
|
71
71
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
|
72
72
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
|
73
73
|
dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
|
|
74
|
-
dayhoff_tools-1.14.
|
|
75
|
-
dayhoff_tools-1.14.
|
|
76
|
-
dayhoff_tools-1.14.
|
|
77
|
-
dayhoff_tools-1.14.
|
|
74
|
+
dayhoff_tools-1.14.9.dist-info/METADATA,sha256=wMJQu1j3rSQF2WQLf_zecDpfp_Ly7JGAfUqI8uwRtPY,3184
|
|
75
|
+
dayhoff_tools-1.14.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
76
|
+
dayhoff_tools-1.14.9.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
|
77
|
+
dayhoff_tools-1.14.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|