dayhoff-tools 1.14.8__tar.gz → 1.14.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76):
  1. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/boltz.py +93 -26
  3. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/boltz.py +2 -0
  4. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/clean.py +3 -3
  5. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/finalize.py +97 -13
  6. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/pyproject.toml +1 -1
  7. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/README.md +0 -0
  8. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/__init__.py +0 -0
  9. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/__init__.py +0 -0
  10. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/__init__.py +0 -0
  11. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/base.py +0 -0
  12. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
  13. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/chemistry/standardizer.py +0 -0
  14. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/chemistry/utils.py +0 -0
  15. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/__init__.py +0 -0
  16. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/__init__.py +0 -0
  17. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
  18. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
  19. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/cancel.py +0 -0
  20. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -0
  21. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
  22. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/local.py +0 -0
  23. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
  24. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
  25. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/status.py +0 -0
  26. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/submit.py +0 -0
  27. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/job_id.py +0 -0
  28. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/manifest.py +0 -0
  29. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/cloud_commands.py +0 -0
  30. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/__init__.py +0 -0
  31. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
  32. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
  33. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
  34. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
  35. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/shared.py +0 -0
  36. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
  37. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
  38. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
  39. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
  40. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
  41. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
  42. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
  43. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
  44. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
  45. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
  46. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
  47. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
  48. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
  49. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
  50. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
  51. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
  52. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
  53. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/github_commands.py +0 -0
  54. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/main.py +0 -0
  55. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/swarm_commands.py +0 -0
  56. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/utility_commands.py +0 -0
  57. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/base.py +0 -0
  58. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  59. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  60. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  61. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/job_runner.py +0 -0
  62. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/processors.py +0 -0
  63. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/swarm.py +0 -0
  64. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/embedders.py +0 -0
  65. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/fasta.py +0 -0
  66. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/file_ops.py +0 -0
  67. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/h5.py +0 -0
  68. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/gcp.py +0 -0
  69. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/gtdb.py +0 -0
  70. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/kegg.py +0 -0
  71. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/mmseqs.py +0 -0
  72. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/structure.py +0 -0
  73. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/uniprot.py +0 -0
  74. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/logs.py +0 -0
  75. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/sqlite.py +0 -0
  76. {dayhoff_tools-1.14.8 → dayhoff_tools-1.14.10}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.8
3
+ Version: 1.14.10
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -341,10 +341,31 @@ class BoltzProcessor:
341
341
  return expected_output_dir
342
342
 
343
343
 
344
+ def _get_done_marker_for_file(job_dir: Path, file_stem: str) -> Path:
345
+ """Get the done marker path for a specific input file."""
346
+ return job_dir / "output" / f"boltz_{file_stem}.done"
347
+
348
+
349
+ def _check_file_complete(job_dir: Path, file_stem: str) -> bool:
350
+ """Check if a specific file has been processed."""
351
+ return _get_done_marker_for_file(job_dir, file_stem).exists()
352
+
353
+
354
+ def _mark_file_complete(job_dir: Path, file_stem: str):
355
+ """Mark a specific file as complete."""
356
+ done_marker = _get_done_marker_for_file(job_dir, file_stem)
357
+ done_marker.parent.mkdir(parents=True, exist_ok=True)
358
+ done_marker.touch()
359
+ logger.info(f"File {file_stem} marked complete: {done_marker}")
360
+
361
+
344
362
  def main():
345
- """Boltz worker main entrypoint for AWS Batch array jobs."""
363
+ """Boltz worker main entrypoint for AWS Batch array jobs.
364
+
365
+ Each worker processes multiple files based on array index and total workers.
366
+ With N files and M workers, worker i processes files where file_index % M == i.
367
+ """
346
368
  from .base import (
347
- check_already_complete,
348
369
  configure_worker_logging,
349
370
  get_array_index,
350
371
  get_job_dir,
@@ -358,34 +379,36 @@ def main():
358
379
  # Get configuration from environment
359
380
  index = get_array_index()
360
381
  job_dir = get_job_dir()
382
+ array_size = int(os.environ.get("BATCH_ARRAY_SIZE", "1"))
383
+ num_files = int(os.environ.get("BATCH_NUM_FILES", "0"))
361
384
 
362
385
  logger.info(f"Worker configuration:")
363
386
  logger.info(f" Array index: {index}")
387
+ logger.info(f" Array size: {array_size}")
388
+ logger.info(f" Total files: {num_files}")
364
389
  logger.info(f" Job directory: {job_dir}")
365
390
 
366
- # Check idempotency
367
- if check_already_complete(index, job_dir, prefix="boltz"):
368
- logger.info("Exiting - complex already processed")
369
- return
370
-
371
- # Find input file by index
391
+ # Find all input files
372
392
  input_dir = job_dir / "input"
373
393
  input_files = sorted(input_dir.glob("*.yaml"))
394
+ total_files = len(input_files)
374
395
 
375
- if index >= len(input_files):
376
- logger.error(
377
- f"Index {index} out of range. Found {len(input_files)} input files."
378
- )
379
- raise RuntimeError(f"Index {index} out of range")
396
+ if total_files == 0:
397
+ logger.error("No input files found")
398
+ raise RuntimeError("No input files found")
380
399
 
381
- input_file = input_files[index]
382
- logger.info(f" Input file: {input_file}")
400
+ # Calculate which files this worker should process
401
+ # Worker i processes files where file_index % array_size == index
402
+ my_files = [f for i, f in enumerate(input_files) if i % array_size == index]
383
403
 
384
- # Determine output directory
385
- output_base = input_file.stem
386
- output_dir = job_dir / "output" / output_base
404
+ logger.info(f" Files assigned to this worker: {len(my_files)}")
387
405
 
388
- # Get MSA directories
406
+ if not my_files:
407
+ logger.info("No files assigned to this worker, exiting successfully")
408
+ mark_complete(index, job_dir, prefix="boltz")
409
+ return
410
+
411
+ # Get MSA directories (shared across all files)
389
412
  job_msa_dir = job_dir / "msas"
390
413
  global_msa_dir = Path(os.environ.get("MSA_DIR", "/primordial/.cache/msas"))
391
414
 
@@ -408,7 +431,7 @@ def main():
408
431
  if boltz_options:
409
432
  logger.info(f" Boltz options: {boltz_options}")
410
433
 
411
- # Create processor and run
434
+ # Create processor (reused for all files)
412
435
  processor = BoltzProcessor(
413
436
  num_workers=None, # Auto-detect
414
437
  boltz_options=boltz_options,
@@ -416,16 +439,60 @@ def main():
416
439
  cache_dir=cache_dir,
417
440
  )
418
441
 
419
- # Ensure output directory exists
420
- output_dir.parent.mkdir(parents=True, exist_ok=True)
442
+ # Process each assigned file
443
+ completed = 0
444
+ failed = 0
445
+
446
+ for file_idx, input_file in enumerate(my_files):
447
+ file_stem = input_file.stem
448
+
449
+ # Check if this file is already complete (idempotency)
450
+ if _check_file_complete(job_dir, file_stem):
451
+ logger.info(
452
+ f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
453
+ "already complete, skipping"
454
+ )
455
+ completed += 1
456
+ continue
457
+
458
+ logger.info(
459
+ f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}..."
460
+ )
461
+
462
+ try:
463
+ # Determine output directory
464
+ output_dir = job_dir / "output" / file_stem
465
+ output_dir.parent.mkdir(parents=True, exist_ok=True)
466
+
467
+ result_dir = processor.run(str(input_file), str(output_dir))
468
+
469
+ # Mark this file as complete
470
+ _mark_file_complete(job_dir, file_stem)
471
+
472
+ logger.info(
473
+ f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
474
+ f"completed successfully -> {result_dir}"
475
+ )
476
+ completed += 1
477
+
478
+ except Exception as e:
479
+ logger.error(
480
+ f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
481
+ f"failed with error: {e}"
482
+ )
483
+ failed += 1
484
+ # Continue processing other files even if one fails
421
485
 
422
- result_dir = processor.run(str(input_file), str(output_dir))
486
+ # Summary
487
+ logger.info(f"Worker {index} finished: {completed} completed, {failed} failed")
423
488
 
424
- # Mark as complete
489
+ # Mark worker as complete
425
490
  mark_complete(index, job_dir, prefix="boltz")
426
491
 
427
- logger.info(f"Complex {input_file.stem} completed successfully")
428
- logger.info(f"Output: {result_dir}")
492
+ if failed > 0:
493
+ logger.warning(f"{failed} file(s) failed to process")
494
+ # Don't exit with error - some files succeeded and are marked complete
495
+ # The failed files can be retried later
429
496
 
430
497
  except Exception as e:
431
498
  logger.exception(f"Worker failed with error: {e}")
@@ -232,6 +232,8 @@ def _submit_batch_job(
232
232
  "JOB_ID": job_id,
233
233
  "BOLTZ_CACHE": "/primordial/.cache/boltz",
234
234
  "MSA_DIR": "/primordial/.cache/msas",
235
+ "BATCH_ARRAY_SIZE": str(array_size),
236
+ "BATCH_NUM_FILES": str(num_files),
235
237
  }
236
238
 
237
239
  batch_job_id = client.submit_job(
@@ -80,9 +80,9 @@ def clean(user, older_than, dry_run, force, base_path):
80
80
  client = BatchClient()
81
81
  live_statuses = client.get_job_statuses_batch(batch_job_ids)
82
82
  except BatchError as e:
83
- click.echo(f"Error: Could not fetch status from AWS Batch: {e}")
84
- click.echo("Cannot safely clean jobs without knowing their status.")
85
- return
83
+ click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
84
+ click.echo("Cannot safely clean jobs without knowing their status.", err=True)
85
+ raise SystemExit(1)
86
86
 
87
87
  # Find jobs that are safe to clean (SUCCEEDED or FAILED)
88
88
  safe_to_clean = []
@@ -29,17 +29,30 @@ from ..manifest import (
29
29
  is_flag=True,
30
30
  help="Don't delete job directory after finalizing",
31
31
  )
32
+ @click.option(
33
+ "--full-output",
34
+ is_flag=True,
35
+ help="For Boltz: copy entire output directory (default: only essential files)",
36
+ )
32
37
  @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
33
- def finalize(job_id, output, force, keep_intermediates, base_path):
38
+ def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
34
39
  """Combine results and clean up job intermediates.
35
40
 
36
41
  For embedding jobs, combines H5 files into a single output file.
37
- For structure prediction, moves outputs to the destination.
42
+ For Boltz jobs, extracts essential files (CIF structures and confidence JSON).
38
43
 
39
44
  \b
40
45
  Examples:
46
+ # Embedding job - combine H5 files
41
47
  dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5
42
- dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5 --force
48
+
49
+ # Boltz job - extract essential files only (default)
50
+ dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/
51
+
52
+ # Boltz job - copy all output files
53
+ dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/ --full-output
54
+
55
+ # Keep job directory after finalizing
43
56
  dh batch finalize dma-embed-20260109-a3f2 --output /primordial/out.h5 --keep-intermediates
44
57
  """
45
58
  # Load manifest
@@ -81,7 +94,7 @@ def finalize(job_id, output, force, keep_intermediates, base_path):
81
94
  if manifest.pipeline in ("embed-t5", "embed"):
82
95
  _finalize_embeddings(output_dir, output_path)
83
96
  elif manifest.pipeline == "boltz":
84
- _finalize_boltz(output_dir, output_path)
97
+ _finalize_boltz(output_dir, output_path, full_output=full_output)
85
98
  else:
86
99
  _finalize_generic(output_dir, output_path)
87
100
 
@@ -196,29 +209,100 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
196
209
  shutil.copy2(h5_files[0], output_path)
197
210
 
198
211
 
199
- def _finalize_boltz(output_dir: Path, output_path: Path):
200
- """Move Boltz output directories to destination."""
212
+ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
213
+ """Move Boltz output to destination.
214
+
215
+ Args:
216
+ output_dir: Source directory containing boltz_results_* folders
217
+ output_path: Destination directory for outputs
218
+ full_output: If True, copy entire output directories. If False (default),
219
+ extract only essential files (CIF structures and confidence JSON).
220
+ """
201
221
  # Find all output directories (one per complex)
202
- complex_dirs = [d for d in output_dir.iterdir() if d.is_dir()]
222
+ complex_dirs = [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith("boltz_results_")]
203
223
 
204
224
  if not complex_dirs:
205
225
  click.echo("No output directories found.", err=True)
206
226
  raise SystemExit(1)
207
227
 
208
- click.echo(f"Found {len(complex_dirs)} structure predictions to move")
228
+ click.echo(f"Found {len(complex_dirs)} structure predictions")
229
+
230
+ if full_output:
231
+ click.echo("Mode: Copying full output (all files)")
232
+ else:
233
+ click.echo("Mode: Extracting essential files only (CIF + confidence JSON)")
234
+ click.echo(" Use --full-output to copy all files")
235
+
236
+ # Confirm before proceeding
237
+ click.echo()
238
+ if not click.confirm(f"Copy results to {output_path}?"):
239
+ click.echo("Cancelled.")
240
+ raise SystemExit(0)
209
241
 
210
242
  # Ensure output directory exists
211
243
  output_path.mkdir(parents=True, exist_ok=True)
212
244
 
245
+ copied_count = 0
246
+ skipped_count = 0
247
+
213
248
  for complex_dir in complex_dirs:
214
- dest = output_path / complex_dir.name
249
+ complex_name = complex_dir.name.replace("boltz_results_", "")
250
+ dest = output_path / complex_name
251
+
215
252
  if dest.exists():
216
- click.echo(f" Skipping {complex_dir.name} (already exists)")
253
+ click.echo(f" Skipping {complex_name} (already exists)")
254
+ skipped_count += 1
217
255
  continue
218
- shutil.move(str(complex_dir), str(dest))
219
- click.echo(f" Moved {complex_dir.name}")
256
+
257
+ if full_output:
258
+ # Copy entire directory
259
+ shutil.copytree(complex_dir, dest)
260
+ click.echo(f" Copied {complex_name} (full output)")
261
+ else:
262
+ # Extract only essential files
263
+ _extract_essential_boltz_files(complex_dir, dest, complex_name)
264
+ click.echo(f" Extracted {complex_name} (essential files)")
265
+
266
+ copied_count += 1
220
267
 
221
- click.echo(click.style("✓ Structures moved successfully", fg="green"))
268
+ click.echo()
269
+ if skipped_count > 0:
270
+ click.echo(f"Copied {copied_count} predictions, skipped {skipped_count} existing")
271
+ else:
272
+ click.echo(click.style(f"✓ Copied {copied_count} structure predictions successfully", fg="green"))
273
+
274
+
275
+ def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_name: str):
276
+ """Extract only essential files from Boltz output.
277
+
278
+ Essential files are:
279
+ - predictions/*/*.cif (structure files)
280
+ - predictions/*/confidence_*.json (confidence metrics)
281
+
282
+ Args:
283
+ source_dir: Source boltz_results_* directory
284
+ dest_dir: Destination directory to create
285
+ complex_name: Name of the complex (for better error messages)
286
+ """
287
+ dest_dir.mkdir(parents=True, exist_ok=True)
288
+
289
+ predictions_dir = source_dir / "predictions"
290
+ if not predictions_dir.exists():
291
+ click.echo(f" Warning: No predictions directory found for {complex_name}", err=True)
292
+ return
293
+
294
+ # Find all subdirectories in predictions/ (usually just one named after the complex)
295
+ for pred_subdir in predictions_dir.iterdir():
296
+ if not pred_subdir.is_dir():
297
+ continue
298
+
299
+ # Copy CIF files (structures)
300
+ for cif_file in pred_subdir.glob("*.cif"):
301
+ shutil.copy2(cif_file, dest_dir / cif_file.name)
302
+
303
+ # Copy confidence JSON files
304
+ for json_file in pred_subdir.glob("confidence_*.json"):
305
+ shutil.copy2(json_file, dest_dir / json_file.name)
222
306
 
223
307
 
224
308
  def _finalize_generic(output_dir: Path, output_path: Path):
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.14.8"
8
+ version = "1.14.10"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}