dayhoff-tools 1.14.9__tar.gz → 1.14.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/PKG-INFO +1 -1
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/boltz.py +93 -26
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/boltz.py +2 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/pyproject.toml +1 -1
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/README.md +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/base.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/cancel.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/clean.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/finalize.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/local.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/status.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/submit.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/job_id.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/manifest.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/shared.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/github_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/utility_commands.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/warehouse.py +0 -0
|
@@ -341,10 +341,31 @@ class BoltzProcessor:
|
|
|
341
341
|
return expected_output_dir
|
|
342
342
|
|
|
343
343
|
|
|
344
|
+
def _get_done_marker_for_file(job_dir: Path, file_stem: str) -> Path:
|
|
345
|
+
"""Get the done marker path for a specific input file."""
|
|
346
|
+
return job_dir / "output" / f"boltz_{file_stem}.done"
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _check_file_complete(job_dir: Path, file_stem: str) -> bool:
|
|
350
|
+
"""Check if a specific file has been processed."""
|
|
351
|
+
return _get_done_marker_for_file(job_dir, file_stem).exists()
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _mark_file_complete(job_dir: Path, file_stem: str):
|
|
355
|
+
"""Mark a specific file as complete."""
|
|
356
|
+
done_marker = _get_done_marker_for_file(job_dir, file_stem)
|
|
357
|
+
done_marker.parent.mkdir(parents=True, exist_ok=True)
|
|
358
|
+
done_marker.touch()
|
|
359
|
+
logger.info(f"File {file_stem} marked complete: {done_marker}")
|
|
360
|
+
|
|
361
|
+
|
|
344
362
|
def main():
|
|
345
|
-
"""Boltz worker main entrypoint for AWS Batch array jobs.
|
|
363
|
+
"""Boltz worker main entrypoint for AWS Batch array jobs.
|
|
364
|
+
|
|
365
|
+
Each worker processes multiple files based on array index and total workers.
|
|
366
|
+
With N files and M workers, worker i processes files where file_index % M == i.
|
|
367
|
+
"""
|
|
346
368
|
from .base import (
|
|
347
|
-
check_already_complete,
|
|
348
369
|
configure_worker_logging,
|
|
349
370
|
get_array_index,
|
|
350
371
|
get_job_dir,
|
|
@@ -358,34 +379,36 @@ def main():
|
|
|
358
379
|
# Get configuration from environment
|
|
359
380
|
index = get_array_index()
|
|
360
381
|
job_dir = get_job_dir()
|
|
382
|
+
array_size = int(os.environ.get("BATCH_ARRAY_SIZE", "1"))
|
|
383
|
+
num_files = int(os.environ.get("BATCH_NUM_FILES", "0"))
|
|
361
384
|
|
|
362
385
|
logger.info(f"Worker configuration:")
|
|
363
386
|
logger.info(f" Array index: {index}")
|
|
387
|
+
logger.info(f" Array size: {array_size}")
|
|
388
|
+
logger.info(f" Total files: {num_files}")
|
|
364
389
|
logger.info(f" Job directory: {job_dir}")
|
|
365
390
|
|
|
366
|
-
#
|
|
367
|
-
if check_already_complete(index, job_dir, prefix="boltz"):
|
|
368
|
-
logger.info("Exiting - complex already processed")
|
|
369
|
-
return
|
|
370
|
-
|
|
371
|
-
# Find input file by index
|
|
391
|
+
# Find all input files
|
|
372
392
|
input_dir = job_dir / "input"
|
|
373
393
|
input_files = sorted(input_dir.glob("*.yaml"))
|
|
394
|
+
total_files = len(input_files)
|
|
374
395
|
|
|
375
|
-
if
|
|
376
|
-
logger.error(
|
|
377
|
-
|
|
378
|
-
)
|
|
379
|
-
raise RuntimeError(f"Index {index} out of range")
|
|
396
|
+
if total_files == 0:
|
|
397
|
+
logger.error("No input files found")
|
|
398
|
+
raise RuntimeError("No input files found")
|
|
380
399
|
|
|
381
|
-
|
|
382
|
-
|
|
400
|
+
# Calculate which files this worker should process
|
|
401
|
+
# Worker i processes files where file_index % array_size == index
|
|
402
|
+
my_files = [f for i, f in enumerate(input_files) if i % array_size == index]
|
|
383
403
|
|
|
384
|
-
|
|
385
|
-
output_base = input_file.stem
|
|
386
|
-
output_dir = job_dir / "output" / output_base
|
|
404
|
+
logger.info(f" Files assigned to this worker: {len(my_files)}")
|
|
387
405
|
|
|
388
|
-
|
|
406
|
+
if not my_files:
|
|
407
|
+
logger.info("No files assigned to this worker, exiting successfully")
|
|
408
|
+
mark_complete(index, job_dir, prefix="boltz")
|
|
409
|
+
return
|
|
410
|
+
|
|
411
|
+
# Get MSA directories (shared across all files)
|
|
389
412
|
job_msa_dir = job_dir / "msas"
|
|
390
413
|
global_msa_dir = Path(os.environ.get("MSA_DIR", "/primordial/.cache/msas"))
|
|
391
414
|
|
|
@@ -408,7 +431,7 @@ def main():
|
|
|
408
431
|
if boltz_options:
|
|
409
432
|
logger.info(f" Boltz options: {boltz_options}")
|
|
410
433
|
|
|
411
|
-
# Create processor
|
|
434
|
+
# Create processor (reused for all files)
|
|
412
435
|
processor = BoltzProcessor(
|
|
413
436
|
num_workers=None, # Auto-detect
|
|
414
437
|
boltz_options=boltz_options,
|
|
@@ -416,16 +439,60 @@ def main():
|
|
|
416
439
|
cache_dir=cache_dir,
|
|
417
440
|
)
|
|
418
441
|
|
|
419
|
-
#
|
|
420
|
-
|
|
442
|
+
# Process each assigned file
|
|
443
|
+
completed = 0
|
|
444
|
+
failed = 0
|
|
445
|
+
|
|
446
|
+
for file_idx, input_file in enumerate(my_files):
|
|
447
|
+
file_stem = input_file.stem
|
|
448
|
+
|
|
449
|
+
# Check if this file is already complete (idempotency)
|
|
450
|
+
if _check_file_complete(job_dir, file_stem):
|
|
451
|
+
logger.info(
|
|
452
|
+
f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
|
|
453
|
+
"already complete, skipping"
|
|
454
|
+
)
|
|
455
|
+
completed += 1
|
|
456
|
+
continue
|
|
457
|
+
|
|
458
|
+
logger.info(
|
|
459
|
+
f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}..."
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
try:
|
|
463
|
+
# Determine output directory
|
|
464
|
+
output_dir = job_dir / "output" / file_stem
|
|
465
|
+
output_dir.parent.mkdir(parents=True, exist_ok=True)
|
|
466
|
+
|
|
467
|
+
result_dir = processor.run(str(input_file), str(output_dir))
|
|
468
|
+
|
|
469
|
+
# Mark this file as complete
|
|
470
|
+
_mark_file_complete(job_dir, file_stem)
|
|
471
|
+
|
|
472
|
+
logger.info(
|
|
473
|
+
f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
|
|
474
|
+
f"completed successfully -> {result_dir}"
|
|
475
|
+
)
|
|
476
|
+
completed += 1
|
|
477
|
+
|
|
478
|
+
except Exception as e:
|
|
479
|
+
logger.error(
|
|
480
|
+
f"[{file_idx + 1}/{len(my_files)}] {file_stem}: "
|
|
481
|
+
f"failed with error: {e}"
|
|
482
|
+
)
|
|
483
|
+
failed += 1
|
|
484
|
+
# Continue processing other files even if one fails
|
|
421
485
|
|
|
422
|
-
|
|
486
|
+
# Summary
|
|
487
|
+
logger.info(f"Worker {index} finished: {completed} completed, {failed} failed")
|
|
423
488
|
|
|
424
|
-
# Mark as complete
|
|
489
|
+
# Mark worker as complete
|
|
425
490
|
mark_complete(index, job_dir, prefix="boltz")
|
|
426
491
|
|
|
427
|
-
|
|
428
|
-
|
|
492
|
+
if failed > 0:
|
|
493
|
+
logger.warning(f"{failed} file(s) failed to process")
|
|
494
|
+
# Don't exit with error - some files succeeded and are marked complete
|
|
495
|
+
# The failed files can be retried later
|
|
429
496
|
|
|
430
497
|
except Exception as e:
|
|
431
498
|
logger.exception(f"Worker failed with error: {e}")
|
|
@@ -232,6 +232,8 @@ def _submit_batch_job(
|
|
|
232
232
|
"JOB_ID": job_id,
|
|
233
233
|
"BOLTZ_CACHE": "/primordial/.cache/boltz",
|
|
234
234
|
"MSA_DIR": "/primordial/.cache/msas",
|
|
235
|
+
"BATCH_ARRAY_SIZE": str(array_size),
|
|
236
|
+
"BATCH_NUM_FILES": str(num_files),
|
|
235
237
|
}
|
|
236
238
|
|
|
237
239
|
batch_job_id = client.submit_job(
|
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
5
5
|
|
|
6
6
|
[project]
|
|
7
7
|
name = "dayhoff-tools"
|
|
8
|
-
version = "1.14.9"
|
|
8
|
+
version = "1.14.10"
|
|
9
9
|
description = "Common tools for all the repos at Dayhoff Labs"
|
|
10
10
|
authors = [
|
|
11
11
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/batch/commands/list_jobs.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_lifecycle.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_maintenance.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engine1/engine_management.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/__init__.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/api_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/engine_commands.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/progress.py
RENAMED
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/simulators/demo.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/ssh_config.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.14.9 → dayhoff_tools-1.14.10}/dayhoff_tools/cli/engines_studios/studio_commands.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|