dayhoff-tools 1.14.1__py3-none-any.whl → 1.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. dayhoff_tools/batch/__init__.py +8 -0
  2. dayhoff_tools/batch/workers/__init__.py +12 -0
  3. dayhoff_tools/batch/workers/base.py +150 -0
  4. dayhoff_tools/batch/workers/boltz.py +407 -0
  5. dayhoff_tools/batch/workers/embed_t5.py +92 -0
  6. dayhoff_tools/cli/batch/__init__.py +85 -0
  7. dayhoff_tools/cli/batch/aws_batch.py +401 -0
  8. dayhoff_tools/cli/batch/commands/__init__.py +25 -0
  9. dayhoff_tools/cli/batch/commands/boltz.py +362 -0
  10. dayhoff_tools/cli/batch/commands/cancel.py +82 -0
  11. dayhoff_tools/cli/batch/commands/embed_t5.py +303 -0
  12. dayhoff_tools/cli/batch/commands/finalize.py +206 -0
  13. dayhoff_tools/cli/batch/commands/list_jobs.py +78 -0
  14. dayhoff_tools/cli/batch/commands/local.py +95 -0
  15. dayhoff_tools/cli/batch/commands/logs.py +142 -0
  16. dayhoff_tools/cli/batch/commands/retry.py +142 -0
  17. dayhoff_tools/cli/batch/commands/status.py +214 -0
  18. dayhoff_tools/cli/batch/commands/submit.py +215 -0
  19. dayhoff_tools/cli/batch/job_id.py +151 -0
  20. dayhoff_tools/cli/batch/manifest.py +293 -0
  21. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +26 -21
  22. dayhoff_tools/cli/engines_studios/engine_commands.py +16 -89
  23. dayhoff_tools/cli/engines_studios/ssh_config.py +96 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +15 -4
  25. dayhoff_tools/cli/main.py +14 -0
  26. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/METADATA +6 -1
  27. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/RECORD +29 -8
  28. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/WHEEL +0 -0
  29. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,362 @@
1
+ """Boltz structure prediction pipeline command."""
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from ..aws_batch import BatchClient, BatchError
10
+ from ..job_id import generate_job_id
11
+ from ..manifest import (
12
+ BATCH_JOBS_BASE,
13
+ BatchConfig,
14
+ InputConfig,
15
+ JobManifest,
16
+ JobStatus,
17
+ OutputConfig,
18
+ create_job_directory,
19
+ get_job_dir,
20
+ save_manifest,
21
+ )
22
+
23
+ # Default settings for Boltz
24
+ # NOTE: A10G would be preferred (24GB vs 16GB VRAM) but has a bug.
25
+ # Using T4 until A10G is debugged. See new_batch.md Known Issues.
26
+ DEFAULT_QUEUE = "t4-1x-spot"
27
+ DEFAULT_WORKERS = 50
28
+ DEFAULT_JOB_DEFINITION = "dayhoff-boltz"
29
+ DEFAULT_IMAGE_URI = "074735440724.dkr.ecr.us-east-1.amazonaws.com/dayhoff:boltz-latest"
30
+
31
+
32
+ @click.command()
33
+ @click.argument("input_dir", type=click.Path(exists=True))
34
+ @click.option(
35
+ "--workers",
36
+ default=DEFAULT_WORKERS,
37
+ type=int,
38
+ help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]",
39
+ )
40
+ @click.option(
41
+ "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
42
+ )
43
+ @click.option(
44
+ "--msa-dir",
45
+ type=click.Path(exists=True),
46
+ help="Path to pre-computed MSA files (optional)",
47
+ )
48
+ @click.option(
49
+ "--local",
50
+ "run_local",
51
+ is_flag=True,
52
+ help="Run single complex locally instead of Batch",
53
+ )
54
+ @click.option(
55
+ "--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
56
+ )
57
+ @click.option("--dry-run", is_flag=True, help="Show plan without submitting")
58
+ @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
59
+ def boltz(input_dir, workers, queue, msa_dir, run_local, run_shell, dry_run, base_path):
60
+ """Predict protein structures with Boltz.
61
+
62
+ Processes a directory of YAML config files, each defining a protein complex.
63
+ Each YAML file is processed independently in parallel using AWS Batch array jobs.
64
+
65
+ \b
66
+ Examples:
67
+ # Submit to AWS Batch with 100 workers
68
+ dh batch boltz /primordial/complexes/ --workers 100
69
+
70
+ # Include pre-computed MSA files
71
+ dh batch boltz /primordial/complexes/ --workers 50 --msa-dir /primordial/msas/
72
+
73
+ # Test locally with a single complex
74
+ dh batch boltz /primordial/complexes/ --local
75
+
76
+ # Debug by dropping into container shell
77
+ dh batch boltz /primordial/complexes/ --shell
78
+
79
+ \b
80
+ After job completes:
81
+ dh batch status <job-id> # Check status
82
+ dh batch finalize <job-id> --output /primordial/structures/ # Move results
83
+
84
+ \b
85
+ YAML config format:
86
+ version: 1
87
+ sequences:
88
+ - protein:
89
+ id: A
90
+ sequence: MKTVRQERLKSIVRILERSKEPVSGAQ...
91
+ - ligand:
92
+ id: B
93
+ smiles: CCO
94
+ """
95
+ input_path = Path(input_dir).resolve()
96
+
97
+ if run_shell:
98
+ _run_shell_mode(input_path)
99
+ return
100
+
101
+ if run_local:
102
+ _run_local_mode(input_path)
103
+ return
104
+
105
+ # Batch submission mode
106
+ _submit_batch_job(input_path, workers, queue, msa_dir, dry_run, base_path)
107
+
108
+
109
+ def _count_yaml_files(input_path: Path) -> int:
110
+ """Count YAML files in directory."""
111
+ return len(list(input_path.glob("*.yaml")))
112
+
113
+
114
+ def _copy_inputs_to_job_dir(input_path: Path, job_dir: Path) -> int:
115
+ """Copy input YAML files to job directory.
116
+
117
+ Returns:
118
+ Number of files copied
119
+ """
120
+ input_dir = job_dir / "input"
121
+ input_dir.mkdir(parents=True, exist_ok=True)
122
+
123
+ count = 0
124
+ for yaml_file in sorted(input_path.glob("*.yaml")):
125
+ dest = input_dir / yaml_file.name
126
+ shutil.copy2(yaml_file, dest)
127
+ count += 1
128
+
129
+ return count
130
+
131
+
132
+ def _submit_batch_job(
133
+ input_path: Path,
134
+ workers: int,
135
+ queue: str,
136
+ msa_dir: str | None,
137
+ dry_run: bool,
138
+ base_path: str,
139
+ ):
140
+ """Submit Boltz job to AWS Batch."""
141
+ # Count input files
142
+ click.echo(f"Scanning {input_path} for YAML files...")
143
+ num_files = _count_yaml_files(input_path)
144
+
145
+ if num_files == 0:
146
+ click.echo(
147
+ click.style("Error: No YAML files found in input directory", fg="red"),
148
+ err=True,
149
+ )
150
+ raise SystemExit(1)
151
+
152
+ click.echo(f"Found {num_files} complexes to predict")
153
+
154
+ # Calculate array size
155
+ array_size = min(num_files, workers)
156
+
157
+ # Generate job ID
158
+ job_id = generate_job_id("boltz")
159
+
160
+ # Show plan
161
+ click.echo()
162
+ click.echo(f"Job ID: {job_id}")
163
+ click.echo(f"Input: {input_path}")
164
+ click.echo(f"Complexes: {num_files}")
165
+ click.echo(f"Array Size: {array_size}")
166
+ click.echo(f"Queue: {queue}")
167
+ click.echo(f"Job definition: {DEFAULT_JOB_DEFINITION}")
168
+ if msa_dir:
169
+ click.echo(f"MSA directory: {msa_dir}")
170
+
171
+ if dry_run:
172
+ click.echo()
173
+ click.echo(click.style("Dry run - job not submitted", fg="yellow"))
174
+ return
175
+
176
+ click.echo()
177
+
178
+ # Create job directory
179
+ job_dir = create_job_directory(job_id, base_path)
180
+ click.echo(f"Created job directory: {job_dir}")
181
+
182
+ # Copy input files
183
+ click.echo("Copying input files...")
184
+ copied = _copy_inputs_to_job_dir(input_path, job_dir)
185
+ click.echo(f"Copied {copied} YAML files")
186
+
187
+ # Copy or symlink MSA directory if provided
188
+ if msa_dir:
189
+ msa_dest = job_dir / "msas"
190
+ msa_src = Path(msa_dir)
191
+
192
+ # If on same filesystem (Primordial), symlink; otherwise copy
193
+ try:
194
+ msa_dest.symlink_to(msa_src)
195
+ click.echo(f"Linked MSA directory: {msa_dir}")
196
+ except OSError:
197
+ click.echo("Copying MSA directory (this may take a while)...")
198
+ shutil.copytree(msa_src, msa_dest)
199
+ click.echo(f"Copied MSA directory")
200
+
201
+ # Create manifest
202
+ manifest = JobManifest(
203
+ job_id=job_id,
204
+ user=job_id.split("-")[0],
205
+ pipeline="boltz",
206
+ status=JobStatus.PENDING,
207
+ image_uri=DEFAULT_IMAGE_URI,
208
+ input=InputConfig(
209
+ source=str(input_path),
210
+ num_sequences=num_files, # Using num_sequences field for num_complexes
211
+ num_chunks=array_size,
212
+ ),
213
+ batch=BatchConfig(
214
+ queue=queue,
215
+ job_definition=DEFAULT_JOB_DEFINITION,
216
+ array_size=array_size,
217
+ ),
218
+ output=OutputConfig(
219
+ destination=None,
220
+ finalized=False,
221
+ ),
222
+ )
223
+
224
+ save_manifest(manifest, base_path)
225
+
226
+ # Submit to AWS Batch
227
+ try:
228
+ client = BatchClient()
229
+
230
+ environment = {
231
+ "JOB_DIR": str(job_dir),
232
+ "JOB_ID": job_id,
233
+ "BOLTZ_CACHE": "/primordial/.cache/boltz",
234
+ "MSA_DIR": "/primordial/.cache/msas",
235
+ }
236
+
237
+ batch_job_id = client.submit_job(
238
+ job_name=job_id,
239
+ job_definition=DEFAULT_JOB_DEFINITION,
240
+ job_queue=queue,
241
+ array_size=array_size,
242
+ environment=environment,
243
+ timeout_seconds=12 * 3600, # 12 hours (Boltz can be slow)
244
+ retry_attempts=2, # Fewer retries for expensive jobs
245
+ )
246
+
247
+ # Update manifest
248
+ manifest.status = JobStatus.SUBMITTED
249
+ manifest.batch.job_id = batch_job_id
250
+ save_manifest(manifest, base_path)
251
+
252
+ click.echo()
253
+ click.echo(click.style("✓ Job submitted successfully!", fg="green"))
254
+ click.echo()
255
+ click.echo(f"AWS Batch Job ID: {batch_job_id}")
256
+ click.echo()
257
+ click.echo("Next steps:")
258
+ click.echo(f" Check status: dh batch status {job_id}")
259
+ click.echo(f" View logs: dh batch logs {job_id}")
260
+ click.echo(f" Cancel: dh batch cancel {job_id}")
261
+ click.echo()
262
+ click.echo("After completion:")
263
+ click.echo(
264
+ f" Finalize: dh batch finalize {job_id} --output /primordial/structures/"
265
+ )
266
+
267
+ except BatchError as e:
268
+ manifest.status = JobStatus.FAILED
269
+ manifest.error_message = str(e)
270
+ save_manifest(manifest, base_path)
271
+ click.echo(click.style(f"✗ Failed to submit job: {e}", fg="red"), err=True)
272
+ raise SystemExit(1)
273
+
274
+
275
+ def _run_local_mode(input_path: Path):
276
+ """Run Boltz locally for a single complex."""
277
+ click.echo("Running Boltz locally...")
278
+ click.echo(f"Input directory: {input_path}")
279
+
280
+ # Find first YAML file
281
+ yaml_files = list(input_path.glob("*.yaml"))
282
+ if not yaml_files:
283
+ click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
284
+ raise SystemExit(1)
285
+
286
+ input_file = yaml_files[0]
287
+ click.echo(f"Processing: {input_file.name}")
288
+ click.echo()
289
+
290
+ try:
291
+ from dayhoff_tools.batch.workers.boltz import BoltzProcessor
292
+
293
+ processor = BoltzProcessor(
294
+ num_workers=None, # Auto-detect
295
+ msa_folder=None,
296
+ cache_dir=(
297
+ "/primordial/.cache/boltz" if os.path.exists("/primordial") else None
298
+ ),
299
+ )
300
+
301
+ result_dir = processor.run(str(input_file))
302
+
303
+ click.echo()
304
+ click.echo(click.style("✓ Prediction complete!", fg="green"))
305
+ click.echo(f"Output: {result_dir}")
306
+
307
+ except ImportError as e:
308
+ click.echo(
309
+ click.style(f"Error: Missing dependency: {e}", fg="red"),
310
+ err=True,
311
+ )
312
+ raise SystemExit(1)
313
+ except Exception as e:
314
+ click.echo(click.style(f"Error: {e}", fg="red"), err=True)
315
+ raise SystemExit(1)
316
+
317
+
318
+ def _run_shell_mode(input_path: Path):
319
+ """Drop into container shell for debugging."""
320
+ import subprocess
321
+
322
+ click.echo("Dropping into container shell...")
323
+ click.echo(f"Input will be available at: /input/")
324
+ click.echo()
325
+
326
+ cmd = [
327
+ "docker",
328
+ "run",
329
+ "--rm",
330
+ "-it",
331
+ "--gpus",
332
+ "all",
333
+ "-v",
334
+ "/primordial:/primordial",
335
+ "-v",
336
+ f"{input_path}:/input",
337
+ "-e",
338
+ "JOB_DIR=/input",
339
+ "-e",
340
+ "AWS_BATCH_JOB_ARRAY_INDEX=0",
341
+ "-e",
342
+ "BOLTZ_CACHE=/primordial/.cache/boltz",
343
+ "-e",
344
+ "MSA_DIR=/primordial/.cache/msas",
345
+ "--entrypoint",
346
+ "/bin/bash",
347
+ DEFAULT_IMAGE_URI,
348
+ ]
349
+
350
+ click.echo(f"Running: {' '.join(cmd)}")
351
+ click.echo()
352
+
353
+ try:
354
+ subprocess.run(cmd)
355
+ except FileNotFoundError:
356
+ click.echo(
357
+ click.style(
358
+ "Error: Docker not found. Is Docker installed and running?", fg="red"
359
+ ),
360
+ err=True,
361
+ )
362
+ raise SystemExit(1)
@@ -0,0 +1,82 @@
1
+ """Cancel command for stopping running jobs."""
2
+
3
+ import click
4
+
5
+ from ..aws_batch import BatchClient, BatchError
6
+ from ..manifest import (
7
+ BATCH_JOBS_BASE,
8
+ JobStatus,
9
+ load_manifest,
10
+ save_manifest,
11
+ )
12
+
13
+
14
+ @click.command()
15
+ @click.argument("job_id")
16
+ @click.option("--force", is_flag=True, help="Force termination of running containers")
17
+ @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
18
+ def cancel(job_id, force, base_path):
19
+ """Cancel a running batch job.
20
+
21
+ Cancels the job in AWS Batch and updates the manifest status.
22
+
23
+ \b
24
+ Examples:
25
+ dh batch cancel dma-embed-20260109-a3f2
26
+ dh batch cancel dma-embed-20260109-a3f2 --force
27
+ """
28
+ # Load manifest
29
+ try:
30
+ manifest = load_manifest(job_id, base_path)
31
+ except FileNotFoundError:
32
+ click.echo(f"Job not found: {job_id}", err=True)
33
+ raise SystemExit(1)
34
+
35
+ # Check if job can be cancelled
36
+ if manifest.status in (JobStatus.SUCCEEDED, JobStatus.FINALIZED, JobStatus.CANCELLED):
37
+ click.echo(f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True)
38
+ raise SystemExit(1)
39
+
40
+ # Get Batch job ID
41
+ if not manifest.batch or not manifest.batch.job_id:
42
+ click.echo("Job has no AWS Batch job ID, updating status only.")
43
+ manifest.status = JobStatus.CANCELLED
44
+ save_manifest(manifest, base_path)
45
+ click.echo(click.style(f"✓ Job {job_id} marked as cancelled", fg="green"))
46
+ return
47
+
48
+ batch_job_id = manifest.batch.job_id
49
+
50
+ # Cancel in AWS Batch
51
+ try:
52
+ client = BatchClient()
53
+
54
+ if force:
55
+ click.echo(f"Terminating job {batch_job_id}...")
56
+ client.terminate_job(batch_job_id, reason="Terminated by user via dh batch cancel --force")
57
+ else:
58
+ click.echo(f"Cancelling job {batch_job_id}...")
59
+ client.cancel_job(batch_job_id, reason="Cancelled by user via dh batch cancel")
60
+
61
+ # Update manifest
62
+ manifest.status = JobStatus.CANCELLED
63
+ save_manifest(manifest, base_path)
64
+
65
+ click.echo()
66
+ click.echo(click.style(f"✓ Job {job_id} cancelled successfully", fg="green"))
67
+
68
+ # Handle retries too
69
+ for retry_info in manifest.retries:
70
+ if retry_info.batch_job_id:
71
+ try:
72
+ if force:
73
+ client.terminate_job(retry_info.batch_job_id, reason="Parent job cancelled")
74
+ else:
75
+ client.cancel_job(retry_info.batch_job_id, reason="Parent job cancelled")
76
+ click.echo(f" Also cancelled retry job: {retry_info.retry_id}")
77
+ except BatchError:
78
+ pass # Retry job may already be complete
79
+
80
+ except BatchError as e:
81
+ click.echo(click.style(f"✗ Failed to cancel job: {e}", fg="red"), err=True)
82
+ raise SystemExit(1)