dayhoff-tools 1.14.6.tar.gz → 1.14.7.tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (75)
  1. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/boltz.py +38 -9
  3. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/aws_batch.py +4 -4
  4. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/boltz.py +31 -12
  5. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/cancel.py +20 -6
  6. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/embed_t5.py +86 -25
  7. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/finalize.py +43 -18
  8. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/list_jobs.py +3 -1
  9. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/local.py +27 -10
  10. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/logs.py +6 -2
  11. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/retry.py +6 -2
  12. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/status.py +21 -6
  13. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/submit.py +9 -3
  14. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/manifest.py +3 -1
  15. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/main.py +5 -1
  16. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/pyproject.toml +1 -1
  17. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/README.md +0 -0
  18. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/__init__.py +0 -0
  19. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/__init__.py +0 -0
  20. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/__init__.py +0 -0
  21. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/base.py +0 -0
  22. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
  23. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/chemistry/standardizer.py +0 -0
  24. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/chemistry/utils.py +0 -0
  25. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/__init__.py +0 -0
  26. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/__init__.py +0 -0
  27. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
  28. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/job_id.py +0 -0
  29. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/cloud_commands.py +0 -0
  30. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/__init__.py +0 -0
  31. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
  32. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
  33. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
  34. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
  35. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/shared.py +0 -0
  36. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
  37. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
  38. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
  39. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
  40. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
  41. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
  42. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
  43. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
  44. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
  45. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
  46. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
  47. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
  48. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
  49. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
  50. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
  51. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
  52. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
  53. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/github_commands.py +0 -0
  54. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/swarm_commands.py +0 -0
  55. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/utility_commands.py +0 -0
  56. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/base.py +0 -0
  57. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  58. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  59. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  60. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/job_runner.py +0 -0
  61. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/processors.py +0 -0
  62. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/swarm.py +0 -0
  63. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/embedders.py +0 -0
  64. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/fasta.py +0 -0
  65. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/file_ops.py +0 -0
  66. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/h5.py +0 -0
  67. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/gcp.py +0 -0
  68. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/gtdb.py +0 -0
  69. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/kegg.py +0 -0
  70. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/mmseqs.py +0 -0
  71. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/structure.py +0 -0
  72. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/uniprot.py +0 -0
  73. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/logs.py +0 -0
  74. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/sqlite.py +0 -0
  75. {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/warehouse.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dayhoff-tools
-Version: 1.14.6
+Version: 1.14.7
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com

dayhoff_tools/batch/workers/boltz.py
@@ -88,7 +88,9 @@ class BoltzProcessor:
         match = re.match(pattern1, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 1)")
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 1)"
+            )
             return protein_id

         # Pattern 2: PROTEINID_suffix (no leading number)
@@ -96,7 +98,9 @@ class BoltzProcessor:
         match = re.match(pattern2, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 2)")
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 2)"
+            )
             return protein_id

         # Pattern 3: Just PROTEINID (no suffix)
@@ -104,7 +108,9 @@ class BoltzProcessor:
         match = re.match(pattern3, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 3)")
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 3)"
+            )
             return protein_id

         logger.debug(f"Could not extract protein ID from filename '{filename}'")
@@ -222,16 +228,24 @@ class BoltzProcessor:
             raise FileNotFoundError(f"Input file not found: {input_file}")

         # Enhance with MSA if available
-        enhanced_input_file, msa_found, original_yaml_data = self._enhance_yaml_with_msa(
-            input_file
+        enhanced_input_file, msa_found, original_yaml_data = (
+            self._enhance_yaml_with_msa(input_file)
         )

         # Determine output directory
+        # Boltz always creates boltz_results_{input_name} inside --out_dir
         input_base = os.path.splitext(os.path.basename(input_file))[0]
+
         if output_dir is None:
+            # No output_dir specified, boltz creates in current directory
             expected_output_dir = f"boltz_results_{input_base}"
+            out_dir_arg = None
         else:
-            expected_output_dir = output_dir
+            # output_dir specified - use its parent for --out_dir
+            # and expect boltz_results_{input_base} inside it
+            parent_dir = os.path.dirname(output_dir)
+            expected_output_dir = os.path.join(parent_dir, f"boltz_results_{input_base}")
+            out_dir_arg = parent_dir if parent_dir else None

         logger.info(f"Running Boltz prediction for {input_file}")
         logger.info(f"Output directory: {expected_output_dir}")
@@ -239,6 +253,10 @@
         # Build command
         cmd = ["boltz", "predict", input_file]

+        # Add output directory if specified
+        if out_dir_arg:
+            cmd.extend(["--out_dir", out_dir_arg])
+
         # Add cache directory
         cmd.extend(["--cache", self.cache_dir])

@@ -259,7 +277,9 @@
         # Handle MSA server option
         if msa_found:
             if use_msa_server_in_opts:
-                additional_args = [arg for arg in additional_args if arg != "--use_msa_server"]
+                additional_args = [
+                    arg for arg in additional_args if arg != "--use_msa_server"
+                ]
                 logger.info("Removed --use_msa_server since local MSA was found")
         else:
             if not use_msa_server_in_opts:
@@ -270,6 +290,11 @@
         if not num_workers_in_opts:
             cmd.extend(["--num_workers", str(self.num_workers)])

+        # Disable cuequivariance kernels - they require cuda-devel image
+        # which is much larger. The performance difference is modest.
+        # TODO: Consider switching to cuda-devel base image if perf is critical
+        cmd.append("--no_kernels")
+
         cmd.extend(additional_args)

         # Log and run command
@@ -305,7 +330,9 @@

         # Copy input config to output directory
         try:
-            config_dest = os.path.join(expected_output_dir, os.path.basename(input_file))
+            config_dest = os.path.join(
+                expected_output_dir, os.path.basename(input_file)
+            )
             shutil.copy2(input_file, config_dest)
             logger.debug(f"Copied input config to results: {config_dest}")
         except Exception as e:
@@ -346,7 +373,9 @@ def main():
     input_files = sorted(input_dir.glob("*.yaml"))

     if index >= len(input_files):
-        logger.error(f"Index {index} out of range. Found {len(input_files)} input files.")
+        logger.error(
+            f"Index {index} out of range. Found {len(input_files)} input files."
+        )
         raise RuntimeError(f"Index {index} out of range")

     input_file = input_files[index]
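
The output-directory handling above is the main behavioral change in this worker: Boltz always nests its results in a boltz_results_{input_base} folder, so the code now points --out_dir at the parent of the requested directory and predicts where the results will land. A standalone sketch of that logic (the helper name is illustrative, not part of the package):

import os

def resolve_boltz_output(input_file: str, output_dir: str | None) -> tuple[str, str | None]:
    # Returns (expected_output_dir, out_dir_arg), mirroring the hunk above.
    input_base = os.path.splitext(os.path.basename(input_file))[0]
    if output_dir is None:
        # Without --out_dir, boltz writes boltz_results_{input_base} into the CWD.
        return f"boltz_results_{input_base}", None
    parent_dir = os.path.dirname(output_dir)
    expected = os.path.join(parent_dir, f"boltz_results_{input_base}")
    return expected, parent_dir if parent_dir else None

For example, input "1_P12345.yaml" with output_dir "/primordial/jobs/run1/out" yields expected_output_dir "/primordial/jobs/run1/boltz_results_1_P12345" and --out_dir /primordial/jobs/run1.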

dayhoff_tools/cli/batch/aws_batch.py
@@ -256,9 +256,7 @@ class BatchClient:
         # List child jobs with FAILED status
         try:
             paginator = self.batch.get_paginator("list_jobs")
-            for page in paginator.paginate(
-                arrayJobId=job_id, jobStatus="FAILED"
-            ):
+            for page in paginator.paginate(arrayJobId=job_id, jobStatus="FAILED"):
                 for job_summary in page.get("jobSummaryList", []):
                     # Extract array index from job ID (format: jobId:index)
                     child_id = job_summary.get("jobId", "")
@@ -361,7 +359,9 @@ class BatchClient:
                 timestamp = event.get("timestamp", 0)
                 message = event.get("message", "")
                 # Format timestamp
-                dt = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000))
+                dt = time.strftime(
+                    "%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000)
+                )
                 messages.append(f"[{dt}] {message}")

         except ClientError as e:

dayhoff_tools/cli/batch/commands/boltz.py
@@ -289,7 +289,9 @@ def _run_local_mode(input_path: Path):
        click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
        raise SystemExit(1)

-    click.echo(f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}")
+    click.echo(
+        f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}"
+    )

    # Create a temporary job directory structure
    temp_job_dir = input_path / ".local_boltz_job"
@@ -311,14 +313,25 @@ def _run_local_mode(input_path: Path):
    click.echo()

    cmd = [
-        "docker", "run", "--rm",
-        "--gpus", "all",
-        "-v", "/primordial:/primordial",
-        "-v", f"{temp_job_dir}:{temp_job_dir}",
-        "-e", f"JOB_DIR={temp_job_dir}",
-        "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
-        "-e", "BOLTZ_CACHE=/primordial/.cache/boltz",
-        "-e", "MSA_DIR=/primordial/.cache/msas",
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{temp_job_dir}:{temp_job_dir}",
+        "-e",
+        f"JOB_DIR={temp_job_dir}",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
+        "-e",
+        "BOLTZ_CACHE=/primordial/.cache/boltz",
+        "-e",
+        "MSA_DIR=/primordial/.cache/msas",
+        "-e",
+        "BOLTZ_OPTIONS=--no_kernels",
        DEFAULT_IMAGE_URI,
    ]

@@ -329,13 +342,17 @@ def _run_local_mode(input_path: Path):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            click.echo(
-                click.style(f"Container exited with code {result.returncode}", fg="red"),
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                err=True,
            )
            raise SystemExit(result.returncode)

        # Check for output
-        output_dirs = list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
+        output_dirs = (
+            list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
+        )
        if output_dirs:
            click.echo()
            click.echo(click.style("✓ Prediction complete!", fg="green"))
@@ -347,7 +364,9 @@ def _run_local_mode(input_path: Path):

    except FileNotFoundError:
        click.echo(
-            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
            err=True,
        )
        raise SystemExit(1)

dayhoff_tools/cli/batch/commands/cancel.py
@@ -33,8 +33,14 @@ def cancel(job_id, force, base_path):
        raise SystemExit(1)

    # Check if job can be cancelled
-    if manifest.status in (JobStatus.SUCCEEDED, JobStatus.FINALIZED, JobStatus.CANCELLED):
-        click.echo(f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True)
+    if manifest.status in (
+        JobStatus.SUCCEEDED,
+        JobStatus.FINALIZED,
+        JobStatus.CANCELLED,
+    ):
+        click.echo(
+            f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True
+        )
        raise SystemExit(1)

    # Get Batch job ID
@@ -53,10 +59,14 @@ def cancel(job_id, force, base_path):

    if force:
        click.echo(f"Terminating job {batch_job_id}...")
-        client.terminate_job(batch_job_id, reason="Terminated by user via dh batch cancel --force")
+        client.terminate_job(
+            batch_job_id, reason="Terminated by user via dh batch cancel --force"
+        )
    else:
        click.echo(f"Cancelling job {batch_job_id}...")
-        client.cancel_job(batch_job_id, reason="Cancelled by user via dh batch cancel")
+        client.cancel_job(
+            batch_job_id, reason="Cancelled by user via dh batch cancel"
+        )

    # Update manifest
    manifest.status = JobStatus.CANCELLED
@@ -70,9 +80,13 @@ def cancel(job_id, force, base_path):
        if retry_info.batch_job_id:
            try:
                if force:
-                    client.terminate_job(retry_info.batch_job_id, reason="Parent job cancelled")
+                    client.terminate_job(
+                        retry_info.batch_job_id, reason="Parent job cancelled"
+                    )
                else:
-                    client.cancel_job(retry_info.batch_job_id, reason="Parent job cancelled")
+                    client.cancel_job(
+                        retry_info.batch_job_id, reason="Parent job cancelled"
+                    )
                click.echo(f" Also cancelled retry job: {retry_info.retry_id}")
            except BatchError:
                pass  # Retry job may already be complete
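
The --force flag maps to two different AWS Batch operations: CancelJob only removes jobs that have not started running, while TerminateJob also stops running attempts. A minimal boto3 sketch of the calls that BatchClient presumably wraps (the wrapper's internals are not shown in this diff):

import boto3

batch = boto3.client("batch")

def cancel_or_terminate(batch_job_id: str, force: bool) -> None:
    # TerminateJob kills running containers; CancelJob only affects jobs still queued.
    if force:
        batch.terminate_job(jobId=batch_job_id, reason="Terminated by user via dh batch cancel --force")
    else:
        batch.cancel_job(jobId=batch_job_id, reason="Cancelled by user via dh batch cancel")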

dayhoff_tools/cli/batch/commands/embed_t5.py
@@ -32,14 +32,42 @@ DEFAULT_IMAGE_URI = "074735440724.dkr.ecr.us-east-1.amazonaws.com/dayhoff:embed-

 @click.command()
 @click.argument("input_fasta", type=click.Path(exists=True))
-@click.option("--workers", default=DEFAULT_WORKERS, type=int, help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]")
-@click.option("--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]")
-@click.option("--seqs-per-chunk", default=DEFAULT_SEQS_PER_CHUNK, type=int, help=f"Sequences per chunk [default: {DEFAULT_SEQS_PER_CHUNK}]")
-@click.option("--local", "run_local", is_flag=True, help="Run single chunk locally instead of Batch")
-@click.option("--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging")
+@click.option(
+    "--workers",
+    default=DEFAULT_WORKERS,
+    type=int,
+    help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]",
+)
+@click.option(
+    "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
+)
+@click.option(
+    "--seqs-per-chunk",
+    default=DEFAULT_SEQS_PER_CHUNK,
+    type=int,
+    help=f"Sequences per chunk [default: {DEFAULT_SEQS_PER_CHUNK}]",
+)
+@click.option(
+    "--local",
+    "run_local",
+    is_flag=True,
+    help="Run single chunk locally instead of Batch",
+)
+@click.option(
+    "--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
+)
 @click.option("--dry-run", is_flag=True, help="Show plan without submitting")
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
-def embed_t5(input_fasta, workers, queue, seqs_per_chunk, run_local, run_shell, dry_run, base_path):
+def embed_t5(
+    input_fasta,
+    workers,
+    queue,
+    seqs_per_chunk,
+    run_local,
+    run_shell,
+    dry_run,
+    base_path,
+):
     """Generate T5 protein embeddings for a FASTA file.

     Splits the input FASTA into chunks and processes them in parallel using
@@ -115,7 +143,14 @@ def _split_fasta(input_path: Path, output_dir: Path, seqs_per_chunk: int) -> int
     return num_chunks


-def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk: int, dry_run: bool, base_path: str):
+def _submit_batch_job(
+    input_path: Path,
+    workers: int,
+    queue: str,
+    seqs_per_chunk: int,
+    dry_run: bool,
+    base_path: str,
+):
     """Submit embedding job to AWS Batch."""
     # Count sequences
     click.echo(f"Counting sequences in {input_path}...")
@@ -123,7 +158,9 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
     click.echo(f"Found {num_sequences:,} sequences")

     if num_sequences == 0:
-        click.echo(click.style("Error: No sequences found in input file", fg="red"), err=True)
+        click.echo(
+            click.style("Error: No sequences found in input file", fg="red"), err=True
+        )
         raise SystemExit(1)

     # Calculate chunks
@@ -223,7 +260,9 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
        click.echo(f" Cancel: dh batch cancel {job_id}")
        click.echo()
        click.echo("After completion:")
-        click.echo(f" Finalize: dh batch finalize {job_id} --output /primordial/embeddings.h5")
+        click.echo(
+            f" Finalize: dh batch finalize {job_id} --output /primordial/embeddings.h5"
+        )

    except BatchError as e:
        manifest.status = JobStatus.FAILED
@@ -265,12 +304,19 @@ def _run_local_mode(input_path: Path):
    click.echo()

    cmd = [
-        "docker", "run", "--rm",
-        "--gpus", "all",
-        "-v", "/primordial:/primordial",
-        "-v", f"{temp_job_dir}:{temp_job_dir}",
-        "-e", f"JOB_DIR={temp_job_dir}",
-        "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{temp_job_dir}:{temp_job_dir}",
+        "-e",
+        f"JOB_DIR={temp_job_dir}",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
        DEFAULT_IMAGE_URI,
    ]

@@ -281,7 +327,9 @@ def _run_local_mode(input_path: Path):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            click.echo(
-                click.style(f"Container exited with code {result.returncode}", fg="red"),
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                err=True,
            )
            raise SystemExit(result.returncode)
@@ -303,7 +351,9 @@ def _run_local_mode(input_path: Path):

    except FileNotFoundError:
        click.echo(
-            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
            err=True,
        )
        raise SystemExit(1)
@@ -318,13 +368,22 @@ def _run_shell_mode(input_path: Path):
    input_dir = input_path.parent

    cmd = [
-        "docker", "run", "--rm", "-it",
-        "--gpus", "all",
-        "-v", "/primordial:/primordial",
-        "-v", f"{input_dir}:/input",
-        "-e", "JOB_DIR=/input",
-        "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
-        "--entrypoint", "/bin/bash",
+        "docker",
+        "run",
+        "--rm",
+        "-it",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{input_dir}:/input",
+        "-e",
+        "JOB_DIR=/input",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
+        "--entrypoint",
+        "/bin/bash",
        DEFAULT_IMAGE_URI,
    ]

@@ -335,7 +394,9 @@ def _run_shell_mode(input_path: Path):
        subprocess.run(cmd)
    except FileNotFoundError:
        click.echo(
-            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
            err=True,
        )
        raise SystemExit(1)
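
The submission path splits the input FASTA into fixed-size chunks and runs them as an AWS Batch array job, one array index per chunk. The arithmetic implied by the options above, as an illustrative sketch rather than the package's own helper:

import math

def plan_chunks(num_sequences: int, seqs_per_chunk: int, workers: int) -> dict:
    # One Batch array index per chunk; workers presumably caps how many run at once.
    num_chunks = math.ceil(num_sequences / seqs_per_chunk)
    return {
        "num_chunks": num_chunks,
        "array_size": num_chunks,
        "max_concurrent_workers": min(workers, num_chunks),
    }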

dayhoff_tools/cli/batch/commands/finalize.py
@@ -17,9 +17,18 @@ from ..manifest import (

 @click.command()
 @click.argument("job_id")
-@click.option("--output", required=True, type=click.Path(), help="Output path for combined results")
+@click.option(
+    "--output",
+    required=True,
+    type=click.Path(),
+    help="Output path for combined results",
+)
 @click.option("--force", is_flag=True, help="Finalize even if some chunks failed")
-@click.option("--keep-intermediates", is_flag=True, help="Don't delete job directory after finalizing")
+@click.option(
+    "--keep-intermediates",
+    is_flag=True,
+    help="Don't delete job directory after finalizing",
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def finalize(job_id, output, force, keep_intermediates, base_path):
     """Combine results and clean up job intermediates.
@@ -59,7 +68,9 @@ def finalize(job_id, output, force, keep_intermediates, base_path):
            click.echo(f" dh batch retry {job_id}")
            raise SystemExit(1)
        click.echo()
-        click.echo(click.style("Warning: Finalizing with incomplete chunks", fg="yellow"))
+        click.echo(
+            click.style("Warning: Finalizing with incomplete chunks", fg="yellow")
+        )

    # Update status
    manifest.status = JobStatus.FINALIZING
@@ -132,24 +143,38 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
     output_path.parent.mkdir(parents=True, exist_ok=True)

     try:
-        from dayhoff_tools.h5 import combine_h5_files, deduplicate_h5_file, optimize_protein_embedding_chunks
-
-        # Combine H5 files
-        click.echo("Combining H5 files...")
-        # Get list of h5 file paths as strings
-        h5_file_paths = [str(f) for f in h5_files]
-        combine_h5_files(
-            input_files=h5_file_paths,
-            output_file=str(output_path),
+        from dayhoff_tools.h5 import (
+            combine_h5_files,
+            deduplicate_h5_file,
+            optimize_protein_embedding_chunks,
         )
+        import tempfile
+
+        if len(h5_files) == 1:
+            # Single file - just copy, no need to combine/dedup/optimize
+            click.echo("Single chunk - copying directly...")
+            shutil.copy2(h5_files[0], output_path)
+        else:
+            # Multiple files - combine, deduplicate, and optimize
+            with tempfile.TemporaryDirectory() as tmpdir:
+                combined_path = Path(tmpdir) / "combined.h5"
+                deduped_path = Path(tmpdir) / "deduped.h5"
+
+                # Combine H5 files
+                click.echo("Combining H5 files...")
+                h5_file_paths = [str(f) for f in h5_files]
+                combine_h5_files(
+                    input_files=h5_file_paths,
+                    output_file=str(combined_path),
+                )

-        # Deduplicate
-        click.echo("Deduplicating...")
-        deduplicate_h5_file(str(output_path))
+                # Deduplicate
+                click.echo("Deduplicating...")
+                deduplicate_h5_file(str(combined_path), str(deduped_path))

-        # Optimize chunks
-        click.echo("Optimizing chunks...")
-        optimize_protein_embedding_chunks(str(output_path))
+                # Optimize chunks
+                click.echo("Optimizing chunks...")
+                optimize_protein_embedding_chunks(str(deduped_path), str(output_path))

         click.echo(click.style("✓ H5 files combined successfully", fg="green"))

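The rework above turns the H5 helpers from in-place operations into explicit (input, output) stages and short-circuits single-chunk jobs. Condensed into one standalone function, under the assumption that the dayhoff_tools.h5 helpers keep the signatures used in the hunk:

import shutil
import tempfile
from pathlib import Path

from dayhoff_tools.h5 import (
    combine_h5_files,
    deduplicate_h5_file,
    optimize_protein_embedding_chunks,
)

def merge_chunk_embeddings(h5_files: list[Path], output_path: Path) -> None:
    if len(h5_files) == 1:
        # Single chunk: nothing to combine or deduplicate.
        shutil.copy2(h5_files[0], output_path)
        return
    with tempfile.TemporaryDirectory() as tmpdir:
        combined = Path(tmpdir) / "combined.h5"
        deduped = Path(tmpdir) / "deduped.h5"
        combine_h5_files(input_files=[str(f) for f in h5_files], output_file=str(combined))
        deduplicate_h5_file(str(combined), str(deduped))
        optimize_protein_embedding_chunks(str(deduped), str(output_path))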

dayhoff_tools/cli/batch/commands/list_jobs.py
@@ -15,7 +15,9 @@ from .status import format_status, format_time_ago
     help="Filter by status",
 )
 @click.option("--pipeline", help="Filter by pipeline type")
-@click.option("--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]")
+@click.option(
+    "--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]"
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def list_jobs(user, status_filter, pipeline, limit, base_path):
     """List recent batch jobs.

dayhoff_tools/cli/batch/commands/local.py
@@ -10,7 +10,12 @@ from ..manifest import BATCH_JOBS_BASE, get_job_dir, load_manifest
 @click.command()
 @click.argument("job_id")
 @click.option("--index", required=True, type=int, help="Array index to run")
-@click.option("--shell", "run_shell", is_flag=True, help="Drop into shell instead of running command")
+@click.option(
+    "--shell",
+    "run_shell",
+    is_flag=True,
+    help="Drop into shell instead of running command",
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def local(job_id, index, run_shell, base_path):
     """Run a job chunk locally for debugging.
@@ -54,13 +59,21 @@ def local(job_id, index, run_shell, base_path):

     # Build Docker command
     cmd = [
-        "docker", "run", "--rm",
-        "--gpus", "all",
-        "-v", "/primordial:/primordial",
-        "-v", f"{job_dir}:{job_dir}",
-        "-e", f"AWS_BATCH_JOB_ARRAY_INDEX={index}",
-        "-e", f"JOB_DIR={job_dir}",
-        "-e", f"JOB_ID={job_id}",
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{job_dir}:{job_dir}",
+        "-e",
+        f"AWS_BATCH_JOB_ARRAY_INDEX={index}",
+        "-e",
+        f"JOB_DIR={job_dir}",
+        "-e",
+        f"JOB_ID={job_id}",
     ]

     if run_shell:
@@ -81,7 +94,9 @@ def local(job_id, index, run_shell, base_path):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            click.echo(
-                click.style(f"Container exited with code {result.returncode}", fg="red"),
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                err=True,
            )
            raise SystemExit(result.returncode)
@@ -89,7 +104,9 @@ def local(job_id, index, run_shell, base_path):
        click.echo(click.style("✓ Container completed successfully", fg="green"))
    except FileNotFoundError:
        click.echo(
-            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
            err=True,
        )
        raise SystemExit(1)

dayhoff_tools/cli/batch/commands/logs.py
@@ -77,7 +77,9 @@ def _show_job_logs(client: BatchClient, batch_job_id: str, tail: int, follow: bo
        click.echo(f" dh batch logs {batch_job_id.split('-')[0]} --failed")
        click.echo()
        click.echo("To view logs for a specific index:")
-        click.echo(f" dh batch logs {batch_job_id.split('-')[0]} --index {failed_indices[0]}")
+        click.echo(
+            f" dh batch logs {batch_job_id.split('-')[0]} --index {failed_indices[0]}"
+        )
        return

    # Single job - show logs
@@ -94,7 +96,9 @@ def _show_job_logs(client: BatchClient, batch_job_id: str, tail: int, follow: bo
        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)


-def _show_index_logs(client: BatchClient, batch_job_id: str, index: int, tail: int, follow: bool):
+def _show_index_logs(
+    client: BatchClient, batch_job_id: str, index: int, tail: int, follow: bool
+):
     """Show logs for a specific array index."""
     child_job_id = f"{batch_job_id}:{index}"


dayhoff_tools/cli/batch/commands/retry.py
@@ -19,7 +19,9 @@ from ..manifest import (
 @click.command()
 @click.argument("job_id")
 @click.option("--indices", help="Specific indices to retry (comma-separated)")
-@click.option("--dry-run", is_flag=True, help="Show what would be retried without submitting")
+@click.option(
+    "--dry-run", is_flag=True, help="Show what would be retried without submitting"
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def retry(job_id, indices, dry_run, base_path):
     """Retry failed chunks of a batch job.
@@ -112,7 +114,9 @@ def retry(job_id, indices, dry_run, base_path):
        click.echo(f" View logs: dh batch logs {job_id}")

    except BatchError as e:
-        click.echo(click.style(f"✗ Failed to submit retry job: {e}", fg="red"), err=True)
+        click.echo(
+            click.style(f"✗ Failed to submit retry job: {e}", fg="red"), err=True
+        )
        raise SystemExit(1)



dayhoff_tools/cli/batch/commands/status.py
@@ -3,7 +3,12 @@
 import click

 from ..aws_batch import BatchClient, BatchError
-from ..manifest import BATCH_JOBS_BASE, JobStatus, list_jobs as list_manifests, load_manifest
+from ..manifest import (
+    BATCH_JOBS_BASE,
+    JobStatus,
+    list_jobs as list_manifests,
+    load_manifest,
+)


 def format_status(status: JobStatus) -> str:
@@ -125,8 +130,12 @@ def _show_job_details(job_id: str, base_path: str):
     click.echo(f"Status: {format_status(manifest.status)}")
     click.echo(f"Pipeline: {manifest.pipeline}")
     click.echo(f"User: {manifest.user}")
-    click.echo(f"Created: {manifest.created.isoformat()} ({format_time_ago(manifest.created)})")
-    click.echo(f"Updated: {manifest.updated.isoformat()} ({format_time_ago(manifest.updated)})")
+    click.echo(
+        f"Created: {manifest.created.isoformat()} ({format_time_ago(manifest.created)})"
+    )
+    click.echo(
+        f"Updated: {manifest.updated.isoformat()} ({format_time_ago(manifest.updated)})"
+    )

     if manifest.input:
         click.echo()
@@ -182,7 +191,9 @@ def _show_job_details(job_id: str, base_path: str):
        click.echo(f" Retry: dh batch retry {job_id}")
    elif manifest.status == JobStatus.SUCCEEDED:
        click.echo("Next steps:")
-        click.echo(f" Finalize: dh batch finalize {job_id} --output /primordial/output.h5")
+        click.echo(
+            f" Finalize: dh batch finalize {job_id} --output /primordial/output.h5"
+        )


 def _show_array_status(batch_job_id: str):
@@ -205,10 +216,14 @@ def _show_array_status(batch_job_id: str):
        if array_status.is_complete:
            pct = array_status.success_rate * 100
            color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
-            click.echo(f" Complete: {click.style(f'{pct:.1f}%', fg=color)} success rate")
+            click.echo(
+                f" Complete: {click.style(f'{pct:.1f}%', fg=color)} success rate"
+            )
        else:
            pct = array_status.completed / array_status.total * 100
-            click.echo(f" Progress: {pct:.1f}% ({array_status.completed}/{array_status.total})")
+            click.echo(
+                f" Progress: {pct:.1f}% ({array_status.completed}/{array_status.total})"
+            )

    except BatchError as e:
        click.echo(f" (Could not fetch live status: {e})")

dayhoff_tools/cli/batch/commands/submit.py
@@ -25,9 +25,13 @@ DEFAULT_QUEUE = "t4-1x-spot"


 @click.command()
-@click.option("-f", "--file", "config_file", type=click.Path(exists=True), help="Config file path")
+@click.option(
+    "-f", "--file", "config_file", type=click.Path(exists=True), help="Config file path"
+)
 @click.option("--command", help="Command to run (alternative to config file)")
-@click.option("--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]")
+@click.option(
+    "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
+)
 @click.option("--memory", default="30G", help="Memory limit (e.g., 30G)")
 @click.option("--vcpus", default=8, type=int, help="Number of vCPUs")
 @click.option("--gpus", default=1, type=int, help="Number of GPUs")
@@ -91,7 +95,9 @@ def submit(
     # Override with command-line options
     job_command = command or config.get("command")
     if not job_command:
-        raise click.UsageError("Must specify --command or provide config file with 'command' field")
+        raise click.UsageError(
+            "Must specify --command or provide config file with 'command' field"
+        )

     job_queue = queue if queue != DEFAULT_QUEUE else config.get("queue", queue)
     job_memory = memory if memory != "30G" else config.get("memory", memory)

dayhoff_tools/cli/batch/manifest.py
@@ -33,7 +33,9 @@ class InputConfig(BaseModel):
     """Configuration for job input."""

     source: str = Field(..., description="Path to input file or directory")
-    num_sequences: int | None = Field(None, description="Number of sequences (for FASTA)")
+    num_sequences: int | None = Field(
+        None, description="Number of sequences (for FASTA)"
+    )
     num_chunks: int | None = Field(None, description="Number of chunks created")
     sequences_per_chunk: int | None = Field(None, description="Sequences per chunk")


dayhoff_tools/cli/main.py
@@ -6,7 +6,10 @@ from importlib.metadata import PackageNotFoundError, version
 import typer
 from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
 from dayhoff_tools.cli.github_commands import gh_app
-from dayhoff_tools.cli.engine1 import engine_app as engine1_app, studio_app as studio1_app
+from dayhoff_tools.cli.engine1 import (
+    engine_app as engine1_app,
+    studio_app as studio1_app,
+)
 from dayhoff_tools.cli.utility_commands import (
     add_dependency,
     build_and_upload_wheel,
@@ -70,6 +73,7 @@ app.add_typer(gcp_app, name="gcp", help="Manage GCP authentication and impersona
 app.add_typer(aws_app, name="aws", help="Manage AWS SSO authentication.")
 app.add_typer(gh_app, name="gh", help="Manage GitHub authentication.")

+
 # Engine and Studio commands (v2 - new default with progress tracking)
 # These use Click instead of Typer, so we need a passthrough wrapper
 @app.command(

pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "dayhoff-tools"
-version = "1.14.6"
+version = "1.14.7"
 description = "Common tools for all the repos at Dayhoff Labs"
 authors = [
     {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}