PyPI - dayhoff-tools - Versions diffs - 1.14.1__py3-none-any.whl → 1.14.2__py3-none-any.whl - Mend

dayhoff-tools 1.14.1-py3-none-any.whl → 1.14.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

dayhoff_tools/batch/__init__.py +8 -0
dayhoff_tools/batch/workers/__init__.py +12 -0
dayhoff_tools/batch/workers/base.py +150 -0
dayhoff_tools/batch/workers/boltz.py +407 -0
dayhoff_tools/batch/workers/embed_t5.py +92 -0
dayhoff_tools/cli/batch/__init__.py +85 -0
dayhoff_tools/cli/batch/aws_batch.py +401 -0
dayhoff_tools/cli/batch/commands/__init__.py +25 -0
dayhoff_tools/cli/batch/commands/boltz.py +362 -0
dayhoff_tools/cli/batch/commands/cancel.py +82 -0
dayhoff_tools/cli/batch/commands/embed_t5.py +303 -0
dayhoff_tools/cli/batch/commands/finalize.py +206 -0
dayhoff_tools/cli/batch/commands/list_jobs.py +78 -0
dayhoff_tools/cli/batch/commands/local.py +95 -0
dayhoff_tools/cli/batch/commands/logs.py +142 -0
dayhoff_tools/cli/batch/commands/retry.py +142 -0
dayhoff_tools/cli/batch/commands/status.py +214 -0
dayhoff_tools/cli/batch/commands/submit.py +215 -0
dayhoff_tools/cli/batch/job_id.py +151 -0
dayhoff_tools/cli/batch/manifest.py +293 -0
dayhoff_tools/cli/engines_studios/engine-studio-cli.md +26 -21
dayhoff_tools/cli/engines_studios/engine_commands.py +16 -89
dayhoff_tools/cli/engines_studios/ssh_config.py +96 -0
dayhoff_tools/cli/engines_studios/studio_commands.py +13 -2
dayhoff_tools/cli/main.py +14 -0
{dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/METADATA +6 -1
{dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/RECORD +29 -8
{dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/WHEEL +0 -0
{dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/entry_points.txt +0 -0

dayhoff_tools/cli/batch/commands/logs.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""Logs command for viewing job logs."""
+import click
+from ..aws_batch import BatchClient, BatchError
+from ..manifest import BATCH_JOBS_BASE, load_manifest
+@click.command()
+@click.argument("job_id")
+@click.option("--index", type=int, help="Show logs for specific array index")
+@click.option("--failed", is_flag=True, help="Show logs for all failed indices")
+@click.option("--follow", is_flag=True, help="Stream logs in real-time")
+@click.option("--tail", default=100, type=int, help="Show last N lines [default: 100]")
+@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
+def logs(job_id, index, failed, follow, tail, base_path):
+    """View logs for a batch job.
+    Shows CloudWatch logs for the job. For array jobs, you can view logs
+    for specific indices or all failed indices.
+    \b
+    Examples:
+      dh batch logs dma-embed-20260109-a3f2              # Summary + recent logs
+      dh batch logs dma-embed-20260109-a3f2 --index 27   # Specific array index
+      dh batch logs dma-embed-20260109-a3f2 --failed     # All failed indices
+      dh batch logs dma-embed-20260109-a3f2 --follow     # Stream live logs
+    """
+    # Load manifest
+    try:
+        manifest = load_manifest(job_id, base_path)
+    except FileNotFoundError:
+        click.echo(f"Job not found: {job_id}", err=True)
+        raise SystemExit(1)
+    if not manifest.batch or not manifest.batch.job_id:
+        click.echo("Job has no AWS Batch job ID.", err=True)
+        raise SystemExit(1)
+    batch_job_id = manifest.batch.job_id
+    client = BatchClient()
+    if failed:
+        _show_failed_logs(client, batch_job_id, tail)
+    elif index is not None:
+        _show_index_logs(client, batch_job_id, index, tail, follow)
+    else:
+        _show_job_logs(client, batch_job_id, tail, follow)
+def _show_job_logs(client: BatchClient, batch_job_id: str, tail: int, follow: bool):
+    """Show logs for the main job or first array element."""
+    try:
+        job = client.describe_job(batch_job_id)
+        # Check if it's an array job
+        if "arrayProperties" in job:
+            click.echo("This is an array job. Showing parent job status:")
+            click.echo()
+            array_status = client.get_array_job_status(batch_job_id)
+            click.echo(f"  Succeeded: {array_status.succeeded}/{array_status.total}")
+            click.echo(f"  Failed:    {array_status.failed}/{array_status.total}")
+            click.echo(f"  Running:   {array_status.running}")
+            click.echo()
+            if array_status.failed > 0:
+                failed_indices = client.get_failed_indices(batch_job_id)
+                if failed_indices:
+                    click.echo("Failed indices:")
+                    for idx in failed_indices[:10]:  # Show first 10
+                        click.echo(f"  - {idx}")
+                    if len(failed_indices) > 10:
+                        click.echo(f"  ... and {len(failed_indices) - 10} more")
+                    click.echo()
+                    click.echo("To view logs for failed indices:")
+                    click.echo(f"  dh batch logs {batch_job_id.split('-')[0]} --failed")
+                    click.echo()
+                    click.echo("To view logs for a specific index:")
+                    click.echo(f"  dh batch logs {batch_job_id.split('-')[0]} --index {failed_indices[0]}")
+            return
+        # Single job - show logs
+        log_messages = client.get_logs(batch_job_id, tail=tail, follow=follow)
+        if not log_messages:
+            click.echo("No logs available yet.")
+            return
+        for msg in log_messages:
+            click.echo(msg)
+    except BatchError as e:
+        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)
+def _show_index_logs(client: BatchClient, batch_job_id: str, index: int, tail: int, follow: bool):
+    """Show logs for a specific array index."""
+    child_job_id = f"{batch_job_id}:{index}"
+    click.echo(f"Logs for array index {index}:")
+    click.echo()
+    try:
+        log_messages = client.get_logs(child_job_id, tail=tail, follow=follow)
+        if not log_messages:
+            click.echo("No logs available for this index.")
+            return
+        for msg in log_messages:
+            click.echo(msg)
+    except BatchError as e:
+        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)
+def _show_failed_logs(client: BatchClient, batch_job_id: str, tail: int):
+    """Show logs for all failed array indices."""
+    try:
+        failed_indices = client.get_failed_indices(batch_job_id)
+        if not failed_indices:
+            click.echo("No failed indices found.")
+            return
+        click.echo(f"Found {len(failed_indices)} failed indices")
+        click.echo()
+        for idx in failed_indices:
+            click.echo(click.style(f"=== Index {idx} ===", fg="red", bold=True))
+            child_job_id = f"{batch_job_id}:{idx}"
+            log_messages = client.get_logs(child_job_id, tail=min(tail, 50))
+            for msg in log_messages:
+                click.echo(msg)
+            click.echo()
+    except BatchError as e:
+        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)

dayhoff_tools/cli/batch/commands/retry.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""Retry command for re-running failed chunks."""
+from datetime import datetime
+import click
+from ..aws_batch import BatchClient, BatchError
+from ..job_id import generate_job_id
+from ..manifest import (
+    BATCH_JOBS_BASE,
+    JobStatus,
+    RetryInfo,
+    get_job_dir,
+    load_manifest,
+    save_manifest,
+)
+@click.command()
+@click.argument("job_id")
+@click.option("--indices", help="Specific indices to retry (comma-separated)")
+@click.option("--dry-run", is_flag=True, help="Show what would be retried without submitting")
+@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
+def retry(job_id, indices, dry_run, base_path):
+    """Retry failed chunks of a batch job.
+    Identifies failed array indices and submits a new job to retry only
+    those specific indices.
+    \b
+    Examples:
+      dh batch retry dma-embed-20260109-a3f2              # Retry all failed
+      dh batch retry dma-embed-20260109-a3f2 --indices 5,12,27  # Retry specific indices
+      dh batch retry dma-embed-20260109-a3f2 --dry-run   # Show what would be retried
+    """
+    # Load manifest
+    try:
+        manifest = load_manifest(job_id, base_path)
+    except FileNotFoundError:
+        click.echo(f"Job not found: {job_id}", err=True)
+        raise SystemExit(1)
+    # Get failed indices
+    if indices:
+        # User specified indices
+        retry_indices = [int(i.strip()) for i in indices.split(",")]
+    else:
+        # Auto-detect from .done markers
+        retry_indices = _find_incomplete_chunks(job_id, base_path)
+    if not retry_indices:
+        click.echo("No failed or incomplete chunks found. Nothing to retry.")
+        return
+    click.echo(f"Found {len(retry_indices)} chunks to retry: {retry_indices}")
+    if dry_run:
+        click.echo()
+        click.echo(click.style("Dry run - job not submitted", fg="yellow"))
+        return
+    # Check if we have the required info
+    if not manifest.batch:
+        click.echo("Job has no batch configuration.", err=True)
+        raise SystemExit(1)
+    # Generate retry job ID
+    retry_id = f"{job_id}-r{len(manifest.retries) + 1}"
+    click.echo()
+    click.echo(f"Retry job ID: {retry_id}")
+    # Submit retry job
+    try:
+        client = BatchClient()
+        job_dir = get_job_dir(job_id, base_path)
+        environment = {
+            "JOB_DIR": str(job_dir),
+            "JOB_ID": job_id,
+            "BATCH_RETRY_INDICES": ",".join(str(i) for i in retry_indices),
+        }
+        batch_job_id = client.submit_array_job_with_indices(
+            job_name=retry_id,
+            job_definition=manifest.batch.job_definition or "dayhoff-embed-t5",
+            job_queue=manifest.batch.queue,
+            indices=retry_indices,
+            environment=environment,
+            timeout_seconds=6 * 3600,
+            retry_attempts=3,
+        )
+        # Update manifest with retry info
+        retry_info = RetryInfo(
+            retry_id=retry_id,
+            indices=retry_indices,
+            batch_job_id=batch_job_id,
+            created=datetime.utcnow(),
+        )
+        manifest.retries.append(retry_info)
+        manifest.status = JobStatus.RUNNING
+        save_manifest(manifest, base_path)
+        click.echo()
+        click.echo(click.style("✓ Retry job submitted successfully!", fg="green"))
+        click.echo()
+        click.echo(f"AWS Batch Job ID: {batch_job_id}")
+        click.echo()
+        click.echo("Next steps:")
+        click.echo(f"  Check status:  dh batch status {job_id}")
+        click.echo(f"  View logs:     dh batch logs {job_id}")
+    except BatchError as e:
+        click.echo(click.style(f"✗ Failed to submit retry job: {e}", fg="red"), err=True)
+        raise SystemExit(1)
+def _find_incomplete_chunks(job_id: str, base_path: str) -> list[int]:
+    """Find chunks that don't have .done markers."""
+    job_dir = get_job_dir(job_id, base_path)
+    input_dir = job_dir / "input"
+    output_dir = job_dir / "output"
+    if not input_dir.exists():
+        return []
+    # Find all input chunks
+    input_chunks = sorted(input_dir.glob("chunk_*.fasta"))
+    incomplete = []
+    for chunk_path in input_chunks:
+        # Extract index from filename (chunk_000.fasta -> 0)
+        idx_str = chunk_path.stem.split("_")[1]
+        idx = int(idx_str)
+        # Check for .done marker
+        done_marker = output_dir / f"embed_{idx:03d}.done"
+        if not done_marker.exists():
+            incomplete.append(idx)
+    return incomplete

dayhoff_tools/cli/batch/commands/status.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""Status command for viewing job status."""
+import click
+from ..aws_batch import BatchClient, BatchError
+from ..manifest import BATCH_JOBS_BASE, JobStatus, list_jobs as list_manifests, load_manifest
+def format_status(status: JobStatus) -> str:
+    """Format status with color."""
+    colors = {
+        JobStatus.PENDING: "yellow",
+        JobStatus.SUBMITTED: "yellow",
+        JobStatus.RUNNING: "cyan",
+        JobStatus.SUCCEEDED: "green",
+        JobStatus.FAILED: "red",
+        JobStatus.CANCELLED: "magenta",
+        JobStatus.FINALIZING: "cyan",
+        JobStatus.FINALIZED: "green",
+    }
+    return click.style(status.value, fg=colors.get(status, "white"))
+def format_time_ago(dt) -> str:
+    """Format a datetime as a relative time string."""
+    from datetime import datetime, timezone
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    now = datetime.now(timezone.utc)
+    delta = now - dt
+    seconds = delta.total_seconds()
+    if seconds < 60:
+        return "just now"
+    elif seconds < 3600:
+        mins = int(seconds / 60)
+        return f"{mins}m ago"
+    elif seconds < 86400:
+        hours = int(seconds / 3600)
+        return f"{hours}h ago"
+    else:
+        days = int(seconds / 86400)
+        return f"{days}d ago"
+@click.command()
+@click.argument("job_id", required=False)
+@click.option("--user", help="Filter by username")
+@click.option(
+    "--status",
+    "status_filter",
+    type=click.Choice([s.value for s in JobStatus]),
+    help="Filter by status",
+)
+@click.option("--pipeline", help="Filter by pipeline type")
+@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
+def status(job_id, user, status_filter, pipeline, base_path):
+    """Show job status.
+    Without JOB_ID, shows a summary of recent jobs.
+    With JOB_ID, shows detailed status for that job.
+    \b
+    Examples:
+      dh batch status                          # List recent jobs
+      dh batch status dma-embed-20260109-a3f2  # Show specific job
+      dh batch status --user dma               # Filter by user
+      dh batch status --status running         # Filter by status
+    """
+    if job_id:
+        _show_job_details(job_id, base_path)
+    else:
+        _show_job_list(user, status_filter, pipeline, base_path)
+def _show_job_list(user, status_filter, pipeline, base_path):
+    """Show a list of recent jobs."""
+    status_enum = JobStatus(status_filter) if status_filter else None
+    manifests = list_manifests(
+        base_path=base_path,
+        user=user,
+        status=status_enum,
+        pipeline=pipeline,
+        limit=20,
+    )
+    if not manifests:
+        click.echo("No jobs found.")
+        return
+    # Print header
+    click.echo()
+    click.echo(
+        f"{'JOB ID':<35} {'STATUS':<12} {'PIPELINE':<12} {'USER':<10} {'CREATED':<12}"
+    )
+    click.echo("-" * 85)
+    for manifest in manifests:
+        click.echo(
+            f"{manifest.job_id:<35} "
+            f"{format_status(manifest.status):<21} "  # Extra space for color codes
+            f"{manifest.pipeline:<12} "
+            f"{manifest.user:<10} "
+            f"{format_time_ago(manifest.created):<12}"
+        )
+    click.echo()
+    click.echo(f"Showing {len(manifests)} most recent jobs.")
+    click.echo("Use 'dh batch status <job-id>' for details.")
+def _show_job_details(job_id: str, base_path: str):
+    """Show detailed status for a specific job."""
+    try:
+        manifest = load_manifest(job_id, base_path)
+    except FileNotFoundError:
+        click.echo(f"Job not found: {job_id}", err=True)
+        click.echo(f"Looking in: {base_path}/{job_id}/manifest.json", err=True)
+        raise SystemExit(1)
+    click.echo()
+    click.echo(f"Job ID:    {manifest.job_id}")
+    click.echo(f"Status:    {format_status(manifest.status)}")
+    click.echo(f"Pipeline:  {manifest.pipeline}")
+    click.echo(f"User:      {manifest.user}")
+    click.echo(f"Created:   {manifest.created.isoformat()} ({format_time_ago(manifest.created)})")
+    click.echo(f"Updated:   {manifest.updated.isoformat()} ({format_time_ago(manifest.updated)})")
+    if manifest.input:
+        click.echo()
+        click.echo("Input:")
+        click.echo(f"  Source:     {manifest.input.source}")
+        if manifest.input.num_sequences:
+            click.echo(f"  Sequences:  {manifest.input.num_sequences:,}")
+        if manifest.input.num_chunks:
+            click.echo(f"  Chunks:     {manifest.input.num_chunks}")
+    if manifest.batch:
+        click.echo()
+        click.echo("Batch:")
+        click.echo(f"  Queue:      {manifest.batch.queue}")
+        if manifest.batch.job_id:
+            click.echo(f"  AWS Job ID: {manifest.batch.job_id}")
+        if manifest.batch.job_definition:
+            click.echo(f"  Definition: {manifest.batch.job_definition}")
+        if manifest.batch.array_size:
+            click.echo(f"  Array Size: {manifest.batch.array_size}")
+            # Try to get live status from AWS Batch
+            if manifest.batch.job_id:
+                _show_array_status(manifest.batch.job_id)
+    if manifest.output:
+        click.echo()
+        click.echo("Output:")
+        if manifest.output.destination:
+            click.echo(f"  Destination: {manifest.output.destination}")
+        click.echo(f"  Finalized:   {manifest.output.finalized}")
+    if manifest.error_message:
+        click.echo()
+        click.echo(click.style("Error:", fg="red"))
+        click.echo(f"  {manifest.error_message}")
+    if manifest.retries:
+        click.echo()
+        click.echo("Retries:")
+        for retry in manifest.retries:
+            click.echo(f"  - {retry.retry_id}: indices {retry.indices}")
+    # Suggest next steps
+    click.echo()
+    if manifest.status == JobStatus.RUNNING:
+        click.echo("Next steps:")
+        click.echo(f"  View logs:   dh batch logs {job_id}")
+        click.echo(f"  Cancel job:  dh batch cancel {job_id}")
+    elif manifest.status == JobStatus.FAILED:
+        click.echo("Next steps:")
+        click.echo(f"  View logs:   dh batch logs {job_id} --failed")
+        click.echo(f"  Retry:       dh batch retry {job_id}")
+    elif manifest.status == JobStatus.SUCCEEDED:
+        click.echo("Next steps:")
+        click.echo(f"  Finalize:    dh batch finalize {job_id} --output /primordial/output.h5")
+def _show_array_status(batch_job_id: str):
+    """Show live array job status from AWS Batch."""
+    try:
+        client = BatchClient()
+        array_status = client.get_array_job_status(batch_job_id)
+        click.echo()
+        click.echo("  Array Status:")
+        click.echo(f"    Pending:   {array_status.pending}")
+        click.echo(f"    Runnable:  {array_status.runnable}")
+        click.echo(f"    Starting:  {array_status.starting}")
+        click.echo(f"    Running:   {array_status.running}")
+        click.echo(
+            f"    Succeeded: {click.style(str(array_status.succeeded), fg='green')}"
+        )
+        click.echo(f"    Failed:    {click.style(str(array_status.failed), fg='red')}")
+        if array_status.is_complete:
+            pct = array_status.success_rate * 100
+            color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
+            click.echo(f"    Complete:  {click.style(f'{pct:.1f}%', fg=color)} success rate")
+        else:
+            pct = array_status.completed / array_status.total * 100
+            click.echo(f"    Progress:  {pct:.1f}% ({array_status.completed}/{array_status.total})")
+    except BatchError as e:
+        click.echo(f"    (Could not fetch live status: {e})")

dayhoff-tools 1.14.1__py3-none-any.whl → 1.14.2__py3-none-any.whl

dayhoff-tools 1.14.1-py3-none-any.whl → 1.14.2-py3-none-any.whl