dayhoff-tools 1.14.1__py3-none-any.whl → 1.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. dayhoff_tools/batch/__init__.py +8 -0
  2. dayhoff_tools/batch/workers/__init__.py +12 -0
  3. dayhoff_tools/batch/workers/base.py +150 -0
  4. dayhoff_tools/batch/workers/boltz.py +407 -0
  5. dayhoff_tools/batch/workers/embed_t5.py +92 -0
  6. dayhoff_tools/cli/batch/__init__.py +85 -0
  7. dayhoff_tools/cli/batch/aws_batch.py +401 -0
  8. dayhoff_tools/cli/batch/commands/__init__.py +25 -0
  9. dayhoff_tools/cli/batch/commands/boltz.py +362 -0
  10. dayhoff_tools/cli/batch/commands/cancel.py +82 -0
  11. dayhoff_tools/cli/batch/commands/embed_t5.py +303 -0
  12. dayhoff_tools/cli/batch/commands/finalize.py +206 -0
  13. dayhoff_tools/cli/batch/commands/list_jobs.py +78 -0
  14. dayhoff_tools/cli/batch/commands/local.py +95 -0
  15. dayhoff_tools/cli/batch/commands/logs.py +142 -0
  16. dayhoff_tools/cli/batch/commands/retry.py +142 -0
  17. dayhoff_tools/cli/batch/commands/status.py +214 -0
  18. dayhoff_tools/cli/batch/commands/submit.py +215 -0
  19. dayhoff_tools/cli/batch/job_id.py +151 -0
  20. dayhoff_tools/cli/batch/manifest.py +293 -0
  21. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +26 -21
  22. dayhoff_tools/cli/engines_studios/engine_commands.py +16 -89
  23. dayhoff_tools/cli/engines_studios/ssh_config.py +96 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +15 -4
  25. dayhoff_tools/cli/main.py +14 -0
  26. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/METADATA +6 -1
  27. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/RECORD +29 -8
  28. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/WHEEL +0 -0
  29. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,142 @@
1
+ """Logs command for viewing job logs."""
2
+
3
+ import click
4
+
5
+ from ..aws_batch import BatchClient, BatchError
6
+ from ..manifest import BATCH_JOBS_BASE, load_manifest
7
+
8
+
9
@click.command()
@click.argument("job_id")
@click.option("--index", type=int, help="Show logs for specific array index")
@click.option("--failed", is_flag=True, help="Show logs for all failed indices")
@click.option("--follow", is_flag=True, help="Stream logs in real-time")
@click.option("--tail", default=100, type=int, help="Show last N lines [default: 100]")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def logs(job_id, index, failed, follow, tail, base_path):
    """View logs for a batch job.

    Shows CloudWatch logs for the job. For array jobs, you can view logs
    for specific indices or all failed indices.

    \b
    Examples:
      dh batch logs dma-embed-20260109-a3f2              # Summary + recent logs
      dh batch logs dma-embed-20260109-a3f2 --index 27   # Specific array index
      dh batch logs dma-embed-20260109-a3f2 --failed     # All failed indices
      dh batch logs dma-embed-20260109-a3f2 --follow     # Stream live logs
    """
    # Load manifest
    try:
        manifest = load_manifest(job_id, base_path)
    except FileNotFoundError:
        click.echo(f"Job not found: {job_id}", err=True)
        raise SystemExit(1)

    if not manifest.batch or not manifest.batch.job_id:
        click.echo("Job has no AWS Batch job ID.", err=True)
        raise SystemExit(1)

    batch_job_id = manifest.batch.job_id
    client = BatchClient()

    # --failed takes precedence over --index; --follow is not used by --failed.
    if failed:
        _show_failed_logs(client, batch_job_id, tail)
    elif index is not None:
        _show_index_logs(client, batch_job_id, index, tail, follow)
    else:
        # Pass the dh job ID through so follow-up hints name a job that
        # `dh batch logs` can actually resolve.
        _show_job_logs(client, batch_job_id, tail, follow, job_id=job_id)


def _show_job_logs(
    client: BatchClient,
    batch_job_id: str,
    tail: int,
    follow: bool,
    job_id: "str | None" = None,
):
    """Show logs for a single job, or a status summary for an array job.

    For an array job (the job description contains ``arrayProperties``) no
    log lines are printed; instead the succeeded/failed/running counts, up
    to ten failed indices and follow-up command hints are shown.

    Args:
        client: Batch client used for all lookups.
        batch_job_id: AWS Batch job ID taken from the manifest.
        tail: Number of trailing log lines to request.
        follow: Forwarded to ``client.get_logs``.
        job_id: The dh job ID to print in the follow-up hints. When omitted
            a ``<job-id>`` placeholder is printed, because the AWS Batch ID
            is not a valid argument to ``dh batch logs``.

    A ``BatchError`` is reported on stderr and not re-raised.
    """
    hint_id = job_id if job_id else "<job-id>"

    try:
        job = client.describe_job(batch_job_id)

        # Check if it's an array job
        if "arrayProperties" in job:
            click.echo("This is an array job. Showing parent job status:")
            click.echo()

            array_status = client.get_array_job_status(batch_job_id)
            click.echo(f" Succeeded: {array_status.succeeded}/{array_status.total}")
            click.echo(f" Failed: {array_status.failed}/{array_status.total}")
            click.echo(f" Running: {array_status.running}")
            click.echo()

            if array_status.failed > 0:
                failed_indices = client.get_failed_indices(batch_job_id)
                if failed_indices:
                    click.echo("Failed indices:")
                    for idx in failed_indices[:10]:  # Show first 10
                        click.echo(f" - {idx}")
                    if len(failed_indices) > 10:
                        click.echo(f" ... and {len(failed_indices) - 10} more")
                    click.echo()
                    click.echo("To view logs for failed indices:")
                    click.echo(f" dh batch logs {hint_id} --failed")
                    click.echo()
                    click.echo("To view logs for a specific index:")
                    click.echo(f" dh batch logs {hint_id} --index {failed_indices[0]}")
            return

        # Single job - show logs
        log_messages = client.get_logs(batch_job_id, tail=tail, follow=follow)

        if not log_messages:
            click.echo("No logs available yet.")
            return

        for msg in log_messages:
            click.echo(msg)

    except BatchError as e:
        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)
95
+
96
+
97
def _show_index_logs(client: BatchClient, batch_job_id: str, index: int, tail: int, follow: bool):
    """Print the log lines of one child of an array job.

    The child is addressed as ``"<batch_job_id>:<index>"``. A short heading
    is printed first; a ``BatchError`` is reported on stderr and swallowed.
    """
    target = f"{batch_job_id}:{index}"

    click.echo(f"Logs for array index {index}:")
    click.echo()

    try:
        lines = client.get_logs(target, tail=tail, follow=follow)
        if lines:
            for line in lines:
                click.echo(line)
        else:
            click.echo("No logs available for this index.")
    except BatchError as exc:
        click.secho(f"Error fetching logs: {exc}", fg="red", err=True)
116
+
117
+
118
def _show_failed_logs(client: BatchClient, batch_job_id: str, tail: int):
    """Show logs for all failed array indices.

    Each failed index gets its own heading followed by its log lines. At
    most 50 lines are requested per index (``min(tail, 50)``) to keep the
    combined output manageable.

    Errors are handled per index: a ``BatchError`` while fetching one
    child's logs is reported on stderr and the remaining indices are still
    shown. A ``BatchError`` while listing the failed indices is reported on
    stderr and ends the command.

    Args:
        client: Batch client used for all lookups.
        batch_job_id: AWS Batch ID of the parent array job.
        tail: Requested number of trailing lines (capped at 50 per index).
    """
    try:
        failed_indices = client.get_failed_indices(batch_job_id)
    except BatchError as e:
        click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)
        return

    if not failed_indices:
        click.echo("No failed indices found.")
        return

    click.echo(f"Found {len(failed_indices)} failed indices")
    click.echo()

    per_index_tail = min(tail, 50)  # loop-invariant cap

    for idx in failed_indices:
        click.echo(click.style(f"=== Index {idx} ===", fg="red", bold=True))

        child_job_id = f"{batch_job_id}:{idx}"
        try:
            log_messages = client.get_logs(child_job_id, tail=per_index_tail)

            if not log_messages:
                click.echo("No logs available for this index.")
            else:
                for msg in log_messages:
                    click.echo(msg)
        except BatchError as e:
            # Keep going: one unreadable child must not hide the others.
            click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)

        click.echo()
@@ -0,0 +1,142 @@
1
+ """Retry command for re-running failed chunks."""
2
+
3
+ from datetime import datetime
4
+
5
+ import click
6
+
7
+ from ..aws_batch import BatchClient, BatchError
8
+ from ..job_id import generate_job_id
9
+ from ..manifest import (
10
+ BATCH_JOBS_BASE,
11
+ JobStatus,
12
+ RetryInfo,
13
+ get_job_dir,
14
+ load_manifest,
15
+ save_manifest,
16
+ )
17
+
18
+
19
@click.command()
@click.argument("job_id")
@click.option("--indices", help="Specific indices to retry (comma-separated)")
@click.option("--dry-run", is_flag=True, help="Show what would be retried without submitting")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def retry(job_id, indices, dry_run, base_path):
    """Retry failed chunks of a batch job.

    Identifies failed array indices and submits a new job to retry only
    those specific indices.

    \b
    Examples:
      dh batch retry dma-embed-20260109-a3f2                    # Retry all failed
      dh batch retry dma-embed-20260109-a3f2 --indices 5,12,27  # Retry specific indices
      dh batch retry dma-embed-20260109-a3f2 --dry-run          # Show what would be retried
    """
    # Load manifest
    try:
        manifest = load_manifest(job_id, base_path)
    except FileNotFoundError:
        click.echo(f"Job not found: {job_id}", err=True)
        raise SystemExit(1)

    # Get failed indices
    if indices:
        # User specified indices (validated; bad input yields a usage error
        # instead of a ValueError traceback)
        retry_indices = _parse_retry_indices(indices)
    else:
        # Auto-detect from .done markers
        retry_indices = _find_incomplete_chunks(job_id, base_path)

    if not retry_indices:
        click.echo("No failed or incomplete chunks found. Nothing to retry.")
        return

    click.echo(f"Found {len(retry_indices)} chunks to retry: {retry_indices}")

    if dry_run:
        click.echo()
        click.echo(click.style("Dry run - job not submitted", fg="yellow"))
        return

    # Check if we have the required info
    if not manifest.batch:
        click.echo("Job has no batch configuration.", err=True)
        raise SystemExit(1)

    # Generate retry job ID: <job_id>-r<N>, N counting previous retries
    retry_id = f"{job_id}-r{len(manifest.retries) + 1}"

    click.echo()
    click.echo(f"Retry job ID: {retry_id}")

    # Submit retry job
    try:
        client = BatchClient()
        job_dir = get_job_dir(job_id, base_path)

        # The retry keeps the ORIGINAL job's directory and ID and passes the
        # index list through the environment.
        environment = {
            "JOB_DIR": str(job_dir),
            "JOB_ID": job_id,
            "BATCH_RETRY_INDICES": ",".join(str(i) for i in retry_indices),
        }

        batch_job_id = client.submit_array_job_with_indices(
            job_name=retry_id,
            # NOTE(review): the fallback definition is embed-specific; confirm
            # it is appropriate for manifests from other pipelines.
            job_definition=manifest.batch.job_definition or "dayhoff-embed-t5",
            job_queue=manifest.batch.queue,
            indices=retry_indices,
            environment=environment,
            timeout_seconds=6 * 3600,
            retry_attempts=3,
        )

        # Update manifest with retry info
        retry_info = RetryInfo(
            retry_id=retry_id,
            indices=retry_indices,
            batch_job_id=batch_job_id,
            # NOTE(review): utcnow() is naive and deprecated since Python 3.12;
            # left unchanged so the stored timestamp stays consistent with
            # whatever the manifest module writes elsewhere - confirm before
            # switching to an aware datetime.
            created=datetime.utcnow(),
        )
        manifest.retries.append(retry_info)
        manifest.status = JobStatus.RUNNING
        save_manifest(manifest, base_path)

        click.echo()
        click.echo(click.style("✓ Retry job submitted successfully!", fg="green"))
        click.echo()
        click.echo(f"AWS Batch Job ID: {batch_job_id}")
        click.echo()
        click.echo("Next steps:")
        click.echo(f" Check status: dh batch status {job_id}")
        click.echo(f" View logs: dh batch logs {job_id}")

    except BatchError as e:
        click.echo(click.style(f"✗ Failed to submit retry job: {e}", fg="red"), err=True)
        raise SystemExit(1)


def _parse_retry_indices(raw: str) -> list[int]:
    """Parse the ``--indices`` value into a sorted list of unique indices.

    Empty tokens (for example from a trailing comma) are ignored.

    Raises:
        click.BadParameter: if a token is not an integer, is negative, or
            if no index remains after parsing.
    """
    parsed: set[int] = set()
    for token in raw.split(","):
        token = token.strip()
        if not token:
            continue
        try:
            value = int(token)
        except ValueError:
            raise click.BadParameter(
                f"'{token}' is not a valid integer index", param_hint="--indices"
            ) from None
        if value < 0:
            raise click.BadParameter(
                f"index must be non-negative, got {value}", param_hint="--indices"
            )
        parsed.add(value)

    if not parsed:
        raise click.BadParameter("no indices given", param_hint="--indices")
    return sorted(parsed)
117
+
118
+
119
def _find_incomplete_chunks(job_id: str, base_path: str) -> list[int]:
    """Return the indices of input chunks that have no ``.done`` marker.

    Scans ``<job_dir>/input`` for ``chunk_*.fasta`` files (in sorted order),
    takes the integer after the first underscore of each file stem as the
    chunk index, and reports every index for which
    ``<job_dir>/output/embed_<idx:03d>.done`` does not exist. Returns an
    empty list when the input directory is missing.
    """
    job_root = get_job_dir(job_id, base_path)
    chunks_dir = job_root / "input"
    if not chunks_dir.exists():
        return []

    markers_dir = job_root / "output"

    # chunk_000.fasta -> 0
    chunk_ids = (
        int(fasta.stem.split("_")[1])
        for fasta in sorted(chunks_dir.glob("chunk_*.fasta"))
    )
    return [
        chunk_id
        for chunk_id in chunk_ids
        if not (markers_dir / f"embed_{chunk_id:03d}.done").exists()
    ]
@@ -0,0 +1,214 @@
1
+ """Status command for viewing job status."""
2
+
3
+ import click
4
+
5
+ from ..aws_batch import BatchClient, BatchError
6
+ from ..manifest import BATCH_JOBS_BASE, JobStatus, list_jobs as list_manifests, load_manifest
7
+
8
+
9
def format_status(status: JobStatus) -> str:
    """Return the status value wrapped in its display colour.

    Statuses without an assigned colour are rendered in white.
    """
    colour_groups = (
        ("yellow", (JobStatus.PENDING, JobStatus.SUBMITTED)),
        ("cyan", (JobStatus.RUNNING, JobStatus.FINALIZING)),
        ("green", (JobStatus.SUCCEEDED, JobStatus.FINALIZED)),
        ("red", (JobStatus.FAILED,)),
        ("magenta", (JobStatus.CANCELLED,)),
    )
    chosen = "white"
    for colour, members in colour_groups:
        if status in members:
            chosen = colour
            break
    return click.style(status.value, fg=chosen)
22
+
23
+
24
def format_time_ago(dt) -> str:
    """Render how long ago ``dt`` was as a short relative string.

    A naive ``dt`` is interpreted as UTC. Anything under a minute old
    (including timestamps in the future) is "just now"; otherwise the age
    is truncated to whole minutes, hours or days.
    """
    from datetime import datetime, timezone

    moment = dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
    age = (datetime.now(timezone.utc) - moment).total_seconds()

    if age < 60:
        return "just now"

    # (exclusive upper bound in seconds, seconds per unit, unit suffix)
    for upper_bound, unit_seconds, suffix in ((3600, 60, "m"), (86400, 3600, "h")):
        if age < upper_bound:
            return f"{int(age / unit_seconds)}{suffix} ago"

    return f"{int(age / 86400)}d ago"
46
+
47
+
48
@click.command()
@click.argument("job_id", required=False)
@click.option("--user", help="Filter by username")
@click.option(
    "--status",
    "status_filter",
    type=click.Choice([s.value for s in JobStatus]),
    help="Filter by status",
)
@click.option("--pipeline", help="Filter by pipeline type")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def status(job_id, user, status_filter, pipeline, base_path):
    """Show job status.

    Without JOB_ID, shows a summary of recent jobs.
    With JOB_ID, shows detailed status for that job.

    \b
    Examples:
      dh batch status                          # List recent jobs
      dh batch status dma-embed-20260109-a3f2  # Show specific job
      dh batch status --user dma               # Filter by user
      dh batch status --status running         # Filter by status
    """
    # No job given: the filter options apply to the listing view only.
    if not job_id:
        _show_job_list(user, status_filter, pipeline, base_path)
        return

    _show_job_details(job_id, base_path)
76
+
77
+
78
def _show_job_list(user, status_filter, pipeline, base_path):
    """Show a table of recent jobs (at most 20), optionally filtered.

    Args:
        user: Username to filter by, or None.
        status_filter: Status value string to filter by, or None.
        pipeline: Pipeline type to filter by, or None.
        base_path: Base path under which job manifests are stored.
    """
    status_enum = JobStatus(status_filter) if status_filter else None
    manifests = list_manifests(
        base_path=base_path,
        user=user,
        status=status_enum,
        pipeline=pipeline,
        limit=20,
    )

    if not manifests:
        click.echo("No jobs found.")
        return

    # Print header
    click.echo()
    click.echo(
        f"{'JOB ID':<35} {'STATUS':<12} {'PIPELINE':<12} {'USER':<10} {'CREATED':<12}"
    )
    click.echo("-" * 85)

    for manifest in manifests:
        # Pad from the visible text rather than the styled string: the styled
        # string includes ANSI escape bytes, which are absent when colour is
        # stripped (e.g. piped output), so a fixed wider width would misalign
        # the column against the 12-wide header.
        status_text = manifest.status.value
        status_cell = format_status(manifest.status) + " " * max(0, 12 - len(status_text))
        click.echo(
            f"{manifest.job_id:<35} "
            f"{status_cell} "
            f"{manifest.pipeline:<12} "
            f"{manifest.user:<10} "
            f"{format_time_ago(manifest.created):<12}"
        )

    click.echo()
    click.echo(f"Showing {len(manifests)} most recent jobs.")
    click.echo("Use 'dh batch status <job-id>' for details.")
112
+
113
+
114
def _show_job_details(job_id: str, base_path: str):
    """Print the full manifest view of one job.

    Sections are printed in a fixed order - identity, input, batch (with a
    live array-status lookup when an AWS job ID is recorded), output, error,
    retries - followed by next-step hints that depend on the job status.
    Exits with status 1 when no manifest exists for ``job_id``.
    """
    try:
        manifest = load_manifest(job_id, base_path)
    except FileNotFoundError:
        click.echo(f"Job not found: {job_id}", err=True)
        click.echo(f"Looking in: {base_path}/{job_id}/manifest.json", err=True)
        raise SystemExit(1)

    # --- identity -------------------------------------------------------
    click.echo()
    click.echo(f"Job ID: {manifest.job_id}")
    click.echo(f"Status: {format_status(manifest.status)}")
    click.echo(f"Pipeline: {manifest.pipeline}")
    click.echo(f"User: {manifest.user}")
    click.echo(f"Created: {manifest.created.isoformat()} ({format_time_ago(manifest.created)})")
    click.echo(f"Updated: {manifest.updated.isoformat()} ({format_time_ago(manifest.updated)})")

    # --- input ----------------------------------------------------------
    job_input = manifest.input
    if job_input:
        click.echo()
        click.echo("Input:")
        click.echo(f" Source: {job_input.source}")
        if job_input.num_sequences:
            click.echo(f" Sequences: {job_input.num_sequences:,}")
        if job_input.num_chunks:
            click.echo(f" Chunks: {job_input.num_chunks}")

    # --- batch ----------------------------------------------------------
    batch = manifest.batch
    if batch:
        click.echo()
        click.echo("Batch:")
        click.echo(f" Queue: {batch.queue}")
        if batch.job_id:
            click.echo(f" AWS Job ID: {batch.job_id}")
        if batch.job_definition:
            click.echo(f" Definition: {batch.job_definition}")
        if batch.array_size:
            click.echo(f" Array Size: {batch.array_size}")

        # Try to get live status from AWS Batch
        if batch.job_id:
            _show_array_status(batch.job_id)

    # --- output ---------------------------------------------------------
    job_output = manifest.output
    if job_output:
        click.echo()
        click.echo("Output:")
        if job_output.destination:
            click.echo(f" Destination: {job_output.destination}")
        click.echo(f" Finalized: {job_output.finalized}")

    # --- error ----------------------------------------------------------
    if manifest.error_message:
        click.echo()
        click.echo(click.style("Error:", fg="red"))
        click.echo(f" {manifest.error_message}")

    # --- retries --------------------------------------------------------
    if manifest.retries:
        click.echo()
        click.echo("Retries:")
        for attempt in manifest.retries:
            click.echo(f" - {attempt.retry_id}: indices {attempt.indices}")

    # --- next steps (only for running / failed / succeeded jobs) --------
    hints_by_status = {
        JobStatus.RUNNING: (
            f" View logs: dh batch logs {job_id}",
            f" Cancel job: dh batch cancel {job_id}",
        ),
        JobStatus.FAILED: (
            f" View logs: dh batch logs {job_id} --failed",
            f" Retry: dh batch retry {job_id}",
        ),
        JobStatus.SUCCEEDED: (
            f" Finalize: dh batch finalize {job_id} --output /primordial/output.h5",
        ),
    }

    click.echo()
    hints = hints_by_status.get(manifest.status)
    if hints:
        click.echo("Next steps:")
        for hint in hints:
            click.echo(hint)
186
+
187
+
188
def _show_array_status(batch_job_id: str):
    """Show live array job status from AWS Batch.

    Prints the per-state child counts, then either the final success rate
    (when the array is complete) or the progress percentage. A
    ``BatchError`` is reported inline and not re-raised, so the rest of the
    status view is still printed.

    Args:
        batch_job_id: AWS Batch ID of the job to query.
    """
    try:
        client = BatchClient()
        array_status = client.get_array_job_status(batch_job_id)

        click.echo()
        click.echo(" Array Status:")
        click.echo(f" Pending: {array_status.pending}")
        click.echo(f" Runnable: {array_status.runnable}")
        click.echo(f" Starting: {array_status.starting}")
        click.echo(f" Running: {array_status.running}")
        click.echo(
            f" Succeeded: {click.style(str(array_status.succeeded), fg='green')}"
        )
        click.echo(f" Failed: {click.style(str(array_status.failed), fg='red')}")

        if array_status.is_complete:
            pct = array_status.success_rate * 100
            color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
            click.echo(f" Complete: {click.style(f'{pct:.1f}%', fg=color)} success rate")
        else:
            total = array_status.total
            # Guard against a zero total so the status view never dies on a
            # ZeroDivisionError.
            pct = array_status.completed / total * 100 if total else 0.0
            click.echo(f" Progress: {pct:.1f}% ({array_status.completed}/{total})")

    except BatchError as e:
        click.echo(f" (Could not fetch live status: {e})")