dayhoff-tools 1.14.1__py3-none-any.whl → 1.14.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- dayhoff_tools/batch/__init__.py +8 -0
- dayhoff_tools/batch/workers/__init__.py +12 -0
- dayhoff_tools/batch/workers/base.py +150 -0
- dayhoff_tools/batch/workers/boltz.py +407 -0
- dayhoff_tools/batch/workers/embed_t5.py +92 -0
- dayhoff_tools/cli/batch/__init__.py +85 -0
- dayhoff_tools/cli/batch/aws_batch.py +401 -0
- dayhoff_tools/cli/batch/commands/__init__.py +25 -0
- dayhoff_tools/cli/batch/commands/boltz.py +362 -0
- dayhoff_tools/cli/batch/commands/cancel.py +82 -0
- dayhoff_tools/cli/batch/commands/embed_t5.py +303 -0
- dayhoff_tools/cli/batch/commands/finalize.py +206 -0
- dayhoff_tools/cli/batch/commands/list_jobs.py +78 -0
- dayhoff_tools/cli/batch/commands/local.py +95 -0
- dayhoff_tools/cli/batch/commands/logs.py +142 -0
- dayhoff_tools/cli/batch/commands/retry.py +142 -0
- dayhoff_tools/cli/batch/commands/status.py +214 -0
- dayhoff_tools/cli/batch/commands/submit.py +215 -0
- dayhoff_tools/cli/batch/job_id.py +151 -0
- dayhoff_tools/cli/batch/manifest.py +293 -0
- dayhoff_tools/cli/engines_studios/engine-studio-cli.md +26 -21
- dayhoff_tools/cli/engines_studios/engine_commands.py +16 -89
- dayhoff_tools/cli/engines_studios/ssh_config.py +96 -0
- dayhoff_tools/cli/engines_studios/studio_commands.py +13 -2
- dayhoff_tools/cli/main.py +14 -0
- {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/METADATA +6 -1
- {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/RECORD +29 -8
- {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.2.dist-info}/entry_points.txt +0 -0
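The new dh batch command group revolves around a job manifest stored under BATCH_JOBS_BASE. The manifest module itself (dayhoff_tools/cli/batch/manifest.py, +293 above) is not expanded in this diff, so the sketch below reconstructs only the fields that the commands shown here actually read and write; every name, type, or default beyond those is an assumption.

# Hypothetical sketch of the manifest these commands exercise, inferred from
# usage in embed_t5.py, finalize.py, list_jobs.py, and local.py below. The
# real definitions live in dayhoff_tools/cli/batch/manifest.py (not shown).
from dataclasses import dataclass
from enum import Enum
from typing import Optional

class JobStatus(str, Enum):
    PENDING = "pending"        # value strings assumed lowercase, based on the
    SUBMITTED = "submitted"    # "--status running" example in list_jobs.py
    RUNNING = "running"
    FINALIZING = "finalizing"
    FINALIZED = "finalized"
    FAILED = "failed"

@dataclass
class InputConfig:
    source: str
    num_sequences: int
    num_chunks: int
    sequences_per_chunk: int

@dataclass
class BatchConfig:
    queue: str
    job_definition: str
    array_size: int
    job_id: Optional[str] = None  # AWS Batch job ID, set after submission

@dataclass
class OutputConfig:
    destination: Optional[str]
    finalized: bool

@dataclass
class JobManifest:
    job_id: str      # e.g. "dma-embed-20260109-a3f2" (user-pipeline-date-suffix)
    user: str
    pipeline: str
    status: JobStatus
    image_uri: str
    input: InputConfig
    batch: BatchConfig
    output: OutputConfig
    created: Optional[str] = None        # read by "dh batch list"; exact type unknown
    error_message: Optional[str] = None  # set when submission fails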
dayhoff_tools/cli/batch/commands/embed_t5.py (new file)
@@ -0,0 +1,303 @@
"""T5 embedding pipeline command."""

import os
from pathlib import Path

import click

from ..aws_batch import BatchClient, BatchError
from ..job_id import generate_job_id
from ..manifest import (
    BATCH_JOBS_BASE,
    BatchConfig,
    InputConfig,
    JobManifest,
    JobStatus,
    OutputConfig,
    create_job_directory,
    get_job_dir,
    save_manifest,
)


# Default settings for T5 embedding
DEFAULT_QUEUE = "t4-1x-spot"
DEFAULT_WORKERS = 50
DEFAULT_SEQS_PER_CHUNK = 5000
DEFAULT_JOB_DEFINITION = "dayhoff-embed-t5"
DEFAULT_IMAGE_URI = "074735440724.dkr.ecr.us-east-1.amazonaws.com/dayhoff:embed-latest"


@click.command()
@click.argument("input_fasta", type=click.Path(exists=True))
@click.option("--workers", default=DEFAULT_WORKERS, type=int, help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]")
@click.option("--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]")
@click.option("--seqs-per-chunk", default=DEFAULT_SEQS_PER_CHUNK, type=int, help=f"Sequences per chunk [default: {DEFAULT_SEQS_PER_CHUNK}]")
@click.option("--local", "run_local", is_flag=True, help="Run single chunk locally instead of Batch")
@click.option("--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging")
@click.option("--dry-run", is_flag=True, help="Show plan without submitting")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def embed_t5(input_fasta, workers, queue, seqs_per_chunk, run_local, run_shell, dry_run, base_path):
    """Generate T5 protein embeddings for a FASTA file.

    Splits the input FASTA into chunks and processes them in parallel using
    AWS Batch array jobs. Each worker generates embeddings for its chunk
    and writes an H5 file.

    \b
    Examples:
        # Submit to AWS Batch with 50 workers
        dh batch embed-t5 /primordial/proteins.fasta --workers 50

        # Use a faster queue with more workers
        dh batch embed-t5 /primordial/big.fasta --workers 100 --queue a10g-1x-spot

        # Test locally with a single chunk
        dh batch embed-t5 /primordial/test.fasta --local

        # Debug by dropping into container shell
        dh batch embed-t5 /primordial/test.fasta --shell

    \b
    After job completes:
        dh batch status <job-id>                     # Check status
        dh batch finalize <job-id> --output out.h5   # Combine results
    """
    input_path = Path(input_fasta).resolve()

    if run_shell:
        _run_shell_mode(input_path)
        return

    if run_local:
        _run_local_mode(input_path)
        return

    # Batch submission mode
    _submit_batch_job(input_path, workers, queue, seqs_per_chunk, dry_run, base_path)


def _count_sequences(fasta_path: Path) -> int:
    """Count sequences in a FASTA file (fast, just counts > lines)."""
    count = 0
    with open(fasta_path) as f:
        for line in f:
            if line.startswith(">"):
                count += 1
    return count


def _split_fasta(input_path: Path, output_dir: Path, seqs_per_chunk: int) -> int:
    """Split FASTA file into chunks.

    Returns:
        Number of chunks created
    """
    from dayhoff_tools.fasta import split_fasta

    num_chunks = split_fasta(
        fasta_file=str(input_path),
        target_folder=str(output_dir),
        base_name="chunk",
        sequences_per_file=seqs_per_chunk,
        show_progress=True,
    )

    # Rename files to use zero-padded indices (chunk_000.fasta, etc.)
    for i in range(1, num_chunks + 1):
        old_name = output_dir / f"chunk_{i}.fasta"
        new_name = output_dir / f"chunk_{i-1:03d}.fasta"
        if old_name.exists():
            old_name.rename(new_name)

    return num_chunks


def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk: int, dry_run: bool, base_path: str):
    """Submit embedding job to AWS Batch."""
    # Count sequences
    click.echo(f"Counting sequences in {input_path}...")
    num_sequences = _count_sequences(input_path)
    click.echo(f"Found {num_sequences:,} sequences")

    if num_sequences == 0:
        click.echo(click.style("Error: No sequences found in input file", fg="red"), err=True)
        raise SystemExit(1)

    # Calculate chunks
    num_chunks = min((num_sequences + seqs_per_chunk - 1) // seqs_per_chunk, workers)
    actual_seqs_per_chunk = (num_sequences + num_chunks - 1) // num_chunks

    # Generate job ID
    job_id = generate_job_id("embed")

    # Show plan
    click.echo()
    click.echo(f"Job ID:         {job_id}")
    click.echo(f"Input:          {input_path}")
    click.echo(f"Sequences:      {num_sequences:,}")
    click.echo(f"Chunks:         {num_chunks}")
    click.echo(f"Seqs per chunk: ~{actual_seqs_per_chunk:,}")
    click.echo(f"Queue:          {queue}")
    click.echo(f"Job definition: {DEFAULT_JOB_DEFINITION}")

    if dry_run:
        click.echo()
        click.echo(click.style("Dry run - job not submitted", fg="yellow"))
        return

    click.echo()

    # Create job directory
    job_dir = create_job_directory(job_id, base_path)
    input_dir = job_dir / "input"
    output_dir = job_dir / "output"

    click.echo(f"Created job directory: {job_dir}")

    # Split FASTA into chunks
    click.echo("Splitting FASTA into chunks...")
    actual_chunks = _split_fasta(input_path, input_dir, actual_seqs_per_chunk)
    click.echo(f"Created {actual_chunks} chunks")

    # Create manifest
    manifest = JobManifest(
        job_id=job_id,
        user=job_id.split("-")[0],
        pipeline="embed-t5",
        status=JobStatus.PENDING,
        image_uri=DEFAULT_IMAGE_URI,
        input=InputConfig(
            source=str(input_path),
            num_sequences=num_sequences,
            num_chunks=actual_chunks,
            sequences_per_chunk=actual_seqs_per_chunk,
        ),
        batch=BatchConfig(
            queue=queue,
            job_definition=DEFAULT_JOB_DEFINITION,
            array_size=actual_chunks,
        ),
        output=OutputConfig(
            destination=None,
            finalized=False,
        ),
    )

    save_manifest(manifest, base_path)

    # Submit to AWS Batch
    try:
        client = BatchClient()

        environment = {
            "JOB_DIR": str(job_dir),
            "JOB_ID": job_id,
        }

        batch_job_id = client.submit_job(
            job_name=job_id,
            job_definition=DEFAULT_JOB_DEFINITION,
            job_queue=queue,
            array_size=actual_chunks,
            environment=environment,
            timeout_seconds=6 * 3600,  # 6 hours
            retry_attempts=3,
        )

        # Update manifest
        manifest.status = JobStatus.SUBMITTED
        manifest.batch.job_id = batch_job_id
        save_manifest(manifest, base_path)

        click.echo()
        click.echo(click.style("✓ Job submitted successfully!", fg="green"))
        click.echo()
        click.echo(f"AWS Batch Job ID: {batch_job_id}")
        click.echo()
        click.echo("Next steps:")
        click.echo(f"  Check status: dh batch status {job_id}")
        click.echo(f"  View logs:    dh batch logs {job_id}")
        click.echo(f"  Cancel:       dh batch cancel {job_id}")
        click.echo()
        click.echo("After completion:")
        click.echo(f"  Finalize: dh batch finalize {job_id} --output /primordial/embeddings.h5")

    except BatchError as e:
        manifest.status = JobStatus.FAILED
        manifest.error_message = str(e)
        save_manifest(manifest, base_path)
        click.echo(click.style(f"✗ Failed to submit job: {e}", fg="red"), err=True)
        raise SystemExit(1)


def _run_local_mode(input_path: Path):
    """Run embedding locally for a single chunk."""
    import subprocess

    click.echo("Running T5 embedding locally...")
    click.echo(f"Input: {input_path}")

    # Check if we have the embedder available
    try:
        from dayhoff_tools.embedders import T5Embedder

        output_file = input_path.with_suffix(".h5")
        click.echo(f"Output: {output_file}")
        click.echo()

        embedder = T5Embedder(
            max_seq_length=4500,
            large_protein_threshold=2500,
            batch_residue_limit=4500,
        )
        embedder.run(str(input_path), str(output_file))

        click.echo()
        click.echo(click.style("✓ Embedding complete!", fg="green"))
        click.echo(f"Output: {output_file}")

    except ImportError:
        click.echo(
            click.style(
                "Error: T5Embedder requires 'embedders' extra. "
                "Install with: pip install 'dayhoff-tools[embedders]'",
                fg="red",
            ),
            err=True,
        )
        raise SystemExit(1)


def _run_shell_mode(input_path: Path):
    """Drop into container shell for debugging."""
    import subprocess

    click.echo("Dropping into container shell...")
    click.echo(f"Input will be available at: /input/{input_path.name}")
    click.echo()

    input_dir = input_path.parent

    cmd = [
        "docker", "run", "--rm", "-it",
        "--gpus", "all",
        "-v", "/primordial:/primordial",
        "-v", f"{input_dir}:/input",
        "-e", "JOB_DIR=/input",
        "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
        "--entrypoint", "/bin/bash",
        DEFAULT_IMAGE_URI,
    ]

    click.echo(f"Running: {' '.join(cmd)}")
    click.echo()

    try:
        subprocess.run(cmd)
    except FileNotFoundError:
        click.echo(
            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
            err=True,
        )
        raise SystemExit(1)
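
embed_t5 submits an array job whose children receive JOB_DIR and JOB_ID from the submit call, plus AWS_BATCH_JOB_ARRAY_INDEX from AWS Batch itself (the same variables that local.py and the --shell mode recreate). Below is a minimal sketch of the worker-side contract implied by the file names used here: chunk_{idx:03d}.fasta inputs from _split_fasta, and the embed_{idx:03d}.h5 and .done outputs that finalize.py expects. The shipped worker (dayhoff_tools/batch/workers/embed_t5.py, +92 above) is not shown in this diff, so treat this as an illustration, not that code.

# Illustrative worker loop matching the contract above; NOT the shipped
# dayhoff_tools/batch/workers/embed_t5.py, which this diff does not expand.
import os
from pathlib import Path

def run_worker():
    job_dir = Path(os.environ["JOB_DIR"])
    idx = int(os.environ.get("AWS_BATCH_JOB_ARRAY_INDEX", "0"))

    chunk = job_dir / "input" / f"chunk_{idx:03d}.fasta"  # written by _split_fasta
    out = job_dir / "output" / f"embed_{idx:03d}.h5"      # globbed by _finalize_embeddings
    done = job_dir / "output" / f"embed_{idx:03d}.done"   # checked by _check_completion

    from dayhoff_tools.embedders import T5Embedder  # same API used in _run_local_mode
    embedder = T5Embedder(
        max_seq_length=4500,
        large_protein_threshold=2500,
        batch_residue_limit=4500,
    )
    embedder.run(str(chunk), str(out))
    done.touch()  # marks the chunk complete so finalize won't count it as incomplete

Note the chunk math in _submit_batch_job: (n + k - 1) // k is ceiling division, so with the defaults a 600,000-sequence FASTA yields min(ceil(600000/5000), 50) = 50 chunks of ceil(600000/50) = 12,000 sequences each. For large inputs the worker count, not the chunk-size target, is the binding limit.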
dayhoff_tools/cli/batch/commands/finalize.py (new file)
@@ -0,0 +1,206 @@
"""Finalize command for combining results and cleaning up."""

import shutil
from pathlib import Path

import click

from ..manifest import (
    BATCH_JOBS_BASE,
    JobStatus,
    delete_job_directory,
    get_job_dir,
    load_manifest,
    save_manifest,
)


@click.command()
@click.argument("job_id")
@click.option("--output", required=True, type=click.Path(), help="Output path for combined results")
@click.option("--force", is_flag=True, help="Finalize even if some chunks failed")
@click.option("--keep-intermediates", is_flag=True, help="Don't delete job directory after finalizing")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def finalize(job_id, output, force, keep_intermediates, base_path):
    """Combine results and clean up job intermediates.

    For embedding jobs, combines H5 files into a single output file.
    For structure prediction, moves outputs to the destination.

    \b
    Examples:
        dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5
        dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5 --force
        dh batch finalize dma-embed-20260109-a3f2 --output /primordial/out.h5 --keep-intermediates
    """
    # Load manifest
    try:
        manifest = load_manifest(job_id, base_path)
    except FileNotFoundError:
        click.echo(f"Job not found: {job_id}", err=True)
        raise SystemExit(1)

    # Check job status
    if manifest.status == JobStatus.FINALIZED:
        click.echo(f"Job {job_id} is already finalized.", err=True)
        raise SystemExit(1)

    job_dir = get_job_dir(job_id, base_path)
    output_dir = job_dir / "output"
    output_path = Path(output).resolve()

    # Check completion status
    incomplete = _check_completion(job_id, base_path)
    if incomplete:
        click.echo(f"Found {len(incomplete)} incomplete chunks: {incomplete[:10]}...")
        if not force:
            click.echo()
            click.echo("Use --force to finalize anyway, or retry failed chunks:")
            click.echo(f"  dh batch retry {job_id}")
            raise SystemExit(1)
        click.echo()
        click.echo(click.style("Warning: Finalizing with incomplete chunks", fg="yellow"))

    # Update status
    manifest.status = JobStatus.FINALIZING
    save_manifest(manifest, base_path)

    # Finalize based on pipeline type
    click.echo()
    if manifest.pipeline in ("embed-t5", "embed"):
        _finalize_embeddings(output_dir, output_path)
    elif manifest.pipeline == "boltz":
        _finalize_boltz(output_dir, output_path)
    else:
        _finalize_generic(output_dir, output_path)

    # Update manifest
    manifest.status = JobStatus.FINALIZED
    if manifest.output:
        manifest.output.destination = str(output_path)
        manifest.output.finalized = True
    save_manifest(manifest, base_path)

    click.echo()
    click.echo(click.style(f"✓ Results saved to: {output_path}", fg="green"))

    # Clean up
    if not keep_intermediates:
        click.echo(f"Cleaning up job directory: {job_dir}")
        delete_job_directory(job_id, base_path)
        click.echo(click.style("✓ Job directory deleted", fg="green"))
    else:
        click.echo(f"Job directory preserved: {job_dir}")


def _check_completion(job_id: str, base_path: str) -> list[int]:
    """Check which chunks are incomplete (no .done marker)."""
    job_dir = get_job_dir(job_id, base_path)
    input_dir = job_dir / "input"
    output_dir = job_dir / "output"

    if not input_dir.exists():
        return []

    incomplete = []
    for chunk_path in sorted(input_dir.glob("chunk_*.fasta")):
        idx_str = chunk_path.stem.split("_")[1]
        idx = int(idx_str)
        done_marker = output_dir / f"embed_{idx:03d}.done"
        if not done_marker.exists():
            incomplete.append(idx)

    return incomplete


def _finalize_embeddings(output_dir: Path, output_path: Path):
    """Combine H5 embedding files into a single output."""
    h5_files = sorted(output_dir.glob("embed_*.h5"))

    if not h5_files:
        click.echo("No H5 files found in output directory.", err=True)
        raise SystemExit(1)

    click.echo(f"Found {len(h5_files)} H5 files to combine")

    # Check if output already exists
    if output_path.exists():
        click.echo(f"Output file already exists: {output_path}", err=True)
        raise SystemExit(1)

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        from dayhoff_tools.h5 import combine_h5_files, deduplicate_h5_file, optimize_protein_embedding_chunks

        # Combine H5 files
        click.echo("Combining H5 files...")
        combine_h5_files(
            input_folder=str(output_dir),
            output_file=str(output_path),
            glob_pattern="embed_*.h5",
        )

        # Deduplicate
        click.echo("Deduplicating...")
        deduplicate_h5_file(str(output_path))

        # Optimize chunks
        click.echo("Optimizing chunks...")
        optimize_protein_embedding_chunks(str(output_path))

        click.echo(click.style("✓ H5 files combined successfully", fg="green"))

    except ImportError:
        # Fall back to simple concatenation
        click.echo("h5 module not available, using simple copy...")
        if len(h5_files) == 1:
            shutil.copy2(h5_files[0], output_path)
        else:
            # For multiple files without h5 module, just copy first file
            # This is a fallback - the h5 module should be available
            click.echo(
                click.style(
                    "Warning: Cannot combine multiple H5 files without dayhoff_tools.h5 module. "
                    "Only copying first file.",
                    fg="yellow",
                )
            )
            shutil.copy2(h5_files[0], output_path)


def _finalize_boltz(output_dir: Path, output_path: Path):
    """Move Boltz output directories to destination."""
    # Find all output directories (one per complex)
    complex_dirs = [d for d in output_dir.iterdir() if d.is_dir()]

    if not complex_dirs:
        click.echo("No output directories found.", err=True)
        raise SystemExit(1)

    click.echo(f"Found {len(complex_dirs)} structure predictions to move")

    # Ensure output directory exists
    output_path.mkdir(parents=True, exist_ok=True)

    for complex_dir in complex_dirs:
        dest = output_path / complex_dir.name
        if dest.exists():
            click.echo(f"  Skipping {complex_dir.name} (already exists)")
            continue
        shutil.move(str(complex_dir), str(dest))
        click.echo(f"  Moved {complex_dir.name}")

    click.echo(click.style("✓ Structures moved successfully", fg="green"))


def _finalize_generic(output_dir: Path, output_path: Path):
    """Generic finalization - copy output directory."""
    if output_path.exists():
        click.echo(f"Output path already exists: {output_path}", err=True)
        raise SystemExit(1)

    click.echo(f"Copying output directory to {output_path}...")
    shutil.copytree(output_dir, output_path)
    click.echo(click.style("✓ Output copied successfully", fg="green"))
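
finalize delegates the real combining to dayhoff_tools.h5.combine_h5_files, which this diff does not expand. As a rough mental model, assuming one HDF5 dataset per sequence ID (an assumption; the actual layout is not shown), a plain-h5py equivalent might look like the sketch below. The shipped function, plus its separate deduplicate and chunk-optimize passes, may well differ.

# Hedged sketch of what combining per-chunk embedding files amounts to, in
# plain h5py; dayhoff_tools.h5.combine_h5_files (not shown here) may differ.
from pathlib import Path
import h5py

def combine_h5_sketch(input_folder: str, output_file: str, glob_pattern: str = "embed_*.h5"):
    with h5py.File(output_file, "w") as dst:
        for part in sorted(Path(input_folder).glob(glob_pattern)):
            with h5py.File(part, "r") as src:
                for key in src:  # assumes one top-level dataset per sequence ID
                    if key in dst:
                        continue  # first occurrence wins; real dedup is a separate pass
                    src.copy(key, dst)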
dayhoff_tools/cli/batch/commands/list_jobs.py (new file)
@@ -0,0 +1,78 @@
"""List command for showing recent jobs."""

import click

from ..manifest import BATCH_JOBS_BASE, JobStatus, list_jobs as list_manifests
from .status import format_status, format_time_ago


@click.command("list")
@click.option("--user", help="Filter by username")
@click.option(
    "--status",
    "status_filter",
    type=click.Choice([s.value for s in JobStatus]),
    help="Filter by status",
)
@click.option("--pipeline", help="Filter by pipeline type")
@click.option("--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def list_jobs(user, status_filter, pipeline, limit, base_path):
    """List recent batch jobs.

    Shows a table of recent jobs with their status, pipeline type, and creation time.

    \b
    Examples:
        dh batch list                      # All recent jobs
        dh batch list --user dma           # Filter by user
        dh batch list --status running     # Filter by status
        dh batch list --pipeline embed-t5  # Filter by pipeline type
        dh batch list --limit 50           # Show more jobs
    """
    status_enum = JobStatus(status_filter) if status_filter else None

    manifests = list_manifests(
        base_path=base_path,
        user=user,
        status=status_enum,
        pipeline=pipeline,
        limit=limit,
    )

    if not manifests:
        click.echo("No jobs found.")
        if user or status_filter or pipeline:
            click.echo("Try removing filters to see all jobs.")
        return

    # Print header
    click.echo()
    click.echo(
        f"{'JOB ID':<35} {'STATUS':<12} {'PIPELINE':<12} {'USER':<10} {'CREATED':<12}"
    )
    click.echo("-" * 85)

    for manifest in manifests:
        click.echo(
            f"{manifest.job_id:<35} "
            f"{format_status(manifest.status):<21} "  # Extra space for ANSI color codes
            f"{manifest.pipeline:<12} "
            f"{manifest.user:<10} "
            f"{format_time_ago(manifest.created):<12}"
        )

    click.echo()
    click.echo(f"Showing {len(manifests)} jobs.")

    # Show filter hints
    hints = []
    if not user:
        hints.append("--user <name>")
    if not status_filter:
        hints.append("--status <status>")
    if not pipeline:
        hints.append("--pipeline <type>")

    if hints:
        click.echo(f"Filter with: {' '.join(hints)}")
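
The STATUS column pads to 21 characters while the header uses 12 because format_status (imported from status.py, not expanded in this excerpt) returns a click.style()-colored string, and the invisible ANSI escape codes still count toward the field width. A quick check of the overhead for a simple foreground color:

# Why colored strings need wider padding: ANSI escape codes are invisible
# on screen but still count in len() and in f-string field widths.
import click

plain = "running"
colored = click.style(plain, fg="green")  # "\x1b[32mrunning\x1b[0m"
print(len(plain), len(colored))  # 7 16 - nine invisible characters, matching 21 - 12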
dayhoff_tools/cli/batch/commands/local.py (new file)
@@ -0,0 +1,95 @@
"""Local command for debugging job chunks locally."""

import subprocess

import click

from ..manifest import BATCH_JOBS_BASE, get_job_dir, load_manifest


@click.command()
@click.argument("job_id")
@click.option("--index", required=True, type=int, help="Array index to run")
@click.option("--shell", "run_shell", is_flag=True, help="Drop into shell instead of running command")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def local(job_id, index, run_shell, base_path):
    """Run a job chunk locally for debugging.

    Runs a specific array index of a job in a local Docker container,
    allowing you to debug failed chunks or test changes.

    \b
    Examples:
        dh batch local dma-embed-20260109-a3f2 --index 27
        dh batch local dma-embed-20260109-a3f2 --index 27 --shell
    """
    # Load manifest
    try:
        manifest = load_manifest(job_id, base_path)
    except FileNotFoundError:
        click.echo(f"Job not found: {job_id}", err=True)
        raise SystemExit(1)

    # Get job directory and image
    job_dir = get_job_dir(job_id, base_path)
    image_uri = manifest.image_uri

    if not image_uri:
        click.echo("Job has no image URI, cannot run locally.", err=True)
        raise SystemExit(1)

    # Validate index
    if manifest.input and manifest.input.num_chunks:
        if index >= manifest.input.num_chunks:
            click.echo(
                f"Index {index} out of range. Job has {manifest.input.num_chunks} chunks (0-{manifest.input.num_chunks - 1}).",
                err=True,
            )
            raise SystemExit(1)

    click.echo(f"Running job {job_id} index {index} locally")
    click.echo(f"Image: {image_uri}")
    click.echo(f"Job directory: {job_dir}")
    click.echo()

    # Build Docker command
    cmd = [
        "docker", "run", "--rm",
        "--gpus", "all",
        "-v", "/primordial:/primordial",
        "-v", f"{job_dir}:{job_dir}",
        "-e", f"AWS_BATCH_JOB_ARRAY_INDEX={index}",
        "-e", f"JOB_DIR={job_dir}",
        "-e", f"JOB_ID={job_id}",
    ]

    if run_shell:
        cmd.extend(["-it", "--entrypoint", "/bin/bash"])
        click.echo("Dropping into container shell...")
        click.echo(f"  JOB_DIR={job_dir}")
        click.echo(f"  AWS_BATCH_JOB_ARRAY_INDEX={index}")
    else:
        click.echo("Running worker command...")

    cmd.append(image_uri)

    click.echo()
    click.echo(f"Command: {' '.join(cmd)}")
    click.echo()

    try:
        result = subprocess.run(cmd)
        if result.returncode != 0:
            click.echo(
                click.style(f"Container exited with code {result.returncode}", fg="red"),
                err=True,
            )
            raise SystemExit(result.returncode)
        else:
            click.echo(click.style("✓ Container completed successfully", fg="green"))
    except FileNotFoundError:
        click.echo(
            click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
            err=True,
        )
        raise SystemExit(1)
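
The examples here use the same job-ID shape seen throughout (dma-embed-20260109-a3f2), and embed_t5.py recovers the user with job_id.split("-")[0]. generate_job_id (dayhoff_tools/cli/batch/job_id.py, +151 above) is not expanded in this diff, so the following is only a guess at the scheme those two facts imply.

# Hypothetical stand-in for generate_job_id; the real implementation is not
# part of this excerpt and may use a different date format or suffix source.
import getpass
import secrets
from datetime import datetime, timezone

def generate_job_id_sketch(pipeline: str) -> str:
    user = getpass.getuser()                              # "dma" in the examples
    date = datetime.now(timezone.utc).strftime("%Y%m%d")  # "20260109"
    suffix = secrets.token_hex(2)                         # four hex chars, e.g. "a3f2"
    return f"{user}-{pipeline}-{date}-{suffix}"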