dayhoff-tools 1.14.1__py3-none-any.whl → 1.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. dayhoff_tools/batch/__init__.py +8 -0
  2. dayhoff_tools/batch/workers/__init__.py +12 -0
  3. dayhoff_tools/batch/workers/base.py +150 -0
  4. dayhoff_tools/batch/workers/boltz.py +407 -0
  5. dayhoff_tools/batch/workers/embed_t5.py +92 -0
  6. dayhoff_tools/cli/batch/__init__.py +85 -0
  7. dayhoff_tools/cli/batch/aws_batch.py +401 -0
  8. dayhoff_tools/cli/batch/commands/__init__.py +25 -0
  9. dayhoff_tools/cli/batch/commands/boltz.py +362 -0
  10. dayhoff_tools/cli/batch/commands/cancel.py +82 -0
  11. dayhoff_tools/cli/batch/commands/embed_t5.py +303 -0
  12. dayhoff_tools/cli/batch/commands/finalize.py +206 -0
  13. dayhoff_tools/cli/batch/commands/list_jobs.py +78 -0
  14. dayhoff_tools/cli/batch/commands/local.py +95 -0
  15. dayhoff_tools/cli/batch/commands/logs.py +142 -0
  16. dayhoff_tools/cli/batch/commands/retry.py +142 -0
  17. dayhoff_tools/cli/batch/commands/status.py +214 -0
  18. dayhoff_tools/cli/batch/commands/submit.py +215 -0
  19. dayhoff_tools/cli/batch/job_id.py +151 -0
  20. dayhoff_tools/cli/batch/manifest.py +293 -0
  21. dayhoff_tools/cli/engines_studios/engine-studio-cli.md +26 -21
  22. dayhoff_tools/cli/engines_studios/engine_commands.py +16 -89
  23. dayhoff_tools/cli/engines_studios/ssh_config.py +96 -0
  24. dayhoff_tools/cli/engines_studios/studio_commands.py +15 -4
  25. dayhoff_tools/cli/main.py +14 -0
  26. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/METADATA +6 -1
  27. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/RECORD +29 -8
  28. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/WHEEL +0 -0
  29. {dayhoff_tools-1.14.1.dist-info → dayhoff_tools-1.14.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,362 @@
1
+ """Boltz structure prediction pipeline command."""
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from ..aws_batch import BatchClient, BatchError
10
+ from ..job_id import generate_job_id
11
+ from ..manifest import (
12
+ BATCH_JOBS_BASE,
13
+ BatchConfig,
14
+ InputConfig,
15
+ JobManifest,
16
+ JobStatus,
17
+ OutputConfig,
18
+ create_job_directory,
19
+ get_job_dir,
20
+ save_manifest,
21
+ )
22
+
23
+ # Default settings for Boltz
24
+ # NOTE: A10G would be preferred (24GB vs 16GB VRAM) but has a bug.
25
+ # Using T4 until A10G is debugged. See new_batch.md Known Issues.
26
+ DEFAULT_QUEUE = "t4-1x-spot"
27
+ DEFAULT_WORKERS = 50
28
+ DEFAULT_JOB_DEFINITION = "dayhoff-boltz"
29
+ DEFAULT_IMAGE_URI = "074735440724.dkr.ecr.us-east-1.amazonaws.com/dayhoff:boltz-latest"
30
+
31
+
32
+ @click.command()
33
+ @click.argument("input_dir", type=click.Path(exists=True))
34
+ @click.option(
35
+ "--workers",
36
+ default=DEFAULT_WORKERS,
37
+ type=int,
38
+ help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]",
39
+ )
40
+ @click.option(
41
+ "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
42
+ )
43
+ @click.option(
44
+ "--msa-dir",
45
+ type=click.Path(exists=True),
46
+ help="Path to pre-computed MSA files (optional)",
47
+ )
48
+ @click.option(
49
+ "--local",
50
+ "run_local",
51
+ is_flag=True,
52
+ help="Run single complex locally instead of Batch",
53
+ )
54
+ @click.option(
55
+ "--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
56
+ )
57
+ @click.option("--dry-run", is_flag=True, help="Show plan without submitting")
58
+ @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
59
+ def boltz(input_dir, workers, queue, msa_dir, run_local, run_shell, dry_run, base_path):
60
+ """Predict protein structures with Boltz.
61
+
62
+ Processes a directory of YAML config files, each defining a protein complex.
63
+ Each YAML file is processed independently in parallel using AWS Batch array jobs.
64
+
65
+ \b
66
+ Examples:
67
+ # Submit to AWS Batch with 100 workers
68
+ dh batch boltz /primordial/complexes/ --workers 100
69
+
70
+ # Include pre-computed MSA files
71
+ dh batch boltz /primordial/complexes/ --workers 50 --msa-dir /primordial/msas/
72
+
73
+ # Test locally with a single complex
74
+ dh batch boltz /primordial/complexes/ --local
75
+
76
+ # Debug by dropping into container shell
77
+ dh batch boltz /primordial/complexes/ --shell
78
+
79
+ \b
80
+ After job completes:
81
+ dh batch status <job-id> # Check status
82
+ dh batch finalize <job-id> --output /primordial/structures/ # Move results
83
+
84
+ \b
85
+ YAML config format:
86
+ version: 1
87
+ sequences:
88
+ - protein:
89
+ id: A
90
+ sequence: MKTVRQERLKSIVRILERSKEPVSGAQ...
91
+ - ligand:
92
+ id: B
93
+ smiles: CCO
94
+ """
95
+ input_path = Path(input_dir).resolve()
96
+
97
+ if run_shell:
98
+ _run_shell_mode(input_path)
99
+ return
100
+
101
+ if run_local:
102
+ _run_local_mode(input_path)
103
+ return
104
+
105
+ # Batch submission mode
106
+ _submit_batch_job(input_path, workers, queue, msa_dir, dry_run, base_path)
107
+
108
+
109
+ def _count_yaml_files(input_path: Path) -> int:
110
+ """Count YAML files in directory."""
111
+ return len(list(input_path.glob("*.yaml")))
112
+
113
+
114
+ def _copy_inputs_to_job_dir(input_path: Path, job_dir: Path) -> int:
115
+ """Copy input YAML files to job directory.
116
+
117
+ Returns:
118
+ Number of files copied
119
+ """
120
+ input_dir = job_dir / "input"
121
+ input_dir.mkdir(parents=True, exist_ok=True)
122
+
123
+ count = 0
124
+ for yaml_file in sorted(input_path.glob("*.yaml")):
125
+ dest = input_dir / yaml_file.name
126
+ shutil.copy2(yaml_file, dest)
127
+ count += 1
128
+
129
+ return count
130
+
131
+
132
+ def _submit_batch_job(
133
+ input_path: Path,
134
+ workers: int,
135
+ queue: str,
136
+ msa_dir: str | None,
137
+ dry_run: bool,
138
+ base_path: str,
139
+ ):
140
+ """Submit Boltz job to AWS Batch."""
141
+ # Count input files
142
+ click.echo(f"Scanning {input_path} for YAML files...")
143
+ num_files = _count_yaml_files(input_path)
144
+
145
+ if num_files == 0:
146
+ click.echo(
147
+ click.style("Error: No YAML files found in input directory", fg="red"),
148
+ err=True,
149
+ )
150
+ raise SystemExit(1)
151
+
152
+ click.echo(f"Found {num_files} complexes to predict")
153
+
154
+ # Calculate array size
155
+ array_size = min(num_files, workers)
156
+
157
+ # Generate job ID
158
+ job_id = generate_job_id("boltz")
159
+
160
+ # Show plan
161
+ click.echo()
162
+ click.echo(f"Job ID: {job_id}")
163
+ click.echo(f"Input: {input_path}")
164
+ click.echo(f"Complexes: {num_files}")
165
+ click.echo(f"Array Size: {array_size}")
166
+ click.echo(f"Queue: {queue}")
167
+ click.echo(f"Job definition: {DEFAULT_JOB_DEFINITION}")
168
+ if msa_dir:
169
+ click.echo(f"MSA directory: {msa_dir}")
170
+
171
+ if dry_run:
172
+ click.echo()
173
+ click.echo(click.style("Dry run - job not submitted", fg="yellow"))
174
+ return
175
+
176
+ click.echo()
177
+
178
+ # Create job directory
179
+ job_dir = create_job_directory(job_id, base_path)
180
+ click.echo(f"Created job directory: {job_dir}")
181
+
182
+ # Copy input files
183
+ click.echo("Copying input files...")
184
+ copied = _copy_inputs_to_job_dir(input_path, job_dir)
185
+ click.echo(f"Copied {copied} YAML files")
186
+
187
+ # Copy or symlink MSA directory if provided
188
+ if msa_dir:
189
+ msa_dest = job_dir / "msas"
190
+ msa_src = Path(msa_dir)
191
+
192
+ # If on same filesystem (Primordial), symlink; otherwise copy
193
+ try:
194
+ msa_dest.symlink_to(msa_src)
195
+ click.echo(f"Linked MSA directory: {msa_dir}")
196
+ except OSError:
197
+ click.echo("Copying MSA directory (this may take a while)...")
198
+ shutil.copytree(msa_src, msa_dest)
199
+ click.echo(f"Copied MSA directory")
200
+
201
+ # Create manifest
202
+ manifest = JobManifest(
203
+ job_id=job_id,
204
+ user=job_id.split("-")[0],
205
+ pipeline="boltz",
206
+ status=JobStatus.PENDING,
207
+ image_uri=DEFAULT_IMAGE_URI,
208
+ input=InputConfig(
209
+ source=str(input_path),
210
+ num_sequences=num_files, # Using num_sequences field for num_complexes
211
+ num_chunks=array_size,
212
+ ),
213
+ batch=BatchConfig(
214
+ queue=queue,
215
+ job_definition=DEFAULT_JOB_DEFINITION,
216
+ array_size=array_size,
217
+ ),
218
+ output=OutputConfig(
219
+ destination=None,
220
+ finalized=False,
221
+ ),
222
+ )
223
+
224
+ save_manifest(manifest, base_path)
225
+
226
+ # Submit to AWS Batch
227
+ try:
228
+ client = BatchClient()
229
+
230
+ environment = {
231
+ "JOB_DIR": str(job_dir),
232
+ "JOB_ID": job_id,
233
+ "BOLTZ_CACHE": "/primordial/.cache/boltz",
234
+ "MSA_DIR": "/primordial/.cache/msas",
235
+ }
236
+
237
+ batch_job_id = client.submit_job(
238
+ job_name=job_id,
239
+ job_definition=DEFAULT_JOB_DEFINITION,
240
+ job_queue=queue,
241
+ array_size=array_size,
242
+ environment=environment,
243
+ timeout_seconds=12 * 3600, # 12 hours (Boltz can be slow)
244
+ retry_attempts=2, # Fewer retries for expensive jobs
245
+ )
246
+
247
+ # Update manifest
248
+ manifest.status = JobStatus.SUBMITTED
249
+ manifest.batch.job_id = batch_job_id
250
+ save_manifest(manifest, base_path)
251
+
252
+ click.echo()
253
+ click.echo(click.style("✓ Job submitted successfully!", fg="green"))
254
+ click.echo()
255
+ click.echo(f"AWS Batch Job ID: {batch_job_id}")
256
+ click.echo()
257
+ click.echo("Next steps:")
258
+ click.echo(f" Check status: dh batch status {job_id}")
259
+ click.echo(f" View logs: dh batch logs {job_id}")
260
+ click.echo(f" Cancel: dh batch cancel {job_id}")
261
+ click.echo()
262
+ click.echo("After completion:")
263
+ click.echo(
264
+ f" Finalize: dh batch finalize {job_id} --output /primordial/structures/"
265
+ )
266
+
267
+ except BatchError as e:
268
+ manifest.status = JobStatus.FAILED
269
+ manifest.error_message = str(e)
270
+ save_manifest(manifest, base_path)
271
+ click.echo(click.style(f"✗ Failed to submit job: {e}", fg="red"), err=True)
272
+ raise SystemExit(1)
273
+
274
+
275
+ def _run_local_mode(input_path: Path):
276
+ """Run Boltz locally for a single complex."""
277
+ click.echo("Running Boltz locally...")
278
+ click.echo(f"Input directory: {input_path}")
279
+
280
+ # Find first YAML file
281
+ yaml_files = list(input_path.glob("*.yaml"))
282
+ if not yaml_files:
283
+ click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
284
+ raise SystemExit(1)
285
+
286
+ input_file = yaml_files[0]
287
+ click.echo(f"Processing: {input_file.name}")
288
+ click.echo()
289
+
290
+ try:
291
+ from dayhoff_tools.batch.workers.boltz import BoltzProcessor
292
+
293
+ processor = BoltzProcessor(
294
+ num_workers=None, # Auto-detect
295
+ msa_folder=None,
296
+ cache_dir=(
297
+ "/primordial/.cache/boltz" if os.path.exists("/primordial") else None
298
+ ),
299
+ )
300
+
301
+ result_dir = processor.run(str(input_file))
302
+
303
+ click.echo()
304
+ click.echo(click.style("✓ Prediction complete!", fg="green"))
305
+ click.echo(f"Output: {result_dir}")
306
+
307
+ except ImportError as e:
308
+ click.echo(
309
+ click.style(f"Error: Missing dependency: {e}", fg="red"),
310
+ err=True,
311
+ )
312
+ raise SystemExit(1)
313
+ except Exception as e:
314
+ click.echo(click.style(f"Error: {e}", fg="red"), err=True)
315
+ raise SystemExit(1)
316
+
317
+
318
+ def _run_shell_mode(input_path: Path):
319
+ """Drop into container shell for debugging."""
320
+ import subprocess
321
+
322
+ click.echo("Dropping into container shell...")
323
+ click.echo(f"Input will be available at: /input/")
324
+ click.echo()
325
+
326
+ cmd = [
327
+ "docker",
328
+ "run",
329
+ "--rm",
330
+ "-it",
331
+ "--gpus",
332
+ "all",
333
+ "-v",
334
+ "/primordial:/primordial",
335
+ "-v",
336
+ f"{input_path}:/input",
337
+ "-e",
338
+ "JOB_DIR=/input",
339
+ "-e",
340
+ "AWS_BATCH_JOB_ARRAY_INDEX=0",
341
+ "-e",
342
+ "BOLTZ_CACHE=/primordial/.cache/boltz",
343
+ "-e",
344
+ "MSA_DIR=/primordial/.cache/msas",
345
+ "--entrypoint",
346
+ "/bin/bash",
347
+ DEFAULT_IMAGE_URI,
348
+ ]
349
+
350
+ click.echo(f"Running: {' '.join(cmd)}")
351
+ click.echo()
352
+
353
+ try:
354
+ subprocess.run(cmd)
355
+ except FileNotFoundError:
356
+ click.echo(
357
+ click.style(
358
+ "Error: Docker not found. Is Docker installed and running?", fg="red"
359
+ ),
360
+ err=True,
361
+ )
362
+ raise SystemExit(1)
@@ -0,0 +1,82 @@
1
+ """Cancel command for stopping running jobs."""
2
+
3
+ import click
4
+
5
+ from ..aws_batch import BatchClient, BatchError
6
+ from ..manifest import (
7
+ BATCH_JOBS_BASE,
8
+ JobStatus,
9
+ load_manifest,
10
+ save_manifest,
11
+ )
12
+
13
+
14
+ @click.command()
15
+ @click.argument("job_id")
16
+ @click.option("--force", is_flag=True, help="Force termination of running containers")
17
+ @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
18
+ def cancel(job_id, force, base_path):
19
+ """Cancel a running batch job.
20
+
21
+ Cancels the job in AWS Batch and updates the manifest status.
22
+
23
+ \b
24
+ Examples:
25
+ dh batch cancel dma-embed-20260109-a3f2
26
+ dh batch cancel dma-embed-20260109-a3f2 --force
27
+ """
28
+ # Load manifest
29
+ try:
30
+ manifest = load_manifest(job_id, base_path)
31
+ except FileNotFoundError:
32
+ click.echo(f"Job not found: {job_id}", err=True)
33
+ raise SystemExit(1)
34
+
35
+ # Check if job can be cancelled
36
+ if manifest.status in (JobStatus.SUCCEEDED, JobStatus.FINALIZED, JobStatus.CANCELLED):
37
+ click.echo(f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True)
38
+ raise SystemExit(1)
39
+
40
+ # Get Batch job ID
41
+ if not manifest.batch or not manifest.batch.job_id:
42
+ click.echo("Job has no AWS Batch job ID, updating status only.")
43
+ manifest.status = JobStatus.CANCELLED
44
+ save_manifest(manifest, base_path)
45
+ click.echo(click.style(f"✓ Job {job_id} marked as cancelled", fg="green"))
46
+ return
47
+
48
+ batch_job_id = manifest.batch.job_id
49
+
50
+ # Cancel in AWS Batch
51
+ try:
52
+ client = BatchClient()
53
+
54
+ if force:
55
+ click.echo(f"Terminating job {batch_job_id}...")
56
+ client.terminate_job(batch_job_id, reason="Terminated by user via dh batch cancel --force")
57
+ else:
58
+ click.echo(f"Cancelling job {batch_job_id}...")
59
+ client.cancel_job(batch_job_id, reason="Cancelled by user via dh batch cancel")
60
+
61
+ # Update manifest
62
+ manifest.status = JobStatus.CANCELLED
63
+ save_manifest(manifest, base_path)
64
+
65
+ click.echo()
66
+ click.echo(click.style(f"✓ Job {job_id} cancelled successfully", fg="green"))
67
+
68
+ # Handle retries too
69
+ for retry_info in manifest.retries:
70
+ if retry_info.batch_job_id:
71
+ try:
72
+ if force:
73
+ client.terminate_job(retry_info.batch_job_id, reason="Parent job cancelled")
74
+ else:
75
+ client.cancel_job(retry_info.batch_job_id, reason="Parent job cancelled")
76
+ click.echo(f" Also cancelled retry job: {retry_info.retry_id}")
77
+ except BatchError:
78
+ pass # Retry job may already be complete
79
+
80
+ except BatchError as e:
81
+ click.echo(click.style(f"✗ Failed to cancel job: {e}", fg="red"), err=True)
82
+ raise SystemExit(1)