dh-cli 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dh_cli-0.5.0 → dh_cli-0.5.2}/.gitignore +0 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/PKG-INFO +1 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/pyproject.toml +1 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/aws_batch.py +12 -30
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/boltz.py +12 -32
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/cancel.py +7 -21
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/clean.py +6 -12
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/embed_t5.py +8 -24
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/finalize.py +36 -44
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/list_jobs.py +5 -8
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/local.py +2 -6
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/logs.py +2 -6
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/orca.py +55 -34
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/protmpnn.py +47 -43
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/protmpnn_to_boltz.py +21 -23
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/retry.py +6 -16
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/status.py +18 -38
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/submit.py +3 -13
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/train.py +16 -20
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/h5_utils.py +6 -20
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/manifest.py +8 -24
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/s3_transport.py +0 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/cloud_commands.py +22 -58
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/codeartifact.py +6 -15
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/api_client.py +7 -22
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/auth.py +1 -3
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/engine_commands.py +134 -216
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/progress.py +8 -17
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/ssh_config.py +1 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/studio_commands.py +26 -60
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/github_commands.py +8 -22
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/__init__.py +3 -4
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/test.py +5 -7
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/users.py +3 -1
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/main.py +2 -3
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/utility_commands.py +13 -41
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/warehouse.py +4 -13
- {dh_cli-0.5.0 → dh_cli-0.5.2}/tests/hz/test_init.py +1 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/tests/hz/test_suites.py +0 -2
- {dh_cli-0.5.0 → dh_cli-0.5.2}/tests/hz/test_users.py +0 -2
- {dh_cli-0.5.0 → dh_cli-0.5.2}/tests/test_cloud_gcp.py +3 -12
- dh_cli-0.5.2/tests/test_finalize_protmpnn.py +155 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/LICENSE +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/README.md +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/__init__.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/__init__.py +2 -2
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/__init__.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/commands/wait_for.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/fasta_utils.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/batch/job_id.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/engines_studios/__init__.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/deploy.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/local.py +0 -0
- {dh_cli-0.5.0 → dh_cli-0.5.2}/src/dh_cli/hz/tf.py +0 -0
|
@@ -6,15 +6,15 @@ import time
|
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
+
import boto3
|
|
10
|
+
from botocore.exceptions import ClientError
|
|
11
|
+
|
|
9
12
|
from dh_cli.batch.manifest import (
|
|
10
13
|
BATCH_JOBS_BASE,
|
|
11
14
|
JobStatus,
|
|
12
15
|
load_manifest,
|
|
13
16
|
)
|
|
14
17
|
|
|
15
|
-
import boto3
|
|
16
|
-
from botocore.exceptions import ClientError
|
|
17
|
-
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
|
|
@@ -107,9 +107,7 @@ class BatchClient:
|
|
|
107
107
|
"""
|
|
108
108
|
try:
|
|
109
109
|
if image_override:
|
|
110
|
-
job_definition = self._register_image_override(
|
|
111
|
-
job_definition, image_override
|
|
112
|
-
)
|
|
110
|
+
job_definition = self._register_image_override(job_definition, image_override)
|
|
113
111
|
|
|
114
112
|
submit_args: dict[str, Any] = {
|
|
115
113
|
"jobName": job_name,
|
|
@@ -133,9 +131,7 @@ class BatchClient:
|
|
|
133
131
|
if command:
|
|
134
132
|
container_overrides["command"] = command
|
|
135
133
|
if environment:
|
|
136
|
-
container_overrides["environment"] = [
|
|
137
|
-
{"name": k, "value": v} for k, v in environment.items()
|
|
138
|
-
]
|
|
134
|
+
container_overrides["environment"] = [{"name": k, "value": v} for k, v in environment.items()]
|
|
139
135
|
if resource_requirements:
|
|
140
136
|
container_overrides["resourceRequirements"] = resource_requirements
|
|
141
137
|
if container_overrides:
|
|
@@ -162,9 +158,7 @@ class BatchClient:
|
|
|
162
158
|
except ClientError as e:
|
|
163
159
|
raise BatchError(f"Failed to submit job: {e}")
|
|
164
160
|
|
|
165
|
-
def _register_image_override(
|
|
166
|
-
self, base_definition: str, image: str
|
|
167
|
-
) -> str:
|
|
161
|
+
def _register_image_override(self, base_definition: str, image: str) -> str:
|
|
168
162
|
"""Register a new revision of a job definition with a custom image.
|
|
169
163
|
|
|
170
164
|
AWS Batch containerOverrides doesn't support an image field, so we
|
|
@@ -173,14 +167,10 @@ class BatchClient:
|
|
|
173
167
|
|
|
174
168
|
Returns the ARN of the new revision.
|
|
175
169
|
"""
|
|
176
|
-
resp = self.batch.describe_job_definitions(
|
|
177
|
-
jobDefinitionName=base_definition, status="ACTIVE"
|
|
178
|
-
)
|
|
170
|
+
resp = self.batch.describe_job_definitions(jobDefinitionName=base_definition, status="ACTIVE")
|
|
179
171
|
definitions = resp.get("jobDefinitions", [])
|
|
180
172
|
if not definitions:
|
|
181
|
-
raise BatchError(
|
|
182
|
-
f"Job definition not found: {base_definition}"
|
|
183
|
-
)
|
|
173
|
+
raise BatchError(f"Job definition not found: {base_definition}")
|
|
184
174
|
|
|
185
175
|
latest = sorted(definitions, key=lambda d: d["revision"])[-1]
|
|
186
176
|
container_props = latest["containerProperties"].copy()
|
|
@@ -488,9 +478,7 @@ class BatchClient:
|
|
|
488
478
|
timestamp = event.get("timestamp", 0)
|
|
489
479
|
message = event.get("message", "")
|
|
490
480
|
# Format timestamp
|
|
491
|
-
dt = time.strftime(
|
|
492
|
-
"%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000)
|
|
493
|
-
)
|
|
481
|
+
dt = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000))
|
|
494
482
|
messages.append(f"[{dt}] {message}")
|
|
495
483
|
|
|
496
484
|
except ClientError as e:
|
|
@@ -498,9 +486,7 @@ class BatchClient:
|
|
|
498
486
|
|
|
499
487
|
return messages
|
|
500
488
|
|
|
501
|
-
def wait_for_job(
|
|
502
|
-
self, job_id: str, poll_interval: int = 30, timeout: int = 86400
|
|
503
|
-
) -> str:
|
|
489
|
+
def wait_for_job(self, job_id: str, poll_interval: int = 30, timeout: int = 86400) -> str:
|
|
504
490
|
"""Wait for a job to complete.
|
|
505
491
|
|
|
506
492
|
Args:
|
|
@@ -530,14 +516,10 @@ class BatchClient:
|
|
|
530
516
|
time.sleep(poll_interval)
|
|
531
517
|
|
|
532
518
|
|
|
533
|
-
AWS_BATCH_UUID_PATTERN = re.compile(
|
|
534
|
-
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
|
535
|
-
)
|
|
519
|
+
AWS_BATCH_UUID_PATTERN = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
|
|
536
520
|
|
|
537
521
|
|
|
538
|
-
def resolve_dependency(
|
|
539
|
-
job_id_or_aws_id: str, base_path: str = BATCH_JOBS_BASE
|
|
540
|
-
) -> str | None:
|
|
522
|
+
def resolve_dependency(job_id_or_aws_id: str, base_path: str = BATCH_JOBS_BASE) -> str | None:
|
|
541
523
|
"""Accept either a Dayhoff job ID (e.g. dma-embed-20260309-abc123)
|
|
542
524
|
or a raw AWS Batch job ID (UUID).
|
|
543
525
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Boltz structure prediction pipeline command."""
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
|
-
import os
|
|
5
4
|
import shutil
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
|
|
@@ -17,10 +16,9 @@ from ..manifest import (
|
|
|
17
16
|
JobStatus,
|
|
18
17
|
OutputConfig,
|
|
19
18
|
create_job_directory,
|
|
20
|
-
|
|
19
|
+
save_local_stub,
|
|
21
20
|
save_manifest,
|
|
22
21
|
save_manifest_s3,
|
|
23
|
-
save_local_stub,
|
|
24
22
|
)
|
|
25
23
|
from ..s3_transport import s3_job_prefix, upload_directory
|
|
26
24
|
|
|
@@ -48,9 +46,7 @@ def _is_primordial_path(path: Path) -> bool:
|
|
|
48
46
|
type=int,
|
|
49
47
|
help="Number of parallel workers [default: ~1 per 5 files]",
|
|
50
48
|
)
|
|
51
|
-
@click.option(
|
|
52
|
-
"--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
|
|
53
|
-
)
|
|
49
|
+
@click.option("--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]")
|
|
54
50
|
@click.option(
|
|
55
51
|
"--msa-dir",
|
|
56
52
|
type=click.Path(exists=True),
|
|
@@ -62,9 +58,7 @@ def _is_primordial_path(path: Path) -> bool:
|
|
|
62
58
|
is_flag=True,
|
|
63
59
|
help="Run single complex locally instead of Batch",
|
|
64
60
|
)
|
|
65
|
-
@click.option(
|
|
66
|
-
"--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
|
|
67
|
-
)
|
|
61
|
+
@click.option("--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging")
|
|
68
62
|
@click.option(
|
|
69
63
|
"--full-output",
|
|
70
64
|
is_flag=True,
|
|
@@ -176,9 +170,7 @@ def _submit_batch_job(
|
|
|
176
170
|
# Determine storage mode
|
|
177
171
|
use_s3 = not _is_primordial_path(input_path)
|
|
178
172
|
if use_s3 and num_files > S3_MAX_FILES:
|
|
179
|
-
click.echo(
|
|
180
|
-
f"Error: {num_files} files exceeds S3 transport limit ({S3_MAX_FILES})."
|
|
181
|
-
)
|
|
173
|
+
click.echo(f"Error: {num_files} files exceeds S3 transport limit ({S3_MAX_FILES}).")
|
|
182
174
|
click.echo("Use Primordial for large jobs: copy configs to /primordial/ first.")
|
|
183
175
|
raise SystemExit(1)
|
|
184
176
|
|
|
@@ -254,7 +246,7 @@ def _submit_batch_job(
|
|
|
254
246
|
except OSError:
|
|
255
247
|
click.echo("Copying MSA directory (this may take a while)...")
|
|
256
248
|
shutil.copytree(msa_src, msa_dest)
|
|
257
|
-
click.echo(
|
|
249
|
+
click.echo("Copied MSA directory")
|
|
258
250
|
|
|
259
251
|
# Create manifest
|
|
260
252
|
manifest = JobManifest(
|
|
@@ -342,9 +334,7 @@ def _submit_batch_job(
|
|
|
342
334
|
click.echo()
|
|
343
335
|
click.echo("After completion:")
|
|
344
336
|
finalize_output = "./structures/" if use_s3 else "/primordial/structures/"
|
|
345
|
-
click.echo(
|
|
346
|
-
f" Finalize: dh batch finalize {job_id} --output {finalize_output}"
|
|
347
|
-
)
|
|
337
|
+
click.echo(f" Finalize: dh batch finalize {job_id} --output {finalize_output}")
|
|
348
338
|
|
|
349
339
|
except BatchError as e:
|
|
350
340
|
manifest.status = JobStatus.FAILED
|
|
@@ -374,9 +364,7 @@ def _run_local_mode(input_path: Path):
|
|
|
374
364
|
click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
|
|
375
365
|
raise SystemExit(1)
|
|
376
366
|
|
|
377
|
-
click.echo(
|
|
378
|
-
f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}"
|
|
379
|
-
)
|
|
367
|
+
click.echo(f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}")
|
|
380
368
|
|
|
381
369
|
# Create a temporary job directory structure
|
|
382
370
|
temp_job_dir = input_path / ".local_boltz_job"
|
|
@@ -425,17 +413,13 @@ def _run_local_mode(input_path: Path):
|
|
|
425
413
|
result = subprocess.run(cmd)
|
|
426
414
|
if result.returncode != 0:
|
|
427
415
|
click.echo(
|
|
428
|
-
click.style(
|
|
429
|
-
f"Container exited with code {result.returncode}", fg="red"
|
|
430
|
-
),
|
|
416
|
+
click.style(f"Container exited with code {result.returncode}", fg="red"),
|
|
431
417
|
err=True,
|
|
432
418
|
)
|
|
433
419
|
raise SystemExit(result.returncode)
|
|
434
420
|
|
|
435
421
|
# Check for output
|
|
436
|
-
output_dirs = (
|
|
437
|
-
list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
|
|
438
|
-
)
|
|
422
|
+
output_dirs = list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
|
|
439
423
|
if output_dirs:
|
|
440
424
|
click.echo()
|
|
441
425
|
click.echo(click.style("✓ Prediction complete!", fg="green"))
|
|
@@ -447,9 +431,7 @@ def _run_local_mode(input_path: Path):
|
|
|
447
431
|
|
|
448
432
|
except FileNotFoundError:
|
|
449
433
|
click.echo(
|
|
450
|
-
click.style(
|
|
451
|
-
"Error: Docker not found. Is Docker installed and running?", fg="red"
|
|
452
|
-
),
|
|
434
|
+
click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
|
|
453
435
|
err=True,
|
|
454
436
|
)
|
|
455
437
|
raise SystemExit(1)
|
|
@@ -460,7 +442,7 @@ def _run_shell_mode(input_path: Path):
|
|
|
460
442
|
import subprocess
|
|
461
443
|
|
|
462
444
|
click.echo("Dropping into container shell...")
|
|
463
|
-
click.echo(
|
|
445
|
+
click.echo("Input will be available at: /input/")
|
|
464
446
|
click.echo()
|
|
465
447
|
|
|
466
448
|
cmd = [
|
|
@@ -494,9 +476,7 @@ def _run_shell_mode(input_path: Path):
|
|
|
494
476
|
subprocess.run(cmd)
|
|
495
477
|
except FileNotFoundError:
|
|
496
478
|
click.echo(
|
|
497
|
-
click.style(
|
|
498
|
-
"Error: Docker not found. Is Docker installed and running?", fg="red"
|
|
499
|
-
),
|
|
479
|
+
click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
|
|
500
480
|
err=True,
|
|
501
481
|
)
|
|
502
482
|
raise SystemExit(1)
|
|
@@ -58,9 +58,7 @@ def cancel(job_id, force, base_path):
|
|
|
58
58
|
JobStatus.FINALIZED,
|
|
59
59
|
JobStatus.CANCELLED,
|
|
60
60
|
):
|
|
61
|
-
click.echo(
|
|
62
|
-
f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True
|
|
63
|
-
)
|
|
61
|
+
click.echo(f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True)
|
|
64
62
|
raise SystemExit(1)
|
|
65
63
|
|
|
66
64
|
# Get Batch job ID
|
|
@@ -79,14 +77,10 @@ def cancel(job_id, force, base_path):
|
|
|
79
77
|
|
|
80
78
|
if force:
|
|
81
79
|
click.echo(f"Terminating job {batch_job_id}...")
|
|
82
|
-
client.terminate_job(
|
|
83
|
-
batch_job_id, reason="Terminated by user via dh batch cancel --force"
|
|
84
|
-
)
|
|
80
|
+
client.terminate_job(batch_job_id, reason="Terminated by user via dh batch cancel --force")
|
|
85
81
|
else:
|
|
86
82
|
click.echo(f"Cancelling job {batch_job_id}...")
|
|
87
|
-
client.cancel_job(
|
|
88
|
-
batch_job_id, reason="Cancelled by user via dh batch cancel"
|
|
89
|
-
)
|
|
83
|
+
client.cancel_job(batch_job_id, reason="Cancelled by user via dh batch cancel")
|
|
90
84
|
|
|
91
85
|
# Update manifest
|
|
92
86
|
manifest.status = JobStatus.CANCELLED
|
|
@@ -100,13 +94,9 @@ def cancel(job_id, force, base_path):
|
|
|
100
94
|
if retry_info.batch_job_id:
|
|
101
95
|
try:
|
|
102
96
|
if force:
|
|
103
|
-
client.terminate_job(
|
|
104
|
-
retry_info.batch_job_id, reason="Parent job cancelled"
|
|
105
|
-
)
|
|
97
|
+
client.terminate_job(retry_info.batch_job_id, reason="Parent job cancelled")
|
|
106
98
|
else:
|
|
107
|
-
client.cancel_job(
|
|
108
|
-
retry_info.batch_job_id, reason="Parent job cancelled"
|
|
109
|
-
)
|
|
99
|
+
client.cancel_job(retry_info.batch_job_id, reason="Parent job cancelled")
|
|
110
100
|
click.echo(f" Also cancelled retry job: {retry_info.retry_id}")
|
|
111
101
|
except BatchError:
|
|
112
102
|
pass # Retry job may already be complete
|
|
@@ -152,13 +142,9 @@ def _cancel_retry_job(manifest, retry_id: str, force: bool, base_path: str):
|
|
|
152
142
|
)
|
|
153
143
|
|
|
154
144
|
click.echo()
|
|
155
|
-
click.echo(
|
|
156
|
-
click.style(f"✓ Retry job {retry_id} cancelled successfully", fg="green")
|
|
157
|
-
)
|
|
145
|
+
click.echo(click.style(f"✓ Retry job {retry_id} cancelled successfully", fg="green"))
|
|
158
146
|
click.echo(f"Parent job: {manifest.job_id}")
|
|
159
147
|
|
|
160
148
|
except BatchError as e:
|
|
161
|
-
click.echo(
|
|
162
|
-
click.style(f"✗ Failed to cancel retry job: {e}", fg="red"), err=True
|
|
163
|
-
)
|
|
149
|
+
click.echo(click.style(f"✗ Failed to cancel retry job: {e}", fg="red"), err=True)
|
|
164
150
|
raise SystemExit(1)
|
|
@@ -10,9 +10,11 @@ from ..manifest import (
|
|
|
10
10
|
BATCH_JOBS_BASE,
|
|
11
11
|
JobStatus,
|
|
12
12
|
delete_job_directory,
|
|
13
|
+
)
|
|
14
|
+
from ..manifest import (
|
|
13
15
|
list_jobs as list_manifests,
|
|
14
16
|
)
|
|
15
|
-
from .status import format_time_ago
|
|
17
|
+
from .status import format_time_ago
|
|
16
18
|
|
|
17
19
|
LOCAL_STAGING_DIR = "/primordial/.tmp/local-train"
|
|
18
20
|
|
|
@@ -49,9 +51,7 @@ def _clean_local_staging(dry_run: bool) -> bool:
|
|
|
49
51
|
default=7,
|
|
50
52
|
help="Only clean jobs older than N days [default: 7]",
|
|
51
53
|
)
|
|
52
|
-
@click.option(
|
|
53
|
-
"--dry-run", is_flag=True, help="Show what would be cleaned without deleting"
|
|
54
|
-
)
|
|
54
|
+
@click.option("--dry-run", is_flag=True, help="Show what would be cleaned without deleting")
|
|
55
55
|
@click.option("--force", is_flag=True, help="Delete without confirmation")
|
|
56
56
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
57
57
|
@click.option(
|
|
@@ -129,9 +129,7 @@ def clean(user, older_than, dry_run, force, base_path, local_only):
|
|
|
129
129
|
live_statuses = client.get_job_statuses_batch(batch_job_ids)
|
|
130
130
|
except BatchError as e:
|
|
131
131
|
click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
|
|
132
|
-
click.echo(
|
|
133
|
-
"Cannot safely clean jobs without knowing their status.", err=True
|
|
134
|
-
)
|
|
132
|
+
click.echo("Cannot safely clean jobs without knowing their status.", err=True)
|
|
135
133
|
raise SystemExit(1)
|
|
136
134
|
|
|
137
135
|
# Find jobs that are safe to clean (SUCCEEDED or FAILED)
|
|
@@ -156,11 +154,7 @@ def clean(user, older_than, dry_run, force, base_path, local_only):
|
|
|
156
154
|
click.echo("-" * 65)
|
|
157
155
|
|
|
158
156
|
for manifest, status in safe_to_clean:
|
|
159
|
-
click.echo(
|
|
160
|
-
f"{manifest.job_id:<35} "
|
|
161
|
-
f"{status:<12} "
|
|
162
|
-
f"{format_time_ago(manifest.created):<12}"
|
|
163
|
-
)
|
|
157
|
+
click.echo(f"{manifest.job_id:<35} {status:<12} {format_time_ago(manifest.created):<12}")
|
|
164
158
|
|
|
165
159
|
click.echo()
|
|
166
160
|
click.echo(f"Found {len(safe_to_clean)} completed jobs to clean.")
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""T5 embedding pipeline command."""
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
3
|
import shutil
|
|
5
4
|
import subprocess
|
|
6
5
|
from pathlib import Path
|
|
@@ -17,10 +16,9 @@ from ..manifest import (
|
|
|
17
16
|
JobStatus,
|
|
18
17
|
OutputConfig,
|
|
19
18
|
create_job_directory,
|
|
20
|
-
|
|
19
|
+
save_local_stub,
|
|
21
20
|
save_manifest,
|
|
22
21
|
save_manifest_s3,
|
|
23
|
-
save_local_stub,
|
|
24
22
|
)
|
|
25
23
|
from ..s3_transport import s3_job_prefix, upload_directory
|
|
26
24
|
|
|
@@ -45,9 +43,7 @@ def _is_primordial_path(path: Path) -> bool:
|
|
|
45
43
|
type=int,
|
|
46
44
|
help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]",
|
|
47
45
|
)
|
|
48
|
-
@click.option(
|
|
49
|
-
"--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
|
|
50
|
-
)
|
|
46
|
+
@click.option("--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]")
|
|
51
47
|
@click.option(
|
|
52
48
|
"--seqs-per-chunk",
|
|
53
49
|
default=DEFAULT_SEQS_PER_CHUNK,
|
|
@@ -60,9 +56,7 @@ def _is_primordial_path(path: Path) -> bool:
|
|
|
60
56
|
is_flag=True,
|
|
61
57
|
help="Run single chunk locally instead of Batch",
|
|
62
58
|
)
|
|
63
|
-
@click.option(
|
|
64
|
-
"--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
|
|
65
|
-
)
|
|
59
|
+
@click.option("--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging")
|
|
66
60
|
@click.option("--dry-run", is_flag=True, help="Show plan without submitting")
|
|
67
61
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
68
62
|
@click.option("--after", "after", multiple=True, help="Job ID(s) to wait for before starting")
|
|
@@ -153,9 +147,7 @@ def _submit_batch_job(
|
|
|
153
147
|
click.echo(f"Found {num_sequences:,} sequences")
|
|
154
148
|
|
|
155
149
|
if num_sequences == 0:
|
|
156
|
-
click.echo(
|
|
157
|
-
click.style("Error: No sequences found in input file", fg="red"), err=True
|
|
158
|
-
)
|
|
150
|
+
click.echo(click.style("Error: No sequences found in input file", fg="red"), err=True)
|
|
159
151
|
raise SystemExit(1)
|
|
160
152
|
|
|
161
153
|
# Determine storage mode
|
|
@@ -300,9 +292,7 @@ def _submit_batch_job(
|
|
|
300
292
|
click.echo()
|
|
301
293
|
click.echo("After completion:")
|
|
302
294
|
finalize_output = "./embeddings.h5" if use_s3 else "/primordial/embeddings.h5"
|
|
303
|
-
click.echo(
|
|
304
|
-
f" Finalize: dh batch finalize {job_id} --output {finalize_output}"
|
|
305
|
-
)
|
|
295
|
+
click.echo(f" Finalize: dh batch finalize {job_id} --output {finalize_output}")
|
|
306
296
|
|
|
307
297
|
except BatchError as e:
|
|
308
298
|
manifest.status = JobStatus.FAILED
|
|
@@ -370,9 +360,7 @@ def _run_local_mode(input_path: Path):
|
|
|
370
360
|
result = subprocess.run(cmd)
|
|
371
361
|
if result.returncode != 0:
|
|
372
362
|
click.echo(
|
|
373
|
-
click.style(
|
|
374
|
-
f"Container exited with code {result.returncode}", fg="red"
|
|
375
|
-
),
|
|
363
|
+
click.style(f"Container exited with code {result.returncode}", fg="red"),
|
|
376
364
|
err=True,
|
|
377
365
|
)
|
|
378
366
|
raise SystemExit(result.returncode)
|
|
@@ -394,9 +382,7 @@ def _run_local_mode(input_path: Path):
|
|
|
394
382
|
|
|
395
383
|
except FileNotFoundError:
|
|
396
384
|
click.echo(
|
|
397
|
-
click.style(
|
|
398
|
-
"Error: Docker not found. Is Docker installed and running?", fg="red"
|
|
399
|
-
),
|
|
385
|
+
click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
|
|
400
386
|
err=True,
|
|
401
387
|
)
|
|
402
388
|
raise SystemExit(1)
|
|
@@ -437,9 +423,7 @@ def _run_shell_mode(input_path: Path):
|
|
|
437
423
|
subprocess.run(cmd)
|
|
438
424
|
except FileNotFoundError:
|
|
439
425
|
click.echo(
|
|
440
|
-
click.style(
|
|
441
|
-
"Error: Docker not found. Is Docker installed and running?", fg="red"
|
|
442
|
-
),
|
|
426
|
+
click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
|
|
443
427
|
err=True,
|
|
444
428
|
)
|
|
445
429
|
raise SystemExit(1)
|
|
@@ -42,9 +42,7 @@ from ..manifest import (
|
|
|
42
42
|
help="Skip deduplication step (use if input has no duplicates)",
|
|
43
43
|
)
|
|
44
44
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
45
|
-
def finalize(
|
|
46
|
-
job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path
|
|
47
|
-
):
|
|
45
|
+
def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path):
|
|
48
46
|
"""Combine results and clean up job intermediates.
|
|
49
47
|
|
|
50
48
|
For embedding jobs, combines H5 files into a single output file.
|
|
@@ -90,7 +88,7 @@ def finalize(
|
|
|
90
88
|
click.echo(f"Output is a directory, writing to: {output_path}")
|
|
91
89
|
|
|
92
90
|
if is_s3:
|
|
93
|
-
from ..s3_transport import
|
|
91
|
+
from ..s3_transport import delete_prefix, download_directory
|
|
94
92
|
|
|
95
93
|
# Download outputs from S3 to a temp directory
|
|
96
94
|
s3_temp_dir = Path(tempfile.mkdtemp())
|
|
@@ -113,9 +111,7 @@ def finalize(
|
|
|
113
111
|
shutil.rmtree(s3_temp_dir, ignore_errors=True)
|
|
114
112
|
raise SystemExit(1)
|
|
115
113
|
click.echo()
|
|
116
|
-
click.echo(
|
|
117
|
-
click.style("Warning: Finalizing with incomplete chunks", fg="yellow")
|
|
118
|
-
)
|
|
114
|
+
click.echo(click.style("Warning: Finalizing with incomplete chunks", fg="yellow"))
|
|
119
115
|
|
|
120
116
|
# Update status
|
|
121
117
|
manifest.status = JobStatus.FINALIZING
|
|
@@ -130,11 +126,7 @@ def finalize(
|
|
|
130
126
|
_finalize_embeddings(output_dir, output_path, skip_dedup=skip_dedup)
|
|
131
127
|
elif manifest.pipeline == "boltz":
|
|
132
128
|
# Check if S3 essential mode was used — output is already filtered
|
|
133
|
-
s3_essential = (
|
|
134
|
-
is_s3
|
|
135
|
-
and manifest.output
|
|
136
|
-
and getattr(manifest.output, "output_mode", "full") == "essential"
|
|
137
|
-
)
|
|
129
|
+
s3_essential = is_s3 and manifest.output and getattr(manifest.output, "output_mode", "full") == "essential"
|
|
138
130
|
if full_output and s3_essential:
|
|
139
131
|
click.echo(
|
|
140
132
|
click.style(
|
|
@@ -183,9 +175,7 @@ def finalize(
|
|
|
183
175
|
click.echo(f"Job directory preserved: {job_dir}")
|
|
184
176
|
|
|
185
177
|
|
|
186
|
-
def _check_completion(
|
|
187
|
-
job_id: str, base_path: str, output_dir: Path | None = None
|
|
188
|
-
) -> list[int]:
|
|
178
|
+
def _check_completion(job_id: str, base_path: str, output_dir: Path | None = None) -> list[int]:
|
|
189
179
|
"""Check which chunks are incomplete (no .done marker).
|
|
190
180
|
|
|
191
181
|
Handles both original chunks (chunk_000.fasta) and resliced chunks
|
|
@@ -236,15 +226,9 @@ def _check_completion(
|
|
|
236
226
|
# Find which retry covered this index and check if complete
|
|
237
227
|
is_covered = False
|
|
238
228
|
for retry in manifest.retries:
|
|
239
|
-
if
|
|
240
|
-
retry.reslice_prefix
|
|
241
|
-
and retry.reslice_count
|
|
242
|
-
and idx in retry.indices
|
|
243
|
-
):
|
|
229
|
+
if retry.reslice_prefix and retry.reslice_count and idx in retry.indices:
|
|
244
230
|
# Check if all resliced chunks for this retry completed
|
|
245
|
-
done_count = len(
|
|
246
|
-
list(output_dir.glob(f"embed_{retry.reslice_prefix}_*.done"))
|
|
247
|
-
)
|
|
231
|
+
done_count = len(list(output_dir.glob(f"embed_{retry.reslice_prefix}_*.done")))
|
|
248
232
|
if done_count >= retry.reslice_count:
|
|
249
233
|
is_covered = True
|
|
250
234
|
break
|
|
@@ -316,17 +300,13 @@ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool =
|
|
|
316
300
|
|
|
317
301
|
if skip_dedup:
|
|
318
302
|
click.echo("Optimizing chunks...")
|
|
319
|
-
optimize_protein_embedding_chunks(
|
|
320
|
-
str(combined_path), str(output_path)
|
|
321
|
-
)
|
|
303
|
+
optimize_protein_embedding_chunks(str(combined_path), str(output_path))
|
|
322
304
|
else:
|
|
323
305
|
deduped_path = Path(tmpdir) / "deduped.h5"
|
|
324
306
|
click.echo("Deduplicating...")
|
|
325
307
|
deduplicate_h5_file(str(combined_path), str(deduped_path))
|
|
326
308
|
click.echo("Optimizing chunks...")
|
|
327
|
-
optimize_protein_embedding_chunks(
|
|
328
|
-
str(deduped_path), str(output_path)
|
|
329
|
-
)
|
|
309
|
+
optimize_protein_embedding_chunks(str(deduped_path), str(output_path))
|
|
330
310
|
|
|
331
311
|
click.echo(click.style("✓ H5 files combined successfully", fg="green"))
|
|
332
312
|
|
|
@@ -341,11 +321,7 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = Fal
|
|
|
341
321
|
extract only essential files (CIF structures and confidence JSON).
|
|
342
322
|
"""
|
|
343
323
|
# Find all output directories (one per complex)
|
|
344
|
-
complex_dirs = [
|
|
345
|
-
d
|
|
346
|
-
for d in output_dir.iterdir()
|
|
347
|
-
if d.is_dir() and d.name.startswith("boltz_results_")
|
|
348
|
-
]
|
|
324
|
+
complex_dirs = [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith("boltz_results_")]
|
|
349
325
|
|
|
350
326
|
if not complex_dirs:
|
|
351
327
|
click.echo("No output directories found.", err=True)
|
|
@@ -393,9 +369,7 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = Fal
|
|
|
393
369
|
|
|
394
370
|
click.echo()
|
|
395
371
|
if skipped_count > 0:
|
|
396
|
-
click.echo(
|
|
397
|
-
f"Copied {copied_count} predictions, skipped {skipped_count} existing"
|
|
398
|
-
)
|
|
372
|
+
click.echo(f"Copied {copied_count} predictions, skipped {skipped_count} existing")
|
|
399
373
|
else:
|
|
400
374
|
click.echo(
|
|
401
375
|
click.style(
|
|
@@ -421,9 +395,7 @@ def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_nam
|
|
|
421
395
|
|
|
422
396
|
predictions_dir = source_dir / "predictions"
|
|
423
397
|
if not predictions_dir.exists():
|
|
424
|
-
click.echo(
|
|
425
|
-
f" Warning: No predictions directory found for {complex_name}", err=True
|
|
426
|
-
)
|
|
398
|
+
click.echo(f" Warning: No predictions directory found for {complex_name}", err=True)
|
|
427
399
|
return
|
|
428
400
|
|
|
429
401
|
# Find all subdirectories in predictions/ (usually just one named after the complex)
|
|
@@ -495,10 +467,30 @@ def _finalize_protmpnn(output_dir: Path, output_path: Path):
|
|
|
495
467
|
for pdb_file in config_pdbs.glob("*.pdb"):
|
|
496
468
|
shutil.copy2(pdb_file, pdbs_dest / pdb_file.name)
|
|
497
469
|
|
|
470
|
+
scores_dest = output_path / "scores"
|
|
471
|
+
has_scores = False
|
|
472
|
+
for config_dir in output_dir.iterdir():
|
|
473
|
+
if config_dir.is_dir():
|
|
474
|
+
config_scores = config_dir / "scores"
|
|
475
|
+
if config_scores.exists():
|
|
476
|
+
if not has_scores:
|
|
477
|
+
scores_dest.mkdir(exist_ok=True)
|
|
478
|
+
has_scores = True
|
|
479
|
+
for score_file in config_scores.iterdir():
|
|
480
|
+
if score_file.is_file():
|
|
481
|
+
shutil.copy2(score_file, scores_dest / score_file.name)
|
|
482
|
+
config_stats = config_dir / "stats"
|
|
483
|
+
if config_stats.exists():
|
|
484
|
+
if not has_scores:
|
|
485
|
+
scores_dest.mkdir(exist_ok=True)
|
|
486
|
+
has_scores = True
|
|
487
|
+
for pt_file in config_stats.glob("*.pt"):
|
|
488
|
+
shutil.copy2(pt_file, scores_dest / pt_file.name)
|
|
489
|
+
|
|
490
|
+
if has_scores:
|
|
491
|
+
click.echo(f"Scores: {scores_dest}/")
|
|
492
|
+
|
|
498
493
|
top_conf = merged.iloc[0]["overall_confidence"] if num_variants > 0 else "N/A"
|
|
499
494
|
|
|
500
|
-
click.echo(
|
|
501
|
-
f"{num_variants} variants from {num_configs} config(s), "
|
|
502
|
-
f"top confidence: {top_conf}"
|
|
503
|
-
)
|
|
495
|
+
click.echo(f"{num_variants} variants from {num_configs} config(s), top confidence: {top_conf}")
|
|
504
496
|
click.echo(click.style(f"Results: {output_path / 'results.csv'}", fg="green"))
|
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
import click
|
|
4
4
|
|
|
5
5
|
from ..aws_batch import BatchClient, BatchError
|
|
6
|
-
from ..manifest import BATCH_JOBS_BASE, JobStatus
|
|
7
|
-
from
|
|
6
|
+
from ..manifest import BATCH_JOBS_BASE, JobStatus
|
|
7
|
+
from ..manifest import list_jobs as list_manifests
|
|
8
|
+
from .status import _aws_status_to_job_status, format_status, format_time_ago
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
@click.command("list")
|
|
@@ -16,9 +17,7 @@ from .status import format_status, format_time_ago, _aws_status_to_job_status
|
|
|
16
17
|
help="Filter by status",
|
|
17
18
|
)
|
|
18
19
|
@click.option("--pipeline", help="Filter by pipeline type")
|
|
19
|
-
@click.option(
|
|
20
|
-
"--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]"
|
|
21
|
-
)
|
|
20
|
+
@click.option("--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]")
|
|
22
21
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
23
22
|
def list_jobs(user, status_filter, pipeline, limit, base_path):
|
|
24
23
|
"""List recent batch jobs.
|
|
@@ -98,9 +97,7 @@ def list_jobs(user, status_filter, pipeline, limit, base_path):
|
|
|
98
97
|
|
|
99
98
|
# Print header
|
|
100
99
|
click.echo()
|
|
101
|
-
click.echo(
|
|
102
|
-
f"{'JOB ID':<35} {'STATUS':<12} {'PIPELINE':<12} {'USER':<10} {'CREATED':<12}"
|
|
103
|
-
)
|
|
100
|
+
click.echo(f"{'JOB ID':<35} {'STATUS':<12} {'PIPELINE':<12} {'USER':<10} {'CREATED':<12}")
|
|
104
101
|
click.echo("-" * 85)
|
|
105
102
|
|
|
106
103
|
for manifest, live_status in display_data:
|