dh-cli 0.8.2__tar.gz → 0.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {dh_cli-0.8.2 → dh_cli-0.8.4}/PKG-INFO +1 -1
  2. {dh_cli-0.8.2 → dh_cli-0.8.4}/pyproject.toml +1 -1
  3. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/aws_batch.py +19 -1
  4. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/finalize.py +51 -1
  5. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/submit.py +8 -1
  6. dh_cli-0.8.4/tests/batch/test_image_override.py +141 -0
  7. dh_cli-0.8.4/tests/batch/test_submit_image_validation.py +90 -0
  8. dh_cli-0.8.4/tests/test_finalize_boltz_tar.py +245 -0
  9. {dh_cli-0.8.2 → dh_cli-0.8.4}/.gitignore +0 -0
  10. {dh_cli-0.8.2 → dh_cli-0.8.4}/LICENSE +0 -0
  11. {dh_cli-0.8.2 → dh_cli-0.8.4}/README.md +0 -0
  12. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/__init__.py +0 -0
  13. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/_identity.py +0 -0
  14. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/__init__.py +0 -0
  15. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/__init__.py +0 -0
  16. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/boltz.py +0 -0
  17. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/cancel.py +0 -0
  18. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/clean.py +0 -0
  19. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/embed_t5.py +0 -0
  20. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/list_jobs.py +0 -0
  21. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/local.py +0 -0
  22. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/logs.py +0 -0
  23. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/orca.py +0 -0
  24. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/protmpnn.py +0 -0
  25. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/protmpnn_to_boltz.py +0 -0
  26. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/retry.py +0 -0
  27. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/status.py +0 -0
  28. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/train.py +0 -0
  29. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/commands/wait_for.py +0 -0
  30. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/fasta_utils.py +0 -0
  31. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/h5_utils.py +0 -0
  32. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/job_id.py +0 -0
  33. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/manifest.py +0 -0
  34. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/batch/s3_transport.py +0 -0
  35. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/bedrock/__init__.py +0 -0
  36. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/bedrock/commands.py +0 -0
  37. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/bedrock/cost_report.py +0 -0
  38. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/bedrock/pricing.yaml +0 -0
  39. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/cloud_commands.py +0 -0
  40. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/codeartifact.py +0 -0
  41. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/__init__.py +0 -0
  42. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/api_client.py +0 -0
  43. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/auth.py +0 -0
  44. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/engine_commands.py +0 -0
  45. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/progress.py +0 -0
  46. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/ssh_config.py +0 -0
  47. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/engines_studios/studio_commands.py +0 -0
  48. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/github_commands.py +0 -0
  49. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/__init__.py +0 -0
  50. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/deploy.py +0 -0
  51. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/local.py +0 -0
  52. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/test.py +0 -0
  53. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/tf.py +0 -0
  54. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/hz/users.py +0 -0
  55. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/main.py +0 -0
  56. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/utility_commands.py +0 -0
  57. {dh_cli-0.8.2 → dh_cli-0.8.4}/src/dh_cli/warehouse.py +0 -0
  58. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/batch/__init__.py +0 -0
  59. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/batch/test_aws_batch_resources.py +0 -0
  60. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/batch/test_submit_cpu_only.py +0 -0
  61. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/batch/test_submit_merge.py +0 -0
  62. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/conftest.py +0 -0
  63. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/A_cache_write.json +0 -0
  64. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/B_cache_read.json +0 -0
  65. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/C_plain.json +0 -0
  66. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/D_cursor_user.json +0 -0
  67. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/E_service_role.json +0 -0
  68. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/F_legacy_shared.json +0 -0
  69. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/fixtures/G_unknown_model.json +0 -0
  70. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_build_report.py +0 -0
  71. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_classify_arn.py +0 -0
  72. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_cli_exit_codes.py +0 -0
  73. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_cost_calc.py +0 -0
  74. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_cost_command.py +0 -0
  75. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_cur_reconciliation.py +0 -0
  76. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_key_command.py +0 -0
  77. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_render_formats.py +0 -0
  78. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_resolve_base_model.py +0 -0
  79. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/bedrock/test_s3_walker.py +0 -0
  80. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/__init__.py +0 -0
  81. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/conftest.py +0 -0
  82. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_engine_role_cannot_read_github_pat.py +0 -0
  83. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_identity.py +0 -0
  84. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_login.py +0 -0
  85. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_login_error_paths.py +0 -0
  86. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_login_security.py +0 -0
  87. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_logout.py +0 -0
  88. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_rotate.py +0 -0
  89. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/github/test_status.py +0 -0
  90. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/hz/test_init.py +0 -0
  91. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/hz/test_suites.py +0 -0
  92. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/hz/test_users.py +0 -0
  93. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/test_cloud_gcp.py +0 -0
  94. {dh_cli-0.8.2 → dh_cli-0.8.4}/tests/test_finalize_protmpnn.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dh-cli
3
- Version: 0.8.2
3
+ Version: 0.8.4
4
4
  Summary: Dayhoff Labs developer CLI
5
5
  Author-email: Dayhoff Labs <dev@dayhofflabs.com>
6
6
  License: # PolyForm Noncommercial License 1.0.0
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "dh-cli"
7
- version = "0.8.2"
7
+ version = "0.8.4"
8
8
  description = "Dayhoff Labs developer CLI"
9
9
  requires-python = ">=3.11"
10
10
  readme = "README.md"
@@ -17,6 +17,11 @@ from dh_cli.batch.manifest import (
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
# Matches a fully-qualified ECR image reference:
#   <12-digit account>.dkr.ecr.<region>.amazonaws.com/<repo>[:tag | @sha256:<64 hex>]
#
# Two deliberate choices:
#   * ``[0-9]`` instead of ``\d`` -- in a str pattern ``\d`` also matches
#     non-ASCII decimal digits, which can never be part of an AWS account id.
#   * ``\Z`` instead of ``$`` -- ``$`` also matches just before a trailing
#     newline, so ``"<valid-ref>\n"`` would pass ``.match()`` and the newline
#     would travel on with the image string.
_ECR_IMAGE_RE = re.compile(
    r"^[0-9]{12}\.dkr\.ecr\.[a-z0-9-]+\.amazonaws\.com/[^:@\s]+"
    r"(?::[^\s]+|@sha256:[a-f0-9]{64})?\Z"
)
24
+
20
25
 
21
26
  class BatchError(Exception):
22
27
  """Error interacting with AWS Batch."""
@@ -165,14 +170,27 @@ class BatchClient:
165
170
  create a new revision of the base definition with only the image
166
171
  changed, preserving all other containerProperties (roles, volumes, etc.).
167
172
 
168
- Returns the ARN of the new revision.
173
+ Returns the ARN of the new revision, or the ARN of the latest existing
174
+ revision if its image already matches.
169
175
  """
176
+ if not _ECR_IMAGE_RE.match(image):
177
+ raise BatchError(
178
+ f"Image override must be a fully-qualified ECR URL "
179
+ f"(<account>.dkr.ecr.<region>.amazonaws.com/<repo>[:tag|@digest]); "
180
+ f"got: {image!r}"
181
+ )
182
+
170
183
  resp = self.batch.describe_job_definitions(jobDefinitionName=base_definition, status="ACTIVE")
171
184
  definitions = resp.get("jobDefinitions", [])
172
185
  if not definitions:
173
186
  raise BatchError(f"Job definition not found: {base_definition}")
174
187
 
175
188
  latest = sorted(definitions, key=lambda d: d["revision"])[-1]
189
+ if latest["containerProperties"].get("image") == image:
190
+ arn = latest["jobDefinitionArn"]
191
+ logger.info(f"Reusing job definition {arn}; image already matches {image}")
192
+ return arn
193
+
176
194
  container_props = latest["containerProperties"].copy()
177
195
  container_props["image"] = image
178
196
 
@@ -1,6 +1,7 @@
1
1
  """Finalize command for combining results and cleaning up."""
2
2
 
3
3
  import shutil
4
+ import tarfile
4
5
  import tempfile
5
6
  from pathlib import Path
6
7
 
@@ -94,7 +95,10 @@ def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup,
94
95
  s3_temp_dir = Path(tempfile.mkdtemp())
95
96
  s3_output_prefix = f"{manifest.s3_prefix}output/"
96
97
  click.echo("Downloading outputs from S3...")
97
- download_directory(s3_output_prefix, s3_temp_dir)
98
+ if manifest.pipeline == "boltz":
99
+ _download_boltz_s3_output(s3_output_prefix, s3_temp_dir)
100
+ else:
101
+ download_directory(s3_output_prefix, s3_temp_dir)
98
102
  output_dir = s3_temp_dir
99
103
  else:
100
104
  output_dir = job_dir / "output"
@@ -175,6 +179,52 @@ def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup,
175
179
  click.echo(f"Job directory preserved: {job_dir}")
176
180
 
177
181
 
182
def _extract_tar_safely(tar_path: Path, dest: Path) -> None:
    """Extract ``tar_path`` into ``dest`` without letting members escape ``dest``.

    The archives come from a remote bucket, so member names are not trusted.

    Args:
        tar_path: Local path of the tar archive to unpack.
        dest: Directory the archive is unpacked into.

    Raises:
        tarfile.TarError: If the archive cannot be read, or a member is
            rejected (resolves outside ``dest``, or is not a regular
            file/directory on interpreters without extraction filters).
    """
    with tarfile.open(tar_path, mode="r") as tf:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available on this interpreter; the
            # "data" filter refuses members that would be created outside
            # dest and refuses device nodes.
            tf.extractall(dest, filter="data")
            return

        # Interpreter predates extraction filters: vet every member by hand
        # before extracting anything.
        dest_root = dest.resolve()
        for member in tf.getmembers():
            if not (member.isfile() or member.isdir()):
                raise tarfile.TarError(
                    f"Refusing to extract non-regular member {member.name!r} from {tar_path.name}"
                )
            # An absolute member name replaces dest_root in the join, so it
            # fails the containment check as well.
            if not (dest_root / member.name).resolve().is_relative_to(dest_root):
                raise tarfile.TarError(
                    f"Refusing to extract {member.name!r} from {tar_path.name}: path escapes {dest}"
                )
        tf.extractall(dest)


def _download_boltz_s3_output(s3_output_prefix: str, local_dir: Path) -> None:
    """Download Boltz tar outputs from S3 and extract into local_dir.

    Workers produce one `boltz_results_<name>.tar` per prediction plus
    `boltz_*.done` marker objects. This helper downloads only those keys
    (ignoring anything else under the prefix), extracts each tar in place,
    and removes the tar file afterwards so the resulting layout matches
    what `_finalize_boltz` expects on Primordial.

    Objects are matched on the final path component of their key, so every
    matching object lands directly in ``local_dir`` regardless of how deeply
    it is nested under the prefix.

    Args:
        s3_output_prefix: S3 URI prefix like s3://bucket/jobs/<id>/output/
        local_dir: Local directory to extract into

    Raises:
        tarfile.TarError: If a downloaded archive is unreadable or contains
            a member that would be written outside ``local_dir``.
    """
    from ..s3_transport import _get_client, parse_s3_uri

    bucket, prefix_key = parse_s3_uri(s3_output_prefix)
    client = _get_client()
    local_dir.mkdir(parents=True, exist_ok=True)

    paginator = client.get_paginator("list_objects_v2")
    tar_count = 0
    done_count = 0
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix_key):
        for obj in page.get("Contents", []):
            key = obj["Key"]
            relative = key[len(prefix_key) :]
            if not relative:
                # The listing returned the prefix itself; nothing to fetch.
                continue
            basename = Path(relative).name
            if basename.startswith("boltz_results_") and basename.endswith(".tar"):
                tar_path = local_dir / basename
                try:
                    client.download_file(bucket, key, str(tar_path))
                    _extract_tar_safely(tar_path, local_dir)
                finally:
                    # Never leave the archive behind, whether the download
                    # or the extraction succeeded or failed.
                    tar_path.unlink(missing_ok=True)
                tar_count += 1
            elif basename.startswith("boltz_") and basename.endswith(".done"):
                done_path = local_dir / basename
                client.download_file(bucket, key, str(done_path))
                done_count += 1

    click.echo(f" Downloaded {tar_count} prediction tars, {done_count} done markers")
226
+
227
+
178
228
  def _check_completion(job_id: str, base_path: str, output_dir: Path | None = None) -> list[int]:
179
229
  """Check which chunks are incomplete (no .done marker).
180
230
 
@@ -4,7 +4,7 @@ import click
4
4
  import yaml
5
5
  from click.core import ParameterSource
6
6
 
7
- from ..aws_batch import BatchClient, BatchError, resolve_dependency
7
+ from ..aws_batch import _ECR_IMAGE_RE, BatchClient, BatchError, resolve_dependency
8
8
  from ..job_id import generate_job_id, get_aws_username
9
9
  from ..manifest import (
10
10
  BATCH_JOBS_BASE,
@@ -114,6 +114,13 @@ def submit(
114
114
  job_retry = _pick("retry", retry)
115
115
  job_timeout = _pick("timeout", timeout)
116
116
  job_image = image or config.get("image")
117
+ if job_image and not _ECR_IMAGE_RE.match(job_image):
118
+ raise click.BadParameter(
119
+ f"--image must be a fully-qualified ECR URL "
120
+ f"(<account>.dkr.ecr.<region>.amazonaws.com/<repo>[:tag|@digest]); "
121
+ f"got: {job_image!r}",
122
+ param_hint="--image",
123
+ )
117
124
 
118
125
  # Parse environment variables
119
126
  job_env = dict(config.get("env", {}))
@@ -0,0 +1,141 @@
1
+ """Tests for BatchClient._register_image_override.
2
+
3
+ Pins two contracts:
4
+
5
+ 1. Bare names (anything not a fully-qualified ECR URL) are rejected, so
6
+ typos like ``dh batch submit --image dayhoff-generic`` cannot drift the
7
+ floating ref by registering a revision pointing at Docker Hub.
8
+ 2. When the latest active revision already carries the requested image,
9
+ no new revision is registered — the existing ARN is returned. Without
10
+ this, every override invocation bumps the JD revision counter.
11
+ """
12
+
13
+ from unittest.mock import MagicMock, patch
14
+
15
+ import pytest
16
+
17
+
18
def _make_client():
    """Build a ``BatchClient`` backed entirely by mocks.

    ``boto3`` is patched inside ``dh_cli.batch.aws_batch`` while the client
    is constructed; its first two ``boto3.client(...)`` calls receive the two
    mocks below, in order (presumably the Batch and Logs clients -- confirm
    against ``BatchClient.__init__``).

    Returns:
        A ``(client, batch_mock)`` tuple, where ``batch_mock`` is the mock
        handed out by the first ``boto3.client(...)`` call.
    """
    batch_mock, logs_mock = MagicMock(), MagicMock()
    with patch("dh_cli.batch.aws_batch.boto3") as fake_boto3:
        fake_boto3.client.side_effect = [batch_mock, logs_mock]

        from dh_cli.batch.aws_batch import BatchClient

        client = BatchClient()
    return client, batch_mock
27
+
28
+
29
# Fully-qualified ECR image reference (account, region, repo and tag) used as
# the requested image by the tests in this module.
_ECR_URL = "123456789012.dkr.ecr.us-east-1.amazonaws.com/dayhoff-generic:abc123"
30
+
31
+
32
class TestImageValidation:
    """Only fully-qualified ECR URLs get past ``_register_image_override``."""

    @pytest.mark.parametrize(
        "image_ref",
        [
            "dayhoff-generic",
            "dayhoff-generic:latest",
            "docker.io/library/python:3.12",
            "ghcr.io/foo/bar:1.0",
            "123456789012.dkr.ecr.us-east-1.amazonaws.com/dayhoff-generic with-space",
        ],
    )
    def test_rejects_non_ecr_image(self, image_ref):
        """A non-ECR reference raises ``BatchError`` and no Batch API is called."""
        from dh_cli.batch.aws_batch import BatchError

        client, batch_mock = _make_client()

        with pytest.raises(BatchError, match="fully-qualified ECR URL"):
            client._register_image_override("dayhoff-generic", image_ref)

        # Rejection must happen before the job definition is even looked up.
        batch_mock.describe_job_definitions.assert_not_called()
        batch_mock.register_job_definition.assert_not_called()

    @pytest.mark.parametrize(
        "image_ref",
        [
            "123456789012.dkr.ecr.us-east-1.amazonaws.com/dayhoff-generic:abc123",
            "123456789012.dkr.ecr.us-west-2.amazonaws.com/foo/bar:latest",
            "123456789012.dkr.ecr.us-east-1.amazonaws.com/x@sha256:" + "a" * 64,
        ],
    )
    def test_accepts_ecr_url(self, image_ref):
        """A valid ECR reference that differs from the latest revision registers a new one."""
        client, batch_mock = _make_client()

        current_revision = {
            "jobDefinitionArn": "arn:aws:batch:...:job-definition/dayhoff-generic:1",
            "revision": 1,
            "type": "container",
            "containerProperties": {"image": "old-image"},
        }
        batch_mock.describe_job_definitions.return_value = {"jobDefinitions": [current_revision]}
        batch_mock.register_job_definition.return_value = {
            "jobDefinitionArn": "arn:aws:batch:...:job-definition/dayhoff-generic:2"
        }

        client._register_image_override("dayhoff-generic", image_ref)

        batch_mock.register_job_definition.assert_called_once()
82
+
83
+
84
class TestNoOpReuse:
    """The latest revision is reused when it already carries the requested image."""

    @staticmethod
    def _revision(number, image):
        """Return a describe-job-definitions entry for revision ``number`` running ``image``."""
        return {
            "jobDefinitionArn": f"arn:...:job-definition/dayhoff-generic:{number}",
            "revision": number,
            "type": "container",
            "containerProperties": {"image": image},
        }

    def test_reuses_latest_when_image_matches(self):
        """Highest revision already has the image: its ARN comes back, nothing is registered."""
        client, batch_mock = _make_client()
        # Listed newest-first on purpose; the code under test picks the
        # highest revision number, not the first entry.
        batch_mock.describe_job_definitions.return_value = {
            "jobDefinitions": [
                self._revision(42, _ECR_URL),
                self._revision(41, "older"),
            ]
        }

        returned_arn = client._register_image_override("dayhoff-generic", _ECR_URL)

        assert returned_arn == "arn:...:job-definition/dayhoff-generic:42"
        batch_mock.register_job_definition.assert_not_called()

    def test_registers_new_revision_when_image_differs(self):
        """Latest revision has another image: a new revision with the requested image is registered."""
        client, batch_mock = _make_client()
        batch_mock.describe_job_definitions.return_value = {
            "jobDefinitions": [self._revision(42, "different-image")]
        }
        batch_mock.register_job_definition.return_value = {
            "jobDefinitionArn": "arn:...:job-definition/dayhoff-generic:43"
        }

        returned_arn = client._register_image_override("dayhoff-generic", _ECR_URL)

        assert returned_arn == "arn:...:job-definition/dayhoff-generic:43"
        batch_mock.register_job_definition.assert_called_once()
        registered = batch_mock.register_job_definition.call_args.kwargs
        assert registered["containerProperties"]["image"] == _ECR_URL

    def test_raises_when_no_active_definition(self):
        """An empty describe result surfaces as ``BatchError``."""
        from dh_cli.batch.aws_batch import BatchError

        client, batch_mock = _make_client()
        batch_mock.describe_job_definitions.return_value = {"jobDefinitions": []}

        with pytest.raises(BatchError, match="Job definition not found"):
            client._register_image_override("dayhoff-generic", _ECR_URL)
@@ -0,0 +1,90 @@
1
+ """CLI-layer image validation tests for `dh batch submit`.
2
+
3
+ Pins the contract: a non-ECR --image (or YAML image: ...) is rejected at
4
+ parse time, before any AWS call. Without this, bare names like
5
+ ``--image dayhoff-generic`` reach BatchClient and silently mint a JD
6
+ revision pointing at Docker Hub.
7
+ """
8
+
9
+ from unittest.mock import MagicMock, patch
10
+
11
+ import pytest
12
+ import yaml
13
+ from click.testing import CliRunner
14
+
15
+
16
@pytest.fixture
def cli_runner():
    """Provide a fresh Click ``CliRunner`` to each test."""
    runner = CliRunner()
    return runner
19
+
20
+
21
def _invoke(cli_runner, args, tmp_path):
    """Run the ``submit`` command with its AWS-facing collaborators patched out.

    ``get_aws_username``, ``generate_job_id`` and ``BatchClient`` are replaced
    inside ``dh_cli.batch.commands.submit``; ``--base-path`` is appended so
    the command works under ``tmp_path / "jobs"``.

    Returns:
        ``(result, client_mock, batch_cls_mock)``: the Click result, the mock
        instance ``BatchClient()`` would return, and the patched class itself
        (so callers can assert whether it was ever instantiated).
    """
    fake_client = MagicMock()
    fake_client.submit_job.return_value = "aws-uuid-img"
    full_args = [*args, "--base-path", str(tmp_path / "jobs")]

    with (
        patch("dh_cli.batch.commands.submit.get_aws_username", return_value="jason"),
        patch("dh_cli.batch.commands.submit.BatchClient", return_value=fake_client) as batch_cls,
        patch(
            "dh_cli.batch.commands.submit.generate_job_id",
            return_value="jason-batch-20260519-img00001",
        ),
    ):
        from dh_cli.batch.commands.submit import submit

        outcome = cli_runner.invoke(submit, full_args)
    return outcome, fake_client, batch_cls
39
+
40
+
41
class TestCliImageValidation:
    """`dh batch submit` validates the image before constructing a BatchClient."""

    @staticmethod
    def _assert_rejected(result, batch_cls):
        """Check the command failed with the ECR message and never built a client."""
        assert result.exit_code != 0
        assert "fully-qualified ECR URL" in result.output
        batch_cls.assert_not_called()

    @pytest.mark.parametrize(
        "bad_image",
        ["dayhoff-generic", "dayhoff-generic:latest", "ghcr.io/foo/bar:1.0"],
    )
    def test_bare_image_rejected_before_submit(self, cli_runner, tmp_path, bad_image):
        """A non-ECR ``--image`` value is rejected."""
        cli_args = ["--command", "echo hi", "--image", bad_image]
        result, _, batch_cls = _invoke(cli_runner, cli_args, tmp_path)
        self._assert_rejected(result, batch_cls)

    def test_yaml_bare_image_rejected_before_submit(self, cli_runner, tmp_path):
        """A non-ECR ``image:`` key in the YAML config is rejected the same way."""
        job_config = {"command": "echo hi", "image": "dayhoff-generic"}
        config_path = tmp_path / "job.yaml"
        config_path.write_text(yaml.dump(job_config))

        result, _, batch_cls = _invoke(cli_runner, ["-f", str(config_path)], tmp_path)
        self._assert_rejected(result, batch_cls)

    def test_valid_ecr_url_accepted(self, cli_runner, tmp_path):
        """A valid ECR URL is forwarded to ``submit_job`` as ``image_override``."""
        good = "123456789012.dkr.ecr.us-east-1.amazonaws.com/dayhoff-generic:abc"
        cli_args = ["--command", "echo hi", "--image", good]

        result, client_mock, _ = _invoke(cli_runner, cli_args, tmp_path)

        assert result.exit_code == 0, result.output
        submitted = client_mock.submit_job.call_args.kwargs
        assert submitted["image_override"] == good

    def test_no_image_skips_validation(self, cli_runner, tmp_path):
        """Without any image the command succeeds and passes no override."""
        result, client_mock, _ = _invoke(cli_runner, ["--command", "echo hi"], tmp_path)

        assert result.exit_code == 0, result.output
        submitted = client_mock.submit_job.call_args.kwargs
        assert submitted.get("image_override") is None
@@ -0,0 +1,245 @@
1
+ """Tests for the Boltz S3 tar-aware finalize download path.
2
+
3
+ When a Boltz job was run in S3 mode, workers upload one
4
+ `boltz_results_<name>.tar` per prediction (plus per-worker done markers).
5
+ Finalize must download those tars, extract them into `boltz_results_*/`
6
+ directories matching the legacy on-disk layout, and leave the existing
7
+ `_finalize_boltz` logic untouched.
8
+
9
+ See plan: nutshell/plans/dma/05_2026/0512_boltz_s3_fanout_and_cross_az_dig.md.
10
+ """
11
+
12
+ import tarfile
13
+ import tempfile
14
+ from pathlib import Path
15
+ from unittest.mock import MagicMock, patch
16
+
17
+ import pytest
18
+
19
+ from dh_cli.batch.commands.finalize import _download_boltz_s3_output
20
+
21
+
22
@pytest.fixture
def temp_dir():
    """Yield a scratch directory as a ``Path``; it is deleted when the test ends."""
    with tempfile.TemporaryDirectory() as scratch:
        scratch_path = Path(scratch)
        yield scratch_path
26
+
27
+
28
def _build_essential_tar(tar_path: Path, complex_name: str) -> None:
    """Write an uncompressed tar at ``tar_path`` mimicking a worker's per-prediction archive.

    The archive holds two files under
    ``boltz_results_<complex_name>/predictions/<complex_name>/``: a
    ``<complex_name>_model_0.cif`` and a
    ``confidence_<complex_name>_model_0.json``. Only file members are added
    (no directory entries), in sorted path order.
    """
    top_level = f"boltz_results_{complex_name}"
    with tempfile.TemporaryDirectory() as scratch:
        results_root = Path(scratch) / top_level
        prediction_dir = results_root / "predictions" / complex_name
        prediction_dir.mkdir(parents=True)

        fixtures = {
            f"{complex_name}_model_0.cif": f"CIF {complex_name}\n",
            f"confidence_{complex_name}_model_0.json": f'{{"cx":"{complex_name}"}}',
        }
        for filename, text in fixtures.items():
            (prediction_dir / filename).write_text(text)

        members = sorted(p for p in results_root.rglob("*") if p.is_file())
        with tarfile.open(tar_path, mode="w") as archive:
            for path in members:
                archive.add(path, arcname=f"{top_level}/{path.relative_to(results_root)}")
41
+
42
+
43
def _make_mock_s3_client(objects: dict[str, bytes]) -> MagicMock:
    """Return a mock S3 client that serves ``objects`` (key -> bytes) from memory.

    Two behaviours are wired up:
    - ``get_paginator(...).paginate(Bucket=..., Prefix=...)`` yields a single
      page listing every key that starts with ``Prefix``, in sorted order.
    - ``download_file(bucket, key, local_path)`` writes the object's bytes to
      ``local_path``, creating parent directories as needed.

    The bucket argument is ignored by both.
    """

    def _single_page(Bucket, Prefix, **kwargs):
        keys = sorted(key for key in objects if key.startswith(Prefix))
        yield {"Contents": [{"Key": key} for key in keys]}

    def _write_object(bucket, key, local_path, *args, **kwargs):
        destination = Path(local_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(objects[key])

    s3 = MagicMock()
    s3.get_paginator.return_value.paginate.side_effect = _single_page
    s3.download_file.side_effect = _write_object
    return s3
66
+
67
+
68
class TestDownloadsAndExtracts:
    """``_download_boltz_s3_output`` against an in-memory bucket."""

    @staticmethod
    def _tar_bytes(temp_dir, complex_name):
        """Build ``<complex_name>.tar`` in ``temp_dir`` and return its raw bytes."""
        tar_file = temp_dir / f"{complex_name}.tar"
        _build_essential_tar(tar_file, complex_name)
        return tar_file.read_bytes()

    @staticmethod
    def _download_into_fresh_dir(temp_dir, objects):
        """Run the helper against ``objects`` and return the directory it extracted into."""
        dest = temp_dir / "extracted"
        dest.mkdir()
        fake_s3 = _make_mock_s3_client(objects)
        with patch("dh_cli.batch.s3_transport._get_client", return_value=fake_s3):
            _download_boltz_s3_output("s3://bucket/jobs/j/output/", dest)
        return dest

    def test_downloads_tars_and_extracts(self, temp_dir):
        """Each tar is unpacked into its own results directory; done markers are copied."""
        objects = {
            "jobs/j/output/boltz_results_A.tar": self._tar_bytes(temp_dir, "A"),
            "jobs/j/output/boltz_results_B.tar": self._tar_bytes(temp_dir, "B"),
            "jobs/j/output/boltz_0.done": b"",
        }

        dest = self._download_into_fresh_dir(temp_dir, objects)

        for name in ("A", "B"):
            cif = dest / f"boltz_results_{name}" / "predictions" / name / f"{name}_model_0.cif"
            assert cif.read_text() == f"CIF {name}\n"
        assert (dest / "boltz_0.done").exists()

    def test_local_tar_cleaned_up_after_extract(self, temp_dir):
        """No ``*.tar`` file remains anywhere under the destination."""
        objects = {"jobs/j/output/boltz_results_A.tar": self._tar_bytes(temp_dir, "A")}

        dest = self._download_into_fresh_dir(temp_dir, objects)

        assert list(dest.rglob("*.tar")) == []

    def test_ignores_non_tar_non_done_keys(self, temp_dir):
        """Extra objects in the output prefix that aren't tars or done markers are ignored."""
        objects = {
            "jobs/j/output/boltz_results_A.tar": self._tar_bytes(temp_dir, "A"),
            "jobs/j/output/stray.txt": b"ignore me",
            "jobs/j/output/notes/readme.md": b"also ignored",
        }

        dest = self._download_into_fresh_dir(temp_dir, objects)

        assert (dest / "boltz_results_A" / "predictions" / "A" / "A_model_0.cif").exists()
        assert not (dest / "stray.txt").exists()
        assert not (dest / "notes").exists()
127
+
128
+
129
class TestRoundTrip:
    """Worker upload followed by finalize download reproduces the original files."""

    def test_worker_to_finalize_bit_identical(self, temp_dir):
        """Worker's sync_boltz_essential_to_s3 output -> finalize download -> same bytes."""
        from dh_batch.s3_sync import sync_boltz_essential_to_s3

        expected_files = {
            "A_model_0.cif": "CIF ROUNDTRIP\n",
            "confidence_A_model_0.json": '{"r":1}',
        }

        # Lay out a worker-side job directory with one prediction and a marker.
        job_dir = temp_dir / "job"
        output_dir = job_dir / "output"
        prediction_dir = output_dir / "boltz_results_A" / "predictions" / "A"
        prediction_dir.mkdir(parents=True)
        for filename, text in expected_files.items():
            (prediction_dir / filename).write_text(text)
        (output_dir / "boltz_0.done").write_text("ok")

        # Capture everything the worker uploads into an in-memory "bucket".
        bucket_contents: dict[str, bytes] = {}

        def _capture_upload(local_path, bucket, key, *args, **kwargs):
            bucket_contents[key] = Path(local_path).read_bytes()

        uploader = MagicMock()
        uploader.upload_file.side_effect = _capture_upload
        with patch("dh_batch.s3_transport._get_client", return_value=uploader):
            sync_boltz_essential_to_s3(job_dir, "s3://bucket/jobs/j/")

        # Feed the captured objects back through the finalize-side download.
        downloader = _make_mock_s3_client(bucket_contents)
        dest = temp_dir / "extracted"
        dest.mkdir()
        with patch("dh_cli.batch.s3_transport._get_client", return_value=downloader):
            _download_boltz_s3_output("s3://bucket/jobs/j/output/", dest)

        extracted_dir = dest / "boltz_results_A" / "predictions" / "A"
        for filename, text in expected_files.items():
            assert (extracted_dir / filename).read_text() == text
        assert (dest / "boltz_0.done").read_text() == "ok"
167
+
168
+
169
class TestFinalizeDispatches:
    """``finalize`` chooses its S3 download strategy from the manifest's pipeline."""

    def test_boltz_s3_uses_tar_path(self, temp_dir):
        """`dh batch finalize` for Boltz in S3 mode calls the tar-aware helper, not download_directory."""
        from dh_cli.batch.commands.finalize import finalize as finalize_cmd
        from dh_cli.batch.manifest import JobManifest, JobStatus
        from click.testing import CliRunner

        boltz_manifest = JobManifest(
            job_id="test-boltz",
            user="tester",
            pipeline="boltz",
            storage_mode="s3",
            status=JobStatus.SUCCEEDED,
            s3_prefix="s3://bucket/jobs/test-boltz/",
        )
        cli_args = ["test-boltz", "--output", str(temp_dir / "final"), "--keep-intermediates"]

        with (
            patch("dh_cli.batch.commands.finalize.load_manifest", return_value=boltz_manifest),
            patch("dh_cli.batch.commands.finalize._download_boltz_s3_output") as tar_download,
            patch("dh_cli.batch.commands.finalize._check_completion", return_value=[]),
            patch("dh_cli.batch.commands.finalize._finalize_boltz") as finalize_boltz,
            patch("dh_cli.batch.commands.finalize.save_manifest_s3"),
        ):
            # "y" is supplied on stdin in case the command asks for confirmation.
            outcome = CliRunner().invoke(finalize_cmd, cli_args, input="y\n")

        assert outcome.exit_code == 0, outcome.output
        assert tar_download.called, "tar-aware download helper must be called for Boltz S3"
        assert finalize_boltz.called

    def test_non_boltz_s3_uses_download_directory(self, temp_dir):
        """Non-Boltz pipelines (e.g. embed-t5) in S3 mode keep using download_directory."""
        from dh_cli.batch.commands.finalize import finalize as finalize_cmd
        from dh_cli.batch.manifest import JobManifest, JobStatus
        from click.testing import CliRunner

        embed_manifest = JobManifest(
            job_id="test-embed",
            user="tester",
            pipeline="embed-t5",
            storage_mode="s3",
            status=JobStatus.SUCCEEDED,
            s3_prefix="s3://bucket/jobs/test-embed/",
        )
        cli_args = ["test-embed", "--output", str(temp_dir / "out.h5"), "--keep-intermediates"]

        with (
            patch("dh_cli.batch.commands.finalize.load_manifest", return_value=embed_manifest),
            patch("dh_cli.batch.commands.finalize._download_boltz_s3_output") as tar_download,
            patch("dh_cli.batch.s3_transport.download_directory") as plain_download,
            patch("dh_cli.batch.commands.finalize._check_completion", return_value=[]),
            patch("dh_cli.batch.commands.finalize._finalize_embeddings") as finalize_embeddings,
            patch("dh_cli.batch.commands.finalize.save_manifest_s3"),
        ):
            outcome = CliRunner().invoke(finalize_cmd, cli_args)

        assert outcome.exit_code == 0, outcome.output
        assert plain_download.called, "non-Boltz pipelines should keep using download_directory"
        assert not tar_download.called, "tar-aware helper must not fire for embed-t5"
        assert finalize_embeddings.called
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes