dayhoff-tools 1.14.12__tar.gz → 1.14.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/cancel.py +66 -2
  3. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/finalize.py +25 -12
  4. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/pyproject.toml +1 -1
  5. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/README.md +0 -0
  6. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/__init__.py +0 -0
  7. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/batch/__init__.py +0 -0
  8. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/batch/workers/__init__.py +0 -0
  9. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/batch/workers/base.py +0 -0
  10. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/batch/workers/boltz.py +0 -0
  11. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
  12. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/chemistry/standardizer.py +0 -0
  13. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/chemistry/utils.py +0 -0
  14. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/__init__.py +0 -0
  15. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/__init__.py +0 -0
  16. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
  17. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
  18. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/boltz.py +0 -0
  19. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/clean.py +0 -0
  20. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -0
  21. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
  22. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/local.py +0 -0
  23. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
  24. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
  25. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/status.py +0 -0
  26. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/commands/submit.py +0 -0
  27. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/job_id.py +0 -0
  28. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/batch/manifest.py +0 -0
  29. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/cloud_commands.py +0 -0
  30. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/__init__.py +0 -0
  31. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
  32. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
  33. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
  34. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
  35. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/shared.py +0 -0
  36. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
  37. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
  38. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
  39. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
  40. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
  41. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
  42. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
  43. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
  44. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
  45. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
  46. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
  47. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
  48. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
  49. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
  50. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
  51. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
  52. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
  53. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/github_commands.py +0 -0
  54. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/main.py +0 -0
  55. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/swarm_commands.py +0 -0
  56. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/cli/utility_commands.py +0 -0
  57. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/base.py +0 -0
  58. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  59. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  60. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  61. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/job_runner.py +0 -0
  62. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/processors.py +0 -0
  63. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/deployment/swarm.py +0 -0
  64. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/embedders.py +0 -0
  65. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/fasta.py +0 -0
  66. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/file_ops.py +0 -0
  67. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/h5.py +0 -0
  68. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/gcp.py +0 -0
  69. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/gtdb.py +0 -0
  70. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/kegg.py +0 -0
  71. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/mmseqs.py +0 -0
  72. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/structure.py +0 -0
  73. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/intake/uniprot.py +0 -0
  74. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/logs.py +0 -0
  75. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/sqlite.py +0 -0
  76. {dayhoff_tools-1.14.12 → dayhoff_tools-1.14.14}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.12
3
+ Version: 1.14.14
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -1,5 +1,7 @@
1
1
  """Cancel command for stopping running jobs."""
2
2
 
3
+ import re
4
+
3
5
  import click
4
6
 
5
7
  from ..aws_batch import BatchClient, BatchError
@@ -11,6 +13,14 @@ from ..manifest import (
11
13
  )
12
14
 
13
15
 
16
+ def _parse_retry_job_id(job_id: str) -> tuple[str, str | None]:
17
+ """Parse a job ID to extract parent job ID and retry suffix."""
18
+ match = re.match(r"^(.+)(-r\d+)$", job_id)
19
+ if match:
20
+ return match.group(1), job_id
21
+ return job_id, None
22
+
23
+
14
24
  @click.command()
15
25
  @click.argument("job_id")
16
26
  @click.option("--force", is_flag=True, help="Force termination of running containers")
@@ -19,19 +29,29 @@ def cancel(job_id, force, base_path):
19
29
  """Cancel a running batch job.
20
30
 
21
31
  Cancels the job in AWS Batch and updates the manifest status.
32
+ Also supports cancelling retry jobs by their ID (e.g., job-id-r1).
22
33
 
23
34
  \b
24
35
  Examples:
25
36
  dh batch cancel dma-embed-20260109-a3f2
37
+ dh batch cancel dma-embed-20260109-a3f2-r1 # Cancel specific retry
26
38
  dh batch cancel dma-embed-20260109-a3f2 --force
27
39
  """
40
+ # Check if this is a retry job ID
41
+ parent_job_id, retry_id = _parse_retry_job_id(job_id)
42
+
28
43
  # Load manifest
29
44
  try:
30
- manifest = load_manifest(job_id, base_path)
45
+ manifest = load_manifest(parent_job_id, base_path)
31
46
  except FileNotFoundError:
32
- click.echo(f"Job not found: {job_id}", err=True)
47
+ click.echo(f"Job not found: {parent_job_id}", err=True)
33
48
  raise SystemExit(1)
34
49
 
50
+ # If cancelling a specific retry job
51
+ if retry_id:
52
+ _cancel_retry_job(manifest, retry_id, force, base_path)
53
+ return
54
+
35
55
  # Check if job can be cancelled
36
56
  if manifest.status in (
37
57
  JobStatus.SUCCEEDED,
@@ -94,3 +114,47 @@ def cancel(job_id, force, base_path):
94
114
  except BatchError as e:
95
115
  click.echo(click.style(f"✗ Failed to cancel job: {e}", fg="red"), err=True)
96
116
  raise SystemExit(1)
117
+
118
+
119
def _cancel_retry_job(manifest, retry_id: str, force: bool, base_path: str):
    """Stop a single retry attempt of a job in AWS Batch.

    Looks ``retry_id`` up among ``manifest.retries`` and asks AWS Batch to
    cancel the matching job, or to terminate it when ``force`` is set.

    Args:
        manifest: Parent job manifest; its ``retries`` entries are searched.
        retry_id: Full ID of the retry to stop.
        force: Terminate the job instead of cancelling it.
        base_path: Accepted for signature parity; not used in this function.

    Raises:
        SystemExit: With status 1 when the retry is unknown, has no AWS Batch
            job ID, or the Batch call raises ``BatchError``.
    """
    # First manifest entry whose ID matches, or None when there is none.
    target = next(
        (entry for entry in manifest.retries if entry.retry_id == retry_id),
        None,
    )

    if not target:
        known_ids = [r.retry_id for r in manifest.retries]
        click.echo(f"Retry job not found: {retry_id}", err=True)
        click.echo(f"Known retries: {known_ids}", err=True)
        raise SystemExit(1)

    if not target.batch_job_id:
        click.echo(f"Retry job {retry_id} has no AWS Batch job ID.", err=True)
        raise SystemExit(1)

    try:
        batch = BatchClient()

        if not force:
            click.echo(f"Cancelling retry job {target.batch_job_id}...")
            batch.cancel_job(
                target.batch_job_id,
                reason="Cancelled by user via dh batch cancel",
            )
        else:
            click.echo(f"Terminating retry job {target.batch_job_id}...")
            batch.terminate_job(
                target.batch_job_id,
                reason="Terminated by user via dh batch cancel --force",
            )
    except BatchError as exc:
        failure = click.style(f"✗ Failed to cancel retry job: {exc}", fg="red")
        click.echo(failure, err=True)
        raise SystemExit(1)
    else:
        # Reached only when the Batch request went through without error.
        success = click.style(
            f"✓ Retry job {retry_id} cancelled successfully", fg="green"
        )
        click.echo()
        click.echo(success)
        click.echo(f"Parent job: {manifest.job_id}")
@@ -34,8 +34,13 @@ from ..manifest import (
34
34
  is_flag=True,
35
35
  help="For Boltz: copy entire output directory (default: only essential files)",
36
36
  )
37
+ @click.option(
38
+ "--skip-dedup",
39
+ is_flag=True,
40
+ help="Skip deduplication step (use if input has no duplicates)",
41
+ )
37
42
  @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
38
- def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
43
+ def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path):
39
44
  """Combine results and clean up job intermediates.
40
45
 
41
46
  For embedding jobs, combines H5 files into a single output file.
@@ -46,6 +51,9 @@ def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
46
51
  # Embedding job - combine H5 files
47
52
  dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5
48
53
 
54
+ # Skip deduplication (faster if input has no duplicates)
55
+ dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5 --skip-dedup
56
+
49
57
  # Boltz job - extract essential files only (default)
50
58
  dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/
51
59
 
@@ -92,7 +100,7 @@ def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
92
100
  # Finalize based on pipeline type
93
101
  click.echo()
94
102
  if manifest.pipeline in ("embed-t5", "embed"):
95
- _finalize_embeddings(output_dir, output_path)
103
+ _finalize_embeddings(output_dir, output_path, skip_dedup=skip_dedup)
96
104
  elif manifest.pipeline == "boltz":
97
105
  _finalize_boltz(output_dir, output_path, full_output=full_output)
98
106
  else:
@@ -182,7 +190,7 @@ def _check_completion(job_id: str, base_path: str) -> list[int]:
182
190
  return incomplete
183
191
 
184
192
 
185
- def _finalize_embeddings(output_dir: Path, output_path: Path):
193
+ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool = False):
186
194
  """Combine H5 embedding files into a single output."""
187
195
  h5_files = sorted(output_dir.glob("embed_*.h5"))
188
196
 
@@ -191,6 +199,8 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
191
199
  raise SystemExit(1)
192
200
 
193
201
  click.echo(f"Found {len(h5_files)} H5 files to combine")
202
+ if skip_dedup:
203
+ click.echo("Skipping deduplication (--skip-dedup)")
194
204
 
195
205
  # Check if output already exists
196
206
  if output_path.exists():
@@ -213,10 +223,9 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
213
223
  click.echo("Single chunk - copying directly...")
214
224
  shutil.copy2(h5_files[0], output_path)
215
225
  else:
216
- # Multiple files - combine, deduplicate, and optimize
226
+ # Multiple files - combine and optionally deduplicate
217
227
  with tempfile.TemporaryDirectory() as tmpdir:
218
228
  combined_path = Path(tmpdir) / "combined.h5"
219
- deduped_path = Path(tmpdir) / "deduped.h5"
220
229
 
221
230
  # Combine H5 files
222
231
  click.echo("Combining H5 files...")
@@ -226,13 +235,17 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
226
235
  output_file=str(combined_path),
227
236
  )
228
237
 
229
- # Deduplicate
230
- click.echo("Deduplicating...")
231
- deduplicate_h5_file(str(combined_path), str(deduped_path))
232
-
233
- # Optimize chunks
234
- click.echo("Optimizing chunks...")
235
- optimize_protein_embedding_chunks(str(deduped_path), str(output_path))
238
+ if skip_dedup:
239
+ # Skip dedup - optimize directly from combined
240
+ click.echo("Optimizing chunks...")
241
+ optimize_protein_embedding_chunks(str(combined_path), str(output_path))
242
+ else:
243
+ # Full pipeline: combine -> dedup -> optimize
244
+ deduped_path = Path(tmpdir) / "deduped.h5"
245
+ click.echo("Deduplicating...")
246
+ deduplicate_h5_file(str(combined_path), str(deduped_path))
247
+ click.echo("Optimizing chunks...")
248
+ optimize_protein_embedding_chunks(str(deduped_path), str(output_path))
236
249
 
237
250
  click.echo(click.style("✓ H5 files combined successfully", fg="green"))
238
251
 
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.14.12"
8
+ version = "1.14.14"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}