dayhoff-tools 1.14.7__tar.gz → 1.14.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/__init__.py +3 -0
  3. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/aws_batch.py +58 -0
  4. dayhoff_tools-1.14.9/dayhoff_tools/cli/batch/commands/clean.py +139 -0
  5. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/finalize.py +97 -13
  6. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/list_jobs.py +54 -6
  7. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/status.py +63 -5
  8. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/pyproject.toml +1 -1
  9. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/README.md +0 -0
  10. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/__init__.py +0 -0
  11. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/batch/__init__.py +0 -0
  12. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/batch/workers/__init__.py +0 -0
  13. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/batch/workers/base.py +0 -0
  14. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/batch/workers/boltz.py +0 -0
  15. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
  16. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/chemistry/standardizer.py +0 -0
  17. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/chemistry/utils.py +0 -0
  18. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/__init__.py +0 -0
  19. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
  20. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/boltz.py +0 -0
  21. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/cancel.py +0 -0
  22. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -0
  23. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/local.py +0 -0
  24. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
  25. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
  26. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/commands/submit.py +0 -0
  27. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/job_id.py +0 -0
  28. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/batch/manifest.py +0 -0
  29. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/cloud_commands.py +0 -0
  30. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/__init__.py +0 -0
  31. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
  32. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
  33. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
  34. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
  35. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/shared.py +0 -0
  36. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
  37. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
  38. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
  39. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
  40. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
  41. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
  42. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
  43. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
  44. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
  45. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
  46. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
  47. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
  48. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
  49. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
  50. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
  51. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
  52. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
  53. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/github_commands.py +0 -0
  54. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/main.py +0 -0
  55. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/swarm_commands.py +0 -0
  56. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/cli/utility_commands.py +0 -0
  57. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/base.py +0 -0
  58. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  59. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  60. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  61. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/job_runner.py +0 -0
  62. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/processors.py +0 -0
  63. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/deployment/swarm.py +0 -0
  64. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/embedders.py +0 -0
  65. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/fasta.py +0 -0
  66. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/file_ops.py +0 -0
  67. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/h5.py +0 -0
  68. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/gcp.py +0 -0
  69. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/gtdb.py +0 -0
  70. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/kegg.py +0 -0
  71. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/mmseqs.py +0 -0
  72. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/structure.py +0 -0
  73. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/intake/uniprot.py +0 -0
  74. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/logs.py +0 -0
  75. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/sqlite.py +0 -0
  76. {dayhoff_tools-1.14.7 → dayhoff_tools-1.14.9}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.7
3
+ Version: 1.14.9
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -12,6 +12,7 @@ import click
12
12
 
13
13
  from .commands.boltz import boltz
14
14
  from .commands.cancel import cancel
15
+ from .commands.clean import clean
15
16
  from .commands.embed_t5 import embed_t5
16
17
  from .commands.finalize import finalize
17
18
  from .commands.list_jobs import list_jobs
@@ -36,6 +37,7 @@ def batch_cli():
36
37
  finalize Combine results and clean up
37
38
  local Run a chunk locally for debugging
38
39
  list List recent jobs
40
+ clean Remove old completed job directories
39
41
 
40
42
  \b
41
43
  Embedding Pipelines:
@@ -77,6 +79,7 @@ batch_cli.add_command(retry)
77
79
  batch_cli.add_command(finalize)
78
80
  batch_cli.add_command(local)
79
81
  batch_cli.add_command(list_jobs, name="list")
82
+ batch_cli.add_command(clean)
80
83
 
81
84
  # Register pipeline commands
82
85
  batch_cli.add_command(embed_t5, name="embed-t5")
@@ -242,6 +242,64 @@ class BatchClient:
242
242
  failed=status_summary.get("FAILED", 0),
243
243
  )
244
244
 
245
def get_job_statuses_batch(self, job_ids: list[str]) -> dict[str, str]:
    """Get the status of many AWS Batch jobs with as few API calls as possible.

    ``describe_jobs`` accepts at most 100 job IDs per call, so the IDs are
    queried in chunks of 100.

    Args:
        job_ids: List of AWS Batch job IDs.

    Returns:
        Dictionary mapping every requested job_id -> status string. The
        status is one of SUBMITTED, PENDING, RUNNABLE, STARTING, RUNNING,
        SUCCEEDED, FAILED, or "UNKNOWN" when the job was not returned by
        AWS Batch or the API call for its chunk failed.
        For array parent jobs the overall status is derived from the
        children's status summary.
    """
    if not job_ids:
        return {}

    results: dict[str, str] = {}
    batch_size = 100  # AWS Batch limit for describe_jobs

    for start in range(0, len(job_ids), batch_size):
        batch = job_ids[start : start + batch_size]

        try:
            response = self.batch.describe_jobs(jobs=batch)
        except ClientError as e:
            logger.warning(f"Failed to describe batch of jobs: {e}")
            response = {}

        for job in response.get("jobs", []):
            job_id = job.get("jobId")
            if job_id is None:
                continue
            status = job.get("status", "UNKNOWN")

            # Array parents carry "size" and "statusSummary"; array children
            # only carry "index". Only derive a status when there really is
            # a summary to derive it from, otherwise a child job (size
            # missing -> 0) would always look "complete".
            array_props = job.get("arrayProperties") or {}
            summary = array_props.get("statusSummary") or {}
            total = array_props.get("size", 0)
            if summary and total > 0:
                succeeded = summary.get("SUCCEEDED", 0)
                failed = summary.get("FAILED", 0)

                if succeeded + failed == total:
                    # All children complete
                    status = "SUCCEEDED" if failed == 0 else "FAILED"
                else:
                    # Report the most advanced state any child has reached.
                    for state in ("RUNNING", "STARTING", "RUNNABLE", "PENDING"):
                        if summary.get(state, 0) > 0:
                            status = state
                            break

            results[job_id] = status

        # describe_jobs silently omits IDs it does not know (e.g. expired
        # jobs), and a failed call returns nothing at all: make sure every
        # requested ID still gets an answer.
        for job_id in batch:
            results.setdefault(job_id, "UNKNOWN")

    return results
302
+
245
303
  def get_failed_indices(self, job_id: str) -> list[int]:
246
304
  """Get the array indices that failed for an array job.
247
305
 
@@ -0,0 +1,139 @@
1
+ """Clean command for removing old job directories."""
2
+
3
+ import click
4
+
5
+ from ..aws_batch import BatchClient, BatchError
6
+ from ..manifest import (
7
+ BATCH_JOBS_BASE,
8
+ JobStatus,
9
+ delete_job_directory,
10
+ list_jobs as list_manifests,
11
+ )
12
+ from .status import format_time_ago, _aws_status_to_job_status
13
+
14
+
15
@click.command("clean")
@click.option("--user", help="Only clean jobs for this user")
@click.option(
    "--older-than",
    # A negative age would push the cutoff into the future and match
    # every job, so reject it at the CLI boundary.
    type=click.IntRange(min=0),
    default=7,
    help="Only clean jobs older than N days [default: 7]",
)
@click.option("--dry-run", is_flag=True, help="Show what would be cleaned without deleting")
@click.option("--force", is_flag=True, help="Delete without confirmation")
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
def clean(user, older_than, dry_run, force, base_path):
    """Remove completed job directories to free up space.

    Only removes jobs that have SUCCEEDED or FAILED in AWS Batch, or whose
    manifest is already finalized/cancelled when AWS Batch no longer
    reports a status for them.
    Jobs that are still running or pending are never removed.

    \b
    Examples:
    dh batch clean # Clean jobs older than 7 days
    dh batch clean --older-than 1 # Clean jobs older than 1 day
    dh batch clean --dry-run # Show what would be cleaned
    dh batch clean --user dma # Only clean dma's jobs
    """
    from datetime import datetime, timedelta, timezone

    cutoff = datetime.now(timezone.utc) - timedelta(days=older_than)

    # Get all manifests.
    # NOTE(review): the hard limit of 500 may hide the oldest jobs if
    # list_manifests returns newest-first — confirm against manifest.list_jobs.
    manifests = list_manifests(
        base_path=base_path,
        user=user,
        status=None,
        pipeline=None,
        limit=500,
    )

    if not manifests:
        click.echo("No jobs found.")
        return

    # Filter to old jobs. Naive timestamps are treated as UTC so they can be
    # compared with the timezone-aware cutoff.
    old_manifests = []
    for m in manifests:
        created = m.created
        if created.tzinfo is None:
            created = created.replace(tzinfo=timezone.utc)
        if created < cutoff:
            old_manifests.append(m)

    if not old_manifests:
        click.echo(f"No jobs older than {older_than} days found.")
        return

    # Get live statuses for old jobs
    manifest_to_batch_id = {
        m.job_id: m.batch.job_id
        for m in old_manifests
        if m.batch and m.batch.job_id
    }

    live_statuses = {}
    if manifest_to_batch_id:
        try:
            client = BatchClient()
            live_statuses = client.get_job_statuses_batch(
                list(manifest_to_batch_id.values())
            )
        except BatchError as e:
            click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
            click.echo("Cannot safely clean jobs without knowing their status.", err=True)
            raise SystemExit(1)

    # Find jobs that are safe to clean.
    safe_to_clean = []
    for manifest in old_manifests:
        batch_id = manifest_to_batch_id.get(manifest.job_id)
        aws_status = live_statuses.get(batch_id, "UNKNOWN") if batch_id else "UNKNOWN"

        if aws_status in ("SUCCEEDED", "FAILED"):
            safe_to_clean.append((manifest, aws_status))
        elif aws_status == "UNKNOWN" and manifest.status in (
            JobStatus.FINALIZED,
            JobStatus.CANCELLED,
        ):
            # AWS Batch has no (or no longer any) record of the job, but the
            # manifest says it was finalized or cancelled - safe to clean.
            # A job AWS still reports as active is never cleaned, whatever
            # the manifest says.
            safe_to_clean.append((manifest, manifest.status.value.upper()))

    if not safe_to_clean:
        click.echo(f"No completed jobs older than {older_than} days to clean.")
        return

    # Show what will be cleaned
    click.echo()
    click.echo(f"{'JOB ID':<35} {'STATUS':<12} {'CREATED':<12}")
    click.echo("-" * 65)

    for manifest, status in safe_to_clean:
        click.echo(
            f"{manifest.job_id:<35} "
            f"{status:<12} "
            f"{format_time_ago(manifest.created):<12}"
        )

    click.echo()
    click.echo(f"Found {len(safe_to_clean)} completed jobs to clean.")

    if dry_run:
        click.echo("(dry-run: no changes made)")
        return

    # Confirm before deleting
    if not force and not click.confirm("Delete these job directories?"):
        click.echo("Cancelled.")
        return

    # Delete job directories. Deletion is best-effort: one failure must not
    # stop the remaining directories from being cleaned.
    deleted = 0
    for manifest, _ in safe_to_clean:
        try:
            delete_job_directory(manifest.job_id, base_path)
        except Exception as e:
            click.echo(f" Failed to delete {manifest.job_id}: {e}", err=True)
        else:
            deleted += 1
            click.echo(f" Deleted: {manifest.job_id}")

    click.echo()
    click.echo(f"Cleaned {deleted} job directories.")
@@ -29,17 +29,30 @@ from ..manifest import (
29
29
  is_flag=True,
30
30
  help="Don't delete job directory after finalizing",
31
31
  )
32
+ @click.option(
33
+ "--full-output",
34
+ is_flag=True,
35
+ help="For Boltz: copy entire output directory (default: only essential files)",
36
+ )
32
37
  @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
33
- def finalize(job_id, output, force, keep_intermediates, base_path):
38
+ def finalize(job_id, output, force, keep_intermediates, full_output, base_path):
34
39
  """Combine results and clean up job intermediates.
35
40
 
36
41
  For embedding jobs, combines H5 files into a single output file.
37
- For structure prediction, moves outputs to the destination.
42
+ For Boltz jobs, extracts essential files (CIF structures and confidence JSON).
38
43
 
39
44
  \b
40
45
  Examples:
46
+ # Embedding job - combine H5 files
41
47
  dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5
42
- dh batch finalize dma-embed-20260109-a3f2 --output /primordial/embeddings.h5 --force
48
+
49
+ # Boltz job - extract essential files only (default)
50
+ dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/
51
+
52
+ # Boltz job - copy all output files
53
+ dh batch finalize dma-boltz-20260113-190a --output /primordial/structures/ --full-output
54
+
55
+ # Keep job directory after finalizing
43
56
  dh batch finalize dma-embed-20260109-a3f2 --output /primordial/out.h5 --keep-intermediates
44
57
  """
45
58
  # Load manifest
@@ -81,7 +94,7 @@ def finalize(job_id, output, force, keep_intermediates, base_path):
81
94
  if manifest.pipeline in ("embed-t5", "embed"):
82
95
  _finalize_embeddings(output_dir, output_path)
83
96
  elif manifest.pipeline == "boltz":
84
- _finalize_boltz(output_dir, output_path)
97
+ _finalize_boltz(output_dir, output_path, full_output=full_output)
85
98
  else:
86
99
  _finalize_generic(output_dir, output_path)
87
100
 
@@ -196,29 +209,100 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
196
209
  shutil.copy2(h5_files[0], output_path)
197
210
 
198
211
 
199
def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
    """Copy Boltz output to its destination.

    The source directories are copied, not moved; they stay in place.

    Args:
        output_dir: Source directory containing boltz_results_* folders
        output_path: Destination directory for outputs
        full_output: If True, copy entire output directories. If False (default),
            extract only essential files (CIF structures and confidence JSON).

    Raises:
        SystemExit: With code 1 when no boltz_results_* directory exists,
            with code 0 when the user declines the confirmation prompt.
    """
    prefix = "boltz_results_"

    # Find all output directories (one per complex)
    complex_dirs = [
        d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith(prefix)
    ]

    if not complex_dirs:
        click.echo("No output directories found.", err=True)
        raise SystemExit(1)

    click.echo(f"Found {len(complex_dirs)} structure predictions")

    if full_output:
        click.echo("Mode: Copying full output (all files)")
    else:
        click.echo("Mode: Extracting essential files only (CIF + confidence JSON)")
        click.echo(" Use --full-output to copy all files")

    # Confirm before proceeding.
    # NOTE(review): this prompt is shown unconditionally; this function has no
    # access to the command's --force flag.
    click.echo()
    if not click.confirm(f"Copy results to {output_path}?"):
        click.echo("Cancelled.")
        raise SystemExit(0)

    # Ensure output directory exists
    output_path.mkdir(parents=True, exist_ok=True)

    copied_count = 0
    skipped_count = 0

    for complex_dir in complex_dirs:
        # Strip only the leading prefix: str.replace() would also remove the
        # marker from the middle of a complex name.
        complex_name = complex_dir.name[len(prefix):]
        dest = output_path / complex_name

        if dest.exists():
            click.echo(f" Skipping {complex_name} (already exists)")
            skipped_count += 1
            continue

        if full_output:
            # Copy entire directory
            shutil.copytree(complex_dir, dest)
            click.echo(f" Copied {complex_name} (full output)")
        else:
            # Extract only essential files
            _extract_essential_boltz_files(complex_dir, dest, complex_name)
            click.echo(f" Extracted {complex_name} (essential files)")

        copied_count += 1

    click.echo()
    if skipped_count > 0:
        click.echo(f"Copied {copied_count} predictions, skipped {skipped_count} existing")
    else:
        click.echo(click.style(f"✓ Copied {copied_count} structure predictions successfully", fg="green"))
273
+
274
+
275
+ def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_name: str):
276
+ """Extract only essential files from Boltz output.
277
+
278
+ Essential files are:
279
+ - predictions/*/*.cif (structure files)
280
+ - predictions/*/confidence_*.json (confidence metrics)
281
+
282
+ Args:
283
+ source_dir: Source boltz_results_* directory
284
+ dest_dir: Destination directory to create
285
+ complex_name: Name of the complex (for better error messages)
286
+ """
287
+ dest_dir.mkdir(parents=True, exist_ok=True)
288
+
289
+ predictions_dir = source_dir / "predictions"
290
+ if not predictions_dir.exists():
291
+ click.echo(f" Warning: No predictions directory found for {complex_name}", err=True)
292
+ return
293
+
294
+ # Find all subdirectories in predictions/ (usually just one named after the complex)
295
+ for pred_subdir in predictions_dir.iterdir():
296
+ if not pred_subdir.is_dir():
297
+ continue
298
+
299
+ # Copy CIF files (structures)
300
+ for cif_file in pred_subdir.glob("*.cif"):
301
+ shutil.copy2(cif_file, dest_dir / cif_file.name)
302
+
303
+ # Copy confidence JSON files
304
+ for json_file in pred_subdir.glob("confidence_*.json"):
305
+ shutil.copy2(json_file, dest_dir / json_file.name)
222
306
 
223
307
 
224
308
  def _finalize_generic(output_dir: Path, output_path: Path):
@@ -2,8 +2,9 @@
2
2
 
3
3
  import click
4
4
 
5
+ from ..aws_batch import BatchClient, BatchError
5
6
  from ..manifest import BATCH_JOBS_BASE, JobStatus, list_jobs as list_manifests
6
- from .status import format_status, format_time_ago
7
+ from .status import format_status, format_time_ago, _aws_status_to_job_status
7
8
 
8
9
 
9
10
  @click.command("list")
@@ -23,6 +24,7 @@ def list_jobs(user, status_filter, pipeline, limit, base_path):
23
24
  """List recent batch jobs.
24
25
 
25
26
  Shows a table of recent jobs with their status, pipeline type, and creation time.
27
+ Status is fetched live from AWS Batch.
26
28
 
27
29
  \b
28
30
  Examples:
@@ -34,12 +36,13 @@ def list_jobs(user, status_filter, pipeline, limit, base_path):
34
36
  """
35
37
  status_enum = JobStatus(status_filter) if status_filter else None
36
38
 
39
+ # Fetch more manifests than requested to allow filtering by live status
37
40
  manifests = list_manifests(
38
41
  base_path=base_path,
39
42
  user=user,
40
- status=status_enum,
43
+ status=None, # Don't filter by status yet - will filter after getting live status
41
44
  pipeline=pipeline,
42
- limit=limit,
45
+ limit=limit * 3, # Fetch extra to account for status filtering
43
46
  )
44
47
 
45
48
  if not manifests:
@@ -48,6 +51,51 @@ def list_jobs(user, status_filter, pipeline, limit, base_path):
48
51
  click.echo("Try removing filters to see all jobs.")
49
52
  return
50
53
 
54
+ # Collect AWS Batch job IDs for live status lookup
55
+ batch_job_ids = []
56
+ manifest_to_batch_id = {}
57
+ for m in manifests:
58
+ if m.batch and m.batch.job_id:
59
+ batch_job_ids.append(m.batch.job_id)
60
+ manifest_to_batch_id[m.job_id] = m.batch.job_id
61
+
62
+ # Fetch live statuses from AWS Batch
63
+ live_statuses = {}
64
+ if batch_job_ids:
65
+ try:
66
+ client = BatchClient()
67
+ live_statuses = client.get_job_statuses_batch(batch_job_ids)
68
+ except BatchError as e:
69
+ click.echo(f"Warning: Could not fetch live status from AWS Batch: {e}")
70
+
71
+ # Build display data with live status
72
+ display_data = []
73
+ for manifest in manifests:
74
+ # Use live status if available, otherwise fall back to manifest status
75
+ if manifest.job_id in manifest_to_batch_id:
76
+ batch_id = manifest_to_batch_id[manifest.job_id]
77
+ aws_status = live_statuses.get(batch_id)
78
+ if aws_status:
79
+ live_status = _aws_status_to_job_status(aws_status)
80
+ else:
81
+ live_status = manifest.status
82
+ else:
83
+ live_status = manifest.status
84
+
85
+ # Apply status filter if specified
86
+ if status_enum and live_status != status_enum:
87
+ continue
88
+
89
+ display_data.append((manifest, live_status))
90
+
91
+ # Stop once we have enough
92
+ if len(display_data) >= limit:
93
+ break
94
+
95
+ if not display_data:
96
+ click.echo("No jobs found matching filters.")
97
+ return
98
+
51
99
  # Print header
52
100
  click.echo()
53
101
  click.echo(
@@ -55,17 +103,17 @@ def list_jobs(user, status_filter, pipeline, limit, base_path):
55
103
  )
56
104
  click.echo("-" * 85)
57
105
 
58
- for manifest in manifests:
106
+ for manifest, live_status in display_data:
59
107
  click.echo(
60
108
  f"{manifest.job_id:<35} "
61
- f"{format_status(manifest.status):<21} " # Extra space for ANSI color codes
109
+ f"{format_status(live_status):<21} " # Extra space for ANSI color codes
62
110
  f"{manifest.pipeline:<12} "
63
111
  f"{manifest.user:<10} "
64
112
  f"{format_time_ago(manifest.created):<12}"
65
113
  )
66
114
 
67
115
  click.echo()
68
- click.echo(f"Showing {len(manifests)} jobs.")
116
+ click.echo(f"Showing {len(display_data)} jobs.")
69
117
 
70
118
  # Show filter hints
71
119
  hints = []
@@ -80,21 +80,79 @@ def status(job_id, user, status_filter, pipeline, base_path):
80
80
  _show_job_list(user, status_filter, pipeline, base_path)
81
81
 
82
82
 
83
def _aws_status_to_job_status(aws_status: str) -> JobStatus:
    """Translate an AWS Batch status string into the matching JobStatus.

    RUNNABLE, STARTING and RUNNING all collapse to JobStatus.RUNNING
    (runnable means the job is waiting for compute). Any string that is not
    a recognised AWS Batch status falls back to JobStatus.SUBMITTED.
    """
    if aws_status in ("RUNNABLE", "STARTING", "RUNNING"):
        return JobStatus.RUNNING
    if aws_status == "PENDING":
        return JobStatus.PENDING
    if aws_status == "SUCCEEDED":
        return JobStatus.SUCCEEDED
    if aws_status == "FAILED":
        return JobStatus.FAILED
    # "SUBMITTED" itself and every unrecognised value end up here.
    return JobStatus.SUBMITTED
95
+
96
+
83
97
  def _show_job_list(user, status_filter, pipeline, base_path):
84
98
  """Show a list of recent jobs."""
85
99
  status_enum = JobStatus(status_filter) if status_filter else None
86
100
  manifests = list_manifests(
87
101
  base_path=base_path,
88
102
  user=user,
89
- status=status_enum,
103
+ status=None, # Don't filter yet - we'll filter after getting live status
90
104
  pipeline=pipeline,
91
- limit=20,
105
+ limit=50, # Fetch more, filter later
92
106
  )
93
107
 
94
108
  if not manifests:
95
109
  click.echo("No jobs found.")
96
110
  return
97
111
 
112
+ # Collect AWS Batch job IDs for live status lookup
113
+ batch_job_ids = []
114
+ manifest_to_batch_id = {}
115
+ for m in manifests:
116
+ if m.batch and m.batch.job_id:
117
+ batch_job_ids.append(m.batch.job_id)
118
+ manifest_to_batch_id[m.job_id] = m.batch.job_id
119
+
120
+ # Fetch live statuses from AWS Batch
121
+ live_statuses = {}
122
+ if batch_job_ids:
123
+ try:
124
+ client = BatchClient()
125
+ live_statuses = client.get_job_statuses_batch(batch_job_ids)
126
+ except BatchError as e:
127
+ click.echo(f"Warning: Could not fetch live status from AWS Batch: {e}")
128
+
129
+ # Build display data with live status
130
+ display_data = []
131
+ for manifest in manifests:
132
+ # Use live status if available, otherwise fall back to manifest status
133
+ if manifest.job_id in manifest_to_batch_id:
134
+ batch_id = manifest_to_batch_id[manifest.job_id]
135
+ aws_status = live_statuses.get(batch_id)
136
+ if aws_status:
137
+ live_status = _aws_status_to_job_status(aws_status)
138
+ else:
139
+ live_status = manifest.status
140
+ else:
141
+ live_status = manifest.status
142
+
143
+ # Apply status filter if specified
144
+ if status_enum and live_status != status_enum:
145
+ continue
146
+
147
+ display_data.append((manifest, live_status))
148
+
149
+ if not display_data:
150
+ click.echo("No jobs found matching filters.")
151
+ return
152
+
153
+ # Limit to 20 after filtering
154
+ display_data = display_data[:20]
155
+
98
156
  # Print header
99
157
  click.echo()
100
158
  click.echo(
@@ -102,17 +160,17 @@ def _show_job_list(user, status_filter, pipeline, base_path):
102
160
  )
103
161
  click.echo("-" * 85)
104
162
 
105
- for manifest in manifests:
163
+ for manifest, live_status in display_data:
106
164
  click.echo(
107
165
  f"{manifest.job_id:<35} "
108
- f"{format_status(manifest.status):<21} " # Extra space for color codes
166
+ f"{format_status(live_status):<21} " # Extra space for color codes
109
167
  f"{manifest.pipeline:<12} "
110
168
  f"{manifest.user:<10} "
111
169
  f"{format_time_ago(manifest.created):<12}"
112
170
  )
113
171
 
114
172
  click.echo()
115
- click.echo(f"Showing {len(manifests)} most recent jobs.")
173
+ click.echo(f"Showing {len(display_data)} most recent jobs.")
116
174
  click.echo("Use 'dh batch status <job-id>' for details.")
117
175
 
118
176
 
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.14.7"
8
+ version = "1.14.9"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
File without changes