dayhoff-tools 1.14.6__tar.gz → 1.14.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/PKG-INFO +1 -1
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/boltz.py +38 -9
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/aws_batch.py +4 -4
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/boltz.py +31 -12
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/cancel.py +20 -6
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/embed_t5.py +86 -25
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/finalize.py +43 -18
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/list_jobs.py +3 -1
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/local.py +27 -10
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/logs.py +6 -2
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/retry.py +6 -2
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/status.py +21 -6
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/submit.py +9 -3
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/manifest.py +3 -1
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/main.py +5 -1
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/pyproject.toml +1 -1
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/README.md +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/base.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/job_id.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/shared.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/github_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/utility_commands.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/warehouse.py +0 -0

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/batch/workers/boltz.py

@@ -88,7 +88,9 @@ class BoltzProcessor:
         match = re.match(pattern1, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 1)"
+            )
             return protein_id

         # Pattern 2: PROTEINID_suffix (no leading number)

@@ -96,7 +98,9 @@ class BoltzProcessor:
         match = re.match(pattern2, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 2)"
+            )
             return protein_id

         # Pattern 3: Just PROTEINID (no suffix)

@@ -104,7 +108,9 @@ class BoltzProcessor:
         match = re.match(pattern3, base_name)
         if match:
             protein_id = match.group(1)
-            logger.debug(
+            logger.debug(
+                f"Extracted protein ID '{protein_id}' from '{filename}' (pattern 3)"
+            )
             return protein_id

         logger.debug(f"Could not extract protein ID from filename '{filename}'")

@@ -222,16 +228,24 @@ class BoltzProcessor:
             raise FileNotFoundError(f"Input file not found: {input_file}")

         # Enhance with MSA if available
-        enhanced_input_file, msa_found, original_yaml_data =
-            input_file
+        enhanced_input_file, msa_found, original_yaml_data = (
+            self._enhance_yaml_with_msa(input_file)
         )

         # Determine output directory
+        # Boltz always creates boltz_results_{input_name} inside --out_dir
         input_base = os.path.splitext(os.path.basename(input_file))[0]
+
         if output_dir is None:
+            # No output_dir specified, boltz creates in current directory
             expected_output_dir = f"boltz_results_{input_base}"
+            out_dir_arg = None
         else:
-
+            # output_dir specified - use its parent for --out_dir
+            # and expect boltz_results_{input_base} inside it
+            parent_dir = os.path.dirname(output_dir)
+            expected_output_dir = os.path.join(parent_dir, f"boltz_results_{input_base}")
+            out_dir_arg = parent_dir if parent_dir else None

         logger.info(f"Running Boltz prediction for {input_file}")
         logger.info(f"Output directory: {expected_output_dir}")

@@ -239,6 +253,10 @@ class BoltzProcessor:
         # Build command
         cmd = ["boltz", "predict", input_file]

+        # Add output directory if specified
+        if out_dir_arg:
+            cmd.extend(["--out_dir", out_dir_arg])
+
         # Add cache directory
         cmd.extend(["--cache", self.cache_dir])

@@ -259,7 +277,9 @@ class BoltzProcessor:
         # Handle MSA server option
         if msa_found:
             if use_msa_server_in_opts:
-                additional_args = [
+                additional_args = [
+                    arg for arg in additional_args if arg != "--use_msa_server"
+                ]
                 logger.info("Removed --use_msa_server since local MSA was found")
         else:
             if not use_msa_server_in_opts:

@@ -270,6 +290,11 @@ class BoltzProcessor:
         if not num_workers_in_opts:
             cmd.extend(["--num_workers", str(self.num_workers)])

+        # Disable cuequivariance kernels - they require cuda-devel image
+        # which is much larger. The performance difference is modest.
+        # TODO: Consider switching to cuda-devel base image if perf is critical
+        cmd.append("--no_kernels")
+
         cmd.extend(additional_args)

         # Log and run command

@@ -305,7 +330,9 @@ class BoltzProcessor:

         # Copy input config to output directory
         try:
-            config_dest = os.path.join(
+            config_dest = os.path.join(
+                expected_output_dir, os.path.basename(input_file)
+            )
             shutil.copy2(input_file, config_dest)
             logger.debug(f"Copied input config to results: {config_dest}")
         except Exception as e:

@@ -346,7 +373,9 @@ def main():
     input_files = sorted(input_dir.glob("*.yaml"))

     if index >= len(input_files):
-        logger.error(
+        logger.error(
+            f"Index {index} out of range. Found {len(input_files)} input files."
+        )
         raise RuntimeError(f"Index {index} out of range")

     input_file = input_files[index]
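
In short, the worker no longer passes the requested output directory straight to Boltz: because `boltz predict` always writes a `boltz_results_{input_name}` directory inside `--out_dir`, the worker now points `--out_dir` at the parent directory and predicts where the results will land. A minimal sketch of that path logic, pulled out of the diff above (the standalone function name is ours, not the package's):

```python
import os


def plan_boltz_output(input_file: str, output_dir: str | None) -> tuple[str, str | None]:
    """Return (expected_output_dir, out_dir_arg) the way the new worker code does."""
    input_base = os.path.splitext(os.path.basename(input_file))[0]
    if output_dir is None:
        # No output_dir: Boltz writes boltz_results_* into the current directory.
        return f"boltz_results_{input_base}", None
    parent_dir = os.path.dirname(output_dir)
    expected = os.path.join(parent_dir, f"boltz_results_{input_base}")
    return expected, parent_dir if parent_dir else None


# plan_boltz_output("inputs/P12345.yaml", "/primordial/results/P12345")
# -> ("/primordial/results/boltz_results_P12345", "/primordial/results")
```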

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/aws_batch.py

@@ -256,9 +256,7 @@ class BatchClient:
         # List child jobs with FAILED status
         try:
             paginator = self.batch.get_paginator("list_jobs")
-            for page in paginator.paginate(
-                arrayJobId=job_id, jobStatus="FAILED"
-            ):
+            for page in paginator.paginate(arrayJobId=job_id, jobStatus="FAILED"):
                 for job_summary in page.get("jobSummaryList", []):
                     # Extract array index from job ID (format: jobId:index)
                     child_id = job_summary.get("jobId", "")

@@ -361,7 +359,9 @@ class BatchClient:
                 timestamp = event.get("timestamp", 0)
                 message = event.get("message", "")
                 # Format timestamp
-                dt = time.strftime(
+                dt = time.strftime(
+                    "%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000)
+                )
                 messages.append(f"[{dt}] {message}")

         except ClientError as e:

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/boltz.py

@@ -289,7 +289,9 @@ def _run_local_mode(input_path: Path):
         click.echo(click.style("Error: No YAML files found", fg="red"), err=True)
         raise SystemExit(1)

-    click.echo(
+    click.echo(
+        f"Found {len(yaml_files)} YAML files, will process: {yaml_files[0].name}"
+    )

     # Create a temporary job directory structure
     temp_job_dir = input_path / ".local_boltz_job"

@@ -311,14 +313,25 @@ def _run_local_mode(input_path: Path):
     click.echo()

     cmd = [
-        "docker",
-        "
-        "
-        "
-        "
-        "-
-        "
-        "-
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{temp_job_dir}:{temp_job_dir}",
+        "-e",
+        f"JOB_DIR={temp_job_dir}",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
+        "-e",
+        "BOLTZ_CACHE=/primordial/.cache/boltz",
+        "-e",
+        "MSA_DIR=/primordial/.cache/msas",
+        "-e",
+        "BOLTZ_OPTIONS=--no_kernels",
         DEFAULT_IMAGE_URI,
     ]

@@ -329,13 +342,17 @@ def _run_local_mode(input_path: Path):
         result = subprocess.run(cmd)
         if result.returncode != 0:
             click.echo(
-                click.style(
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                 err=True,
             )
             raise SystemExit(result.returncode)

         # Check for output
-        output_dirs =
+        output_dirs = (
+            list(temp_output_dir.iterdir()) if temp_output_dir.exists() else []
+        )
         if output_dirs:
             click.echo()
             click.echo(click.style("✓ Prediction complete!", fg="green"))

@@ -347,7 +364,9 @@ def _run_local_mode(input_path: Path):

     except FileNotFoundError:
         click.echo(
-            click.style(
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
             err=True,
         )
         raise SystemExit(1)
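
For reference, the `cmd` list in the local-mode hunk above corresponds to a `docker run` invocation like the one sketched below. The job directory and image URI are illustrative placeholders; the real values are the CLI-generated `temp_job_dir` and the module's `DEFAULT_IMAGE_URI`.

```python
import shlex

temp_job_dir = "/data/inputs/.local_boltz_job"  # illustrative; created by the CLI
image_uri = "<DEFAULT_IMAGE_URI>"  # the Boltz ECR image configured in the module

cmd = [
    "docker", "run", "--rm",
    "--gpus", "all",
    "-v", "/primordial:/primordial",
    "-v", f"{temp_job_dir}:{temp_job_dir}",
    "-e", f"JOB_DIR={temp_job_dir}",
    "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
    "-e", "BOLTZ_CACHE=/primordial/.cache/boltz",
    "-e", "MSA_DIR=/primordial/.cache/msas",
    "-e", "BOLTZ_OPTIONS=--no_kernels",
    image_uri,
]
# Prints a single command line that could be pasted into a shell for manual debugging.
print(shlex.join(cmd))
```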

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/cancel.py

@@ -33,8 +33,14 @@ def cancel(job_id, force, base_path):
         raise SystemExit(1)

     # Check if job can be cancelled
-    if manifest.status in (
-
+    if manifest.status in (
+        JobStatus.SUCCEEDED,
+        JobStatus.FINALIZED,
+        JobStatus.CANCELLED,
+    ):
+        click.echo(
+            f"Job {job_id} is already {manifest.status.value}, cannot cancel.", err=True
+        )
         raise SystemExit(1)

     # Get Batch job ID

@@ -53,10 +59,14 @@ def cancel(job_id, force, base_path):

     if force:
         click.echo(f"Terminating job {batch_job_id}...")
-        client.terminate_job(
+        client.terminate_job(
+            batch_job_id, reason="Terminated by user via dh batch cancel --force"
+        )
     else:
         click.echo(f"Cancelling job {batch_job_id}...")
-        client.cancel_job(
+        client.cancel_job(
+            batch_job_id, reason="Cancelled by user via dh batch cancel"
+        )

     # Update manifest
     manifest.status = JobStatus.CANCELLED

@@ -70,9 +80,13 @@ def cancel(job_id, force, base_path):
         if retry_info.batch_job_id:
             try:
                 if force:
-                    client.terminate_job(
+                    client.terminate_job(
+                        retry_info.batch_job_id, reason="Parent job cancelled"
+                    )
                 else:
-                    client.cancel_job(
+                    client.cancel_job(
+                        retry_info.batch_job_id, reason="Parent job cancelled"
+                    )
                 click.echo(f" Also cancelled retry job: {retry_info.retry_id}")
             except BatchError:
                 pass  # Retry job may already be complete

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/embed_t5.py

@@ -32,14 +32,42 @@ DEFAULT_IMAGE_URI = "074735440724.dkr.ecr.us-east-1.amazonaws.com/dayhoff:embed-

 @click.command()
 @click.argument("input_fasta", type=click.Path(exists=True))
-@click.option(
-
-
-
-
+@click.option(
+    "--workers",
+    default=DEFAULT_WORKERS,
+    type=int,
+    help=f"Number of parallel workers [default: {DEFAULT_WORKERS}]",
+)
+@click.option(
+    "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
+)
+@click.option(
+    "--seqs-per-chunk",
+    default=DEFAULT_SEQS_PER_CHUNK,
+    type=int,
+    help=f"Sequences per chunk [default: {DEFAULT_SEQS_PER_CHUNK}]",
+)
+@click.option(
+    "--local",
+    "run_local",
+    is_flag=True,
+    help="Run single chunk locally instead of Batch",
+)
+@click.option(
+    "--shell", "run_shell", is_flag=True, help="Drop into container shell for debugging"
+)
 @click.option("--dry-run", is_flag=True, help="Show plan without submitting")
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
-def embed_t5(
+def embed_t5(
+    input_fasta,
+    workers,
+    queue,
+    seqs_per_chunk,
+    run_local,
+    run_shell,
+    dry_run,
+    base_path,
+):
     """Generate T5 protein embeddings for a FASTA file.

     Splits the input FASTA into chunks and processes them in parallel using

@@ -115,7 +143,14 @@ def _split_fasta(input_path: Path, output_dir: Path, seqs_per_chunk: int) -> int
     return num_chunks


-def _submit_batch_job(
+def _submit_batch_job(
+    input_path: Path,
+    workers: int,
+    queue: str,
+    seqs_per_chunk: int,
+    dry_run: bool,
+    base_path: str,
+):
     """Submit embedding job to AWS Batch."""
     # Count sequences
     click.echo(f"Counting sequences in {input_path}...")

@@ -123,7 +158,9 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
     click.echo(f"Found {num_sequences:,} sequences")

     if num_sequences == 0:
-        click.echo(
+        click.echo(
+            click.style("Error: No sequences found in input file", fg="red"), err=True
+        )
         raise SystemExit(1)

     # Calculate chunks

@@ -223,7 +260,9 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
         click.echo(f" Cancel: dh batch cancel {job_id}")
         click.echo()
         click.echo("After completion:")
-        click.echo(
+        click.echo(
+            f" Finalize: dh batch finalize {job_id} --output /primordial/embeddings.h5"
+        )

     except BatchError as e:
         manifest.status = JobStatus.FAILED

@@ -265,12 +304,19 @@ def _run_local_mode(input_path: Path):
     click.echo()

     cmd = [
-        "docker",
-        "
-        "
-        "
-        "
-        "-
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{temp_job_dir}:{temp_job_dir}",
+        "-e",
+        f"JOB_DIR={temp_job_dir}",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
         DEFAULT_IMAGE_URI,
     ]

@@ -281,7 +327,9 @@ def _run_local_mode(input_path: Path):
         result = subprocess.run(cmd)
         if result.returncode != 0:
             click.echo(
-                click.style(
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                 err=True,
             )
             raise SystemExit(result.returncode)

@@ -303,7 +351,9 @@ def _run_local_mode(input_path: Path):

     except FileNotFoundError:
         click.echo(
-            click.style(
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
             err=True,
         )
         raise SystemExit(1)

@@ -318,13 +368,22 @@ def _run_shell_mode(input_path: Path):
     input_dir = input_path.parent

     cmd = [
-        "docker",
-        "
-        "
-        "-
-        "
-        "
-        "
+        "docker",
+        "run",
+        "--rm",
+        "-it",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{input_dir}:/input",
+        "-e",
+        "JOB_DIR=/input",
+        "-e",
+        "AWS_BATCH_JOB_ARRAY_INDEX=0",
+        "--entrypoint",
+        "/bin/bash",
         DEFAULT_IMAGE_URI,
     ]

@@ -335,7 +394,9 @@ def _run_shell_mode(input_path: Path):
         subprocess.run(cmd)
     except FileNotFoundError:
         click.echo(
-            click.style(
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
             err=True,
         )
         raise SystemExit(1)
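
The `--seqs-per-chunk` option controls the fan-out of the array job: the number of chunks written by `_split_fasta` (and hence the Batch array size) is, in effect, the ceiling of the sequence count over the chunk size. A small illustration of that arithmetic (the helper below is ours, not part of the package):

```python
import math


def expected_chunks(num_sequences: int, seqs_per_chunk: int) -> int:
    """Number of FASTA chunks, i.e. the Batch array size, for a given split."""
    return math.ceil(num_sequences / seqs_per_chunk)


assert expected_chunks(25_000, 10_000) == 3  # three array indices, last chunk partial
assert expected_chunks(10_000, 10_000) == 1  # single chunk; --local covers this case
```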

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/finalize.py

@@ -17,9 +17,18 @@ from ..manifest import (

 @click.command()
 @click.argument("job_id")
-@click.option(
+@click.option(
+    "--output",
+    required=True,
+    type=click.Path(),
+    help="Output path for combined results",
+)
 @click.option("--force", is_flag=True, help="Finalize even if some chunks failed")
-@click.option(
+@click.option(
+    "--keep-intermediates",
+    is_flag=True,
+    help="Don't delete job directory after finalizing",
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def finalize(job_id, output, force, keep_intermediates, base_path):
     """Combine results and clean up job intermediates.

@@ -59,7 +68,9 @@ def finalize(job_id, output, force, keep_intermediates, base_path):
         click.echo(f" dh batch retry {job_id}")
         raise SystemExit(1)
     click.echo()
-    click.echo(
+    click.echo(
+        click.style("Warning: Finalizing with incomplete chunks", fg="yellow")
+    )

     # Update status
     manifest.status = JobStatus.FINALIZING

@@ -132,24 +143,38 @@ def _finalize_embeddings(output_dir: Path, output_path: Path):
     output_path.parent.mkdir(parents=True, exist_ok=True)

     try:
-        from dayhoff_tools.h5 import
-
-
-
-        # Get list of h5 file paths as strings
-        h5_file_paths = [str(f) for f in h5_files]
-        combine_h5_files(
-            input_files=h5_file_paths,
-            output_file=str(output_path),
+        from dayhoff_tools.h5 import (
+            combine_h5_files,
+            deduplicate_h5_file,
+            optimize_protein_embedding_chunks,
         )
+        import tempfile
+
+        if len(h5_files) == 1:
+            # Single file - just copy, no need to combine/dedup/optimize
+            click.echo("Single chunk - copying directly...")
+            shutil.copy2(h5_files[0], output_path)
+        else:
+            # Multiple files - combine, deduplicate, and optimize
+            with tempfile.TemporaryDirectory() as tmpdir:
+                combined_path = Path(tmpdir) / "combined.h5"
+                deduped_path = Path(tmpdir) / "deduped.h5"
+
+                # Combine H5 files
+                click.echo("Combining H5 files...")
+                h5_file_paths = [str(f) for f in h5_files]
+                combine_h5_files(
+                    input_files=h5_file_paths,
+                    output_file=str(combined_path),
+                )

-
-
-
+                # Deduplicate
+                click.echo("Deduplicating...")
+                deduplicate_h5_file(str(combined_path), str(deduped_path))

-
-
-
+                # Optimize chunks
+                click.echo("Optimizing chunks...")
+                optimize_protein_embedding_chunks(str(deduped_path), str(output_path))

     click.echo(click.style("✓ H5 files combined successfully", fg="green"))
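
The embeddings path of `finalize` now branches on the number of chunk outputs: a single H5 file is copied straight to the destination, while multiple files are combined, deduplicated, and chunk-optimized via a temporary directory. A condensed sketch of that flow, using the same `dayhoff_tools.h5` helpers the diff imports (the standalone signature here is simplified relative to `_finalize_embeddings`):

```python
import shutil
import tempfile
from pathlib import Path

from dayhoff_tools.h5 import (
    combine_h5_files,
    deduplicate_h5_file,
    optimize_protein_embedding_chunks,
)


def finalize_embeddings(h5_files: list[Path], output_path: Path) -> None:
    if len(h5_files) == 1:
        # One chunk: nothing to combine or deduplicate, just copy it.
        shutil.copy2(h5_files[0], output_path)
        return
    with tempfile.TemporaryDirectory() as tmpdir:
        combined = Path(tmpdir) / "combined.h5"
        deduped = Path(tmpdir) / "deduped.h5"
        combine_h5_files(
            input_files=[str(f) for f in h5_files],
            output_file=str(combined),
        )
        deduplicate_h5_file(str(combined), str(deduped))
        optimize_protein_embedding_chunks(str(deduped), str(output_path))
```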

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/list_jobs.py

@@ -15,7 +15,9 @@ from .status import format_status, format_time_ago
     help="Filter by status",
 )
 @click.option("--pipeline", help="Filter by pipeline type")
-@click.option(
+@click.option(
+    "--limit", default=20, type=int, help="Maximum number of jobs to show [default: 20]"
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def list_jobs(user, status_filter, pipeline, limit, base_path):
     """List recent batch jobs.

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/local.py

@@ -10,7 +10,12 @@ from ..manifest import BATCH_JOBS_BASE, get_job_dir, load_manifest
 @click.command()
 @click.argument("job_id")
 @click.option("--index", required=True, type=int, help="Array index to run")
-@click.option(
+@click.option(
+    "--shell",
+    "run_shell",
+    is_flag=True,
+    help="Drop into shell instead of running command",
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def local(job_id, index, run_shell, base_path):
     """Run a job chunk locally for debugging.

@@ -54,13 +59,21 @@ def local(job_id, index, run_shell, base_path):

     # Build Docker command
     cmd = [
-        "docker",
-        "
-        "
-        "
-        "
-        "-
-        "
+        "docker",
+        "run",
+        "--rm",
+        "--gpus",
+        "all",
+        "-v",
+        "/primordial:/primordial",
+        "-v",
+        f"{job_dir}:{job_dir}",
+        "-e",
+        f"AWS_BATCH_JOB_ARRAY_INDEX={index}",
+        "-e",
+        f"JOB_DIR={job_dir}",
+        "-e",
+        f"JOB_ID={job_id}",
     ]

     if run_shell:

@@ -81,7 +94,9 @@ def local(job_id, index, run_shell, base_path):
         result = subprocess.run(cmd)
         if result.returncode != 0:
             click.echo(
-                click.style(
+                click.style(
+                    f"Container exited with code {result.returncode}", fg="red"
+                ),
                 err=True,
             )
             raise SystemExit(result.returncode)

@@ -89,7 +104,9 @@ def local(job_id, index, run_shell, base_path):
         click.echo(click.style("✓ Container completed successfully", fg="green"))
     except FileNotFoundError:
         click.echo(
-            click.style(
+            click.style(
+                "Error: Docker not found. Is Docker installed and running?", fg="red"
+            ),
             err=True,
         )
         raise SystemExit(1)

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/logs.py

@@ -77,7 +77,9 @@ def _show_job_logs(client: BatchClient, batch_job_id: str, tail: int, follow: bo
         click.echo(f" dh batch logs {batch_job_id.split('-')[0]} --failed")
         click.echo()
         click.echo("To view logs for a specific index:")
-        click.echo(
+        click.echo(
+            f" dh batch logs {batch_job_id.split('-')[0]} --index {failed_indices[0]}"
+        )
         return

     # Single job - show logs

@@ -94,7 +96,9 @@ def _show_job_logs(client: BatchClient, batch_job_id: str, tail: int, follow: bo
     click.echo(click.style(f"Error fetching logs: {e}", fg="red"), err=True)


-def _show_index_logs(
+def _show_index_logs(
+    client: BatchClient, batch_job_id: str, index: int, tail: int, follow: bool
+):
     """Show logs for a specific array index."""
     child_job_id = f"{batch_job_id}:{index}"

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/retry.py

@@ -19,7 +19,9 @@ from ..manifest import (
 @click.command()
 @click.argument("job_id")
 @click.option("--indices", help="Specific indices to retry (comma-separated)")
-@click.option(
+@click.option(
+    "--dry-run", is_flag=True, help="Show what would be retried without submitting"
+)
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def retry(job_id, indices, dry_run, base_path):
     """Retry failed chunks of a batch job.

@@ -112,7 +114,9 @@ def retry(job_id, indices, dry_run, base_path):
         click.echo(f" View logs: dh batch logs {job_id}")

     except BatchError as e:
-        click.echo(
+        click.echo(
+            click.style(f"✗ Failed to submit retry job: {e}", fg="red"), err=True
+        )
         raise SystemExit(1)

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/status.py

@@ -3,7 +3,12 @@
 import click

 from ..aws_batch import BatchClient, BatchError
-from ..manifest import
+from ..manifest import (
+    BATCH_JOBS_BASE,
+    JobStatus,
+    list_jobs as list_manifests,
+    load_manifest,
+)


 def format_status(status: JobStatus) -> str:

@@ -125,8 +130,12 @@ def _show_job_details(job_id: str, base_path: str):
     click.echo(f"Status: {format_status(manifest.status)}")
     click.echo(f"Pipeline: {manifest.pipeline}")
     click.echo(f"User: {manifest.user}")
-    click.echo(
-
+    click.echo(
+        f"Created: {manifest.created.isoformat()} ({format_time_ago(manifest.created)})"
+    )
+    click.echo(
+        f"Updated: {manifest.updated.isoformat()} ({format_time_ago(manifest.updated)})"
+    )

     if manifest.input:
         click.echo()

@@ -182,7 +191,9 @@ def _show_job_details(job_id: str, base_path: str):
         click.echo(f" Retry: dh batch retry {job_id}")
     elif manifest.status == JobStatus.SUCCEEDED:
         click.echo("Next steps:")
-        click.echo(
+        click.echo(
+            f" Finalize: dh batch finalize {job_id} --output /primordial/output.h5"
+        )


 def _show_array_status(batch_job_id: str):

@@ -205,10 +216,14 @@ def _show_array_status(batch_job_id: str):
         if array_status.is_complete:
             pct = array_status.success_rate * 100
             color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
-            click.echo(
+            click.echo(
+                f" Complete: {click.style(f'{pct:.1f}%', fg=color)} success rate"
+            )
         else:
             pct = array_status.completed / array_status.total * 100
-            click.echo(
+            click.echo(
+                f" Progress: {pct:.1f}% ({array_status.completed}/{array_status.total})"
+            )

     except BatchError as e:
         click.echo(f" (Could not fetch live status: {e})")

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/commands/submit.py

@@ -25,9 +25,13 @@ DEFAULT_QUEUE = "t4-1x-spot"


 @click.command()
-@click.option(
+@click.option(
+    "-f", "--file", "config_file", type=click.Path(exists=True), help="Config file path"
+)
 @click.option("--command", help="Command to run (alternative to config file)")
-@click.option(
+@click.option(
+    "--queue", default=DEFAULT_QUEUE, help=f"Batch queue [default: {DEFAULT_QUEUE}]"
+)
 @click.option("--memory", default="30G", help="Memory limit (e.g., 30G)")
 @click.option("--vcpus", default=8, type=int, help="Number of vCPUs")
 @click.option("--gpus", default=1, type=int, help="Number of GPUs")

@@ -91,7 +95,9 @@ def submit(
     # Override with command-line options
     job_command = command or config.get("command")
     if not job_command:
-        raise click.UsageError(
+        raise click.UsageError(
+            "Must specify --command or provide config file with 'command' field"
+        )

     job_queue = queue if queue != DEFAULT_QUEUE else config.get("queue", queue)
     job_memory = memory if memory != "30G" else config.get("memory", memory)

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/batch/manifest.py

@@ -33,7 +33,9 @@ class InputConfig(BaseModel):
     """Configuration for job input."""

     source: str = Field(..., description="Path to input file or directory")
-    num_sequences: int | None = Field(
+    num_sequences: int | None = Field(
+        None, description="Number of sequences (for FASTA)"
+    )
     num_chunks: int | None = Field(None, description="Number of chunks created")
     sequences_per_chunk: int | None = Field(None, description="Sequences per chunk")
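
The reflowed `num_sequences` field does not change behavior; only `source` is required when the manifest is first written, with the chunking fields filled in later. A hypothetical usage sketch (field names come from the diff; the import path follows the file's location):

```python
from dayhoff_tools.cli.batch.manifest import InputConfig

# Before chunking: only the input source is known.
cfg = InputConfig(source="/primordial/inputs/proteins.fasta")
assert cfg.num_sequences is None and cfg.num_chunks is None

# After chunking, the same model records the split bookkeeping.
cfg = InputConfig(
    source="/primordial/inputs/proteins.fasta",
    num_sequences=25_000,
    sequences_per_chunk=10_000,
    num_chunks=3,
)
```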

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/dayhoff_tools/cli/main.py

@@ -6,7 +6,10 @@ from importlib.metadata import PackageNotFoundError, version
 import typer
 from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
 from dayhoff_tools.cli.github_commands import gh_app
-from dayhoff_tools.cli.engine1 import
+from dayhoff_tools.cli.engine1 import (
+    engine_app as engine1_app,
+    studio_app as studio1_app,
+)
 from dayhoff_tools.cli.utility_commands import (
     add_dependency,
     build_and_upload_wheel,

@@ -70,6 +73,7 @@ app.add_typer(gcp_app, name="gcp", help="Manage GCP authentication and impersona
 app.add_typer(aws_app, name="aws", help="Manage AWS SSO authentication.")
 app.add_typer(gh_app, name="gh", help="Manage GitHub authentication.")

+
 # Engine and Studio commands (v2 - new default with progress tracking)
 # These use Click instead of Typer, so we need a passthrough wrapper
 @app.command(

{dayhoff_tools-1.14.6 → dayhoff_tools-1.14.7}/pyproject.toml

@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "dayhoff-tools"
-version = "1.14.6"
+version = "1.14.7"
 description = "Common tools for all the repos at Dayhoff Labs"
 authors = [
     {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}