dayhoff-tools 1.14.15__tar.gz → 1.15.1__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/PKG-INFO +2 -2
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/boltz.py +5 -5
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/cancel.py +6 -2
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/clean.py +6 -2
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -1
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/finalize.py +39 -20
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/status.py +9 -3
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/submit.py +0 -1
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/main.py +2 -59
- dayhoff_tools-1.15.1/dayhoff_tools/cli/utility_commands.py +260 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/embedders.py +2 -2
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/fasta.py +2 -4
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/kegg.py +1 -3
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/structure.py +4 -4
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/pyproject.toml +2 -2
- dayhoff_tools-1.14.15/dayhoff_tools/cli/utility_commands.py +0 -1150
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/README.md +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/base.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/boltz.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/local.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/job_id.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/manifest.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/shared.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/github_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.14.15 → dayhoff_tools-1.15.1}/dayhoff_tools/warehouse.py +0 -0
```diff
--- dayhoff_tools-1.14.15/PKG-INFO
+++ dayhoff_tools-1.15.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dayhoff-tools
-Version: 1.14.15
+Version: 1.15.1
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com
@@ -40,8 +40,8 @@ Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
 Requires-Dist: toml (>=0.10)
 Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
 Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
-Requires-Dist: transformers (==4.36.2) ; extra == "full"
 Requires-Dist: transformers (>=4.36.2) ; extra == "embedders"
+Requires-Dist: transformers (>=4.36.2) ; extra == "full"
 Requires-Dist: typer (>=0.9.0)
 Requires-Dist: tzdata (>=2025.2)
 Description-Content-Type: text/markdown
```
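Aside from the version number, the only metadata change is relaxing the transformers pin for the "full" extra from ==4.36.2 to >=4.36.2, bringing it in line with the "embedders" extra. A quick way to sanity-check an installed environment against the new constraint, sketched here with the third-party packaging library (this check is illustrative only, not something the package itself performs):

```python
# Sketch: compare an installed transformers version against the old and new pins.
from importlib.metadata import version
from packaging.specifiers import SpecifierSet
from packaging.version import Version

installed = Version(version("transformers"))  # e.g. Version("4.41.2")
old_pin = SpecifierSet("==4.36.2")            # "full" extra in 1.14.15
new_pin = SpecifierSet(">=4.36.2")            # "full" extra in 1.15.1

print(installed, "satisfies old pin:", installed in old_pin)
print(installed, "satisfies new pin:", installed in new_pin)
```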
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/batch/workers/boltz.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/batch/workers/boltz.py
@@ -235,7 +235,7 @@ class BoltzProcessor:
         # Determine output directory
         # Boltz always creates boltz_results_{input_name} inside --out_dir
         input_base = os.path.splitext(os.path.basename(input_file))[0]
-
+
         if output_dir is None:
             # No output_dir specified, boltz creates in current directory
             expected_output_dir = f"boltz_results_{input_base}"
@@ -244,7 +244,9 @@ class BoltzProcessor:
             # output_dir specified - use its parent for --out_dir
             # and expect boltz_results_{input_base} inside it
             parent_dir = os.path.dirname(output_dir)
-            expected_output_dir = os.path.join(parent_dir, f"boltz_results_{input_base}")
+            expected_output_dir = os.path.join(
+                parent_dir, f"boltz_results_{input_base}"
+            )
             out_dir_arg = parent_dir if parent_dir else None
 
         logger.info(f"Running Boltz prediction for {input_file}")
@@ -455,9 +457,7 @@ def main():
             completed += 1
             continue
 
-        logger.info(
-            f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}..."
-        )
+        logger.info(f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}...")
 
         try:
             # Determine output directory
```
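The middle hunk above encodes the path convention the worker depends on: Boltz writes boltz_results_{input_base} inside whatever --out_dir it receives. A minimal standalone sketch of that rule (a hypothetical helper that mirrors the hunk's logic, not a function exposed by the package):

```python
import os


def expected_boltz_output_dir(input_file: str, output_dir: str | None) -> str:
    """Sketch of the path rule from the hunk above: Boltz creates
    boltz_results_{input_base} inside whatever --out_dir it is given."""
    input_base = os.path.splitext(os.path.basename(input_file))[0]
    if output_dir is None:
        # No --out_dir: Boltz writes into the current directory.
        return f"boltz_results_{input_base}"
    # Otherwise the parent of the requested output_dir is passed as --out_dir.
    parent_dir = os.path.dirname(output_dir)
    return os.path.join(parent_dir, f"boltz_results_{input_base}")


# Example: an input of complex_A.yaml with output_dir "runs/complex_A"
# resolves to "runs/boltz_results_complex_A".
print(expected_boltz_output_dir("inputs/complex_A.yaml", "runs/complex_A"))
```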
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/cli/batch/commands/cancel.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/batch/commands/cancel.py
@@ -152,9 +152,13 @@ def _cancel_retry_job(manifest, retry_id: str, force: bool, base_path: str):
         )
 
         click.echo()
-        click.echo(click.style(f"✓ Retry job {retry_id} cancelled successfully", fg="green"))
+        click.echo(
+            click.style(f"✓ Retry job {retry_id} cancelled successfully", fg="green")
+        )
         click.echo(f"Parent job: {manifest.job_id}")
 
     except BatchError as e:
-        click.echo(click.style(f"✗ Failed to cancel retry job: {e}", fg="red"), err=True)
+        click.echo(
+            click.style(f"✗ Failed to cancel retry job: {e}", fg="red"), err=True
+        )
         raise SystemExit(1)
```
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/cli/batch/commands/clean.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/batch/commands/clean.py
@@ -20,7 +20,9 @@ from .status import format_time_ago, _aws_status_to_job_status
     default=7,
     help="Only clean jobs older than N days [default: 7]",
 )
-@click.option("--dry-run", is_flag=True, help="Show what would be cleaned without deleting")
+@click.option(
+    "--dry-run", is_flag=True, help="Show what would be cleaned without deleting"
+)
 @click.option("--force", is_flag=True, help="Delete without confirmation")
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
 def clean(user, older_than, dry_run, force, base_path):
@@ -81,7 +83,9 @@ def clean(user, older_than, dry_run, force, base_path):
         live_statuses = client.get_job_statuses_batch(batch_job_ids)
     except BatchError as e:
         click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
-        click.echo("Cannot safely clean jobs without knowing their status.", err=True)
+        click.echo(
+            "Cannot safely clean jobs without knowing their status.", err=True
+        )
         raise SystemExit(1)
 
     # Find jobs that are safe to clean (SUCCEEDED or FAILED)
```
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/cli/batch/commands/finalize.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/batch/commands/finalize.py
@@ -40,7 +40,9 @@ from ..manifest import (
     help="Skip deduplication step (use if input has no duplicates)",
 )
 @click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
-def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path):
+def finalize(
+    job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path
+):
     """Combine results and clean up job intermediates.
 
     For embedding jobs, combines H5 files into a single output file.
@@ -238,14 +240,18 @@ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool =
         if skip_dedup:
             # Skip dedup - optimize directly from combined
             click.echo("Optimizing chunks...")
-            optimize_protein_embedding_chunks(str(combined_path), str(output_path))
+            optimize_protein_embedding_chunks(
+                str(combined_path), str(output_path)
+            )
         else:
             # Full pipeline: combine -> dedup -> optimize
             deduped_path = Path(tmpdir) / "deduped.h5"
             click.echo("Deduplicating...")
             deduplicate_h5_file(str(combined_path), str(deduped_path))
             click.echo("Optimizing chunks...")
-            optimize_protein_embedding_chunks(str(deduped_path), str(output_path))
+            optimize_protein_embedding_chunks(
+                str(deduped_path), str(output_path)
+            )
 
     click.echo(click.style("✓ H5 files combined successfully", fg="green"))
 
@@ -269,7 +275,7 @@ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool =
 
 def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
     """Move Boltz output to destination.
-
+
     Args:
         output_dir: Source directory containing boltz_results_* folders
         output_path: Destination directory for outputs
@@ -277,20 +283,24 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
     extract only essential files (CIF structures and confidence JSON).
     """
     # Find all output directories (one per complex)
-    complex_dirs = [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith("boltz_results_")]
+    complex_dirs = [
+        d
+        for d in output_dir.iterdir()
+        if d.is_dir() and d.name.startswith("boltz_results_")
+    ]
 
     if not complex_dirs:
         click.echo("No output directories found.", err=True)
         raise SystemExit(1)
 
     click.echo(f"Found {len(complex_dirs)} structure predictions")
-
+
     if full_output:
         click.echo("Mode: Copying full output (all files)")
     else:
         click.echo("Mode: Extracting essential files only (CIF + confidence JSON)")
         click.echo("  Use --full-output to copy all files")
-
+
     # Confirm before proceeding
     click.echo()
     if not click.confirm(f"Copy results to {output_path}?"):
@@ -302,16 +312,16 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
 
     copied_count = 0
     skipped_count = 0
-
+
     for complex_dir in complex_dirs:
         complex_name = complex_dir.name.replace("boltz_results_", "")
         dest = output_path / complex_name
-
+
         if dest.exists():
             click.echo(f"  Skipping {complex_name} (already exists)")
             skipped_count += 1
             continue
-
+
         if full_output:
             # Copy entire directory
             shutil.copytree(complex_dir, dest)
@@ -320,44 +330,53 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
             # Extract only essential files
             _extract_essential_boltz_files(complex_dir, dest, complex_name)
             click.echo(f"  Extracted {complex_name} (essential files)")
-
+
         copied_count += 1
 
     click.echo()
     if skipped_count > 0:
-        click.echo(f"Copied {copied_count} predictions, skipped {skipped_count} existing")
+        click.echo(
+            f"Copied {copied_count} predictions, skipped {skipped_count} existing"
+        )
     else:
-        click.echo(click.style(f"✓ Copied {copied_count} structure predictions successfully", fg="green"))
+        click.echo(
+            click.style(
+                f"✓ Copied {copied_count} structure predictions successfully",
+                fg="green",
+            )
+        )
 
 
 def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_name: str):
     """Extract only essential files from Boltz output.
-
+
     Essential files are:
     - predictions/*/*.cif (structure files)
     - predictions/*/confidence_*.json (confidence metrics)
-
+
     Args:
         source_dir: Source boltz_results_* directory
         dest_dir: Destination directory to create
         complex_name: Name of the complex (for better error messages)
     """
     dest_dir.mkdir(parents=True, exist_ok=True)
-
+
     predictions_dir = source_dir / "predictions"
     if not predictions_dir.exists():
-        click.echo(f"  Warning: No predictions directory found for {complex_name}", err=True)
+        click.echo(
+            f"  Warning: No predictions directory found for {complex_name}", err=True
+        )
         return
-
+
     # Find all subdirectories in predictions/ (usually just one named after the complex)
     for pred_subdir in predictions_dir.iterdir():
         if not pred_subdir.is_dir():
             continue
-
+
         # Copy CIF files (structures)
         for cif_file in pred_subdir.glob("*.cif"):
             shutil.copy2(cif_file, dest_dir / cif_file.name)
-
+
         # Copy confidence JSON files
         for json_file in pred_subdir.glob("confidence_*.json"):
            shutil.copy2(json_file, dest_dir / json_file.name)
```
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/cli/batch/commands/status.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/batch/commands/status.py
@@ -264,7 +264,9 @@ def _show_job_details(job_id: str, base_path: str):
             reslice_info = ""
             if retry.reslice_prefix:
                 reslice_info = f" (resliced to {retry.reslice_count} chunks)"
-            click.echo(f" - {retry.retry_id}: {len(retry.indices)} indices{reslice_info}")
+            click.echo(
+                f" - {retry.retry_id}: {len(retry.indices)} indices{reslice_info}"
+            )
             click.echo(f"  Indices: {retry.indices}")
             if retry.batch_job_id:
                 # Show brief status for retry job
@@ -273,7 +275,9 @@
                     array_status = client.get_array_job_status(retry.batch_job_id)
                     if array_status.is_complete:
                         pct = array_status.success_rate * 100
-                        color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
+                        color = (
+                            "green" if pct == 100 else "yellow" if pct > 90 else "red"
+                        )
                         click.echo(
                             f"  Status: Complete - {click.style(f'{pct:.0f}%', fg=color)} "
                             f"({array_status.succeeded}/{array_status.total} succeeded)"
@@ -364,7 +368,9 @@ def _show_retry_details(manifest, retry_id: str):
     click.echo("Retry Config:")
     click.echo(f"  Indices: {retry_info.indices}")
     if retry_info.reslice_prefix:
-        click.echo(f"  Reslice: {retry_info.reslice_prefix} ({retry_info.reslice_count} chunks)")
+        click.echo(
+            f"  Reslice: {retry_info.reslice_prefix} ({retry_info.reslice_count} chunks)"
+        )
     else:
         click.echo(f"  Reslice: No (retrying original chunks)")
 
```
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/cli/main.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/main.py
@@ -5,19 +5,13 @@ from importlib.metadata import PackageNotFoundError, version
 
 import typer
 from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
+from dayhoff_tools.cli.engine1 import engine_app as engine1_app
+from dayhoff_tools.cli.engine1 import studio_app as studio1_app
 from dayhoff_tools.cli.github_commands import gh_app
-from dayhoff_tools.cli.engine1 import (
-    engine_app as engine1_app,
-    studio_app as studio1_app,
-)
 from dayhoff_tools.cli.utility_commands import (
-    add_dependency,
     build_and_upload_wheel,
     delete_local_branch,
-    remove_dependency,
-    sync_with_toml,
     test_github_actions_locally,
-    update_dependencies,
 )
 from dayhoff_tools.warehouse import (
     _warn_if_gcp_default_sa,
@@ -48,19 +42,6 @@ app = typer.Typer(
 # Utility commands
 app.command("clean")(delete_local_branch)
 
-# Dependency Management
-app.command(
-    "tomlsync",
-    help="Sync environment with platform-specific TOML manifest (install/update dependencies).",
-)(sync_with_toml)
-app.command("add", help="Add a dependency to all platform manifests.")(add_dependency)
-app.command("remove", help="Remove a dependency from all platform manifests.")(
-    remove_dependency
-)
-app.command("update", help="Update dayhoff-tools (or all deps) and sync environment.")(
-    update_dependencies
-)
-
 # Other Utilities
 app.command("gha")(test_github_actions_locally)
 app.command("wadd")(add_to_warehouse_typer)
@@ -183,44 +164,6 @@ def build_and_upload_wheel_command(
     build_and_upload_wheel(bump_part=bump)
 
 
-# Use lazy loading for slow-loading swarm commands
-@app.command("reset")
-def reset_wrapper(
-    firestore_collection: str = typer.Option(prompt=True),
-    old_status: str = typer.Option(default="failed", prompt=True),
-    new_status: str = typer.Option(default="available", prompt=True),
-    delete_old: bool = typer.Option(default=True, prompt=True),
-):
-    """Find all the documents in the database with a given status, and
-    make a new document with the same name and a new status."""
-    from dayhoff_tools.cli.swarm_commands import reset_failed_cards
-
-    reset_failed_cards(firestore_collection, old_status, new_status, delete_old)
-
-
-@app.command("zombie")
-def zombie_wrapper(
-    firestore_collection: str = typer.Option(prompt=True),
-    delete_old: bool = typer.Option(default=True, prompt=True),
-    minutes_threshold: int = typer.Option(default=60, prompt=True),
-):
-    """Find all the documents in the database with status "assigned", and "last_updated"
-    older than a specified threshold, and make a new "available" document for them."""
-    from dayhoff_tools.cli.swarm_commands import reset_zombie_cards
-
-    reset_zombie_cards(firestore_collection, delete_old, minutes_threshold)
-
-
-@app.command("status")
-def status_wrapper(
-    firestore_collection: str = typer.Argument(),
-):
-    """Count the various statuses of items in a given collection."""
-    from dayhoff_tools.cli.swarm_commands import get_firestore_collection_status
-
-    get_firestore_collection_status(firestore_collection)
-
-
 # Deployment commands - use lazy loading but preserve argument passing
 @app.command("deploy")
 def deploy_command(
```
```diff
--- /dev/null
+++ dayhoff_tools-1.15.1/dayhoff_tools/cli/utility_commands.py
@@ -0,0 +1,260 @@
+"""CLI commands common to all repos."""
+
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import toml
+import typer
+
+# Import cloud helper lazily inside functions to avoid heavy deps at module load
+
+
+def test_github_actions_locally():
+    """Run the script test_pytest_in_github_actions_container.sh.sh."""
+    script_path = ".devcontainer/scripts/test_pytest_in_github_actions_container.sh"
+
+    try:
+        subprocess.check_call(["bash", script_path])
+        print("Script ran successfully!")
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while running the script: {e}")
+
+
+def delete_local_branch(branch_name: str, folder_path: str):
+    """Delete a local Git branch after fetching with pruning.
+
+    Args:
+        branch_name: Name of the branch to delete
+        folder_path: Path to the git repository folder
+    """
+    try:
+        # Store current working directory
+        original_dir = os.getcwd()
+
+        # Change to the specified directory
+        os.chdir(folder_path)
+        print(f"Changed to directory: {folder_path}")
+
+        # Delete the specified branch
+        delete_branch_cmd = ["git", "branch", "-D", branch_name]
+        subprocess.run(delete_branch_cmd, check=True)
+        print(f"Deleted branch: {branch_name}")
+
+        # Fetch changes from the remote repository and prune obsolete branches
+        fetch_prune_cmd = ["git", "fetch", "-p"]
+        subprocess.run(fetch_prune_cmd, check=True)
+        print("Fetched changes and pruned obsolete branches")
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while running Git commands: {e}")
+    finally:
+        # Always return to the original directory
+        os.chdir(original_dir)
+
+
+def get_current_version_from_toml(file_path="pyproject.toml"):
+    """Reads the version from a pyproject.toml file."""
+    try:
+        with open(file_path, "r") as f:
+            content = f.read()
+        version_match = re.search(r'^version\s*=\s*"([^"]+)"', content, re.MULTILINE)
+        if version_match:
+            return version_match.group(1)
+        else:
+            raise ValueError(f"Could not find version string in {file_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(f"{file_path} not found.")
+    except Exception as e:
+        raise e
+
+
+def build_and_upload_wheel(bump_part: str = "patch"):
+    """Build a Python wheel and upload to PyPI using UV.
+
+    Automatically increments the version number in pyproject.toml before building
+    based on the bump_part argument ('major', 'minor', 'patch').
+
+    Expects PyPI authentication to be configured via the environment variable:
+    - UV_PUBLISH_TOKEN
+
+    Args:
+        bump_part (str): The part of the version to bump. Defaults to 'patch'.
+    """
+    if bump_part not in ["major", "minor", "patch"]:
+        print(
+            f"Error: Invalid bump_part '{bump_part}'. Must be 'major', 'minor', or 'patch'."
+        )
+        return
+
+    # ANSI color codes
+    BLUE = "\033[94m"
+    RESET = "\033[0m"
+
+    # --- Authentication Setup ---
+    token = os.environ.get("UV_PUBLISH_TOKEN")
+
+    if not token:
+        print("Error: PyPI authentication not configured.")
+        print(
+            "Please set the UV_PUBLISH_TOKEN environment variable with your PyPI API token."
+        )
+        return
+
+    # Build the command with token authentication
+    # IMPORTANT: Mask token for printing
+    publish_cmd_safe_print = ["uv", "publish", "--token", "*****"]
+    publish_cmd = ["uv", "publish", "--token", token]
+    print("Using UV_PUBLISH_TOKEN for authentication.")
+
+    # Use standard pyproject.toml
+    pyproject_path = "pyproject.toml"
+    if not Path(pyproject_path).exists():
+        print("Error: pyproject.toml not found in current directory.")
+        return
+    current_version = None  # Initialize in case the first try block fails
+
+    try:
+        # --- Clean dist directory ---
+        dist_dir = Path("dist")
+        if dist_dir.exists():
+            print(f"Removing existing build directory: {dist_dir}")
+            shutil.rmtree(dist_dir)
+        # --- End Clean dist directory ---
+
+        # --- Version Bumping Logic ---
+        current_version = get_current_version_from_toml(pyproject_path)
+        print(f"Current version: {current_version}")
+
+        try:
+            major, minor, patch = map(int, current_version.split("."))
+        except ValueError:
+            print(
+                f"Error: Could not parse version '{current_version}'. Expected format X.Y.Z"
+            )
+            return
+
+        if bump_part == "major":
+            major += 1
+            minor = 0
+            patch = 0
+        elif bump_part == "minor":
+            minor += 1
+            patch = 0
+        else:  # patch
+            patch += 1
+
+        new_version = f"{major}.{minor}.{patch}"
+        print(f"Bumping {bump_part} version to: {new_version}")
+
+        # Read pyproject.toml
+        with open(pyproject_path, "r") as f:
+            content = f.read()
+
+        # Replace the version string
+        pattern = re.compile(
+            f'^version\s*=\s*"{re.escape(current_version)}"', re.MULTILINE
+        )
+        new_content, num_replacements = pattern.subn(
+            f'version = "{new_version}"', content
+        )
+
+        if num_replacements == 0:
+            print(
+                f"Error: Could not find 'version = \"{current_version}\"' in {pyproject_path}"
+            )
+            return  # Exit before build/publish if version wasn't updated
+        if num_replacements > 1:
+            print(
+                f"Warning: Found multiple version lines for '{current_version}'. Only the first was updated."
+            )
+
+        # Write the updated content back
+        with open(pyproject_path, "w") as f:
+            f.write(new_content)
+        print(f"Updated {pyproject_path} with version {new_version}")
+
+        # --- End Version Bumping Logic ---
+
+        # Build wheel and sdist
+        build_cmd = ["uv", "build"]
+        print(f"Running command: {BLUE}{' '.join(build_cmd)}{RESET}")
+        subprocess.run(build_cmd, check=True)
+
+        # Upload to PyPI
+        print(f"Running command: {BLUE}{' '.join(publish_cmd_safe_print)}{RESET}")
+        subprocess.run(publish_cmd, check=True)
+
+        print(f"Successfully built and uploaded version {new_version} to PyPI")
+
+        # Re-install DHT in Pixi environment when building from DHT itself
+        try:
+            proj_toml = toml.load(pyproject_path)
+            proj_name = proj_toml.get("project", {}).get("name")
+            if proj_name == "dayhoff-tools":
+                print("Re-installing dayhoff-tools into the Pixi environment...")
+                reinstall_cmd = ["pixi", "install"]
+                print(f"Running command: {BLUE}{' '.join(reinstall_cmd)}{RESET}")
+                subprocess.run(reinstall_cmd, check=True)
+                print("dayhoff-tools reinstalled in the Pixi environment.")
+        except subprocess.CalledProcessError as e:
+            print(f"Warning: Failed to reinstall dayhoff-tools locally: {e}")
+        except Exception:
+            pass  # Not dayhoff-tools or couldn't read toml
+
+    except FileNotFoundError:
+        print(f"Error: {pyproject_path} not found.")
+        # No version change happened, so no rollback needed
+    except subprocess.CalledProcessError as e:
+        print(f"Error during build/upload: {e}")
+        # Attempt to roll back version change only if it was bumped successfully
+        if current_version and new_version:
+            try:
+                print(
+                    f"Attempting to revert version in {pyproject_path} back to {current_version}..."
+                )
+                with open(pyproject_path, "r") as f:
+                    content_revert = f.read()
+                # Use new_version in pattern for reverting
+                pattern_revert = re.compile(
+                    f'^version\s*=\s*"{re.escape(new_version)}"', re.MULTILINE
+                )
+                reverted_content, num_revert = pattern_revert.subn(
+                    f'version = "{current_version}"', content_revert
+                )
+                if num_revert > 0:
+                    with open(pyproject_path, "w") as f:
+                        f.write(reverted_content)
+                    print(f"Successfully reverted version in {pyproject_path}.")
+                else:
+                    print(
+                        f"Warning: Could not find version {new_version} to revert in {pyproject_path}."
+                    )
+
+            except Exception as revert_e:
+                print(f"Warning: Failed to revert version change: {revert_e}")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        # Attempt rollback if version was bumped
+        if current_version and "new_version" in locals() and new_version:
+            try:
+                print(f"Attempting to revert version back to {current_version}...")
+                with open(pyproject_path, "r") as f:
+                    content_revert = f.read()
+                pattern_revert = re.compile(
+                    f'^version\\s*=\\s*"{re.escape(new_version)}"', re.MULTILINE
+                )
+                reverted_content, num_revert = pattern_revert.subn(
+                    f'version = "{current_version}"', content_revert
+                )
+                if num_revert > 0:
+                    with open(pyproject_path, "w") as f:
+                        f.write(reverted_content)
+                    print("Successfully reverted version in pyproject.toml.")
+                else:
+                    print(f"Warning: Could not find version {new_version} to revert.")
+            except Exception as revert_e:
+                print(f"Warning: Failed to revert version change: {revert_e}")
```
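The heart of the new build_and_upload_wheel flow above is the semantic-version bump applied to pyproject.toml before running uv build and uv publish. A minimal sketch of just that bump rule (a hypothetical standalone helper for illustration, not a function the package exposes):

```python
def bump_version(current: str, part: str = "patch") -> str:
    """Sketch of the bump rule used above: major resets minor and patch,
    minor resets patch, patch only increments the last component."""
    major, minor, patch = map(int, current.split("."))
    if part == "major":
        major, minor, patch = major + 1, 0, 0
    elif part == "minor":
        minor, patch = minor + 1, 0
    elif part == "patch":
        patch += 1
    else:
        raise ValueError(f"Invalid part: {part!r}")
    return f"{major}.{minor}.{patch}"


# Under this rule, the release covered by this diff corresponds to a minor bump
# from 1.14.15 to 1.15.0 followed by a patch bump to 1.15.1.
assert bump_version("1.14.15", "minor") == "1.15.0"
assert bump_version("1.15.0", "patch") == "1.15.1"
```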
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/embedders.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/embedders.py
@@ -179,8 +179,8 @@ class H5Reformatter(Processor):
     def embedding_file_to_df(self, file_name: str) -> pd.DataFrame:
         with h5py.File(file_name, "r") as f:
             gene_names = list(f.keys())
-            Xg = [f[key][()] for key in gene_names]  # type:ignore
-            return pd.DataFrame(np.asmatrix(Xg), index=gene_names)  # type:ignore
+            Xg = [f[key][()] for key in gene_names]  # type: ignore
+            return pd.DataFrame(np.asmatrix(Xg), index=gene_names)  # type: ignore
 
     def write_df_to_h5(self, df: pd.DataFrame, filename: str, description: str) -> None:
         """
```
```diff
--- dayhoff_tools-1.14.15/dayhoff_tools/fasta.py
+++ dayhoff_tools-1.15.1/dayhoff_tools/fasta.py
@@ -857,14 +857,12 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> None:
     # Create the SQLite database and table
     print("Creating SQLite database...")
     with sqlite3.connect(db_file) as conn:
-        conn.execute(
-            """
+        conn.execute("""
             CREATE TABLE IF NOT EXISTS proteins (
                 protein_id TEXT PRIMARY KEY,
                 sequence TEXT NOT NULL
             )
-        """
-        )
+        """)
     print("Database created successfully.")
 
     # Estimate number of records for progress bar
```