dayhoff-tools 1.15.0__tar.gz → 1.15.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/PKG-INFO +2 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/boltz.py +5 -5
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/cancel.py +6 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/clean.py +6 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/embed_t5.py +0 -1
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/finalize.py +39 -20
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/status.py +9 -3
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/submit.py +0 -1
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/utility_commands.py +0 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/embedders.py +2 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/fasta.py +2 -4
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/kegg.py +1 -3
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/structure.py +4 -4
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/pyproject.toml +2 -2
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/README.md +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/base.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/boltz.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/local.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/job_id.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/batch/manifest.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/shared.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/studio_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/github_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/processors.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/warehouse.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dayhoff-tools
|
|
3
|
-
Version: 1.15.0
|
|
3
|
+
Version: 1.15.1
|
|
4
4
|
Summary: Common tools for all the repos at Dayhoff Labs
|
|
5
5
|
Author: Daniel Martin-Alarcon
|
|
6
6
|
Author-email: dma@dayhofflabs.com
|
|
@@ -40,8 +40,8 @@ Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
|
|
|
40
40
|
Requires-Dist: toml (>=0.10)
|
|
41
41
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
|
|
42
42
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
|
|
43
|
-
Requires-Dist: transformers (==4.36.2) ; extra == "full"
|
|
44
43
|
Requires-Dist: transformers (>=4.36.2) ; extra == "embedders"
|
|
44
|
+
Requires-Dist: transformers (>=4.36.2) ; extra == "full"
|
|
45
45
|
Requires-Dist: typer (>=0.9.0)
|
|
46
46
|
Requires-Dist: tzdata (>=2025.2)
|
|
47
47
|
Description-Content-Type: text/markdown
|
|
@@ -235,7 +235,7 @@ class BoltzProcessor:
|
|
|
235
235
|
# Determine output directory
|
|
236
236
|
# Boltz always creates boltz_results_{input_name} inside --out_dir
|
|
237
237
|
input_base = os.path.splitext(os.path.basename(input_file))[0]
|
|
238
|
-
|
|
238
|
+
|
|
239
239
|
if output_dir is None:
|
|
240
240
|
# No output_dir specified, boltz creates in current directory
|
|
241
241
|
expected_output_dir = f"boltz_results_{input_base}"
|
|
@@ -244,7 +244,9 @@ class BoltzProcessor:
|
|
|
244
244
|
# output_dir specified - use its parent for --out_dir
|
|
245
245
|
# and expect boltz_results_{input_base} inside it
|
|
246
246
|
parent_dir = os.path.dirname(output_dir)
|
|
247
|
-
expected_output_dir = os.path.join(parent_dir, f"boltz_results_{input_base}")
|
|
247
|
+
expected_output_dir = os.path.join(
|
|
248
|
+
parent_dir, f"boltz_results_{input_base}"
|
|
249
|
+
)
|
|
248
250
|
out_dir_arg = parent_dir if parent_dir else None
|
|
249
251
|
|
|
250
252
|
logger.info(f"Running Boltz prediction for {input_file}")
|
|
@@ -455,9 +457,7 @@ def main():
|
|
|
455
457
|
completed += 1
|
|
456
458
|
continue
|
|
457
459
|
|
|
458
|
-
logger.info(
|
|
459
|
-
f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}..."
|
|
460
|
-
)
|
|
460
|
+
logger.info(f"[{file_idx + 1}/{len(my_files)}] Processing {file_stem}...")
|
|
461
461
|
|
|
462
462
|
try:
|
|
463
463
|
# Determine output directory
|
|
@@ -152,9 +152,13 @@ def _cancel_retry_job(manifest, retry_id: str, force: bool, base_path: str):
|
|
|
152
152
|
)
|
|
153
153
|
|
|
154
154
|
click.echo()
|
|
155
|
-
click.echo(
|
|
155
|
+
click.echo(
|
|
156
|
+
click.style(f"✓ Retry job {retry_id} cancelled successfully", fg="green")
|
|
157
|
+
)
|
|
156
158
|
click.echo(f"Parent job: {manifest.job_id}")
|
|
157
159
|
|
|
158
160
|
except BatchError as e:
|
|
159
|
-
click.echo(
|
|
161
|
+
click.echo(
|
|
162
|
+
click.style(f"✗ Failed to cancel retry job: {e}", fg="red"), err=True
|
|
163
|
+
)
|
|
160
164
|
raise SystemExit(1)
|
|
@@ -20,7 +20,9 @@ from .status import format_time_ago, _aws_status_to_job_status
|
|
|
20
20
|
default=7,
|
|
21
21
|
help="Only clean jobs older than N days [default: 7]",
|
|
22
22
|
)
|
|
23
|
-
@click.option(
|
|
23
|
+
@click.option(
|
|
24
|
+
"--dry-run", is_flag=True, help="Show what would be cleaned without deleting"
|
|
25
|
+
)
|
|
24
26
|
@click.option("--force", is_flag=True, help="Delete without confirmation")
|
|
25
27
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
26
28
|
def clean(user, older_than, dry_run, force, base_path):
|
|
@@ -81,7 +83,9 @@ def clean(user, older_than, dry_run, force, base_path):
|
|
|
81
83
|
live_statuses = client.get_job_statuses_batch(batch_job_ids)
|
|
82
84
|
except BatchError as e:
|
|
83
85
|
click.echo(f"Error: Could not fetch status from AWS Batch: {e}", err=True)
|
|
84
|
-
click.echo(
|
|
86
|
+
click.echo(
|
|
87
|
+
"Cannot safely clean jobs without knowing their status.", err=True
|
|
88
|
+
)
|
|
85
89
|
raise SystemExit(1)
|
|
86
90
|
|
|
87
91
|
# Find jobs that are safe to clean (SUCCEEDED or FAILED)
|
|
@@ -40,7 +40,9 @@ from ..manifest import (
|
|
|
40
40
|
help="Skip deduplication step (use if input has no duplicates)",
|
|
41
41
|
)
|
|
42
42
|
@click.option("--base-path", default=BATCH_JOBS_BASE, help="Base path for job data")
|
|
43
|
-
def finalize(job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path):
|
|
43
|
+
def finalize(
|
|
44
|
+
job_id, output, force, keep_intermediates, full_output, skip_dedup, base_path
|
|
45
|
+
):
|
|
44
46
|
"""Combine results and clean up job intermediates.
|
|
45
47
|
|
|
46
48
|
For embedding jobs, combines H5 files into a single output file.
|
|
@@ -238,14 +240,18 @@ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool =
|
|
|
238
240
|
if skip_dedup:
|
|
239
241
|
# Skip dedup - optimize directly from combined
|
|
240
242
|
click.echo("Optimizing chunks...")
|
|
241
|
-
optimize_protein_embedding_chunks(
|
|
243
|
+
optimize_protein_embedding_chunks(
|
|
244
|
+
str(combined_path), str(output_path)
|
|
245
|
+
)
|
|
242
246
|
else:
|
|
243
247
|
# Full pipeline: combine -> dedup -> optimize
|
|
244
248
|
deduped_path = Path(tmpdir) / "deduped.h5"
|
|
245
249
|
click.echo("Deduplicating...")
|
|
246
250
|
deduplicate_h5_file(str(combined_path), str(deduped_path))
|
|
247
251
|
click.echo("Optimizing chunks...")
|
|
248
|
-
optimize_protein_embedding_chunks(
|
|
252
|
+
optimize_protein_embedding_chunks(
|
|
253
|
+
str(deduped_path), str(output_path)
|
|
254
|
+
)
|
|
249
255
|
|
|
250
256
|
click.echo(click.style("✓ H5 files combined successfully", fg="green"))
|
|
251
257
|
|
|
@@ -269,7 +275,7 @@ def _finalize_embeddings(output_dir: Path, output_path: Path, skip_dedup: bool =
|
|
|
269
275
|
|
|
270
276
|
def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = False):
|
|
271
277
|
"""Move Boltz output to destination.
|
|
272
|
-
|
|
278
|
+
|
|
273
279
|
Args:
|
|
274
280
|
output_dir: Source directory containing boltz_results_* folders
|
|
275
281
|
output_path: Destination directory for outputs
|
|
@@ -277,20 +283,24 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = Fal
|
|
|
277
283
|
extract only essential files (CIF structures and confidence JSON).
|
|
278
284
|
"""
|
|
279
285
|
# Find all output directories (one per complex)
|
|
280
|
-
complex_dirs = [
|
|
286
|
+
complex_dirs = [
|
|
287
|
+
d
|
|
288
|
+
for d in output_dir.iterdir()
|
|
289
|
+
if d.is_dir() and d.name.startswith("boltz_results_")
|
|
290
|
+
]
|
|
281
291
|
|
|
282
292
|
if not complex_dirs:
|
|
283
293
|
click.echo("No output directories found.", err=True)
|
|
284
294
|
raise SystemExit(1)
|
|
285
295
|
|
|
286
296
|
click.echo(f"Found {len(complex_dirs)} structure predictions")
|
|
287
|
-
|
|
297
|
+
|
|
288
298
|
if full_output:
|
|
289
299
|
click.echo("Mode: Copying full output (all files)")
|
|
290
300
|
else:
|
|
291
301
|
click.echo("Mode: Extracting essential files only (CIF + confidence JSON)")
|
|
292
302
|
click.echo(" Use --full-output to copy all files")
|
|
293
|
-
|
|
303
|
+
|
|
294
304
|
# Confirm before proceeding
|
|
295
305
|
click.echo()
|
|
296
306
|
if not click.confirm(f"Copy results to {output_path}?"):
|
|
@@ -302,16 +312,16 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = Fal
|
|
|
302
312
|
|
|
303
313
|
copied_count = 0
|
|
304
314
|
skipped_count = 0
|
|
305
|
-
|
|
315
|
+
|
|
306
316
|
for complex_dir in complex_dirs:
|
|
307
317
|
complex_name = complex_dir.name.replace("boltz_results_", "")
|
|
308
318
|
dest = output_path / complex_name
|
|
309
|
-
|
|
319
|
+
|
|
310
320
|
if dest.exists():
|
|
311
321
|
click.echo(f" Skipping {complex_name} (already exists)")
|
|
312
322
|
skipped_count += 1
|
|
313
323
|
continue
|
|
314
|
-
|
|
324
|
+
|
|
315
325
|
if full_output:
|
|
316
326
|
# Copy entire directory
|
|
317
327
|
shutil.copytree(complex_dir, dest)
|
|
@@ -320,44 +330,53 @@ def _finalize_boltz(output_dir: Path, output_path: Path, full_output: bool = Fal
|
|
|
320
330
|
# Extract only essential files
|
|
321
331
|
_extract_essential_boltz_files(complex_dir, dest, complex_name)
|
|
322
332
|
click.echo(f" Extracted {complex_name} (essential files)")
|
|
323
|
-
|
|
333
|
+
|
|
324
334
|
copied_count += 1
|
|
325
335
|
|
|
326
336
|
click.echo()
|
|
327
337
|
if skipped_count > 0:
|
|
328
|
-
click.echo(
|
|
338
|
+
click.echo(
|
|
339
|
+
f"Copied {copied_count} predictions, skipped {skipped_count} existing"
|
|
340
|
+
)
|
|
329
341
|
else:
|
|
330
|
-
click.echo(
|
|
342
|
+
click.echo(
|
|
343
|
+
click.style(
|
|
344
|
+
f"✓ Copied {copied_count} structure predictions successfully",
|
|
345
|
+
fg="green",
|
|
346
|
+
)
|
|
347
|
+
)
|
|
331
348
|
|
|
332
349
|
|
|
333
350
|
def _extract_essential_boltz_files(source_dir: Path, dest_dir: Path, complex_name: str):
|
|
334
351
|
"""Extract only essential files from Boltz output.
|
|
335
|
-
|
|
352
|
+
|
|
336
353
|
Essential files are:
|
|
337
354
|
- predictions/*/*.cif (structure files)
|
|
338
355
|
- predictions/*/confidence_*.json (confidence metrics)
|
|
339
|
-
|
|
356
|
+
|
|
340
357
|
Args:
|
|
341
358
|
source_dir: Source boltz_results_* directory
|
|
342
359
|
dest_dir: Destination directory to create
|
|
343
360
|
complex_name: Name of the complex (for better error messages)
|
|
344
361
|
"""
|
|
345
362
|
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
346
|
-
|
|
363
|
+
|
|
347
364
|
predictions_dir = source_dir / "predictions"
|
|
348
365
|
if not predictions_dir.exists():
|
|
349
|
-
click.echo(
|
|
366
|
+
click.echo(
|
|
367
|
+
f" Warning: No predictions directory found for {complex_name}", err=True
|
|
368
|
+
)
|
|
350
369
|
return
|
|
351
|
-
|
|
370
|
+
|
|
352
371
|
# Find all subdirectories in predictions/ (usually just one named after the complex)
|
|
353
372
|
for pred_subdir in predictions_dir.iterdir():
|
|
354
373
|
if not pred_subdir.is_dir():
|
|
355
374
|
continue
|
|
356
|
-
|
|
375
|
+
|
|
357
376
|
# Copy CIF files (structures)
|
|
358
377
|
for cif_file in pred_subdir.glob("*.cif"):
|
|
359
378
|
shutil.copy2(cif_file, dest_dir / cif_file.name)
|
|
360
|
-
|
|
379
|
+
|
|
361
380
|
# Copy confidence JSON files
|
|
362
381
|
for json_file in pred_subdir.glob("confidence_*.json"):
|
|
363
382
|
shutil.copy2(json_file, dest_dir / json_file.name)
|
|
@@ -264,7 +264,9 @@ def _show_job_details(job_id: str, base_path: str):
|
|
|
264
264
|
reslice_info = ""
|
|
265
265
|
if retry.reslice_prefix:
|
|
266
266
|
reslice_info = f" (resliced to {retry.reslice_count} chunks)"
|
|
267
|
-
click.echo(
|
|
267
|
+
click.echo(
|
|
268
|
+
f" - {retry.retry_id}: {len(retry.indices)} indices{reslice_info}"
|
|
269
|
+
)
|
|
268
270
|
click.echo(f" Indices: {retry.indices}")
|
|
269
271
|
if retry.batch_job_id:
|
|
270
272
|
# Show brief status for retry job
|
|
@@ -273,7 +275,9 @@ def _show_job_details(job_id: str, base_path: str):
|
|
|
273
275
|
array_status = client.get_array_job_status(retry.batch_job_id)
|
|
274
276
|
if array_status.is_complete:
|
|
275
277
|
pct = array_status.success_rate * 100
|
|
276
|
-
color = "green" if pct == 100 else "yellow" if pct > 90 else "red"
|
|
278
|
+
color = (
|
|
279
|
+
"green" if pct == 100 else "yellow" if pct > 90 else "red"
|
|
280
|
+
)
|
|
277
281
|
click.echo(
|
|
278
282
|
f" Status: Complete - {click.style(f'{pct:.0f}%', fg=color)} "
|
|
279
283
|
f"({array_status.succeeded}/{array_status.total} succeeded)"
|
|
@@ -364,7 +368,9 @@ def _show_retry_details(manifest, retry_id: str):
|
|
|
364
368
|
click.echo("Retry Config:")
|
|
365
369
|
click.echo(f" Indices: {retry_info.indices}")
|
|
366
370
|
if retry_info.reslice_prefix:
|
|
367
|
-
click.echo(
|
|
371
|
+
click.echo(
|
|
372
|
+
f" Reslice: {retry_info.reslice_prefix} ({retry_info.reslice_count} chunks)"
|
|
373
|
+
)
|
|
368
374
|
else:
|
|
369
375
|
click.echo(f" Reslice: No (retrying original chunks)")
|
|
370
376
|
|
|
@@ -179,8 +179,8 @@ class H5Reformatter(Processor):
|
|
|
179
179
|
def embedding_file_to_df(self, file_name: str) -> pd.DataFrame:
|
|
180
180
|
with h5py.File(file_name, "r") as f:
|
|
181
181
|
gene_names = list(f.keys())
|
|
182
|
-
Xg = [f[key][()] for key in gene_names] # type:ignore
|
|
183
|
-
return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type:ignore
|
|
182
|
+
Xg = [f[key][()] for key in gene_names] # type: ignore
|
|
183
|
+
return pd.DataFrame(np.asmatrix(Xg), index=gene_names) # type: ignore
|
|
184
184
|
|
|
185
185
|
def write_df_to_h5(self, df: pd.DataFrame, filename: str, description: str) -> None:
|
|
186
186
|
"""
|
|
@@ -857,14 +857,12 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
|
|
|
857
857
|
# Create the SQLite database and table
|
|
858
858
|
print("Creating SQLite database...")
|
|
859
859
|
with sqlite3.connect(db_file) as conn:
|
|
860
|
-
conn.execute(
|
|
861
|
-
"""
|
|
860
|
+
conn.execute("""
|
|
862
861
|
CREATE TABLE IF NOT EXISTS proteins (
|
|
863
862
|
protein_id TEXT PRIMARY KEY,
|
|
864
863
|
sequence TEXT NOT NULL
|
|
865
864
|
)
|
|
866
|
-
"""
|
|
867
|
-
)
|
|
865
|
+
""")
|
|
868
866
|
print("Database created successfully.")
|
|
869
867
|
|
|
870
868
|
# Estimate number of records for progress bar
|
|
@@ -25,9 +25,7 @@ def get_ko2gene_df(db: str, ko: str | list[str] | None = None) -> pd.DataFrame:
|
|
|
25
25
|
query = (
|
|
26
26
|
f"SELECT gene,ko FROM gene_to_ko WHERE ko IN ({','.join('?' * len(ko))})"
|
|
27
27
|
)
|
|
28
|
-
result_df = pd.read_sql_query(
|
|
29
|
-
query, conn, params=ko # type:ignore
|
|
30
|
-
)
|
|
28
|
+
result_df = pd.read_sql_query(query, conn, params=ko) # type: ignore
|
|
31
29
|
else:
|
|
32
30
|
query = f"SELECT gene,ko FROM gene_to_ko"
|
|
33
31
|
result_df = pd.read_sql_query(query, conn)
|
|
@@ -409,10 +409,10 @@ class PDBFolderProcessor:
|
|
|
409
409
|
def _get_pdb_files(self) -> list[str]:
|
|
410
410
|
"""
|
|
411
411
|
Get a list of PDB files in the specified directory, optionally filtered by ID set.
|
|
412
|
-
Files are sorted
|
|
412
|
+
Files are sorted alphabetically to ensure consistent, reproducible processing order.
|
|
413
413
|
|
|
414
414
|
Returns:
|
|
415
|
-
List of PDB file names sorted
|
|
415
|
+
List of PDB file names sorted alphabetically.
|
|
416
416
|
"""
|
|
417
417
|
print("Scanning directory for PDB files...")
|
|
418
418
|
pdb_files = [
|
|
@@ -424,8 +424,8 @@ class PDBFolderProcessor:
|
|
|
424
424
|
f for f in pdb_files if self._extract_id_from_filename(f) in self.id_set
|
|
425
425
|
]
|
|
426
426
|
|
|
427
|
-
# Sort files
|
|
428
|
-
pdb_files.sort(
|
|
427
|
+
# Sort files alphabetically for deterministic, reproducible order
|
|
428
|
+
pdb_files.sort()
|
|
429
429
|
|
|
430
430
|
print(f"Found {len(pdb_files)} PDB files")
|
|
431
431
|
return pdb_files
|
|
@@ -11,7 +11,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
11
11
|
|
|
12
12
|
[project]
|
|
13
13
|
name = "dayhoff-tools"
|
|
14
|
-
version = "1.15.0"
|
|
14
|
+
version = "1.15.1"
|
|
15
15
|
description = "Common tools for all the repos at Dayhoff Labs"
|
|
16
16
|
authors = [
|
|
17
17
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
|
@@ -43,7 +43,7 @@ full = [
|
|
|
43
43
|
"sentencepiece>=0.2.0",
|
|
44
44
|
"sqlalchemy>=2.0.40,<3.0.0",
|
|
45
45
|
"tqdm>=4.67.1",
|
|
46
|
-
"transformers==4.36.2",
|
|
46
|
+
"transformers>=4.36.2", # Relaxed: exact pin broke conda envs with huggingface-hub>=1.0
|
|
47
47
|
]
|
|
48
48
|
|
|
49
49
|
# Embedding models (requires torch - user must install separately for their platform)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_maintenance.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engine1/engine_management.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/api_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md
RENAMED
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/engine_commands.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/simulators/demo.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/ssh_config.py
RENAMED
|
File without changes
|
{dayhoff_tools-1.15.0 → dayhoff_tools-1.15.1}/dayhoff_tools/cli/engines_studios/studio_commands.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|