dayhoff-tools 1.1.8__tar.gz → 1.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/PKG-INFO +1 -1
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/processors.py +53 -15
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/pyproject.toml +1 -1
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/README.md +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/utility_commands.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.1.8 → dayhoff_tools-1.1.9}/dayhoff_tools/warehouse.py +0 -0
@@ -221,12 +221,16 @@ class MMSeqsProfileProcessor(Processor):
|
|
221
221
|
|
222
222
|
The input_file is the target FASTA. The query FASTA is provided
|
223
223
|
during initialization.
|
224
|
+
The method creates an output directory (e.g., {target_stem})
|
225
|
+
which contains the result files, now named meaningfully using the target stem
|
226
|
+
(e.g., {target_stem}_results.m8 and {target_stem}_hits.fasta).
|
224
227
|
|
225
228
|
Args:
|
226
229
|
input_file: Path to the input target FASTA file.
|
227
230
|
|
228
231
|
Returns:
|
229
|
-
Path to the output directory
|
232
|
+
Path to the output directory (e.g., {target_stem}) containing
|
233
|
+
the meaningfully named result files.
|
230
234
|
|
231
235
|
Raises:
|
232
236
|
subprocess.CalledProcessError: If any MMseqs2 command fails.
|
@@ -237,10 +241,11 @@ class MMSeqsProfileProcessor(Processor):
|
|
237
241
|
|
238
242
|
input_file_path = Path(input_file).resolve() # Ensure absolute path
|
239
243
|
target_fasta_filename = input_file_path.name
|
244
|
+
target_fasta_stem = input_file_path.stem # Get stem for naming
|
240
245
|
|
241
246
|
# Create a unique base directory for this run's outputs and temp files
|
242
247
|
# This directory will be returned and subsequently uploaded by the Operator
|
243
|
-
run_base_dir_name = f"
|
248
|
+
run_base_dir_name = f"{target_fasta_stem}" # Use stem as the dir name
|
244
249
|
run_base_dir = Path(run_base_dir_name).resolve()
|
245
250
|
run_base_dir.mkdir(parents=True, exist_ok=True)
|
246
251
|
logger.info(f"Created run base directory: {run_base_dir}")
|
@@ -259,9 +264,13 @@ class MMSeqsProfileProcessor(Processor):
|
|
259
264
|
mmseqs_temp_dir.mkdir(parents=True, exist_ok=True)
|
260
265
|
logger.info(f"Created MMseqs2 temporary directory: {mmseqs_temp_dir}")
|
261
266
|
|
262
|
-
# Define output file paths
|
263
|
-
|
264
|
-
|
267
|
+
# Define INTERMEDIATE output file paths within mmseqs_temp_dir
|
268
|
+
intermediate_results_m8_file = mmseqs_temp_dir / "results.m8"
|
269
|
+
intermediate_hits_fasta_file = mmseqs_temp_dir / "results.fasta"
|
270
|
+
|
271
|
+
# Define FINAL output file paths within run_base_dir, using target stem
|
272
|
+
final_results_m8_file = run_base_dir / f"{target_fasta_stem}_results.m8"
|
273
|
+
final_hits_fasta_file = run_base_dir / f"{target_fasta_stem}_hits.fasta"
|
265
274
|
|
266
275
|
# --- MMseqs2 Workflow Paths (intermediate files in mmseqs_temp_dir) ---
|
267
276
|
query_db = mmseqs_temp_dir / "queryDB"
|
@@ -381,7 +390,7 @@ class MMSeqsProfileProcessor(Processor):
|
|
381
390
|
run_base_dir,
|
382
391
|
)
|
383
392
|
|
384
|
-
# 8. Convert results to tabular format (M8)
|
393
|
+
# 8. Convert results to tabular format (M8) -> to intermediate file
|
385
394
|
self._run_mmseqs_command(
|
386
395
|
[
|
387
396
|
"mmseqs",
|
@@ -389,7 +398,7 @@ class MMSeqsProfileProcessor(Processor):
|
|
389
398
|
str(profile_db), # Query DB used for search (profileDB)
|
390
399
|
str(target_db),
|
391
400
|
str(result_db),
|
392
|
-
str(
|
401
|
+
str(intermediate_results_m8_file), # Output M8 file to temp dir
|
393
402
|
"--threads",
|
394
403
|
self.num_threads,
|
395
404
|
],
|
@@ -404,26 +413,55 @@ class MMSeqsProfileProcessor(Processor):
|
|
404
413
|
run_base_dir,
|
405
414
|
)
|
406
415
|
|
407
|
-
# 10. Convert hit sequences to FASTA
|
416
|
+
# 10. Convert hit sequences to FASTA -> to intermediate file
|
408
417
|
self._run_mmseqs_command(
|
409
|
-
[
|
418
|
+
[
|
419
|
+
"mmseqs",
|
420
|
+
"convert2fasta",
|
421
|
+
str(hits_db),
|
422
|
+
str(intermediate_hits_fasta_file),
|
423
|
+
],
|
410
424
|
"Convert hits to FASTA",
|
411
425
|
run_base_dir,
|
412
426
|
)
|
413
427
|
|
414
428
|
logger.info(
|
415
|
-
f"MMseqs2 workflow completed successfully.
|
429
|
+
f"MMseqs2 workflow completed successfully. Intermediate outputs in {mmseqs_temp_dir}"
|
416
430
|
)
|
417
431
|
|
432
|
+
# Move and rename final output files from mmseqs_temp_dir to run_base_dir
|
433
|
+
if intermediate_results_m8_file.exists():
|
434
|
+
shutil.move(
|
435
|
+
str(intermediate_results_m8_file), str(final_results_m8_file)
|
436
|
+
)
|
437
|
+
logger.info(f"Moved and renamed M8 results to {final_results_m8_file}")
|
438
|
+
else:
|
439
|
+
logger.warning(
|
440
|
+
f"Intermediate M8 file {intermediate_results_m8_file} not found. Creating empty target file."
|
441
|
+
)
|
442
|
+
final_results_m8_file.touch() # Create empty file in run_base_dir if not found
|
443
|
+
|
444
|
+
if intermediate_hits_fasta_file.exists():
|
445
|
+
shutil.move(
|
446
|
+
str(intermediate_hits_fasta_file), str(final_hits_fasta_file)
|
447
|
+
)
|
448
|
+
logger.info(f"Moved and renamed hits FASTA to {final_hits_fasta_file}")
|
449
|
+
else:
|
450
|
+
logger.warning(
|
451
|
+
f"Intermediate hits FASTA {intermediate_hits_fasta_file} not found. Creating empty target file."
|
452
|
+
)
|
453
|
+
final_hits_fasta_file.touch() # Create empty file in run_base_dir if not found
|
454
|
+
|
418
455
|
finally:
|
419
|
-
# Clean up the MMseqs2 temporary directory
|
456
|
+
# Clean up the MMseqs2 temporary directory (mmseqs_tmp) which contains intermediate DBs etc.
|
420
457
|
if mmseqs_temp_dir.exists():
|
421
458
|
shutil.rmtree(mmseqs_temp_dir)
|
422
459
|
logger.info(
|
423
460
|
f"Cleaned up MMseqs2 temporary directory: {mmseqs_temp_dir}"
|
424
461
|
)
|
425
|
-
# The
|
426
|
-
#
|
427
|
-
# by the Operator when run_base_dir is deleted after upload.
|
462
|
+
# The local_target_file (copy of input) inside run_base_dir will be cleaned up
|
463
|
+
# by the Operator when run_base_dir itself is deleted after upload.
|
428
464
|
|
429
|
-
return str(
|
465
|
+
return str(
|
466
|
+
run_base_dir
|
467
|
+
) # Return the path to the directory containing meaningfully named results
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
|
5
5
|
|
6
6
|
[project]
|
7
7
|
name = "dayhoff-tools"
|
8
|
-
version = "1.1.8"
|
8
|
+
version = "1.1.9"
|
9
9
|
description = "Common tools for all the repos at Dayhoff Labs"
|
10
10
|
authors = [
|
11
11
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|