dayhoff-tools 1.1.7__tar.gz → 1.1.9__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (31)
  1. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/processors.py +54 -16
  3. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/pyproject.toml +1 -1
  4. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/README.md +0 -0
  5. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/__init__.py +0 -0
  6. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/chemistry/standardizer.py +0 -0
  7. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/chemistry/utils.py +0 -0
  8. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/__init__.py +0 -0
  9. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/cloud_commands.py +0 -0
  10. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/main.py +0 -0
  11. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/swarm_commands.py +0 -0
  12. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/cli/utility_commands.py +0 -0
  13. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/base.py +0 -0
  14. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  15. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  16. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  17. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/job_runner.py +0 -0
  18. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/deployment/swarm.py +0 -0
  19. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/embedders.py +0 -0
  20. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/fasta.py +0 -0
  21. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/file_ops.py +0 -0
  22. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/h5.py +0 -0
  23. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/gcp.py +0 -0
  24. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/gtdb.py +0 -0
  25. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/kegg.py +0 -0
  26. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/mmseqs.py +0 -0
  27. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/structure.py +0 -0
  28. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/intake/uniprot.py +0 -0
  29. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/logs.py +0 -0
  30. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/sqlite.py +0 -0
  31. {dayhoff_tools-1.1.7 → dayhoff_tools-1.1.9}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.1.7
3
+ Version: 1.1.9
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -160,7 +160,7 @@ class MMSeqsProfileProcessor(Processor):
160
160
  self.num_threads = str(num_threads) # MMseqs2 expects string for threads
161
161
 
162
162
  default_mmseqs_args = {
163
- "memory_limit_gb": "30",
163
+ "memory_limit_gb": "25",
164
164
  "evalue": "10",
165
165
  "sensitivity": "7.5",
166
166
  "max_seqs_search": "300",
@@ -221,12 +221,16 @@ class MMSeqsProfileProcessor(Processor):
221
221
 
222
222
  The input_file is the target FASTA. The query FASTA is provided
223
223
  during initialization.
224
+ The method creates an output directory (e.g., {target_stem})
225
+ which contains the result files, now named meaningfully using the target stem
226
+ (e.g., {target_stem}_results.m8 and {target_stem}_hits.fasta).
224
227
 
225
228
  Args:
226
229
  input_file: Path to the input target FASTA file.
227
230
 
228
231
  Returns:
229
- Path to the output directory containing results.m8 and results.fasta.
232
+ Path to the output directory (e.g., {target_stem}) containing
233
+ the meaningfully named result files.
230
234
 
231
235
  Raises:
232
236
  subprocess.CalledProcessError: If any MMseqs2 command fails.
@@ -237,10 +241,11 @@ class MMSeqsProfileProcessor(Processor):
237
241
 
238
242
  input_file_path = Path(input_file).resolve() # Ensure absolute path
239
243
  target_fasta_filename = input_file_path.name
244
+ target_fasta_stem = input_file_path.stem # Get stem for naming
240
245
 
241
246
  # Create a unique base directory for this run's outputs and temp files
242
247
  # This directory will be returned and subsequently uploaded by the Operator
243
- run_base_dir_name = f"mmseqs_run_{Path(target_fasta_filename).stem}"
248
+ run_base_dir_name = f"{target_fasta_stem}" # Use stem as the dir name
244
249
  run_base_dir = Path(run_base_dir_name).resolve()
245
250
  run_base_dir.mkdir(parents=True, exist_ok=True)
246
251
  logger.info(f"Created run base directory: {run_base_dir}")
@@ -259,9 +264,13 @@ class MMSeqsProfileProcessor(Processor):
259
264
  mmseqs_temp_dir.mkdir(parents=True, exist_ok=True)
260
265
  logger.info(f"Created MMseqs2 temporary directory: {mmseqs_temp_dir}")
261
266
 
262
- # Define output file paths directly within run_base_dir
263
- local_results_m8_file = run_base_dir / "results.m8"
264
- local_hits_fasta_file = run_base_dir / "results.fasta"
267
+ # Define INTERMEDIATE output file paths within mmseqs_temp_dir
268
+ intermediate_results_m8_file = mmseqs_temp_dir / "results.m8"
269
+ intermediate_hits_fasta_file = mmseqs_temp_dir / "results.fasta"
270
+
271
+ # Define FINAL output file paths within run_base_dir, using target stem
272
+ final_results_m8_file = run_base_dir / f"{target_fasta_stem}_results.m8"
273
+ final_hits_fasta_file = run_base_dir / f"{target_fasta_stem}_hits.fasta"
265
274
 
266
275
  # --- MMseqs2 Workflow Paths (intermediate files in mmseqs_temp_dir) ---
267
276
  query_db = mmseqs_temp_dir / "queryDB"
@@ -381,7 +390,7 @@ class MMSeqsProfileProcessor(Processor):
381
390
  run_base_dir,
382
391
  )
383
392
 
384
- # 8. Convert results to tabular format (M8)
393
+ # 8. Convert results to tabular format (M8) -> to intermediate file
385
394
  self._run_mmseqs_command(
386
395
  [
387
396
  "mmseqs",
@@ -389,7 +398,7 @@ class MMSeqsProfileProcessor(Processor):
389
398
  str(profile_db), # Query DB used for search (profileDB)
390
399
  str(target_db),
391
400
  str(result_db),
392
- str(local_results_m8_file), # Output M8 file
401
+ str(intermediate_results_m8_file), # Output M8 file to temp dir
393
402
  "--threads",
394
403
  self.num_threads,
395
404
  ],
@@ -404,26 +413,55 @@ class MMSeqsProfileProcessor(Processor):
404
413
  run_base_dir,
405
414
  )
406
415
 
407
- # 10. Convert hit sequences to FASTA
416
+ # 10. Convert hit sequences to FASTA -> to intermediate file
408
417
  self._run_mmseqs_command(
409
- ["mmseqs", "convert2fasta", str(hits_db), str(local_hits_fasta_file)],
418
+ [
419
+ "mmseqs",
420
+ "convert2fasta",
421
+ str(hits_db),
422
+ str(intermediate_hits_fasta_file),
423
+ ],
410
424
  "Convert hits to FASTA",
411
425
  run_base_dir,
412
426
  )
413
427
 
414
428
  logger.info(
415
- f"MMseqs2 workflow completed successfully. Outputs in {run_base_dir}"
429
+ f"MMseqs2 workflow completed successfully. Intermediate outputs in {mmseqs_temp_dir}"
416
430
  )
417
431
 
432
+ # Move and rename final output files from mmseqs_temp_dir to run_base_dir
433
+ if intermediate_results_m8_file.exists():
434
+ shutil.move(
435
+ str(intermediate_results_m8_file), str(final_results_m8_file)
436
+ )
437
+ logger.info(f"Moved and renamed M8 results to {final_results_m8_file}")
438
+ else:
439
+ logger.warning(
440
+ f"Intermediate M8 file {intermediate_results_m8_file} not found. Creating empty target file."
441
+ )
442
+ final_results_m8_file.touch() # Create empty file in run_base_dir if not found
443
+
444
+ if intermediate_hits_fasta_file.exists():
445
+ shutil.move(
446
+ str(intermediate_hits_fasta_file), str(final_hits_fasta_file)
447
+ )
448
+ logger.info(f"Moved and renamed hits FASTA to {final_hits_fasta_file}")
449
+ else:
450
+ logger.warning(
451
+ f"Intermediate hits FASTA {intermediate_hits_fasta_file} not found. Creating empty target file."
452
+ )
453
+ final_hits_fasta_file.touch() # Create empty file in run_base_dir if not found
454
+
418
455
  finally:
419
- # Clean up the MMseqs2 temporary directory
456
+ # Clean up the MMseqs2 temporary directory (mmseqs_tmp) which contains intermediate DBs etc.
420
457
  if mmseqs_temp_dir.exists():
421
458
  shutil.rmtree(mmseqs_temp_dir)
422
459
  logger.info(
423
460
  f"Cleaned up MMseqs2 temporary directory: {mmseqs_temp_dir}"
424
461
  )
425
- # The input_file (original target) is managed by the Operator
426
- # The local_target_file (copy inside run_base_dir) will be cleaned up
427
- # by the Operator when run_base_dir is deleted after upload.
462
+ # The local_target_file (copy of input) inside run_base_dir will be cleaned up
463
+ # by the Operator when run_base_dir itself is deleted after upload.
428
464
 
429
- return str(run_base_dir) # Return the path to the directory containing results
465
+ return str(
466
+ run_base_dir
467
+ ) # Return the path to the directory containing meaningfully named results
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.1.7"
8
+ version = "1.1.9"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
File without changes