bscampp 1.0.5__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {bscampp-1.0.5 → bscampp-1.0.6}/CHANGELOG.md +4 -0
  2. {bscampp-1.0.5/bscampp.egg-info → bscampp-1.0.6}/PKG-INFO +1 -1
  3. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/__init__.py +1 -1
  4. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/functions.py +100 -23
  5. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/jobs.py +13 -1
  6. {bscampp-1.0.5 → bscampp-1.0.6/bscampp.egg-info}/PKG-INFO +1 -1
  7. {bscampp-1.0.5 → bscampp-1.0.6}/pyproject.toml +2 -2
  8. {bscampp-1.0.5 → bscampp-1.0.6}/LICENSE +0 -0
  9. {bscampp-1.0.5 → bscampp-1.0.6}/MANIFEST.in +0 -0
  10. {bscampp-1.0.5 → bscampp-1.0.6}/README.md +0 -0
  11. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/configs.py +0 -0
  12. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/default.config +0 -0
  13. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/init_configs.py +0 -0
  14. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/pipeline.py +0 -0
  15. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/epa-ng +0 -0
  16. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/CMakeLists.txt +0 -0
  17. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/fragment_hamming +0 -0
  18. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/hamming +0 -0
  19. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/homology +0 -0
  20. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/src/fragment_hamming.cpp +0 -0
  21. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp +0 -0
  22. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp +0 -0
  23. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/src/homology.cpp +0 -0
  24. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/hamming_distance/src/new_hamming.cpp +0 -0
  25. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/tools/pplacer +0 -0
  26. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp/utils.py +0 -0
  27. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp.egg-info/SOURCES.txt +0 -0
  28. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp.egg-info/dependency_links.txt +0 -0
  29. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp.egg-info/entry_points.txt +0 -0
  30. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp.egg-info/requires.txt +0 -0
  31. {bscampp-1.0.5 → bscampp-1.0.6}/bscampp.egg-info/top_level.txt +0 -0
  32. {bscampp-1.0.5 → bscampp-1.0.6}/requirements.txt +0 -0
  33. {bscampp-1.0.5 → bscampp-1.0.6}/run_bscampp.py +0 -0
  34. {bscampp-1.0.5 → bscampp-1.0.6}/setup.cfg +0 -0
  35. {bscampp-1.0.5 → bscampp-1.0.6}/tests/test_dry_run.py +0 -0
@@ -1,3 +1,7 @@
1
+ # BSCAMPP v1.0.6
2
+ 1. Implemented functionality to recompile C++ code locally so that dynamic
3
+ libraries are referred to correctly.
4
+
1
5
  # BSCAMPP v1.0.5
2
6
  1. Changed from using original sequence names from query input to using
3
7
  pseudo names (indexed from 1 to N, N the number of queries). This is to prevent
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bscampp
3
- Version: 1.0.5
3
+ Version: 1.0.6
4
4
  Summary: BSCAMPP and SCAMPP - Scalable Phylogenetic Placement Tools
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -12,7 +12,7 @@ import logging, os
12
12
  # not really needed for BSCAMPP but safe to update here
13
13
  os.sys.setrecursionlimit(1000000)
14
14
 
15
- __version__ = "1.0.5"
15
+ __version__ = "1.0.6"
16
16
  _INSTALL_PATH = __path__[0]
17
17
 
18
18
  # global variables to store all loggers
@@ -1,10 +1,11 @@
1
1
  import json, time, os, sys
2
2
  import treeswift
3
3
  from collections import defaultdict, Counter
4
+ import subprocess
4
5
 
5
6
  from bscampp import get_logger, log_exception
6
7
  from bscampp.configs import Configs
7
- from bscampp.jobs import EPAngJob, TaxtasticJob, PplacerTaxtasticJob
8
+ from bscampp.jobs import GenericJob, EPAngJob, TaxtasticJob, PplacerTaxtasticJob
8
9
  from bscampp.utils import write_fasta
9
10
  import bscampp.utils as utils
10
11
 
@@ -12,6 +13,68 @@ import concurrent.futures
12
13
 
13
14
  _LOG = get_logger(__name__)
14
15
 
16
+ ############################# helper functions ################################
17
+ '''
18
+ Function to recompile binaries from the given directory.
19
+ Assumption: the directory contains a CMakeLists.txt file
20
+ '''
21
def recompileBinariesFromDir(dir):
    """
    Recompile the binaries in the given directory by running cmake, then make.

    Both commands are executed with ``dir`` as the working directory; the
    directory is assumed to contain a CMakeLists.txt file.

    Parameters:
        dir: path of the directory to build in. The name shadows the builtin
             but is kept so that existing keyword callers keep working.

    Raises:
        SystemExit: with the failing command's return code if cmake or make
                    exits with a non-zero status.
        OSError:    if cmake or make cannot be launched at all; the error is
                    logged before being re-raised.
    """
    _LOG.warning(f"Recompiling binaries with cmake/make at {dir}")

    # configure with cmake first, then build with make
    build_steps = (('cmake', ['cmake', dir]), ('make', ['make']))
    for tool, cmd in build_steps:
        try:
            proc = subprocess.run(cmd, cwd=dir, stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE, text=True)
        except OSError as e:
            # the command could not even be started; log it so the reason
            # appears in the log file, then let the original error surface
            _LOG.error(f"{tool} could not be started: {e}")
            raise

        if proc.returncode != 0:
            # report the captured output, otherwise the cause of the build
            # failure is silently thrown away
            _LOG.error(f"{tool} failed!" + '\nSTDOUT: ' + proc.stdout +
                    '\nSTDERR: ' + proc.stderr)
            # sys.exit rather than the site-provided exit() helper
            sys.exit(proc.returncode)
        _LOG.warning(f"{tool} succeeded!")

    _LOG.warning(f"Successfully recompiled binaries at {dir}!")
48
+
49
+ '''
50
+ Function to check hamming/fragment_hamming/homology binaries are executable,
51
+ since they were compiled using dynamic library
52
+ '''
53
def ensureBinaryExecutable(binpath):
    """
    Make sure the binary at ``binpath`` can be run, recompiling it if not.

    The binary is probed by running it without arguments. A return code of
    255 or -1 is taken to mean that it works. A recompilation of its
    directory (via recompileBinariesFromDir) is triggered when:
        - the path does not exist,
        - the binary cannot be launched at all (OSError from the probe), or
        - the probe returns any other code.

    Parameters:
        binpath: path to the binary to check.
    """
    b_recompile = False
    if not os.path.exists(binpath):
        _LOG.warning(f"{binpath} does not exist!")
        b_recompile = True
    else:
        try:
            p = subprocess.run([binpath], stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
        except OSError as e:
            # the file exists but cannot be launched (e.g., it lacks exec
            # permission or has an incompatible format); treat this the
            # same as a broken binary instead of crashing
            _LOG.warning(f"{binpath} cannot be executed: {e}")
            b_recompile = True
        else:
            # 255 or -1 indicates that the binaries work
            if p.returncode not in (255, -1):
                _LOG.warning(f"{binpath} return code is {p.returncode}!")
                b_recompile = True

    if b_recompile:
        # rebuild everything in the directory that holds the binary
        recompileBinariesFromDir(os.path.dirname(binpath))
75
+
76
+ ########################## end of helper functions ############################
77
+
15
78
  '''
16
79
  Function to read in the placement tree and alignment.
17
80
  If query alignment is provided, will use the provided query instead of
@@ -47,17 +110,6 @@ def readData(workdir, dry_run=False):
47
110
  aln_dict = utils.read_data(Configs.aln_path)
48
111
  ref_dict, q_dict = utils.seperate(aln_dict, leaf_dict)
49
112
 
50
- # after separating queries from the reference alignment, write
51
- # them to to TEMP/
52
- # Updated on 3.5.2025 by Chengze Shen
53
- # - regardless of the input choices, write a copy of both reference
54
- # and query alignment to the workdir
55
- qaln_path = os.path.join(workdir, 'qaln.fa')
56
- write_fasta(qaln_path, q_dict)
57
-
58
- aln_path = os.path.join(workdir, 'aln.fa')
59
- write_fasta(aln_path, ref_dict)
60
-
61
113
  # Added on 3.8.2025 by Chengze Shen
62
114
  # - to ensure that any characters from the query has correct names
63
115
  # (e.g., having ":" can cause trouble), have a qname_map that maps
@@ -70,6 +122,22 @@ def readData(workdir, dry_run=False):
70
122
  qname_map[name] = cvt
71
123
  qname_map_rev[cvt] = name
72
124
  qidx += 1
125
+ # modify q_dict as well
126
+ for name, cvt in qname_map.items():
127
+ q_dict[cvt] = q_dict[name]
128
+ q_dict.pop(name)
129
+
130
+ # after separating queries from the reference alignment, write
131
+ # them to TEMP/
132
+ # Updated on 3.5.2025 by Chengze Shen
133
+ # - regardless of the input choices, write a copy of both reference
134
+ # and query alignment to the workdir
135
+ qaln_path = os.path.join(workdir, 'qaln.fa')
136
+ write_fasta(qaln_path, q_dict)
137
+
138
+ aln_path = os.path.join(workdir, 'aln.fa')
139
+ write_fasta(aln_path, ref_dict)
140
+
73
141
 
74
142
  t1 = time.perf_counter()
75
143
  _LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
@@ -94,18 +162,29 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
94
162
  if Configs.subtreetype == "h":
95
163
  Configs.votes = Configs.subtreesize
96
164
 
97
- cmd = []
98
165
  if Configs.similarityflag:
99
- cmd.append(os.path.join(Configs.hamming_distance_dir, 'homology'))
166
+ job_type = 'homology'
100
167
  else:
101
- if Configs.fragmentflag == False:
102
- cmd.append(os.path.join(Configs.hamming_distance_dir, 'hamming'))
168
+ if Configs.fragmentflag:
169
+ job_type = 'fragment_hamming'
103
170
  else:
104
- cmd.append(os.path.join(
105
- Configs.hamming_distance_dir, 'fragment_hamming'))
171
+ job_type = 'hamming'
172
+ binpath = os.path.join(Configs.hamming_distance_dir, job_type)
173
+ cmd = [binpath]
174
+
175
+ # Added @ 3.9.2025 by Chengze Shen
176
+ # - check if binpath is executable, since the compiled files use dynamic
177
+ # libraries.
178
+ # If works: should have return code 255
179
+ # If not: should have return code 1,
180
+ # recompile the binaries using cmake and make
181
+ ensureBinaryExecutable(binpath)
182
+
106
183
  cmd.extend([aln_path, str(len(aln)), qaln_path, str(len(qaln)),
107
184
  tmp_output, str(Configs.votes)])
108
- os.system(' '.join(cmd))
185
+ job = GenericJob(cmd=cmd, job_type=job_type)
186
+ _ = job.run()
187
+ #os.system(' '.join(cmd))
109
188
 
110
189
  # process closest leaves
111
190
  unusable_queries = set()
@@ -366,10 +445,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
366
445
  if '' in tmp_leaf_dict:
367
446
  del tmp_leaf_dict['']
368
447
  tmp_ref_dict = {label : aln[label] for label in tmp_leaf_dict.keys()}
369
- # Changed @ 3.8.2025 by Chengze Shen
370
- # - wrote converted name for query sequences and convert them
371
- # - back when placements are done
372
- tmp_q_dict = {qname_map[name] : qaln[name] for name in query_list}
448
+ tmp_q_dict = {name : qaln[name] for name in query_list}
373
449
  write_fasta(tmp_aln, tmp_ref_dict)
374
450
  write_fasta(tmp_qaln, tmp_q_dict)
375
451
 
@@ -505,6 +581,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
505
581
  _LOG.info('Time to place queries to subtrees: {} seconds'.format(t1 - t0))
506
582
  return jplace
507
583
 
584
+
508
585
  '''
509
586
  Function to write a given jplace object to local output
510
587
  '''
@@ -112,7 +112,7 @@ class Job(object):
112
112
  else:
113
113
  _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
114
114
  '\nSTDERR: ' + stderr + logpath)
115
- exit(1)
115
+ exit(self.returncode)
116
116
  except Exception:
117
117
  log_exception(_LOG)
118
118
 
@@ -123,6 +123,18 @@ class Job(object):
123
123
  raise NotImplementedError(
124
124
  'get_invocation() should be implemented by subclasses.')
125
125
 
126
+ '''
127
+ Generic job that runs the given command, represented as a list of strings
128
+ '''
129
class GenericJob(Job):
    """
    Generic job that runs the given command, represented as a list of strings.

    Parameters:
        cmd:      the command to run, as a list of strings. Defaults to an
                  empty command.
        job_type: label stored on the job as ``self.job_type``.
    """
    def __init__(self, cmd=None, job_type='external'):
        Job.__init__(self)
        self.job_type = job_type
        # build a fresh list per instance when no command is given; a
        # mutable default argument would be shared by every such instance
        self.cmd = [] if cmd is None else cmd

    def get_invocation(self):
        """
        Return the stored command and None as a 2-tuple.

        NOTE(review): the meaning of the second element is defined by the
        base class's consumer of get_invocation(), which is not visible
        here -- confirm against Job.run().
        """
        return self.cmd, None
137
+
126
138
  '''
127
139
  A EPA-ng job that runs EPA-ng with given parameters
128
140
  '''
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bscampp
3
- Version: 1.0.5
3
+ Version: 1.0.6
4
4
  Summary: BSCAMPP and SCAMPP - Scalable Phylogenetic Placement Tools
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -11,8 +11,8 @@ dynamic = ["version", "dependencies"]
11
11
  description = "BSCAMPP and SCAMPP - Scalable Phylogenetic Placement Tools"
12
12
  readme = {file = "README.md", content-type = "text/markdown"}
13
13
  authors = [
14
- {name = "Eleanor Wedell", email = "ewedell2@illinois.edu"},
15
- {name = "Chengze Shen", email = "chengze5@illinois.edu"}
14
+ { name = "Eleanor Wedell", email = "ewedell2@illinois.edu" },
15
+ { name = "Chengze Shen", email = "chengze5@illinois.edu" },
16
16
  ]
17
17
  license = {file = "LICENSE"}
18
18
  requires-python = ">=3.7"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes