bscampp 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bscampp/__init__.py +1 -1
- bscampp/functions.py +100 -23
- bscampp/jobs.py +13 -1
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/METADATA +1 -1
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/RECORD +9 -9
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/WHEEL +1 -1
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/LICENSE +0 -0
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/entry_points.txt +0 -0
- {bscampp-1.0.5.dist-info → bscampp-1.0.6.dist-info}/top_level.txt +0 -0
bscampp/__init__.py
CHANGED
bscampp/functions.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
import json, time, os, sys
|
2
2
|
import treeswift
|
3
3
|
from collections import defaultdict, Counter
|
4
|
+
import subprocess
|
4
5
|
|
5
6
|
from bscampp import get_logger, log_exception
|
6
7
|
from bscampp.configs import Configs
|
7
|
-
from bscampp.jobs import EPAngJob, TaxtasticJob, PplacerTaxtasticJob
|
8
|
+
from bscampp.jobs import GenericJob, EPAngJob, TaxtasticJob, PplacerTaxtasticJob
|
8
9
|
from bscampp.utils import write_fasta
|
9
10
|
import bscampp.utils as utils
|
10
11
|
|
@@ -12,6 +13,68 @@ import concurrent.futures
|
|
12
13
|
|
13
14
|
_LOG = get_logger(__name__)
|
14
15
|
|
16
|
+
############################# helper functions ################################
|
17
|
+
'''
|
18
|
+
Function to recompile binaries from the given directory.
|
19
|
+
Assumption, the directory contains a CMakeLists.txt file
|
20
|
+
'''
|
21
|
+
def recompileBinariesFromDir(dir):
    """Recompile the binaries located in ``dir`` using cmake followed by make.

    Assumes ``dir`` contains a CMakeLists.txt file. Both tools are run with
    ``dir`` as their working directory, so the build happens in-source.

    Parameters
    ----------
    dir : str
        Directory holding CMakeLists.txt and receiving the build outputs.

    Raises
    ------
    SystemExit
        If cmake or make exits with a non-zero return code (the exit status
        is that return code), or if either tool cannot be launched at all
        (exit status 1).
    """
    _LOG.warning(f"Recompiling binaries with cmake/make at {dir}")

    # The two build stages only differ by their command line, so run them
    # through the same launch/check/log sequence, in order.
    build_steps = (
        ('cmake', ['cmake', dir]),
        ('make', ['make']),
    )
    for step_name, step_cmd in build_steps:
        try:
            proc = subprocess.run(step_cmd, cwd=dir,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE, text=True)
        except OSError as err:
            # e.g. the tool is not installed or `dir` is not accessible;
            # report it the same way as a failed build instead of crashing
            # with an unlogged traceback
            _LOG.error(f"{step_name} failed!")
            _LOG.error(f"Unable to launch {step_name}: {err}")
            sys.exit(1)

        if proc.returncode != 0:
            _LOG.error(f"{step_name} failed!")
            # surface the captured output so the build error is diagnosable
            _LOG.error(f"{step_name} STDOUT: {proc.stdout}")
            _LOG.error(f"{step_name} STDERR: {proc.stderr}")
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist (e.g. python -S, frozen executables)
            sys.exit(proc.returncode)

        _LOG.warning(f"{step_name} succeeded!")

    _LOG.warning(f"Successfully recompiled binaries at {dir}!")
|
48
|
+
|
49
|
+
'''
|
50
|
+
Function to check hamming/fragment_hamming/homology binaries are executable,
|
51
|
+
since they were compiled using dynamic library
|
52
|
+
'''
|
53
|
+
def ensureBinaryExecutable(binpath):
    """Make sure the binary at ``binpath`` can run, recompiling it if not.

    The binary is invoked without arguments; a return code of 255 or -1 is
    taken to mean that it works. A recompilation of the binary's directory
    (via ``recompileBinariesFromDir``) is triggered when:

    - ``binpath`` does not exist,
    - ``binpath`` exists but cannot be launched (e.g. missing execute
      permission, or built for a different platform), or
    - it runs but returns any other code.

    Parameters
    ----------
    binpath : str
        Path to the hamming/fragment_hamming/homology binary to check.
    """
    bindir = os.path.dirname(binpath)

    b_recompile = False
    if not os.path.exists(binpath):
        _LOG.warning(f"{binpath} does not exist!")
        b_recompile = True
    else:
        try:
            probe = subprocess.run([binpath], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        except OSError as err:
            # The file is there but the OS refuses to execute it
            # (PermissionError, "Exec format error", ...). That is exactly
            # the situation recompiling is meant to repair.
            _LOG.warning(f"{binpath} cannot be executed: {err}")
            b_recompile = True
        else:
            # 255 or -1 indicates that the binaries work
            if probe.returncode not in (255, -1):
                _LOG.warning(
                    f"{binpath} return code is {probe.returncode}!")
                b_recompile = True

    if b_recompile:
        recompileBinariesFromDir(bindir)
    return
|
75
|
+
|
76
|
+
########################## end of helper functions ############################
|
77
|
+
|
15
78
|
'''
|
16
79
|
Function to read in the placement tree and alignment.
|
17
80
|
If query alignment is provided, will use the provided query instead of
|
@@ -47,17 +110,6 @@ def readData(workdir, dry_run=False):
|
|
47
110
|
aln_dict = utils.read_data(Configs.aln_path)
|
48
111
|
ref_dict, q_dict = utils.seperate(aln_dict, leaf_dict)
|
49
112
|
|
50
|
-
# after separating queries from the reference alignment, write
|
51
|
-
# them to TEMP/
|
52
|
-
# Updated on 3.5.2025 by Chengze Shen
|
53
|
-
# - regardless of the input choices, write a copy of both reference
|
54
|
-
# and query alignment to the workdir
|
55
|
-
qaln_path = os.path.join(workdir, 'qaln.fa')
|
56
|
-
write_fasta(qaln_path, q_dict)
|
57
|
-
|
58
|
-
aln_path = os.path.join(workdir, 'aln.fa')
|
59
|
-
write_fasta(aln_path, ref_dict)
|
60
|
-
|
61
113
|
# Added on 3.8.2025 by Chengze Shen
|
62
114
|
# - to ensure that any characters from the query has correct names
|
63
115
|
# (e.g., having ":" can cause trouble), have a qname_map that maps
|
@@ -70,6 +122,22 @@ def readData(workdir, dry_run=False):
|
|
70
122
|
qname_map[name] = cvt
|
71
123
|
qname_map_rev[cvt] = name
|
72
124
|
qidx += 1
|
125
|
+
# modify q_dict as well
|
126
|
+
for name, cvt in qname_map.items():
|
127
|
+
q_dict[cvt] = q_dict[name]
|
128
|
+
q_dict.pop(name)
|
129
|
+
|
130
|
+
# after separating queries from the reference alignment, write
|
131
|
+
# them to TEMP/
|
132
|
+
# Updated on 3.5.2025 by Chengze Shen
|
133
|
+
# - regardless of the input choices, write a copy of both reference
|
134
|
+
# and query alignment to the workdir
|
135
|
+
qaln_path = os.path.join(workdir, 'qaln.fa')
|
136
|
+
write_fasta(qaln_path, q_dict)
|
137
|
+
|
138
|
+
aln_path = os.path.join(workdir, 'aln.fa')
|
139
|
+
write_fasta(aln_path, ref_dict)
|
140
|
+
|
73
141
|
|
74
142
|
t1 = time.perf_counter()
|
75
143
|
_LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
|
@@ -94,18 +162,29 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
|
|
94
162
|
if Configs.subtreetype == "h":
|
95
163
|
Configs.votes = Configs.subtreesize
|
96
164
|
|
97
|
-
cmd = []
|
98
165
|
if Configs.similarityflag:
|
99
|
-
|
166
|
+
job_type = 'homology'
|
100
167
|
else:
|
101
|
-
if Configs.fragmentflag
|
102
|
-
|
168
|
+
if Configs.fragmentflag:
|
169
|
+
job_type = 'fragment_hamming'
|
103
170
|
else:
|
104
|
-
|
105
|
-
|
171
|
+
job_type = 'hamming'
|
172
|
+
binpath = os.path.join(Configs.hamming_distance_dir, job_type)
|
173
|
+
cmd = [binpath]
|
174
|
+
|
175
|
+
# Added @ 3.9.2025 by Chengze Shen
|
176
|
+
# - check if binpath is executable, since the compiled files use dynamic
|
177
|
+
# libraries.
|
178
|
+
# If works: should have return code 255
|
179
|
+
# If not: should have return code 1,
|
180
|
+
# recompile the binaries using cmake and make
|
181
|
+
ensureBinaryExecutable(binpath)
|
182
|
+
|
106
183
|
cmd.extend([aln_path, str(len(aln)), qaln_path, str(len(qaln)),
|
107
184
|
tmp_output, str(Configs.votes)])
|
108
|
-
|
185
|
+
job = GenericJob(cmd=cmd, job_type=job_type)
|
186
|
+
_ = job.run()
|
187
|
+
#os.system(' '.join(cmd))
|
109
188
|
|
110
189
|
# process closest leaves
|
111
190
|
unusable_queries = set()
|
@@ -366,10 +445,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
366
445
|
if '' in tmp_leaf_dict:
|
367
446
|
del tmp_leaf_dict['']
|
368
447
|
tmp_ref_dict = {label : aln[label] for label in tmp_leaf_dict.keys()}
|
369
|
-
|
370
|
-
# - wrote converted name for query sequences and convert them
|
371
|
-
# - back when placements are done
|
372
|
-
tmp_q_dict = {qname_map[name] : qaln[name] for name in query_list}
|
448
|
+
tmp_q_dict = {name : qaln[name] for name in query_list}
|
373
449
|
write_fasta(tmp_aln, tmp_ref_dict)
|
374
450
|
write_fasta(tmp_qaln, tmp_q_dict)
|
375
451
|
|
@@ -505,6 +581,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
505
581
|
_LOG.info('Time to place queries to subtrees: {} seconds'.format(t1 - t0))
|
506
582
|
return jplace
|
507
583
|
|
584
|
+
|
508
585
|
'''
|
509
586
|
Function to write a given jplace object to local output
|
510
587
|
'''
|
bscampp/jobs.py
CHANGED
@@ -112,7 +112,7 @@ class Job(object):
|
|
112
112
|
else:
|
113
113
|
_LOG.error(error_msg + '\nSTDOUT: ' + stdout +
|
114
114
|
'\nSTDERR: ' + stderr + logpath)
|
115
|
-
exit(
|
115
|
+
exit(self.returncode)
|
116
116
|
except Exception:
|
117
117
|
log_exception(_LOG)
|
118
118
|
|
@@ -123,6 +123,18 @@ class Job(object):
|
|
123
123
|
raise NotImplementedError(
|
124
124
|
'get_invocation() should be implemented by subclasses.')
|
125
125
|
|
126
|
+
'''
|
127
|
+
Generic job that runs the given command, represented as a list of strings
|
128
|
+
'''
|
129
|
+
class GenericJob(Job):
    """Job that runs an arbitrary command given as a list of strings.

    Parameters
    ----------
    cmd : list of str, optional
        The command and its arguments. Defaults to a fresh empty list for
        each instance.
    job_type : str, optional
        Label stored on the job as ``job_type``; defaults to 'external'.
    """

    def __init__(self, cmd=None, job_type='external'):
        Job.__init__(self)
        self.job_type = job_type
        # A literal [] default would be one list shared by every instance
        # created without `cmd`; build a new one per instance instead.
        self.cmd = [] if cmd is None else cmd

    def get_invocation(self):
        """Return the stored command together with ``None``.

        NOTE(review): the second element is presumably the output-path slot
        that other Job subclasses fill in -- confirm against Job.run().
        """
        return self.cmd, None
|
137
|
+
|
126
138
|
'''
|
127
139
|
A EPA-ng job that runs EPA-ng with given parameters
|
128
140
|
'''
|
@@ -1,9 +1,9 @@
|
|
1
|
-
bscampp/__init__.py,sha256=
|
1
|
+
bscampp/__init__.py,sha256=eDIMYifzKrFdtA3Ac7OvPTyIHUO1ZLgVaM0pKFxxEHE,2289
|
2
2
|
bscampp/configs.py,sha256=perl6u5hto6J3JV1JMbsTQ6tqr2uGOk-Z9jfzflid0s,6122
|
3
3
|
bscampp/default.config,sha256=CEfsUHBy--vwJhEcUuJ0btfuGQWb_lKMVWUIP9f5YGw,112
|
4
|
-
bscampp/functions.py,sha256=
|
4
|
+
bscampp/functions.py,sha256=DGHQJLLzXSghDKbha0LW0YPip_45M6MI4t3zdDpzULI,22448
|
5
5
|
bscampp/init_configs.py,sha256=EA9sMN5jWj6zj2b-7tN19LhX2Ef61ByQLxQRLHAqLDM,3600
|
6
|
-
bscampp/jobs.py,sha256=
|
6
|
+
bscampp/jobs.py,sha256=v7buZJs1AnNoXiILwu-W8fo3QjxAh3i9Mp7xfmlJvAY,7569
|
7
7
|
bscampp/pipeline.py,sha256=IPZnXZmVxGGfbVUuGCQh5X9oBq48-6pA9QkuvMGPTag,14000
|
8
8
|
bscampp/utils.py,sha256=-wns6FaWMKD2wVqjxdBQvjTdagTjywBIaGfqb2mupe4,30039
|
9
9
|
bscampp/tools/epa-ng,sha256=f3EVoZAAOXLN6l521qp-TrWDl5J2nqL3tGgjPaQE9WQ,3772096
|
@@ -17,9 +17,9 @@ bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp,sha256=xCmyAT-OZJOD
|
|
17
17
|
bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp,sha256=eKxgODRlpf0hU84QjNhigvRhWCT9tiJZjA5oQFQ1bUk,7404
|
18
18
|
bscampp/tools/hamming_distance/src/homology.cpp,sha256=ZE0uXZWQ-cN4U1Wk5kUr_KKHgzsgA6Sno-IViRa4tmI,6053
|
19
19
|
bscampp/tools/hamming_distance/src/new_hamming.cpp,sha256=fBRm99RquBZgZjaLOn9xDI3cH9NchhrxKbL-11j8fmk,5342
|
20
|
-
bscampp-1.0.
|
21
|
-
bscampp-1.0.
|
22
|
-
bscampp-1.0.
|
23
|
-
bscampp-1.0.
|
24
|
-
bscampp-1.0.
|
25
|
-
bscampp-1.0.
|
20
|
+
bscampp-1.0.6.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
|
21
|
+
bscampp-1.0.6.dist-info/METADATA,sha256=0sWAKK30wlps8i0d1BdFqyv5MZVgefRnTn_-yMmO8lQ,12602
|
22
|
+
bscampp-1.0.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
23
|
+
bscampp-1.0.6.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
|
24
|
+
bscampp-1.0.6.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
|
25
|
+
bscampp-1.0.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|