bscampp 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bscampp/__init__.py +1 -1
- bscampp/configs.py +0 -1
- bscampp/functions.py +118 -28
- bscampp/jobs.py +13 -1
- bscampp/pipeline.py +0 -2
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info}/METADATA +5 -4
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info}/RECORD +11 -11
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info}/WHEEL +1 -1
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info}/entry_points.txt +0 -0
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info/licenses}/LICENSE +0 -0
- {bscampp-1.0.5.dist-info → bscampp-1.0.7.dist-info}/top_level.txt +0 -0
bscampp/__init__.py
CHANGED
bscampp/configs.py
CHANGED
bscampp/functions.py
CHANGED
@@ -1,17 +1,84 @@
|
|
1
1
|
import json, time, os, sys
|
2
2
|
import treeswift
|
3
3
|
from collections import defaultdict, Counter
|
4
|
+
import subprocess
|
4
5
|
|
5
6
|
from bscampp import get_logger, log_exception
|
6
7
|
from bscampp.configs import Configs
|
7
|
-
from bscampp.jobs import EPAngJob, TaxtasticJob, PplacerTaxtasticJob
|
8
|
+
from bscampp.jobs import GenericJob, EPAngJob, TaxtasticJob, PplacerTaxtasticJob
|
8
9
|
from bscampp.utils import write_fasta
|
9
10
|
import bscampp.utils as utils
|
10
11
|
|
11
12
|
import concurrent.futures
|
12
13
|
|
14
|
+
# suppress userwarning when doing subtree suppress_unifurcations
|
15
|
+
import warnings
|
16
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
17
|
+
|
13
18
|
_LOG = get_logger(__name__)
|
14
19
|
|
20
|
+
############################# helper functions ################################
|
21
|
+
'''
|
22
|
+
Function to recompile binaries from the given directory.
|
23
|
+
Assumption, the directory contains a CMakeLists.txt file
|
24
|
+
'''
|
25
|
+
def recompileBinariesFromDir(dir):
    '''
    Rebuild the binaries located in ``dir`` by running ``cmake`` and then
    ``make``.

    Both tools are launched with ``dir`` as their working directory, and
    ``cmake`` additionally receives ``dir`` as its source directory, so the
    directory is assumed to contain a CMakeLists.txt file.

    Parameters
    ----------
    dir : str
        Directory to build in. The name shadows the builtin ``dir`` but is
        kept unchanged so that keyword callers keep working.

    Raises
    ------
    SystemExit
        Carrying the return code of the failing tool when ``cmake`` or
        ``make`` exits with a non-zero status.
    OSError
        When ``cmake`` or ``make`` cannot be started at all (for example,
        the tool is not installed). The failure is logged, then re-raised.
    '''
    _LOG.warning(f"Recompiling binaries with cmake/make at {dir}")

    # cmake has to run before make; stop at the first step that fails
    build_steps = (('cmake', ['cmake', dir]), ('make', ['make']))
    for step_name, step_cmd in build_steps:
        try:
            proc = subprocess.run(step_cmd, cwd=dir,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE, text=True)
        except OSError as err:
            # the tool itself could not be launched; say which one
            _LOG.error(f"{step_name} could not be started: {err}")
            raise

        if proc.returncode != 0:
            _LOG.error(f"{step_name} failed!")
            # surface the captured output, otherwise the reason for the
            # failure is lost together with the pipes
            _LOG.error(f"{step_name} STDOUT: {proc.stdout}"
                       f"\n{step_name} STDERR: {proc.stderr}")
            # sys.exit() rather than exit(): the latter is injected by the
            # site module and is not guaranteed to exist
            sys.exit(proc.returncode)

        _LOG.warning(f"{step_name} succeeded!")

    _LOG.warning(f"Successfully recompiled binaries at {dir}!")
|
52
|
+
|
53
|
+
'''
|
54
|
+
Function to check hamming/fragment_hamming/homology binaries are executable,
|
55
|
+
since they were compiled using dynamic library
|
56
|
+
'''
|
57
|
+
def ensureBinaryExecutable(binpath):
    '''
    Check that the binary at ``binpath`` can be run, and recompile the
    binaries in its directory when it cannot.

    The binary is invoked without arguments. A recompile is triggered when
    the file does not exist, when the operating system refuses to execute
    it, or when its return code is neither 255 nor -1.

    Parameters
    ----------
    binpath : str
        Path to the binary to check. Its parent directory is the one handed
        to ``recompileBinariesFromDir``.

    Returns
    -------
    None
    '''
    b_recompile = False

    if not os.path.exists(binpath):
        # binpath does not exist
        _LOG.warning(f"{binpath} does not exist!")
        b_recompile = True
    else:
        try:
            probe = subprocess.run([binpath], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        except OSError as err:
            # the file exists but cannot be launched (e.g., missing execute
            # permission, or built for a different platform); this is
            # exactly the situation that a recompile is meant to repair
            _LOG.warning(f"{binpath} cannot be executed: {err}")
            b_recompile = True
        else:
            # 255 or -1 indicates that the binaries work (presumably the
            # usage-error status of a no-argument call -- TODO confirm
            # against the C++ sources)
            if probe.returncode not in (255, -1):
                _LOG.warning(f"{binpath} return code is {probe.returncode}!")
                b_recompile = True

    if b_recompile:
        recompileBinariesFromDir(os.path.dirname(binpath))
    return
|
79
|
+
|
80
|
+
########################## end of helper functions ############################
|
81
|
+
|
15
82
|
'''
|
16
83
|
Function to read in the placement tree and alignment.
|
17
84
|
If query alignment is provided, will use the provided query instead of
|
@@ -47,17 +114,6 @@ def readData(workdir, dry_run=False):
|
|
47
114
|
aln_dict = utils.read_data(Configs.aln_path)
|
48
115
|
ref_dict, q_dict = utils.seperate(aln_dict, leaf_dict)
|
49
116
|
|
50
|
-
# after separating queries from the reference alignment, write
|
51
|
-
# them to to TEMP/
|
52
|
-
# Updated on 3.5.2025 by Chengze Shen
|
53
|
-
# - regardless of the input choices, write a copy of both reference
|
54
|
-
# and query alignment to the workdir
|
55
|
-
qaln_path = os.path.join(workdir, 'qaln.fa')
|
56
|
-
write_fasta(qaln_path, q_dict)
|
57
|
-
|
58
|
-
aln_path = os.path.join(workdir, 'aln.fa')
|
59
|
-
write_fasta(aln_path, ref_dict)
|
60
|
-
|
61
117
|
# Added on 3.8.2025 by Chengze Shen
|
62
118
|
# - to ensure that any characters from the query has correct names
|
63
119
|
# (e.g., having ":" can cause trouble), have a qname_map that maps
|
@@ -70,6 +126,22 @@ def readData(workdir, dry_run=False):
|
|
70
126
|
qname_map[name] = cvt
|
71
127
|
qname_map_rev[cvt] = name
|
72
128
|
qidx += 1
|
129
|
+
# modify q_dict as well
|
130
|
+
for name, cvt in qname_map.items():
|
131
|
+
q_dict[cvt] = q_dict[name]
|
132
|
+
q_dict.pop(name)
|
133
|
+
|
134
|
+
# after separating queries from the reference alignment, write
|
135
|
+
# them to TEMP/
|
136
|
+
# Updated on 3.5.2025 by Chengze Shen
|
137
|
+
# - regardless of the input choices, write a copy of both reference
|
138
|
+
# and query alignment to the workdir
|
139
|
+
qaln_path = os.path.join(workdir, 'qaln.fa')
|
140
|
+
write_fasta(qaln_path, q_dict)
|
141
|
+
|
142
|
+
aln_path = os.path.join(workdir, 'aln.fa')
|
143
|
+
write_fasta(aln_path, ref_dict)
|
144
|
+
|
73
145
|
|
74
146
|
t1 = time.perf_counter()
|
75
147
|
_LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
|
@@ -94,18 +166,29 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
|
|
94
166
|
if Configs.subtreetype == "h":
|
95
167
|
Configs.votes = Configs.subtreesize
|
96
168
|
|
97
|
-
cmd = []
|
98
169
|
if Configs.similarityflag:
|
99
|
-
|
170
|
+
job_type = 'homology'
|
100
171
|
else:
|
101
|
-
if Configs.fragmentflag
|
102
|
-
|
172
|
+
if Configs.fragmentflag:
|
173
|
+
job_type = 'fragment_hamming'
|
103
174
|
else:
|
104
|
-
|
105
|
-
|
175
|
+
job_type = 'hamming'
|
176
|
+
binpath = os.path.join(Configs.hamming_distance_dir, job_type)
|
177
|
+
cmd = [binpath]
|
178
|
+
|
179
|
+
# Added @ 3.9.2025 by Chengze Shen
|
180
|
+
# - check if binpath is executable, since the compiled files use dynamic
|
181
|
+
# libraries.
|
182
|
+
# If works: should have return code 255
|
183
|
+
# If not: should have return code 1,
|
184
|
+
# recompile the binaries using cmake and make
|
185
|
+
ensureBinaryExecutable(binpath)
|
186
|
+
|
106
187
|
cmd.extend([aln_path, str(len(aln)), qaln_path, str(len(qaln)),
|
107
188
|
tmp_output, str(Configs.votes)])
|
108
|
-
|
189
|
+
job = GenericJob(cmd=cmd, job_type=job_type)
|
190
|
+
_ = job.run()
|
191
|
+
#os.system(' '.join(cmd))
|
109
192
|
|
110
193
|
# process closest leaves
|
111
194
|
unusable_queries = set()
|
@@ -326,7 +409,8 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
326
409
|
aln, qaln, cmdline_args, workdir, qname_map, qname_map_rev,
|
327
410
|
pool, lock, dry_run=False):
|
328
411
|
t0 = time.perf_counter()
|
329
|
-
_LOG.info(
|
412
|
+
_LOG.info("Performing placement on each subtree with {}...".format(
|
413
|
+
Configs.placement_method))
|
330
414
|
|
331
415
|
if dry_run:
|
332
416
|
return dict()
|
@@ -366,10 +450,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
366
450
|
if '' in tmp_leaf_dict:
|
367
451
|
del tmp_leaf_dict['']
|
368
452
|
tmp_ref_dict = {label : aln[label] for label in tmp_leaf_dict.keys()}
|
369
|
-
|
370
|
-
# - wrote converted name for query sequences and convert them
|
371
|
-
# - back when placements are done
|
372
|
-
tmp_q_dict = {qname_map[name] : qaln[name] for name in query_list}
|
453
|
+
tmp_q_dict = {name : qaln[name] for name in query_list}
|
373
454
|
write_fasta(tmp_aln, tmp_ref_dict)
|
374
455
|
write_fasta(tmp_qaln, tmp_q_dict)
|
375
456
|
|
@@ -385,7 +466,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
385
466
|
job = EPAngJob(path=Configs.epang_path,
|
386
467
|
info_path=Configs.info_path, tree_path=tmp_tree,
|
387
468
|
aln_path=tmp_aln, qaln_path=tmp_qaln,
|
388
|
-
outdir=subtree_dir, num_cpus=Configs.
|
469
|
+
outdir=subtree_dir, num_cpus=Configs.cpus_per_job)
|
389
470
|
jobs.append(job)
|
390
471
|
## for EPA-ng, ensure that outpath name is changed to the one we want
|
391
472
|
#_outpath = job.run(logging=f'subtree_{final_subtree_count}')
|
@@ -405,7 +486,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
405
486
|
job = PplacerTaxtasticJob(path=Configs.pplacer_path,
|
406
487
|
refpkg_dir=refpkg_dir,
|
407
488
|
#molecule=Configs.molecule, model=Configs.model,
|
408
|
-
outpath=tmp_output, num_cpus=Configs.
|
489
|
+
outpath=tmp_output, num_cpus=Configs.cpus_per_job,
|
409
490
|
qaln_path=tmp_qaln)
|
410
491
|
#tmp_output = job.run(logging=f'subtree_{final_subtree_count}')
|
411
492
|
jobs.append(job)
|
@@ -445,8 +526,16 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
445
526
|
|
446
527
|
for tmp_place in place_json["placements"]:
|
447
528
|
# convert qname back using qname_map_rev
|
448
|
-
|
449
|
-
|
529
|
+
tmp_name = tmp_place[tgt][0]
|
530
|
+
|
531
|
+
# >EPA-ng: tgt=="n" --> qname is string
|
532
|
+
if isinstance(tmp_name, str):
|
533
|
+
qname = qname_map_rev[tmp_name]
|
534
|
+
tmp_place[tgt][0] = qname
|
535
|
+
# >pplacer: tgt=="nm" --> qname is a list of two fields
|
536
|
+
elif isinstance(tmp_name, list):
|
537
|
+
qname = qname_map_rev[tmp_name[0]]
|
538
|
+
tmp_place[tgt][0][0] = qname
|
450
539
|
placed_query_list.append(qname)
|
451
540
|
|
452
541
|
#placed_query_list.append(tmp_place[tgt][0])
|
@@ -505,6 +594,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
|
|
505
594
|
_LOG.info('Time to place queries to subtrees: {} seconds'.format(t1 - t0))
|
506
595
|
return jplace
|
507
596
|
|
597
|
+
|
508
598
|
'''
|
509
599
|
Function to write a given jplace object to local output
|
510
600
|
'''
|
bscampp/jobs.py
CHANGED
@@ -112,7 +112,7 @@ class Job(object):
|
|
112
112
|
else:
|
113
113
|
_LOG.error(error_msg + '\nSTDOUT: ' + stdout +
|
114
114
|
'\nSTDERR: ' + stderr + logpath)
|
115
|
-
exit(
|
115
|
+
exit(self.returncode)
|
116
116
|
except Exception:
|
117
117
|
log_exception(_LOG)
|
118
118
|
|
@@ -123,6 +123,18 @@ class Job(object):
|
|
123
123
|
raise NotImplementedError(
|
124
124
|
'get_invocation() should be implemented by subclasses.')
|
125
125
|
|
126
|
+
'''
|
127
|
+
Generic job that runs the given command, represented as a list of strings
|
128
|
+
'''
|
129
|
+
class GenericJob(Job):
    '''
    Job that runs an arbitrary command supplied by the caller.

    Parameters
    ----------
    cmd : list of str, optional
        The command to run, as a list of strings. Defaults to a new empty
        list for each instance.
    job_type : str, optional
        Label stored on the job as ``self.job_type``. Defaults to
        ``'external'``.
    '''
    def __init__(self, cmd=None, job_type='external'):
        Job.__init__(self)
        self.job_type = job_type
        # a literal [] default would be created once and then shared by
        # every instance constructed without cmd; build a fresh list instead
        self.cmd = [] if cmd is None else cmd

    def get_invocation(self):
        '''
        Return the stored command paired with ``None``.

        NOTE(review): the second element is presumably the output-path slot
        that other Job subclasses fill in -- confirm against Job.run.
        '''
        return self.cmd, None
|
137
|
+
|
126
138
|
'''
|
127
139
|
A EPA-ng job that runs EPA-ng with given parameters
|
128
140
|
'''
|
bscampp/pipeline.py
CHANGED
@@ -180,8 +180,6 @@ def clean_temp_files():
|
|
180
180
|
_LOG.info(f'- Removed {temp}')
|
181
181
|
|
182
182
|
def parseArguments(dry_run=False, method="BSCAMPP"):
|
183
|
-
global _root_dir, main_config_path
|
184
|
-
|
185
183
|
default_outdir = f"{method.lower()}_output"
|
186
184
|
default_outname = f"{method.lower()}_result"
|
187
185
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: bscampp
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.7
|
4
4
|
Summary: BSCAMPP and SCAMPP - Scalable Phylogenetic Placement Tools
|
5
5
|
Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
|
6
6
|
License: MIT License
|
@@ -49,12 +49,13 @@ Requires-Dist: ConfigParser>=5.0.0
|
|
49
49
|
Requires-Dist: numpy>=1.21.6
|
50
50
|
Requires-Dist: treeswift>=1.1.45
|
51
51
|
Requires-Dist: taxtastic>=0.9.3
|
52
|
+
Dynamic: license-file
|
52
53
|
|
53
54
|
# BSCAMPP and SCAMPP - Two Scalable Phylogenetic Placement Methods and Frameworks
|
55
|
+
[](https://pypi.org/project/bscampp/#history)
|
54
56
|
[](https://pypi.org/project/bscampp/)
|
55
|
-
[](https://pypi.org/project/bscampp/#history)
|
56
57
|
[](https://github.com/ewedell/BSCAMPP/)
|
57
|
-
[](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
|
58
59
|
[](https://github.com/ewedell/BSCAMPP/blob/main/CHANGELOG.md)
|
59
60
|
|
60
61
|
**Table of Contents**
|
@@ -1,10 +1,10 @@
|
|
1
|
-
bscampp/__init__.py,sha256=
|
2
|
-
bscampp/configs.py,sha256=
|
1
|
+
bscampp/__init__.py,sha256=iWcT9i33I9mkOxFgfa7aRxjn_tRPYY7yW6zcuhbjTWk,2289
|
2
|
+
bscampp/configs.py,sha256=M53nndokxi3WczhpKGWpxOcI2nXIkgpQKhj5gYpee8w,6101
|
3
3
|
bscampp/default.config,sha256=CEfsUHBy--vwJhEcUuJ0btfuGQWb_lKMVWUIP9f5YGw,112
|
4
|
-
bscampp/functions.py,sha256=
|
4
|
+
bscampp/functions.py,sha256=ywv3-h1l81YayioSTmNiciBZnbi56zyIGc-5Ni-jXG4,23016
|
5
5
|
bscampp/init_configs.py,sha256=EA9sMN5jWj6zj2b-7tN19LhX2Ef61ByQLxQRLHAqLDM,3600
|
6
|
-
bscampp/jobs.py,sha256=
|
7
|
-
bscampp/pipeline.py,sha256=
|
6
|
+
bscampp/jobs.py,sha256=v7buZJs1AnNoXiILwu-W8fo3QjxAh3i9Mp7xfmlJvAY,7569
|
7
|
+
bscampp/pipeline.py,sha256=J-RQH54R27m6fhzIpGX0MJuE3ZFk5rcnsROpwC_n5CE,13960
|
8
8
|
bscampp/utils.py,sha256=-wns6FaWMKD2wVqjxdBQvjTdagTjywBIaGfqb2mupe4,30039
|
9
9
|
bscampp/tools/epa-ng,sha256=f3EVoZAAOXLN6l521qp-TrWDl5J2nqL3tGgjPaQE9WQ,3772096
|
10
10
|
bscampp/tools/pplacer,sha256=p0H4eo9uuiYoWS_kJbPfauOV99i7BXJdZSiwXIuLxTw,7834576
|
@@ -17,9 +17,9 @@ bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp,sha256=xCmyAT-OZJOD
|
|
17
17
|
bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp,sha256=eKxgODRlpf0hU84QjNhigvRhWCT9tiJZjA5oQFQ1bUk,7404
|
18
18
|
bscampp/tools/hamming_distance/src/homology.cpp,sha256=ZE0uXZWQ-cN4U1Wk5kUr_KKHgzsgA6Sno-IViRa4tmI,6053
|
19
19
|
bscampp/tools/hamming_distance/src/new_hamming.cpp,sha256=fBRm99RquBZgZjaLOn9xDI3cH9NchhrxKbL-11j8fmk,5342
|
20
|
-
bscampp-1.0.
|
21
|
-
bscampp-1.0.
|
22
|
-
bscampp-1.0.
|
23
|
-
bscampp-1.0.
|
24
|
-
bscampp-1.0.
|
25
|
-
bscampp-1.0.
|
20
|
+
bscampp-1.0.7.dist-info/licenses/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
|
21
|
+
bscampp-1.0.7.dist-info/METADATA,sha256=2Tk1eF72pNPIttqSsfwiNclB8H0bShbMhca0aAr8wKo,12602
|
22
|
+
bscampp-1.0.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
23
|
+
bscampp-1.0.7.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
|
24
|
+
bscampp-1.0.7.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
|
25
|
+
bscampp-1.0.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|