bscampp 1.0.1b0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bscampp-1.0.1b0 → bscampp-1.0.2}/CHANGELOG.md +7 -0
- {bscampp-1.0.1b0/bscampp.egg-info → bscampp-1.0.2}/PKG-INFO +30 -10
- {bscampp-1.0.1b0 → bscampp-1.0.2}/README.md +29 -9
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/__init__.py +1 -1
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/configs.py +1 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/functions.py +48 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/pipeline.py +100 -11
- {bscampp-1.0.1b0 → bscampp-1.0.2/bscampp.egg-info}/PKG-INFO +30 -10
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp.egg-info/entry_points.txt +2 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/pyproject.toml +2 -0
- bscampp-1.0.2/tests/test_dry_run.py +21 -0
- bscampp-1.0.1b0/tests/test_dry_run.py +0 -11
- {bscampp-1.0.1b0 → bscampp-1.0.2}/LICENSE +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/MANIFEST.in +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/default.config +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/init_configs.py +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/jobs.py +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/epa-ng +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/CMakeLists.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/fragment_hamming +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/hamming +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/homology +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/fragment_hamming.cpp +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/homology.cpp +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/new_hamming.cpp +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/pplacer +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/utils.py +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp.egg-info/SOURCES.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp.egg-info/dependency_links.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp.egg-info/requires.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp.egg-info/top_level.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/requirements.txt +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/run_bscampp.py +0 -0
- {bscampp-1.0.1b0 → bscampp-1.0.2}/setup.cfg +0 -0
@@ -1,3 +1,10 @@
|
|
1
|
+
# BSCAMPP v1.0.2
|
2
|
+
1. Added SCAMPP functionality and its binary executables.
|
3
|
+
|
4
|
+
# BSCAMPP v1.0.1
|
5
|
+
1. Bumped version to full release.
|
6
|
+
2. Completed examples for display in `bscampp --help`.
|
7
|
+
|
1
8
|
# BSCAMPP v1.0.1b
|
2
9
|
1. Removed redundant codes and fixed missing variables.
|
3
10
|
2. Added badges for PyPI installation and current Python Build, etc.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: bscampp
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.2
|
4
4
|
Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
|
5
5
|
Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
|
6
6
|
License: MIT License
|
@@ -88,8 +88,12 @@ It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 vot
|
|
88
88
|
are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and 5 respectively (see [Usage](#usage) for more details
|
89
89
|
on customizing BSCAMPP).
|
90
90
|
|
91
|
+
#### SCAMPP
|
92
|
+
SCAMPP is also implemented in BSCAMPP. The user can invoke SCAMPP by running
|
93
|
+
`run_scampp.py` or `scampp` (if installed with PyPI) after installation.
|
94
|
+
|
91
95
|
# Installation
|
92
|
-
BSCAMPP was tested on **Python 3.
|
96
|
+
BSCAMPP was tested on **Python 3.8 to 3.12**. There are two ways to install and use BSCAMPP: (1) with PyPI, or
|
93
97
|
(2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP, please contact Eleanor Wedell
|
94
98
|
(ewedell@illinois.edu).
|
95
99
|
|
@@ -98,19 +102,25 @@ EPA-ng and/or pplacer are requirements to run BSCAMPP since BSCAMPP will use the
|
|
98
102
|
By default, BSCAMPP will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
|
99
103
|
We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
|
100
104
|
|
101
|
-
### (1) Install with `pip`
|
105
|
+
### (1) Install with `pip`
|
102
106
|
The easiest way to install BSCAMPP is to use `pip install`. This will also install all required Python packages.
|
103
107
|
|
104
108
|
```bash
|
105
109
|
# 1. install with pip (--user if no root access)
|
106
110
|
pip install bscampp [--user]
|
107
111
|
|
108
|
-
# 2.
|
112
|
+
# 2. Four binary executables will be installed. The first time
|
109
113
|
# running any will create a config file at
|
110
114
|
# ~/.bscampp/main.config that resolves the links to all
|
111
115
|
# external software (e.g., epa-ng, pplacer)
|
116
|
+
|
117
|
+
# ---- BSCAMPP functions
|
112
118
|
bscampp [-h] # or
|
113
119
|
run_bscampp.py [-h]
|
120
|
+
|
121
|
+
# ---- SCAMPP functions
|
122
|
+
scampp [-h] # or
|
123
|
+
run_scampp.py
|
114
124
|
```
|
115
125
|
|
116
126
|
### (2) Install from GitHub
|
@@ -160,7 +170,8 @@ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment
|
|
160
170
|
### (3) Using `pplacer` as the base placement method
|
161
171
|
```bash
|
162
172
|
run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
163
|
-
-a [reference alignment] -q [query sequence alignment]
|
173
|
+
-a [reference alignment] -q [query sequence alignment] \
|
174
|
+
--placement-method pplacer
|
164
175
|
```
|
165
176
|
|
166
177
|
### More comprehensive usage
|
@@ -221,14 +232,23 @@ run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
|
221
232
|
> Temporary file indexing. Default: 0
|
222
233
|
> --fragmentflag FRAGMENTFLAG
|
223
234
|
> If queries contains fragments. Default: True
|
235
|
+
> --subtreetype SUBTREETYPE
|
236
|
+
> (SCAMPP only) Options for collecting nodes for the
|
237
|
+
> subtree - d for edge weighted distances, n for node
|
238
|
+
> distances, h for Hamming distances. Default: d
|
224
239
|
> --keeptemp KEEPTEMP Boolean, True to keep all temporary files. Default:
|
225
240
|
False
|
226
241
|
```
|
227
242
|
|
228
243
|
|
229
244
|
# Example Code and Data
|
230
|
-
Example script and data are provided in this GitHub repository in `examples/`.
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
- `
|
245
|
+
Example script and data are provided in this GitHub repository in `examples/`.
|
246
|
+
The data is originally from the
|
247
|
+
[RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
|
248
|
+
* `examples/run_bscampp.sh`: contains a simple script to test BSCAMPP with
|
249
|
+
`epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement
|
250
|
+
tree. The info file is from RAxML-ng when running `epa-ng`, and from
|
251
|
+
FastTree-2 when running `pplacer`.
|
252
|
+
- `run_bscampp.sh` will invoke BSCAMPP with `epa-ng`.
|
253
|
+
- `run_bscampp.sh pplacer` will invoke BSCAMPP with `pplacer`.
|
254
|
+
* `examples/run_scampp.sh`: the same test script but running SCAMPP.
|
@@ -36,8 +36,12 @@ It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 vot
|
|
36
36
|
are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and 5 respectively (see [Usage](#usage) for more details
|
37
37
|
on customizing BSCAMPP).
|
38
38
|
|
39
|
+
#### SCAMPP
|
40
|
+
SCAMPP is also implemented in BSCAMPP. The user can invoke SCAMPP by running
|
41
|
+
`run_scampp.py` or `scampp` (if installed with PyPI) after installation.
|
42
|
+
|
39
43
|
# Installation
|
40
|
-
BSCAMPP was tested on **Python 3.
|
44
|
+
BSCAMPP was tested on **Python 3.8 to 3.12**. There are two ways to install and use BSCAMPP: (1) with PyPI, or
|
41
45
|
(2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP, please contact Eleanor Wedell
|
42
46
|
(ewedell@illinois.edu).
|
43
47
|
|
@@ -46,19 +50,25 @@ EPA-ng and/or pplacer are requirements to run BSCAMPP since BSCAMPP will use the
|
|
46
50
|
By default, BSCAMPP will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
|
47
51
|
We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
|
48
52
|
|
49
|
-
### (1) Install with `pip`
|
53
|
+
### (1) Install with `pip`
|
50
54
|
The easiest way to install BSCAMPP is to use `pip install`. This will also install all required Python packages.
|
51
55
|
|
52
56
|
```bash
|
53
57
|
# 1. install with pip (--user if no root access)
|
54
58
|
pip install bscampp [--user]
|
55
59
|
|
56
|
-
# 2.
|
60
|
+
# 2. Four binary executables will be installed. The first time
|
57
61
|
# running any will create a config file at
|
58
62
|
# ~/.bscampp/main.config that resolves the links to all
|
59
63
|
# external software (e.g., epa-ng, pplacer)
|
64
|
+
|
65
|
+
# ---- BSCAMPP functions
|
60
66
|
bscampp [-h] # or
|
61
67
|
run_bscampp.py [-h]
|
68
|
+
|
69
|
+
# ---- SCAMPP functions
|
70
|
+
scampp [-h] # or
|
71
|
+
run_scampp.py
|
62
72
|
```
|
63
73
|
|
64
74
|
### (2) Install from GitHub
|
@@ -108,7 +118,8 @@ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment
|
|
108
118
|
### (3) Using `pplacer` as the base placement method
|
109
119
|
```bash
|
110
120
|
run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
111
|
-
-a [reference alignment] -q [query sequence alignment]
|
121
|
+
-a [reference alignment] -q [query sequence alignment] \
|
122
|
+
--placement-method pplacer
|
112
123
|
```
|
113
124
|
|
114
125
|
### More comprehensive usage
|
@@ -169,14 +180,23 @@ run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
|
169
180
|
> Temporary file indexing. Default: 0
|
170
181
|
> --fragmentflag FRAGMENTFLAG
|
171
182
|
> If queries contains fragments. Default: True
|
183
|
+
> --subtreetype SUBTREETYPE
|
184
|
+
> (SCAMPP only) Options for collecting nodes for the
|
185
|
+
> subtree - d for edge weighted distances, n for node
|
186
|
+
> distances, h for Hamming distances. Default: d
|
172
187
|
> --keeptemp KEEPTEMP Boolean, True to keep all temporary files. Default:
|
173
188
|
False
|
174
189
|
```
|
175
190
|
|
176
191
|
|
177
192
|
# Example Code and Data
|
178
|
-
Example script and data are provided in this GitHub repository in `examples/`.
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
- `
|
193
|
+
Example script and data are provided in this GitHub repository in `examples/`.
|
194
|
+
The data is originally from the
|
195
|
+
[RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
|
196
|
+
* `examples/run_bscampp.sh`: contains a simple script to test BSCAMPP with
|
197
|
+
`epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement
|
198
|
+
tree. The info file is from RAxML-ng when running `epa-ng`, and from
|
199
|
+
FastTree-2 when running `pplacer`.
|
200
|
+
- `run_bscampp.sh` will invoke BSCAMPP with `epa-ng`.
|
201
|
+
- `run_bscampp.sh pplacer` will invoke BSCAMPP with `pplacer`.
|
202
|
+
* `examples/run_scampp.sh`: the same test script but running SCAMPP.
|
@@ -71,6 +71,9 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
|
|
71
71
|
query_votes_dict = dict()
|
72
72
|
query_top_vote_dict = dict()
|
73
73
|
tmp_output = os.path.join(workdir, 'closest.txt')
|
74
|
+
|
75
|
+
if Configs.subtreetype == "h":
|
76
|
+
Configs.votes = Configs.subtreesize
|
74
77
|
|
75
78
|
cmd = []
|
76
79
|
if Configs.similarityflag:
|
@@ -226,6 +229,51 @@ def assignQueriesToSubtrees(query_votes_dict, query_top_vote_dict,
|
|
226
229
|
_LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
|
227
230
|
return new_subtree_dict, placed_query_list
|
228
231
|
|
232
|
+
|
233
|
+
'''
|
234
|
+
Function to assign queries to subtrees as used in SCAMPP
|
235
|
+
(subtrees are built using the nearest leaf as the seed sequence)
|
236
|
+
'''
|
237
|
+
def buildQuerySubtrees(query_votes_dict, query_top_vote_dict,
|
238
|
+
tree, leaf_dict, dry_run=False):
|
239
|
+
t0 = time.perf_counter()
|
240
|
+
|
241
|
+
if dry_run:
|
242
|
+
return dict(), []
|
243
|
+
|
244
|
+
# (1) go over the query seed sequences to see if any queries use
|
245
|
+
# the same seed sequence (i.e. subtree)
|
246
|
+
seed_queries = dict()
|
247
|
+
for query, closest_leaf in query_top_vote_dict.items():
|
248
|
+
if closest_leaf not in seed_queries:
|
249
|
+
seed_queries[closest_leaf] = [query]
|
250
|
+
else:
|
251
|
+
seed_queries[closest_leaf].append(query)
|
252
|
+
|
253
|
+
new_subtree_dict = dict()
|
254
|
+
# assign queries to subtrees, and remove them from the pool
|
255
|
+
# repeat until all queries are assigned
|
256
|
+
for seed_label, queries in seed_queries.items():
|
257
|
+
node_y = leaf_dict[seed_label]
|
258
|
+
# extract [subtreesize] leaves
|
259
|
+
if Configs.subtreetype == "h":
|
260
|
+
labels = query_votes_dict[queries[0]]
|
261
|
+
elif Configs.subtreetype == "n":
|
262
|
+
labels = utils.subtree_nodes(tree, node_y, Configs.subtreesize)
|
263
|
+
else:
|
264
|
+
labels = utils.subtree_nodes_with_edge_length(tree, node_y,
|
265
|
+
Configs.subtreesize)
|
266
|
+
subtree = tree.extract_tree_with(labels)
|
267
|
+
new_subtree_dict[subtree] = queries
|
268
|
+
|
269
|
+
|
270
|
+
placed_query_list = []
|
271
|
+
|
272
|
+
t1 = time.perf_counter()
|
273
|
+
_LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
|
274
|
+
return new_subtree_dict, placed_query_list
|
275
|
+
|
276
|
+
|
229
277
|
'''
|
230
278
|
Helper function to run a single placement task. Designed to use with
|
231
279
|
multiprocessing
|
@@ -89,6 +89,79 @@ def bscampp_pipeline(*args, **kwargs):
|
|
89
89
|
else:
|
90
90
|
return False
|
91
91
|
|
92
|
+
|
93
|
+
# main pipeline for SCAMPP
|
94
|
+
def scampp_pipeline(*args, **kwargs):
|
95
|
+
t0 = time.perf_counter()
|
96
|
+
m = Manager(); lock = m.Lock()
|
97
|
+
|
98
|
+
# set up a dry run if specified
|
99
|
+
dry_run = False
|
100
|
+
if 'dry_run' in kwargs and isinstance(kwargs['dry_run'], bool):
|
101
|
+
dry_run = kwargs['dry_run']
|
102
|
+
|
103
|
+
# parse command line arguments and build configurations
|
104
|
+
parser, cmdline_args = parseArguments(dry_run=dry_run, method="SCAMPP")
|
105
|
+
|
106
|
+
# initialize multiprocessing (if needed)
|
107
|
+
_LOG.warning('Initializing ProcessPoolExecutor...')
|
108
|
+
pool = ProcessPoolExecutor(Configs.num_cpus, initializer=initial_pool,
|
109
|
+
initargs=(parser, cmdline_args,))
|
110
|
+
|
111
|
+
# (0) temporary files are written here
|
112
|
+
if not dry_run:
|
113
|
+
workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
|
114
|
+
try:
|
115
|
+
if not os.path.isdir(workdir):
|
116
|
+
os.makedirs(workdir)
|
117
|
+
except OSError:
|
118
|
+
log_exception(_LOG)
|
119
|
+
else:
|
120
|
+
workdir = os.getcwd()
|
121
|
+
|
122
|
+
# (1) read in tree, alignment, and separate reference sequences from
|
123
|
+
# query sequences
|
124
|
+
tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir,
|
125
|
+
dry_run=dry_run)
|
126
|
+
|
127
|
+
# (2) compute closest leaves for all query sequences
|
128
|
+
query_votes_dict, query_top_vote_dict = getClosestLeaves(
|
129
|
+
aln_path, qaln_path, aln, qaln, workdir, dry_run=dry_run)
|
130
|
+
|
131
|
+
# (3) first assign each query to the subtree built using the closest
|
132
|
+
# leaf as the seed sequence
|
133
|
+
new_subtree_dict, placed_query_list = buildQuerySubtrees(
|
134
|
+
query_votes_dict, query_top_vote_dict, tree, leaf_dict,
|
135
|
+
dry_run=dry_run)
|
136
|
+
|
137
|
+
# (4) perform placement for each subtree
|
138
|
+
output_jplace = placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict,
|
139
|
+
placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock,
|
140
|
+
dry_run=dry_run)
|
141
|
+
|
142
|
+
# (5) write the output jplace to local
|
143
|
+
writeOutputJplace(output_jplace, dry_run=dry_run)
|
144
|
+
|
145
|
+
# shutdown pool
|
146
|
+
_LOG.warning('Shutting down ProcessPoolExecutor...')
|
147
|
+
pool.shutdown()
|
148
|
+
_LOG.warning('ProcessPoolExecutor shut down.')
|
149
|
+
|
150
|
+
# clean up temp files if not keeping
|
151
|
+
if not Configs.keeptemp:
|
152
|
+
_LOG.info('Removing temporary files...')
|
153
|
+
clean_temp_files()
|
154
|
+
|
155
|
+
# stop SCAMPP
|
156
|
+
send = time.perf_counter()
|
157
|
+
_LOG.info('SCAMPP completed in {} seconds...'.format(send - t0))
|
158
|
+
|
159
|
+
if dry_run:
|
160
|
+
return True
|
161
|
+
else:
|
162
|
+
return False
|
163
|
+
|
164
|
+
|
92
165
|
def clean_temp_files():
|
93
166
|
# all temporary files/directories to remove
|
94
167
|
temp_items = [f'tmp{Configs.tmpfilenbr}']
|
@@ -102,10 +175,14 @@ def clean_temp_files():
|
|
102
175
|
continue
|
103
176
|
_LOG.info(f'- Removed {temp}')
|
104
177
|
|
105
|
-
def parseArguments(dry_run=False):
|
178
|
+
def parseArguments(dry_run=False, method="BSCAMPP"):
|
106
179
|
global _root_dir, main_config_path
|
107
180
|
|
108
|
-
|
181
|
+
default_outdir = f"{method.lower()}_output"
|
182
|
+
default_outname = f"{method.lower()}_result"
|
183
|
+
|
184
|
+
parser = _init_parser(default_outdir=default_outdir,
|
185
|
+
default_outname=default_outname)
|
109
186
|
cmdline_args = sys.argv[1:]
|
110
187
|
|
111
188
|
if dry_run:
|
@@ -114,22 +191,27 @@ def parseArguments(dry_run=False):
|
|
114
191
|
|
115
192
|
# build config
|
116
193
|
buildConfigs(parser, cmdline_args)
|
117
|
-
_LOG.info('
|
194
|
+
_LOG.info('{} is running with: {}'.format(method,
|
118
195
|
' '.join(cmdline_args)))
|
119
196
|
getConfigs()
|
120
197
|
|
121
198
|
return parser, cmdline_args
|
122
199
|
|
123
|
-
def _init_parser(
|
200
|
+
def _init_parser(default_outdir="bscampp_output",
|
201
|
+
default_outname="bscampp_result"):
|
124
202
|
# example usage
|
125
203
|
example_usages = '''Example usages:
|
126
|
-
>
|
127
|
-
%(prog)s -i raxml.
|
204
|
+
> (1) Default
|
205
|
+
%(prog)s -i raxml.bestModel -t reference.tre -a alignment.fa
|
206
|
+
> (2) Separate alignment file for query sequences
|
207
|
+
%(prog)s -i raxml.bestModel -t reference.tre -a reference.fa -q query.fa
|
208
|
+
> (3) Use pplacer instead of EPA-ng as base method (need RAxML-ng info or FastTree log file)
|
209
|
+
%(prog)s -i fasttree.log -t reference.tre -a alignment.fa --placement-method pplacer
|
128
210
|
'''
|
129
211
|
|
130
212
|
parser = ArgumentParser(
|
131
213
|
description=(
|
132
|
-
"This program runs BSCAMPP, a scalable phylogenetic "
|
214
|
+
"This program runs BSCAMPP/SCAMPP, a scalable phylogenetic "
|
133
215
|
"placement framework that scales EPA-ng/pplacer "
|
134
216
|
"to very large tree placement."
|
135
217
|
),
|
@@ -156,7 +238,7 @@ def _init_parser():
|
|
156
238
|
# basic group
|
157
239
|
basic_group = parser.add_argument_group(
|
158
240
|
"Basic parameters".upper(),
|
159
|
-
"These are the basic parameters for BSCAMPP.")
|
241
|
+
"These are the basic parameters for BSCAMPP/SCAMPP.")
|
160
242
|
parser.groups['basic_group'] = basic_group
|
161
243
|
|
162
244
|
basic_group.add_argument('--placement-method', type=str,
|
@@ -185,10 +267,10 @@ def _init_parser():
|
|
185
267
|
required=False, default=None)
|
186
268
|
basic_group.add_argument("-d", "--outdir", type=str,
|
187
269
|
help="Directory path for output. Default: bscampp_output/",
|
188
|
-
required=False, default=
|
270
|
+
required=False, default=default_outdir)
|
189
271
|
basic_group.add_argument("-o", "--output", type=str, dest="outname",
|
190
272
|
help="Output file name. Default: bscampp_result.jplace",
|
191
|
-
required=False, default="
|
273
|
+
required=False, default=f"{default_outname}.jplace")
|
192
274
|
basic_group.add_argument("--threads", "--num-cpus", type=int,
|
193
275
|
dest="num_cpus",
|
194
276
|
help="Number of cores for parallelization, default: -1 (all)",
|
@@ -209,7 +291,8 @@ def _init_parser():
|
|
209
291
|
help="Integer size of the subtree. Default: 2000",
|
210
292
|
required=False, default=2000)
|
211
293
|
advance_group.add_argument("-V", "--votes", type=int,
|
212
|
-
help="Number of votes per
|
294
|
+
help="This is only used for BSCAMPP! Number of votes per "
|
295
|
+
"query sequence. Default: 5",
|
213
296
|
required=False, default=5)
|
214
297
|
advance_group.add_argument("--similarityflag", type=str2bool,
|
215
298
|
help="Boolean, True if maximizing sequence similarity "
|
@@ -228,6 +311,12 @@ def _init_parser():
|
|
228
311
|
misc_group.add_argument("--fragmentflag", type=str2bool,
|
229
312
|
help="If queries contains fragments. Default: True",
|
230
313
|
required=False, default=True)
|
314
|
+
misc_group.add_argument("--subtreetype", type=str,
|
315
|
+
help="(SCAMPP only) Options for collecting "
|
316
|
+
"nodes for the subtree - d for edge weighted "
|
317
|
+
"distances, n for node distances, h for Hamming "
|
318
|
+
"distances. Default: d",
|
319
|
+
required=False, default='d')
|
231
320
|
misc_group.add_argument("--keeptemp", type=str2bool,
|
232
321
|
help="Boolean, True to keep all temporary files. "
|
233
322
|
"Default: False",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: bscampp
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.2
|
4
4
|
Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
|
5
5
|
Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
|
6
6
|
License: MIT License
|
@@ -88,8 +88,12 @@ It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 vot
|
|
88
88
|
are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and 5 respectively (see [Usage](#usage) for more details
|
89
89
|
on customizing BSCAMPP).
|
90
90
|
|
91
|
+
#### SCAMPP
|
92
|
+
SCAMPP is also implemented in BSCAMPP. The user can invoke SCAMPP by running
|
93
|
+
`run_scampp.py` or `scampp` (if installed with PyPI) after installation.
|
94
|
+
|
91
95
|
# Installation
|
92
|
-
BSCAMPP was tested on **Python 3.
|
96
|
+
BSCAMPP was tested on **Python 3.8 to 3.12**. There are two ways to install and use BSCAMPP: (1) with PyPI, or
|
93
97
|
(2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP, please contact Eleanor Wedell
|
94
98
|
(ewedell@illinois.edu).
|
95
99
|
|
@@ -98,19 +102,25 @@ EPA-ng and/or pplacer are requirements to run BSCAMPP since BSCAMPP will use the
|
|
98
102
|
By default, BSCAMPP will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
|
99
103
|
We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
|
100
104
|
|
101
|
-
### (1) Install with `pip`
|
105
|
+
### (1) Install with `pip`
|
102
106
|
The easiest way to install BSCAMPP is to use `pip install`. This will also install all required Python packages.
|
103
107
|
|
104
108
|
```bash
|
105
109
|
# 1. install with pip (--user if no root access)
|
106
110
|
pip install bscampp [--user]
|
107
111
|
|
108
|
-
# 2.
|
112
|
+
# 2. Four binary executables will be installed. The first time
|
109
113
|
# running any will create a config file at
|
110
114
|
# ~/.bscampp/main.config that resolves the links to all
|
111
115
|
# external software (e.g., epa-ng, pplacer)
|
116
|
+
|
117
|
+
# ---- BSCAMPP functions
|
112
118
|
bscampp [-h] # or
|
113
119
|
run_bscampp.py [-h]
|
120
|
+
|
121
|
+
# ---- SCAMPP functions
|
122
|
+
scampp [-h] # or
|
123
|
+
run_scampp.py
|
114
124
|
```
|
115
125
|
|
116
126
|
### (2) Install from GitHub
|
@@ -160,7 +170,8 @@ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment
|
|
160
170
|
### (3) Using `pplacer` as the base placement method
|
161
171
|
```bash
|
162
172
|
run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
163
|
-
-a [reference alignment] -q [query sequence alignment]
|
173
|
+
-a [reference alignment] -q [query sequence alignment] \
|
174
|
+
--placement-method pplacer
|
164
175
|
```
|
165
176
|
|
166
177
|
### More comprehensive usage
|
@@ -221,14 +232,23 @@ run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
|
|
221
232
|
> Temporary file indexing. Default: 0
|
222
233
|
> --fragmentflag FRAGMENTFLAG
|
223
234
|
> If queries contains fragments. Default: True
|
235
|
+
> --subtreetype SUBTREETYPE
|
236
|
+
> (SCAMPP only) Options for collecting nodes for the
|
237
|
+
> subtree - d for edge weighted distances, n for node
|
238
|
+
> distances, h for Hamming distances. Default: d
|
224
239
|
> --keeptemp KEEPTEMP Boolean, True to keep all temporary files. Default:
|
225
240
|
False
|
226
241
|
```
|
227
242
|
|
228
243
|
|
229
244
|
# Example Code and Data
|
230
|
-
Example script and data are provided in this GitHub repository in `examples/`.
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
- `
|
245
|
+
Example script and data are provided in this GitHub repository in `examples/`.
|
246
|
+
The data is originally from the
|
247
|
+
[RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
|
248
|
+
* `examples/run_bscampp.sh`: contains a simple script to test BSCAMPP with
|
249
|
+
`epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement
|
250
|
+
tree. The info file is from RAxML-ng when running `epa-ng`, and from
|
251
|
+
FastTree-2 when running `pplacer`.
|
252
|
+
- `run_bscampp.sh` will invoke BSCAMPP with `epa-ng`.
|
253
|
+
- `run_bscampp.sh pplacer` will invoke BSCAMPP with `pplacer`.
|
254
|
+
* `examples/run_scampp.sh`: the same test script but running SCAMPP.
|
@@ -37,6 +37,8 @@ classifiers = [
|
|
37
37
|
[project.scripts]
|
38
38
|
bscampp = "bscampp.pipeline:bscampp_pipeline"
|
39
39
|
"run_bscampp.py" = "bscampp.pipeline:bscampp_pipeline"
|
40
|
+
scampp = "bscampp.pipeline:scampp_pipeline"
|
41
|
+
"run_scampp.py" = "bscampp.pipeline:scampp_pipeline"
|
40
42
|
|
41
43
|
[project.urls]
|
42
44
|
Homepage = "https://github.com/ewedell/BSCAMPP"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# tests/test_dry_run.py
|
2
|
+
import pytest, os
|
3
|
+
from bscampp.pipeline import bscampp_pipeline, scampp_pipeline
|
4
|
+
|
5
|
+
# test BSCAMPP
|
6
|
+
def test_bscampp_pipeline():
|
7
|
+
res = bscampp_pipeline(dry_run=True)
|
8
|
+
assert res == True
|
9
|
+
|
10
|
+
# remove bscampp_output that's created
|
11
|
+
if os.path.isdir('bscampp_output'):
|
12
|
+
os.rmdir('bscampp_output')
|
13
|
+
|
14
|
+
# test SCAMPP (almost the same as BSCAMPP)
|
15
|
+
def test_scampp_pipeline():
|
16
|
+
res = scampp_pipeline(dry_run=True)
|
17
|
+
assert res == True
|
18
|
+
|
19
|
+
# remove scampp_output that's created
|
20
|
+
if os.path.isdir('scampp_output'):
|
21
|
+
os.rmdir('scampp_output')
|
@@ -1,11 +0,0 @@
|
|
1
|
-
# tests/test_dry_run.py
|
2
|
-
import pytest, os
|
3
|
-
from bscampp.pipeline import bscampp_pipeline
|
4
|
-
|
5
|
-
def test_bscampp_pipeline():
|
6
|
-
res = bscampp_pipeline(dry_run=True)
|
7
|
-
assert res == True
|
8
|
-
|
9
|
-
# remove bscampp_output that's created
|
10
|
-
if os.path.isdir('bscampp_output'):
|
11
|
-
os.rmdir('bscampp_output')
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp
RENAMED
File without changes
|
{bscampp-1.0.1b0 → bscampp-1.0.2}/bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|