bscampp 1.0.1b0__py3-none-any.whl → 1.0.2b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bscampp/__init__.py CHANGED
@@ -12,7 +12,7 @@ import logging, os
12
12
  # not really needed for BSCAMPP but safe to update here
13
13
  os.sys.setrecursionlimit(1000000)
14
14
 
15
- __version__ = "1.0.1b"
15
+ __version__ = "1.0.2b"
16
16
  _INSTALL_PATH = __path__[0]
17
17
 
18
18
  # global variables to store all loggers
bscampp/configs.py CHANGED
@@ -50,6 +50,7 @@ class Configs:
50
50
  # miscellaneous
51
51
  tmpfilenbr = 0
52
52
  fragmentflag = True
53
+ subtreetype = 'd'
53
54
 
54
55
  # check if the given configuration is valid to add
55
56
  def set_valid_configuration(name, conf):
bscampp/functions.py CHANGED
@@ -71,6 +71,9 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
71
71
  query_votes_dict = dict()
72
72
  query_top_vote_dict = dict()
73
73
  tmp_output = os.path.join(workdir, 'closest.txt')
74
+
75
+ if Configs.subtreetype == "h":
76
+ Configs.votes = Configs.subtreesize
74
77
 
75
78
  cmd = []
76
79
  if Configs.similarityflag:
@@ -226,6 +229,56 @@ def assignQueriesToSubtrees(query_votes_dict, query_top_vote_dict,
226
229
  _LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
227
230
  return new_subtree_dict, placed_query_list
228
231
 
232
+
233
+ '''
234
+ Function to assign queries to subtrees as used in SCAMPP
235
+ (subtrees are built using the nearest leaf as the seed sequence)
236
+ '''
237
+ def buildQuerySubtrees(query_votes_dict, query_top_vote_dict,
238
+ tree, leaf_dict, dry_run=False):
239
+ t0 = time.perf_counter()
240
+ _LOG.info('(SCAMPP) Building query subtree for placement...')
241
+
242
+ if dry_run:
243
+ return dict(), []
244
+
245
+ # (1) go over the query seed sequences to see if any queries use
246
+ # the same seed sequence (i.e. subtree)
247
+ seed_queries = dict()
248
+ for query, closest_leaf in query_top_vote_dict.items():
249
+ if closest_leaf not in seed_queries:
250
+ seed_queries[closest_leaf] = [query]
251
+ else:
252
+ seed_queries[closest_leaf].append(query)
253
+
254
+ new_subtree_dict = dict()
255
+ # assign queries to subtrees, and remove them from the pool
256
+ # repeat until all queries are assigned
257
+ _total = 0
258
+ for seed_label, queries in seed_queries.items():
259
+ ####### additional logging for tracking progress
260
+ _total += 1
261
+ if _total % 1000 == 0 or _total == len(seed_queries):
262
+ _LOG.info(f"- Built {_total}/{len(seed_queries)} subtrees")
263
+
264
+ node_y = leaf_dict[seed_label]
265
+ # extract [subtreesize] leaves
266
+ if Configs.subtreetype == "h":
267
+ labels = query_votes_dict[queries[0]]
268
+ elif Configs.subtreetype == "n":
269
+ labels = utils.subtree_nodes(tree, node_y, Configs.subtreesize)
270
+ else:
271
+ labels = utils.subtree_nodes_with_edge_length(tree, node_y,
272
+ Configs.subtreesize)
273
+ subtree = tree.extract_tree_with(labels)
274
+ new_subtree_dict[subtree] = queries
275
+
276
+ placed_query_list = []
277
+
278
+ t1 = time.perf_counter()
279
+ _LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
280
+ return new_subtree_dict, placed_query_list
281
+
229
282
  '''
230
283
  Helper function to run a single placement task. Designed to use with
231
284
  multiprocessing
@@ -263,12 +316,16 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
263
316
  if len(query_list) == 0:
264
317
  continue
265
318
  final_subtree_count += 1
319
+
320
+ subtree_dir = os.path.join(workdir, f'subtree_{final_subtree_count}')
321
+ if not os.path.isdir(subtree_dir):
322
+ os.makedirs(subtree_dir)
266
323
 
267
324
  # name all temporary output files
268
- tmp_tree = os.path.join(workdir, 'tree')
269
- tmp_aln = os.path.join(workdir, f'subtree_{final_subtree_count}_aln.fa')
270
- tmp_qaln = os.path.join(workdir, f'subtree_{final_subtree_count}_qaln.fa')
271
- tmp_output = os.path.join(workdir,
325
+ tmp_tree = os.path.join(subtree_dir, 'tree')
326
+ tmp_aln = os.path.join(subtree_dir, f'subtree_{final_subtree_count}_aln.fa')
327
+ tmp_qaln = os.path.join(subtree_dir, f'subtree_{final_subtree_count}_qaln.fa')
328
+ tmp_output = os.path.join(subtree_dir,
272
329
  'subtree_{}_{}.jplace'.format(
273
330
  final_subtree_count, Configs.placement_method))
274
331
 
@@ -292,13 +349,13 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
292
349
  job = EPAngJob(path=Configs.epang_path,
293
350
  info_path=Configs.info_path, tree_path=tmp_tree,
294
351
  aln_path=tmp_aln, qaln_path=tmp_qaln,
295
- outdir=workdir, num_cpus=Configs.num_cpus)
352
+ outdir=subtree_dir, num_cpus=Configs.num_cpus)
296
353
  # for EPA-ng, ensure that outpath name is changed to the one we want
297
- _outpath = job.run()
354
+ _outpath = job.run(logging=f'subtree_{final_subtree_count}')
298
355
  os.system('mv {} {}'.format(_outpath, tmp_output))
299
356
  elif Configs.placement_method == 'pplacer':
300
357
  # build ref_pkg with info and tmp_tree and tmp_aln
301
- refpkg_dir = os.path.join(workdir,
358
+ refpkg_dir = os.path.join(subtree_dir,
302
359
  f'subtree_{final_subtree_count}.refpkg')
303
360
  taxit_job = TaxtasticJob(path=Configs.taxit_path,
304
361
  outdir=refpkg_dir, name=f'subtree_{final_subtree_count}',
@@ -311,7 +368,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
311
368
  refpkg_dir=refpkg_dir, model=Configs.model,
312
369
  outpath=tmp_output, num_cpus=Configs.num_cpus,
313
370
  qaln_path=tmp_qaln)
314
- tmp_output = job.run()
371
+ tmp_output = job.run(logging=f'subtree_{final_subtree_count}')
315
372
  else:
316
373
  raise ValueError(
317
374
  f"Placement method {Configs.placement_method} not recognized")
bscampp/jobs.py CHANGED
@@ -3,7 +3,7 @@ from subprocess import Popen
3
3
  from abc import abstractmethod
4
4
 
5
5
  from bscampp import get_logger, log_exception
6
- from bscampp.configs import Configs
6
+ #from bscampp.configs import Configs
7
7
 
8
8
  _LOG = get_logger(__name__)
9
9
 
@@ -25,7 +25,7 @@ class Job(object):
25
25
  return self.pid
26
26
 
27
27
  # run the job with given invocation and raise errors when encountered
28
- def run(self, stdin="", lock=None, logging=False, shell=False):
28
+ def run(self, stdin="", lock=None, logging=None, shell=False):
29
29
  try:
30
30
  cmd, outpath = self.get_invocation()
31
31
  _LOG.debug(f'Running job_type: {self.job_type}, output: {outpath}')
@@ -57,18 +57,22 @@ class Job(object):
57
57
  # logging to local or to PIPE
58
58
  stderr, stdout = '', ''
59
59
  scmd = ' '.join(cmd)
60
- if logging:
60
+ if logging != None:
61
61
  logpath = os.path.join(
62
- os.path.dirname(outpath), 'f{self.job_type}.txt')
62
+ os.path.dirname(outpath),
63
+ f'{logging}_{self.job_type}.txt')
63
64
  outlogging = open(logpath, 'w', 1)
64
65
 
65
66
  # TODO: may need to deal with piping in the future, for now
66
67
  # it is not needed
67
68
  p = Popen(cmd, text=True, bufsize=1,
68
69
  stdin=subprocess.PIPE,
69
- stdout=outlogging, stderr=subprocess.PIPE)
70
+ stdout=outlogging, stderr=outlogging)
70
71
  self.pid = p.pid
71
72
  stdout, stderr = p.communicate(input=stdin)
73
+ # stdout and stderr are both written to outlogging
74
+ # hence, assign them to be empty strings
75
+ stdout, stderr = '', ''
72
76
  outlogging.close()
73
77
  else:
74
78
  p = Popen(cmd, text=True, bufsize=1,
@@ -92,16 +96,22 @@ class Job(object):
92
96
  else:
93
97
  error_msg = ' '.join([f'Error occurred running {self.job_type}.',
94
98
  f'returncode: {self.returncode}'])
99
+ if logging != None:
100
+ logpath = '\nLOGPATH: ' + os.path.join(
101
+ os.path.dirname(outpath),
102
+ f'{logging}_{self.job_type}.txt')
103
+ else:
104
+ logpath = ''
95
105
  if lock:
96
106
  try:
97
107
  lock.acquire()
98
108
  _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
99
- '\nSTDERR: ' + stderr)
109
+ '\nSTDERR: ' + stderr + logpath)
100
110
  finally:
101
111
  lock.release()
102
112
  else:
103
113
  _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
104
- '\nSTDERR: ' + stderr)
114
+ '\nSTDERR: ' + stderr + logpath)
105
115
  exit(1)
106
116
  except Exception:
107
117
  log_exception(_LOG)
@@ -177,7 +187,7 @@ A pplacer job that uses taxtastic refpkg to place sequences
177
187
  class PplacerTaxtasticJob(Job):
178
188
  def __init__(self, **kwargs):
179
189
  Job.__init__(self)
180
- self.job_type = 'pplacer-taxtastic'
190
+ self.job_type = 'pplacer'
181
191
 
182
192
  self.path = ''
183
193
  self.refpkg_dir = ''
bscampp/pipeline.py CHANGED
@@ -89,6 +89,79 @@ def bscampp_pipeline(*args, **kwargs):
89
89
  else:
90
90
  return False
91
91
 
92
+
93
+ # main pipeline for SCAMPP
94
+ def scampp_pipeline(*args, **kwargs):
95
+ t0 = time.perf_counter()
96
+ m = Manager(); lock = m.Lock()
97
+
98
+ # set up a dry run if specified
99
+ dry_run = False
100
+ if 'dry_run' in kwargs and isinstance(kwargs['dry_run'], bool):
101
+ dry_run = kwargs['dry_run']
102
+
103
+ # parse command line arguments and build configurations
104
+ parser, cmdline_args = parseArguments(dry_run=dry_run, method="SCAMPP")
105
+
106
+ # initialize multiprocessing (if needed)
107
+ _LOG.warning('Initializing ProcessPoolExecutor...')
108
+ pool = ProcessPoolExecutor(Configs.num_cpus, initializer=initial_pool,
109
+ initargs=(parser, cmdline_args,))
110
+
111
+ # (0) temporary files wrote to here
112
+ if not dry_run:
113
+ workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
114
+ try:
115
+ if not os.path.isdir(workdir):
116
+ os.makedirs(workdir)
117
+ except OSError:
118
+ log_exception(_LOG)
119
+ else:
120
+ workdir = os.getcwd()
121
+
122
+ # (1) read in tree, alignment, and separate reference sequences from
123
+ # query sequences
124
+ tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir,
125
+ dry_run=dry_run)
126
+
127
+ # (2) compute closest leaves for all query sequences
128
+ query_votes_dict, query_top_vote_dict = getClosestLeaves(
129
+ aln_path, qaln_path, aln, qaln, workdir, dry_run=dry_run)
130
+
131
+ # (3) first assign each query to the subtree built using the closest
132
+ # leaf as the seed sequence
133
+ new_subtree_dict, placed_query_list = buildQuerySubtrees(
134
+ query_votes_dict, query_top_vote_dict, tree, leaf_dict,
135
+ dry_run=dry_run)
136
+
137
+ # (4) perform placement for each subtree
138
+ output_jplace = placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict,
139
+ placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock,
140
+ dry_run=dry_run)
141
+
142
+ # (5) write the output jplace to local
143
+ writeOutputJplace(output_jplace, dry_run=dry_run)
144
+
145
+ # shutdown pool
146
+ _LOG.warning('Shutting down ProcessPoolExecutor...')
147
+ pool.shutdown()
148
+ _LOG.warning('ProcessPoolExecutor shut down.')
149
+
150
+ # clean up temp files if not keeping
151
+ if not Configs.keeptemp:
152
+ _LOG.info('Removing temporary files...')
153
+ clean_temp_files()
154
+
155
+ # stop SCAMPP
156
+ send = time.perf_counter()
157
+ _LOG.info('SCAMPP completed in {} seconds...'.format(send - t0))
158
+
159
+ if dry_run:
160
+ return True
161
+ else:
162
+ return False
163
+
164
+
92
165
  def clean_temp_files():
93
166
  # all temporary files/directories to remove
94
167
  temp_items = [f'tmp{Configs.tmpfilenbr}']
@@ -102,10 +175,14 @@ def clean_temp_files():
102
175
  continue
103
176
  _LOG.info(f'- Removed {temp}')
104
177
 
105
- def parseArguments(dry_run=False):
178
+ def parseArguments(dry_run=False, method="BSCAMPP"):
106
179
  global _root_dir, main_config_path
107
180
 
108
- parser = _init_parser()
181
+ default_outdir = f"{method.lower()}_output"
182
+ default_outname = f"{method.lower()}_result"
183
+
184
+ parser = _init_parser(default_outdir=default_outdir,
185
+ default_outname=default_outname)
109
186
  cmdline_args = sys.argv[1:]
110
187
 
111
188
  if dry_run:
@@ -114,22 +191,27 @@ def parseArguments(dry_run=False):
114
191
 
115
192
  # build config
116
193
  buildConfigs(parser, cmdline_args)
117
- _LOG.info('BSCAMPP is running with: {}'.format(
194
+ _LOG.info('{} is running with: {}'.format(method,
118
195
  ' '.join(cmdline_args)))
119
196
  getConfigs()
120
197
 
121
198
  return parser, cmdline_args
122
199
 
123
- def _init_parser():
200
+ def _init_parser(default_outdir="bscampp_output",
201
+ default_outname="bscampp_result"):
124
202
  # example usage
125
203
  example_usages = '''Example usages:
126
- > default
127
- %(prog)s -i raxml.info
204
+ > (1) Default
205
+ %(prog)s -i raxml.bestModel -t reference.tre -a alignment.fa
206
+ > (2) Separate alignment file for query sequences
207
+ %(prog)s -i raxml.bestModel -t reference.tre -a reference.fa -q query.fa
208
+ > (3) Use pplacer instead of EPA-ng as base method (need RAxML-ng info or FastTree log file)
209
+ %(prog)s -i fasttree.log -t reference.tre -a alignment.fa --placement-method pplacer
128
210
  '''
129
211
 
130
212
  parser = ArgumentParser(
131
213
  description=(
132
- "This program runs BSCAMPP, a scalable phylogenetic "
214
+ "This program runs BSCAMPP/SCAMPP, a scalable phylogenetic "
133
215
  "placement framework that scales EPA-ng/pplacer "
134
216
  "to very large tree placement."
135
217
  ),
@@ -156,7 +238,7 @@ def _init_parser():
156
238
  # basic group
157
239
  basic_group = parser.add_argument_group(
158
240
  "Basic parameters".upper(),
159
- "These are the basic parameters for BSCAMPP.")
241
+ "These are the basic parameters for BSCAMPP/SCAMPP.")
160
242
  parser.groups['basic_group'] = basic_group
161
243
 
162
244
  basic_group.add_argument('--placement-method', type=str,
@@ -185,10 +267,10 @@ def _init_parser():
185
267
  required=False, default=None)
186
268
  basic_group.add_argument("-d", "--outdir", type=str,
187
269
  help="Directory path for output. Default: bscampp_output/",
188
- required=False, default="bscampp_output")
270
+ required=False, default=default_outdir)
189
271
  basic_group.add_argument("-o", "--output", type=str, dest="outname",
190
272
  help="Output file name. Default: bscampp_result.jplace",
191
- required=False, default="bscampp_result.jplace")
273
+ required=False, default=f"{default_outname}.jplace")
192
274
  basic_group.add_argument("--threads", "--num-cpus", type=int,
193
275
  dest="num_cpus",
194
276
  help="Number of cores for parallelization, default: -1 (all)",
@@ -209,7 +291,8 @@ def _init_parser():
209
291
  help="Integer size of the subtree. Default: 2000",
210
292
  required=False, default=2000)
211
293
  advance_group.add_argument("-V", "--votes", type=int,
212
- help="Number of votes per query sequence. Default: 5",
294
+ help="This is only used for BSCAMPP! Number of votes per "
295
+ "query sequence. Default: 5",
213
296
  required=False, default=5)
214
297
  advance_group.add_argument("--similarityflag", type=str2bool,
215
298
  help="Boolean, True if maximizing sequence similarity "
@@ -228,6 +311,12 @@ def _init_parser():
228
311
  misc_group.add_argument("--fragmentflag", type=str2bool,
229
312
  help="If queries contains fragments. Default: True",
230
313
  required=False, default=True)
314
+ misc_group.add_argument("--subtreetype", type=str,
315
+ help="(SCAMPP only) Options for collecting "
316
+ "nodes for the subtree - d for edge weighted "
317
+ "distances, n for node distances, h for Hamming "
318
+ "distances. Default: d",
319
+ required=False, default='d')
231
320
  misc_group.add_argument("--keeptemp", type=str2bool,
232
321
  help="Boolean, True to keep all temporary files. "
233
322
  "Default: False",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bscampp
3
- Version: 1.0.1b0
3
+ Version: 1.0.2b0
4
4
  Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: numpy>=1.21.6
50
50
  Requires-Dist: treeswift>=1.1.45
51
51
  Requires-Dist: taxtastic>=0.9.3
52
52
 
53
- # BSCAMPP - A Scalable Phylogenetic Placement Method and Framework
53
+ # BSCAMPP and SCAMPP - Two Scalable Phylogenetic Placement Methods and Frameworks
54
54
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
55
55
  [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
56
56
  [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
@@ -70,47 +70,63 @@ Requires-Dist: taxtastic>=0.9.3
70
70
  3. Alignment of query sequences (can be combined with ii.).
71
71
  4. Tree info file.
72
72
  - (EPA-ng as base method), RAxML-ng info file, typically with suffix `.bestModel`.
73
- - (pplacer as base method), RAxML-ng or FastTree log file.
73
+ - (pplacer as base method), RAxML-ng or FastTree log file containing model parameters.
74
74
  * **Output**
75
75
  1. Placement results of query sequences in the reference tree in `.jplace` format.
76
76
 
77
77
 
78
- BSCAMPP is an extension and scalable solution to its previous method [SCAMPP](https://github.com/chry04/PLUSplacer) for phylogenetic placement.
79
- BSCAMPP achieves some magnitudes of speedup compared to the SCAMPP framework.
78
+ SCAMPP and BSCAMPP are two scalable solutions for phylogenetic placement. SCAMPP is designed more for accuracy
79
+ and BSCAMPP is designed more for speed. BSCAMPP achieves some magnitudes of speedup compared to SCAMPP.
80
80
  The core algorithm is described in detail at <https://doi.org/10.1101/2022.10.26.513936>.
81
- In short, BSCAMPP in default uses EPA-ng as the base placement method, allowing it to scale to placement trees of up to ~200,000 leaves.
82
- BSCAMPP achieves this by extracting appropriate subtrees and assigning each query to its most fitting subtree.
81
+ In short, both frameworks use EPA-ng as the base placement method by default, allowing them to scale to placement trees
82
+ of at least ~200,000 leaves. Our two methods achieve this by extracting appropriate subtrees and assigning each query
83
+ to its most fitting subtree.
83
84
 
84
- BSCAMPP essentially is a divide-and-conquer framework and can be used with any base placement methods (e.g., `pplacer` as well).
85
- Currently, BSCAMPP is implemented with `epa-ng` and `pplacer`.
85
+ They are divide-and-conquer frameworks and can be used with any base placement method (e.g., `pplacer` as well).
86
+ Currently, BSCAMPP and SCAMPP are implemented with `epa-ng` and `pplacer`.
86
87
 
87
- It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 votes based on current best results, especially if sequences
88
- are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and 5 respectively (see [Usage](#usage) for more details
89
- on customizing BSCAMPP).
88
+ #### BSCAMPP
89
+ It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 votes based on current best results,
90
+ especially if sequences are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and
91
+ 5 respectively (see [Usage](#usage) for more details on customizing BSCAMPP).
92
+
93
+ #### SCAMPP
94
+ SCAMPP is also implemented in BSCAMPP, originally from <https://github.com/chry04/PLUSplacer>.
95
+ Its default also uses EPA-ng and a subtree size of 2,000.
96
+ The user can invoke SCAMPP by running `run_scampp.py` or `scampp` (if installed with PyPI) after installation.
90
97
 
91
98
  # Installation
92
- BSCAMPP was tested on **Python 3.7 to 3.12**. There are two ways to install and use BSCAMPP: (1) with PyPI, or
93
- (2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP, please contact Eleanor Wedell
94
- (ewedell@illinois.edu).
99
+ BSCAMPP and SCAMPP were tested on **Python 3.8 to 3.12**. There are two ways to install:
100
+ (1) with PyPI, or (2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP or SCAMPP,
101
+ please contact Eleanor Wedell (ewedell2@illinois.edu).
95
102
 
96
103
  ### External requirements
97
- EPA-ng and/or pplacer are requirements to run BSCAMPP since BSCAMPP will use them as the base phylogenetic placement methods.
98
- By default, BSCAMPP will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
99
- We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
104
+ * **Base placement method**:
105
+ EPA-ng and/or pplacer are requirements since BSCAMPP and SCAMPP will use them as the base phylogenetic placement methods.
106
+ By default, the software will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
107
+ We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
108
+ * **C++ OpenMP**:
109
+ The similarity comparison between sequences is implemented in C++ and uses OpenMP for speed, so OpenMP is required to run the pre-compiled binaries.
100
110
 
101
- ### (1) Install with `pip` (Coming soon)
102
- The easiest way to install BSCAMPP is to use `pip install`. This will also install all required Python packages.
111
+ ### (1) Install with `pip`
112
+ The easiest way to install BSCAMPP and SCAMPP is to use `pip install`. This will also install all required Python packages.
103
113
 
104
114
  ```bash
105
115
  # 1. install with pip (--user if no root access)
106
116
  pip install bscampp [--user]
107
117
 
108
- # 2. Two binary executables will be installed. The first time
118
+ # 2. Four binary executables will be installed. The first time
109
119
  # running any will create a config file at
110
120
  # ~/.bscampp/main.config that resolves the links to all
111
121
  # external software (e.g., epa-ng, pplacer)
122
+
123
+ # ---- BSCAMPP functions
112
124
  bscampp [-h] # or
113
125
  run_bscampp.py [-h]
126
+
127
+ # ---- SCAMPP functions
128
+ scampp [-h] # or
129
+ run_scampp.py [-h]
114
130
  ```
115
131
 
116
132
  ### (2) Install from GitHub
@@ -132,22 +148,29 @@ git clone https://github.com/ewedell/BSCAMPP.git
132
148
  # 2. Install all requirements
133
149
  pip install -r requirements.txt
134
150
 
135
- # 3. Execute BSCAMPP executable `run_bscampp.py`
151
+ # 3. Execute BSCAMPP/SCAMPP executables
136
152
  python run_bscampp.py [-h]
153
+ python run_scampp.py [-h]
137
154
  ```
138
155
 
139
156
  # Usage
140
157
  All parameter settings can be found by running
141
158
  ```bash
142
- run_bscampp.py -h
159
+ run_bscampp.py -h #OR
160
+ run_scampp.py -h
143
161
  ```
144
162
 
145
163
  ### (1) Default case (`epa-ng`)
146
164
  ```bash
165
+ # for BSCAMPP
147
166
  run_bscampp.py -i [raxml best model] -t [reference tree] -a [alignment file]
167
+
168
+ # for SCAMPP
169
+ run_scampp.py -i [raxml best model] -t [reference tree] -a [alignment file]
148
170
  ```
149
- To run BSCAMPP in its default mode with EPA-ng. `[alignment file]` should contain both sequences from the placement tree and
150
- the query sequences to be placed. This will create an output directory `bscampp_output` and write the placement results to
171
+ BSCAMPP and SCAMPP in default mode run EPA-ng as the base method. `[alignment file]` should
172
+ contain both sequences from the placement tree and the query sequences to be placed.
173
+ This will create an output directory `bscampp_output` (`scampp_output` for SCAMPP) and write the placement results to
151
174
  `bscampp_output/bscampp_result.jplace` (`scampp_output/scampp_result.jplace` for SCAMPP).
152
175
 
153
176
  ### (2) Separately giving query alignment and finer control of outputs
@@ -160,7 +183,13 @@ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment
160
183
  ### (3) Using `pplacer` as the base placement method
161
184
  ```bash
162
185
  run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
163
- -a [reference alignment] -q [query sequence alignment]
186
+ -a [reference alignment] -q [query sequence alignment] \
187
+ --placement-method pplacer
188
+ ```
189
+ ### (4) Changing the number of votes to 15 for BSCAMPP
190
+ ```bash
191
+ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment] \
192
+ -q [query sequence alignment] -V 15
164
193
  ```
165
194
 
166
195
  ### More comprehensive usage
@@ -221,14 +250,23 @@ run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
221
250
  > Temporary file indexing. Default: 0
222
251
  > --fragmentflag FRAGMENTFLAG
223
252
  > If queries contains fragments. Default: True
253
+ > --subtreetype SUBTREETYPE
254
+ > (SCAMPP only) Options for collecting nodes for the
255
+ > subtree - d for edge weighted distances, n for node
256
+ > distances, h for Hamming distances. Default: d
224
257
  > --keeptemp KEEPTEMP Boolean, True to keep all temporary files. Default:
225
258
  False
226
259
  ```
227
260
 
228
261
 
229
262
  # Example Code and Data
230
- Example script and data are provided in this GitHub repository in `examples/`. The data is originally from the [RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
231
- * `examples/run.sh`: contains a simple script to test BSCAMPP with `epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement tree.
232
- The info file is from RAxML-ng when running `epa-ng`, and from FastTree-2 when running `pplacer`.
233
- - `run.sh` will invoke BSCAMPP with `epa-ng`.
234
- - `run.sh pplacer` will invoke BSCAMPP with `pplacer`.
263
+ Example script and data are provided in this GitHub repository in `examples/`.
264
+ The data is originally from the
265
+ [RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
266
+ * `examples/run_bscampp.sh`: contains a simple script to test BSCAMPP with
267
+ `epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement
268
+ tree. The info file is from RAxML-ng when running `epa-ng`, and from
269
+ FastTree-2 when running `pplacer`.
270
+ - `run_bscampp.sh` will invoke BSCAMPP with `epa-ng`.
271
+ - `run_bscampp.sh pplacer` will invoke BSCAMPP with `pplacer`.
272
+ * `examples/run_scampp.sh`: the same test script but running SCAMPP.
@@ -1,10 +1,10 @@
1
- bscampp/__init__.py,sha256=Wnn_Bm543hAgQCd9PmwdT_kFBZzGLDW4dcSeP0iLVTk,2290
2
- bscampp/configs.py,sha256=XuzRbtcUE5bExe-vEZGZ1CeXBmp4oP7LWFveQySx2xs,5745
1
+ bscampp/__init__.py,sha256=mZGsa6XRWkYMo62gK_z5OFyFxRZHadW_SWHAirS1Dvg,2290
2
+ bscampp/configs.py,sha256=3HJHLN2fLV5Tv3TJL95NpOuSXUV6CvqxRqCOM6TpbJQ,5767
3
3
  bscampp/default.config,sha256=CEfsUHBy--vwJhEcUuJ0btfuGQWb_lKMVWUIP9f5YGw,112
4
- bscampp/functions.py,sha256=cPT5eSy_8CSNzDx-5ma43Hp9_AMmaWSTXM89bjdrkRs,15640
4
+ bscampp/functions.py,sha256=QYI5RsUEMGc6jLPzFdInpmxA8wiYyN7785P3WxWYiTo,17839
5
5
  bscampp/init_configs.py,sha256=EA9sMN5jWj6zj2b-7tN19LhX2Ef61ByQLxQRLHAqLDM,3600
6
- bscampp/jobs.py,sha256=de0Dr3ynORwACJqVbeWDfqTwJhWvMYG-7yfRYirGx8M,6703
7
- bscampp/pipeline.py,sha256=UT8y6ObFZ12q5Vw3731r50k8pLMioFNV4qCy0tz_wuk,9550
6
+ bscampp/jobs.py,sha256=PrVMJBabi4cYlrxVLo37XPOY82fY0zZ8Iyp9CWCNWhU,7181
7
+ bscampp/pipeline.py,sha256=C6I1vWeA6Rq_spPHy_il1FJA_DomWHUHYHLUUk9SnLk,13024
8
8
  bscampp/utils.py,sha256=ragaI14Lqb2fVp_uYDkFQnV7a50G9-sUOWdVM-sNhUE,29005
9
9
  bscampp/tools/epa-ng,sha256=f3EVoZAAOXLN6l521qp-TrWDl5J2nqL3tGgjPaQE9WQ,3772096
10
10
  bscampp/tools/pplacer,sha256=p0H4eo9uuiYoWS_kJbPfauOV99i7BXJdZSiwXIuLxTw,7834576
@@ -17,9 +17,9 @@ bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp,sha256=xCmyAT-OZJOD
17
17
  bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp,sha256=eKxgODRlpf0hU84QjNhigvRhWCT9tiJZjA5oQFQ1bUk,7404
18
18
  bscampp/tools/hamming_distance/src/homology.cpp,sha256=ZE0uXZWQ-cN4U1Wk5kUr_KKHgzsgA6Sno-IViRa4tmI,6053
19
19
  bscampp/tools/hamming_distance/src/new_hamming.cpp,sha256=fBRm99RquBZgZjaLOn9xDI3cH9NchhrxKbL-11j8fmk,5342
20
- bscampp-1.0.1b0.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
21
- bscampp-1.0.1b0.dist-info/METADATA,sha256=hCpwS1vbd07cuwW7D5AkiO_I_GP-kqk21IH2yxiPUwM,11144
22
- bscampp-1.0.1b0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
23
- bscampp-1.0.1b0.dist-info/entry_points.txt,sha256=dZygBmg2OncVyeLeIjh_9e-GBIOesFvMemyW9BRRcXY,113
24
- bscampp-1.0.1b0.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
25
- bscampp-1.0.1b0.dist-info/RECORD,,
20
+ bscampp-1.0.2b0.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
21
+ bscampp-1.0.2b0.dist-info/METADATA,sha256=OWSIl8dFMrgzB9Xe8geqXQw2fBNd8hta3p40O5Q9T5Q,12509
22
+ bscampp-1.0.2b0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
23
+ bscampp-1.0.2b0.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
24
+ bscampp-1.0.2b0.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
25
+ bscampp-1.0.2b0.dist-info/RECORD,,
@@ -1,3 +1,5 @@
1
1
  [console_scripts]
2
2
  bscampp = bscampp.pipeline:bscampp_pipeline
3
3
  run_bscampp.py = bscampp.pipeline:bscampp_pipeline
4
+ run_scampp.py = bscampp.pipeline:scampp_pipeline
5
+ scampp = bscampp.pipeline:scampp_pipeline