PyPI - bscampp - Versions diffs - 1.0.1b0__py3-none-any.whl → 1.0.2b0__py3-none-any.whl - Mend

bscampp 1.0.1b0py3-none-any.whl → 1.0.2b0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

bscampp/__init__.py +1 -1
bscampp/configs.py +1 -0
bscampp/functions.py +65 -8
bscampp/jobs.py +18 -8
bscampp/pipeline.py +100 -11
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/METADATA +69 -31
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/RECORD +11 -11
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/entry_points.txt +2 -0
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/LICENSE +0 -0
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/WHEEL +0 -0
{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/top_level.txt +0 -0

bscampp/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ import logging, os
 # not really needed for BSCAMPP but safe to update here
 os.sys.setrecursionlimit(1000000)
-__version__ = "1.0.1b"
+__version__ = "1.0.2b"
 _INSTALL_PATH = __path__[0]
 # global variables to store all loggers

bscampp/configs.py CHANGED Viewed

@@ -50,6 +50,7 @@ class Configs:
     # miscellaneous
     tmpfilenbr = 0
     fragmentflag = True
+    subtreetype = 'd'
 # check if the given configuration is valid to add
 def set_valid_configuration(name, conf):

bscampp/functions.py CHANGED Viewed

@@ -71,6 +71,9 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
     query_votes_dict = dict()
     query_top_vote_dict = dict()
     tmp_output = os.path.join(workdir, 'closest.txt')
+    if Configs.subtreetype == "h":
+        Configs.votes = Configs.subtreesize
     cmd = []
     if Configs.similarityflag:
@@ -226,6 +229,56 @@ def assignQueriesToSubtrees(query_votes_dict, query_top_vote_dict,
     _LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
     return new_subtree_dict, placed_query_list
+'''
+Function to assign queries to subtrees as used in SCAMPP
+(subtrees are built using the nearest leaf as the seed sequence)
+'''
+def buildQuerySubtrees(query_votes_dict, query_top_vote_dict,
+        tree, leaf_dict, dry_run=False):
+    t0 = time.perf_counter()
+    _LOG.info('(SCAMPP) Building query subtree for placement...')
+    if dry_run:
+        return dict(), []
+    # (1) go over the query seed sequences to see if any queries use
+    # the same seed sequence (i.e. subtree)
+    seed_queries = dict()
+    for query, closest_leaf in query_top_vote_dict.items():
+        if closest_leaf not in seed_queries:
+            seed_queries[closest_leaf] = [query]
+        else:
+            seed_queries[closest_leaf].append(query)
+    new_subtree_dict = dict()
+    # assign queries to subtrees, and remove them from the pool
+    # repeat until all queries are assigned
+    _total = 0
+    for seed_label, queries in seed_queries.items():
+        ####### additional logging for tracking progress
+        _total += 1
+        if _total % 1000 == 0 or _total == len(seed_queries):
+            _LOG.info(f"- Built {_total}/{len(seed_queries)} subtrees")
+        node_y = leaf_dict[seed_label]
+        # extract [subtreesize] leaves
+        if Configs.subtreetype == "h":
+            labels = query_votes_dict[queries[0]]
+        elif Configs.subtreetype == "n":
+            labels = utils.subtree_nodes(tree, node_y, Configs.subtreesize)
+        else:
+            labels = utils.subtree_nodes_with_edge_length(tree, node_y,
+                Configs.subtreesize)
+        subtree = tree.extract_tree_with(labels)
+        new_subtree_dict[subtree] = queries
+    placed_query_list = []
+    t1 = time.perf_counter()
+    _LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
+    return new_subtree_dict, placed_query_list
 '''
 Helper function to run a single placement task. Designed to use with
 multiprocessing
@@ -263,12 +316,16 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
         if len(query_list) == 0:
             continue
         final_subtree_count += 1
+        subtree_dir = os.path.join(workdir, f'subtree_{final_subtree_count}')
+        if not os.path.isdir(subtree_dir):
+            os.makedirs(subtree_dir)
         # name all temporary output files
-        tmp_tree = os.path.join(workdir, 'tree')
-        tmp_aln = os.path.join(workdir, f'subtree_{final_subtree_count}_aln.fa')
-        tmp_qaln = os.path.join(workdir, f'subtree_{final_subtree_count}_qaln.fa')
-        tmp_output = os.path.join(workdir,
+        tmp_tree = os.path.join(subtree_dir, 'tree')
+        tmp_aln = os.path.join(subtree_dir, f'subtree_{final_subtree_count}_aln.fa')
+        tmp_qaln = os.path.join(subtree_dir, f'subtree_{final_subtree_count}_qaln.fa')
+        tmp_output = os.path.join(subtree_dir,
                 'subtree_{}_{}.jplace'.format(
                     final_subtree_count, Configs.placement_method))
@@ -292,13 +349,13 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
             job = EPAngJob(path=Configs.epang_path,
                     info_path=Configs.info_path, tree_path=tmp_tree,
                     aln_path=tmp_aln, qaln_path=tmp_qaln,
-                    outdir=workdir, num_cpus=Configs.num_cpus)
+                    outdir=subtree_dir, num_cpus=Configs.num_cpus)
             # for EPA-ng, ensure that outpath name is changed to the one we want
-            _outpath = job.run()
+            _outpath = job.run(logging=f'subtree_{final_subtree_count}')
             os.system('mv {} {}'.format(_outpath, tmp_output))
         elif Configs.placement_method == 'pplacer':
             # build ref_pkg with info and tmp_tree and tmp_aln
-            refpkg_dir = os.path.join(workdir,
+            refpkg_dir = os.path.join(subtree_dir,
                     f'subtree_{final_subtree_count}.refpkg')
             taxit_job = TaxtasticJob(path=Configs.taxit_path,
                     outdir=refpkg_dir, name=f'subtree_{final_subtree_count}',
@@ -311,7 +368,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
                     refpkg_dir=refpkg_dir, model=Configs.model,
                     outpath=tmp_output, num_cpus=Configs.num_cpus,
                     qaln_path=tmp_qaln)
-            tmp_output = job.run()
+            tmp_output = job.run(logging=f'subtree_{final_subtree_count}')
         else:
             raise ValueError(
                     f"Placement method {Configs.placement_method} not recognized")

bscampp/jobs.py CHANGED Viewed

@@ -3,7 +3,7 @@ from subprocess import Popen
 from abc import abstractmethod
 from bscampp import get_logger, log_exception
-from bscampp.configs import Configs
+#from bscampp.configs import Configs
 _LOG = get_logger(__name__)
@@ -25,7 +25,7 @@ class Job(object):
         return self.pid
     # run the job with given invocation and raise errors when encountered
-    def run(self, stdin="", lock=None, logging=False, shell=False):
+    def run(self, stdin="", lock=None, logging=None, shell=False):
         try:
             cmd, outpath = self.get_invocation()
             _LOG.debug(f'Running job_type: {self.job_type}, output: {outpath}')
@@ -57,18 +57,22 @@ class Job(object):
             # logging to local or to PIPE
             stderr, stdout = '', ''
             scmd = ' '.join(cmd)
-            if logging:
+            if logging != None:
                 logpath = os.path.join(
-                        os.path.dirname(outpath), 'f{self.job_type}.txt')
+                        os.path.dirname(outpath),
+                        f'{logging}_{self.job_type}.txt')
                 outlogging = open(logpath, 'w', 1)
                 # TODO: may need to deal with piping in the future, for now
                 # it is not needed
                 p = Popen(cmd, text=True, bufsize=1,
                         stdin=subprocess.PIPE,
-                        stdout=outlogging, stderr=subprocess.PIPE)
+                        stdout=outlogging, stderr=outlogging)
                 self.pid = p.pid
                 stdout, stderr = p.communicate(input=stdin)
+                # stdout and stderr are both written to outlogging
+                # hence, assign them to be empty strings
+                stdout, stderr = '', ''
                 outlogging.close()
             else:
                 p = Popen(cmd, text=True, bufsize=1,
@@ -92,16 +96,22 @@ class Job(object):
             else:
                 error_msg = ' '.join([f'Error occurred running {self.job_type}.',
                     f'returncode: {self.returncode}'])
+                if logging != None:
+                    logpath = '\nLOGPATH: ' + os.path.join(
+                            os.path.dirname(outpath),
+                            f'{logging}_{self.job_type}.txt')
+                else:
+                    logpath = ''
                 if lock:
                     try:
                         lock.acquire()
                         _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
-                                '\nSTDERR: ' + stderr)
+                                '\nSTDERR: ' + stderr + logpath)
                     finally:
                         lock.release()
                 else:
                     _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
-                            '\nSTDERR: ' + stderr)
+                            '\nSTDERR: ' + stderr + logpath)
                 exit(1)
         except Exception:
             log_exception(_LOG)
@@ -177,7 +187,7 @@ A pplacer job that uses taxtastic refpkg to place sequences
 class PplacerTaxtasticJob(Job):
     def __init__(self, **kwargs):
         Job.__init__(self)
-        self.job_type = 'pplacer-taxtastic'
+        self.job_type = 'pplacer'
         self.path = ''
         self.refpkg_dir = ''

bscampp/pipeline.py CHANGED Viewed

@@ -89,6 +89,79 @@ def bscampp_pipeline(*args, **kwargs):
     else:
         return False
+# main pipeline for SCAMPP
+def scampp_pipeline(*args, **kwargs):
+    t0 = time.perf_counter()
+    m = Manager(); lock = m.Lock()
+    # set up a dry run if specified
+    dry_run = False
+    if 'dry_run' in kwargs and isinstance(kwargs['dry_run'], bool):
+        dry_run = kwargs['dry_run']
+    # parse command line arguments and build configurations
+    parser, cmdline_args = parseArguments(dry_run=dry_run, method="SCAMPP")
+    # initialize multiprocessing (if needed)
+    _LOG.warning('Initializing ProcessPoolExecutor...')
+    pool = ProcessPoolExecutor(Configs.num_cpus, initializer=initial_pool,
+            initargs=(parser, cmdline_args,))
+    # (0) temporary files wrote to here
+    if not dry_run:
+        workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
+        try:
+            if not os.path.isdir(workdir):
+                os.makedirs(workdir)
+        except OSError:
+            log_exception(_LOG)
+    else:
+        workdir = os.getcwd()
+    # (1) read in tree, alignment, and separate reference sequences from
+    # query sequences
+    tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir,
+            dry_run=dry_run)
+    # (2) compute closest leaves for all query sequences
+    query_votes_dict, query_top_vote_dict = getClosestLeaves(
+            aln_path, qaln_path, aln, qaln, workdir, dry_run=dry_run)
+    # (3) first assign each query to the subtree built using the closest
+    # leaf as the seed sequence
+    new_subtree_dict, placed_query_list = buildQuerySubtrees(
+            query_votes_dict, query_top_vote_dict, tree, leaf_dict,
+            dry_run=dry_run)
+    # (4) perform placement for each subtree
+    output_jplace = placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict,
+            placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock,
+            dry_run=dry_run)
+    # (5) write the output jplace to local
+    writeOutputJplace(output_jplace, dry_run=dry_run)
+    # shutdown pool
+    _LOG.warning('Shutting down ProcessPoolExecutor...')
+    pool.shutdown()
+    _LOG.warning('ProcessPoolExecutor shut down.')
+    # clean up temp files if not keeping
+    if not Configs.keeptemp:
+        _LOG.info('Removing temporary files...')
+        clean_temp_files()
+    # stop SCAMPP
+    send = time.perf_counter()
+    _LOG.info('SCAMPP completed in {} seconds...'.format(send - t0))
+    if dry_run:
+        return True
+    else:
+        return False
 def clean_temp_files():
     # all temporary files/directories to remove
     temp_items = [f'tmp{Configs.tmpfilenbr}']
@@ -102,10 +175,14 @@ def clean_temp_files():
             continue
         _LOG.info(f'- Removed {temp}')
-def parseArguments(dry_run=False):
+def parseArguments(dry_run=False, method="BSCAMPP"):
     global _root_dir, main_config_path
-    parser = _init_parser()
+    default_outdir = f"{method.lower()}_output"
+    default_outname = f"{method.lower()}_result"
+    parser = _init_parser(default_outdir=default_outdir,
+            default_outname=default_outname)
     cmdline_args = sys.argv[1:]
     if dry_run:
@@ -114,22 +191,27 @@ def parseArguments(dry_run=False):
     # build config
     buildConfigs(parser, cmdline_args)
-    _LOG.info('BSCAMPP is running with: {}'.format(
+    _LOG.info('{} is running with: {}'.format(method,
         ' '.join(cmdline_args)))
     getConfigs()
     return parser, cmdline_args
-def _init_parser():
+def _init_parser(default_outdir="bscampp_output",
+        default_outname="bscampp_result"):
     # example usage
     example_usages = '''Example usages:
-> default
-    %(prog)s -i raxml.info
+> (1) Default
+    %(prog)s -i raxml.bestModel -t reference.tre -a alignment.fa
+> (2) Separate alignment file for query sequences
+    %(prog)s -i raxml.bestModel -t reference.tre -a reference.fa -q query.fa
+> (3) Use pplacer instead of EPA-ng as base method (need RAxML-ng info or FastTree log file)
+    %(prog)s -i fasttree.log -t reference.tre -a alignment.fa --placement-method pplacer
 '''
     parser = ArgumentParser(
             description=(
-                "This program runs BSCAMPP, a scalable phylogenetic "
+                "This program runs BSCAMPP/SCAMPP, a scalable phylogenetic "
                 "placement framework that scales EPA-ng/pplacer "
                 "to very large tree placement."
                 ),
@@ -156,7 +238,7 @@ def _init_parser():
     # basic group
     basic_group = parser.add_argument_group(
             "Basic parameters".upper(),
-            "These are the basic parameters for BSCAMPP.")
+            "These are the basic parameters for BSCAMPP/SCAMPP.")
     parser.groups['basic_group'] = basic_group
     basic_group.add_argument('--placement-method', type=str,
@@ -185,10 +267,10 @@ def _init_parser():
                   required=False, default=None)
     basic_group.add_argument("-d", "--outdir", type=str,
                   help="Directory path for output. Default: bscampp_output/",
-                  required=False, default="bscampp_output")
+                  required=False, default=default_outdir)
     basic_group.add_argument("-o", "--output", type=str, dest="outname",
                   help="Output file name. Default: bscampp_result.jplace",
-                  required=False, default="bscampp_result.jplace")
+                  required=False, default=f"{default_outname}.jplace")
     basic_group.add_argument("--threads", "--num-cpus", type=int,
                   dest="num_cpus",
                   help="Number of cores for parallelization, default: -1 (all)",
@@ -209,7 +291,8 @@ def _init_parser():
                   help="Integer size of the subtree. Default: 2000",
                   required=False, default=2000)
     advance_group.add_argument("-V", "--votes", type=int,
-                  help="Number of votes per query sequence. Default: 5",
+                  help="This is only used for BSCAMPP! Number of votes per "
+                  "query sequence. Default: 5",
                   required=False, default=5)
     advance_group.add_argument("--similarityflag", type=str2bool,
                   help="Boolean, True if maximizing sequence similarity "
@@ -228,6 +311,12 @@ def _init_parser():
     misc_group.add_argument("--fragmentflag", type=str2bool,
                   help="If queries contains fragments. Default: True",
                   required=False, default=True)
+    misc_group.add_argument("--subtreetype", type=str,
+                  help="(SCAMPP only) Options for collecting "
+                  "nodes for the subtree - d for edge weighted "
+                  "distances, n for node distances, h for Hamming "
+                  "distances. Default: d",
+                  required=False, default='d')
     misc_group.add_argument("--keeptemp", type=str2bool,
                   help="Boolean, True to keep all temporary files. "
                   "Default: False",

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: bscampp
-Version: 1.0.1b0
+Version: 1.0.2b0
 Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
 Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
 License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: numpy>=1.21.6
 Requires-Dist: treeswift>=1.1.45
 Requires-Dist: taxtastic>=0.9.3
-# BSCAMPP - A Scalable Phylogenetic Placement Method and Framework
+# BSCAMPP and SCAMPP - Two Scalable Phylogenetic Placement Methods and Frameworks
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
 [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
 [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
@@ -70,47 +70,63 @@ Requires-Dist: taxtastic>=0.9.3
   3. Alignment of query sequences (can be combined with ii.).
   4. Tree info file.
      - (EPA-ng as base method), RAxML-ng info file, typically with suffix `.bestModel`.
-     - (pplacer as base method), RAxML-ng or FastTree log file.
+     - (pplacer as base method), RAxML-ng or FastTree log file containing model parameters.
 * **Output**
   1. Placement results of query sequences in the reference tree in `.jplace` format.
-BSCAMPP is an extension and scalable solution to its previous method [SCAMPP](https://github.com/chry04/PLUSplacer) for phylogenetic placement.
-BSCAMPP achieves some magnitudes of speedup compared to the SCAMPP framework.
+SCAMPP and BSCAMPP are two scalable solutions for phylogenetic placement. SCAMPP is designed more for accuracy
+and BSCAMPP is designed more for speed. BSCAMPP achieves some magnitudes of speedup compared to SCAMPP.
 The core algorithm is described in detail at <https://doi.org/10.1101/2022.10.26.513936>.
-In short, BSCAMPP in default uses EPA-ng as the base placement method, allowing it to scale to placement trees of up to ~200,000 leaves.
-BSCAMPP achieves this by extracting appropriate subtrees and assigning each query to its most fitting subtree.
+In short, both frameworks use EPA-ng as the base placement method by default, allowing them to scale to placement trees
+of at least ~200,000 leaves. Our two methods achieve this by extracting appropriate subtrees and assigning each query
+to its most fitting subtree.
-BSCAMPP essentially is a divide-and-conquer framework and can be used with any base placement methods (e.g., `pplacer` as well).
-Currently, BSCAMPP is implemented with `epa-ng` and `pplacer`.
+They are divide-and-conquer frameworks and can be used with any base placement method (e.g., `pplacer` as well).
+Currently, BSCAMPP and SCAMPP are implemented with `epa-ng` and `pplacer`.
-It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 votes based on current best results, especially if sequences
-are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and 5 respectively (see [Usage](#usage) for more details
-on customizing BSCAMPP).
+#### BSCAMPP
+It is recommended that BSCAMPP be used with subtrees of size 2000 and with 5 votes based on current best results,
+especially if sequences are fragmentary. Defaults for the subtree size and number of votes are set to 2,000 and
+5 respectively (see [Usage](#usage) for more details on customizing BSCAMPP).
+#### SCAMPP
+SCAMPP is also implemented in BSCAMPP, originally from <https://github.com/chry04/PLUSplacer>.
+Its default also uses EPA-ng and a subtree size of 2,000.
+The user can invoke SCAMPP by running `run_scampp.py` or `scampp` (if installed with PyPI) after installation.
 # Installation
-BSCAMPP was tested on **Python 3.7 to 3.12**. There are two ways to install and use BSCAMPP: (1) with PyPI, or
-(2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP, please contact Eleanor Wedell
-(ewedell@illinois.edu).
+BSCAMPP and SCAMPP were tested on **Python 3.8 to 3.12**. There are two ways to install:
+(1) with PyPI, or (2) from this GitHub repository. If you have any difficulties installing or running BSCAMPP or SCAMPP,
+please contact Eleanor Wedell (ewedell2@illinois.edu).
 ### External requirements
-EPA-ng and/or pplacer are requirements to run BSCAMPP since BSCAMPP will use them as the base phylogenetic placement methods.
-By default, BSCAMPP will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
-We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
+* **Base placement method**:
+  EPA-ng and/or pplacer are requirements since BSCAMPP and SCAMPP will use them as the base phylogenetic placement methods.
+  By default, the software will search for binary executables of `pplacer` and `epa-ng` in the user's environment when running for the first time.
+  We also included a compiled version of `pplacer` for the Linux system under `bscampp/tools`.
+* **C++ OpenMP**:
+  The similarity comparison between sequences is implemented in C++ and sped up with OpenMP, so OpenMP is required to run the pre-compiled binaries.
-### (1) Install with `pip` (Coming soon)
-The easiest way to install BSCAMPP is to use `pip install`. This will also install all required Python packages.
+### (1) Install with `pip`
+The easiest way to install BSCAMPP and SCAMPP is to use `pip install`. This will also install all required Python packages.
 ```bash
 # 1. install with pip (--user if no root access)
 pip install bscampp [--user]
-# 2. Two binary executables will be installed. The first time
+# 2. Four binary executables will be installed. The first time
 #    running any will create a config file at
 #    ~/.bscampp/main.config that resolves the links to all
 #    external software (e.g., epa-ng, pplacer)
+# ---- BSCAMPP functions
 bscampp [-h]    # or
 run_bscampp.py [-h]
+# ---- SCAMPP functions
+scampp  [-h]    # or
+run_scampp.py [-h]
 ```
 ### (2) Install from GitHub
@@ -132,22 +148,29 @@ git clone https://github.com/ewedell/BSCAMPP.git
 # 2. Install all requirements
 pip install -r requirements.txt
-# 3. Execute BSCAMPP executable `run_bscampp.py`
+# 3. Execute BSCAMPP/SCAMPP executables
 python run_bscampp.py [-h]
+python run_scampp.py [-h]
 ```
 # Usage
 All parameter settings can be found by running
 ```bash
-run_bscampp.py -h
+run_bscampp.py -h   #OR
+run_scampp.py -h
 ```
 ### (1) Default case (`epa-ng`)
 ```bash
+# for BSCAMPP
 run_bscampp.py -i [raxml best model] -t [reference tree] -a [alignment file]
+# for SCAMPP
+run_scampp.py -i [raxml best model] -t [reference tree] -a [alignment file]
 ```
-To run BSCAMPP in its default mode with EPA-ng. `[alignment file]` should contain both sequences from the placement tree and
-the query sequences to be placed. This will create an output directory `bscampp_output` and write the placement results to
+BSCAMPP and SCAMPP in default mode run EPA-ng as the base method. `[alignment file]` should
+contain both sequences from the placement tree and the query sequences to be placed.
+This will create an output directory (`bscampp_output` for BSCAMPP, `scampp_output` for SCAMPP) and write the placement results to
 `bscampp_output/bscampp_result.jplace` (or `scampp_output/scampp_result.jplace` for SCAMPP).
 ### (2) Separately giving query alignment and finer control of outputs
@@ -160,7 +183,13 @@ run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment
 ### (3) Using `pplacer` as the base placement method
 ```bash
 run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
-    -a [reference alignment] -q [query sequence alignment]
+    -a [reference alignment] -q [query sequence alignment] \
+    --placement-method pplacer
+```
+### (4) Changing the number of votes to 15 for BSCAMPP
+```bash
+run_bscampp.py -i [raxml best model] -t [reference tree] -a [reference alignment] \
+    -q [query sequence alignment] -V 15
 ```
 ### More comprehensive usage
@@ -221,14 +250,23 @@ run_bscampp.py -i [logfile from either RAxML/FastTree] -t [reference tree] \
 >                         Temporary file indexing. Default: 0
 >   --fragmentflag FRAGMENTFLAG
 >                         If queries contains fragments. Default: True
+>   --subtreetype SUBTREETYPE
+>                         (SCAMPP only) Options for collecting nodes for the
+>                         subtree - d for edge weighted distances, n for node
+>                         distances, h for Hamming distances. Default: d
 >   --keeptemp KEEPTEMP   Boolean, True to keep all temporary files. Default:
                         False
 ```
 # Example Code and Data
-Example script and data are provided in this GitHub repository in `examples/`. The data is originally from the [RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
-* `examples/run.sh`: contains a simple script to test BSCAMPP with `epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement tree.
-  The info file is from RAxML-ng when running `epa-ng`, and from FastTree-2 when running `pplacer`.
-  - `run.sh` will invoke BSCAMPP with `epa-ng`.
-  - `run.sh pplacer` will invoke BSCAMPP with `pplacer`.
+Example script and data are provided in this GitHub repository in `examples/`.
+The data is originally from the
+[RNAsim-VS datasets](https://doi.org/10.1093/sysbio/syz063).
+* `examples/run_bscampp.sh`: contains a simple script to test BSCAMPP with
+  `epa-ng` or `pplacer`, placing 200 query sequences to a 10000-leaf placement
+  tree. The info file is from RAxML-ng when running `epa-ng`, and from
+  FastTree-2 when running `pplacer`.
+  - `run_bscampp.sh` will invoke BSCAMPP with `epa-ng`.
+  - `run_bscampp.sh pplacer` will invoke BSCAMPP with `pplacer`.
+* `examples/run_scampp.sh`: the same test script but running SCAMPP.

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-bscampp/__init__.py,sha256=Wnn_Bm543hAgQCd9PmwdT_kFBZzGLDW4dcSeP0iLVTk,2290
-bscampp/configs.py,sha256=XuzRbtcUE5bExe-vEZGZ1CeXBmp4oP7LWFveQySx2xs,5745
+bscampp/__init__.py,sha256=mZGsa6XRWkYMo62gK_z5OFyFxRZHadW_SWHAirS1Dvg,2290
+bscampp/configs.py,sha256=3HJHLN2fLV5Tv3TJL95NpOuSXUV6CvqxRqCOM6TpbJQ,5767
 bscampp/default.config,sha256=CEfsUHBy--vwJhEcUuJ0btfuGQWb_lKMVWUIP9f5YGw,112
-bscampp/functions.py,sha256=cPT5eSy_8CSNzDx-5ma43Hp9_AMmaWSTXM89bjdrkRs,15640
+bscampp/functions.py,sha256=QYI5RsUEMGc6jLPzFdInpmxA8wiYyN7785P3WxWYiTo,17839
 bscampp/init_configs.py,sha256=EA9sMN5jWj6zj2b-7tN19LhX2Ef61ByQLxQRLHAqLDM,3600
-bscampp/jobs.py,sha256=de0Dr3ynORwACJqVbeWDfqTwJhWvMYG-7yfRYirGx8M,6703
-bscampp/pipeline.py,sha256=UT8y6ObFZ12q5Vw3731r50k8pLMioFNV4qCy0tz_wuk,9550
+bscampp/jobs.py,sha256=PrVMJBabi4cYlrxVLo37XPOY82fY0zZ8Iyp9CWCNWhU,7181
+bscampp/pipeline.py,sha256=C6I1vWeA6Rq_spPHy_il1FJA_DomWHUHYHLUUk9SnLk,13024
 bscampp/utils.py,sha256=ragaI14Lqb2fVp_uYDkFQnV7a50G9-sUOWdVM-sNhUE,29005
 bscampp/tools/epa-ng,sha256=f3EVoZAAOXLN6l521qp-TrWDl5J2nqL3tGgjPaQE9WQ,3772096
 bscampp/tools/pplacer,sha256=p0H4eo9uuiYoWS_kJbPfauOV99i7BXJdZSiwXIuLxTw,7834576
@@ -17,9 +17,9 @@ bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp,sha256=xCmyAT-OZJOD
 bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp,sha256=eKxgODRlpf0hU84QjNhigvRhWCT9tiJZjA5oQFQ1bUk,7404
 bscampp/tools/hamming_distance/src/homology.cpp,sha256=ZE0uXZWQ-cN4U1Wk5kUr_KKHgzsgA6Sno-IViRa4tmI,6053
 bscampp/tools/hamming_distance/src/new_hamming.cpp,sha256=fBRm99RquBZgZjaLOn9xDI3cH9NchhrxKbL-11j8fmk,5342
-bscampp-1.0.1b0.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
-bscampp-1.0.1b0.dist-info/METADATA,sha256=hCpwS1vbd07cuwW7D5AkiO_I_GP-kqk21IH2yxiPUwM,11144
-bscampp-1.0.1b0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-bscampp-1.0.1b0.dist-info/entry_points.txt,sha256=dZygBmg2OncVyeLeIjh_9e-GBIOesFvMemyW9BRRcXY,113
-bscampp-1.0.1b0.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
-bscampp-1.0.1b0.dist-info/RECORD,,
+bscampp-1.0.2b0.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
+bscampp-1.0.2b0.dist-info/METADATA,sha256=OWSIl8dFMrgzB9Xe8geqXQw2fBNd8hta3p40O5Q9T5Q,12509
+bscampp-1.0.2b0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+bscampp-1.0.2b0.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
+bscampp-1.0.2b0.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
+bscampp-1.0.2b0.dist-info/RECORD,,

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,3 +1,5 @@
 [console_scripts]
 bscampp = bscampp.pipeline:bscampp_pipeline
 run_bscampp.py = bscampp.pipeline:bscampp_pipeline
+run_scampp.py = bscampp.pipeline:scampp_pipeline
+scampp = bscampp.pipeline:scampp_pipeline

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/LICENSE RENAMED Viewed

File without changes

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/WHEEL RENAMED Viewed

File without changes

{bscampp-1.0.1b0.dist-info → bscampp-1.0.2b0.dist-info}/top_level.txt RENAMED Viewed

File without changes

bscampp 1.0.1b0__py3-none-any.whl → 1.0.2b0__py3-none-any.whl

bscampp 1.0.1b0py3-none-any.whl → 1.0.2b0py3-none-any.whl