bscampp 1.0.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bscampp/__init__.py +68 -0
- bscampp/configs.py +169 -0
- bscampp/default.config +5 -0
- bscampp/functions.py +394 -0
- bscampp/init_configs.py +93 -0
- bscampp/jobs.py +198 -0
- bscampp/pipeline.py +224 -0
- bscampp/tools/epa-ng +0 -0
- bscampp/tools/hamming_distance/CMakeLists.txt +13 -0
- bscampp/tools/hamming_distance/fragment_hamming +0 -0
- bscampp/tools/hamming_distance/hamming +0 -0
- bscampp/tools/hamming_distance/homology +0 -0
- bscampp/tools/hamming_distance/src/fragment_hamming.cpp +180 -0
- bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp +183 -0
- bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp +214 -0
- bscampp/tools/hamming_distance/src/homology.cpp +179 -0
- bscampp/tools/hamming_distance/src/new_hamming.cpp +161 -0
- bscampp/tools/pplacer +0 -0
- bscampp/utils.py +913 -0
- bscampp-1.0.1a0.dist-info/LICENSE +21 -0
- bscampp-1.0.1a0.dist-info/METADATA +229 -0
- bscampp-1.0.1a0.dist-info/RECORD +25 -0
- bscampp-1.0.1a0.dist-info/WHEEL +5 -0
- bscampp-1.0.1a0.dist-info/entry_points.txt +3 -0
- bscampp-1.0.1a0.dist-info/top_level.txt +1 -0
bscampp/init_configs.py
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
import os, sys, shutil
|
2
|
+
try:
|
3
|
+
import configparser
|
4
|
+
except ImportError:
|
5
|
+
import ConfigParser as configparser
|
6
|
+
from argparse import ArgumentParser, Namespace
|
7
|
+
from platform import platform
|
8
|
+
|
9
|
+
def find_main_config(homepath):
    '''
    Resolve the main.config location recorded in the home.path file.

    homepath is a text file whose (stripped) content is a root directory.
    Returns (root_dir, path_to_main_config) when <root_dir>/main.config
    exists, otherwise (None, None).
    '''
    with open(homepath, 'r') as handle:
        root = handle.read().strip()

    candidate = os.path.join(root, 'main.config')
    if not os.path.exists(candidate):
        return None, None
    return root, candidate
|
17
|
+
|
18
|
+
'''
|
19
|
+
Initialize the config file at ~/.bscampp/main.config
|
20
|
+
By default will prioritize existing software from the user environment
|
21
|
+
'''
|
22
|
+
def init_config_file(homepath, rerun=False, prioritize_user_software=True):
    '''
    Create (or reuse) main.config under ~/.bscampp and return its location.

    Parameters
        homepath: path of the home.path file that stores the root directory.
        rerun: when False, an existing main.config referenced by homepath
            is reused and nothing is written; when True the config is
            always regenerated.
        prioritize_user_software: when True, binaries found on PATH with
            shutil.which() override the bundled tool paths.

    Returns
        (root_dir, main_config_path)

    Raises
        FileNotFoundError: default.config is missing next to this module.
            This is checked before anything is written to disk.
    '''
    if not rerun:
        # make sure home.path exists
        if not os.path.exists(homepath):
            print(f'Cannot find home.path: {homepath}, regenerating...')
        else:
            _root_dir, main_config_path = find_main_config(homepath)
            if _root_dir is not None:
                return _root_dir, main_config_path
            print('Cannot find main.config, regenerating...')

    # Validate the packaged template first so a broken installation does not
    # leave a home.path / root directory behind. An explicit raise is used
    # because assert statements are removed under "python -O".
    package_dir = os.path.dirname(__file__)
    _config_path = os.path.join(package_dir, 'default.config')
    if not os.path.exists(_config_path):
        raise FileNotFoundError(
            'default config file missing! Please redownload from GitHub\n')

    _root_dir = os.path.expanduser('~/.bscampp')
    main_config_path = os.path.join(_root_dir, 'main.config')
    print(f'Initializing the config file at: {main_config_path}')

    # write to local home.path and _root_dir
    # exist_ok avoids a FileExistsError when another process creates the
    # directory between an existence check and the mkdir call.
    os.makedirs(_root_dir, exist_ok=True)
    with open(homepath, 'w') as f:
        f.write(_root_dir)

    # create main.config based on the default.config at this file's location
    cparser = configparser.ConfigParser()
    cparser.optionxform = str  # keep option names case-sensitive

    if os.path.exists(main_config_path):
        print(f'Main configuration file {main_config_path} exists...')
        print('Overwriting the existing config file...')
    print('\n')

    with open(_config_path, 'r') as f:
        cparser.read_file(f)

    # check platform, e.g., macOS or linux, etc.
    platform_name = platform()
    print(f'System is: {platform_name}')

    tools_dir = os.path.join(package_dir, 'tools')

    # default path to all potential binaries
    cparser.set('basic', 'pplacer_path',
            os.path.join(tools_dir, 'pplacer'))
    cparser.set('basic', 'epang_path',
            os.path.join(tools_dir, 'epa-ng'))
    cparser.set('basic', 'hamming_distance_dir',
            os.path.join(tools_dir, 'hamming_distance'))

    # macOS TODO: need to recompile the binaries
    if 'macos' in platform_name.lower():
        cparser.set('basic', 'hamming_distance_dir',
                os.path.join(tools_dir, 'macOS', 'hamming_distance'))

    # prioritize user's software
    if prioritize_user_software:
        print('Detecting existing software from user\'s environment...')
        for software in ('pplacer', 'epa-ng', 'taxit'):
            # option names drop the dash, e.g. 'epa-ng' -> 'epang_path'
            sname = software.replace('-', '')
            software_path = shutil.which(software)
            if software_path:
                print('\t{}: {}'.format(software, software_path))
                cparser.set('basic', f'{sname}_path', software_path)

    with open(main_config_path, 'w') as f:
        cparser.write(f)
    print(f'\n(Done) main.config was written to: {main_config_path}')
    print(f'If you want to make changes, please directly edit {main_config_path}')
    return _root_dir, main_config_path
|
bscampp/jobs.py
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
import os, shutil, subprocess, stat, re, traceback, shlex
|
2
|
+
from subprocess import Popen
|
3
|
+
from abc import abstractmethod
|
4
|
+
|
5
|
+
from bscampp import get_logger, log_exception
|
6
|
+
from bscampp.configs import Configs
|
7
|
+
|
8
|
+
_LOG = get_logger(__name__)
|
9
|
+
|
10
|
+
'''
|
11
|
+
Template class Job for running external software/jobs
|
12
|
+
'''
|
13
|
+
class Job(object):
    '''
    Template class for running an external program as a subprocess.

    Subclasses implement get_invocation(), which returns (cmd, outpath):
    the argument list to execute and the path of the expected output.
    '''
    def __init__(self):
        self.job_type = ""      # label used in log messages and the log file name
        self.errors = []
        self.b_ignore_error = False
        self.pid = -1           # pid of the most recent child; -1 before any run
        self.returncode = 0

    def __call__(self):
        return self.run()

    def get_pid(self):
        return self.pid

    @staticmethod
    def _log_locked(emit, message, lock=None):
        '''Call emit(message), holding lock for the duration if one is given.'''
        if lock:
            with lock:
                emit(message)
        else:
            emit(message)

    # run the job with given invocation and raise errors when encountered
    def run(self, stdin="", lock=None, logging=False, shell=False):
        '''
        Execute the command returned by get_invocation().

        Parameters
            stdin: text sent to the child's standard input.
            lock: optional lock held while writing the completion/error log.
            logging: when True, the child's stdout is written to
                "<job_type>.txt" in the directory of outpath instead of
                being captured.
            shell: accepted for interface compatibility; not used.

        Returns
            outpath when the child exits with return code 0.

        A non-zero return code is logged and terminates via SystemExit(1).
        Any other exception raised here is handed to log_exception(_LOG).
        '''
        try:
            cmd, outpath = self.get_invocation()
            _LOG.debug(f'Running job_type: {self.job_type}, output: {outpath}')

            # failsafe for NotImplemented jobs
            if len(cmd) == 0:
                raise ValueError(
                        f'{self.job_type} does not have a valid run command. '
                        'It might be due to (invalid input type, etc.).')

            # identify binaries as the first field
            binpath = cmd[0]
            # deal with special cases, e.g., python, java
            if binpath == 'java':
                binpath = cmd[2]
            elif binpath == 'python' or binpath == 'python3':
                binpath = cmd[1]
            # 'gzip' is the only binary allowed to be resolved through PATH.
            # Explicit raise instead of assert so the check survives -O.
            if not (os.path.exists(binpath) or binpath == 'gzip'):
                raise FileNotFoundError(
                        'executable for %s does not exist: %s' %
                        (self.job_type, binpath))

            _LOG.debug('Arguments: %s', ' '.join(
                (str(x) if x is not None else '?NoneType?' for x in cmd)))

            # logging to local or to PIPE
            stderr, stdout = '', ''
            logpath = None
            if logging:
                logpath = os.path.join(
                        os.path.dirname(outpath), f'{self.job_type}.txt')

                # TODO: may need to deal with piping in the future, for now
                # it is not needed
                # The with-block closes the log file even if Popen raises.
                with open(logpath, 'w', 1) as outlogging:
                    p = Popen(cmd, text=True, bufsize=1,
                            stdin=subprocess.PIPE,
                            stdout=outlogging, stderr=subprocess.PIPE)
                    self.pid = p.pid
                    stdout, stderr = p.communicate(input=stdin)
            else:
                p = Popen(cmd, text=True, bufsize=1,
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                self.pid = p.pid
                stdout, stderr = p.communicate(input=stdin)
            self.returncode = p.returncode

            # successful run and write to log
            if self.returncode == 0:
                self._log_locked(_LOG.debug,
                        f'{self.job_type} completed, output: {outpath}', lock)
                return outpath

            # communicate() returns None for a stream that was not a PIPE
            # (stdout when logging to file), so never concatenate it raw.
            if stdout is None:
                stdout = f'(redirected to {logpath})'
            if stderr is None:
                stderr = ''
            error_msg = ' '.join([f'Error occurred running {self.job_type}.',
                f'returncode: {self.returncode}'])
            self._log_locked(_LOG.error,
                    error_msg + '\nSTDOUT: ' + stdout +
                    '\nSTDERR: ' + stderr, lock)
            # SystemExit is not an Exception subclass, so it is not caught
            # by the handler below.
            raise SystemExit(1)
        except Exception:
            log_exception(_LOG)

    # implemented in subclass
    # return: (cmd, outpath)
    @abstractmethod
    def get_invocation(self):
        raise NotImplementedError(
            'get_invocation() should be implemented by subclasses.')
|
115
|
+
|
116
|
+
'''
|
117
|
+
An EPA-ng job that runs EPA-ng with given parameters
|
118
|
+
'''
|
119
|
+
class EPAngJob(Job):
    '''
    Job that builds the command line for an EPA-ng run.

    Every keyword argument is stored as an attribute of the same name,
    overriding the defaults listed in __init__.
    '''
    def __init__(self, **kwargs):
        Job.__init__(self)
        self.job_type = 'epa-ng'

        settings = {
            'path': '',
            'info_path': '',
            'tree_path': '',
            'aln_path': '',
            'qaln_path': '',
            'outdir': '',
            'num_cpus': 1,
        }
        # caller-supplied values win over the defaults
        settings.update(kwargs)
        for attr, value in settings.items():
            setattr(self, attr, value)

    def get_invocation(self):
        '''Return (cmd, outpath); outpath is <outdir>/epa_result.jplace.'''
        self.outpath = os.path.join(self.outdir, 'epa_result.jplace')
        flagged = (
            ('-m', self.info_path),
            ('-t', self.tree_path),
            ('-s', self.aln_path),
            ('-q', self.qaln_path),
            ('-w', self.outdir),
            ('-T', str(self.num_cpus)),
        )
        cmd = [self.path]
        for flag, value in flagged:
            cmd.extend((flag, value))
        cmd.append('--redo')
        return cmd, self.outpath
|
143
|
+
|
144
|
+
'''
|
145
|
+
A taxtastic job that creates a refpkg based on given parameters
|
146
|
+
'''
|
147
|
+
class TaxtasticJob(Job):
    '''
    Job that builds the "taxit create" command line for a reference package.

    Every keyword argument is stored as an attribute of the same name,
    overriding the defaults listed in __init__.
    '''
    def __init__(self, **kwargs):
        Job.__init__(self)
        self.job_type = 'taxit'

        settings = {
            'path': '',
            'outdir': '',
            'name': '',
            'tree_path': '',
            'aln_path': '',
            'info_path': '',
        }
        # caller-supplied values win over the defaults
        settings.update(kwargs)
        for attr, value in settings.items():
            setattr(self, attr, value)

    def get_invocation(self):
        '''Return (cmd, outpath); outpath is the refpkg directory (outdir).'''
        self.outpath = os.path.join(self.outdir)

        # check which model file is provided: a path containing 'bestModel'
        # is passed as --model-file, anything else as --tree-stats
        if 'bestModel' in self.info_path:
            info_flag = '--model-file'
        else:
            info_flag = '--tree-stats'

        cmd = [
            self.path, 'create',
            '-P', self.outdir,
            '-l', self.name,
            '--aln-fasta', self.aln_path,
            '--tree-file', self.tree_path,
            info_flag, self.info_path,
        ]
        return cmd, self.outpath
|
173
|
+
|
174
|
+
'''
|
175
|
+
A pplacer job that uses taxtastic refpkg to place sequences
|
176
|
+
'''
|
177
|
+
class PplacerTaxtasticJob(Job):
    '''
    Job that builds the pplacer command line using a taxtastic refpkg.

    Every keyword argument is stored as an attribute of the same name,
    overriding the defaults listed in __init__. outpath is expected to be
    supplied by the caller; it is not derived here.
    '''
    def __init__(self, **kwargs):
        Job.__init__(self)
        self.job_type = 'pplacer-taxtastic'

        settings = {
            'path': '',
            'refpkg_dir': '',
            'qaln_path': '',
            'outdir': '',
            'outpath': '',
            'model': 'GTR',
            'num_cpus': 1,
        }
        # caller-supplied values win over the defaults
        settings.update(kwargs)
        for attr, value in settings.items():
            setattr(self, attr, value)

    def get_invocation(self):
        '''Return (cmd, outpath) for the pplacer run.'''
        # outpath defined
        cmd = [self.path]
        cmd += ['-m', self.model]
        cmd += ['-c', self.refpkg_dir]
        cmd += ['-o', self.outpath]
        cmd += ['-j', str(self.num_cpus)]
        cmd.append(self.qaln_path)
        return cmd, self.outpath
|
bscampp/pipeline.py
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
import json, time, sys, os, shutil
|
2
|
+
from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter
|
3
|
+
import argparse
|
4
|
+
|
5
|
+
from bscampp import get_logger, log_exception, __version__
|
6
|
+
from bscampp.configs import *
|
7
|
+
from bscampp.functions import *
|
8
|
+
import bscampp.utils as utils
|
9
|
+
|
10
|
+
from multiprocessing import Manager
|
11
|
+
from concurrent.futures import ProcessPoolExecutor
|
12
|
+
|
13
|
+
_LOG = get_logger(__name__)
|
14
|
+
|
15
|
+
# process pool initializer
|
16
|
+
def initial_pool(parser, cmdline_args):
    '''
    Process pool initializer: rebuild the configuration in a worker.

    Forwards parser and cmdline_args to buildConfigs() with
    child_process=True.
    '''
    # avoid redundant logging for child process
    buildConfigs(parser, cmdline_args, child_process=True)
|
19
|
+
|
20
|
+
# main pipeline for BSCAMPP
|
21
|
+
def bscampp_pipeline(*args, **kwargs):
    '''
    Main pipeline for BSCAMPP.

    Parses the command line, starts a process pool, then runs the steps
    numbered (0)-(5) below and writes the resulting jplace output.
    Temporary files are removed afterwards unless Configs.keeptemp is set.
    Positional and keyword arguments are accepted but not used.

    The process pool is shut down in a finally-clause, so it is also
    released when one of the pipeline steps raises.
    '''
    t0 = time.perf_counter()
    m = Manager(); lock = m.Lock()

    # parse command line arguments and build configurations
    parser, cmdline_args = parseArguments()

    # initialize multiprocessing (if needed)
    _LOG.warning('Initializing ProcessPoolExecutor...')
    pool = ProcessPoolExecutor(Configs.num_cpus, initializer=initial_pool,
            initargs=(parser, cmdline_args,))

    try:
        # (0) temporary files are written to here
        workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
        try:
            # exist_ok makes the call safe when the directory already exists
            os.makedirs(workdir, exist_ok=True)
        except OSError:
            log_exception(_LOG)

        # (1) read in tree, alignment, and separate reference sequences from
        # query sequences
        tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir)

        # (2) compute closest leaves for all query sequences
        query_votes_dict, query_top_vote_dict = getClosestLeaves(
                aln_path, qaln_path, aln, qaln, workdir)

        # (3) first assign all queries to their closest-leaf subtrees,
        # then do reassignment to minimize distance between each's top vote
        # and the subtree's seed leaf
        new_subtree_dict, placed_query_list = assignQueriesToSubtrees(
                query_votes_dict, query_top_vote_dict, tree, leaf_dict)

        # (4) perform placement for each subtree
        output_jplace = placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict,
                placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock)

        # (5) write the output jplace to local
        writeOutputJplace(output_jplace)
    finally:
        # shutdown pool, on success and on failure alike
        _LOG.warning('Shutting down ProcessPoolExecutor...')
        pool.shutdown()
        _LOG.warning('ProcessPoolExecutor shut down.')

    # clean up temp files if not keeping
    if not Configs.keeptemp:
        _LOG.info('Removing temporary files...')
        clean_temp_files()

    # stop BSCAMPP
    send = time.perf_counter()
    _LOG.info('BSCAMPP completed in {} seconds...'.format(send - t0))
|
75
|
+
|
76
|
+
def clean_temp_files():
    '''
    Delete the temporary items created under Configs.outdir.

    Currently the only item is "tmp<Configs.tmpfilenbr>". Items that are
    neither a file nor a directory are skipped silently.
    '''
    # all temporary files/directories to remove
    for temp in (f'tmp{Configs.tmpfilenbr}',):
        temp_path = os.path.join(Configs.outdir, temp)
        if os.path.isfile(temp_path):
            remover = os.remove
        elif os.path.isdir(temp_path):
            remover = shutil.rmtree
        else:
            continue
        remover(temp_path)
        _LOG.info(f'- Removed {temp}')
|
88
|
+
|
89
|
+
def parseArguments():
    '''
    Build the argument parser and the run configuration.

    Returns (parser, cmdline_args), where cmdline_args is sys.argv without
    the program name.
    '''
    # kept from the original; neither name is assigned in this function
    global _root_dir, main_config_path
    cmdline_args = sys.argv[1:]
    parser = _init_parser()

    # build config
    buildConfigs(parser, cmdline_args)
    joined_args = ' '.join(cmdline_args)
    _LOG.info('BSCAMPP is running with: {}'.format(joined_args))
    getConfigs()

    return parser, cmdline_args
|
101
|
+
|
102
|
+
def _init_parser():
    '''
    Create the ArgumentParser for the BSCAMPP command line.

    Options are organised in three argument groups (basic, advance, misc);
    the group objects are also stored in parser.groups under the keys
    'basic_group', 'advance_group' and 'misc_group'.

    Returns the configured parser; no arguments are parsed here.
    '''
    # example usage
    example_usages = '''Example usages:
> default
%(prog)s -i raxml.info
'''

    parser = ArgumentParser(
            description=(
                "This program runs BSCAMPP, a scalable phylogenetic "
                "placement framework that scales EPA-ng/pplacer "
                "to very large tree placement."
                ),
            conflict_handler='resolve',
            epilog=example_usages,
            formatter_class=utils.SmartHelpFormatter,
            )
    parser.add_argument('-v', '--version', action='version',
            version="%(prog)s " + __version__)
    # registry of argument groups, attached to the parser object itself
    parser.groups = dict()
    # -i, -t and -a below use this flag for their required= setting
    required = True

    ## add a subcommand for updating configuration file without running
    ## the BSCAMPP pipeline
    #subparsers = parser.add_subparsers(dest='command',
    #        help='Subcommands for BSCAMPP')
    #update_parser = subparsers.add_parser('update-configs',
    #        help='Update the configuration file without running BSCAMPP.')

    ## try update args requirement if subcommand(s) are used
    #if 'update-configs' in sys.argv:
    #    required = False

    # basic group
    basic_group = parser.add_argument_group(
            "Basic parameters".upper(),
            "These are the basic parameters for BSCAMPP.")
    parser.groups['basic_group'] = basic_group

    basic_group.add_argument('--placement-method', type=str,
            help='The base placement method to use. Default: epa-ng',
            choices=['epa-ng', 'pplacer'], default='epa-ng',
            required=False)
    basic_group.add_argument("-i", "--info", "--info-path", type=str,
            dest="info_path",
            help=("Path to model parameters. E.g., .bestModel "
                "from RAxML/RAxML-ng"),
            required=required, default=None)
    basic_group.add_argument("-t", "--tree", "--tree-path", type=str,
            dest="tree_path",
            help="Path to reference tree with estimated branch lengths",
            required=required, default=None)
    basic_group.add_argument("-a", "--alignment", "--aln-path", type=str,
            dest="aln_path",
            help=("Path for reference sequence alignment in "
                "FASTA format. Optionally with query sequences. "
                "Query alignment can be specified with --qaln-path"),
            required=required, default=None)
    basic_group.add_argument("-q", "--qalignment", "--qaln-path", type=str,
            dest="qaln_path",
            help=("Optionally provide path to query sequence alignment "
                "in FASTA format. Default: None"),
            required=False, default=None)
    basic_group.add_argument("-d", "--outdir", type=str,
            help="Directory path for output. Default: bscampp_output/",
            required=False, default="bscampp_output")
    basic_group.add_argument("-o", "--output", type=str, dest="outname",
            help="Output file name. Default: bscampp_result.jplace",
            required=False, default="bscampp_result.jplace")
    basic_group.add_argument("--threads", "--num-cpus", type=int,
            dest="num_cpus",
            help="Number of cores for parallelization, default: -1 (all)",
            required=False, default=-1)

    # advanced parameter settings
    advance_group = parser.add_argument_group(
            "Advance parameters".upper(),
            ("These parameters control how BSCAMPP is run. "
             "The default values are set based on experiments."
             ))
    parser.groups['advance_group'] = advance_group

    advance_group.add_argument("-m", "--model", type=str,
            help="Model used for edge distances. Default: GTR",
            required=False, default="GTR")
    advance_group.add_argument("-b", "--subtreesize", type=int,
            help="Integer size of the subtree. Default: 2000",
            required=False, default=2000)
    advance_group.add_argument("-V", "--votes", type=int,
            help="Number of votes per query sequence. Default: 5",
            required=False, default=5)
    advance_group.add_argument("--similarityflag", type=str2bool,
            help="Boolean, True if maximizing sequence similarity "
                "instead of simple Hamming distance (ignoring gap "
                "sites in the query). Default: True",
            required=False, default=True)

    # miscellaneous group
    misc_group = parser.add_argument_group(
            "Miscellaneous parameters".upper(),)
    parser.groups['misc_group'] = misc_group

    misc_group.add_argument("-n","--tmpfilenbr", type=int,
            help="Temporary file indexing. Default: 0",
            required=False, default=0)
    misc_group.add_argument("--fragmentflag", type=str2bool,
            help="If queries contains fragments. Default: True",
            required=False, default=True)
    misc_group.add_argument("--keeptemp", type=str2bool,
            help="Boolean, True to keep all temporary files. "
                "Default: False",
            required=False, default=False)
    return parser
|
215
|
+
|
216
|
+
def str2bool(b):
    '''
    Convert a command-line token to a bool (used as an argparse type).

    bool values are returned unchanged. Strings are compared
    case-insensitively against yes/true/t/y/1 and no/false/f/n/0.
    Raises argparse.ArgumentTypeError for any other string.
    '''
    if isinstance(b, bool):
        return b

    token = b.lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
bscampp/tools/epa-ng
ADDED
Binary file
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Builds the three helper executables: hamming, fragment_hamming, homology.
#
# The OpenMP::OpenMP_CXX imported target used below is provided by the
# FindOpenMP module starting with CMake 3.9, so that is the real minimum.
cmake_minimum_required(VERSION 3.9)

project(hamming CXX)

# Every target links OpenMP::OpenMP_CXX unconditionally, so fail here with a
# clear message if OpenMP is unavailable instead of later with a
# missing-target error.
find_package(OpenMP REQUIRED)

add_executable(hamming src/new_hamming.cpp)
add_executable(fragment_hamming src/fragment_hamming.cpp)
add_executable(homology src/homology.cpp)

target_link_libraries(fragment_hamming OpenMP::OpenMP_CXX)
target_link_libraries(hamming OpenMP::OpenMP_CXX)
target_link_libraries(homology OpenMP::OpenMP_CXX)
|
Binary file
|
Binary file
|
Binary file
|