bscampp 1.0.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bscampp/__init__.py ADDED
@@ -0,0 +1,68 @@
1
+ ############################################################
2
+ #
3
+ # Init file for BSCAMPP, using the __init__.py from
4
+ # SEPP as the original template. Current adaption comes
5
+ # from https://github.com/c5shen/TIPP3.git
6
+ #
7
+ ############################################################
8
+ from operator import itemgetter
9
+ import logging, os
10
+
11
+ # update system recursion limit to avoid issues
12
+ # not really needed for BSCAMPP but safe to update here
13
+ os.sys.setrecursionlimit(1000000)
14
+
15
+ __version__ = "1.0.1a"
16
+ _INSTALL_PATH = __path__[0]
17
+
18
+ # global variables to store all loggers
19
+ __set_loggers = set()
20
+
21
+ # obtain the current logging level, default to INFO
22
# obtain the current logging level, default to INFO
def get_logging_level(logging_level='info'):
    """Resolve the effective logging level for BSCAMPP loggers.

    The environment variable BSCAMPP_LOGGING_LEVEL, when set (even to an
    empty string), takes precedence over the ``logging_level`` argument.
    Names are case-insensitive; anything unrecognized falls back to
    logging.INFO.
    """
    level_by_name = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL,
    }
    # user-set environment variable overrides the function argument
    env_level = os.getenv('BSCAMPP_LOGGING_LEVEL')
    if env_level is not None:
        name = env_level.upper()
    else:
        name = logging_level.upper()
    # default to INFO when the name is not a known level
    return level_by_name.get(name, logging.INFO)
37
+
38
+ # obtain a logger for a given file
39
# obtain a logger for a given file
def get_logger(name='bscampp', log_path=None, logging_level='info'):
    """Return the logger registered under ``name``, configuring it once.

    On first use for a given name, the logger gets a level (resolved via
    get_logging_level), a timestamped formatter, and a single handler:
    stdout by default, or an append-mode file handler when ``log_path``
    is given. Later calls with the same name reuse the configured logger.
    """
    logger = logging.getLogger(name)
    if name in __set_loggers:
        return logger

    # first request for this name: attach level, format, and handler
    level = get_logging_level(logging_level)
    formatter = logging.Formatter(
        ("[%(asctime)s] %(filename)s (line %(lineno)d):"
         " %(levelname) 8s: %(message)s"))
    formatter.datefmt = "%H:%M:%S"
    logger.setLevel(level)

    if log_path is None:
        # logging to stdout
        handler = logging.StreamHandler()
    else:
        # use FileHandler for logging
        handler = logging.FileHandler(log_path, mode='a')
    handler.setLevel(level)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    __set_loggers.add(name)
    return logger
61
+
62
+ # logging exception
63
# logging exception
def log_exception(logger):
    """Log the active exception's traceback via ``logger``, then exit(1).

    Must be called from inside an ``except`` block so that an exception
    is active; otherwise the logged traceback is 'NoneType: None'.
    """
    import traceback, sys
    # format_exc() replaces the dated print_exc(None, StringIO) pattern
    logger.error(traceback.format_exc())
    # sys.exit raises a catchable SystemExit; the bare exit() builtin is
    # injected by the site module and may be absent under `python -S`
    sys.exit(1)
bscampp/configs.py ADDED
@@ -0,0 +1,169 @@
1
import os, time
try:
    import configparser
except ImportError:
    # Python 2 fallback: the module itself was named ConfigParser there.
    # (bugfix: `from ConfigParser import configparser` referenced a
    # non-existent attribute and raised ImportError, defeating the
    # fallback entirely)
    import ConfigParser as configparser
from argparse import ArgumentParser, Namespace
from bscampp.init_configs import init_config_file
from bscampp import get_logger, log_exception

# detect home.path or create if missing
homepath = os.path.dirname(__file__) + '/home.path'
_root_dir, main_config_path = init_config_file(homepath)

# set valid configparse section names
valid_config_sections = []
logging_levels = set(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])

_LOG = get_logger(__name__)
19
+
20
+ '''
21
+ Configuration defined by users and by default values
22
+ '''
23
class Configs:
    """Namespace-style holder for all runtime configuration values."""
    # NOTE(review): this 'global' is a no-op inherited from the template;
    # _root_dir is already a module-level name
    global _root_dir

    # basic input paths
    info_path = None     # info file for pplacer or EPA-ng
    tree_path = None     # placement tree path
    aln_path = None      # alignment for backbone. Optionally with queries
    qaln_path = None     # (optional) alignment for query.
    outdir = None        # output directory
    outname = None       # output name for the final jplace file
    keeptemp = False     # whether to keep all temporary files
    verbose = 'INFO'     # default verbose level to print
    num_cpus = 1         # number of cores to use for parallelization

    # paths to external binaries (filled in from main.config / CLI)
    pplacer_path = None
    epang_path = None
    taxit_path = None
    hamming_distance_dir = None

    # placement settings
    placement_method = 'epa-ng'  # either 'epa-ng' or 'pplacer'
    model = 'GTR'                # substitution model passed to the placer
    subtreesize = 2000           # number of leaves per extracted subtree
    votes = 5                    # closest-leaf votes kept per query
    similarityflag = True        # use 'homology' distance tool when True

    # miscellaneous
    tmpfilenbr = 0       # counter/suffix for temporary files
    fragmentflag = True  # queries may be fragmentary -> 'fragment_hamming'
53
+
54
+ # check if the given configuration is valid to add
55
# check if the given configuration is valid to add
def set_valid_configuration(name, conf):
    """Copy truthy attributes of a parsed section onto Configs.

    Only the 'basic' section is honored; keys that Configs does not
    already declare are silently ignored. A non-Namespace ``conf`` only
    triggers a warning.
    """
    if not isinstance(conf, Namespace):
        _LOG.warning(
            "Looking for Namespace object from \'{}\' but find {}".format(
                name, type(conf)))
        return

    if name != 'basic':
        return
    # basic section defined in main.config
    for key, value in vars(conf).items():
        # skip falsy values and keys Configs does not declare
        if value and key in Configs.__dict__:
            setattr(Configs, key, value)
72
+
73
+ # valid attribute check for print out
74
# valid attribute check for print out
def valid_attribute(k, v):
    """Return True when ``k`` is a public (non-underscore) string key."""
    return isinstance(k, str) and not k.startswith('_')
80
+
81
+ # print out current configuration
82
# print out current configuration
def getConfigs():
    """Pretty-print every public Configs attribute to stdout."""
    parts = ['\n************ Configurations ************\n',
             f'\thome.path: {homepath}\n',
             f'\tmain.config: {main_config_path}\n\n']
    for k, v in Configs.__dict__.items():
        if valid_attribute(k, v):
            parts.append(f'\tConfigs.{k}: {v}\n')
    print(''.join(parts), flush=True)
90
+
91
+ # read in config file if it exists
92
+ def _read_config_file(filename, cparser, opts,
93
+ child_process=False, expand=None):
94
+ config_defaults = []
95
+ with open(filename, 'r') as f:
96
+ cparser.read_file(f)
97
+ if cparser.has_section('commandline'):
98
+ for k, v in cparser.items('commandline'):
99
+ config_defaults.append(f'--{k}')
100
+ config_defaults.append(v)
101
+
102
+ for section in cparser.sections():
103
+ if section == 'commandline':
104
+ continue
105
+ if getattr(opts, section, None):
106
+ section_name_space = getattr(opts, section)
107
+ else:
108
+ section_name_space = Namespace()
109
+ for k, v in cparser.items(section):
110
+ if expand and k == 'path':
111
+ v = os.path.join(expand, v)
112
+ setattr(section_name_space, k, v)
113
+ setattr(opts, section, section_name_space)
114
+ return config_defaults
115
+
116
+ '''
117
+ Build Config class
118
+ '''
119
def buildConfigs(parser, cmdline_args, child_process=False, rerun=False):
    """Populate the Configs class from main.config and the command line.

    Values from main.config's [commandline] section are parsed first and
    then overridden by the actual command-line arguments. Afterwards the
    output directory/name and CPU count are normalized, and the chosen
    placement binary's existence is sanity-checked.
    """
    cparser = configparser.ConfigParser()
    # keep option names case-sensitive (ConfigParser lower-cases by default)
    cparser.optionxform = str
    args = parser.parse_args(cmdline_args)

    # Check if only updating config files, if so, re-initialize the
    # configuration file at ~/.bscampp/main.config and exit
    #if args.command == 'update-configs':
    #    _ = init_config_file(homepath, rerun=True)
    #    _LOG.warning('Finished re-initializing the configuration file '
    #            f'at {main_config_path}, exiting...')
    #    exit(0)

    # first load arguments from main.configs
    main_args = Namespace()
    cmdline_main = _read_config_file(main_config_path,
            cparser, main_args, child_process=child_process)

    # merge arguments, in the correct order so things are overridden correctly:
    # config-file tokens first, real command-line tokens last (they win)
    args = parser.parse_args(cmdline_main + cmdline_args,
            namespace=main_args)

    # directly add all arguments that's defined in the Configs class
    for k in args.__dict__.keys():
        k_attr = getattr(args, k)
        if k in Configs.__dict__:
            # valid argument that's defined in the Configs class
            setattr(Configs, k, k_attr)
        else:
            # check if the argument is a valid config section Namespace
            set_valid_configuration(k, k_attr)

    # create outdir if missing
    if not os.path.isdir(Configs.outdir):
        os.makedirs(Configs.outdir)

    # modify outname if it does not have a .jplace suffix
    if Configs.outname.split('.')[-1].lower() != 'jplace':
        Configs.outname += '.jplace'

    # cap a positive num_cpus at the machine's core count; any
    # non-positive value means "use all cores"
    if Configs.num_cpus > 0:
        Configs.num_cpus = min(os.cpu_count(), Configs.num_cpus)
    else:
        Configs.num_cpus = os.cpu_count()

    # sanity check for existence of base placement binary path
    # NOTE(review): assert statements are stripped under `python -O`;
    # an explicit raise would survive optimization — confirm intent
    if Configs.placement_method == 'epa-ng':
        assert os.path.exists(Configs.epang_path), 'epa-ng not detected!'
    elif Configs.placement_method == 'pplacer':
        assert os.path.exists(Configs.pplacer_path), 'pplacer not detected!'
bscampp/default.config ADDED
@@ -0,0 +1,5 @@
1
+ [basic]
2
+ pplacer_path =
3
+ epang_path =
4
+ taxit_path =
5
+ hamming_distance_dir =
bscampp/functions.py ADDED
@@ -0,0 +1,394 @@
1
+ import json, time, os, sys
2
+ import treeswift
3
+ from collections import defaultdict, Counter
4
+
5
+ from bscampp import get_logger, log_exception
6
+ from bscampp.configs import Configs
7
+ from bscampp.jobs import EPAngJob, TaxtasticJob, PplacerTaxtasticJob
8
+ from bscampp.utils import write_fasta
9
+ import bscampp.utils as utils
10
+
11
+ _LOG = get_logger(__name__)
12
+
13
+ '''
14
+ Function to read in the placement tree and alignment.
15
+ If query alignment is provided, will use the provided query instead of
16
+ the ones (potentially) included in the reference alignment
17
+ '''
18
def readData(workdir):
    """Read the placement tree and alignment(s).

    Returns (tree, leaf_dict, aln_path, ref_dict, qaln_path, q_dict).
    When Configs.qaln_path is given, the provided query alignment is
    used as-is; otherwise queries are separated out of the reference
    alignment and both parts are written under ``workdir``.
    """
    t0 = time.perf_counter()
    _LOG.info('Reading in input data...')

    # (1) load reference tree
    tree = treeswift.read_tree_newick(Configs.tree_path)
    tree.resolve_polytomies()

    leaf_dict = tree.label_to_node(selection='leaves')
    # clean the leaf keys so that ' or " are not present
    ori_keys = list(leaf_dict.keys())
    for key in ori_keys:
        _node = leaf_dict[key]
        new_key = key.replace('\'', '').replace('\"', '')
        leaf_dict.pop(key)
        leaf_dict[new_key] = _node

    # (2) load reference alignment and query alignment (if provided)
    if Configs.qaln_path is not None:
        ref_dict = utils.read_data(Configs.aln_path)
        q_dict = utils.read_data(Configs.qaln_path)
        aln_path, qaln_path = Configs.aln_path, Configs.qaln_path
    else:
        aln_dict = utils.read_data(Configs.aln_path)
        # (sic) utils.seperate is the project's query/reference splitter
        ref_dict, q_dict = utils.seperate(aln_dict, leaf_dict)

        # after separating queries from the reference alignment, write
        # them to workdir
        # (bugfix: previously wrote to undefined names temp_qaln_path /
        # temp_aln_path, raising NameError on this code path)
        qaln_path = os.path.join(workdir, 'qaln.fa')
        write_fasta(qaln_path, q_dict)

        aln_path = os.path.join(workdir, 'aln.fa')
        write_fasta(aln_path, ref_dict)

    t1 = time.perf_counter()
    _LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
    return tree, leaf_dict, aln_path, ref_dict, qaln_path, q_dict
56
+
57
+ '''
58
+ Function to get the closest leaf for each query sequence based on Hamming
59
+ distance
60
+ '''
61
def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir):
    """Compute each query's closest backbone leaves by Hamming distance.

    Invokes one of the external distance binaries (homology, hamming, or
    fragment_hamming) and parses its output into:
      - query_votes_dict: query name -> leaf labels sorted by distance
      - query_top_vote_dict: query name -> single closest leaf label
    Queries flagged as having no homologous sites are dropped.
    """
    t0 = time.perf_counter()
    _LOG.info('Computing closest leaves for query sequences...')
    query_votes_dict = dict()
    query_top_vote_dict = dict()

    tmp_output = os.path.join(workdir, 'closest.txt')

    cmd = []
    if Configs.similarityflag:
        cmd.append(os.path.join(Configs.hamming_distance_dir, 'homology'))
    else:
        # bugfix: this branch referenced an undefined local
        # `fragment_flag`; the setting lives on Configs as `fragmentflag`
        if Configs.fragmentflag == False:
            cmd.append(os.path.join(Configs.hamming_distance_dir, 'hamming'))
        else:
            cmd.append(os.path.join(
                Configs.hamming_distance_dir, 'fragment_hamming'))
    cmd.extend([aln_path, str(len(aln)), qaln_path, str(len(qaln)),
        tmp_output, str(Configs.votes)])
    os.system(' '.join(cmd))

    # process closest leaves
    unusable_queries = set()
    with open(tmp_output) as f:
        for line in f:
            line = line.strip()
            y = line.split(',')
            name = y.pop(0)
            # each taxon entry is 'leaf:hamming'
            for idx, taxon in enumerate(y):
                leaf, hamming = taxon.split(':')
                y[idx] = (leaf, int(hamming))

            # sort ascending by distance, then keep only the leaf labels
            y = sorted(y, key=lambda x: x[1])
            for idx, taxon in enumerate(y):
                y[idx] = taxon[0]

            if name.find(':') >= 0:
                name_list = name.split(":")
                name = name_list[0]
                ungapped_length = name_list[1]
                # NOTE(review): y[0] is a leaf label (str) compared to
                # the ungapped-length string; presumably this encodes the
                # binary's "no homologous sites" sentinel — confirm
                # against the distance tool's output format
                if y[0] == ungapped_length:
                    _LOG.warning(f'Sequence {name}: no homologous sites found, '
                            'removed before placement.')
                    unusable_queries.add(name)
            if name not in unusable_queries:
                query_votes_dict[name] = y
                query_top_vote_dict[name] = y[0]

    t1 = time.perf_counter()
    _LOG.info('Time to compute closest leaves: {} seconds'.format(t1 - t0))
    return query_votes_dict, query_top_vote_dict
113
+
114
+ '''
115
+ Function to assign queries to subtrees based on their votes
116
+ '''
117
def assignQueriesToSubtrees(query_votes_dict, query_top_vote_dict,
        tree, leaf_dict):
    """Partition queries into subtrees of the placement tree.

    Repeatedly seeds a subtree at the leaf with the most outstanding
    votes, pulls in queries whose top vote falls inside that subtree,
    and subtracts their votes from the pool until every query is
    assigned. Queries are then re-assigned to whichever candidate
    subtree minimizes the path length from their top-voted leaf to that
    subtree's seed leaf.

    Returns (new_subtree_dict, placed_query_list); the list is empty
    here and is filled in later by the placement step.
    NOTE: query_votes_dict is consumed (emptied) by this function.
    """
    t0 = time.perf_counter()
    _LOG.info('Adding query votes to the placement tree...')

    # (1) go over the query votes and add them to corresponding leaves
    lf_votes = Counter()
    leaf_queries = dict()
    for name, y in query_votes_dict.items():
        lf_votes.update(y)
        for ind, leaf in enumerate(y):
            # only index 0 (the closest leaf) counts as the top vote
            top_vote = False
            if ind == 0:
                top_vote = True
            if leaf not in leaf_queries:
                leaf_queries[leaf] = {(name,top_vote)}
            else:
                leaf_queries[leaf].add((name,top_vote))

    subtree_dict = dict()
    subtree_leaf_label_dict = dict()
    most_common_index = 0

    # assign queries to subtrees, and remove them from the pool
    # repeat until all queries are assigned
    while len(query_votes_dict) > 0:
        _LOG.info("queries left to assign: {}".format(len(query_votes_dict)))
        # pick the (most_common_index+1)-th most-voted leaf as the seed;
        # the index advances only when a round assigns nothing
        (seed_label, node_votes) = lf_votes.most_common(
            most_common_index+1)[most_common_index]

        node_y = leaf_dict[seed_label]
        # extract [subtreesize] leaves around the seed
        labels = utils.subtree_nodes_with_edge_length(tree, node_y,
            Configs.subtreesize)
        subtree = tree.extract_tree_with(labels)
        label_set = set(labels)

        queries_by_subtree = set()
        subtree_query_set = set()

        # gather any other queries that can be used with this subtree
        for label in labels:
            leaf_queries_remove_set = set()
            if label in leaf_queries:

                for leaf_query, top_vote in leaf_queries[label]:

                    # already assigned in an earlier round: just purge
                    if leaf_query not in query_votes_dict:
                        leaf_queries_remove_set.add((leaf_query, top_vote))
                        continue

                    # only top-voted queries are claimed by this subtree
                    if top_vote:
                        subtree_query_set.add(leaf_query)
                        leaf_queries_remove_set.add((leaf_query, top_vote))

                leaf_queries[label].difference_update(leaf_queries_remove_set)
        queries_by_subtree.update(subtree_query_set)

        if len(queries_by_subtree) > 0:
            subtree_dict[subtree] = (seed_label, queries_by_subtree)
            subtree_leaf_label_dict[subtree] = subtree.label_to_node(
                selection='leaves')

        # NOTE(review): votes_b4 is unused — looks like a debugging leftover
        votes_b4 = len(list(lf_votes.elements()))
        # retire assigned queries and their votes from the pool
        for query in queries_by_subtree:
            if query in query_votes_dict:
                lf_votes.subtract(query_votes_dict[query])
                query_votes_dict.pop(query)

        if len(queries_by_subtree) == 0:
            # 10.27.2023 - Chengze Shen
            # >>> prevent going over the total number of votes
            most_common_index += 1
        else:
            most_common_index = 0

    placed_query_list = []

    # reassign queries to the subtree minimizing total edge length
    # from the query's top vote to the subtree's seedleaf
    new_subtree_dict = dict()
    for query, closest_label in query_top_vote_dict.items():
        best_subtree = None
        best_distance = 99999999999999999
        for subtree, value in subtree_dict.items():
            leaf_label_dict = subtree_leaf_label_dict[subtree]
            seed_label, _ = value
            if closest_label in leaf_label_dict:
                distance = subtree.distance_between(
                    leaf_label_dict[closest_label],
                    leaf_label_dict[seed_label])
                if distance < best_distance:
                    best_subtree = subtree
                    best_distance = distance
        # best_subtree may be None if no subtree contains the top vote
        if best_subtree in new_subtree_dict:
            new_subtree_dict[best_subtree].append(query)
        else:
            new_subtree_dict[best_subtree] = [query]

    t1 = time.perf_counter()
    _LOG.info('Time to assign queries to subtrees: {} seconds'.format(t1 - t0))
    return new_subtree_dict, placed_query_list
219
+
220
+ '''
221
+ Helper function to run a single placement task. Designed to use with
222
+ multiprocessing
223
+ '''
224
def placeOneSubtree():
    """Placeholder for a per-subtree placement worker (multiprocessing)."""
    # TODO: not yet implemented
    pass
227
+
228
+ '''
229
+ Function to perform placement of queries for each subtree
230
+ '''
231
def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
        aln, qaln, cmdline_args, workdir, pool, lock):
    """Run phylogenetic placement on each subtree and aggregate results.

    For every (subtree, queries) pair, writes the subtree's alignment,
    query alignment, and newick tree to ``workdir``, runs EPA-ng or
    pplacer per Configs.placement_method, then remaps each placement
    edge from subtree coordinates back onto the full backbone ``tree``.
    Returns a jplace-format dict ready to be serialized.

    NOTE(review): ``pool`` and ``lock`` are accepted but not used in this
    body — presumably reserved for the placeOneSubtree worker.
    Side effect: appends each placed query's name to placed_query_list.
    """
    t0 = time.perf_counter()
    _LOG.info('Performing placement on each subtree...')

    # prepare to write an aggregated results to local
    jplace = dict()
    # annotate backbone edges with numbers so placements can be remapped
    utils.add_edge_nbrs(tree)
    jplace["tree"] = utils.newick_edge_tokens(tree)
    placements = []

    # go over the dictionary of subtrees and their assigned queries
    # perform placement using either EPA-ng or pplacer
    final_subtree_count, total_subtrees_examined = 0, 0
    for subtree, query_list in new_subtree_dict.items():
        total_subtrees_examined += 1
        _LOG.info('- Subtree {}/{} with {} queries'.format(
            total_subtrees_examined, len(new_subtree_dict), len(query_list)))

        # empty subtree, continue
        if len(query_list) == 0:
            continue
        final_subtree_count += 1

        # name all temporary output files
        tmp_tree = os.path.join(workdir, 'tree')
        tmp_aln = os.path.join(workdir, f'subtree_{final_subtree_count}_aln.fa')
        tmp_qaln = os.path.join(workdir, f'subtree_{final_subtree_count}_qaln.fa')
        tmp_output = os.path.join(workdir,
            'subtree_{}_{}.jplace'.format(
                final_subtree_count, Configs.placement_method))

        # extract corresponding ref sequences and queries
        tmp_leaf_dict = subtree.label_to_node(selection='leaves')
        if '' in tmp_leaf_dict:
            del tmp_leaf_dict['']
        tmp_ref_dict = {label : aln[label] for label in tmp_leaf_dict.keys()}
        tmp_q_dict = {name : qaln[name] for name in query_list}
        write_fasta(tmp_aln, tmp_ref_dict)
        write_fasta(tmp_qaln, tmp_q_dict)

        # process the subtree before placement
        subtree.resolve_polytomies()
        subtree.suppress_unifurcations()
        subtree.write_tree_newick(tmp_tree, hide_rooted_prefix=True)

        # 1.27.2025 - Chengze Shen
        # choose the placement method to run
        if Configs.placement_method == 'epa-ng':
            job = EPAngJob(path=Configs.epang_path,
                info_path=Configs.info_path, tree_path=tmp_tree,
                aln_path=tmp_aln, qaln_path=tmp_qaln,
                outdir=workdir, num_cpus=Configs.num_cpus)
            # for EPA-ng, ensure that outpath name is changed to the one we want
            _outpath = job.run()
            os.system('mv {} {}'.format(_outpath, tmp_output))
        elif Configs.placement_method == 'pplacer':
            # build ref_pkg with info and tmp_tree and tmp_aln
            refpkg_dir = os.path.join(workdir,
                f'subtree_{final_subtree_count}.refpkg')
            taxit_job = TaxtasticJob(path=Configs.taxit_path,
                outdir=refpkg_dir, name=f'subtree_{final_subtree_count}',
                aln_path=tmp_aln, tree_path=tmp_tree,
                info_path=Configs.info_path)
            _ = taxit_job.run()

            # run pplacer-taxtastic
            job = PplacerTaxtasticJob(path=Configs.pplacer_path,
                refpkg_dir=refpkg_dir, model=Configs.model,
                outpath=tmp_output, num_cpus=Configs.num_cpus,
                qaln_path=tmp_qaln)
            tmp_output = job.run()
        else:
            raise ValueError(
                f"Placement method {Configs.placement_method} not recognized")

        # read in each placement result
        place_file = open(tmp_output, 'r')
        place_json = json.load(place_file)
        # jplace name field: 'n' for EPA-ng, 'nm' for pplacer output
        tgt = "n"
        if Configs.placement_method == 'pplacer':
            tgt = "nm"
        if len(place_json["placements"]) > 0:
            added_tree, edge_dict = utils.read_tree_newick_edge_tokens(
                place_json["tree"])

            # obtain the fields for "p"
            fields = place_json["fields"]
            # set the fields in jplace accordingly
            if "fields" not in jplace:
                jplace["fields"] = fields
            field_to_idx = {field: i for i, field in enumerate(fields)}

            for tmp_place in place_json["placements"]:
                #print(tmp_place)
                placed_query_list.append(tmp_place[tgt][0])
                for i in range(len(tmp_place["p"])):
                    edge_num = tmp_place["p"][i][
                        field_to_idx['edge_num']]
                    edge_distal = tmp_place["p"][i][
                        field_to_idx['distal_length']]

                    # locate the placement edge's endpoints in added_tree
                    right_n = edge_dict[str(edge_num)]
                    left_n = right_n.get_parent()

                    # left and right path_l and path_r are in added_tree
                    left, path_l = utils.find_closest(left_n, {left_n, right_n})
                    right, path_r = utils.find_closest(right_n, {left_n, right_n})

                    # translate endpoints back to the full backbone tree
                    left = leaf_dict[left.get_label()]
                    right = leaf_dict[right.get_label()]
                    _, path = utils.find_closest(left, {left}, y=right)
                    # now left right and path are in tree

                    length = sum([x.get_edge_length() for x in path_l])+edge_distal
                    # left path length through subtree before placement node

                    # walk the backbone path until the remaining length
                    # is exhausted; that edge receives the placement
                    target_edge = path[-1]

                    for j in range(len(path)):
                        length -= path[j].get_edge_length()
                        if length < 0:
                            target_edge = path[j]
                            break

                    #tmp_place["p"][i][field_to_idx['edge_num']] = 0

                    label = target_edge.get_label()

                    # labels carry 'taxon%%edge_number' from add_edge_nbrs
                    [taxon, target_edge_nbr] = label.split('%%',1)
                    tmp_place["p"][i][field_to_idx['distal_length']] = \
                        target_edge.get_edge_length()+length
                    tmp_place["p"][i][field_to_idx['edge_num']] = \
                        int(target_edge_nbr)

                placements.append(tmp_place.copy())
        place_file.close()
    _LOG.info(f'Final number of subtrees used: {final_subtree_count}')

    # prepare the output jplace to write
    jplace["placements"] = placements
    jplace["metadata"] = {"invocation": " ".join(cmdline_args)}
    jplace["version"] = 3
    #jplace["fields"] = ["distal_length", "edge_num", "like_weight_ratio", \
    #    "likelihood", "pendant_length"]

    t1 = time.perf_counter()
    _LOG.info('Time to place queries to subtrees: {} seconds'.format(t1 - t0))
    return jplace
380
+
381
+ '''
382
+ Function to write a given jplace object to local output
383
+ '''
384
def writeOutputJplace(output_jplace):
    """Serialize the aggregated jplace dict to Configs.outdir/outname."""
    t0 = time.perf_counter()
    _LOG.info('Writing aggregated placements to local...')

    outpath = os.path.join(Configs.outdir, Configs.outname)
    # context manager guarantees the handle is closed even if json.dump
    # raises (previously the file was left open on error)
    with open(outpath, 'w') as outf:
        json.dump(output_jplace, outf, sort_keys=True, indent=4)

    t1 = time.perf_counter()
    _LOG.info('Time to build final jplace file: {} seconds'.format(t1 - t0))