partis-bcr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bin/FastTree +0 -0
- bin/add-chimeras.py +59 -0
- bin/add-seqs-to-outputs.py +81 -0
- bin/bcr-phylo-run.py +799 -0
- bin/build.sh +24 -0
- bin/cf-alleles.py +97 -0
- bin/cf-germlines.py +57 -0
- bin/cf-linearham.py +199 -0
- bin/chimera-plot.py +76 -0
- bin/choose-partially-paired.py +143 -0
- bin/circle-plots.py +30 -0
- bin/compare-plotdirs.py +298 -0
- bin/diff-parameters.py +133 -0
- bin/docker-hub-push.sh +6 -0
- bin/extract-pairing-info.py +55 -0
- bin/gcdyn-simu-run.py +223 -0
- bin/gctree-run.py +244 -0
- bin/get-naive-probabilities.py +126 -0
- bin/iqtree-1.6.12 +0 -0
- bin/lonr.r +1020 -0
- bin/makeHtml +52 -0
- bin/mds-run.py +46 -0
- bin/parse-output.py +277 -0
- bin/partis +1869 -0
- bin/partis-pip +116 -0
- bin/partis.py +1869 -0
- bin/plot-gl-set-trees.py +519 -0
- bin/plot-hmms.py +151 -0
- bin/plot-lb-tree.py +427 -0
- bin/raxml-ng +0 -0
- bin/read-bcr-phylo-trees.py +38 -0
- bin/read-gctree-output.py +166 -0
- bin/run-chimeras.sh +64 -0
- bin/run-dtr-scan.sh +25 -0
- bin/run-paired-loci.sh +100 -0
- bin/run-tree-metrics.sh +88 -0
- bin/smetric-run.py +62 -0
- bin/split-loci.py +317 -0
- bin/swarm-2.1.13-linux-x86_64 +0 -0
- bin/test-germline-inference.py +425 -0
- bin/tree-perf-run.py +194 -0
- bin/vsearch-2.4.3-linux-x86_64 +0 -0
- bin/vsearch-2.4.3-macos-x86_64 +0 -0
- bin/xvfb-run +194 -0
- partis_bcr-1.0.2.data/scripts/cf-alleles.py +97 -0
- partis_bcr-1.0.2.data/scripts/cf-germlines.py +57 -0
- partis_bcr-1.0.2.data/scripts/extract-pairing-info.py +55 -0
- partis_bcr-1.0.2.data/scripts/gctree-run.py +244 -0
- partis_bcr-1.0.2.data/scripts/parse-output.py +277 -0
- partis_bcr-1.0.2.data/scripts/split-loci.py +317 -0
- partis_bcr-1.0.2.data/scripts/test.py +1005 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/METADATA +1 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/RECORD +101 -51
- partis_bcr-1.0.2.dist-info/top_level.txt +1 -0
- {partis → python}/glutils.py +1 -1
- python/main.py +30 -0
- {partis → python}/plotting.py +10 -1
- {partis → python}/treeutils.py +18 -16
- {partis → python}/utils.py +14 -7
- packages/ham/bcrham +0 -0
- partis/main.py +0 -59
- partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/WHEEL +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/entry_points.txt +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/licenses/COPYING +0 -0
- {partis → python}/__init__.py +0 -0
- {partis → python}/alleleclusterer.py +0 -0
- {partis → python}/allelefinder.py +0 -0
- {partis → python}/alleleremover.py +0 -0
- {partis → python}/annotationclustering.py +0 -0
- {partis → python}/baseutils.py +0 -0
- {partis → python}/cache/__init__.py +0 -0
- {partis → python}/cache/cached_uncertainties.py +0 -0
- {partis → python}/clusterpath.py +0 -0
- {partis → python}/coar.py +0 -0
- {partis → python}/corrcounter.py +0 -0
- {partis → python}/datautils.py +0 -0
- {partis → python}/event.py +0 -0
- {partis → python}/fraction_uncertainty.py +0 -0
- {partis → python}/gex.py +0 -0
- {partis → python}/glomerator.py +0 -0
- {partis → python}/hist.py +0 -0
- {partis → python}/hmmwriter.py +0 -0
- {partis → python}/hutils.py +0 -0
- {partis → python}/indelutils.py +0 -0
- {partis → python}/lbplotting.py +0 -0
- {partis → python}/mds.py +0 -0
- {partis → python}/mutefreqer.py +0 -0
- {partis → python}/paircluster.py +0 -0
- {partis → python}/parametercounter.py +0 -0
- {partis → python}/paramutils.py +0 -0
- {partis → python}/partitiondriver.py +0 -0
- {partis → python}/partitionplotter.py +0 -0
- {partis → python}/performanceplotter.py +0 -0
- {partis → python}/plotconfig.py +0 -0
- {partis → python}/processargs.py +0 -0
- {partis → python}/prutils.py +0 -0
- {partis → python}/recombinator.py +0 -0
- {partis → python}/scanplot.py +0 -0
- {partis → python}/seqfileopener.py +0 -0
- {partis → python}/treegenerator.py +0 -0
- {partis → python}/viterbicluster.py +0 -0
- {partis → python}/vrc01.py +0 -0
- {partis → python}/waterer.py +0 -0
bin/plot-lb-tree.py
ADDED
@@ -0,0 +1,427 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# has to be its own script, since ete3 requires its own god damn python version, installed in a separated directory
|
3
|
+
from __future__ import absolute_import, division, unicode_literals
|
4
|
+
from __future__ import print_function
|
5
|
+
import time
|
6
|
+
import yaml
|
7
|
+
import itertools
|
8
|
+
import glob
|
9
|
+
import argparse
|
10
|
+
import copy
|
11
|
+
import random
|
12
|
+
import os
|
13
|
+
import subprocess
|
14
|
+
import sys
|
15
|
+
import colored_traceback.always
|
16
|
+
from collections import OrderedDict
|
17
|
+
import numpy
|
18
|
+
import math
|
19
|
+
import re
|
20
|
+
from io import open
|
21
|
+
import ete3
|
22
|
+
from pathlib import Path
|
23
|
+
|
24
|
+
# ----------------------------------------------------------------------------------------
|
25
|
+
# named colors, keyed by label
scolors = {
    'novel': '#ffc300',  # 'Gold'
    'data': 'LightSteelBlue',
    'pale-green': '#85ad98',
    'pale-blue': '#94a3d1',
    'tigger-default': '#d77c7c',  # '#c32222', # red
    'igdiscover': '#85ad98',  # '#29a614', # green
    'partis': '#94a3d1',  # '#2455ed', # blue
    'lbi': '#94a3d1',
}

# listcolors = [plotting.getgrey('medium') for _ in range(10)]
listfaces = ['red', 'blue', 'green']
used_colors = {}
used_faces = {}
# ordered so that iteration always goes ok -> missing -> spurious
simu_colors = OrderedDict([
    ('ok', 'DarkSeaGreen'),
    ('missing', '#d77c7c'),
    ('spurious', '#a44949'),
])
|
48
|
+
def get_scale_min(metric, vals):
    """Return the value at which the color scale for <metric> should bottom out, given all of its values <vals>."""
    # for 'cons-dist-aa' the scale only extends 10 below the largest value, for everything else it covers the full range
    return max(vals) - 10 if metric == 'cons-dist-aa' else min(vals)
|
53
|
+
|
54
|
+
# ----------------------------------------------------------------------------------------
|
55
|
+
def read_input(args):
    """Read the tree file args.treefname and return {'treestr' : <contents>}, stripped of surrounding whitespace, '[&R] ' rooting tags, and single quotes."""
    with open(args.treefname) as tfile:
        cleaned = tfile.read().strip()
    for unwanted in ('[&R] ', '\''):  # same removal order as always: rooting tag first, then quotes
        cleaned = cleaned.replace(unwanted, '')
    return {'treestr' : cleaned}
|
61
|
+
|
62
|
+
# ----------------------------------------------------------------------------------------
|
63
|
+
opacity, node_fsize = 0.65, 7  # opacity set on node/legend faces, and font size for the node + branch label text
|
65
|
+
|
66
|
+
# ----------------------------------------------------------------------------------------
|
67
|
+
def set_delta_affinities(etree, affyfo):
    """Set an 'affinity_change' feature (affinity minus parent's affinity) on every node of <etree>.

    The feature is None for nodes that have no parent, or where the node or its parent is missing from <affyfo> or
    has a None affinity. Returns the list of all non-None changes (for normalizing the cmap).
    """
    def has_affy(tnode):
        return tnode.name in affyfo and affyfo[tnode.name] is not None

    changes = []
    for tnode in etree.traverse():
        parent = tnode.up
        if parent is not None and has_affy(tnode) and has_affy(parent):
            delta = affyfo[tnode.name] - affyfo[parent.name]
            tnode.add_feature('affinity_change', delta)
            changes.append(delta)
        else:
            tnode.add_feature('affinity_change', None)

    return changes
|
77
|
+
|
78
|
+
# ----------------------------------------------------------------------------------------
|
79
|
+
def get_size(vmin, vmax, val):
    """Linearly map <val> from the range [vmin, vmax] onto [args.min_face_size, args.max_face_size].

    Returns args.min_face_size if the range is empty (vmin == vmax). If args.use_node_area is set, the square
    root of <val> is used (callers are then expected to pass sqrt'd bounds).
    NOTE reads the module-level <args>.
    """
    if vmin == vmax:
        return args.min_face_size
    scaled = math.sqrt(val) if args.use_node_area else val
    size_range = args.max_face_size - args.min_face_size
    return args.min_face_size + (scaled - vmin) * size_range / float(vmax - vmin)
|
86
|
+
|
87
|
+
# ----------------------------------------------------------------------------------------
|
88
|
+
def add_legend(tstyle, varname, all_vals, smap, info, start_column, add_missing=False, add_sign=None, reverse_log=False, n_entries=5, fsize=4, no_opacity=False):  # NOTE very similar to add_smap_legend() in plot_2d_scatter() in python/lbplotting.py
    """Add a legend for <varname> to <tstyle>, occupying columns <start_column> to <start_column> + 2.

    Column <start_column> gets the title, the next column one rectangle per entry, and the last column the entry's text.
      all_vals: values that the scale should span (if empty, [-1, 1] is used)
      smap: scalar map used to color the rectangles; if None the rectangles are grey and scaled in size with get_size()
      info: passed on to plotting.get_smap_color()
      add_missing: add a final entry labeled 'missing'
      add_sign: None, '-', or '+', prepended to each value's text
      reverse_log: apply math.exp() to each value before writing its text (rectangle color/size use the original value)
      n_entries: number of entries requested from plotting.get_leg_entries()
      fsize: font size of all legend text
      no_opacity: if set, don't apply the module-level <opacity> to the rectangles
    """
    if len(all_vals) == 0:
        all_vals = [-1, 1]  # um, maybe this is ok?
        # NOTE you *cannot* return here, since if we don't actually add the stuff then it later (when rendering) crashes with a key error deep within ete due to the <start_column> being wrong/inconsistent
    assert add_sign in [None, '-', '+']
    tstyle.legend.add_face(ete3.TextFace(' %s ' % varname, fsize=fsize), column=start_column)
    min_val, max_val = get_scale_min(varname, all_vals), max(all_vals)
    if min_val == max_val:
        min_val, max_val = plotting.expand_bounds([min_val, max_val], only_down=True)  # <only_down> is for affinity increase scale: expand downward if only one value so the one value shows up as dark red (rather than super light red)
    val_list = plotting.get_leg_entries(n_entries=n_entries, min_val=min_val, max_val=max_val)
    # if add_sign is not None and add_sign == '-':  # for negative changes, we have the cmap using abs() and want to legend order to correspond
    #     val_list = reversed(val_list)  # arg, this breaks something deep in the legend maker, not sure what
    key_list = [None for _ in val_list]
    if add_missing:
        val_list += [None]
        key_list += ['missing!']  # doesn't matter what the last one is as long as it isn't in <affyfo>
    for val, key in zip(val_list, key_list):
        tstyle.legend.add_face(ete3.TextFace('', fsize=fsize), column=start_column)
        if smap is None:
            sz = get_size(min_val, max_val, val)
            rface = ete3.RectFace(sz, sz, bgcolor=plotting.getgrey(), fgcolor=None)
        else:
            rface = ete3.RectFace(6, 6, bgcolor=plotting.get_smap_color(smap, info, key=key, val=val), fgcolor=None)
        if not no_opacity:
            rface.opacity = opacity
        tstyle.legend.add_face(rface, column=start_column + 1)
        if key is None:  # regular entry: write the (maybe un-logged) value
            if reverse_log:
                val = math.exp(val)
            if varname == 'cons-dist-aa':
                vstr = '%.1f' % val
            elif 'affinity' in varname:
                vstr = '%s' % utils.round_to_n_digits(val, 2)
            else:
                vstr = '%.2f' % val
            tfstr = ' %s%s' % (utils.non_none([add_sign, '']), vstr)
        else:  # the 'missing' entry has no value (it's None), so it gets fixed text (this used to assign to a misspelled variable, so the previous entry's text got reused)
            tfstr = ' missing'
        tstyle.legend.add_face(ete3.TextFace(tfstr, fsize=fsize), column=start_column + 2)
|
125
|
+
|
126
|
+
# ----------------------------------------------------------------------------------------
|
127
|
+
def label_node(lnode, root_node):
    """Add text faces to <lnode>: branch top/bottom labels from args.metafo['labels'] (if present), and the node's name.

    Whether the node's name is shown is decided by use_node_name() below from the module-level <args> (--label-all-nodes,
    --label-leaf-nodes, --label-root-node, --queries-to-include, --meta-info-to-emphasize, --uid-translations, --node-label-regex).
    Names of any duplicate sequences (args.metafo['duplicates']) that should be shown are merged into the same label.
    <root_node> is only used to decide whether <lnode> is the root. Returns None.
    """
    # ----------------------------------------------------------------------------------------
    def meta_emph(tname):  # True if <tname>'s meta info matches the (first) key/val pair in --meta-info-to-emphasize
        if args.meta_info_to_emphasize is None:
            return False
        key, val = list(args.meta_info_to_emphasize.items())[0]
        return bool(key in args.metafo and tname in args.metafo[key] and utils.meta_info_equal(key, val, args.metafo[key][tname], formats=args.meta_emph_formats))
    # ----------------------------------------------------------------------------------------
    def use_node_name(tname, tnode=None):  # should the name <tname> be written on the plot? (leaf/root criteria can only be checked if <tnode> is passed)
        if args.label_all_nodes:
            return True
        if tnode is not None and args.label_leaf_nodes and tnode.is_leaf():
            return True
        if args.queries_to_include is not None and tname in args.queries_to_include:
            return True
        if tnode is not None and args.label_root_node and tnode is root_node:
            return True
        if meta_emph(tname):
            return True
        if args.uid_translations is not None and tname in args.uid_translations:
            return True
        if args.node_label_regex is not None and len(re.findall(args.node_label_regex, tname)) > 0:
            return True
        return False
    # ----------------------------------------------------------------------------------------
    def split_line(lstr):  # split into two rows if more than 3 entries
        if lstr.count(',') < 3:
            return lstr
        blist = lstr.split(', ')
        half = len(blist) // 2
        return '%s\n%s' % (', '.join(blist[:half]), ', '.join(blist[half:]))
    # ----------------------------------------------------------------------------------------
    def get_nlabel(tname, tnode=None):  # label text for <tname> (after translation + regex reduction), or None if it shouldn't be shown
        if not use_node_name(tname, tnode=tnode):
            return None
        nlabel = tname
        if args.uid_translations is not None and nlabel in args.uid_translations:
            nlabel = args.uid_translations[nlabel]
        if args.node_label_regex is not None:
            mstrs = re.findall(args.node_label_regex, nlabel)
            if len(mstrs) > 0:
                nlabel = '+'.join(mstrs)
        return nlabel
    # ----------------------------------------------------------------------------------------
    def get_ncolor(tname):
        return 'red' if meta_emph(tname) or args.meta_info_to_emphasize is None else 'black'
    # ----------------------------------------------------------------------------------------
    dup_names = []
    if 'duplicates' in args.metafo and lnode.name in args.metafo['duplicates']:
        dup_names = args.metafo['duplicates'][lnode.name]
    use_name = use_node_name(lnode.name, tnode=lnode)
    use_name |= any(use_node_name(n) for n in dup_names)
    if use_name:
        # NOTE <lnode> has to be passed for the node's own name, otherwise nodes chosen only because they're a leaf/the root (--label-leaf-nodes/--label-root-node) end up with an empty label
        nlabels = [get_nlabel(lnode.name, tnode=lnode)] + [get_nlabel(n) for n in dup_names]
        ncolors = [get_ncolor(lnode.name)] + [get_ncolor(n) for n in dup_names]
        nlabel = ', '.join(sorted(set(l for l in nlabels if l is not None)))
        ncolor = 'red' if 'red' in ncolors else 'black'
    elif 'labels' in args.metafo:  # no name to write, but there may still be branch labels
        nlabel, ncolor = '', 'red'
    else:
        return
    if 'labels' in args.metafo:
        mlabel = args.metafo['labels'].get(lnode.name, '')
        # index 0/1 is the face column; t/b are branch-top/branch-bottom
        tlabels, blabels = ['', ''], ['', '']
        tcolors, bcolors = ['black', 'black'], ['black', 'black']
        label_list = mlabel.split('\n')
        if 'h:' in mlabel or 'l:' in mlabel:
            for lstr in label_list:
                if 'nuc' in lstr and 'aa' in lstr:
                    assert lstr.count(',') == 1  # e.g. '3 nuc, 1 aa'
                    tlabels[0], blabels[0] = lstr.split(',')
                elif lstr.startswith('h:'):
                    tlabels[1], tcolors[1] = ' '+lstr, 'blue'
                elif lstr.startswith('l:'):
                    blabels[1], bcolors[1] = ' '+lstr, 'green'
                else:
                    raise Exception('couldn\'t parse \'%s\'' % mlabel)
        elif '\n' in mlabel:
            tlabels[1] = label_list[0]
            blabels[1] = split_line('\n'.join(label_list[1:]))
        else:
            tlabels[0] = mlabel
        for il, (blab, bcol) in enumerate(zip(blabels, bcolors)):  # blabels are branch bottom labels
            lnode.add_face(ete3.TextFace(blab, fsize=node_fsize, fgcolor=bcol), column=il, position='branch-bottom')
        for il, (tlab, tcol) in enumerate(zip(tlabels, tcolors)):  # branch top labels
            lnode.add_face(ete3.TextFace(tlab, fsize=node_fsize, fgcolor=tcol), column=il, position='branch-top')
    if nlabel != '':
        tface = ete3.TextFace(' '+nlabel, fsize=node_fsize, fgcolor=ncolor)  # <nlabel> is usually the uid/sequence name
        lnode.add_face(tface, column=1)  # position='branch-bottom')
|
216
|
+
|
217
|
+
# ----------------------------------------------------------------------------------------
|
218
|
+
def set_lb_styles(args, etree, tstyle):
    """Style every node in <etree> according to the metric args.lb_metric (and affinity, if available), and add the corresponding legends to <tstyle>.

    Metric values are read from args.metafo[args.lb_metric], affinities from args.metafo[args.affy_key].
    For metrics in treeutils.affy_metrics: if affinity info exists, node size shows the metric and node color the affinity,
    otherwise node color shows the metric. For metrics in treeutils.daffy_metrics: node color shows the metric, and branch
    color the change in affinity along the branch. Returns without doing anything if there are no metric values.
    """
    # ----------------------------------------------------------------------------------------
    metric = args.lb_metric
    lbfo = args.metafo[metric]
    if 'lbr' in metric or 'lbf' in metric:  # remove zeros + maybe apply log()
        lbfo = {u : (math.log(v) if args.log_lbr else v) for u, v in lbfo.items() if v > 0}
    lbvals = list(lbfo.values())
    if len(lbvals) == 0:
        return
    hard_min = get_scale_min(metric, lbvals) if metric == 'cons-dist-aa' else None
    lb_smap = plotting.get_normalized_scalar_map(lbvals, 'viridis', hard_min=hard_min)
    lb_min, lb_max = min(lbvals), max(lbvals)

    affyfo = None  # stays None if there's no affinity info, or all affinities are None
    if args.affy_key in args.metafo and set(args.metafo[args.affy_key].values()) != set([None]):
        affyfo = args.metafo[args.affy_key]
        if metric in treeutils.affy_metrics:
            affyvals = list(affyfo.values())
            affy_smap = plotting.get_normalized_scalar_map([a for a in affyvals if a is not None], 'viridis')
        elif metric in treeutils.daffy_metrics:
            delta_affyvals = set_delta_affinities(etree, affyfo)
            have_deltas = len(delta_affyvals) > 0
            affy_increases = [v for v in delta_affyvals if v > 0]
            if len(set(affy_increases)) == 1:  # if there's only one affinity increase, expand downward so color is dark red for actual observed value
                affy_increases = plotting.expand_bounds([affy_increases[0] for _ in range(2)], only_down=True)
            delta_affy_increase_smap = plotting.get_normalized_scalar_map(affy_increases, 'Reds', remove_top_end=True) if have_deltas else None
            delta_affy_decrease_smap = plotting.get_normalized_scalar_map([abs(v) for v in delta_affyvals if v < 0], 'Blues', remove_top_end=True) if have_deltas else None
        else:
            assert False

    for node in etree.traverse():
        node.img_style['size'] = 0
        rfsize = 0
        bgcolor = plotting.getgrey()
        if metric in treeutils.affy_metrics:
            if node.name not in lbfo:  # really shouldn't happen
                print(' %s missing lb info for node \'%s\'' % (utils.color('red', 'warning'), node.name))
                continue
            if affyfo is None:  # no affinity info: color by the metric, fixed size
                rfsize = 5
                bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
            else:  # size by the metric, color by affinity
                rfsize = get_size(lb_min, lb_max, lbfo[node.name])
                if node.name in affyfo:
                    bgcolor = plotting.get_smap_color(affy_smap, affyfo, key=node.name)
        elif metric in treeutils.daffy_metrics:
            node.img_style['vt_line_color'] = plotting.getgrey()  # if they're black, it's too hard to see the large changes in affinity, since they're very dark (at least with current color schemes)
            # rfsize = get_size(lb_min, lb_max, lbfo[node.name]) if node.name in lbfo else 1.5
            rfsize = 5 if node.name in lbfo else 1.5
            bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
            if affyfo is not None and delta_affy_increase_smap is not None and node.affinity_change is not None:
                # tface = ete3.TextFace(('%+.4f' % node.affinity_change) if node.affinity_change != 0 else '0.', fsize=3)
                # node.add_face(tface, column=0)
                if node.affinity_change > 0:  # increase
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_increase_smap, None, val=node.affinity_change)
                    node.img_style['hz_line_width'] = 1.2
                elif node.affinity_change < 0:  # decrease
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_decrease_smap, None, val=abs(node.affinity_change))
                    node.img_style['hz_line_width'] = 1.2
                else:
                    node.img_style['hz_line_color'] = plotting.getgrey()
        label_node(node, etree.get_tree_root())
        rface = ete3.RectFace(width=rfsize, height=rfsize, bgcolor=bgcolor, fgcolor=None)
        rface.opacity = opacity
        node.add_face(rface, column=0)

    affy_label = args.affy_key.replace('_', ' ')
    mleg = lbplotting.mtitlestr('per-seq', metric)
    if metric in treeutils.affy_metrics:
        if affyfo is None:
            add_legend(tstyle, mleg, lbvals, lb_smap, lbfo, 0, n_entries=4)
        else:
            add_legend(tstyle, mleg, lbvals, None, lbfo, 0, n_entries=4)
            add_legend(tstyle, affy_label, [a for a in affyvals if a is not None], affy_smap, affyfo, 3)
    elif metric in treeutils.daffy_metrics:
        add_legend(tstyle, mleg, lbvals, lb_smap, lbfo, 0, reverse_log=args.log_lbr)
        if affyfo is not None:
            add_legend(tstyle, '%s decrease' % affy_label, [abs(v) for v in delta_affyvals if v < 0], delta_affy_decrease_smap, affyfo, 3, add_sign='-', no_opacity=True)
            add_legend(tstyle, '%s increase' % affy_label, [v for v in delta_affyvals if v > 0], delta_affy_increase_smap, affyfo, 6, add_sign='+', no_opacity=True)
|
294
|
+
|
295
|
+
# ----------------------------------------------------------------------------------------
|
296
|
+
def set_meta_styles(args, etree, tstyle):
    """Style every node in <etree> according to meta info in args.metafo, adding any legends to <tstyle>.

    Node color comes from --meta-info-key-to-color (a separate legend file is written next to args.outfname), node size
    from --node-size-key, node shape from --face-type, and branch color from --branch-color-key. Nodes without a usable
    value get the default grey color and the minimum size.
    """
    all_emph_vals, emph_colors = None, None
    if args.meta_info_key_to_color is not None:
        mvals = args.metafo.get(args.meta_info_key_to_color, {})
        all_emph_vals, emph_colors = plotting.meta_emph_init(args.meta_info_key_to_color, formats=args.meta_emph_formats, all_emph_vals=set(mvals.values()))
        mcolors = {v : c for v, c in emph_colors}
        plotting.make_meta_info_legend(os.path.dirname(args.outfname), utils.getprefix(os.path.basename(args.outfname)), args.meta_info_key_to_color, emph_colors, all_emph_vals, meta_emph_formats=args.meta_emph_formats, alpha=0.6)
    if args.node_size_key is not None:
        nsfo = args.metafo[args.node_size_key]
        nsvals = set(nsfo.values()) - set([None])
        raw_min_nsval = min(nsvals)  # un-sqrt'd min, used as the value for nodes with missing/None info (get_size() applies the sqrt itself)
        min_nsval, max_nsval = raw_min_nsval, max(nsvals)
        if args.use_node_area:
            min_nsval, max_nsval = [math.sqrt(v) for v in [min_nsval, max_nsval]]
    if args.branch_color_key is not None:
        bcvals = args.metafo[args.branch_color_key]
        smvals = [v for v in bcvals.values() if v is not None]
        if 'vrc01' in args.branch_color_key:
            smvals = [v for v in smvals if v > 0]
        bc_smap = plotting.get_normalized_scalar_map(smvals, 'Reds', remove_top_end=True)
        add_legend(tstyle, plotting.legends.get(args.branch_color_key, args.branch_color_key), smvals, bc_smap, bcvals, 3, no_opacity=True)
    for node in etree.traverse():
        node.img_style['size'] = 0
        rfsize = 5
        if args.node_size_key is not None:
            nsval = nsfo.get(node.name)
            if nsval is None:  # missing (or explicitly None): use the smallest size
                nsval = raw_min_nsval
            rfsize = get_size(min_nsval, max_nsval, nsval)
        bgcolor = plotting.getgrey()
        if args.meta_info_key_to_color is not None and node.name in mvals:
            bgcolor = mcolors.get(mvals[node.name], bgcolor)

        label_node(node, etree.get_tree_root())
        if args.face_type == 'rect':
            rface = ete3.RectFace(width=rfsize, height=rfsize, bgcolor=bgcolor, fgcolor=None)
        elif args.face_type == 'circle':
            rface = ete3.CircleFace(radius=rfsize, color=bgcolor)
        else:
            assert False
        rface.opacity = opacity
        node.add_face(rface, column=0)

        if args.branch_color_key is not None:
            bval = bcvals.get(node.name)
            bcol = plotting.getgrey() if 'vrc01' in args.branch_color_key and bval==0 else plotting.get_smap_color(bc_smap, bcvals, key=node.name)
            node.img_style['hz_line_color'] = bcol
            node.img_style['hz_line_width'] = 1.2
|
340
|
+
|
341
|
+
# ----------------------------------------------------------------------------------------
|
342
|
+
def plot_trees(args):
    """Read the tree from args.treefname, apply lb metric and/or meta info styles (if args.metafo is set), and render it to args.outfname."""
    treestr = read_input(args)['treestr']
    words = treestr.split()
    if len(words) == 2 and words[0] in ['[&U]', '[&R]']:  # dumbest #$!#$#ing format in the goddamn world (ete barfs on other programs' rooting information)
        treestr = words[1]
    etree = ete3.Tree(treestr, format=1)  # , quoted_node_names=True)

    tstyle = ete3.TreeStyle()
    tstyle.mode = args.tree_style[0]  # first letter of 'rectangular'/'circular'
    # tstyle.show_scale = False
    if getattr(tstyle, 'scale_length', None) is not None:
        tstyle.scale_length = 1. / treeutils.typical_bcr_seq_len
    # tstyle.show_branch_length = True
    # tstyle.complete_branch_lines_when_necessary = True

    if args.metafo is None:
        print(' %s --metafo is not set, so no node formats (e.g. labels) will be set)' % utils.wrnstr())
    else:
        if args.lb_metric is not None:
            set_lb_styles(args, etree, tstyle)
        if args.meta_info_key_to_color is not None or args.meta_info_to_emphasize:
            set_meta_styles(args, etree, tstyle)

    # print ' %s' % args.outfname
    tstyle.show_leaf_name = False
    etree.render(args.outfname, tree_style=tstyle)
|
369
|
+
|
370
|
+
# ----------------------------------------------------------------------------------------
|
371
|
+
# Script entry: parse command line args (left in the module-level <args>, which the functions above read directly), import
# the partis modules from --partis-dir, post-process list/dict style args, read the meta info yaml, then make the plot.
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--treefname', required=True)
parser.add_argument('--outfname', required=True)
parser.add_argument('--lb-metric') #, default='lbi') #, choices=treeutils.affy_metrics+treeutils.daffy_metrics)
parser.add_argument('--affy-key', default='affinity', choices=['affinity', 'relative_affinity'])
# parser.add_argument('--lb-tau', required=True, type=float)
parser.add_argument('--metafname')
parser.add_argument('--queries-to-include')
parser.add_argument('--label-all-nodes', action='store_true')
parser.add_argument('--label-leaf-nodes', action='store_true')
parser.add_argument('--label-root-node', action='store_true')
parser.add_argument('--node-label-regex', help='portion of node label to keep (rest is discarded if regex is found, if no regex label is left unchanged). E.g. \'ig.\' reduces them all to the locus')
parser.add_argument('--tree-style', default='rectangular', choices=['rectangular', 'circular'])
parser.add_argument('--partis-dir', default=str(Path(__file__).parent.parent), help='path to main partis install dir')  # i.e. the parent of the dir that this script is in
parser.add_argument('--log-lbr', action='store_true')
parser.add_argument('--seq-len', type=int)
parser.add_argument('--uid-translations', help='colon-separated list of comma-separated pairs of uid:translated-id pairs')
parser.add_argument('--meta-info-to-emphasize', help='see partis help')
parser.add_argument('--meta-info-key-to-color', help='see partis help')
parser.add_argument('--meta-emph-formats', help='see partis help')
parser.add_argument('--node-size-key', help='annotation key with which to scale the node size')
parser.add_argument('--use-node-area', action='store_true', help='for --node-size-key scale by area rather than edge/radius')
parser.add_argument('--branch-color-key', help='annotation key with which to scale the branch length color')
parser.add_argument('--face-type', choices=['rect', 'circle'], default='rect', help='shape of symbol for each node')
parser.add_argument('--min-face-size', type=float, default=1.5, help='min size for node symbol')
parser.add_argument('--max-face-size', type=float, default=15, help='max size for node symbol')
args = parser.parse_args()
if args.meta_info_key_to_color is None and not args.meta_info_to_emphasize:  # it'd be nice to move this stuff, but whatevs
    print(' note: if neither --meta-info-key-to-color or --meta-info-to-emphasize are set, other style attributes may not be set')

# the partis imports have to happen after arg parsing, since their location is set by --partis-dir
sys.path.insert(1, args.partis_dir) # + '/python')
try:
    import python.utils as utils
    import python.treeutils as treeutils
    import python.glutils as glutils
    import python.plotting as plotting
    import python.lbplotting as lbplotting
except ImportError as e:
    print(e)
    raise Exception('couldn\'t import from main partis dir \'%s\' (set with --partis-dir)' % args.partis_dir)
args.meta_info_to_emphasize = utils.get_arg_list(args.meta_info_to_emphasize, key_val_pairs=True)
args.meta_emph_formats = utils.get_arg_list(args.meta_emph_formats, key_val_pairs=True)
utils.meta_emph_arg_process(args)
args.uid_translations = utils.get_arg_list(args.uid_translations, key_val_pairs=True)

if args.node_label_regex is not None and not args.label_all_nodes and not args.label_leaf_nodes:
    print(' note: --node-label-regex set, but neither --label-all-nodes nor --label-leaf-nodes were set, so they may not actually get labeled')
    # print(' --node-label-regex: turning on --label-all-nodes')
    # args.label_all_nodes = True

args.queries_to_include = utils.get_arg_list(args.queries_to_include)
args.metafo = None
if args.metafname is not None:
    with open(args.metafname) as metafile:
        # NOTE(review): yaml.CLoader is not a safe loader (it can construct arbitrary python objects), so --metafname must only ever point to trusted files; consider yaml.CSafeLoader if plain yaml is sufficient
        args.metafo = yaml.load(metafile, Loader=yaml.CLoader)

plot_trees(args)
|
bin/raxml-ng
ADDED
Binary file
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
from __future__ import absolute_import, division, unicode_literals
|
3
|
+
import os
|
4
|
+
import sys
|
5
|
+
import argparse
|
6
|
+
import pickle
|
7
|
+
import csv
|
8
|
+
from ete3 import TreeNode, TreeStyle, NodeStyle, SVG_COLORS
|
9
|
+
from io import open
|
10
|
+
|
11
|
+
# NOTE this probably doesn't need to be a separate script any more, it used to be necessary since ete3 requires python 3
|
12
|
+
|
13
|
+
# Script entry: read a pickled bcr-phylo tree, write it as newick, and optionally write per-node kd info to a csv.
parser = argparse.ArgumentParser()
parser.add_argument('--pickle-tree-file', required=True, help='bcr phylo output pickle tree file')  # required, since it's read unconditionally below (omitting it used to crash in open())
parser.add_argument('--kdfile', help='output csv file with kd info')
parser.add_argument('--newick-tree-file', required=True, help='output newick tree file')
args = parser.parse_args()

with open(args.pickle_tree_file, 'rb') as lfile:
    # NOTE(review): unpickling can execute arbitrary code, so --pickle-tree-file must only ever point to trusted files
    tree = pickle.load(lfile)

# print tree
# print tree.name
# print tree.sequence

with open(args.newick_tree_file, 'w') as ntfile:
    treestr = tree.write(format=1)  # default format ignores internal node names (numbers listed here: http://etetoolkit.org/docs/latest/tutorial/tutorial_trees.html#reading-and-writing-newick-trees)
    treestr = treestr.replace(';', '%s;' % tree.name)  # add root node name by hand (none of the format integers seem to add the root node name)
    ntfile.write(treestr)

if args.kdfile is not None:
    with open(args.kdfile, 'wb' if sys.version_info.major < 3 else 'w') as kdfile:
        writer = csv.DictWriter(kdfile, ('uid', 'kd', 'time', 'relative_kd', 'lambda', 'target_index', 'target_distance'))
        writer.writeheader()
        for node in tree.traverse():  # small kd is higher affinity
            if node.name == '':  # skip unnamed nodes
                continue
            writer.writerow({'uid' : node.name, 'kd' : node.Kd, 'time' : node.time, 'relative_kd' : node.relative_Kd, 'lambda' : node.lambda_, 'target_index' : node.target_index, 'target_distance' : node.target_distance})
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
from __future__ import absolute_import, division, unicode_literals
|
3
|
+
from __future__ import print_function
|
4
|
+
import glob
|
5
|
+
import sys
|
6
|
+
import csv
|
7
|
+
from io import open
|
8
|
+
csv.field_size_limit(sys.maxsize) # make sure we can write very large csv fields
|
9
|
+
import os
|
10
|
+
import copy
|
11
|
+
import argparse
|
12
|
+
import colored_traceback.always
|
13
|
+
import json
|
14
|
+
|
15
|
+
# if you move this script, you'll need to change this method of getting the imports
|
16
|
+
from pathlib import Path
|
17
|
+
partis_dir = str(Path(__file__).parent.parent)
|
18
|
+
sys.path.insert(1, partis_dir) # + '/python')
|
19
|
+
|
20
|
+
import python.utils as utils
|
21
|
+
import python.paircluster as paircluster
|
22
|
+
import python.glutils as glutils
|
23
|
+
from python.clusterpath import ClusterPath
|
24
|
+
import python.treeutils as treeutils
|
25
|
+
|
26
|
+
gctree_outstr = 'gctree.out.inference.1'
|
27
|
+
|
28
|
+
helpstr = """
|
29
|
+
Run partis selection metrics on gctree output dir (gctree docs: https://github.com/matsengrp/gctree/).
|
30
|
+
Plots are written to --outdir; it is probably best to start by looking at the summary html with a browser, e.g.: firefox <--outdir>/selection-metrics/plots/inferred-tree-metrics/overview.html.
|
31
|
+
Log files are written to <--outdir>/*.log; get-selection-metrics.log has most of the interesting information (view with less -RS).
|
32
|
+
Example usage:
|
33
|
+
single chain:
|
34
|
+
./bin/read-gctree-output.py --locus igh --gctreedir <gctree-output-dir> --outdir <dir-for-partis-output>
|
35
|
+
paired:
|
36
|
+
./bin/read-gctree-output.py --paired-loci --seqfname <fasta-input-file> --gctreedir <gctree-output-dir> --outdir <dir-for-partis-output>
|
37
|
+
other args, with examples (see datascripts/meta/taraki-gctree-2021-10/partis-run.py):
|
38
|
+
--kdfname /fh/fast/matsen_e/data/taraki-gctree-2021-10/processed-data/determistic/gc1/kdvals.csv
|
39
|
+
--tree-basename tree.nwk --kd-columns delta_bind_CGG_FVS_additive --dont-invert-kd --multiplicity-column multiplicity --species mouse --no-insertions-or-deletions
|
40
|
+
--initial-germline-dir /home/dralph/work/partis/datascripts/meta/taraki-gctree-2021-10/germlines --parameter-plots
|
41
|
+
--slice-bin-fname /home/dralph/work/partis/datascripts/meta/taraki-gctree-2021-10/slice-bins.yaml
|
42
|
+
"""
|
43
|
+
class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that combines raw-text layout with display of argument defaults."""
|
45
|
+
# ----------------------------------------------------------------------------------------
# Command line definition. Help text is shown verbatim (raw text) with defaults appended.
formatter_class = MultiplyInheritedFormatter
parser = argparse.ArgumentParser(formatter_class=formatter_class, description=helpstr)
all_actions = ['cache-parameters', 'annotate', 'get-selection-metrics']

# what to run, and where to read/write
parser.add_argument(
    '--actions',
    default=':'.join(all_actions),
    help='colon-separated list of actions to run',
)
parser.add_argument(
    '--seqfname',
    help='Fasta file with input sequences. If single chain, this defaults to the standard location in --gctreedir. If --paired-loci is set, this should include, separately, all heavy and all light sequences, where the two sequences in a pair have identical uids (at least up to the first \'_\').',
)
parser.add_argument(
    '--gctreedir',
    required=True,
    help='gctree output dir (to get --tree-basename, and maybe abundances.csv, --seqfname).',
)
parser.add_argument(
    '--outdir',
    required=True,
    help='directory to which to write partis output files',
)
parser.add_argument(
    '--input-partition-fname',
    help='partis style yaml file with a partition grouping seqs into clonal families; if set, input data is assumed to contain many families (if not set, we assume it\'s only one fmaily).',
)

# single chain vs paired
parser.add_argument(
    '--paired-loci',
    action='store_true',
    help='run on paired heavy/light data',
)
parser.add_argument(
    '--locus',
    choices=utils.loci,
    help='locus of sequences (required for single chain).',
)

# kd and multiplicity input
parser.add_argument(
    '--kdfname',
    help='csv file with kd values (and, optionally, multiplicities), with header names as specified in subsequent args.',
)
parser.add_argument(
    '--name-column',
    default='name',
    help='column name in --kdfname from which to take sequence name',
)
parser.add_argument(
    '--kd-columns',
    default='kd',
    help='colon-separated list of column name[s] in --kdfname from which to take kd values. If more than one, the values are added.',
)
parser.add_argument(
    '--multiplicity-column',
    help='If set, column name in --kdfname from which to take multiplicity value (which must be >0, i.e. inferred ancestors should have multiplicity 1). If not set, abundances are read from --abundance-basename in --gctreedir and converted to multiplicities.',
)
parser.add_argument(
    '--dont-invert-kd',
    action='store_true',
    help='by default we invert (take 1/kd) to convert to \'affinity\' (after adding multiple kd columns, if specified), or at least something monotonically increasing with affinity. This skips that step, e.g. if you\'re passing in affinity.',
)
parser.add_argument(
    '--species',
    default='mouse',
    choices=('human', 'macaque', 'mouse'),
)

# files within --gctreedir
parser.add_argument(
    '--tree-basename',
    default=gctree_outstr + '.nk',  # .1 is the most likely one (all trees are also in the pickle file as ete trees: gctree.out.inference.parsimony_forest.p
    help='basename of tree file to take from --gctreedir',
)
parser.add_argument(
    '--abundance-basename',
    default='abundances.csv',
    help='basename of abundance file in --gctreedir. Abundance of 0 (inferred ancestor) is converted to multiplicity of 1. Not used if multiplicities are read from kdfname',
)

# run control and pass-through options
parser.add_argument('--dry', action='store_true')
parser.add_argument('--no-tree-plots', action='store_true')
parser.add_argument('--parameter-plots', action='store_true')
parser.add_argument(
    '--no-insertions-or-deletions',
    action='store_true',
    help='see partis help',
)
parser.add_argument('--n-procs', type=int)
parser.add_argument('--slice-bin-fname')
parser.add_argument(
    '--initial-germline-dir',
    help='see partis help',
)

args = parser.parse_args()
|
71
|
+
# Post-process and validate the parsed arguments.
args.actions = utils.get_arg_list(args.actions, choices=all_actions)  # colon-separated string --> list
args.kd_columns = utils.get_arg_list(args.kd_columns)
if args.multiplicity_column is not None and args.kdfname is None:
    raise Exception('have to set --kdfname if --multiplicity-column is set')
if args.paired_loci:
    # explicit raise rather than an assert: asserts are stripped under python -O, and give the user no hint as to what went wrong
    if args.seqfname is None:
        raise Exception('have to set --seqfname if --paired-loci is set')
else:
    if args.seqfname is None:  # single chain: fall back to the fasta that gctree wrote
        args.seqfname = '%s/%s.fasta' % (args.gctreedir, gctree_outstr)
        print(' set --seqfname to default location in --gctreedir: %s' % args.seqfname)
    if args.locus is None:
        raise Exception('have to set --locus for single chain')
|
83
|
+
|
84
|
+
# ----------------------------------------------------------------------------------------
|
85
|
+
def metafname():
    """Return the path, within --outdir, of the input meta info file that gets passed to partis."""
    return '{}/gctree-meta.yaml'.format(args.outdir)
|
87
|
+
|
88
|
+
# ----------------------------------------------------------------------------------------
|
89
|
+
def run_cmd(action):
    """Build the partis command line for <action> and run it with utils.simplerun().

    The command is assembled as a single string from the script's command line args (the module-level <args>),
    and its output is logged to <--outdir>/<action>.log. If --dry is set, it is passed on as dryrun=True.

    action: one of the strings in all_actions ('cache-parameters', 'annotate', 'get-selection-metrics')

    NOTE(review): the block nesting below was reconstructed from a copy of this file that had lost its indentation;
    in particular please confirm that the --all-seqs-simultaneous/--input-partition-fname branch belongs inside the
    'cache-parameters'/'annotate' block, and that the --slice-bin-fname and --choose-all-abs lines belong inside
    the 'get-selection-metrics' block.
    """
    locstr = '--paired-loci' if args.paired_loci else '--locus %s'%args.locus
    # this doesn't work since all gcs together needs 0:1 but single gc runs need 0, and the guessing functionality is working fine atm --droplet-id-separators - --droplet-id-indices 0:1
    # NOTE would maybe be better to not guess pair info?
    cmd = './bin/partis %s %s --species %s --guess-pairing-info --input-metafnames %s' % (action, locstr, args.species, metafname())
    if args.no_insertions_or_deletions:
        cmd += ' --no-insertions-or-deletions'
    # the two inference actions read the input sequences, and need to know where parameters/output live
    if action in ['cache-parameters', 'annotate']:
        cmd += ' --infname %s' % args.seqfname
        if args.paired_loci:
            cmd += ' --paired-outdir %s' % args.outdir
        else:
            cmd += ' --parameter-dir %s/parameters' % args.outdir
        if args.input_partition_fname is None: # one gc at a time
            cmd += ' --all-seqs-simultaneous'
        else: # many gcs together
            cmd += ' --input-partition-fname %s' % args.input_partition_fname
    if action == 'cache-parameters':
        if args.initial_germline_dir is not None:
            cmd += ' --initial-germline-dir %s' % args.initial_germline_dir
        if args.parameter_plots:
            cmd += ' --plotdir %s' % args.outdir
    # add the output location, unless --paired-outdir was already added above
    if action in ['annotate', 'get-selection-metrics'] and '--paired-outdir' not in cmd:
        cmd += ' --%s %s%s' % ('paired-outdir' if args.paired_loci else 'outfname', args.outdir, '' if args.paired_loci else '/partition.yaml')
    if action == 'get-selection-metrics':
        # NOTE for paired data --plotdir is given the literal string 'paired-outdir' rather than a path (presumably a special value that partis understands -- TODO confirm)
        cmd += ' --min-selection-metric-cluster-size 3 --treefname %s/%s --plotdir %s --selection-metrics-to-calculate lbi:aa-lbi:cons-dist-aa:lbr:aa-lbr:lbf:aa-lbf' % (args.gctreedir, args.tree_basename, 'paired-outdir' if args.paired_loci else '%s/selection-metrics/plots'%args.outdir)
        cmd += ' --extra-daffy-metrics lbi:aa-lbi'
        cmd += ' --label-root-node'
        plt_cfg = treeutils.default_plot_cfg + ['distr', 'tree-mut-stats']
        if args.no_tree_plots:  # drop only the 'tree' entry from the plot config
            plt_cfg = [t for t in plt_cfg if t != 'tree']
        cmd += ' --add-selection-metrics-to-outfname --use-droplet-id-for-combo-id --selection-metric-plot-cfg %s' % ':'.join(plt_cfg)
        if args.slice_bin_fname is not None:
            cmd += ' --slice-bin-fname %s' % args.slice_bin_fname
        cmd += ' --choose-all-abs --chosen-ab-fname %s/chosen-abs.csv' % args.outdir # --debug 1
    if args.n_procs is not None:
        cmd += ' --n-procs %d' % args.n_procs
    utils.simplerun(cmd, logfname='%s/%s.log'%(args.outdir, action), dryrun=args.dry)
|
127
|
+
|
128
|
+
# ----------------------------------------------------------------------------------------
|
129
|
+
utils.mkdir(args.outdir)
metafos = {}  # input meta info for each sequence, keyed by sequence name
if args.multiplicity_column is None: # if not set, read abundances from args.abundance_basename
    abfn = '%s/%s' % (args.gctreedir, args.abundance_basename)
    print(' reading abundance info from %s' % abfn)
    with open(abfn) as afile:
        # column names are passed explicitly, so every row in the file (including the first) is read as data
        for abline in csv.DictReader(afile, fieldnames=('name', 'abundance')):
            seq_info = metafos.setdefault(abline['name'], {})
            seq_info['multiplicity'] = max(1, int(abline['abundance'])) # increase 0s (inferred ancestors) to 1
|
140
|
+
# Read kd values (converted to 'affinity') and, if --multiplicity-column is set, multiplicities from --kdfname.
if args.kdfname is not None:
    print(' reading kd info%s from %s' % ('' if args.multiplicity_column is None else ' and multiplicity info', args.kdfname))
    empty_kd_vals = ['None', None, '']  # a kd column holding any of these is treated as having no value
    with open(args.kdfname) as kfile:
        for kdline in csv.DictReader(kfile):
            seq_info = metafos.setdefault(kdline[args.name_column], {})
            # only set affinity if every requested kd column has a value (multiple columns are added together)
            if all(kdline[k] not in empty_kd_vals for k in args.kd_columns):
                kdval = sum(float(kdline[k]) for k in args.kd_columns)
                if args.dont_invert_kd:
                    seq_info['affinity'] = kdval
                else:
                    seq_info['affinity'] = 1. / kdval
            if args.multiplicity_column is not None:
                seq_info['multiplicity'] = int(kdline[args.multiplicity_column])
|
153
|
+
|
154
|
+
if args.paired_loci: # convert metafos to per-locus names
    for base_id in list(metafos):  # copy the keys, since entries are added and removed inside the loop
        shared_info = metafos[base_id]  # every per-locus id points at the same info dict as the original id
        for ltmp in utils.sub_loci('ig'):
            metafos['%s-%s' % (base_id, ltmp)] = shared_info
        del metafos[base_id]
|
160
|
+
|
161
|
+
# and write to json/yaml
|
162
|
+
meta_path = metafname()
print(' writing input meta info to %s' % meta_path)
utils.jsdump(meta_path, metafos)

# then run each requested partis action, in the order they were specified
for action in args.actions:
    run_cmd(action)
|