partis-bcr 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. bin/FastTree +0 -0
  2. bin/add-chimeras.py +59 -0
  3. bin/add-seqs-to-outputs.py +81 -0
  4. bin/bcr-phylo-run.py +799 -0
  5. bin/build.sh +24 -0
  6. bin/cf-alleles.py +97 -0
  7. bin/cf-germlines.py +57 -0
  8. bin/cf-linearham.py +199 -0
  9. bin/chimera-plot.py +76 -0
  10. bin/choose-partially-paired.py +143 -0
  11. bin/circle-plots.py +30 -0
  12. bin/compare-plotdirs.py +298 -0
  13. bin/diff-parameters.py +133 -0
  14. bin/docker-hub-push.sh +6 -0
  15. bin/extract-pairing-info.py +55 -0
  16. bin/gcdyn-simu-run.py +223 -0
  17. bin/gctree-run.py +244 -0
  18. bin/get-naive-probabilities.py +126 -0
  19. bin/iqtree-1.6.12 +0 -0
  20. bin/lonr.r +1020 -0
  21. bin/makeHtml +52 -0
  22. bin/mds-run.py +46 -0
  23. bin/parse-output.py +277 -0
  24. bin/partis +1869 -0
  25. bin/partis-pip +116 -0
  26. bin/partis.py +1869 -0
  27. bin/plot-gl-set-trees.py +519 -0
  28. bin/plot-hmms.py +151 -0
  29. bin/plot-lb-tree.py +427 -0
  30. bin/raxml-ng +0 -0
  31. bin/read-bcr-phylo-trees.py +38 -0
  32. bin/read-gctree-output.py +166 -0
  33. bin/run-chimeras.sh +64 -0
  34. bin/run-dtr-scan.sh +25 -0
  35. bin/run-paired-loci.sh +100 -0
  36. bin/run-tree-metrics.sh +88 -0
  37. bin/smetric-run.py +62 -0
  38. bin/split-loci.py +317 -0
  39. bin/swarm-2.1.13-linux-x86_64 +0 -0
  40. bin/test-germline-inference.py +425 -0
  41. bin/tree-perf-run.py +194 -0
  42. bin/vsearch-2.4.3-linux-x86_64 +0 -0
  43. bin/vsearch-2.4.3-macos-x86_64 +0 -0
  44. bin/xvfb-run +194 -0
  45. partis_bcr-1.0.1.data/scripts/cf-alleles.py +97 -0
  46. partis_bcr-1.0.1.data/scripts/cf-germlines.py +57 -0
  47. partis_bcr-1.0.1.data/scripts/extract-pairing-info.py +55 -0
  48. partis_bcr-1.0.1.data/scripts/gctree-run.py +244 -0
  49. partis_bcr-1.0.1.data/scripts/parse-output.py +277 -0
  50. partis_bcr-1.0.1.data/scripts/split-loci.py +317 -0
  51. partis_bcr-1.0.1.data/scripts/test.py +1005 -0
  52. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/METADATA +1 -1
  53. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/RECORD +101 -50
  54. partis_bcr-1.0.1.dist-info/top_level.txt +1 -0
  55. {partis → python}/glutils.py +1 -1
  56. python/main.py +30 -0
  57. {partis → python}/plotting.py +10 -1
  58. {partis → python}/treeutils.py +18 -16
  59. {partis → python}/utils.py +14 -7
  60. partis/main.py +0 -59
  61. partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
  62. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/WHEEL +0 -0
  63. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/entry_points.txt +0 -0
  64. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/licenses/COPYING +0 -0
  65. {partis → python}/__init__.py +0 -0
  66. {partis → python}/alleleclusterer.py +0 -0
  67. {partis → python}/allelefinder.py +0 -0
  68. {partis → python}/alleleremover.py +0 -0
  69. {partis → python}/annotationclustering.py +0 -0
  70. {partis → python}/baseutils.py +0 -0
  71. {partis → python}/cache/__init__.py +0 -0
  72. {partis → python}/cache/cached_uncertainties.py +0 -0
  73. {partis → python}/clusterpath.py +0 -0
  74. {partis → python}/coar.py +0 -0
  75. {partis → python}/corrcounter.py +0 -0
  76. {partis → python}/datautils.py +0 -0
  77. {partis → python}/event.py +0 -0
  78. {partis → python}/fraction_uncertainty.py +0 -0
  79. {partis → python}/gex.py +0 -0
  80. {partis → python}/glomerator.py +0 -0
  81. {partis → python}/hist.py +0 -0
  82. {partis → python}/hmmwriter.py +0 -0
  83. {partis → python}/hutils.py +0 -0
  84. {partis → python}/indelutils.py +0 -0
  85. {partis → python}/lbplotting.py +0 -0
  86. {partis → python}/mds.py +0 -0
  87. {partis → python}/mutefreqer.py +0 -0
  88. {partis → python}/paircluster.py +0 -0
  89. {partis → python}/parametercounter.py +0 -0
  90. {partis → python}/paramutils.py +0 -0
  91. {partis → python}/partitiondriver.py +0 -0
  92. {partis → python}/partitionplotter.py +0 -0
  93. {partis → python}/performanceplotter.py +0 -0
  94. {partis → python}/plotconfig.py +0 -0
  95. {partis → python}/processargs.py +0 -0
  96. {partis → python}/prutils.py +0 -0
  97. {partis → python}/recombinator.py +0 -0
  98. {partis → python}/scanplot.py +0 -0
  99. {partis → python}/seqfileopener.py +0 -0
  100. {partis → python}/treegenerator.py +0 -0
  101. {partis → python}/viterbicluster.py +0 -0
  102. {partis → python}/vrc01.py +0 -0
  103. {partis → python}/waterer.py +0 -0
bin/plot-lb-tree.py ADDED
@@ -0,0 +1,427 @@
1
+ #!/usr/bin/env python3
2
+ # has to be its own script, since ete3 requires its own god damn python version, installed in a separated directory
3
+ from __future__ import absolute_import, division, unicode_literals
4
+ from __future__ import print_function
5
+ import time
6
+ import yaml
7
+ import itertools
8
+ import glob
9
+ import argparse
10
+ import copy
11
+ import random
12
+ import os
13
+ import subprocess
14
+ import sys
15
+ import colored_traceback.always
16
+ from collections import OrderedDict
17
+ import numpy
18
+ import math
19
+ import re
20
+ from io import open
21
+ import ete3
22
+ from pathlib import Path
23
+
24
+ # ----------------------------------------------------------------------------------------
25
# module-level color/face lookup tables (none of these are referenced by the functions visible further down in this script)
scolors = {
    'novel': '#ffc300',  # 'Gold'
    'data': 'LightSteelBlue',
    'pale-green': '#85ad98',
    'pale-blue': '#94a3d1',
    'tigger-default': '#d77c7c',  #'#c32222', # red
    'igdiscover': '#85ad98',  #'#29a614', # green
    'partis': '#94a3d1',  #'#2455ed', # blue
    'lbi': '#94a3d1',
}

# listcolors = [plotting.getgrey('medium') for _ in range(10)]
listfaces = ['red', 'blue', 'green']
used_colors = {}
used_faces = {}
# ordered so that iteration always goes ok, missing, spurious
simu_colors = OrderedDict([
    ('ok', 'DarkSeaGreen'),
    ('missing', '#d77c7c'),
    ('spurious', '#a44949'),
])
48
def get_scale_min(metric, vals):
    # Lower end of the color scale for <metric>: for 'cons-dist-aa' the scale only goes down to 10 below the
    # largest value in <vals>, for every other metric it goes all the way down to the smallest value.
    use_fixed_window = metric == 'cons-dist-aa'
    return max(vals) - 10 if use_fixed_window else min(vals)
53
+
54
+ # ----------------------------------------------------------------------------------------
55
def read_input(args):
    # Read the tree string from <args.treefname>, strip surrounding whitespace, and remove every '[&R] ' marker and
    # every single quote. Returns a dict with the cleaned string under the key 'treestr'.
    with open(args.treefname) as tfile:
        cleaned_str = tfile.read().strip()
    for junk_str in ('[&R] ', '\''):
        cleaned_str = cleaned_str.replace(junk_str, '')
    return {'treestr' : cleaned_str}
61
+
62
+ # ----------------------------------------------------------------------------------------
63
# <opacity> is applied to the node/legend faces, <node_fsize> is the font size of the text faces added in label_node()
opacity, node_fsize = 0.65, 7
65
+
66
+ # ----------------------------------------------------------------------------------------
67
def set_delta_affinities(etree, affyfo):
    # Add an 'affinity_change' feature to every node in <etree>: its affinity (from <affyfo>, keyed by node name) minus that of
    # its parent, or None if the node has no parent or if either of them is missing from <affyfo> (or has value None).
    # Returns a list of all the non-None changes (used for normalizing the cmap).
    def has_affy(tnode):
        return tnode.name in affyfo and affyfo[tnode.name] is not None

    all_changes = []
    for tnode in etree.traverse():
        parent = tnode.up
        if parent is not None and has_affy(tnode) and has_affy(parent):
            change = affyfo[tnode.name] - affyfo[parent.name]
            all_changes.append(change)
        else:
            change = None
        tnode.add_feature('affinity_change', change)
    return all_changes
77
+
78
+ # ----------------------------------------------------------------------------------------
79
def get_size(vmin, vmax, val):
    # Linearly map <val> from the range [<vmin>, <vmax>] onto the face size range [--min-face-size, --max-face-size] (both read
    # from the global <args>). If --use-node-area is set, sqrt(<val>) is mapped instead; note that <vmin> and <vmax> are
    # used as passed in (they are not sqrt'd here).
    if vmin != vmax:
        scaled_val = math.sqrt(val) if args.use_node_area else val
        return args.min_face_size + (scaled_val - vmin) * (args.max_face_size - args.min_face_size) / float(vmax - vmin)
    return args.min_face_size  # degenerate range, so everything gets the min size
86
+
87
+ # ----------------------------------------------------------------------------------------
88
def add_legend(tstyle, varname, all_vals, smap, info, start_column, add_missing=False, add_sign=None, reverse_log=False, n_entries=5, fsize=4, no_opacity=False):  # NOTE very similar to add_smap_legend() in plot_2d_scatter() in python/lbplotting.py
    # Add a legend to <tstyle>.legend that occupies three columns starting at <start_column>: the title <varname> (plus one blank
    # spacer per entry) in the first, one square per entry in the second, and the text for each entry in the third.
    #   all_vals: values that the legend should span (if empty, a dummy range is used, see below)
    #   smap, info: passed to plotting.get_smap_color() to color each square; if <smap> is None the squares are instead grey and sized with get_size()
    #   add_missing: append a final ' missing' entry (value None, with a dummy key)
    #   add_sign: None, '-', or '+'; if not None it's prepended to each value string
    #   reverse_log: write exp() of each value rather than the value itself
    #   n_entries: number of entries requested from plotting.get_leg_entries()
    #   fsize: font size for the legend text
    #   no_opacity: if set, leave the squares fully opaque rather than applying the module-level <opacity>
    if len(all_vals) == 0:
        all_vals = [-1, 1]  # um, maybe this is ok?
        # NOTE you *cannot* return here, since if we don't actually add the stuff then it later (when rendering) crashes with a key error deep within ete due to the <start_column> being wrong/inconsistent
    assert add_sign in [None, '-', '+']
    tstyle.legend.add_face(ete3.TextFace(' %s ' % varname, fsize=fsize), column=start_column)
    min_val, max_val = get_scale_min(varname, all_vals), max(all_vals)
    if min_val == max_val:
        min_val, max_val = plotting.expand_bounds([min_val, max_val], only_down=True)  # <only_down> is for affinity increase scale: expand downward if only one value so the one value shows up as dark red (rather than super light red)
    val_list = plotting.get_leg_entries(n_entries=n_entries, min_val=min_val, max_val=max_val)
    # if add_sign is not None and add_sign == '-':  # for negative changes, we have the cmap using abs() and want to legend order to correspond
    #     val_list = reversed(val_list)  # arg, this breaks something deep in the legend maker, not sure what
    key_list = [None for _ in val_list]
    if add_missing:
        val_list += [None]
        key_list += ['missing!']  # doesn't matter what the last one is as long as it isn't in <affyfo>
    for val, key in zip(val_list, key_list):
        tstyle.legend.add_face(ete3.TextFace('', fsize=fsize), column=start_column)  # blank spacer below the title
        if smap is None:
            sz = get_size(min_val, max_val, val)
            rface = ete3.RectFace(sz, sz, bgcolor=plotting.getgrey(), fgcolor=None)
        else:
            rface = ete3.RectFace(6, 6, bgcolor=plotting.get_smap_color(smap, info, key=key, val=val), fgcolor=None)
        if not no_opacity:
            rface.opacity = opacity
        tstyle.legend.add_face(rface, column=start_column + 1)
        if key is None:  # normal entry: write the (maybe un-logged) value
            lval = math.exp(val) if reverse_log else val
            if varname == 'cons-dist-aa':
                vstr = '%.1f' % lval
            elif 'affinity' in varname:
                vstr = '%s' % utils.round_to_n_digits(lval, 2)
            else:
                vstr = '%.2f' % lval
            tfstr = ' %s%s' % (utils.non_none([add_sign, '']), vstr)
        else:  # the 'missing' entry has no value (so in particular we can't call exp() on it)
            tfstr = ' missing'  # this used to be assigned to a misspelled variable, so the label was either stale or undefined
        tstyle.legend.add_face(ete3.TextFace(tfstr, fsize=fsize), column=start_column + 2)
125
+
126
+ # ----------------------------------------------------------------------------------------
127
def label_node(lnode, root_node):
    # Add text faces to <lnode>: branch top/bottom labels from args.metafo['labels'] (if present), and the node's name (plus the
    # names of any of its 'duplicates' in args.metafo) if the command line args say that it should be labeled.
    # <root_node> is the root of the tree, used for --label-root-node. Reads the global <args>. Returns None.
    # ----------------------------------------------------------------------------------------
    def meta_emph(tname):  # True if <tname>'s meta info matches the (first) key/val pair in --meta-info-to-emphasize
        if args.meta_info_to_emphasize is not None:
            key, val = list(args.meta_info_to_emphasize.items())[0]
            if key in args.metafo and tname in args.metafo[key] and utils.meta_info_equal(key, val, args.metafo[key][tname], formats=args.meta_emph_formats):
                return True
        return False
    # ----------------------------------------------------------------------------------------
    def use_node_name(tname, tnode=None):  # True if the name <tname> should be written; the leaf and root checks can only be made if the node <tnode> is also passed
        if args.label_all_nodes:
            return True
        if tnode is not None and args.label_leaf_nodes and tnode.is_leaf():
            return True
        if args.queries_to_include is not None and tname in args.queries_to_include:
            return True
        if tnode is not None and args.label_root_node and tnode is root_node:
            return True
        if meta_emph(tname):
            return True
        if args.uid_translations is not None and tname in args.uid_translations:
            return True
        if args.node_label_regex is not None and len(re.findall(args.node_label_regex, tname)) > 0:
            return True
        return False
    # ----------------------------------------------------------------------------------------
    def split_line(lstr):  # split into two rows if more than 3 entries
        if lstr.count(',') < 3:
            return lstr
        blist = lstr.split(', ')
        return '%s\n%s' % (', '.join(blist[:len(blist)//2]), ', '.join(blist[len(blist)//2:]))
    # ----------------------------------------------------------------------------------------
    def get_nlabel(tname, tnode=None):  # label text for <tname> (after applying --uid-translations and --node-label-regex), or None if it shouldn't be labeled
        if not use_node_name(tname, tnode=tnode):  # need to pass <tnode> here, otherwise nodes that are only selected by --label-leaf-nodes or --label-root-node never get a label
            return None
        nlabel = tname
        if args.uid_translations is not None and nlabel in args.uid_translations:
            nlabel = args.uid_translations[nlabel]
        if args.node_label_regex is not None:
            mstrs = re.findall(args.node_label_regex, nlabel)
            if len(mstrs) > 0:
                nlabel = '+'.join(mstrs)
        return nlabel
    # ----------------------------------------------------------------------------------------
    def get_ncolor(tname):
        return 'red' if meta_emph(tname) or args.meta_info_to_emphasize is None else 'black'
    # ----------------------------------------------------------------------------------------
    duplicate_names = []  # names that were collapsed into this node (if any)
    if 'duplicates' in args.metafo and lnode.name in args.metafo['duplicates']:
        duplicate_names = args.metafo['duplicates'][lnode.name]
    use_name = use_node_name(lnode.name, tnode=lnode)
    use_name |= any(use_node_name(n) for n in duplicate_names)
    if use_name:
        nlabels = [get_nlabel(lnode.name, tnode=lnode)] + [get_nlabel(n) for n in duplicate_names]
        ncolors = [get_ncolor(lnode.name)] + [get_ncolor(n) for n in duplicate_names]
        nlabel = ', '.join(sorted(set(l for l in nlabels if l is not None)))
        ncolor = 'red' if 'red' in ncolors else 'black'
    else:
        if 'labels' in args.metafo:  # still need to add the branch labels below, but without any node name
            nlabel, ncolor = '', 'red'
        else:
            return
    if 'labels' in args.metafo:
        mlabel = args.metafo['labels'].get(lnode.name, '')
        blabels, tlabels, tcolors, bcolors = ['', ''], ['', ''], ['black' for _ in range(2)], ['black' for _ in range(2)]
        label_list = mlabel.split('\n')
        if 'h:' in mlabel or 'l:' in mlabel:
            for lstr in label_list:
                if 'nuc' in lstr and 'aa' in lstr:
                    assert lstr.count(',') == 1  # e.g. '3 nuc, 1 aa'
                    tlabels[0], blabels[0] = lstr.split(',')
                elif lstr.find('h:') == 0:
                    tlabels[1], tcolors[1] = ' '+lstr, 'blue'
                elif lstr.find('l:') == 0:
                    blabels[1], bcolors[1] = ' '+lstr, 'green'
                else:
                    raise Exception('couldn\'t parse \'%s\'' % mlabel)
        elif '\n' in mlabel:  # first line goes on top of the branch, the rest below it
            tlabels[1], blabels[1] = label_list[0], '\n'.join(label_list[1:])
            blabels[1] = split_line(blabels[1])
        else:
            tlabels[0] = mlabel
        for il, (blab, bcol) in enumerate(zip(blabels, bcolors)):  # blabels are branch bottom labels
            lnode.add_face(ete3.TextFace(blab, fsize=node_fsize, fgcolor=bcol), column=il, position='branch-bottom')
        for il, (tlab, tcol) in enumerate(zip(tlabels, tcolors)):  # branch top labels
            lnode.add_face(ete3.TextFace(tlab, fsize=node_fsize, fgcolor=tcol), column=il, position='branch-top')
    if nlabel != '':
        tface = ete3.TextFace(' '+nlabel, fsize=node_fsize, fgcolor=ncolor)  # <nlabel> is usually the uid/sequence name
        lnode.add_face(tface, column=1)  # position='branch-bottom')
216
+
217
+ # ----------------------------------------------------------------------------------------
218
def set_lb_styles(args, etree, tstyle):
    # Style each node in <etree> according to the values for --lb-metric in args.metafo (and, if present, the affinity info
    # under --affy-key), then add the corresponding legends to <tstyle>. Returns without doing anything if there are no
    # metric values.
    # ----------------------------------------------------------------------------------------
    metric = args.lb_metric
    lbfo = args.metafo[metric]
    if 'lbr' in metric or 'lbf' in metric:  # remove zeros + maybe apply log()
        lbfo = {uid : (math.log(lbv) if args.log_lbr else lbv) for uid, lbv in lbfo.items() if lbv > 0}
    lbvals = list(lbfo.values())
    if len(lbvals) == 0:
        return
    hard_min = None
    if metric == 'cons-dist-aa':
        hard_min = get_scale_min(metric, lbvals)
    lb_smap = plotting.get_normalized_scalar_map(lbvals, 'viridis', hard_min=hard_min)
    lb_min = min(lbvals)
    lb_max = max(lbvals)
    is_affy_metric = metric in treeutils.affy_metrics
    is_daffy_metric = metric in treeutils.daffy_metrics

    # set up affinity color maps (only if there's at least some affinity info)
    affyfo = None
    if args.affy_key in args.metafo and set(args.metafo[args.affy_key].values()) != {None}:
        affyfo = args.metafo[args.affy_key]
        if is_affy_metric:
            affyvals = list(affyfo.values())
            affy_smap = plotting.get_normalized_scalar_map([a for a in affyvals if a is not None], 'viridis')
        elif is_daffy_metric:
            delta_affyvals = set_delta_affinities(etree, affyfo)
            affy_increases = [dv for dv in delta_affyvals if dv > 0]
            if len(set(affy_increases)) == 1:  # if there's only one affinity increase, expand downward so color is dark red for actual observed value
                affy_increases = plotting.expand_bounds([affy_increases[0], affy_increases[0]], only_down=True)
            delta_affy_increase_smap, delta_affy_decrease_smap = None, None
            if len(delta_affyvals) > 0:
                delta_affy_increase_smap = plotting.get_normalized_scalar_map(affy_increases, 'Reds', remove_top_end=True)
                delta_affy_decrease_smap = plotting.get_normalized_scalar_map([abs(dv) for dv in delta_affyvals if dv < 0], 'Blues', remove_top_end=True)
        else:
            assert False

    # set the face (and for delta-affinity metrics, branch) style for each node
    for tnode in etree.traverse():
        tnode.img_style['size'] = 0
        face_size = 0
        face_color = plotting.getgrey()
        if is_affy_metric:
            if tnode.name not in lbfo:  # really shouldn't happen
                print(' %s missing lb info for node \'%s\'' % (utils.color('red', 'warning'), tnode.name))
                continue
            if affyfo is None:  # no affinity info: fixed size, color by metric value
                face_size = 5
                face_color = plotting.get_smap_color(lb_smap, lbfo, key=tnode.name)
            else:  # size by metric value, color by affinity
                face_size = get_size(lb_min, lb_max, lbfo[tnode.name])
                if tnode.name in affyfo:
                    face_color = plotting.get_smap_color(affy_smap, affyfo, key=tnode.name)
        elif is_daffy_metric:
            tnode.img_style['vt_line_color'] = plotting.getgrey()  # if they're black, it's too hard to see the large changes in affinity, since they're very dark (at least with current color schemes)
            # face_size = get_size(lb_min, lb_max, lbfo[tnode.name]) if tnode.name in lbfo else 1.5
            face_size = 5 if tnode.name in lbfo else 1.5
            face_color = plotting.get_smap_color(lb_smap, lbfo, key=tnode.name)
            if affyfo is not None and delta_affy_increase_smap is not None and tnode.affinity_change is not None:
                # tface = ete3.TextFace(('%+.4f' % tnode.affinity_change) if tnode.affinity_change != 0 else '0.', fsize=3)
                # tnode.add_face(tface, column=0)
                if tnode.affinity_change > 0:  # increase
                    tnode.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_increase_smap, None, val=tnode.affinity_change)
                    tnode.img_style['hz_line_width'] = 1.2
                elif tnode.affinity_change < 0:  # decrease
                    tnode.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_decrease_smap, None, val=abs(tnode.affinity_change))
                    tnode.img_style['hz_line_width'] = 1.2
                else:
                    tnode.img_style['hz_line_color'] = plotting.getgrey()
        label_node(tnode, etree.get_tree_root())
        node_face = ete3.RectFace(width=face_size, height=face_size, bgcolor=face_color, fgcolor=None)
        node_face.opacity = opacity
        tnode.add_face(node_face, column=0)

    # legends
    affy_label = args.affy_key.replace('_', ' ')
    mleg = lbplotting.mtitlestr('per-seq', metric)
    if is_affy_metric:
        if affyfo is None:
            add_legend(tstyle, mleg, lbvals, lb_smap, lbfo, 0, n_entries=4)
        else:
            add_legend(tstyle, mleg, lbvals, None, lbfo, 0, n_entries=4)
            add_legend(tstyle, affy_label, [a for a in affyvals if a is not None], affy_smap, affyfo, 3)
    elif is_daffy_metric:
        add_legend(tstyle, mleg, lbvals, lb_smap, lbfo, 0, reverse_log=args.log_lbr)
        if affyfo is not None:
            add_legend(tstyle, '%s decrease' % affy_label, [abs(dv) for dv in delta_affyvals if dv < 0], delta_affy_decrease_smap, affyfo, 3, add_sign='-', no_opacity=True)
            add_legend(tstyle, '%s increase' % affy_label, [dv for dv in delta_affyvals if dv > 0], delta_affy_increase_smap, affyfo, 6, add_sign='+', no_opacity=True)
294
+
295
+ # ----------------------------------------------------------------------------------------
296
def set_meta_styles(args, etree, tstyle):
    # Style each node in <etree> using the meta info in args.metafo: face color from --meta-info-key-to-color, face size from
    # --node-size-key, and branch color from --branch-color-key (each only if the corresponding arg is set). Also writes the
    # meta info legend next to --outfname, and adds the branch color legend to <tstyle>.
    if args.meta_info_key_to_color is not None:
        mvals = args.metafo.get(args.meta_info_key_to_color, {})
        all_emph_vals, emph_colors = plotting.meta_emph_init(args.meta_info_key_to_color, formats=args.meta_emph_formats, all_emph_vals=set(mvals.values()))
        mcolors = {v : c for v, c in emph_colors}
        plotting.make_meta_info_legend(os.path.dirname(args.outfname), utils.getprefix(os.path.basename(args.outfname)), args.meta_info_key_to_color, emph_colors, all_emph_vals, meta_emph_formats=args.meta_emph_formats, alpha=0.6)
    if args.node_size_key is not None:
        nsfo = args.metafo[args.node_size_key]
        nsvals = set(nsfo.values()) - set([None])
        raw_min_nsval, raw_max_nsval = min(nsvals), max(nsvals)
        min_nsval, max_nsval = raw_min_nsval, raw_max_nsval
        if args.use_node_area:  # get_size() takes the sqrt of each value, so the bounds have to be on the same scale
            min_nsval, max_nsval = [math.sqrt(v) for v in [raw_min_nsval, raw_max_nsval]]
    if args.branch_color_key is not None:
        bcvals = args.metafo[args.branch_color_key]
        smvals = [v for v in bcvals.values() if v is not None]
        if 'vrc01' in args.branch_color_key:
            smvals = [v for v in smvals if v > 0]
        bc_smap = plotting.get_normalized_scalar_map(smvals, 'Reds', remove_top_end=True)
        add_legend(tstyle, plotting.legends.get(args.branch_color_key, args.branch_color_key), smvals, bc_smap, bcvals, 3, no_opacity=True)
    for node in etree.traverse():
        node.img_style['size'] = 0
        rfsize = 5
        if args.node_size_key is not None:
            nsval = nsfo.get(node.name)
            if nsval is None:  # missing (or None) values get the min size; NOTE this has to be the *raw* min value, since get_size() applies the sqrt for --use-node-area itself
                nsval = raw_min_nsval
            rfsize = get_size(min_nsval, max_nsval, nsval)
        bgcolor = plotting.getgrey()
        if args.meta_info_key_to_color is not None and node.name in mvals:
            bgcolor = mcolors.get(mvals[node.name], bgcolor)  # values without a color stay grey

        label_node(node, etree.get_tree_root())
        if args.face_type == 'rect':
            rface = ete3.RectFace(width=rfsize, height=rfsize, bgcolor=bgcolor, fgcolor=None)
        elif args.face_type == 'circle':
            rface = ete3.CircleFace(radius=rfsize, color=bgcolor)
        else:
            assert False
        rface.opacity = opacity
        node.add_face(rface, column=0)

        if args.branch_color_key is not None:
            bval = bcvals.get(node.name)
            bcol = plotting.getgrey() if 'vrc01' in args.branch_color_key and bval==0 else plotting.get_smap_color(bc_smap, bcvals, key=node.name)
            node.img_style['hz_line_color'] = bcol
            node.img_style['hz_line_width'] = 1.2
340
+
341
+ # ----------------------------------------------------------------------------------------
342
def plot_trees(args):
    # Read the tree from --treefname, style its nodes according to the meta info in <args.metafo> (if any), and render it to --outfname.
    treestr = read_input(args)['treestr']
    tree_words = treestr.split()
    if len(tree_words) == 2 and tree_words[0] in ('[&U]', '[&R]'):  # dumbest #$!#$#ing format in the goddamn world (ete barfs on other programs' rooting information)
        treestr = tree_words[1]
    etree = ete3.Tree(treestr, format=1)  # , quoted_node_names=True)

    tstyle = ete3.TreeStyle()
    tstyle.mode = args.tree_style[0]  # i.e. the first letter of 'rectangular' or 'circular'
    # tstyle.show_scale = False
    if getattr(tstyle, 'scale_length', None) is not None:
        tstyle.scale_length = 1. / treeutils.typical_bcr_seq_len
    # tstyle.show_branch_length = True
    # tstyle.complete_branch_lines_when_necessary = True

    if args.metafo is None:
        print(' %s --metafo is not set, so no node formats (e.g. labels) will be set)' % utils.wrnstr())
    else:
        if args.lb_metric is not None:
            set_lb_styles(args, etree, tstyle)
        if args.meta_info_key_to_color is not None or args.meta_info_to_emphasize:
            set_meta_styles(args, etree, tstyle)

    # print ' %s' % args.outfname
    tstyle.show_leaf_name = False
    etree.render(args.outfname, tree_style=tstyle)
369
+
370
+ # ----------------------------------------------------------------------------------------
371
# command line arguments
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--treefname', required=True)
parser.add_argument('--outfname', required=True)
parser.add_argument('--lb-metric')  #, default='lbi') #, choices=treeutils.affy_metrics+treeutils.daffy_metrics)
parser.add_argument('--affy-key', default='affinity', choices=['affinity', 'relative_affinity'])
# parser.add_argument('--lb-tau', required=True, type=float)
parser.add_argument('--metafname')
parser.add_argument('--queries-to-include')
parser.add_argument('--label-all-nodes', action='store_true')
parser.add_argument('--label-leaf-nodes', action='store_true')
parser.add_argument('--label-root-node', action='store_true')
parser.add_argument('--node-label-regex', help='portion of node label to keep (rest is discarded if regex is found, if no regex label is left unchanged). E.g. \'ig.\' reduces them all to the locus')
parser.add_argument('--tree-style', default='rectangular', choices=['rectangular', 'circular'])
parser.add_argument('--partis-dir', default=str(Path(__file__).parent.parent), help='path to main partis install dir')  # default is the parent of the dir containing this script (the parentheses used to be unbalanced here, which is a syntax error)
parser.add_argument('--log-lbr', action='store_true')
parser.add_argument('--seq-len', type=int)
parser.add_argument('--uid-translations', help='colon-separated list of comma-separated pairs of uid:translated-id pairs')
parser.add_argument('--meta-info-to-emphasize', help='see partis help')
parser.add_argument('--meta-info-key-to-color', help='see partis help')
parser.add_argument('--meta-emph-formats', help='see partis help')
parser.add_argument('--node-size-key', help='annotation key with which to scale the node size')
parser.add_argument('--use-node-area', action='store_true', help='for --node-size-key scale by area rather than edge/radius')
parser.add_argument('--branch-color-key', help='annotation key with which to scale the branch length color')
parser.add_argument('--face-type', choices=['rect', 'circle'], default='rect', help='shape of symbol for each node')
parser.add_argument('--min-face-size', type=float, default=1.5, help='min size for node symbol')
parser.add_argument('--max-face-size', type=float, default=15, help='max size for node symbol')
args = parser.parse_args()
if args.meta_info_key_to_color is None and not args.meta_info_to_emphasize:  # it'd be nice to move this stuff, but whatevs
    print(' note: if neither --meta-info-key-to-color or --meta-info-to-emphasize are set, other style attributes may not be set')

# the partis modules can only be imported once we know --partis-dir
sys.path.insert(1, args.partis_dir)  # + '/python')
try:
    import python.utils as utils
    import python.treeutils as treeutils
    import python.glutils as glutils
    import python.plotting as plotting
    import python.lbplotting as lbplotting
except ImportError as e:
    print(e)
    raise Exception('couldn\'t import from main partis dir \'%s\' (set with --partis-dir)' % args.partis_dir)

# convert string args to lists/dicts
args.meta_info_to_emphasize = utils.get_arg_list(args.meta_info_to_emphasize, key_val_pairs=True)
args.meta_emph_formats = utils.get_arg_list(args.meta_emph_formats, key_val_pairs=True)
utils.meta_emph_arg_process(args)
args.uid_translations = utils.get_arg_list(args.uid_translations, key_val_pairs=True)

if args.node_label_regex is not None and not args.label_all_nodes and not args.label_leaf_nodes:
    print(' note: --node-label-regex set, but neither --label-all-nodes nor --label-leaf-nodes were set, so they may not actually get labeled')
    # print(' --node-label-regex: turning on --label-all-nodes')
    # args.label_all_nodes = True

args.queries_to_include = utils.get_arg_list(args.queries_to_include)
args.metafo = None  # stays None if --metafname isn't set, in which case no node styles get set
if args.metafname is not None:
    with open(args.metafname) as metafile:
        # NOTE(review): yaml.CLoader is not a "safe" loader (it can construct arbitrary python objects), so --metafname should only point to trusted files; it presumably also requires that PyYAML was built with libyaml -- confirm
        args.metafo = yaml.load(metafile, Loader=yaml.CLoader)

plot_trees(args)
bin/raxml-ng ADDED
Binary file
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import absolute_import, division, unicode_literals
3
+ import os
4
+ import sys
5
+ import argparse
6
+ import pickle
7
+ import csv
8
+ from ete3 import TreeNode, TreeStyle, NodeStyle, SVG_COLORS
9
+ from io import open
10
+
11
+ # NOTE this probably doesn't need to be a separate script any more, it used to be necessary since ete3 requires python 3
12
+
13
# command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--pickle-tree-file', required=True, help='bcr phylo output pickle tree file')  # required since it's opened unconditionally below (leaving it off used to give an obscure error from open(None))
parser.add_argument('--kdfile', help='output csv file with kd info')
parser.add_argument('--newick-tree-file', required=True, help='output newick tree file')
args = parser.parse_args()

# NOTE(review): pickle.load() can execute arbitrary code, so this should only be run on pickle files from a trusted source
with open(args.pickle_tree_file, 'rb') as lfile:
    tree = pickle.load(lfile)

# print tree
# print tree.name
# print tree.sequence

# write the tree in newick format
with open(args.newick_tree_file, 'w') as ntfile:
    treestr = tree.write(format=1)  # default format ignores internal node names (numbers listed here: http://etetoolkit.org/docs/latest/tutorial/tutorial_trees.html#reading-and-writing-newick-trees)
    treestr = treestr.replace(';', '%s;' % tree.name)  # add root node name by hand (none of the format integers seem to add the root node name)
    ntfile.write(treestr)

# optionally write one csv row of kd (etc.) info for each named node
if args.kdfile is not None:
    with open(args.kdfile, 'wb' if sys.version_info.major < 3 else 'w') as kdfile:
        writer = csv.DictWriter(kdfile, ('uid', 'kd', 'time', 'relative_kd', 'lambda', 'target_index', 'target_distance'))
        writer.writeheader()
        for node in tree.traverse():  # small kd is higher affinity
            if node.name == '':  # skip unnamed nodes
                continue
            writer.writerow({'uid' : node.name, 'kd' : node.Kd, 'time' : node.time, 'relative_kd' : node.relative_Kd, 'lambda' : node.lambda_, 'target_index' : node.target_index, 'target_distance' : node.target_distance})
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import absolute_import, division, unicode_literals
3
+ from __future__ import print_function
4
+ import glob
5
+ import sys
6
+ import csv
7
+ from io import open
8
+ csv.field_size_limit(sys.maxsize) # make sure we can write very large csv fields
9
+ import os
10
+ import copy
11
+ import argparse
12
+ import colored_traceback.always
13
+ import json
14
+
15
+ # if you move this script, you'll need to change this method of getting the imports
16
+ from pathlib import Path
17
+ partis_dir = str(Path(__file__).parent.parent)
18
+ sys.path.insert(1, partis_dir) # + '/python')
19
+
20
+ import python.utils as utils
21
+ import python.paircluster as paircluster
22
+ import python.glutils as glutils
23
+ from python.clusterpath import ClusterPath
24
+ import python.treeutils as treeutils
25
+
26
+ gctree_outstr = 'gctree.out.inference.1'
27
+
28
+ helpstr = """
29
+ Run partis selection metrics on gctree output dir (gctree docs: https://github.com/matsengrp/gctree/).
30
+ Plots are written to --outdir; it is probably best to start by looking at the summary html with a browser, e.g.: firefox <--outdir>/selection-metrics/plots/inferred-tree-metrics/overview.html.
31
+ Log files are written to <--outdir>/*.log; get-selection-metrics.log has most of the interesting information (view with less -RS).
32
+ Example usage:
33
+ single chain:
34
+ ./bin/read-gctree-output.py --locus igh --gctreedir <gctree-output-dir> --outdir <dir-for-partis-output>
35
+ paired:
36
+ ./bin/read-gctree-output.py --paired-loci --seqfname <fasta-input-file> --gctreedir <gctree-output-dir> --outdir <dir-for-partis-output>
37
+ other args, with examples (see datascripts/meta/taraki-gctree-2021-10/partis-run.py):
38
+ --kdfname /fh/fast/matsen_e/data/taraki-gctree-2021-10/processed-data/determistic/gc1/kdvals.csv
39
+ --tree-basename tree.nwk --kd-columns delta_bind_CGG_FVS_additive --dont-invert-kd --multiplicity-column multiplicity --species mouse --no-insertions-or-deletions
40
+ --initial-germline-dir /home/dralph/work/partis/datascripts/meta/taraki-gctree-2021-10/germlines --parameter-plots
41
+ --slice-bin-fname /home/dralph/work/partis/datascripts/meta/taraki-gctree-2021-10/slice-bins.yaml
42
+ """
43
class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that inherits from both RawTextHelpFormatter and ArgumentDefaultsHelpFormatter."""
45
# command line interface (helpstr is printed verbatim as the description, and defaults are appended to each argument's help)
formatter_class = MultiplyInheritedFormatter
parser = argparse.ArgumentParser(formatter_class=MultiplyInheritedFormatter, description=helpstr)
all_actions = ['cache-parameters', 'annotate', 'get-selection-metrics']  # by default all of these are run, in this order
parser.add_argument('--actions', default=':'.join(all_actions), help='colon-separated list of actions to run')
parser.add_argument('--seqfname', help='Fasta file with input sequences. If single chain, this defaults to the standard location in --gctreedir. If --paired-loci is set, this should include, separately, all heavy and all light sequences, where the two sequences in a pair have identical uids (at least up to the first \'_\').')
parser.add_argument('--gctreedir', required=True, help='gctree output dir (to get --tree-basename, and maybe abundances.csv, --seqfname).')
parser.add_argument('--outdir', required=True, help='directory to which to write partis output files')
parser.add_argument('--input-partition-fname', help='partis style yaml file with a partition grouping seqs into clonal families; if set, input data is assumed to contain many families (if not set, we assume it\'s only one fmaily).')
parser.add_argument('--paired-loci', action='store_true', help='run on paired heavy/light data')
parser.add_argument('--locus', choices=utils.loci, help='locus of sequences (required for single chain).')
# kd/affinity and multiplicity input
parser.add_argument('--kdfname', help='csv file with kd values (and, optionally, multiplicities), with header names as specified in subsequent args.')
parser.add_argument('--name-column', default='name', help='column name in --kdfname from which to take sequence name')
parser.add_argument('--kd-columns', default='kd', help='colon-separated list of column name[s] in --kdfname from which to take kd values. If more than one, the values are added.')
parser.add_argument('--multiplicity-column', help='If set, column name in --kdfname from which to take multiplicity value (which must be >0, i.e. inferred ancestors should have multiplicity 1). If not set, abundances are read from --abundance-basename in --gctreedir and converted to multiplicities.')
parser.add_argument('--dont-invert-kd', action='store_true', help='by default we invert (take 1/kd) to convert to \'affinity\' (after adding multiple kd columns, if specified), or at least something monotonically increasing with affinity. This skips that step, e.g. if you\'re passing in affinity.')
parser.add_argument('--species', default='mouse', choices=('human', 'macaque', 'mouse'))
parser.add_argument('--tree-basename', default='%s.nk'%gctree_outstr, help='basename of tree file to take from --gctreedir')  # .1 is the most likely one (all trees are also in the pickle file as ete trees: gctree.out.inference.parsimony_forest.p)
parser.add_argument('--abundance-basename', default='abundances.csv', help='basename of abundance file in --gctreedir. Abundance of 0 (inferred ancestor) is converted to multiplicity of 1. Not used if multiplicities are read from kdfname')
# run control, plus options that are handed through to partis
parser.add_argument('--dry', action='store_true')
parser.add_argument('--no-tree-plots', action='store_true')
parser.add_argument('--parameter-plots', action='store_true')
parser.add_argument('--no-insertions-or-deletions', action='store_true', help='see partis help')
parser.add_argument('--n-procs', type=int)
parser.add_argument('--slice-bin-fname')
parser.add_argument('--initial-germline-dir', help='see partis help')
args = parser.parse_args()
71
# post-process and validate the parsed arguments
args.actions = utils.get_arg_list(args.actions, choices=all_actions)  # colon-separated string -> list
args.kd_columns = utils.get_arg_list(args.kd_columns)
if args.multiplicity_column is not None and args.kdfname is None:
    raise Exception('have to set --kdfname if --multiplicity-column is set')
if args.paired_loci:
    # explicit raise rather than assert, so the check isn't stripped by python -O and the user is told what to fix
    if args.seqfname is None:
        raise Exception('have to set --seqfname if --paired-loci is set')
else:
    if args.seqfname is None:  # single chain: fall back to the fasta file in the gctree output dir
        args.seqfname = '%s/%s.fasta' % (args.gctreedir, gctree_outstr)
        print(' set --seqfname to default location in --gctreedir: %s' % args.seqfname)
    if args.locus is None:
        raise Exception('have to set --locus for single chain')
83
+
84
+ # ----------------------------------------------------------------------------------------
85
def metafname():
    """Return the path of the input meta info file, which lives directly in --outdir."""
    meta_basename = 'gctree-meta.yaml'
    return '%s/%s' % (args.outdir, meta_basename)
87
+
88
+ # ----------------------------------------------------------------------------------------
89
def run_cmd(action):
    """Build the partis command line for <action> from the parsed args and run it with utils.simplerun().

    The log file is <--outdir>/<action>.log, and --dry is passed on as simplerun's dryrun argument.
    NOTE(review): the nesting of the if blocks below was reconstructed from a view of this file in which indentation was lost -- confirm against the original source.
    """
    locstr = '--paired-loci' if args.paired_loci else '--locus %s'%args.locus
    # this doesn't work since all gcs together needs 0:1 but single gc runs need 0, and the guessing functionality is working fine atm --droplet-id-separators - --droplet-id-indices 0:1
    # NOTE would maybe be better to not guess pair info?
    # NOTE the partis path is relative, so this has to be run from a directory that contains bin/partis
    cmd = './bin/partis %s %s --species %s --guess-pairing-info --input-metafnames %s' % (action, locstr, args.species, metafname())
    if args.no_insertions_or_deletions:
        cmd += ' --no-insertions-or-deletions'
    if action in ['cache-parameters', 'annotate']:  # these are the actions that get the input sequence file
        cmd += ' --infname %s' % args.seqfname
        if args.paired_loci:
            cmd += ' --paired-outdir %s' % args.outdir
        else:
            cmd += ' --parameter-dir %s/parameters' % args.outdir
        if args.input_partition_fname is None:  # one gc at a time
            cmd += ' --all-seqs-simultaneous'
        else:  # many gcs together
            cmd += ' --input-partition-fname %s' % args.input_partition_fname
    if action == 'cache-parameters':
        if args.initial_germline_dir is not None:
            cmd += ' --initial-germline-dir %s' % args.initial_germline_dir
        if args.parameter_plots:
            cmd += ' --plotdir %s' % args.outdir
    if action in ['annotate', 'get-selection-metrics'] and '--paired-outdir' not in cmd:  # add the output location, unless --paired-outdir was already added above
        cmd += ' --%s %s%s' % ('paired-outdir' if args.paired_loci else 'outfname', args.outdir, '' if args.paired_loci else '/partition.yaml')
    if action == 'get-selection-metrics':
        # NOTE(review): for paired loci the literal string 'paired-outdir' is passed as --plotdir -- presumably a special value understood by partis, confirm
        cmd += ' --min-selection-metric-cluster-size 3 --treefname %s/%s --plotdir %s --selection-metrics-to-calculate lbi:aa-lbi:cons-dist-aa:lbr:aa-lbr:lbf:aa-lbf' % (args.gctreedir, args.tree_basename, 'paired-outdir' if args.paired_loci else '%s/selection-metrics/plots'%args.outdir)
        cmd += ' --extra-daffy-metrics lbi:aa-lbi'
        cmd += ' --label-root-node'
        plt_cfg = treeutils.default_plot_cfg + ['distr', 'tree-mut-stats']
        if args.no_tree_plots:  # drop the 'tree' entry from the plot config
            plt_cfg = [t for t in plt_cfg if t != 'tree']
        cmd += ' --add-selection-metrics-to-outfname --use-droplet-id-for-combo-id --selection-metric-plot-cfg %s' % ':'.join(plt_cfg)
        if args.slice_bin_fname is not None:
            cmd += ' --slice-bin-fname %s' % args.slice_bin_fname
        cmd += ' --choose-all-abs --chosen-ab-fname %s/chosen-abs.csv' % args.outdir  # --debug 1
    if args.n_procs is not None:
        cmd += ' --n-procs %d' % args.n_procs
    utils.simplerun(cmd, logfname='%s/%s.log'%(args.outdir, action), dryrun=args.dry)
127
+
128
+ # ----------------------------------------------------------------------------------------
129
# collect per-sequence input meta info (multiplicity and/or affinity), write it to the meta file, then run each requested action
utils.mkdir(args.outdir)
metafos = {}  # uid : {'multiplicity' : <int>, 'affinity' : <float>} (either key may be absent)
if args.multiplicity_column is None:  # if not set, read abundances from args.abundance_basename
    abfn = '%s/%s' % (args.gctreedir, args.abundance_basename)
    print(' reading abundance info from %s' % abfn)
    with open(abfn) as afile:
        reader = csv.DictReader(afile, fieldnames=('name', 'abundance'))  # field names are supplied here, so every line in the file is treated as data
        for line in reader:
            metafos.setdefault(line['name'], {})['multiplicity'] = max(1, int(line['abundance']))  # increase 0s (inferred ancestors) to 1
if args.kdfname is not None:
    print(' reading kd info%s from %s' % ('' if args.multiplicity_column is None else ' and multiplicity info', args.kdfname))
    with open(args.kdfname) as kfile:
        reader = csv.DictReader(kfile)
        for line in reader:
            uid = line[args.name_column]
            uinfo = metafos.setdefault(uid, {})
            if all(line[k] not in ['None', None, ''] for k in args.kd_columns):  # only set affinity if every kd column has a value
                kdval = sum(float(line[k]) for k in args.kd_columns)
                if kdval == 0 and not args.dont_invert_kd:  # say which sequence is the problem, rather than crashing with a bare ZeroDivisionError
                    raise Exception('can\'t invert kd value of zero for %s in %s (set --dont-invert-kd if these values are already affinities)' % (uid, args.kdfname))
                uinfo['affinity'] = kdval if args.dont_invert_kd else 1. / kdval
            if args.multiplicity_column is not None:
                uinfo['multiplicity'] = int(line[args.multiplicity_column])

if args.paired_loci:  # convert metafos to per-locus names
    for base_id in list(metafos.keys()):  # copy the keys, since entries are added and removed inside the loop
        for ltmp in utils.sub_loci('ig'):
            new_id = '%s-%s' % (base_id, ltmp)
            metafos[new_id] = metafos[base_id]  # NOTE all the per-locus ids share one and the same info dict
        del metafos[base_id]

# and write to json/yaml
print(' writing input meta info to %s' % metafname())
utils.jsdump(metafname(), metafos)

for action in args.actions:
    run_cmd(action)