partis-bcr 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. bin/FastTree +0 -0
  2. bin/add-chimeras.py +59 -0
  3. bin/add-seqs-to-outputs.py +81 -0
  4. bin/bcr-phylo-run.py +799 -0
  5. bin/build.sh +24 -0
  6. bin/cf-alleles.py +97 -0
  7. bin/cf-germlines.py +57 -0
  8. bin/cf-linearham.py +199 -0
  9. bin/chimera-plot.py +76 -0
  10. bin/choose-partially-paired.py +143 -0
  11. bin/circle-plots.py +30 -0
  12. bin/compare-plotdirs.py +298 -0
  13. bin/diff-parameters.py +133 -0
  14. bin/docker-hub-push.sh +6 -0
  15. bin/extract-pairing-info.py +55 -0
  16. bin/gcdyn-simu-run.py +223 -0
  17. bin/gctree-run.py +244 -0
  18. bin/get-naive-probabilities.py +126 -0
  19. bin/iqtree-1.6.12 +0 -0
  20. bin/lonr.r +1020 -0
  21. bin/makeHtml +52 -0
  22. bin/mds-run.py +46 -0
  23. bin/parse-output.py +277 -0
  24. bin/partis +1869 -0
  25. bin/partis-pip +116 -0
  26. bin/partis.py +1869 -0
  27. bin/plot-gl-set-trees.py +519 -0
  28. bin/plot-hmms.py +151 -0
  29. bin/plot-lb-tree.py +427 -0
  30. bin/raxml-ng +0 -0
  31. bin/read-bcr-phylo-trees.py +38 -0
  32. bin/read-gctree-output.py +166 -0
  33. bin/run-chimeras.sh +64 -0
  34. bin/run-dtr-scan.sh +25 -0
  35. bin/run-paired-loci.sh +100 -0
  36. bin/run-tree-metrics.sh +88 -0
  37. bin/smetric-run.py +62 -0
  38. bin/split-loci.py +317 -0
  39. bin/swarm-2.1.13-linux-x86_64 +0 -0
  40. bin/test-germline-inference.py +425 -0
  41. bin/tree-perf-run.py +194 -0
  42. bin/vsearch-2.4.3-linux-x86_64 +0 -0
  43. bin/vsearch-2.4.3-macos-x86_64 +0 -0
  44. bin/xvfb-run +194 -0
  45. partis_bcr-1.0.1.data/scripts/cf-alleles.py +97 -0
  46. partis_bcr-1.0.1.data/scripts/cf-germlines.py +57 -0
  47. partis_bcr-1.0.1.data/scripts/extract-pairing-info.py +55 -0
  48. partis_bcr-1.0.1.data/scripts/gctree-run.py +244 -0
  49. partis_bcr-1.0.1.data/scripts/parse-output.py +277 -0
  50. partis_bcr-1.0.1.data/scripts/split-loci.py +317 -0
  51. partis_bcr-1.0.1.data/scripts/test.py +1005 -0
  52. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/METADATA +1 -1
  53. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/RECORD +101 -50
  54. partis_bcr-1.0.1.dist-info/top_level.txt +1 -0
  55. {partis → python}/glutils.py +1 -1
  56. python/main.py +30 -0
  57. {partis → python}/plotting.py +10 -1
  58. {partis → python}/treeutils.py +18 -16
  59. {partis → python}/utils.py +14 -7
  60. partis/main.py +0 -59
  61. partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
  62. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/WHEEL +0 -0
  63. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/entry_points.txt +0 -0
  64. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/licenses/COPYING +0 -0
  65. {partis → python}/__init__.py +0 -0
  66. {partis → python}/alleleclusterer.py +0 -0
  67. {partis → python}/allelefinder.py +0 -0
  68. {partis → python}/alleleremover.py +0 -0
  69. {partis → python}/annotationclustering.py +0 -0
  70. {partis → python}/baseutils.py +0 -0
  71. {partis → python}/cache/__init__.py +0 -0
  72. {partis → python}/cache/cached_uncertainties.py +0 -0
  73. {partis → python}/clusterpath.py +0 -0
  74. {partis → python}/coar.py +0 -0
  75. {partis → python}/corrcounter.py +0 -0
  76. {partis → python}/datautils.py +0 -0
  77. {partis → python}/event.py +0 -0
  78. {partis → python}/fraction_uncertainty.py +0 -0
  79. {partis → python}/gex.py +0 -0
  80. {partis → python}/glomerator.py +0 -0
  81. {partis → python}/hist.py +0 -0
  82. {partis → python}/hmmwriter.py +0 -0
  83. {partis → python}/hutils.py +0 -0
  84. {partis → python}/indelutils.py +0 -0
  85. {partis → python}/lbplotting.py +0 -0
  86. {partis → python}/mds.py +0 -0
  87. {partis → python}/mutefreqer.py +0 -0
  88. {partis → python}/paircluster.py +0 -0
  89. {partis → python}/parametercounter.py +0 -0
  90. {partis → python}/paramutils.py +0 -0
  91. {partis → python}/partitiondriver.py +0 -0
  92. {partis → python}/partitionplotter.py +0 -0
  93. {partis → python}/performanceplotter.py +0 -0
  94. {partis → python}/plotconfig.py +0 -0
  95. {partis → python}/processargs.py +0 -0
  96. {partis → python}/prutils.py +0 -0
  97. {partis → python}/recombinator.py +0 -0
  98. {partis → python}/scanplot.py +0 -0
  99. {partis → python}/seqfileopener.py +0 -0
  100. {partis → python}/treegenerator.py +0 -0
  101. {partis → python}/viterbicluster.py +0 -0
  102. {partis → python}/vrc01.py +0 -0
  103. {partis → python}/waterer.py +0 -0
@@ -0,0 +1,298 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import absolute_import, division, unicode_literals
3
+ from __future__ import print_function
4
+ import argparse
5
+ from collections import OrderedDict
6
+ import os
7
+ import glob
8
+ import sys
9
+ import colored_traceback.always
10
+ import copy
11
+ from pathlib import Path
12
+ partis_dir = str(Path(__file__).parent.parent)
13
+ sys.path.insert(1, partis_dir) # + '/python')
14
+
15
+ import python.plotconfig as plotconfig
16
+ import python.plotting as plotting
17
+ import python.utils as utils
18
+ import python.glutils as glutils
19
+ from python.hist import Hist
20
+ import python.treeutils as treeutils
21
+
22
+ xtitledict = copy.deepcopy(plotting.legends)
23
+ xtitledict.update(plotconfig.xtitles)
24
+ xtitledict.update(treeutils.legtexts)
25
+
26
+ ptitledict = copy.deepcopy(plotting.legends)
27
+ ptitledict.update(plotconfig.plot_titles)
28
+ ptitledict.update(treeutils.legtexts)
29
+
30
+ # ----------------------------------------------------------------------------------------
31
def get_hists_from_dir(dirname, histname, string_to_ignore=None):
    """Read every histogram csv matching --file-glob-str in <dirname>.

    Returns a dict mapping variable name to Hist, where the variable name is
    the file's base name with each of --file-replace-strs (and then
    <string_to_ignore>, if set) removed. Every Hist is given <histname> as its
    title. Prints a note if no matching file was found.
    """
    found = {}
    pattern = '%s/%s' % (dirname, args.file_glob_str)
    for csv_path in glob.glob(pattern):
        key = os.path.basename(csv_path)
        for piece in args.file_replace_strs:
            key = key.replace(piece, '')
        if string_to_ignore is not None:
            key = key.replace(string_to_ignore, '')
        found[key] = Hist(fname=csv_path, title=histname)
    if not found:
        print(' no csvs found%s in %s' % ('' if args.file_glob_str is None else ' with --file-glob-str \'%s\''%args.file_glob_str, dirname))
    return found
43
+
44
+
45
+ # ----------------------------------------------------------------------------------------
46
def compare_directories(args, plotdirlist, outdir):
    """Make one comparison plot per variable found in any dir of <plotdirlist>.

    The i-th entry of <plotdirlist> is labelled with the i-th entry of
    args.names. A variable missing from some directory is represented there by
    an empty one-bin placeholder hist titled 'null'. Plots and a summary html
    are written to <outdir>, which is prepared (cleaned) first.
    """
    utils.prep_dir(outdir, wildlings=['*.png', '*.svg', '*.csv'])

    # collect the hists from each directory, keyed by that directory's label
    hists_by_name = OrderedDict()
    seen_vars = set()  # every variable that shows up in at least one dir
    for idir, plotdir in enumerate(plotdirlist):
        label = args.names[idir]
        these_hists = get_hists_from_dir(plotdir, label)
        seen_vars.update(these_hists.keys())
        hists_by_name[label] = these_hists

    # one plot per variable, with one hist per directory
    for varname in seen_vars:
        hlist = []
        for label in hists_by_name:
            hlist.append(hists_by_name[label].get(varname, Hist(1, 0, 1, title='null')))
        plot_single_variable(args, varname, hlist, outdir, pathnameclues=plotdirlist[0])

    plotting.make_html(outdir, n_columns=4)
62
+
63
+ # ----------------------------------------------------------------------------------------
64
+ def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
+ # Draw one comparison plot for <varname> into <outdir>: hlist[0] is drawn with the rest of <hlist> overlaid (via plotting.draw_no_root()).
+ # Most of the body just picks titles, bounds, ticks, legend position and figure size from <varname>, the command line <args>, and <pathnameclues>.
+ # <pathnameclues> is a path string that is only searched for substrings (e.g. 'mute-freqs', 'per-gene-per-position/v').
+ # NOTE this modifies <args>: args.log and args.alphas may be reassigned below, and since the caller passes the same <args> for every variable the new values carry over to later calls.
+ # NOTE the .setdefault(key, None) calls on the plotconfig dicts insert a None entry for any key that was missing (a plain .get() would not).
65
+ if varname in plotconfig.gene_usage_columns:
66
+ hlist = plotting.add_bin_labels_not_in_all_hists(hlist)
67
+
68
+ no_labels = False
69
+ xline, bounds, figsize = None, None, None
70
+ stats = args.extra_stats
71
+ translegend = [0.0, -0.2]
72
+ xtitle, ytitle = hlist[0].xtitle, hlist[0].ytitle
73
+ bounds, xticks, xticklabels = args.xbounds, args.xticks, None
74
+ if xtitle == '': # arg, plotting.py thinks default should be None, hist.py thinks it's ''
75
+ xtitle = None
76
+ if '-mean-bins' in varname:
77
+ raise Exception('darn, I was hoping I wasn\'t making these plots any more')
78
+ plottitle = args.plottitle
79
+ if plottitle is None:
80
+ plottitle = ptitledict.get(varname, varname)
81
+
82
+ ytitle = 'fraction of total' if args.normalize else 'counts'
83
+
84
+ if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
85
+ assert not args.normalize
86
+ ytitle = 'mutation freq'
87
+
88
+ if varname in plotconfig.gene_usage_columns:
89
+ xtitle = 'allele'
90
+ if hlist[0].n_bins == 2:
91
+ stats = '0-bin' # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
92
+ xtitle = None
93
+ # elif hlist[0].bin_labels.count('') == hlist[0].n_bins + 2:
94
+ # xtitle = '???'
95
+
96
+ line_width_override = None
97
+ if args.performance_plots:
98
+ if 'hamming_to_true_naive' in varname:
99
+ xtitle = 'hamming distance'
100
+ if '_normed' in varname:
101
+ xtitle = 'fractional ' + xtitle
102
+ elif '_vs_mute_freq' in varname:
103
+ xtitle = 'mutation freq'
104
+ ytitle = 'fraction correct'
105
+ if varname[0] == 'v' or varname[0] == 'j':
106
+ translegend = [-0.4, -0.4]
107
+ elif varname.find('_gene') == 1:
108
+ xtitle = ''
109
+ ytitle = 'fraction correct'
110
+ else:
111
+ xtitle = 'inferred - true'
112
+ bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
113
+ else:
114
+ if bounds is None:
115
+ bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
116
+ if bounds is None and 'insertion' in varname:
117
+ bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
118
+ if varname in plotconfig.gene_usage_columns:
119
+ # no_labels = True # not sure why i wanted these labels turned off?
120
+ if 'j_' not in varname:
121
+ figsize = (10, 5)
122
+ line_width_override = 1
123
+ elif 'per-gene-per-position/v' in pathnameclues:
124
+ figsize = (20, 5)
125
+ if bounds is None:
126
+ bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname), None)
127
+
+ # variable names containing 'IG' or 'TR' are treated as (sanitized) gene names
128
+ if 'IG' in varname or 'TR' in varname:
129
+ if 'mute-freqs' in pathnameclues:
130
+ gene = utils.unsanitize_name(varname)
131
+ plottitle = gene # + ' -- mutation frequency'
132
+ xtitle = 'position'
133
+ if utils.get_region(gene) == 'j':
134
+ translegend = [0.1, 0.] #(-0.35, -0.02)
135
+ else:
136
+ translegend = [0.15, -0.02]
137
+ xline = None
138
+ if args.glfo is not None:
139
+ if utils.get_region(gene) in utils.conserved_codons[args.locus]:
140
+ xline = args.glfo[utils.conserved_codons[args.locus][utils.get_region(gene)] + '-positions'][gene]
141
+ else:
142
+ ilastdash = varname.rfind('-')
143
+ gene = utils.unsanitize_name(varname[:ilastdash])
144
+ base_varname = varname[ilastdash + 1 :]
145
+ base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
146
+ plottitle = gene + ' -- ' + base_plottitle
147
+
148
+ if varname == 'cluster-sizes':
149
+ xtitle = 'cluster size'
150
+ ytitle = 'fraction of clusters' if args.normalize else 'N clusters'
151
+ plottitle = ''
152
+ xticks, xticklabels = plotting.get_cluster_size_xticks(hlist=hlist) # it would be better to use all the hists, but i think it'll just screw up the ticks
+ # NOTE(review): <plt> is not referenced anywhere else in this function, so this import looks unused
153
+ import matplotlib.pyplot as plt
154
+ if varname in ['func-per-drop', 'nonfunc-per-drop']:
155
+ bounds = (-0.5, 15.5)
156
+ if 'subtree-purity' in varname:
157
+ if 'size' in varname:
158
+ if args.log == '':
159
+ args.log = 'xy'
160
+ xticks = [1, 2, 3, 5, 10, 15, 20]
161
+ xticklabels = ['1', '2', '3', '5', '10', '15', '20']
162
+
163
+ if xtitle is None:
164
+ xtitle = xtitledict.get(varname)
165
+
166
+ if args.add_to_title is not None:
167
+ plottitle += args.add_to_title
168
+
169
+ if len(hlist) > 9: # skootch it down so they (maybe) all fit
170
+ translegend[1] -= 0.5
171
+ if args.translegend is not None: # override with the command line
172
+ translegend = args.translegend
173
+ if varname == 'paired-uids-per-uid':
174
+ translegend = [translegend[0] + 0.15, translegend[1] - 0.3]
175
+ if args.extra_stats == 'auto': # kind of hackey
176
+ if xtitle == 'inferred - true':
177
+ stats = 'absmean'
178
+ else:
179
+ stats = 'mean'
180
+ # draw that little #$*(!
181
+ linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
+ # if --alphas is unset or the wrong length, replace it with one value per hist (0.6, or the first --alphas entry)
182
+ if args.alphas is None or len(args.alphas) != len(hlist):
183
+ if args.alphas is not None and len(args.alphas) != len(hlist):
184
+ print(' %s --alphas wrong length, using first entry for all' % utils.wrnstr())
185
+ args.alphas = [0.6 if args.alphas is None else args.alphas[0] for _ in range(len(hlist))]
186
+ shift_overflows = os.path.basename(outdir) != 'gene-call' and 'func-per-drop' not in varname
187
+ plotting.draw_no_root(hlist[0], plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=stats, bounds=bounds, ybounds=args.ybounds,
188
+ shift_overflows=shift_overflows, plottitle=plottitle, colors=args.colors,
189
+ xtitle=xtitle if args.xtitle is None else args.xtitle, ytitle=ytitle if args.ytitle is None else args.ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
190
+ linewidths=linewidths, markersizes=args.markersizes, alphas=args.alphas, errors=not args.no_errors, remove_empty_bins=True, #='y' in args.log,
191
+ figsize=figsize, no_labels=no_labels, log=args.log, translegend=translegend, xticks=xticks, xticklabels=xticklabels, square_bins=args.square_bins)
192
+
+ # optional swarm plots: each bin center is repeated once per (integer) entry in that bin, keyed by the hist's title
193
+ if args.swarm_meta_key is not None:
194
+ plotvals = {h.title : [h.get_bin_centers()[i] for i in h.ibiniter(True) for _ in range(int(h.bin_contents[i]))] for h in hlist}
195
+ plotting.stack_meta_hists(varname, outdir, args.swarm_meta_key, plotvals, colors={h.title : c for h, c in zip(hlist, args.colors)}, xtitle=xtitle, swarm_plots=True, no_hist=True, xticks=xticks)
196
+
197
+ # ----------------------------------------------------------------------------------------
198
# Command line definition for compare-plotdirs.py. Most list-valued options are
# passed as colon-separated strings and are split after parsing.
helpstr = """
Compare csv histogram plot files across multiple directories
./bin/compare-plotdirs.py --outdir _output/tmp-plots --plotdirs docs/example-plots/sw/mute-freqs/overall:docs/example-plots/hmm/mute-freqs/overall:docs/example-plots/multi-hmm/mute-freqs/overall --names sw:hmm:multi-hmm --normalize
"""
class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that keeps raw help text layout and also shows argument defaults."""
    pass
formatter_class = MultiplyInheritedFormatter
parser = argparse.ArgumentParser(formatter_class=MultiplyInheritedFormatter, description=helpstr)
parser.add_argument('--outdir', required=True, help='Output directory to which to write the resulting comparison plots. A summary .html file is also written to <outdir>.html')
# help text fix: the script globs for csv files in each dir, not svgs
parser.add_argument('--plotdirs', required=True, help='Colon-separated list of input plot directories, each of which must have identical structure. Looks for csvs first in each dir, but then also in the subdirs of each dir (so e.g. if each of them have a/, b/, and c/ subdirs, this script will make a separate comparison of a/, b/, and c/)')
parser.add_argument('--names', required=True, help='colon-separated list of names/labels corresponding to --plotdirs (use @ as space)')
parser.add_argument('--performance-plots', action='store_true', help='set to true if these are annotation performance plots, i.e. made with --plot-annotation-performance (this makes the axis labels more sensible)')
# help text fix: the list is colon-separated (was 'color-separated')
parser.add_argument('--colors', default=':'.join(plotting.default_colors), help='colon-separated list of colors to cycle through for the plotdirs')
parser.add_argument('--alphas')
parser.add_argument('--linewidths', default=':'.join(plotting.default_linewidths), help='colon-separated list of linewidths to cycle through')
# help text fix: this option is marker sizes (text was copied from --linewidths)
parser.add_argument('--markersizes', default=':'.join(plotting.default_markersizes), help='colon-separated list of marker sizes to cycle through')
parser.add_argument('--gldirs', help='On plots showing mutation vs individual gene positions, if you\'d like a dashed vertical line showing conserved codon positions, set this as a colon-separated list of germline info dirs corresponding to each plotdir')  #, default=['data/germlines/human'])
parser.add_argument('--locus', default='igh')
parser.add_argument('--normalize', action='store_true', help='If set, the histograms from each plotdir are normalized (each bin contents divided by the integral) before making the comparison (e.g. for comparing different size samples).')
parser.add_argument('--extra-stats', help='if set, adds extra stat to legend, e.g. \'mean\', \'absmean\', \'auto\'')
parser.add_argument('--translegend', help='colon-separated list of x, y values with which to translate all the legends')
parser.add_argument('--log', default='', help='Display these axes on a log scale, set to either \'x\', \'y\', or \'xy\'')
parser.add_argument('--make-parent-html', action='store_true', help='after doing everything within subdirs, make a single html in the main/parent dir with all plots from subdirs')
parser.add_argument('--add-to-title', help='string to append to existing title (use @ as space)')
parser.add_argument('--file-glob-str', default='*.csv', help='shell glob style regex for matching plot files')
parser.add_argument('--file-replace-strs', default='.csv', help='colon-separated list of strings to remove from file base name to get variable name')
parser.add_argument('--xbounds')
parser.add_argument('--ybounds')
parser.add_argument('--xticks')
parser.add_argument('--plottitle')
parser.add_argument('--xtitle')
parser.add_argument('--ytitle')
parser.add_argument('--no-errors', action='store_true')
parser.add_argument('--single-plotdir', action='store_true')
parser.add_argument('--square-bins', action='store_true')
parser.add_argument('--swarm-meta-key', help='if set, also make swarm plots, pretending that each hist\'s title is the value for this "fake" meta info key, and treat each bin\'s entries as observations at the bin center\'s value')
234
+
235
# Parse the command line, then turn each list-valued option into an actual list.
args = parser.parse_args()
for argname in ('plotdirs', 'names', 'colors', 'gldirs', 'file_replace_strs'):  # plain string lists
    setattr(args, argname, utils.get_arg_list(getattr(args, argname)))
for argname in ('alphas', 'translegend', 'xbounds', 'ybounds', 'xticks'):  # lists of floats
    setattr(args, argname, utils.get_arg_list(getattr(args, argname), floatify=True))
for argname in ('linewidths', 'markersizes'):  # lists of ints
    setattr(args, argname, utils.get_arg_list(getattr(args, argname), intify=True))

# '@' stands in for a space in labels and title text
args.names = [name.replace('@', ' ') for name in args.names]
if args.add_to_title is not None:
    args.add_to_title = args.add_to_title.replace('@', ' ')

# with a single plotdir (and no --single-plotdir), treat --names as its subdirs
if not args.single_plotdir and len(args.plotdirs) == 1:
    print(' --plotdirs is length 1 (and --single-plotdir wasn\'t set), so assuming --names has the desired subdirs')
    parentdir = args.plotdirs[0]
    args.plotdirs = [parentdir + '/' + n for n in args.names]

if len(args.names) != len(args.plotdirs):
    raise Exception('poorly formatted args:\n %s\n %s' % (' '.join(args.plotdirs), ' '.join(args.names)))
260
+
261
# Build args.glfo by merging the germline info from every --gldirs entry that
# exists on disk (stays None if --gldirs is unset or none of them exist).
args.glfo = None
for gldir in (args.gldirs if args.gldirs is not None else []):
    if not os.path.exists(gldir):
        continue
    tmpglfo = glutils.read_glfo(gldir, args.locus)
    args.glfo = tmpglfo if args.glfo is None else glutils.get_merged_glfo(args.glfo, tmpglfo)
270
+
271
# Bail out (with exit status 0) if any input dir is missing.
missing_plotdirs = [d for d in args.plotdirs if not os.path.isdir(d)]
if missing_plotdirs:
    print(' at least one of --plotdirs doesn\'t exist: %s' % ' '.join(missing_plotdirs))
    sys.exit(0)

# Work out which (list of input dirs, output dir) pairs to compare.
listof_plotdirlists, listof_outdirs = [], []
firstdir = args.plotdirs[0]
# the main/parent dirs themselves, but only if the first one has csvs
if glob.glob(firstdir + '/*.csv'):
    listof_plotdirlists.append(args.plotdirs)
    listof_outdirs.append(args.outdir)
else:
    print(' no csvs in main/parent dir %s' % firstdir)
# then every subdir of the first dir (the same subdir is assumed for the others)
added_subds = [d for d in os.listdir(firstdir) if os.path.isdir(firstdir + '/' + d)]
for subdir in added_subds:
    listof_plotdirlists.append([d + '/' + subdir for d in args.plotdirs])
    listof_outdirs.append(args.outdir + '/' + subdir)
if added_subds:
    print(' added %d subdirs: %s' % (len(added_subds), ' '.join(added_subds)))

for dlist, outdir in zip(listof_plotdirlists, listof_outdirs):
    compare_directories(args, dlist, outdir)

if args.make_parent_html:  # didn't really test this very well
    # NOTE(review): the *.svg pattern is unquoted, so if simplerun() goes through a shell it may be expanded before find sees it -- confirm
    fnoutstr, _ = utils.simplerun('find %s -type f -name *.svg' % args.outdir, return_out_err=True)
    svg_fnames = fnoutstr.strip().split('\n')
    plotting.make_html(args.outdir, fnames=[svg_fnames])
bin/diff-parameters.py ADDED
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Replacement for diff -qr -x\'*.svg\' -x params -x plots.html <arg1> <arg2>, but with more intelligent
4
+ control of yamls (e.g. floating point precision) and csvs (e.g. line order)
5
+ """
6
+
7
+ from __future__ import absolute_import, division, unicode_literals
8
+ from __future__ import print_function
9
+ import argparse
10
+ import os
11
+ import sys
12
+ import csv
13
+ import re
14
+ import yaml
15
+ from subprocess import check_output
16
+ from io import open
17
+
18
+ # current_script_dir = os.path.dirname(os.path.realpath(__file__)).replace('/bin', '') #'/python')
19
+ # if not os.path.exists(current_script_dir):
20
+ # print 'WARNING current script dir %s doesn\'t exist, so python path may not be correctly set' % current_script_dir
21
+ # sys.path.insert(1, current_script_dir)
22
+
23
# Command line: two positional arguments that are either both directories or
# both files with the same base name.
parser = argparse.ArgumentParser()
parser.add_argument('arg1')
parser.add_argument('arg2')
parser.add_argument('--keep-going', action='store_true', help='Don\'t fail on differences, instead just keep on chugging')
parser.add_argument('--precision', type=int, default=9, help='number of digits after the decimal place to keep when comparing floating point numbers')
args = parser.parse_args()

# Resolve args.dir1/args.dir2 (dirs to compare) and args.fname (single file
# name, or None to compare every file under dir1). Explicit raises are used
# rather than asserts so the checks still run under python -O.
if os.path.isdir(args.arg1):  # can either pass arg[12] as directories in which to look
    if not os.path.isdir(args.arg2):
        raise Exception('%s is a directory but %s is not' % (args.arg1, args.arg2))
    args.dir1 = args.arg1
    args.dir2 = args.arg2
    args.fname = None
else:  # ...or as single files
    # os.path.dirname() gives '' for a bare file name, which never "exists", so fall back to the current dir
    args.dir1 = os.path.dirname(args.arg1) or '.'
    args.dir2 = os.path.dirname(args.arg2) or '.'
    for dname in (args.dir1, args.dir2):
        if not os.path.exists(dname):
            raise Exception('directory %s doesn\'t exist' % dname)
    args.fname = os.path.basename(args.arg1)
    if args.fname != os.path.basename(args.arg2):
        raise Exception('file names differ: %s vs %s' % (args.fname, os.path.basename(args.arg2)))
42
+
43
def reduce_float_precision(line, precision=None):
    """Truncate every decimal number in <line> to at most <precision> decimals.

    Any number with more than <precision> digits after the decimal point is cut
    (not rounded) to <precision> digits, and trailing zeros of the kept part
    are then removed (which can leave a bare trailing '.', e.g. '0.').

    line: text to process
    precision: digits to keep after the decimal point; defaults to the
        --precision command line value (args.precision)
    Returns the modified line.
    """
    if precision is None:
        precision = args.precision
    # raw strings: '\.' in a normal string literal is an invalid escape sequence
    regex = re.compile(r'([0-9]\.' + precision * '[0-9]' + r')([0-9][0-9]*)')
    match = regex.search(line)
    while match is not None:
        left, right = match.groups()  # left-hand (more significant) and right-hand parts of number
        # <left> always contains the '.', so stripping zeros can't run past the decimal point
        line = line.replace(left + right, left.rstrip('0'))
        match = regex.search(line)
    return line
55
+
56
def get_lines(fname, reduce_precision=False):
    """Return the lines of <fname>, sorted.

    If <reduce_precision> is set, each line is passed through
    reduce_float_precision() after sorting. Raises Exception (with the file and
    directory names) if the file can't be read.
    """
    try:
        with open(fname) as handle:
            sorted_lines = sorted(handle)
    except IOError:
        raise Exception(os.path.basename(fname) + ' not found in ' + os.path.dirname(fname))
    if reduce_precision:
        sorted_lines = [reduce_float_precision(one_line) for one_line in sorted_lines]
    return sorted_lines
67
+
68
def check_lines(lines1, lines2):
    """Return the entries of <lines1> that don't appear in <lines2>.

    Order is that of <lines1>, and repeated lines are kept. Membership is
    tested against a set so the comparison is linear rather than quadratic in
    the number of lines.
    """
    present = set(lines2)
    return [line for line in lines1 if line not in present]
75
+
76
def check_textfile(fname):
    """Compare <fname> (a path relative to the two dirs) between args.dir1 and args.dir2.

    Lines are compared as unordered sets after reducing float precision. Any
    line present in only one of the two files is printed, and the script then
    exits with status 1 unless --keep-going was set.
    """
    path1, path2 = args.dir1 + fname, args.dir2 + fname
    lines1 = get_lines(path1, reduce_precision=True)
    lines2 = get_lines(path2, reduce_precision=True)
    only_in_1 = check_lines(lines1, lines2)
    only_in_2 = check_lines(lines2, lines1)
    if not only_in_1 and not only_in_2:
        return
    for path, difflines in ((path1, only_in_1), (path2, only_in_2)):
        print('differing lines from', path)
        for line in difflines:
            print(' ', line.strip())
    if not args.keep_going:
        sys.exit(1)
90
+
91
+ # def diff_models(model1, model2):
92
+ # for state in model1.states:
93
+ # if state
94
+ # for transition in
95
+ # sys.exit()
96
+
97
+ # def check_yaml(fname):
98
+ # print fname
99
+ # with open(args.dir1 + fname) as infile1:
100
+ # with open(args.dir2 + fname) as infile2:
101
+ # model1 = yaml.load(infile1)
102
+ # model2 = yaml.load(infile2)
103
+ # diff_models(model1, model2)
104
+ # diff_models(model2, model1)
105
+
106
def get_file_list(extension='', dirname=None):
    """List paths under <dirname> (via the 'find' command), relative to it.

    extension: if '', list all regular files; otherwise list everything whose
        name matches '*.<extension>'
    dirname: directory to search; defaults to args.dir1
    Returns a list of paths with the leading <dirname> removed, so each starts
    with '/'. Output is split on newlines (not arbitrary whitespace) so names
    containing spaces survive, and only the leading directory prefix is
    removed, not every occurrence of it.
    """
    if dirname is None:
        dirname = args.dir1
    cmd = ['find', dirname]
    if extension == '':
        cmd += ['-type', 'f']
    else:
        cmd += ['-name', '*.' + extension]
    output = check_output(cmd, universal_newlines=True)
    prefix = dirname.rstrip('/')  # so that a trailing slash on <dirname> still leaves a leading '/' on each result
    return [path[len(prefix):] if path.startswith(prefix) else path for path in output.splitlines()]
114
+
115
+ # ----------------------------------------------------------------------------------------
116
+ # for fname in get_file_list('csv'):
117
+ # check_textfile(fname)
118
+ # for fname in get_file_list('yaml'):
119
+ # check_textfile(fname)
120
+
121
# Compare either every file under dir1, or just the single requested file.
filelist = get_file_list() if args.fname is None else ['/' + args.fname, ]
for fname in filelist:
    is_text = any(ext in fname for ext in ('.csv', '.yaml'))
    is_ignored = any(ext in fname for ext in ('.svg', '.html'))
    if is_text:  # csv/yaml takes precedence over the ignored extensions
        check_textfile(fname)
    elif not is_ignored:
        raise Exception(fname + ' has an extension I can\'t handle')

sys.exit(0)
bin/docker-hub-push.sh ADDED
@@ -0,0 +1,6 @@
1
#!/bin/bash
# Build the partis docker image from the current directory and push it to Docker Hub.

# Stop at the first failing command, so a failed build or login never leads to pushing a stale image.
set -euo pipefail

sudo docker build --tag psathyrella/partis .
# sudo docker tag <local tag> psathyrella/partis
sudo docker login -u psathyrella
sudo docker push psathyrella/partis
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import absolute_import, division, unicode_literals
3
+ from __future__ import print_function
4
+ import csv
5
+ import os
6
+ import sys
7
+ from io import open
8
+ csv.field_size_limit(sys.maxsize) # make sure we can write very large csv fields
9
+ import argparse
10
+ import colored_traceback.always
11
+ import yaml
12
+ import json
13
+ import operator
14
+ import random
15
+ import numpy
16
+ from pathlib import Path
17
+
18
+ # if you move this script, you'll need to change this method of getting the imports
19
+ partis_dir = str(Path(__file__).parent.parent)
20
+ sys.path.insert(1, partis_dir) # + '/python')
21
+
22
+ import python.utils as utils
23
+
24
dstr = """
Extract heavy/light chain pairing info from fasta file <infname> and write it to yaml/json file <outfname>.
Should have the same effect as setting --guess-pairing-info when running bin/split-loci.py.
"""
# ArgumentDefaultsHelpFormatter only appends the default to arguments that have
# a help string, which is why arguments without one showed no default; every
# argument below therefore gets help text.
parser = argparse.ArgumentParser(description=dstr,
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('infname', help='input fasta file')
parser.add_argument('outfname', help='output yaml/json file')
parser.add_argument('--droplet-id-separators', help=utils.did_help['seps'])
parser.add_argument('--droplet-id-indices', help=utils.did_help['indices'])
parser.add_argument('--overwrite', action='store_true', help='overwrite <outfname> if it already exists')
parser.add_argument('--for-testing-n-max-queries', type=int, default=-1, help='only for testing, applied when reading initial fasta file, just in case it\'s huge and you want to run quickly without having to read the whole file')
parser.add_argument('--n-max-queries', type=int, default=-1, help='see partis help (although here it applies to droplets, not individual seqs)')
parser.add_argument('--n-random-queries', type=int, help='see partis help (although here it applies to droplets, not individual seqs)')
parser.add_argument('--input-metafname', help='json/yaml file with additional (beyond pairing info) input meta info (see partis help)')
parser.add_argument('--random-seed', type=int, default=1, help='seed for both the python and numpy random number generators')
args = parser.parse_args()
# seed both generators so random query subsetting is reproducible
random.seed(args.random_seed)
numpy.random.seed(args.random_seed)
args.droplet_id_indices = utils.get_arg_list(args.droplet_id_indices, intify=True)
44
+
45
# Nothing to do if the output is already there (and we're not overwriting).
if utils.output_exists(args, args.outfname, offset=4, debug=False):
    print(' extract-pairing-info.py output exists and --overwrite was not set, so not doing anything: %s' % args.outfname)
    sys.exit(0)

# Read the input sequences, optionally keep only a subset of them, then work out the pairing info.
seqfos = utils.read_fastx(args.infname, n_max_queries=args.for_testing_n_max_queries)
subset_requested = not (args.n_max_queries == -1 and args.n_random_queries is None)
if subset_requested:
    seqfos = utils.subset_paired_queries(seqfos, args.droplet_id_separators, args.droplet_id_indices, n_max_queries=args.n_max_queries, n_random_queries=args.n_random_queries)
metafos = utils.extract_pairing_info(seqfos, droplet_id_separators=args.droplet_id_separators, droplet_id_indices=args.droplet_id_indices, input_metafname=args.input_metafname)

# Write the result, making the output file's directory first.
utils.mkdir(args.outfname, isfile=True)
utils.jsdump(args.outfname, metafos)