partis-bcr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bin/FastTree +0 -0
- bin/add-chimeras.py +59 -0
- bin/add-seqs-to-outputs.py +81 -0
- bin/bcr-phylo-run.py +799 -0
- bin/build.sh +24 -0
- bin/cf-alleles.py +97 -0
- bin/cf-germlines.py +57 -0
- bin/cf-linearham.py +199 -0
- bin/chimera-plot.py +76 -0
- bin/choose-partially-paired.py +143 -0
- bin/circle-plots.py +30 -0
- bin/compare-plotdirs.py +298 -0
- bin/diff-parameters.py +133 -0
- bin/docker-hub-push.sh +6 -0
- bin/extract-pairing-info.py +55 -0
- bin/gcdyn-simu-run.py +223 -0
- bin/gctree-run.py +244 -0
- bin/get-naive-probabilities.py +126 -0
- bin/iqtree-1.6.12 +0 -0
- bin/lonr.r +1020 -0
- bin/makeHtml +52 -0
- bin/mds-run.py +46 -0
- bin/parse-output.py +277 -0
- bin/partis +1869 -0
- bin/partis-pip +116 -0
- bin/partis.py +1869 -0
- bin/plot-gl-set-trees.py +519 -0
- bin/plot-hmms.py +151 -0
- bin/plot-lb-tree.py +427 -0
- bin/raxml-ng +0 -0
- bin/read-bcr-phylo-trees.py +38 -0
- bin/read-gctree-output.py +166 -0
- bin/run-chimeras.sh +64 -0
- bin/run-dtr-scan.sh +25 -0
- bin/run-paired-loci.sh +100 -0
- bin/run-tree-metrics.sh +88 -0
- bin/smetric-run.py +62 -0
- bin/split-loci.py +317 -0
- bin/swarm-2.1.13-linux-x86_64 +0 -0
- bin/test-germline-inference.py +425 -0
- bin/tree-perf-run.py +194 -0
- bin/vsearch-2.4.3-linux-x86_64 +0 -0
- bin/vsearch-2.4.3-macos-x86_64 +0 -0
- bin/xvfb-run +194 -0
- partis_bcr-1.0.2.data/scripts/cf-alleles.py +97 -0
- partis_bcr-1.0.2.data/scripts/cf-germlines.py +57 -0
- partis_bcr-1.0.2.data/scripts/extract-pairing-info.py +55 -0
- partis_bcr-1.0.2.data/scripts/gctree-run.py +244 -0
- partis_bcr-1.0.2.data/scripts/parse-output.py +277 -0
- partis_bcr-1.0.2.data/scripts/split-loci.py +317 -0
- partis_bcr-1.0.2.data/scripts/test.py +1005 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/METADATA +1 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/RECORD +101 -51
- partis_bcr-1.0.2.dist-info/top_level.txt +1 -0
- {partis → python}/glutils.py +1 -1
- python/main.py +30 -0
- {partis → python}/plotting.py +10 -1
- {partis → python}/treeutils.py +18 -16
- {partis → python}/utils.py +14 -7
- packages/ham/bcrham +0 -0
- partis/main.py +0 -59
- partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/WHEEL +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/entry_points.txt +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/licenses/COPYING +0 -0
- {partis → python}/__init__.py +0 -0
- {partis → python}/alleleclusterer.py +0 -0
- {partis → python}/allelefinder.py +0 -0
- {partis → python}/alleleremover.py +0 -0
- {partis → python}/annotationclustering.py +0 -0
- {partis → python}/baseutils.py +0 -0
- {partis → python}/cache/__init__.py +0 -0
- {partis → python}/cache/cached_uncertainties.py +0 -0
- {partis → python}/clusterpath.py +0 -0
- {partis → python}/coar.py +0 -0
- {partis → python}/corrcounter.py +0 -0
- {partis → python}/datautils.py +0 -0
- {partis → python}/event.py +0 -0
- {partis → python}/fraction_uncertainty.py +0 -0
- {partis → python}/gex.py +0 -0
- {partis → python}/glomerator.py +0 -0
- {partis → python}/hist.py +0 -0
- {partis → python}/hmmwriter.py +0 -0
- {partis → python}/hutils.py +0 -0
- {partis → python}/indelutils.py +0 -0
- {partis → python}/lbplotting.py +0 -0
- {partis → python}/mds.py +0 -0
- {partis → python}/mutefreqer.py +0 -0
- {partis → python}/paircluster.py +0 -0
- {partis → python}/parametercounter.py +0 -0
- {partis → python}/paramutils.py +0 -0
- {partis → python}/partitiondriver.py +0 -0
- {partis → python}/partitionplotter.py +0 -0
- {partis → python}/performanceplotter.py +0 -0
- {partis → python}/plotconfig.py +0 -0
- {partis → python}/processargs.py +0 -0
- {partis → python}/prutils.py +0 -0
- {partis → python}/recombinator.py +0 -0
- {partis → python}/scanplot.py +0 -0
- {partis → python}/seqfileopener.py +0 -0
- {partis → python}/treegenerator.py +0 -0
- {partis → python}/viterbicluster.py +0 -0
- {partis → python}/vrc01.py +0 -0
- {partis → python}/waterer.py +0 -0
bin/compare-plotdirs.py
ADDED
@@ -0,0 +1,298 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
from __future__ import absolute_import, division, unicode_literals
|
3
|
+
from __future__ import print_function
|
4
|
+
import argparse
|
5
|
+
from collections import OrderedDict
|
6
|
+
import os
|
7
|
+
import glob
|
8
|
+
import sys
|
9
|
+
import colored_traceback.always
|
10
|
+
import copy
|
11
|
+
from pathlib import Path
|
12
|
+
partis_dir = str(Path(__file__).parent.parent)
|
13
|
+
sys.path.insert(1, partis_dir) # + '/python')
|
14
|
+
|
15
|
+
import python.plotconfig as plotconfig
|
16
|
+
import python.plotting as plotting
|
17
|
+
import python.utils as utils
|
18
|
+
import python.glutils as glutils
|
19
|
+
from python.hist import Hist
|
20
|
+
import python.treeutils as treeutils
|
21
|
+
|
22
|
+
# x axis titles and plot titles both start from a private copy of the plotting legends, then get
# overridden first by the corresponding plotconfig dict and then by the treeutils legend texts
xtitledict = copy.deepcopy(plotting.legends)
ptitledict = copy.deepcopy(plotting.legends)
for tdict, cfgdict in ((xtitledict, plotconfig.xtitles), (ptitledict, plotconfig.plot_titles)):
    tdict.update(cfgdict)
    tdict.update(treeutils.legtexts)
|
29
|
+
|
30
|
+
# ----------------------------------------------------------------------------------------
|
31
|
+
def get_hists_from_dir(dirname, histname, string_to_ignore=None):
    """Read all histogram files in <dirname> that match --file-glob-str.

    Returns a dict from variable name to Hist, where the variable name is the file's base name after
    removing each of --file-replace-strs (and <string_to_ignore>, if it's set). Every hist is given
    <histname> as its title. Prints a note if no files were found.
    NOTE uses the module-level <args>, not an argument.
    """
    def get_varname(path):  # file base name, minus the bits we don't want in the variable name
        vname = os.path.basename(path)
        for remove_str in args.file_replace_strs:
            vname = vname.replace(remove_str, '')
        if string_to_ignore is not None:
            vname = vname.replace(string_to_ignore, '')
        return vname

    found_files = glob.glob('%s/%s' % (dirname, args.file_glob_str))
    hists = {get_varname(fn) : Hist(fname=fn, title=histname) for fn in found_files}
    if not hists:
        glob_info = '' if args.file_glob_str is None else ' with --file-glob-str \'%s\''%args.file_glob_str
        print(' no csvs found%s in %s' % (glob_info, dirname))
    return hists
|
43
|
+
|
44
|
+
|
45
|
+
# ----------------------------------------------------------------------------------------
|
46
|
+
def compare_directories(args, plotdirlist, outdir):
    """Write to <outdir> one comparison plot for each variable found in any dir in <plotdirlist>.

    The i-th entry of <plotdirlist> is labeled with args.names[i]. If a dir doesn't have a variable
    that some other dir has, it's represented by a placeholder hist titled 'null'. After plotting,
    an html summary is made for <outdir>.
    """
    utils.prep_dir(outdir, wildlings=['*.png', '*.svg', '*.csv'])

    # first read the hists from each dir, keyed by the name that goes with that dir
    hists_by_name = OrderedDict()
    all_varnames = set()  # every variable that showed up in at least one dir
    for idir, plotdir in enumerate(plotdirlist):
        name = args.names[idir]
        dirhists = get_hists_from_dir(plotdir, name)
        all_varnames |= set(dirhists.keys())
        hists_by_name[name] = dirhists

    # then make one plot for each variable, with one hist per dir
    for varname in all_varnames:
        hlist = []
        for dirhists in hists_by_name.values():
            hlist.append(dirhists.get(varname, Hist(1, 0, 1, title='null')))
        plot_single_variable(args, varname, hlist, outdir, pathnameclues=plotdirlist[0])

    plotting.make_html(outdir, n_columns=4)
|
62
|
+
|
63
|
+
# ----------------------------------------------------------------------------------------
|
64
|
+
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
|
65
|
+
if varname in plotconfig.gene_usage_columns:
|
66
|
+
hlist = plotting.add_bin_labels_not_in_all_hists(hlist)
|
67
|
+
|
68
|
+
no_labels = False
|
69
|
+
xline, bounds, figsize = None, None, None
|
70
|
+
stats = args.extra_stats
|
71
|
+
translegend = [0.0, -0.2]
|
72
|
+
xtitle, ytitle = hlist[0].xtitle, hlist[0].ytitle
|
73
|
+
bounds, xticks, xticklabels = args.xbounds, args.xticks, None
|
74
|
+
if xtitle == '': # arg, plotting.py thinks default should be None, hist.py thinks it's ''
|
75
|
+
xtitle = None
|
76
|
+
if '-mean-bins' in varname:
|
77
|
+
raise Exception('darn, I was hoping I wasn\'t making these plots any more')
|
78
|
+
plottitle = args.plottitle
|
79
|
+
if plottitle is None:
|
80
|
+
plottitle = ptitledict.get(varname, varname)
|
81
|
+
|
82
|
+
ytitle = 'fraction of total' if args.normalize else 'counts'
|
83
|
+
|
84
|
+
if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
|
85
|
+
assert not args.normalize
|
86
|
+
ytitle = 'mutation freq'
|
87
|
+
|
88
|
+
if varname in plotconfig.gene_usage_columns:
|
89
|
+
xtitle = 'allele'
|
90
|
+
if hlist[0].n_bins == 2:
|
91
|
+
stats = '0-bin' # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
|
92
|
+
xtitle = None
|
93
|
+
# elif hlist[0].bin_labels.count('') == hlist[0].n_bins + 2:
|
94
|
+
# xtitle = '???'
|
95
|
+
|
96
|
+
line_width_override = None
|
97
|
+
if args.performance_plots:
|
98
|
+
if 'hamming_to_true_naive' in varname:
|
99
|
+
xtitle = 'hamming distance'
|
100
|
+
if '_normed' in varname:
|
101
|
+
xtitle = 'fractional ' + xtitle
|
102
|
+
elif '_vs_mute_freq' in varname:
|
103
|
+
xtitle = 'mutation freq'
|
104
|
+
ytitle = 'fraction correct'
|
105
|
+
if varname[0] == 'v' or varname[0] == 'j':
|
106
|
+
translegend = [-0.4, -0.4]
|
107
|
+
elif varname.find('_gene') == 1:
|
108
|
+
xtitle = ''
|
109
|
+
ytitle = 'fraction correct'
|
110
|
+
else:
|
111
|
+
xtitle = 'inferred - true'
|
112
|
+
bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
|
113
|
+
else:
|
114
|
+
if bounds is None:
|
115
|
+
bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
|
116
|
+
if bounds is None and 'insertion' in varname:
|
117
|
+
bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
|
118
|
+
if varname in plotconfig.gene_usage_columns:
|
119
|
+
# no_labels = True # not sure why i wanted these labels turned off?
|
120
|
+
if 'j_' not in varname:
|
121
|
+
figsize = (10, 5)
|
122
|
+
line_width_override = 1
|
123
|
+
elif 'per-gene-per-position/v' in pathnameclues:
|
124
|
+
figsize = (20, 5)
|
125
|
+
if bounds is None:
|
126
|
+
bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname), None)
|
127
|
+
|
128
|
+
if 'IG' in varname or 'TR' in varname:
|
129
|
+
if 'mute-freqs' in pathnameclues:
|
130
|
+
gene = utils.unsanitize_name(varname)
|
131
|
+
plottitle = gene # + ' -- mutation frequency'
|
132
|
+
xtitle = 'position'
|
133
|
+
if utils.get_region(gene) == 'j':
|
134
|
+
translegend = [0.1, 0.] #(-0.35, -0.02)
|
135
|
+
else:
|
136
|
+
translegend = [0.15, -0.02]
|
137
|
+
xline = None
|
138
|
+
if args.glfo is not None:
|
139
|
+
if utils.get_region(gene) in utils.conserved_codons[args.locus]:
|
140
|
+
xline = args.glfo[utils.conserved_codons[args.locus][utils.get_region(gene)] + '-positions'][gene]
|
141
|
+
else:
|
142
|
+
ilastdash = varname.rfind('-')
|
143
|
+
gene = utils.unsanitize_name(varname[:ilastdash])
|
144
|
+
base_varname = varname[ilastdash + 1 :]
|
145
|
+
base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
|
146
|
+
plottitle = gene + ' -- ' + base_plottitle
|
147
|
+
|
148
|
+
if varname == 'cluster-sizes':
|
149
|
+
xtitle = 'cluster size'
|
150
|
+
ytitle = 'fraction of clusters' if args.normalize else 'N clusters'
|
151
|
+
plottitle = ''
|
152
|
+
xticks, xticklabels = plotting.get_cluster_size_xticks(hlist=hlist) # it would be better to use all the hists, but i think it'll just screw up the ticks
|
153
|
+
import matplotlib.pyplot as plt
|
154
|
+
if varname in ['func-per-drop', 'nonfunc-per-drop']:
|
155
|
+
bounds = (-0.5, 15.5)
|
156
|
+
if 'subtree-purity' in varname:
|
157
|
+
if 'size' in varname:
|
158
|
+
if args.log == '':
|
159
|
+
args.log = 'xy'
|
160
|
+
xticks = [1, 2, 3, 5, 10, 15, 20]
|
161
|
+
xticklabels = ['1', '2', '3', '5', '10', '15', '20']
|
162
|
+
|
163
|
+
if xtitle is None:
|
164
|
+
xtitle = xtitledict.get(varname)
|
165
|
+
|
166
|
+
if args.add_to_title is not None:
|
167
|
+
plottitle += args.add_to_title
|
168
|
+
|
169
|
+
if len(hlist) > 9: # skootch it down so they (maybe) all fit
|
170
|
+
translegend[1] -= 0.5
|
171
|
+
if args.translegend is not None: # override with the command line
|
172
|
+
translegend = args.translegend
|
173
|
+
if varname == 'paired-uids-per-uid':
|
174
|
+
translegend = [translegend[0] + 0.15, translegend[1] - 0.3]
|
175
|
+
if args.extra_stats == 'auto': # kind of hackey
|
176
|
+
if xtitle == 'inferred - true':
|
177
|
+
stats = 'absmean'
|
178
|
+
else:
|
179
|
+
stats = 'mean'
|
180
|
+
# draw that little #$*(!
|
181
|
+
linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
|
182
|
+
if args.alphas is None or len(args.alphas) != len(hlist):
|
183
|
+
if args.alphas is not None and len(args.alphas) != len(hlist):
|
184
|
+
print(' %s --alphas wrong length, using first entry for all' % utils.wrnstr())
|
185
|
+
args.alphas = [0.6 if args.alphas is None else args.alphas[0] for _ in range(len(hlist))]
|
186
|
+
shift_overflows = os.path.basename(outdir) != 'gene-call' and 'func-per-drop' not in varname
|
187
|
+
plotting.draw_no_root(hlist[0], plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=stats, bounds=bounds, ybounds=args.ybounds,
|
188
|
+
shift_overflows=shift_overflows, plottitle=plottitle, colors=args.colors,
|
189
|
+
xtitle=xtitle if args.xtitle is None else args.xtitle, ytitle=ytitle if args.ytitle is None else args.ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
|
190
|
+
linewidths=linewidths, markersizes=args.markersizes, alphas=args.alphas, errors=not args.no_errors, remove_empty_bins=True, #='y' in args.log,
|
191
|
+
figsize=figsize, no_labels=no_labels, log=args.log, translegend=translegend, xticks=xticks, xticklabels=xticklabels, square_bins=args.square_bins)
|
192
|
+
|
193
|
+
if args.swarm_meta_key is not None:
|
194
|
+
plotvals = {h.title : [h.get_bin_centers()[i] for i in h.ibiniter(True) for _ in range(int(h.bin_contents[i]))] for h in hlist}
|
195
|
+
plotting.stack_meta_hists(varname, outdir, args.swarm_meta_key, plotvals, colors={h.title : c for h, c in zip(hlist, args.colors)}, xtitle=xtitle, swarm_plots=True, no_hist=True, xticks=xticks)
|
196
|
+
|
197
|
+
# ----------------------------------------------------------------------------------------
|
198
|
+
helpstr = """
|
199
|
+
Compare csv histogram plot files across multiple directories
|
200
|
+
./bin/compare-plotdirs.py --outdir _output/tmp-plots --plotdirs docs/example-plots/sw/mute-freqs/overall:docs/example-plots/hmm/mute-freqs/overall:docs/example-plots/multi-hmm/mute-freqs/overall --names sw:hmm:multi-hmm --normalize
|
201
|
+
"""
|
202
|
+
class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that combines raw-text layout of help strings with appended default values."""
|
204
|
+
# Command line definition. Most list-valued arguments are passed as colon-separated strings, and are split
# into lists right after parsing.
formatter_class = MultiplyInheritedFormatter
parser = argparse.ArgumentParser(formatter_class=MultiplyInheritedFormatter, description=helpstr)
# input/output locations and labels
parser.add_argument('--outdir', required=True, help='Output directory to which to write the resulting comparison plots. A summary .html file is also written to <outdir>.html')
parser.add_argument('--plotdirs', required=True, help='Colon-separated list of input plot directories, each of which must have identical structure. Looks for csvs first in each dir, but then also in the subdirs of each dir (so e.g. if each of them have a/, b/, and c/ subdirs, this script will make a separate comparison of a/, b/, and c/)')
parser.add_argument('--names', required=True, help='colon-separated list of names/labels corresponding to --plotdirs (use @ as space)')
parser.add_argument('--performance-plots', action='store_true', help='set to true if these are annotation performance plots, i.e. made with --plot-annotation-performance (this makes the axis labels more sensible)')
# per-hist drawing styles
parser.add_argument('--colors', default=':'.join(plotting.default_colors), help='colon-separated list of colors to cycle through for the plotdirs')
parser.add_argument('--alphas')
parser.add_argument('--linewidths', default=':'.join(plotting.default_linewidths), help='colon-separated list of linewidths to cycle through')
parser.add_argument('--markersizes', default=':'.join(plotting.default_markersizes), help='colon-separated list of marker sizes to cycle through')
parser.add_argument('--gldirs', help='On plots showing mutation vs individual gene positions, if you\'d like a dashed vertical line showing conserved codon positions, set this as a colon-separated list of germline info dirs corresponding to each plotdir') #, default=['data/germlines/human'])
parser.add_argument('--locus', default='igh')
parser.add_argument('--normalize', action='store_true', help='If set, the histograms from each plotdir are normalized (each bin contents divided by the integral) before making the comparison (e.g. for comparing different size samples).')
parser.add_argument('--extra-stats', help='if set, adds extra stat to legend, e.g. \'mean\', \'absmean\', \'auto\'')
parser.add_argument('--translegend', help='colon-separated list of x, y values with which to translate all the legends')
parser.add_argument('--log', default='', help='Display these axes on a log scale, set to either \'x\', \'y\', or \'xy\'')
parser.add_argument('--make-parent-html', action='store_true', help='after doing everything within subdirs, make a single html in the main/parent dir with all plots from subdirs')
parser.add_argument('--add-to-title', help='string to append to existing title (use @ as space)')
# which files to read, and how to turn file names into variable names
parser.add_argument('--file-glob-str', default='*.csv', help='shell glob style regex for matching plot files')
parser.add_argument('--file-replace-strs', default='.csv', help='colon-separated list of strings to remove from file base name to get variable name')
# axis/title overrides (applied to every plot)
parser.add_argument('--xbounds')
parser.add_argument('--ybounds')
parser.add_argument('--xticks')
parser.add_argument('--plottitle')
parser.add_argument('--xtitle')
parser.add_argument('--ytitle')
parser.add_argument('--no-errors', action='store_true')
parser.add_argument('--single-plotdir', action='store_true')
parser.add_argument('--square-bins', action='store_true')
parser.add_argument('--swarm-meta-key', help='if set, also make swarm plots, pretending that each hist\'s title is the value for this "fake" meta info key, and treat each bin\'s entries as observations at the bin center\'s value')
|
235
|
+
args = parser.parse_args()

# split the colon-separated string args into lists (grouped here by the type of the list entries)
for argname in ('plotdirs', 'names', 'colors', 'gldirs', 'file_replace_strs'):  # strings
    setattr(args, argname, utils.get_arg_list(getattr(args, argname)))
for argname in ('alphas', 'translegend', 'xbounds', 'ybounds', 'xticks'):  # floats
    setattr(args, argname, utils.get_arg_list(getattr(args, argname), floatify=True))
for argname in ('linewidths', 'markersizes'):  # ints
    setattr(args, argname, utils.get_arg_list(getattr(args, argname), intify=True))

# '@' stands in for a space on the command line
args.names = [n.replace('@', ' ') for n in args.names]
if args.add_to_title is not None:
    args.add_to_title = args.add_to_title.replace('@', ' ')

# a single plotdir is (by default) interpreted as the parent of subdirs named by --names
if len(args.plotdirs) == 1 and not args.single_plotdir:
    print(' --plotdirs is length 1 (and --single-plotdir wasn\'t set), so assuming --names has the desired subdirs')
    args.plotdirs = [args.plotdirs[0] + '/' + n for n in args.names]

if len(args.plotdirs) != len(args.names):
    raise Exception('poorly formatted args:\n %s\n %s' % (' '.join(args.plotdirs), ' '.join(args.names)))

# make a merged glfo from all the gldirs (skipping any that don't exist)
args.glfo = None
for gldir in (args.gldirs if args.gldirs is not None else []):
    if not os.path.exists(gldir):
        continue
    tmpglfo = glutils.read_glfo(gldir, args.locus)
    args.glfo = tmpglfo if args.glfo is None else glutils.get_merged_glfo(args.glfo, tmpglfo)

missing_plotdirs = [d for d in args.plotdirs if not os.path.isdir(d)]
if len(missing_plotdirs) > 0:
    print(' at least one of --plotdirs doesn\'t exist: %s' % ' '.join(missing_plotdirs))
    sys.exit(0)

# work out the list of comparisons to make, each one a (list of plotdirs, outdir) pair
comparisons = []
firstdir = args.plotdirs[0]
if glob.glob(firstdir + '/*.csv'):  # first the main/parent dir, if it has csvs
    comparisons.append((args.plotdirs, args.outdir))
else:
    print(' no csvs in main/parent dir %s' % firstdir)
# then any subdirs (the first plotdir determines which subdirs we look for in all of them)
added_subds = [d for d in os.listdir(firstdir) if os.path.isdir(firstdir + '/' + d)]
for subdir in added_subds:
    comparisons.append(([d + '/' + subdir for d in args.plotdirs], args.outdir + '/' + subdir))
if added_subds:
    print(' added %d subdirs: %s' % (len(added_subds), ' '.join(added_subds)))

for dlist, outdir in comparisons:
    compare_directories(args, dlist, outdir)

if args.make_parent_html:  # didn't really test this very well
    fnoutstr, _ = utils.simplerun('find %s -type f -name *.svg' % args.outdir, return_out_err=True)
    plotting.make_html(args.outdir, fnames=[fnoutstr.strip().split('\n')])
|
bin/diff-parameters.py
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Replacement for diff -qr -x\'*.svg\' -x params -x plots.html <arg1> <arg2>, but with more intelligent
|
4
|
+
control of yamls (e.g. floating point precision) and csvs (e.g. line order)
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import absolute_import, division, unicode_literals
|
8
|
+
from __future__ import print_function
|
9
|
+
import argparse
|
10
|
+
import os
|
11
|
+
import sys
|
12
|
+
import csv
|
13
|
+
import re
|
14
|
+
import yaml
|
15
|
+
from subprocess import check_output
|
16
|
+
from io import open
|
17
|
+
|
18
|
+
# current_script_dir = os.path.dirname(os.path.realpath(__file__)).replace('/bin', '') #'/python')
|
19
|
+
# if not os.path.exists(current_script_dir):
|
20
|
+
# print 'WARNING current script dir %s doesn\'t exist, so python path may not be correctly set' % current_script_dir
|
21
|
+
# sys.path.insert(1, current_script_dir)
|
22
|
+
|
23
|
+
parser = argparse.ArgumentParser()
parser.add_argument('arg1')
parser.add_argument('arg2')
parser.add_argument('--keep-going', action='store_true', help='Don\'t fail on differences, instead just keep on chugging')
parser.add_argument('--precision', type=int, default=9, help='number of digits after the decimal place to keep when comparing floating point numbers')
args = parser.parse_args()

# Work out what to compare: sets args.dir1 and args.dir2 (the two dirs), and args.fname (base name of the single
# file to compare, or None to compare every file found under args.dir1).
if os.path.isdir(args.arg1):  # can either pass arg[12] as directories in which to look
    if not os.path.isdir(args.arg2):
        raise Exception('first arg %s is a directory, but second arg %s isn\'t' % (args.arg1, args.arg2))
    args.dir1 = args.arg1
    args.dir2 = args.arg2
    args.fname = None
else:  # ...or as single files
    # dirname() is '' for a bare file name in the current dir, which doesn't "exist" and would also turn into
    # an absolute path when '/' + fname gets appended later on, so use '.' instead
    args.dir1 = os.path.dirname(args.arg1) or '.'
    args.dir2 = os.path.dirname(args.arg2) or '.'
    for tmpdir in (args.dir1, args.dir2):
        if not os.path.exists(tmpdir):
            raise Exception('directory %s doesn\'t exist' % tmpdir)
    args.fname = os.path.basename(args.arg1)
    if args.fname != os.path.basename(args.arg2):  # the two files are found by appending the same name to each dir
        raise Exception('file names differ: %s vs %s' % (args.fname, os.path.basename(args.arg2)))
|
42
|
+
|
43
|
+
def reduce_float_precision(line, precision=None):
    """Truncate (not round) every decimal number in <line> to at most <precision> digits after the decimal point.

    Only numbers with more than <precision> digits after the decimal point are touched, and trailing zeros are
    then also clipped from the digits that are kept (so e.g. '1.10000000012' turns into '1.1').

    line: string to process
    precision: number of digits to keep after the decimal point (defaults to the command line --precision)
    Returns the modified string.
    """
    if precision is None:
        precision = args.precision
    # group 1 is the last digit before the decimal point plus the digits to keep, followed by one or more digits to drop
    regex = re.compile(r'([0-9]\.[0-9]{%d})[0-9]+' % precision)

    def clip(match):  # keep the more significant part, minus any trailing zeros
        return match.group(1).rstrip('0')

    # Substitute each number where it was matched, rather than str.replace()ing the matched text everywhere: the
    # latter also rewrites the start of any longer number that begins with the same digits, which after clipping
    # zeros changes its value (e.g. '1.10000000015' --> '1.15').
    # Repeat until nothing matches any more, since a substitution can expose a new match that starts with its own
    # last digit (e.g. in 'a.b.c'-style strings). Each substitution shortens the line, so this always terminates.
    n_subs = 1
    while n_subs > 0:
        line, n_subs = regex.subn(clip, line)
    return line
|
55
|
+
|
56
|
+
def get_lines(fname, reduce_precision=False):
    """Return the lines of <fname> as a sorted list (line endings included).

    If <reduce_precision> is set, each line is then passed through reduce_float_precision().
    Raises Exception if the file can't be read.
    """
    try:
        with open(fname) as infile:
            sorted_lines = sorted(infile)
            if not reduce_precision:
                return sorted_lines
            return [reduce_float_precision(tmpline) for tmpline in sorted_lines]
    except IOError:
        raise Exception(os.path.basename(fname) + ' not found in ' + os.path.dirname(fname))
|
67
|
+
|
68
|
+
def check_lines(lines1, lines2):
    """Return the entries of <lines1> that don't appear in <lines2>.

    The returned list keeps the order of <lines1>, including any repeated lines.
    """
    lines2_set = set(lines2)  # constant-time lookup, rather than scanning the whole <lines2> list for each line
    return [line for line in lines1 if line not in lines2_set]
|
75
|
+
|
76
|
+
def check_textfile(fname):
    """Compare the file <fname> (a path relative to args.dir1 and args.dir2) between the two dirs.

    Lines are compared without regard to order, and after reducing floating point precision. If any lines
    appear in only one of the two files, they're printed, and unless --keep-going is set we exit with status 1.
    """
    path1, path2 = args.dir1 + fname, args.dir2 + fname
    lines1 = get_lines(path1, reduce_precision=True)
    lines2 = get_lines(path2, reduce_precision=True)
    only_in_1 = check_lines(lines1, lines2)
    only_in_2 = check_lines(lines2, lines1)
    if not only_in_1 and not only_in_2:  # no differences
        return

    for path, difflines in ((path1, only_in_1), (path2, only_in_2)):
        print('differing lines from', path)
        for line in difflines:
            print(' ', line.strip())
    if not args.keep_going:
        sys.exit(1)
|
90
|
+
|
91
|
+
# def diff_models(model1, model2):
|
92
|
+
# for state in model1.states:
|
93
|
+
# if state
|
94
|
+
# for transition in
|
95
|
+
# sys.exit()
|
96
|
+
|
97
|
+
# def check_yaml(fname):
|
98
|
+
# print fname
|
99
|
+
# with open(args.dir1 + fname) as infile1:
|
100
|
+
# with open(args.dir2 + fname) as infile2:
|
101
|
+
# model1 = yaml.load(infile1)
|
102
|
+
# model2 = yaml.load(infile2)
|
103
|
+
# diff_models(model1, model2)
|
104
|
+
# diff_models(model2, model1)
|
105
|
+
|
106
|
+
def get_file_list(extension=''):
    """Return the paths found by `find` under args.dir1, with the leading args.dir1 removed from each.

    extension: if empty, all regular files are returned, otherwise everything whose name ends with '.<extension>'
    Raises subprocess.CalledProcessError if the find command fails.
    """
    cmd = ['find', args.dir1]
    if extension == '':
        cmd += ['-type', 'f']
    else:
        cmd += ['-name', '*.' + extension]
    output = check_output(cmd, universal_newlines=True)

    def remove_dir(path):  # only remove dir1 from the start of the path (str.replace() would also remove any later occurrences)
        return path[len(args.dir1):] if path.startswith(args.dir1) else path

    # find prints one path per line, so split on lines (splitting on any whitespace breaks up file names that contain spaces)
    return [remove_dir(path) for path in output.splitlines() if path != '']
|
114
|
+
|
115
|
+
# ----------------------------------------------------------------------------------------
|
116
|
+
# for fname in get_file_list('csv'):
|
117
|
+
# check_textfile(fname)
|
118
|
+
# for fname in get_file_list('yaml'):
|
119
|
+
# check_textfile(fname)
|
120
|
+
|
121
|
+
# either compare everything under dir1, or just the one file that was specified
filelist = get_file_list() if args.fname is None else ['/' + args.fname, ]
for fname in filelist:
    if any(ext in fname for ext in ('.csv', '.yaml')):  # text files that we know how to compare
        check_textfile(fname)
    elif not any(ext in fname for ext in ('.svg', '.html')):  # svg and html files are ignored, anything else is an error
        raise Exception(fname + ' has an extension I can\'t handle')

sys.exit(0)
|
bin/extract-pairing-info.py
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
from __future__ import absolute_import, division, unicode_literals
|
3
|
+
from __future__ import print_function
|
4
|
+
import csv
|
5
|
+
import os
|
6
|
+
import sys
|
7
|
+
from io import open
|
8
|
+
csv.field_size_limit(sys.maxsize) # make sure we can write very large csv fields
|
9
|
+
import argparse
|
10
|
+
import colored_traceback.always
|
11
|
+
import yaml
|
12
|
+
import json
|
13
|
+
import operator
|
14
|
+
import random
|
15
|
+
import numpy
|
16
|
+
from pathlib import Path
|
17
|
+
|
18
|
+
# if you move this script, you'll need to change this method of getting the imports
|
19
|
+
partis_dir = str(Path(__file__).parent.parent)
|
20
|
+
sys.path.insert(1, partis_dir) # + '/python')
|
21
|
+
|
22
|
+
import python.utils as utils
|
23
|
+
|
24
|
+
dstr = """
Extract heavy/light chain pairing info from fasta file <infname> and write it to yaml/json file <outfname>.
Should have the same effect as setting --guess-pairing-info when running bin/split-loci.py.
"""
# NOTE ArgumentDefaultsHelpFormatter only appends the default to arguments that have a help string, so the
# arguments below without one (e.g. --random-seed) don't show their defaults in the help output
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=dstr)
parser.add_argument('infname')
parser.add_argument('outfname')
parser.add_argument('--droplet-id-separators', help=utils.did_help['seps'])
parser.add_argument('--droplet-id-indices', help=utils.did_help['indices'])
parser.add_argument('--overwrite', action='store_true')
parser.add_argument('--for-testing-n-max-queries', type=int, default=-1, help='only for testing, applied when reading initial fasta file, just in case it\'s huge and you want to run quickly without having to read the whole file')
parser.add_argument('--n-max-queries', type=int, default=-1, help='see partis help (although here it applies to droplets, not individual seqs)')
parser.add_argument('--n-random-queries', type=int, help='see partis help (although here it applies to droplets, not individual seqs)')
parser.add_argument('--input-metafname', help='json/yaml file with additional (beyond pairing info) input meta info (see partis help)')
parser.add_argument('--random-seed', type=int, default=1)
args = parser.parse_args()

# seed both the python and numpy random number generators with the same value
for seed_fcn in (random.seed, numpy.random.seed):
    seed_fcn(args.random_seed)
args.droplet_id_indices = utils.get_arg_list(args.droplet_id_indices, intify=True)

# nothing to do if the output is already there (what counts as "there" is up to utils.output_exists())
if utils.output_exists(args, args.outfname, offset=4, debug=False):
    print(' extract-pairing-info.py output exists and --overwrite was not set, so not doing anything: %s' % args.outfname)
    sys.exit(0)

# read the sequences, optionally subset them, then work out the pairing info
seqfos = utils.read_fastx(args.infname, n_max_queries=args.for_testing_n_max_queries)
need_subset = args.n_max_queries != -1 or args.n_random_queries is not None
if need_subset:
    seqfos = utils.subset_paired_queries(seqfos, args.droplet_id_separators, args.droplet_id_indices, n_max_queries=args.n_max_queries, n_random_queries=args.n_random_queries)
metafos = utils.extract_pairing_info(seqfos, droplet_id_separators=args.droplet_id_separators, droplet_id_indices=args.droplet_id_indices, input_metafname=args.input_metafname)

# write the result
utils.mkdir(args.outfname, isfile=True)
utils.jsdump(args.outfname, metafos)
|