PyPI - partis-bcr - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

partis-bcr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

bin/FastTree +0 -0
bin/add-chimeras.py +59 -0
bin/add-seqs-to-outputs.py +81 -0
bin/bcr-phylo-run.py +799 -0
bin/build.sh +24 -0
bin/cf-alleles.py +97 -0
bin/cf-germlines.py +57 -0
bin/cf-linearham.py +199 -0
bin/chimera-plot.py +76 -0
bin/choose-partially-paired.py +143 -0
bin/circle-plots.py +30 -0
bin/compare-plotdirs.py +298 -0
bin/diff-parameters.py +133 -0
bin/docker-hub-push.sh +6 -0
bin/extract-pairing-info.py +55 -0
bin/gcdyn-simu-run.py +223 -0
bin/gctree-run.py +244 -0
bin/get-naive-probabilities.py +126 -0
bin/iqtree-1.6.12 +0 -0
bin/lonr.r +1020 -0
bin/makeHtml +52 -0
bin/mds-run.py +46 -0
bin/parse-output.py +277 -0
bin/partis +1869 -0
bin/partis-pip +116 -0
bin/partis.py +1869 -0
bin/plot-gl-set-trees.py +519 -0
bin/plot-hmms.py +151 -0
bin/plot-lb-tree.py +427 -0
bin/raxml-ng +0 -0
bin/read-bcr-phylo-trees.py +38 -0
bin/read-gctree-output.py +166 -0
bin/run-chimeras.sh +64 -0
bin/run-dtr-scan.sh +25 -0
bin/run-paired-loci.sh +100 -0
bin/run-tree-metrics.sh +88 -0
bin/smetric-run.py +62 -0
bin/split-loci.py +317 -0
bin/swarm-2.1.13-linux-x86_64 +0 -0
bin/test-germline-inference.py +425 -0
bin/tree-perf-run.py +194 -0
bin/vsearch-2.4.3-linux-x86_64 +0 -0
bin/vsearch-2.4.3-macos-x86_64 +0 -0
bin/xvfb-run +194 -0
partis_bcr-1.0.2.data/scripts/cf-alleles.py +97 -0
partis_bcr-1.0.2.data/scripts/cf-germlines.py +57 -0
partis_bcr-1.0.2.data/scripts/extract-pairing-info.py +55 -0
partis_bcr-1.0.2.data/scripts/gctree-run.py +244 -0
partis_bcr-1.0.2.data/scripts/parse-output.py +277 -0
partis_bcr-1.0.2.data/scripts/split-loci.py +317 -0
partis_bcr-1.0.2.data/scripts/test.py +1005 -0
{partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/METADATA +1 -1
{partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/RECORD +101 -51
partis_bcr-1.0.2.dist-info/top_level.txt +1 -0
{partis → python}/glutils.py +1 -1
python/main.py +30 -0
{partis → python}/plotting.py +10 -1
{partis → python}/treeutils.py +18 -16
{partis → python}/utils.py +14 -7
packages/ham/bcrham +0 -0
partis/main.py +0 -59
partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
{partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/WHEEL +0 -0
{partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/entry_points.txt +0 -0
{partis_bcr-1.0.0.dist-info → partis_bcr-1.0.2.dist-info}/licenses/COPYING +0 -0
{partis → python}/__init__.py +0 -0
{partis → python}/alleleclusterer.py +0 -0
{partis → python}/allelefinder.py +0 -0
{partis → python}/alleleremover.py +0 -0
{partis → python}/annotationclustering.py +0 -0
{partis → python}/baseutils.py +0 -0
{partis → python}/cache/__init__.py +0 -0
{partis → python}/cache/cached_uncertainties.py +0 -0
{partis → python}/clusterpath.py +0 -0
{partis → python}/coar.py +0 -0
{partis → python}/corrcounter.py +0 -0
{partis → python}/datautils.py +0 -0
{partis → python}/event.py +0 -0
{partis → python}/fraction_uncertainty.py +0 -0
{partis → python}/gex.py +0 -0
{partis → python}/glomerator.py +0 -0
{partis → python}/hist.py +0 -0
{partis → python}/hmmwriter.py +0 -0
{partis → python}/hutils.py +0 -0
{partis → python}/indelutils.py +0 -0
{partis → python}/lbplotting.py +0 -0
{partis → python}/mds.py +0 -0
{partis → python}/mutefreqer.py +0 -0
{partis → python}/paircluster.py +0 -0
{partis → python}/parametercounter.py +0 -0
{partis → python}/paramutils.py +0 -0
{partis → python}/partitiondriver.py +0 -0
{partis → python}/partitionplotter.py +0 -0
{partis → python}/performanceplotter.py +0 -0
{partis → python}/plotconfig.py +0 -0
{partis → python}/processargs.py +0 -0
{partis → python}/prutils.py +0 -0
{partis → python}/recombinator.py +0 -0
{partis → python}/scanplot.py +0 -0
{partis → python}/seqfileopener.py +0 -0
{partis → python}/treegenerator.py +0 -0
{partis → python}/viterbicluster.py +0 -0
{partis → python}/vrc01.py +0 -0
{partis → python}/waterer.py +0 -0

bin/FastTree ADDED Viewed

Binary file

bin/add-chimeras.py ADDED Viewed

@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+from __future__ import absolute_import, division, unicode_literals
+from __future__ import print_function
+import argparse
+import collections
+import numpy
+import random
+import sys
+import os
+import csv
+from io import open
+from pathlib import Path
+partis_dir = str(Path(__file__).parent.parent)
+if not os.path.exists(partis_dir):
+    print('WARNING current script dir %s doesn\'t exist, so python path may not be correctly set' % partis_dir)
+sys.path.insert(1, partis_dir) # + '/python')
+import python.utils as utils
+import python.seqfileopener as seqfileopener
+parser = argparse.ArgumentParser()
+parser.add_argument('infile')
+parser.add_argument('outfile')
+parser.add_argument('--debug', action='store_true')
+parser.add_argument('--chimera-freq', default=1., type=float, help='fraction of sequences to make chimeric')
+parser.add_argument('--min-chunk-len', default=15, type=int, help='require that each bit of the chimera is at least this long')
+args = parser.parse_args()
+input_info, _, _ = seqfileopener.read_sequence_file(args.infile, is_data=False)
+if len(input_info) < 50:
+    print('%s making chimeras with only %d sequences, and since we choose from among the existing sequence for templates this won\'t be very effective' % (utils.color('yellow', 'warning'), len(input_info)))
+n_chimeric = 0
+outfo = collections.OrderedDict()
+for uid, seqfo in input_info.items():
+    if args.debug:
+        print(uid)
+    if numpy.random.uniform(0, 1) > args.chimera_freq:  # no chimeras for this sequence
+        if args.debug:
+            print('        non-chimeric')
+        continue
+    break_point = random.randint(args.min_chunk_len, len(seqfo['seqs'][0]) - args.min_chunk_len)
+    switch_uid = numpy.random.choice(input_info)
+    switch_seq = input_info[switch_uid]['seqs'][0][ : break_point]
+    if args.debug:
+        print('    switching to %s at %d:' % (switch_uid, break_point))
+        print('          %s' % switch_seq)
+        print('          %s%s' % (' ' * len(switch_seq), seqfo['seqs'][0][break_point : ]))
+    outfo[uid] = switch_seq + seqfo['seqs'][0][break_point : ]
+    n_chimeric += 1
+print('writing %d / %d chimeric sequences to %s' % (n_chimeric, len(input_info), args.outfile))
+with open(args.outfile, 'w') as outfile:
+    for uid, seq in outfo.items():
+        outfile.write('>%s\n%s\n' % (uid, seq))

bin/add-seqs-to-outputs.py ADDED Viewed

@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+from __future__ import absolute_import, division, unicode_literals
+from __future__ import print_function
+import csv
+import os
+import sys
+csv.field_size_limit(sys.maxsize)  # make sure we can write very large csv fields
+import argparse
+import operator
+import colored_traceback.always
+import collections
+# if you move this script, you'll need to change this method of getting the imports
+from pathlib import Path
+partis_dir = str(Path(__file__).parent.parent)
+sys.path.insert(1, partis_dir) # + '/python')
+import python.utils as utils
+import python.glutils as glutils
+from python.clusterpath import ClusterPath
+dstr = """
+Add seqs from the fasta file --new-seq-file to an annotation from --partis-output-file.
+Looks for a cluster in the best partition that has sequences in common with the fasta file (and crashes if there's more than one such cluster).
+Writes a single modified annotation to --outfile.
+"""
+parser = argparse.ArgumentParser(description=dstr,
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)  # why tf isn't this printing the defaults?
+parser.add_argument('--new-seq-file', required=True, help='fasta input file with seqs to be added to annotations + partitions in partis output yaml')
+parser.add_argument('--partis-output-file', required=True, help='partis output file to which to add the seqs from --new-seq-file')
+parser.add_argument('--partition-index', type=int, help='index of partition from which to take the clusters/annotations (if not set, uses the best partition)')
+parser.add_argument('--glfo-dir', default=partis_dir + '/data/germlines/human', help='germline info directory. Only used if --partis-output-file is an old-style .csv, and this default dir may work if your output file doesn\'t have novel inferred genes. Otherwise, is the germline info dir from the partis inferred parameter directory corresponding to your output file --partis-output-file.')
+parser.add_argument('--locus', default='igh')
+parser.add_argument('--outfile', required=True, help='output partis yaml file')
+parser.add_argument('--debug', action='store_true')
+parser.add_argument('--n-test-subset-seqs', type=int, help='take only the first N seqs from both the fasta file and the annotation in the partis output file (e.g. for testing when the family is huge)')
+args = parser.parse_args()
+new_seqfos = utils.read_fastx(args.new_seq_file, sanitize_seqs=True)
+print('    read %d seqs from %s' % (len(new_seqfos), args.new_seq_file))
+glfo = None
+if utils.getsuffix(args.partis_output_file) == '.csv':
+    print('    reading deprecated csv format, so need to read germline info from somewhere else, using --glfo-dir %s, hopefully it works' % args.glfo_dir)
+    glfo = glutils.read_glfo(args.glfo_dir, locus=args.locus)
+glfo, annotation_list, cpath = utils.read_output(args.partis_output_file, glfo=glfo, locus=args.locus)
+if args.partition_index is not None:
+    print('  using non-best partition index %d (best is %d)' % (args.partition_index, cpath.i_best))
+partition = cpath.partitions[cpath.i_best if args.partition_index is None else args.partition_index]
+print('    read partition with %d clusters from %s' % (len(partition), args.partis_output_file))
+new_uids = set(sfo['name'] for sfo in new_seqfos)
+clusters_with_overlap = []
+for cluster in partition:
+    overlap_uids = set(cluster) & new_uids
+    if len(overlap_uids) > 0:
+        clusters_with_overlap.append((cluster, overlap_uids))
+if len(clusters_with_overlap) == 0:
+    raise Exception('no clusters in partition have any overlap with sequences from fasta file')
+elif len(clusters_with_overlap) > 1:
+    # raise Exception('too many clusters %d in the partition overlaps with sequences from the fasta file' % len(clusters_with_overlap))
+    clusters_with_overlap = sorted(clusters_with_overlap, key=lambda p: len(p[1]), reverse=True)
+    ostrs = ['%d %d'%(len(c), len(o)) for c, o in clusters_with_overlap]
+    print('  %s more than one cluster overlaps with sequences from fasta file, just taking first one (size overlap): %s,  %s' % (utils.color('yellow', 'warning'), utils.color('red', ostrs[0]), ',  '.join(ostrs[1:])))
+old_cluster = clusters_with_overlap[0][0]
+print('    adding %d fasta sequences to cluster of size %d (%d fasta sequences were already in cluster)' % (len(new_uids - set(old_cluster)), len(old_cluster), len(new_uids & set(old_cluster))))
+sfos_to_add = [sfo for sfo in new_seqfos if sfo['name'] not in old_cluster]
+annotation_dict = utils.get_annotation_dict(annotation_list)
+annotation = annotation_dict[':'.join(old_cluster)]
+if args.n_test_subset_seqs is not None:
+    print('  taking only first %d seqs from fasta and annotation' % args.n_test_subset_seqs)
+    utils.restrict_to_iseqs(annotation, list(range(args.n_test_subset_seqs)), glfo)
+    sfos_to_add = sfos_to_add[:args.n_test_subset_seqs]
+utils.add_seqs_to_line(annotation, sfos_to_add, glfo, debug=args.debug)
+output_headers = list(set(annotation_list[0].keys()) | set(utils.annotation_headers))  # try to pick up any extra headers that were written to the file
+utils.write_annotations(args.outfile, glfo, [annotation], output_headers)

partis-bcr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

partis-bcr 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl