TSSV 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tssv-1.2.1/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2012-2018 Jeroen F.J. Laros <j.f.j.laros@lumc.nl>
2
+ Copyright (c) 2016 Jerry Hoogenboom <j.hoogenboom@nfi.minvenj.nl>
3
+ Copyright (c) 2012 Jaap W.F. van der Heijden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ of the Software, and to permit persons to whom the Software is furnished to do
10
+ so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
tssv-1.2.1/PKG-INFO ADDED
@@ -0,0 +1,55 @@
1
+ Metadata-Version: 2.4
2
+ Name: TSSV
3
+ Version: 1.2.1
4
+ Summary: Targeted characterisation of short structural variation.
5
+ Author-email: "Jeroen F.J. Laros" <jlaros@fixedpoint.nl>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://git.lumc.nl/j.f.j.laros/tssv
8
+ Keywords: bioinformatics
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering
13
+ Description-Content-Type: text/x-rst
14
+ License-File: LICENSE.md
15
+ Requires-Dist: biopython
16
+ Requires-Dist: requests
17
+ Requires-Dist: xopen
18
+ Dynamic: license-file
19
+
20
+ TSSV: Targeted characterisation of short structural variation
21
+ =============================================================
22
+
23
+ .. image:: https://img.shields.io/github/last-commit/jfjlaros/tssv.svg
24
+ :target: https://github.com/jfjlaros/tssv/graphs/commit-activity
25
+ .. image:: https://github.com/jfjlaros/tssv/actions/workflows/test.yml/badge.svg
26
+ :target: https://github.com/jfjlaros/tssv/actions/workflows/test.yml
27
+ .. image:: https://readthedocs.org/projects/tssv/badge/?version=latest
28
+ :target: https://tssv.readthedocs.io/en/latest
29
+ .. image:: https://img.shields.io/github/release-date/jfjlaros/tssv.svg
30
+ :target: https://github.com/jfjlaros/tssv/releases
31
+ .. image:: https://img.shields.io/github/release/jfjlaros/tssv.svg
32
+ :target: https://github.com/jfjlaros/tssv/releases
33
+ .. image:: https://img.shields.io/pypi/v/tssv.svg
34
+ :target: https://pypi.org/project/tssv/
35
+ .. image:: https://img.shields.io/github/languages/code-size/jfjlaros/tssv.svg
36
+ :target: https://github.com/jfjlaros/tssv
37
+ .. image:: https://img.shields.io/github/languages/count/jfjlaros/tssv.svg
38
+ :target: https://github.com/jfjlaros/tssv
39
+ .. image:: https://img.shields.io/github/languages/top/jfjlaros/tssv.svg
40
+ :target: https://github.com/jfjlaros/tssv
41
+ .. image:: https://img.shields.io/github/license/jfjlaros/tssv.svg
42
+ :target: https://raw.githubusercontent.com/jfjlaros/tssv/master/LICENSE.md
43
+
44
+ ----
45
+
46
+ TSSV is a program that does targeted characterisation of short structural
47
+ variation. It can be used for STR analysis, or any other type of targeted
48
+ analysis. It characterises any variation between a set of user-defined markers.
49
+
50
+ TSSV is platform-independent. It has been tested on Linux, macOS, and Windows.
51
+
52
+ Please see ReadTheDocs_ for the latest documentation.
53
+
54
+
55
+ .. _ReadTheDocs: https://tssv.readthedocs.io/en/latest/index.html
tssv-1.2.1/README.rst ADDED
@@ -0,0 +1,36 @@
1
+ TSSV: Targeted characterisation of short structural variation
2
+ =============================================================
3
+
4
+ .. image:: https://img.shields.io/github/last-commit/jfjlaros/tssv.svg
5
+ :target: https://github.com/jfjlaros/tssv/graphs/commit-activity
6
+ .. image:: https://github.com/jfjlaros/tssv/actions/workflows/test.yml/badge.svg
7
+ :target: https://github.com/jfjlaros/tssv/actions/workflows/test.yml
8
+ .. image:: https://readthedocs.org/projects/tssv/badge/?version=latest
9
+ :target: https://tssv.readthedocs.io/en/latest
10
+ .. image:: https://img.shields.io/github/release-date/jfjlaros/tssv.svg
11
+ :target: https://github.com/jfjlaros/tssv/releases
12
+ .. image:: https://img.shields.io/github/release/jfjlaros/tssv.svg
13
+ :target: https://github.com/jfjlaros/tssv/releases
14
+ .. image:: https://img.shields.io/pypi/v/tssv.svg
15
+ :target: https://pypi.org/project/tssv/
16
+ .. image:: https://img.shields.io/github/languages/code-size/jfjlaros/tssv.svg
17
+ :target: https://github.com/jfjlaros/tssv
18
+ .. image:: https://img.shields.io/github/languages/count/jfjlaros/tssv.svg
19
+ :target: https://github.com/jfjlaros/tssv
20
+ .. image:: https://img.shields.io/github/languages/top/jfjlaros/tssv.svg
21
+ :target: https://github.com/jfjlaros/tssv
22
+ .. image:: https://img.shields.io/github/license/jfjlaros/tssv.svg
23
+ :target: https://raw.githubusercontent.com/jfjlaros/tssv/master/LICENSE.md
24
+
25
+ ----
26
+
27
+ TSSV is a program that does targeted characterisation of short structural
28
+ variation. It can be used for STR analysis, or any other type of targeted
29
+ analysis. It characterises any variation between a set of user-defined markers.
30
+
31
+ TSSV is platform-independent. It has been tested on Linux, macOS, and Windows.
32
+
33
+ Please see ReadTheDocs_ for the latest documentation.
34
+
35
+
36
+ .. _ReadTheDocs: https://tssv.readthedocs.io/en/latest/index.html
@@ -0,0 +1,55 @@
1
+ Metadata-Version: 2.4
2
+ Name: TSSV
3
+ Version: 1.2.1
4
+ Summary: Targeted characterisation of short structural variation.
5
+ Author-email: "Jeroen F.J. Laros" <jlaros@fixedpoint.nl>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://git.lumc.nl/j.f.j.laros/tssv
8
+ Keywords: bioinformatics
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering
13
+ Description-Content-Type: text/x-rst
14
+ License-File: LICENSE.md
15
+ Requires-Dist: biopython
16
+ Requires-Dist: requests
17
+ Requires-Dist: xopen
18
+ Dynamic: license-file
19
+
20
+ TSSV: Targeted characterisation of short structural variation
21
+ =============================================================
22
+
23
+ .. image:: https://img.shields.io/github/last-commit/jfjlaros/tssv.svg
24
+ :target: https://github.com/jfjlaros/tssv/graphs/commit-activity
25
+ .. image:: https://github.com/jfjlaros/tssv/actions/workflows/test.yml/badge.svg
26
+ :target: https://github.com/jfjlaros/tssv/actions/workflows/test.yml
27
+ .. image:: https://readthedocs.org/projects/tssv/badge/?version=latest
28
+ :target: https://tssv.readthedocs.io/en/latest
29
+ .. image:: https://img.shields.io/github/release-date/jfjlaros/tssv.svg
30
+ :target: https://github.com/jfjlaros/tssv/releases
31
+ .. image:: https://img.shields.io/github/release/jfjlaros/tssv.svg
32
+ :target: https://github.com/jfjlaros/tssv/releases
33
+ .. image:: https://img.shields.io/pypi/v/tssv.svg
34
+ :target: https://pypi.org/project/tssv/
35
+ .. image:: https://img.shields.io/github/languages/code-size/jfjlaros/tssv.svg
36
+ :target: https://github.com/jfjlaros/tssv
37
+ .. image:: https://img.shields.io/github/languages/count/jfjlaros/tssv.svg
38
+ :target: https://github.com/jfjlaros/tssv
39
+ .. image:: https://img.shields.io/github/languages/top/jfjlaros/tssv.svg
40
+ :target: https://github.com/jfjlaros/tssv
41
+ .. image:: https://img.shields.io/github/license/jfjlaros/tssv.svg
42
+ :target: https://raw.githubusercontent.com/jfjlaros/tssv/master/LICENSE.md
43
+
44
+ ----
45
+
46
+ TSSV is a program that does targeted characterisation of short structural
47
+ variation. It can be used for STR analysis, or any other type of targeted
48
+ analysis. It characterises any variation between a set of user-defined markers.
49
+
50
+ TSSV is platform-independent. It has been tested on Linux, macOS, and Windows.
51
+
52
+ Please see ReadTheDocs_ for the latest documentation.
53
+
54
+
55
+ .. _ReadTheDocs: https://tssv.readthedocs.io/en/latest/index.html
@@ -0,0 +1,20 @@
1
+ LICENSE.md
2
+ README.rst
3
+ pyproject.toml
4
+ TSSV.egg-info/PKG-INFO
5
+ TSSV.egg-info/SOURCES.txt
6
+ TSSV.egg-info/dependency_links.txt
7
+ TSSV.egg-info/entry_points.txt
8
+ TSSV.egg-info/requires.txt
9
+ TSSV.egg-info/top_level.txt
10
+ extras/annotate/annotate.py
11
+ tests/test_align.py
12
+ tests/test_annotate.py
13
+ tests/test_tssv.py
14
+ tssv/__init__.py
15
+ tssv/align_pair.py
16
+ tssv/cli.py
17
+ tssv/sgAlign.c
18
+ tssv/sgAlign.h
19
+ tssv/sgAlignWrapper.c
20
+ tssv/tssv.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ tannotate = tssv_extras.annotate:main
3
+ tssv = tssv.cli:main
@@ -0,0 +1,3 @@
1
+ biopython
2
+ requests
3
+ xopen
@@ -0,0 +1,2 @@
1
+ tssv
2
+ tssv_extras
@@ -0,0 +1,95 @@
1
+ """Convert a csv files containing alleles and counts to HGVS descriptions of
2
+ alleles and single variants. Also report statistics about the variant types.
3
+
4
+
5
+ The input file is typically one of the output files of tssv.
6
+ """
7
+ from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter
8
+ from collections import defaultdict
9
+ from sys import stdout
10
+
11
+ from requests import get as req_get
12
+
13
+
14
def write_table(data, title, report_handle, minimum):
    """Write a table to a file.

    Rows are written in descending order of their total count (the first
    element of every value in `data`). Rows with a total count below
    `minimum` are omitted.

    :arg dict data: Dictionary containing counts per type; every value holds
        the total, forward and reverse count, in that order.
    :arg str title: Name of the first column.
    :arg stream report_handle: Open writeable handle to the report file.
    :arg int minimum: Minimum count.
    """
    report_handle.write('{}\ttotal\tforward\treverse\n'.format(title))

    # Order by the total count and not by the key itself: the early exit
    # below is only valid when the totals are visited from high to low.
    for name in sorted(data, key=lambda x: data[x][0], reverse=True):
        if data[name][0] < minimum:
            break
        report_handle.write('{}\t{}\t{}\t{}\n'.format(name, *data[name]))
28
+
29
+
30
def annotate(alleles_handle, reference, report_handle, minimum):
    """Convert a csv file containing alleles and counts to HGVS descriptions
    of alleles, single variants and variant types.

    The first line of the alleles file is skipped. For every other line the
    first tab separated field is sent, together with the reference, to the
    Mutalyzer `descriptionExtract` service; the remaining fields are integer
    counts that are accumulated per allele description, per single variant
    description and per variant type. Three tables are written to the report.

    :arg stream alleles_handle: Open handle to the alleles file.
    :arg str reference: The reference sequence.
    :arg stream report_handle: Open writeable handle to the report file.
    :arg int minimum: Minimum count.

    :raises requests.HTTPError: If the service answers with an error status.
    """
    alleles = defaultdict(lambda: [0, 0, 0])
    raw_vars = defaultdict(lambda: [0, 0, 0])
    classification = defaultdict(lambda: [0, 0, 0])

    def add_counts(table, key, counts):
        """Add `counts` element-wise to the counts stored under `key`."""
        table[key] = [x + y for x, y in zip(table[key], counts)]

    for line in alleles_handle.readlines()[1:]:
        # A blank line carries no allele and no counts; it would otherwise
        # result in an empty count list that cannot be reported.
        if not line.strip():
            continue
        fields = line.strip('\n').split('\t')

        # Let requests build (and URL-encode) the query string.
        response = req_get(
            'https://v2.mutalyzer.nl/json/descriptionExtract',
            params={'reference': reference, 'observed': fields[0]})
        # Fail on an HTTP error instead of on an unparsable error page.
        response.raise_for_status()
        allele_description = response.json()
        encountered = [int(count) for count in fields[1:]]

        add_counts(alleles, allele_description['description'], encountered)
        for variant in allele_description['allele']:
            add_counts(raw_vars, variant['description'], encountered)
            add_counts(classification, variant['type'], encountered)

    write_table(alleles, 'allele', report_handle, minimum)
    report_handle.write('\n')
    write_table(raw_vars, 'variant', report_handle, minimum)
    report_handle.write('\n')
    write_table(classification, 'class', report_handle, minimum)
64
+
65
+
66
def main():
    """Main entry point.

    Build the command line interface from the module docstring, parse the
    arguments and run `annotate`. An `OSError` raised while annotating is
    reported through the argument parser.
    """
    doc_parts = __doc__.split('\n\n\n')
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=doc_parts[0],
        epilog=doc_parts[1])

    # Every entry holds the name or flag and the options of one argument.
    argument_specs = (
        (('alleles',), {
            'metavar': 'alleles', 'type': FileType('r'),
            'help': 'the alleles file'}),
        (('reference',), {
            'metavar': 'reference', 'type': str,
            'help': 'sequence of the reference allele'}),
        (('-r',), {
            'dest': 'report', 'type': FileType('w'), 'default': stdout,
            'help': 'name of the report file'}),
        (('-a',), {
            'dest': 'minimum', 'type': int, 'default': 0,
            'help': 'minimum count (default=%(default)s)'}))
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    try:
        annotate(args.alleles, args.reference, args.report, args.minimum)
    except OSError as error:
        parser.error(error)
92
+
93
+
94
+ if __name__ == '__main__':
95
+ main()
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ['setuptools']
3
+ build-backend = 'setuptools.build_meta'
4
+
5
+ [tool.setuptools]
6
+ package-dir = {tssv = 'tssv', tssv_extras = 'extras/annotate'}
7
+ ext-modules = [{name = 'tssv.sg_align', sources = ['tssv/sgAlignWrapper.c', 'tssv/sgAlign.c'], extra-compile-args = ['-O3']}]
8
+
9
+ [tool.setuptools.package-data]
10
+ tssv = ['sgAlign.h']
11
+
12
+ [project]
13
+ name = 'TSSV'
14
+ description = 'Targeted characterisation of short structural variation.'
15
+ version = '1.2.1'
16
+
17
+ authors = [{name = 'Jeroen F.J. Laros', email = 'jlaros@fixedpoint.nl'}]
18
+ urls = {homepage = 'https://git.lumc.nl/j.f.j.laros/tssv'}
19
+ readme = 'README.rst'
20
+ keywords = ['bioinformatics']
21
+ classifiers = [
22
+ 'Programming Language :: Python :: 3',
23
+ 'Operating System :: OS Independent',
24
+ 'Intended Audience :: Science/Research',
25
+ 'Topic :: Scientific/Engineering']
26
+ license = 'MIT'
27
+
28
+ dependencies = ['biopython', 'requests', 'xopen']
29
+
30
+ [project.scripts]
31
+ tssv = 'tssv.cli:main'
32
+ tannotate = 'tssv_extras.annotate:main'
tssv-1.2.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,41 @@
1
+ """Tests for the alignment modules."""
2
+ from Bio import Seq
3
+
4
+ from tssv import align_pair
5
+
6
+
7
class TestAlign:
    """Tests for `align_pair` using one fixed reference sequence."""

    def setup_method(self):
        """Store the reference sequence and its reverse complement."""
        self._reference = 'GACTGTCGTGGGCTCTTACGCACATATTATAACTTTCATAAGTTTGTCAG'
        self._reference_rc = Seq.reverse_complement(self._reference)

    def _align(self, *args):
        """Call `align_pair` on the stored reference with extra `args`."""
        return align_pair(self._reference, self._reference_rc, *args)

    def test_align_pair_perfect(self):
        assert self._align(
            ('TTATGAAAGT', 'CGTAAGAGC')) == ((3, 34), (0, 11))

    def test_align_pair_subst(self):
        assert self._align(
            ('TTATGTAAGT', 'CGTATGAGC')) == ((3, 34), (1, 11))

    def test_align_pair_del(self):
        assert self._align(
            ('TTATGAAGT', 'CGTAGAGC')) == ((2, 34), (1, 11))

    def test_align_pair_ins(self):
        assert self._align(
            ('TTATGACAAGT', 'CGTAACGAGC')) == ((3, 43), (1, 11))

    def test_align_pair_del_punish_indel(self):
        assert self._align(
            ('TTATGAAGT', 'CGTAGAGC'), 3) == ((3, 35), (3, 11))

    def test_align_pair_ins_punish_indel(self):
        assert self._align(
            ('TTATGACAAGT', 'CGTAACGAGC'), 3) == ((4, 34), (3, 11))
@@ -0,0 +1,18 @@
1
+ """Tests for the annotation CLI."""
2
+ from io import StringIO
3
+
4
+ from fake_open import md5_check
5
+
6
+ from tssv_extras import annotate
7
+
8
+
9
class TestAnnotation(object):
    """Test the annotation CLI."""
    def setup_method(self):
        """Open the alleles input file and create an in-memory report."""
        self._input = open('data/m1_newalleles.csv')
        self._output = StringIO()

    def teardown_method(self):
        """Release the handles that were opened in `setup_method`."""
        self._input.close()
        self._output.close()

    def test_annotate(self):
        """Annotate the input file and compare the report to a checksum."""
        annotate.annotate(self._input, 'TCCGTCCCATGCATGC', self._output, 0)
        assert md5_check(
            self._output.getvalue(), '596241e84c6d20a4155034236c234c51')
@@ -0,0 +1,25 @@
1
+ """Tests for the library parser of TSSV."""
2
+ from io import StringIO
3
+
4
+ from tssv.tssv import parse_library
5
+
6
+
7
class TestTSSV(object):
    """Test the library parser."""
    def setup_method(self):
        self._output = StringIO()

    def test_parse_library(self):
        """Parse a library with patterns using a relative threshold."""
        # The handle is only needed while parsing; close it afterwards.
        with open('data/library.csv') as handle:
            library = parse_library(handle, 0.1)
        assert len(library) == 4
        assert library['m3']['flanks'] == ['TTATTATCTCTC', 'CTATCGAGAGAGAT']
        assert library['m3']['reg_exp'].pattern == '^(TTTAT){1,1}(GGGA){0,1}$'
        assert library['m3']['thresholds'] == [2, 2]

    def test_parse_library_without_pattern(self):
        """Parse a library in which no patterns are given."""
        with open('data/library_lite.csv') as handle:
            library = parse_library(handle, 0)
        assert library['m3']['reg_exp'].pattern == '(?!x)x'

    def test_parse_library_with_mismatches(self):
        """Parse a library using a fixed number of mismatches."""
        with open('data/library.csv') as handle:
            library = parse_library(handle, 0.1, 1)
        assert library['m3']['thresholds'] == [1, 1]
@@ -0,0 +1,38 @@
1
+ from argparse import FileType
2
+ from importlib.metadata import PackageNotFoundError, metadata
3
+ from os.path import exists
4
+ from re import split
5
+ from typing import Callable
6
+
7
+ from .align_pair import align_pair
8
+ from .sg_align import align
9
+
10
+
11
class ProtectedFileType(FileType):
    """An `argparse.FileType` that refuses to open an existing file when its
    mode contains `w`.
    """

    def __call__(self, string):
        """Open the file named `string`.

        :arg str string: Name of the file.

        :returns: Whatever `FileType.__call__` returns for `string`.
        :raises IOError: If the mode contains `w` and the file exists.
        """
        if 'w' not in self._mode or not exists(string):
            return super().__call__(string)
        raise IOError('failed to create "{}": file exists.'.format(string))
16
+
17
+
18
def _extract(key: str, delim: str = r'[^\s\S]', index: int = 0) -> str:
    """Extract (a part of) a metadata field of this package.

    The value of the field is split on the regular expression `delim` and
    the part at position `index` is returned. The default delimiter can not
    match any character, so by default the complete value is returned.

    :arg str key: Name of the metadata field.
    :arg str delim: Regular expression on which the value is split.
    :arg int index: Position of the part to return.

    :returns str: The selected part, or `<NO DATA>` when the package
        metadata is not available or the value has no part at `index`.
    """
    try:
        value = metadata(__package__).get(key, '')
    except PackageNotFoundError:
        return '<NO DATA>'
    try:
        return split(delim, value)[index]
    except IndexError:
        # The field is absent or is not formatted as expected; this should
        # not make the module that calls this function fail on import.
        return '<NO DATA>'
24
+
25
+
26
def doc_split(func: Callable) -> str:
    """Return the first paragraph of the docstring of a function.

    :arg Callable func: Any object that has a `__doc__` attribute.

    :returns str: The docstring up to the first blank line, or an empty
        string when `func` has no docstring.
    """
    # `__doc__` is None for undocumented functions.
    return (func.__doc__ or '').split('\n\n')[0]
28
+
29
+
30
+ _project = _extract('Name')
31
+ _version = _extract('Version')
32
+ _year = '2010-2026'
33
+ _author = _extract('Author-email', r'"', 1)
34
+ _email = _extract('Author-email', r'<|>', 1)
35
+ _description = _extract('Summary')
36
+ _copyright = f'Copyright (c) {_year} by {_author} <{_email}>'
37
+ _url = _extract('Project-URL', r', ', 1)
38
+ _info = f'{_project} version {_version}\n\n{_copyright}\nHomepage: {_url}'
@@ -0,0 +1,22 @@
1
+ from .sg_align import align
2
+
3
+
4
def align_pair(reference, reference_rc, pair, indel_score=1):
    """Align a pair of markers to a reference sequence.

    The first marker is aligned to the forward reference. The second marker
    is aligned to the reverse complement of the reference, after which its
    position is subtracted from the length of the reference.

    :arg str reference: Reference sequence to align to.
    :arg str reference_rc: Reverse complement of the reference sequence.
    :arg list pair: A pair (forward, reverse) of markers to align.
    :arg int indel_score: Penalty score for insertions and deletions per
        nucleotide.

    :returns tuple: A tuple ((distance, position), (distance, position))
        holding the alignment of the first and of the second marker.
    """
    hits = (
        align(reference, pair[0], indel_score),
        align(reference_rc, pair[1], indel_score))

    forward = hits[0]['distance'], hits[0]['position']
    reverse = hits[1]['distance'], len(reference) - hits[1]['position']

    return forward, reverse
tssv-1.2.1/tssv/cli.py ADDED
@@ -0,0 +1,64 @@
1
+ from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter
2
+ from sys import stdout
3
+ from xopen import xopen
4
+
5
+ from . import _copyright, _description, _info
6
+ from .tssv import tssv
7
+
8
+
9
def main():
    """Main entry point.

    Parse the command line, determine the format of the input file from its
    first line and run `tssv` with the parsed arguments. An `OSError` raised
    while opening the input file or while running `tssv` is reported through
    the argument parser.
    """
    parser = ArgumentParser(
        description=_description, epilog=_copyright,
        formatter_class=RawDescriptionHelpFormatter)

    parser.add_argument(
        'input_handle', metavar='INPUT',
        help='a FASTA/FASTQ file')
    parser.add_argument(
        'library_handle', metavar='LIBRARY', type=FileType('r'),
        help='library of flanking sequences')
    parser.add_argument(
        '-m', dest='threshold', type=float, default=0.08,
        help='mismatches per nucleotide (default=%(default)s)')
    parser.add_argument(
        '-M', dest='mismatches', type=int,
        help='fixed number of mismatches, overrides -m (default=%(default)s)')
    parser.add_argument(
        '-n', dest='indel_score', type=int, default=1,
        help='insertions and deletions are penalised this number of times '
        'more heavily than mismatches (default=%(default)s)')
    parser.add_argument(
        '-r', dest='report_handle', type=FileType('w'), default=stdout,
        help='name of the report file')
    parser.add_argument(
        '-j', dest='json_report', action='store_true', default=False,
        help='use json format for the output file')
    parser.add_argument('-d', dest='path', type=str, help='output directory')
    parser.add_argument(
        '-a', dest='minimum', type=int, default=0,
        help='minimum count per allele (default=%(default)s)')
    parser.add_argument('-v', action='version', version=_info)

    args = parser.parse_args()

    try:
        # Have a little look in the input file to determine the file format.
        # An empty file has no first line, it is treated as a FASTQ file.
        with xopen(args.input_handle, 'r') as fin:
            if next(fin, '').startswith('>'):
                args.file_format = 'fasta'
            else:
                args.file_format = 'fastq'

        # Now that we know the file format, we can open the file again and
        # have access to the full file content. The handle is closed once
        # the analysis is done.
        with xopen(args.input_handle) as input_handle:
            args.input_handle = input_handle
            tssv(**{k: v for k, v in vars(args).items()
                    if k not in ('func', 'subcommand')})
    except OSError as error:
        parser.error(error)
61
+
62
+
63
+ if __name__ == '__main__':
64
+ main()
@@ -0,0 +1,104 @@
1
+ /*
2
+ * Library for semi-global alignment.
3
+ */
4
+ #include <string.h>
5
+ #include <stdlib.h>
6
+
7
+ #include "sgAlign.h"
8
+
9
+
10
/*! Select the smaller one of two integers.
 *
 * \param [in] a A value.
 * \param [in] b A value.
 *
 * \return The minimum of `a` and `b`.
 */
static inline int min_(int const a, int const b) {
    return b < a ? b : a;
}
23
+
24
/*! Set the first row and the first column of an alignment matrix.
 *
 * Every cell of the first column, except the one in the first row, is set
 * to zero. The cell in column `c` of the first row is set to
 * `c * indelScore`.
 *
 * \param [in,out] matrix The alignment matrix.
 * \param [in] rows Number of rows in the matrix.
 * \param [in] columns Number of columns in the matrix.
 * \param [in] indelScore Penalty score for insertions and deletions.
 */
void initMatrix_(
        int *const matrix, size_t const rows, size_t const columns,
        int const indelScore) {
    /* The matrix is stored row by row: cell (r, c) is at r * columns + c. */
    for (size_t c = 0; c < columns; c++) {
        matrix[c] = c * indelScore;
    }
    for (size_t r = 1; r < rows; r++) {
        matrix[r * columns] = 0;
    }
}
43
+
44
/*! Fill all cells of the alignment matrix outside its first row and column.
 *
 * A cell receives the minimum of: the cell above it or the cell left of it
 * plus `indelScore`, and the cell diagonally above it plus one when the
 * corresponding characters of `seq1` and `seq2` differ.
 *
 * \param [in, out] matrix The alignment matrix.
 * \param [in] rows Number of rows in the matrix.
 * \param [in] columns Number of columns in the matrix.
 * \param [in] seq1 The sequence to be aligned to.
 * \param [in] seq2 The sequence to be aligned.
 * \param [in] indelScore Penalty score for insertions and deletions.
 */
void align_(
        int *const matrix, size_t const rows, size_t const columns,
        char const *const seq1, char const *const seq2, int const indelScore) {
    for (size_t r = 1; r < rows; r++) {
        /* The matrix is stored row by row. */
        int const *const above = matrix + (r - 1) * columns;
        int *const current = matrix + r * columns;

        for (size_t c = 1; c < columns; c++) {
            int const gap = min_(above[c], current[c - 1]) + indelScore;
            int const diagonal = above[c - 1] + (seq1[r - 1] != seq2[c - 1]);

            current[c] = min_(gap, diagonal);
        }
    }
}
66
+
67
+ /*! Find the minimum distance, ignoring a trailing gap in the sequence
68
+ * associated with the number of rows in an alignment matrix. If the minimum
69
+ * distance is found, also return the row number.
70
+ *
71
+ * \param [in] matrix A `rows` * `columns` matrix.
72
+ * \param [in] rows Number of rows in the matrix.
73
+ * \param [in] columns Number of columns in the matrix.
74
+ *
75
+ * \return The minimum distance and its row number.
76
+ */
77
+ Alignment findMin_(
78
+ int const *const matrix, size_t const rows, size_t const columns) {
79
+ int const (*const matrix_)[columns] = (int const (*const)[columns])matrix;
80
+ Alignment a = {columns - 1, 0};
81
+
82
+ for (size_t r = 1; r < rows; r++) {
83
+ if (matrix_[r][columns - 1] < a.distance) {
84
+ a.distance = matrix_[r][columns - 1];
85
+ a.position = r;
86
+ }
87
+ }
88
+ return a;
89
+ }
90
+
91
+ Alignment align(
92
+ char const *const seq1, char const *const seq2, int const indelScore) {
93
+ Alignment a;
94
+ size_t rows = strlen(seq1) + 1;
95
+ size_t columns = strlen(seq2) + 1;
96
+ int *matrix = (int *)malloc(rows * columns * sizeof(int));
97
+
98
+ initMatrix_(matrix, rows, columns, indelScore);
99
+ align_(matrix, rows, columns, seq1, seq2, indelScore);
100
+ a = findMin_(matrix, rows, columns);
101
+ free(matrix);
102
+
103
+ return a;
104
+ }
@@ -0,0 +1,18 @@
1
+ #pragma once
2
+
3
#include <stddef.h>  /* For size_t, so this header can be included first. */

/*! Result of a semi-global alignment. */
typedef struct {
    size_t distance;  /*!< The minimum distance. */
    size_t position;  /*!< Row number of the minimum distance. */
} Alignment;
7
+
8
+
9
+ /*! Do a semi-global alignment of `seq2` to `seq1`.
10
+ *
11
+ * \param [in] seq1 The sequence to be aligned to.
12
+ * \param [in] seq2 The sequence to be aligned.
13
+ * \param [in] indelScore Penalty score for insertions and deletions.
14
+ *
15
+ * \return The minimum distance and its row number.
16
+ */
17
+ Alignment align(
18
+ char const *const seq1, char const *const seq2, int const indelScore);
@@ -0,0 +1,62 @@
1
+ #include <Python.h>
2
+
3
+ #include "sgAlign.h"
4
+
5
+
6
+ /**
7
+ * Converter for alignment struct.
8
+ */
9
+ PyObject *pyAlignment(int distance, int position) {
10
+ return Py_BuildValue(
11
+ "{s: i, s: i}", "distance", distance, "position", position);
12
+ }
13
+
14
+ /**
15
+ * Wrapper for align function.
16
+ */
17
+ PyObject *pyAlign(PyObject *self, PyObject *args) {
18
+ char *seq1;
19
+ char *seq2;
20
+ int indel_score;
21
+ Alignment a;
22
+
23
+ if (!PyArg_ParseTuple(args, "ssi", &seq1, &seq2, &indel_score)) {
24
+ return NULL;
25
+ }
26
+
27
+ a = align(seq1, seq2, indel_score);
28
+
29
+ return pyAlignment(a.distance, a.position);
30
+ }
31
+
32
+ /*
33
+ * Module methods.
34
+ */
35
+ PyMethodDef pySgAlignMethods[] = {
36
+ {
37
+ "align", pyAlign, METH_VARARGS,
38
+ "Do a semi-global alignment of {seq2} to {seq1}.\n\n"
39
+ " :arg str seq1: The sequence to be aligned to.\n"
40
+ " :arg str seq2: The sequence to be aligned.\n"
41
+ " :arg int indel_score: Penalty score for insertions and deletions.\n\n"
42
+ " :returns dict alignment: The minimum distance and its row number.\n"},
43
+ {NULL, NULL, 0, NULL}
44
+ };
45
+
46
+ /*
47
+ * Module definition.
48
+ */
49
+ struct PyModuleDef sgAlignModule = {
50
+ PyModuleDef_HEAD_INIT,
51
+ "sg_align",
52
+ "Library for semi-global alignment.",
53
+ -1,
54
+ pySgAlignMethods
55
+ };
56
+
57
+ /**
58
+ * Module init function.
59
+ */
60
+ PyMODINIT_FUNC PyInit_sg_align(void) {
61
+ return PyModule_Create(&sgAlignModule);
62
+ }
@@ -0,0 +1,439 @@
1
+ from collections import defaultdict
2
+ from functools import reduce
3
+ from json import dump
4
+ from math import ceil
5
+ from os import mkdir
6
+ from re import compile as re_compile
7
+
8
+ from Bio import Seq, SeqIO
9
+
10
+ from .align_pair import align_pair
11
+
12
+
13
file_names = dict(
    unknown='unknown.seq',
    markers='markers.csv',
    known='knownalleles.csv',
    new='newalleles.csv',
    nostart='nostart.csv',
    noend='noend.csv',
    summary='summary.csv',
)
"""Names of the global report files."""

marker_file_names = dict(
    known='known.seq',
    new='new.seq',
    noend='noend.seq',
    nostart='nostart.seq',
    knownalleles='knownalleles.csv',
    newalleles='newalleles.csv',
)
"""Names of the marker specific report files."""

headers = dict(
    markers='\t'.join([
        'name', 'fPaired', 'rPaired', 'fLeft', 'rLeft', 'fRight',
        'rRight']) + '\n',
    allele='\t'.join(['allele', 'total', 'forward', 'reverse']) + '\n',
    nostartend='\t'.join(['name', 'forward', 'reverse', 'total']) + '\n',
    overview='\t'.join([
        'name', 'forward', 'reverse', 'total', 'allele']) + '\n',
)
"""Headers for various tables."""
38
+
39
+
40
def parse_library(library_handle, threshold, mismatches=0):
    """Parse the library file and put the data in a nested dictionary
    containing per marker the two forward flanking sequences, the two reverse
    flanking sequences and a regular expression pattern object.

    Every non-blank line holds tab separated fields: the marker name, the
    first flank, the second flank and, optionally, a whitespace separated
    list of `unit minimum maximum` triplets describing the known alleles.
    Blank lines are skipped.

    :arg stream library_handle: Open readable handle to a library file.
    :arg float threshold: Number of allowed mismatches per nucleotide.
    :arg int mismatches: If set, overrides the dynamic threshold calculation.

    :returns dict: Nested dictionary containing library data.
    """
    library = {}

    for line in library_handle:
        fields = line.strip().split('\t')
        if fields == ['']:
            # A blank line carries no marker; indexing it would fail.
            continue

        pattern = '(?!x)x'  # This will never match anything.
        if len(fields) == 4:
            units = fields[3].split()
            pattern = '^{}$'.format(''.join(
                '({}){{{},{}}}'.format(
                    units[j], units[j + 1], units[j + 2])
                for j in range(0, len(units), 3)))

        library[fields[0]] = {
            'flanks': [fields[1], Seq.reverse_complement(fields[2])],
            'counts': [0, 0, 0, 0],
            'pair_match': [0, 0],
            # A non-zero `mismatches` takes precedence over the per
            # nucleotide threshold.
            'thresholds': [
                mismatches or int(ceil(len(fields[1]) * threshold)),
                mismatches or int(ceil(len(fields[2]) * threshold))],
            'reg_exp': re_compile(pattern),
            'new': defaultdict(lambda: [0, 0]),
            'known': defaultdict(lambda: [0, 0])}

    return library
75
+
76
+
77
def open_files(path, markers):
    """Create the output folder with one subfolder per marker and open all
    report files in them for writing.

    The returned dictionary maps the keys of `file_names` to the global
    handles and every marker name to a dictionary that maps the keys of
    `marker_file_names` to the handles in the subfolder of that marker.

    :arg str path: Name of the output folder.
    :arg list markers: Name of the subfolders.

    :returns dict: Nested dictionary containing writable file handles.
    """
    mkdir(path)
    files = {
        key: open('{}/{}'.format(path, name), 'w')
        for key, name in file_names.items()}

    for marker in markers:
        marker_path = '{}/{}'.format(path, marker)
        mkdir(marker_path)

        files[marker] = {
            key: open('{}/{}'.format(marker_path, name), 'w')
            for key, name in marker_file_names.items()}

    return files
100
+
101
+
102
def write_table(table, header, handle):
    """Write an optional header followed by the rows of a table.

    Every row is written as one line of tab separated values.

    :arg list table: Table content.
    :arg str header: Table header.
    :arg stream handle: Open writable handle to the output file.
    """
    if header:
        handle.write(header)

    if not table:
        return

    for row in table:
        line = '\t'.join(str(cell) for cell in row)
        handle.write('{}\n'.format(line))
115
+
116
+
117
def rewrite(regular_expression, pattern):
    """Make a pattern that matches a regular expression more human readable.

    Every capture group that took part in the match contributes one
    `unit(count)` block; consecutive groups that captured the same text are
    merged into one block.

    :arg object regular_expression: A compiled regular expression object.
    :arg str pattern: A pattern that matches {regular_expression}.

    :returns str: A human readable version of {pattern}.
    """
    match = regular_expression.match(pattern)

    # Sentinel block, the first real block is measured from position 0.
    blocks = [((0, 0), None)]
    for index in range(1, len(match.regs)):
        span, unit = match.regs[index], match.group(index)

        if (span, unit) == ((-1, -1), None):
            # This group did not take part in the match.
            continue

        if blocks[-1][1] == unit:
            blocks[-1] = (span, unit)
        else:
            blocks.append((span, unit))

    # The span of a group covers its last repeat only, so the repeat count
    # is the distance to the end of the previous block divided by the
    # length of one unit.
    return ''.join(
        '{}({})'.format(
            unit, (span[1] - previous[0][1]) // (span[1] - span[0]))
        for previous, (span, unit) in zip(blocks, blocks[1:]))
142
+
143
+
144
def allele_table(new_allele, minimum):
    """Make an allele statistics table.

    The rows are `[allele, total, forward, reverse]`, ordered by descending
    total; alleles with a total below {minimum} are left out.

    :arg dict new_allele: Dictionary with count data of new alleles.
    :arg int minimum: Minimum count per allele.

    :returns list: Allele statistics table.
    """
    ranked = sorted(
        new_allele.items(), key=lambda item: sum(item[1]), reverse=True)

    table = []
    for allele, counts in ranked:
        total = sum(counts)
        if total < minimum:
            # Everything after this point has an even lower total.
            break

        table.append([allele, total] + counts)

    return table
162
+
163
+
164
def summary_table(allele, minimum):
    """Filter one of the global allele tables.

    Only rows whose fourth column (the total count) is at least {minimum}
    are kept. A real list is returned, so the result can be iterated more
    than once.

    :arg list allele: List with count data of alleles.
    :arg int minimum: Minimum count per allele.

    :returns list: Allele statistics table.
    """
    return [row for row in allele if row[3] >= minimum]
173
+
174
+
175
def make_tables(total, unrecognised, library, minimum):
    """Make overview tables of the results.

    All tables are real lists. The 'nostart' and 'noend' tables are read
    here to build the summary and are read again when the files are
    written; the 'library' table is written to the marker file as well as
    to the report. One-shot iterators would be empty on the second pass.

    :arg int total: Total number of reads in the FASTA file.
    :arg int unrecognised: Number of unrecognised reads in the FASTA file.
    :arg dict library: Nested dictionary containing library data.
    :arg int minimum: Minimum count per allele.

    :returns dict: A nested dictionary containing overview tables.
    """
    known = []
    new = []
    no_start = []
    no_end = []
    allele_tables = defaultdict(dict)

    for name, marker in library.items():
        forward_pairs, reverse_pairs = marker['pair_match']
        counts = marker['counts']

        for allele, hits in marker['known'].items():
            known.append([name] + hits + [sum(hits), allele])
        for allele, hits in marker['new'].items():
            new.append([name] + hits + [sum(hits), allele])

        # Reads in which only one of the two flanks was found.
        no_start.append([
            name, counts[2] - forward_pairs, counts[3] - reverse_pairs])
        no_end.append([
            name, counts[0] - forward_pairs, counts[1] - reverse_pairs])

        allele_tables[name]['known'] = allele_table(marker['known'], minimum)
        allele_tables[name]['new'] = allele_table(marker['new'], minimum)

    tables = {
        'library': [
            [name] + marker['pair_match'] + marker['counts']
            for name, marker in library.items()],
        'allele': allele_tables}

    tables['known'] = sorted(
        summary_table(known, minimum), key=lambda x: (x[0], x[4]))
    tables['new'] = sorted(
        summary_table(new, minimum), key=lambda x: (x[0], x[3]), reverse=True)
    tables['nostart'] = [row + [sum(row[1:])] for row in sorted(no_start)]
    tables['noend'] = [row + [sum(row[1:])] for row in sorted(no_end)]

    tables['summary'] = [
        ['total reads', total],
        ['matched pairs', sum(
            sum(marker['pair_match']) for marker in library.values())],
        ['new alleles', sum(row[3] for row in tables['new'])],
        ['new unique alleles', sum(
            len(allele_tables[name]['new']) for name in library)],
        ['no start', sum(row[3] for row in tables['nostart'])],
        ['no end', sum(row[3] for row in tables['noend'])],
        ['unrecognised reads', unrecognised]]

    return tables
236
+
237
+
238
def make_text_report(tables, handle):
    """Write a plain text overview of the results.

    The summary and the library table come first, followed per marker by
    the known alleles and the new alleles, the latter with their mean
    length weighted by read count.

    :arg dict tables: A nested dictionary containing overview tables.
    :arg stream handle: Open writable handle to the report file.
    """
    write_table(tables['summary'], '', handle)
    handle.write('\n')

    write_table(tables['library'], headers['markers'], handle)

    for marker, alleles in tables['allele'].items():
        handle.write('\nknown alleles for marker {}:\n'.format(marker))
        write_table(alleles['known'], headers['allele'], handle)

        new_alleles = alleles['new']
        total_length = sum(len(row[0]) * row[1] for row in new_alleles)
        allele_count = sum(row[1] for row in new_alleles)
        mean_length = total_length / allele_count if allele_count else 0

        handle.write('\nnew alleles for marker {} (mean length {}):\n'.format(
            marker, mean_length))
        write_table(new_alleles, headers['allele'], handle)
264
+
265
+
266
def make_json_report(tables, handle):
    """Make an overview of the results per marker, for downstream parsing.

    The report has a 'marker' section holding the allele tables and the
    library counts of every marker, and a 'summary' section.

    :arg dict tables: A nested dictionary containing overview tables.
    :arg stream handle: Open writable handle to the json file.
    """
    allele_head = headers['allele'].strip().split('\t')
    report = {'marker': {}}

    # Allele data, one entry per marker.
    for marker, data in tables['allele'].items():
        report['marker'][marker] = {
            'allele': {
                'known': [
                    dict(zip(allele_head, row)) for row in data['known']],
                'new': [
                    dict(zip(allele_head, row)) for row in data['new']]}}

    # Summary data.
    report['summary'] = dict(tables['summary'])

    # Library data, the name column selects the marker entry.
    library_head = headers['markers'].strip().split('\t')
    for entry in tables['library']:
        row = dict(zip(library_head, entry))
        marker = row.pop('name')
        report['marker'][marker]['library'] = row

    dump(report, indent=True, fp=handle)
302
+
303
+
304
def write_files(tables, files):
    """Write the overview tables to the appropriate files.

    :arg dict tables: A nested dictionary containing overview tables.
    :arg dict files: Nested dictionary containing writable file handles.
    """
    write_table(tables['summary'], '', files['summary'])

    # (table name, header name, file name) of the remaining global tables.
    for table, header, target in (
            ('library', 'markers', 'markers'),
            ('known', 'overview', 'known'),
            ('new', 'overview', 'new'),
            ('nostart', 'nostartend', 'nostart'),
            ('noend', 'nostartend', 'noend')):
        write_table(tables[table], headers[header], files[target])

    for marker, alleles in tables['allele'].items():
        write_table(
            alleles['known'], headers['allele'],
            files[marker]['knownalleles'])
        write_table(
            alleles['new'], headers['allele'],
            files[marker]['newalleles'])
324
+
325
+
326
def _close_files(files):
    """Close every handle in the nested dictionary made by `open_files`."""
    for entry in files.values():
        handles = entry.values() if isinstance(entry, dict) else [entry]
        for handle in handles:
            handle.close()


def tssv(
        input_handle, library_handle, report_handle, json_report, path,
        threshold, mismatches, minimum, indel_score, file_format):
    """Do the short structural variation analysis.

    :arg stream input_handle: Open readable handle to a FASTA file.
    :arg stream library_handle: Open readable handle to a library file.
    :arg stream report_handle: Open writable handle to the report file.
    :arg bool json_report: If set, the report is written as JSON instead of
        plain text.
    :arg str path: Name of the output folder, no output files are written
        when this is empty.
    :arg float threshold: Number of allowed mismatches per nucleotide.
    :arg int mismatches: If set, overrides the dynamic threshold calculation.
    :arg int minimum: Minimum count per allele.
    :arg int indel_score: Penalty score for insertions and deletions per
        nucleotide
    :arg str file_format: File format of input_handle, either 'fasta' or
        'fastq'.
    """
    total = 0
    unrecognised = 0
    library = parse_library(library_handle, threshold, mismatches)
    files = open_files(path, library) if path else None

    try:
        for record in SeqIO.parse(input_handle, file_format):
            ref = [str(record.seq), Seq.reverse_complement(str(record.seq))]
            ref_up = [sequence.upper() for sequence in ref]
            total += 1
            unknown = True

            for i in library:
                marker = library[i]
                flanks = marker['flanks']
                thresholds = marker['thresholds']

                # Align against all-uppercase reference sequence.
                alignments = (
                    align_pair(ref_up[0], ref_up[1], flanks, indel_score),
                    align_pair(ref_up[1], ref_up[0], flanks, indel_score))
                # [forward left, forward right, reverse left, reverse right]
                matches = [False, False, False, False]
                classification = ''

                # Strand 0 is the read itself, strand 1 its reverse
                # complement.
                for strand in (0, 1):
                    left = alignments[strand][0]
                    right = alignments[strand][1]

                    if left[0] <= thresholds[0]:
                        cutout = ref[strand][
                            max(0, left[1] - len(flanks[0])):left[1]]
                        # A cutout without any uppercase character is not
                        # counted as a hit.
                        if cutout.lower() != cutout:
                            marker['counts'][strand] += 1
                            classification = 'noend'
                            matches[2 * strand] = True
                    if right[0] <= thresholds[1]:
                        cutout = ref[strand][
                            right[1]:right[1] + len(flanks[1])]
                        if cutout.lower() != cutout:
                            marker['counts'][2 + strand] += 1
                            classification = 'nostart'
                            matches[2 * strand + 1] = True

                if (matches[0] and matches[1]) or (matches[2] and matches[3]):
                    hit = int(matches[2] and matches[3])

                    marker['pair_match'][hit] += 1
                    pat = ref_up[hit][
                        alignments[hit][0][1]:alignments[hit][1][1]]

                    classification = 'new'
                    if marker['reg_exp'].match(pat):
                        classification = 'known'

                    marker[classification][pat][hit] += 1

                if classification:
                    unknown = False

                    if path:
                        SeqIO.write(
                            [record], files[i][classification], file_format)

            if unknown:
                unrecognised += 1

                if path:
                    SeqIO.write([record], files['unknown'], file_format)

        tables = make_tables(total, unrecognised, library, minimum)

        # Make the known alleles more human readable.
        for i in tables['allele']:
            for j in tables['allele'][i]['known']:
                j[0] = rewrite(library[i]['reg_exp'], j[0])
        for i in tables['known']:
            i[4] = rewrite(library[i[0]]['reg_exp'], i[4])

        if path:
            write_files(tables, files)
    finally:
        # The output files are opened here, so they are closed here too.
        if files:
            _close_files(files)

    if json_report:
        make_json_report(tables, report_handle)
    else:
        make_text_report(tables, report_handle)