pyaragorn 0.1.0__cp37-cp37m-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyaragorn might be problematic. Click here for more details.
- pyaragorn/CMakeLists.txt +1 -0
- pyaragorn/__init__.py +36 -0
- pyaragorn/lib.cpython-37m-darwin.so +0 -0
- pyaragorn/lib.pyx +715 -0
- pyaragorn/tests/__init__.py +10 -0
- pyaragorn/tests/data/CP001621.default.txt +95 -0
- pyaragorn/tests/data/CP001621.fna.gz +0 -0
- pyaragorn/tests/data/__init__.py +30 -0
- pyaragorn/tests/fasta.py +86 -0
- pyaragorn/tests/requirements.txt +1 -0
- pyaragorn/tests/test_doctest.py +93 -0
- pyaragorn/tests/test_rna_finder.py +69 -0
- pyaragorn-0.1.0.dist-info/METADATA +884 -0
- pyaragorn-0.1.0.dist-info/RECORD +16 -0
- pyaragorn-0.1.0.dist-info/WHEEL +5 -0
- pyaragorn-0.1.0.dist-info/licenses/COPYING +674 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
>CP001621.1 Mycoplasma mycoides subsp. capri str. GM12 transgenic clone tetM-lacZ, complete genome
|
|
2
|
+
31 genes found
|
|
3
|
+
1 tRNA-Leu c[87124,87207] 116.6 35 (tag)
|
|
4
|
+
gggggattggcggaattggcagacgcactagacttaggatctagcgtctttaacgtaagggttcaagtcccttatcccccacca
|
|
5
|
+
(((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))vvvvvvvvvvvv(((((ttttttt))))))))))))
|
|
6
|
+
2 tRNA-Lys c[87210,87285] 124.1 34 (ttt)
|
|
7
|
+
gactcgttagctcagccggtagagcaactggcttttaaccagtgggtccggggttcgaatccccgacgagtcacca
|
|
8
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
9
|
+
3 tRNA-Lys c[139027,139102] 117.7 34 (ctt)
|
|
10
|
+
gtctgattagcgcaactggcagagcaactgactcttaatcagtgggttgtgggttcgattcccacatcaggcacca
|
|
11
|
+
(((((((ss((.(dddddddd).))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
12
|
+
4 tmRNA [198037,198447] 109.7 87,173 AEKNEENFEMPAFMINNASAGANYMFA**
|
|
13
|
+
ggggatgtcatggatttgacaggatat|gtatcttggacgcgagttcgattctcgccatctccacca
|
|
14
|
+
(((((((dddddddddddd((((((((|))))))))vvv(((((ttttttt))))))))))))aaaa
|
|
15
|
+
5 tRNA-Ser [355003,355092] 119.7 35 (gct)
|
|
16
|
+
gggttaatactcaagttggtgaagaggacaccctgctaaggtgttaggtcggtctccggcgcgagggttcgagtccctcttaacccgcca
|
|
17
|
+
(((((((ss(((ddddddddddd)))((((((ccAAAcc))))))ss((((vvvvv))))ss(((((ttttttt))))))))))))
|
|
18
|
+
6 tRNA-Gly [371077,371150] 118.5 33 (tcc)
|
|
19
|
+
gcaggtgtagtttaatggcagaacttcagccttccaagctgattgtgagggttcgattcccttcacctgctcca
|
|
20
|
+
(((((((ss((((ddddddd))))s(((((ccAAAcc)))))vvvv(((((ttttttt))))))))))))
|
|
21
|
+
7 tRNA-Arg [459024,459100] 118.0 35 (tct)
|
|
22
|
+
gcccatgtagctcagtaggatagagcacgcgccttctaagcgtgaggtcggaagttcgagccttctcgtgggcacca
|
|
23
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
24
|
+
8 tRNA-Leu [521320,521404] 117.7 35 (caa)
|
|
25
|
+
gcccttttggcggaattggcagacgcattagactcaaaatctaacgaagaaattcgtatcggttcgaccccgataaagggcacca
|
|
26
|
+
(((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))((((vvvv))))s(((((ttttttt))))))))))))
|
|
27
|
+
9 tRNA-Leu c[611707,611795] 119.4 35 (taa)
|
|
28
|
+
ccccaagtggcggaataggtagacgcattggacttaaaatccaacgggcttaatatcctgtgccggttcaagtccggccttggggacca
|
|
29
|
+
(((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))((((vvvvvvvv))))s(((((ttttttt))))))))))))
|
|
30
|
+
10 tRNA-Lys c[611806,611881] 124.1 34 (ttt)
|
|
31
|
+
gactcgttagctcagccggtagagcaactggcttttaaccagtgggtccggggttcgaatccccgacgagtcacca
|
|
32
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
33
|
+
11 tRNA-Gln c[611886,611960] 116.0 33 (ttg)
|
|
34
|
+
tgggctatagccaagcggtaaggcaagggactttgactccctcatgcgccggttcgaatcctgctagcccaacca
|
|
35
|
+
(((((((ss(((ddddddddd)))s(((((ccAAAcc)))))vvvvv((.((ttttttt)).)))))))))
|
|
36
|
+
12 tRNA-Tyr c[611967,612050] 120.5 35 (gta)
|
|
37
|
+
ggaggggtagcgaagtggctaaacgcgggtggctgtaacccacttccttacggttcgggggttcgaatccctccccctccacca
|
|
38
|
+
(((((((ss(((ddddddddddd)))((((((ccAAAcc))))))vvvvvvvvvvv(((((ttttttt))))))))))))
|
|
39
|
+
13 tRNA-Thr c[612057,612132] 119.0 34 (agt)
|
|
40
|
+
gctgacttagctcagttggtagagcaattgactagtaatcaataggtcgaaggttcaaatcctttagtcagcacca
|
|
41
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
42
|
+
14 tRNA-Trp c[763746,763820] 118.0 33 (cca)
|
|
43
|
+
aggagagtagttcaatggtagaacgtcggtctccaaaaccgagcgttgagggttcgattcctttctctcctgcca
|
|
44
|
+
(((((((ss((((ddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
45
|
+
15 tRNA-SeC c[763858,763933] 118.2 34 (tca)
|
|
46
|
+
aggggcatagttcagtaggtagaacatcggtcttcaaaaccgagtgtcacgagttcgagtcttgttgcccctgcca
|
|
47
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
48
|
+
16 tRNA-His c[766239,766314] 113.8 34 (gtg)
|
|
49
|
+
gcgtaggtggtgaagtggttaacacatcaggttgtggctctgacatgcgcgggttcgatccccgttctacgcccca
|
|
50
|
+
(((((((ss(((dddddddddd)))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
51
|
+
17 tRNA-Ile c[778452,778528] 120.9 35 (gat)
|
|
52
|
+
cggaatatagctcagctggttagagcactccgctgataacggagaggtcgttggttcaagtccaattattccgacca
|
|
53
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
54
|
+
18 tRNA-Thr c[813006,813081] 121.2 34 (tgt)
|
|
55
|
+
gctgacttagctcagcaggcagagcaactgacttgtaatcagtaggtcgtaggttcgattcctatagtcagcacca
|
|
56
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
57
|
+
19 tRNA-Val c[813094,813169] 122.2 34 (tac)
|
|
58
|
+
ggagtgttagctcagctgggagagctcctgccttacaagcaggcggtcataggttcaagtcctatacactccacca
|
|
59
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
60
|
+
20 tRNA-Glu c[813177,813252] 119.7 34 (ttc)
|
|
61
|
+
ggcctgttggtgaagcggttaacacacacggttttcatccgtggacacacgggttcgaaccccgtacaggctacca
|
|
62
|
+
(((((((ss(((dddddddddd)))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
63
|
+
21 tRNA-Asn c[813260,813335] 123.7 34 (gtt)
|
|
64
|
+
ggctttttagctcagcaggtagagcaaccggctgttaaccggtttgtcacaggttcgagccctgtaaaagccgcca
|
|
65
|
+
(((((((ss((((dddddddd))))((((((ccAAAcc))))))vvvv(((((ttttttt))))))))))))
|
|
66
|
+
22 tRNA-Arg [856360,856436] 117.7 35 (acg)
|
|
67
|
+
gcgcccgtagatcaattggatagatcgcttgactacggatcaaaaggttgggggttcgagtccctccgggcgcacca
|
|
68
|
+
(((((((ss((((ddddddddd))))s.((((ccAAAcc)))).vvvvv(((((ttttttt))))))))))))
|
|
69
|
+
23 tRNA-Pro [856484,856560] 121.2 35 (tgg)
|
|
70
|
+
cgggaagtggctcagtttggtagagcattcggtttgggaccgaagggtcgcaggttcaaatcctgtcttcccgacca
|
|
71
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
72
|
+
24 tRNA-Ala [856571,856646] 121.1 34 (tgc)
|
|
73
|
+
gggcccttagctcagctgggagagcacctgccttgcacgcagggggtcgacggttcgatcccgttagggtccacca
|
|
74
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
75
|
+
25 tRNA-Met [856651,856727] 120.1 35 (cat)
|
|
76
|
+
ggcggggtagctcagttggttagagcgttcggttcatacccgaaaggtcgagagttcaactctctcccccgctacca
|
|
77
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
78
|
+
26 tRNA-Met [856739,856815] 122.2 35 (cat)
|
|
79
|
+
ggacctttagctcagttggttagagcatccggctcataaccggacggtcattggttcaagtccaataaggtccacca
|
|
80
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
81
|
+
27 tRNA-Ser [856858,856950] 117.2 37 (tga)
|
|
82
|
+
ggaagattacccaagtccggctgaagggatcggtcttgaaaaccgagagtcggggaaacccgagcgggggttcgaatccctcatcttccgcca
|
|
83
|
+
(((((((ss(((.ddddddddddd.)))s(((((ccAAAcc)))))sss(((((vvvv)))))ss(((((ttttttt))))))))))))
|
|
84
|
+
28 tRNA-Met [856973,857048] 123.7 34 (cat)
|
|
85
|
+
cgcggggtagagcagttggtagctcgccgggctcataacccggaggccgcaggttcgagtcctgcccccgcaacca
|
|
86
|
+
.((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))).
|
|
87
|
+
29 tRNA-Asp [857051,857127] 123.9 35 (gtc)
|
|
88
|
+
ggccccatagcgaagttggttatcgcgcctccctgtcacggaggagatcacgggttcgagtcccgttggggtcgcca
|
|
89
|
+
(((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
90
|
+
30 tRNA-Phe [857136,857211] 119.3 34 (gaa)
|
|
91
|
+
ggtcgtgtagctcagtcggtagagcagcagactgaagctctgcgtgtcggcggttcaattccgtccacgaccacca
|
|
92
|
+
(((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
|
|
93
|
+
31 tRNA-Cys [975123,975197] 117.5 34 (gca)
|
|
94
|
+
ggcaatatagccaagcggctaaggcatgggtctgcaacaccctgatcgtcggttcgaatccgactattgcctcca
|
|
95
|
+
(((((((ss(((dddddddddd)))s.((((ccAAAcc)))).vvvv(((((ttttttt))))))))))))
|
|
Binary file
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import gzip
|
|
3
|
+
import io
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
try:
|
|
7
|
+
from importlib.resources import files
|
|
8
|
+
except ImportError:
|
|
9
|
+
from importlib_resources import files # type: ignore
|
|
10
|
+
except ImportError:
|
|
11
|
+
files = None # type: ignore
|
|
12
|
+
|
|
13
|
+
from ..fasta import parse, zopen
|
|
14
|
+
|
|
15
|
+
@contextlib.contextmanager
def load(name, mode="rb"):
    """Open the test data file *name* shipped next to this module.

    The file is opened through `zopen`, and the resulting stream is yielded
    to the caller for the duration of the ``with`` block.

    Arguments:
        name: File name of the resource, relative to this package.
        mode: Mode forwarded unchanged to `zopen`.

    Yields:
        The stream returned by `zopen`.

    """
    if files is not None:
        resource = files(__name__).joinpath(name)
    else:
        # Neither `importlib.resources.files` nor the `importlib_resources`
        # backport could be imported (the module then sets `files = None`):
        # locate the resource relative to this file instead of failing with
        # "'NoneType' object is not callable".
        import os

        resource = os.path.join(os.path.dirname(os.path.abspath(__file__)), name)
    with zopen(resource, mode=mode) as src:
        yield src
|
|
19
|
+
|
|
20
|
+
def load_record(name):
    """Return the first record parsed from the data file *name*."""
    with load(name, mode="r") as handle:
        records = parse(handle)
        # only the first record is consumed; the rest of the file is ignored
        return next(records)
|
|
23
|
+
|
|
24
|
+
def load_records(name):
    """Return a list with every record parsed from the data file *name*."""
    with load(name, mode="r") as handle:
        # materialize the generator while the underlying file is still open
        records = list(parse(handle))
    return records
|
|
27
|
+
|
|
28
|
+
def load_text(name):
    """Return the whole content of the data file *name* as text."""
    with load(name, mode="r") as handle:
        content = handle.read()
    return content
|
pyaragorn/tests/fasta.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import bz2
|
|
2
|
+
import collections
|
|
3
|
+
import contextlib
|
|
4
|
+
import io
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Leading bytes used by `zopen` to recognise each supported compression format.
_BZ2_MAGIC = b"BZh"
_GZIP_MAGIC = b"\x1f\x8b"
_XZ_MAGIC = b"\xfd7zXZ"
_LZ4_MAGIC = b"\x04\x22\x4d\x18"
_ZSTD_MAGIC = b"\x28\xb5\x2f\xfd"


@contextlib.contextmanager
def zopen(file, mode='r', encoding=None, errors=None, newline=None):
    """Open a possibly compressed file for reading.

    The file is opened in binary mode and its first bytes are compared to
    the gzip, bzip2, xz, LZ4 and Zstandard magic numbers; when one of them
    matches, the stream is wrapped in the matching decompressor. Files
    matching none of them are read as-is.

    Arguments:
        file: Path of the file to open, as accepted by `os.fsencode`.
        mode: ``"r"`` or ``"rt"`` to get a text stream; any other value
            yields the binary stream.
        encoding, errors, newline: Forwarded to `io.TextIOWrapper` when a
            text stream is requested.

    Yields:
        The opened stream; everything registered on the internal exit stack
        is closed when the ``with`` block ends.

    Raises:
        TypeError: When *file* is not a path (raised by `os.fsencode`).
        RuntimeError: When the file is LZ4 or Zstandard compressed but the
            corresponding optional package is not installed.

    """
    with contextlib.ExitStack() as ctx:
        # `os.fsencode` rejects anything that is not a path with a TypeError,
        # which is deliberately left to propagate to the caller.
        path = os.fsencode(file)
        file = ctx.enter_context(open(path, mode="rb"))

        # `peek` returns buffered bytes without advancing the stream
        peek = file.peek()
        if peek.startswith(_GZIP_MAGIC):
            try:
                # prefer the `isal` implementation when it is available
                from isal import igzip as gzip
            except ImportError:
                import gzip  # type: ignore
            file = ctx.enter_context(gzip.open(file, mode="rb"))
        elif peek.startswith(_BZ2_MAGIC):
            import bz2
            file = ctx.enter_context(bz2.open(file, mode="rb"))
        elif peek.startswith(_XZ_MAGIC):
            import lzma
            file = ctx.enter_context(lzma.open(file, mode="rb"))
        elif peek.startswith(_LZ4_MAGIC):
            try:
                import lz4.frame
            except ImportError as err:
                raise RuntimeError("File compression is LZ4 but lz4 is not installed") from err
            file = ctx.enter_context(lz4.frame.open(file))
        elif peek.startswith(_ZSTD_MAGIC):
            try:
                import zstandard
            except ImportError as err:
                raise RuntimeError("File compression is ZSTD but zstandard is not installed") from err
            decompressor = zstandard.ZstdDecompressor()
            # register the reader on the stack so it is closed like the
            # other decompressors instead of being left open
            file = ctx.enter_context(decompressor.stream_reader(file))

        # "rt" is the explicit spelling of text mode and must not silently
        # produce a binary stream
        if mode in ("r", "rt"):
            file = io.TextIOWrapper(file, encoding=encoding, errors=errors, newline=newline)
        yield file
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Record(collections.namedtuple("Record", ("id", "seq", "description"))):
    """A FASTA record made of an identifier, a sequence and a description."""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse(path):
    """Iterate over the records of a FASTA file.

    Arguments:
        path: Either a filesystem path, which is opened in text mode with
            `zopen`, or an iterable of text lines (for instance an already
            opened text file), which is consumed directly.

    Yields:
        `Record`: One record per ``>`` header, with the sequence lines
        joined together and the header split into identifier and
        description.

    Raises:
        ValueError: When sequence data is found but the input contains no
            header line at all.

    """
    with contextlib.ExitStack() as ctx:
        try:
            path = os.fsencode(path)
            file = ctx.enter_context(zopen(path, "r"))
        except TypeError:
            # `os.fsencode` rejects non-path objects: iterate over it as-is
            file = path

        # parse file
        id_ = None
        desc = ""
        seq = []
        for line in file:
            stripped = line.strip()
            if line.startswith(">"):
                if id_ is not None:
                    yield Record(id_, "".join(seq), desc)
                # Split the *stripped* header: with `maxsplit`, `str.split`
                # keeps trailing whitespace in the last field, so splitting
                # the raw line left the line terminator in the description.
                fields = stripped[1:].split(maxsplit=1)
                id_ = fields[0] if len(fields) > 0 else ""
                desc = fields[1] if len(fields) > 1 else ""
                seq = []
            elif stripped:
                seq.append(stripped)
        if id_ is not None:
            yield Record(id_, "".join(seq), desc)
        elif seq:
            raise ValueError("not in FASTA format")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
importlib-resources ; python_version < '3.9'
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
"""Test doctest contained tests in every file of the module.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import configparser
|
|
6
|
+
import doctest
|
|
7
|
+
import importlib
|
|
8
|
+
import json
|
|
9
|
+
import gzip
|
|
10
|
+
import os
|
|
11
|
+
import pkgutil
|
|
12
|
+
import re
|
|
13
|
+
import shutil
|
|
14
|
+
import sys
|
|
15
|
+
import types
|
|
16
|
+
import warnings
|
|
17
|
+
from unittest import mock
|
|
18
|
+
|
|
19
|
+
import pyaragorn
|
|
20
|
+
|
|
21
|
+
from .fasta import parse
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_tests_from_module(tests, module, globs, setUp=None, tearDown=None):
    """Add the doctests of every public submodule attribute of *module*.

    Each attribute of *module* whose name does not start with an underscore
    and which is itself a module gets its own `doctest.DocTestSuite`, built
    with the given globals and fixtures, appended to *tests*.

    Returns:
        The *tests* object that was passed in.

    """
    for name in dir(module):
        if name.startswith("_"):
            continue
        member = getattr(module, name)
        if not isinstance(member, types.ModuleType):
            continue
        tests.addTests(
            doctest.DocTestSuite(
                member,
                globs,
                setUp=setUp,
                tearDown=tearDown,
                optionflags=doctest.ELLIPSIS,
            )
        )
    return tests
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def load_tests(loader, tests, ignore):
    """`load_tests` hook used by unittest to find the doctests.

    Every submodule of `pyaragorn` except ``__main__``, ``tests*`` and
    ``cli*`` is imported, and its doctests are added to *tests*. The
    doctests run from the ``data`` folder next to this file, with a mocked
    ``Bio`` module and a sample ``record`` available in their globals.

    Arguments:
        loader: Unused.
        tests: The suite the doctests are added to.
        ignore: Unused.

    Returns:
        The *tests* suite.

    """
    # doctests are not compatible with `green`, so bail out early -- before
    # loading any fixture -- if `green` is running the tests
    if sys.argv[0].endswith("green"):
        return tests

    _current_cwd = os.getcwd()
    # demonstrate how to load sequences with Biopython without requiring Biopython
    Bio = mock.Mock()
    Bio.SeqIO = mock.Mock()
    Bio.SeqIO.read = lambda file, format: next(parse(file))

    # load sample record
    data_folder = os.path.realpath(os.path.join(__file__, os.path.pardir, "data"))
    with gzip.open(os.path.join(data_folder, "CP001621.fna.gz"), "rt") as f:
        record = next(parse(f))

    def setUp(self):
        warnings.simplefilter("ignore")
        os.chdir(data_folder)
        sys.modules["Bio"] = Bio
        sys.modules["Bio.SeqIO"] = Bio.SeqIO

    def tearDown(self):
        os.chdir(_current_cwd)
        warnings.simplefilter(warnings.defaultaction)
        # a default avoids a KeyError if an entry was already removed
        sys.modules.pop("Bio", None)
        sys.modules.pop("Bio.SeqIO", None)

    # recursively traverse all library submodules and load tests from them;
    # the leading `None` is the sentinel that stops `iter(packages.pop, None)`
    packages = [None, pyaragorn]
    for pkg in iter(packages.pop, None):
        for (_, subpkgname, subispkg) in pkgutil.walk_packages(pkg.__path__):
            # do not import __main__ module to avoid side effects!
            if subpkgname == "__main__" or subpkgname.startswith(("tests", "cli")):
                continue
            # import the submodule and add it to the tests
            module = importlib.import_module(".".join([pkg.__name__, subpkgname]))
            # Start from the module namespace and layer the fixtures on top:
            # passing both as keyword arguments to `dict()` raises a
            # TypeError as soon as the module already defines one of them.
            globs = dict(module.__dict__)
            globs.update(pyaragorn=pyaragorn, json=json, gzip=gzip, Bio=Bio, record=record)
            tests.addTests(
                doctest.DocTestSuite(
                    module,
                    globs=globs,
                    setUp=setUp,
                    tearDown=tearDown,
                    optionflags=doctest.ELLIPSIS,
                )
            )
            # if the submodule is a package, we need to process its submodules
            # as well, so we add it to the package queue
            if subispkg:
                packages.append(module)

    return tests
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
import re
|
|
3
|
+
import unittest
|
|
4
|
+
|
|
5
|
+
from .. import RNAFinder, TMRNAGene, TRNAGene
|
|
6
|
+
from . import data
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Summary line of a tRNA hit; groups are: index, amino acid, optional "c"
# marker, begin, end, energy, anticodon offset, anticodon.
_TRNA_RX = re.compile(
    r"^(\d+)\s+tRNA-([A-Za-z]{3})\s+(c?)\[(\d+),(\d+)\]"
    r"\s+([\d.]+)\s+(\d+)\s+\(([a-z]{2,4})\)"
)
# Summary line of a tmRNA hit; groups are: index, optional "c" marker, begin,
# end, energy, the two comma-separated CDS coordinates, tag peptide.
_TMRNA_RX = re.compile(
    r"^(\d+)\s+tmRNA\s+(c?)\[(\d+),(\d+)]"
    r"\s+([\d.]+)\s+(\d+),(\d+)\s+([A-Z\*]+)"
)
|
|
11
|
+
|
|
12
|
+
def batched(iterable, n, *, strict=False):
    """Yield successive tuples of at most *n* items taken from *iterable*.

    For instance ``batched('ABCDEFG', 3)`` produces ``ABC``, ``DEF``, ``G``.
    With ``strict=True``, a final batch shorter than *n* raises
    `ValueError` instead of being yielded. As this is a generator, the
    validation of *n* only happens once iteration starts.

    """
    if n < 1:
        raise ValueError('n must be at least one')
    source = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(source, n))
        if not chunk:
            # the source is exhausted
            return
        if strict and len(chunk) != n:
            raise ValueError('batched(): incomplete batch')
        yield chunk
|
|
21
|
+
|
|
22
|
+
class TestRNAFinder(unittest.TestCase):
    """Compare `RNAFinder` results on CP001621 with a stored reference output."""

    def _check_trna(self, gene, header, seq):
        """Compare a tRNA gene with its reference summary line and sequence."""
        fields = _TRNA_RX.match(header).groups()
        _, aa, complement, begin, end, energy, offset, anticodon = fields
        self.assertEqual(gene.amino_acid, aa)
        self.assertEqual(gene.begin, int(begin))
        self.assertEqual(gene.end, int(end))
        self.assertEqual(gene.anticodon_offset, int(offset))
        self.assertEqual(gene.anticodon_length, len(anticodon))
        self.assertEqual(gene.anticodon, anticodon)
        self.assertEqual(gene.strand, -1 if complement == "c" else +1)
        self.assertAlmostEqual(gene.energy, float(energy), places=1)
        self.assertEqual(gene.sequence().lower(), seq)

    def _check_tmrna(self, gene, header):
        """Compare a tmRNA gene with its reference summary line."""
        fields = _TMRNA_RX.match(header).groups()
        _, complement, begin, end, energy, cds_start, cds_end, peptide = fields
        self.assertEqual(gene.begin, int(begin))
        self.assertEqual(gene.end, int(end))
        self.assertEqual(gene.cds_offset, int(cds_start))
        self.assertEqual(gene.cds_offset + gene.cds_length, int(cds_end))
        self.assertEqual(gene.peptide(), peptide)
        self.assertEqual(gene.strand, -1 if complement == "c" else +1)
        self.assertAlmostEqual(gene.energy, float(energy), places=1)
        # TODO: the tmRNA sequence is not compared with the reference yet

    def test_default(self):
        record = data.load_record("CP001621.fna.gz")
        lines = data.load_text("CP001621.default.txt").splitlines()

        finder = RNAFinder(translation_table=11)
        genes = finder.find_rna(str(record.seq))

        # the first two lines of the reference are skipped; each gene is then
        # described by three lines (summary, sequence, structure)
        reference = batched(lines[2:], 3)
        for gene, expected in itertools.zip_longest(genes, reference):
            # `zip_longest` pads with None when the gene counts differ
            self.assertIsNotNone(gene)
            self.assertIsNotNone(expected)
            result, seq, ss = expected
            if gene.type == "tRNA":
                self._check_trna(gene, result, seq)
            elif gene.type == "tmRNA":
                self._check_tmrna(gene, result)

    def test_trna(self):
        record = data.load_record("CP001621.fna.gz")
        finder = RNAFinder(translation_table=11, tmrna=False, trna=True)
        found = finder.find_rna(str(record.seq))
        for gene in found:
            self.assertIsInstance(gene, TRNAGene)

    def test_tmrna(self):
        record = data.load_record("CP001621.fna.gz")
        finder = RNAFinder(translation_table=11, tmrna=True, trna=False)
        found = finder.find_rna(str(record.seq))
        for gene in found:
            self.assertIsInstance(gene, TMRNAGene)
|