pyaragorn 0.1.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyaragorn might be problematic. Click here for more details.

@@ -0,0 +1,10 @@
1
+ from . import (
2
+ test_rna_finder,
3
+ test_doctest,
4
+ )
5
+
6
+
7
def load_tests(loader, suite, pattern):
    """Assemble the package test suite for ``unittest``.

    The test cases of ``test_rna_finder`` are collected through ``loader``
    and added to ``suite``; ``test_doctest.load_tests`` is then called with
    the same ``suite`` so it can register its own tests.

    Returns:
        The ``suite`` object that was passed in.
    """
    rna_finder_tests = loader.loadTestsFromModule(test_rna_finder)
    suite.addTests(rna_finder_tests)
    # The return value is not used here; `suite` itself is handed over.
    test_doctest.load_tests(loader, suite, pattern)
    return suite
@@ -0,0 +1,95 @@
1
+ >CP001621.1 Mycoplasma mycoides subsp. capri str. GM12 transgenic clone tetM-lacZ, complete genome
2
+ 31 genes found
3
+ 1 tRNA-Leu c[87124,87207] 116.6 35 (tag)
4
+ gggggattggcggaattggcagacgcactagacttaggatctagcgtctttaacgtaagggttcaagtcccttatcccccacca
5
+ (((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))vvvvvvvvvvvv(((((ttttttt))))))))))))
6
+ 2 tRNA-Lys c[87210,87285] 124.1 34 (ttt)
7
+ gactcgttagctcagccggtagagcaactggcttttaaccagtgggtccggggttcgaatccccgacgagtcacca
8
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
9
+ 3 tRNA-Lys c[139027,139102] 117.7 34 (ctt)
10
+ gtctgattagcgcaactggcagagcaactgactcttaatcagtgggttgtgggttcgattcccacatcaggcacca
11
+ (((((((ss((.(dddddddd).))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
12
+ 4 tmRNA [198037,198447] 109.7 87,173 AEKNEENFEMPAFMINNASAGANYMFA**
13
+ ggggatgtcatggatttgacaggatat|gtatcttggacgcgagttcgattctcgccatctccacca
14
+ (((((((dddddddddddd((((((((|))))))))vvv(((((ttttttt))))))))))))aaaa
15
+ 5 tRNA-Ser [355003,355092] 119.7 35 (gct)
16
+ gggttaatactcaagttggtgaagaggacaccctgctaaggtgttaggtcggtctccggcgcgagggttcgagtccctcttaacccgcca
17
+ (((((((ss(((ddddddddddd)))((((((ccAAAcc))))))ss((((vvvvv))))ss(((((ttttttt))))))))))))
18
+ 6 tRNA-Gly [371077,371150] 118.5 33 (tcc)
19
+ gcaggtgtagtttaatggcagaacttcagccttccaagctgattgtgagggttcgattcccttcacctgctcca
20
+ (((((((ss((((ddddddd))))s(((((ccAAAcc)))))vvvv(((((ttttttt))))))))))))
21
+ 7 tRNA-Arg [459024,459100] 118.0 35 (tct)
22
+ gcccatgtagctcagtaggatagagcacgcgccttctaagcgtgaggtcggaagttcgagccttctcgtgggcacca
23
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
24
+ 8 tRNA-Leu [521320,521404] 117.7 35 (caa)
25
+ gcccttttggcggaattggcagacgcattagactcaaaatctaacgaagaaattcgtatcggttcgaccccgataaagggcacca
26
+ (((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))((((vvvv))))s(((((ttttttt))))))))))))
27
+ 9 tRNA-Leu c[611707,611795] 119.4 35 (taa)
28
+ ccccaagtggcggaataggtagacgcattggacttaaaatccaacgggcttaatatcctgtgccggttcaagtccggccttggggacca
29
+ (((((((ss(((ddddddddddd)))s(((((ccAAAcc)))))((((vvvvvvvv))))s(((((ttttttt))))))))))))
30
+ 10 tRNA-Lys c[611806,611881] 124.1 34 (ttt)
31
+ gactcgttagctcagccggtagagcaactggcttttaaccagtgggtccggggttcgaatccccgacgagtcacca
32
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
33
+ 11 tRNA-Gln c[611886,611960] 116.0 33 (ttg)
34
+ tgggctatagccaagcggtaaggcaagggactttgactccctcatgcgccggttcgaatcctgctagcccaacca
35
+ (((((((ss(((ddddddddd)))s(((((ccAAAcc)))))vvvvv((.((ttttttt)).)))))))))
36
+ 12 tRNA-Tyr c[611967,612050] 120.5 35 (gta)
37
+ ggaggggtagcgaagtggctaaacgcgggtggctgtaacccacttccttacggttcgggggttcgaatccctccccctccacca
38
+ (((((((ss(((ddddddddddd)))((((((ccAAAcc))))))vvvvvvvvvvv(((((ttttttt))))))))))))
39
+ 13 tRNA-Thr c[612057,612132] 119.0 34 (agt)
40
+ gctgacttagctcagttggtagagcaattgactagtaatcaataggtcgaaggttcaaatcctttagtcagcacca
41
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
42
+ 14 tRNA-Trp c[763746,763820] 118.0 33 (cca)
43
+ aggagagtagttcaatggtagaacgtcggtctccaaaaccgagcgttgagggttcgattcctttctctcctgcca
44
+ (((((((ss((((ddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
45
+ 15 tRNA-SeC c[763858,763933] 118.2 34 (tca)
46
+ aggggcatagttcagtaggtagaacatcggtcttcaaaaccgagtgtcacgagttcgagtcttgttgcccctgcca
47
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
48
+ 16 tRNA-His c[766239,766314] 113.8 34 (gtg)
49
+ gcgtaggtggtgaagtggttaacacatcaggttgtggctctgacatgcgcgggttcgatccccgttctacgcccca
50
+ (((((((ss(((dddddddddd)))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
51
+ 17 tRNA-Ile c[778452,778528] 120.9 35 (gat)
52
+ cggaatatagctcagctggttagagcactccgctgataacggagaggtcgttggttcaagtccaattattccgacca
53
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
54
+ 18 tRNA-Thr c[813006,813081] 121.2 34 (tgt)
55
+ gctgacttagctcagcaggcagagcaactgacttgtaatcagtaggtcgtaggttcgattcctatagtcagcacca
56
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
57
+ 19 tRNA-Val c[813094,813169] 122.2 34 (tac)
58
+ ggagtgttagctcagctgggagagctcctgccttacaagcaggcggtcataggttcaagtcctatacactccacca
59
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
60
+ 20 tRNA-Glu c[813177,813252] 119.7 34 (ttc)
61
+ ggcctgttggtgaagcggttaacacacacggttttcatccgtggacacacgggttcgaaccccgtacaggctacca
62
+ (((((((ss(((dddddddddd)))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
63
+ 21 tRNA-Asn c[813260,813335] 123.7 34 (gtt)
64
+ ggctttttagctcagcaggtagagcaaccggctgttaaccggtttgtcacaggttcgagccctgtaaaagccgcca
65
+ (((((((ss((((dddddddd))))((((((ccAAAcc))))))vvvv(((((ttttttt))))))))))))
66
+ 22 tRNA-Arg [856360,856436] 117.7 35 (acg)
67
+ gcgcccgtagatcaattggatagatcgcttgactacggatcaaaaggttgggggttcgagtccctccgggcgcacca
68
+ (((((((ss((((ddddddddd))))s.((((ccAAAcc)))).vvvvv(((((ttttttt))))))))))))
69
+ 23 tRNA-Pro [856484,856560] 121.2 35 (tgg)
70
+ cgggaagtggctcagtttggtagagcattcggtttgggaccgaagggtcgcaggttcaaatcctgtcttcccgacca
71
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
72
+ 24 tRNA-Ala [856571,856646] 121.1 34 (tgc)
73
+ gggcccttagctcagctgggagagcacctgccttgcacgcagggggtcgacggttcgatcccgttagggtccacca
74
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
75
+ 25 tRNA-Met [856651,856727] 120.1 35 (cat)
76
+ ggcggggtagctcagttggttagagcgttcggttcatacccgaaaggtcgagagttcaactctctcccccgctacca
77
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
78
+ 26 tRNA-Met [856739,856815] 122.2 35 (cat)
79
+ ggacctttagctcagttggttagagcatccggctcataaccggacggtcattggttcaagtccaataaggtccacca
80
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
81
+ 27 tRNA-Ser [856858,856950] 117.2 37 (tga)
82
+ ggaagattacccaagtccggctgaagggatcggtcttgaaaaccgagagtcggggaaacccgagcgggggttcgaatccctcatcttccgcca
83
+ (((((((ss(((.ddddddddddd.)))s(((((ccAAAcc)))))sss(((((vvvv)))))ss(((((ttttttt))))))))))))
84
+ 28 tRNA-Met [856973,857048] 123.7 34 (cat)
85
+ cgcggggtagagcagttggtagctcgccgggctcataacccggaggccgcaggttcgagtcctgcccccgcaacca
86
+ .((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))).
87
+ 29 tRNA-Asp [857051,857127] 123.9 35 (gtc)
88
+ ggccccatagcgaagttggttatcgcgcctccctgtcacggaggagatcacgggttcgagtcccgttggggtcgcca
89
+ (((((((ss((((ddddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
90
+ 30 tRNA-Phe [857136,857211] 119.3 34 (gaa)
91
+ ggtcgtgtagctcagtcggtagagcagcagactgaagctctgcgtgtcggcggttcaattccgtccacgaccacca
92
+ (((((((ss((((dddddddd))))s(((((ccAAAcc)))))vvvvv(((((ttttttt))))))))))))
93
+ 31 tRNA-Cys [975123,975197] 117.5 34 (gca)
94
+ ggcaatatagccaagcggctaaggcatgggtctgcaacaccctgatcgtcggttcgaatccgactattgcctcca
95
+ (((((((ss(((dddddddddd)))s.((((ccAAAcc)))).vvvv(((((ttttttt))))))))))))
Binary file
@@ -0,0 +1,30 @@
1
+ import contextlib
2
+ import gzip
3
+ import io
4
+
5
+ try:
6
+ try:
7
+ from importlib.resources import files
8
+ except ImportError:
9
+ from importlib_resources import files # type: ignore
10
+ except ImportError:
11
+ files = None # type: ignore
12
+
13
+ from ..fasta import parse, zopen
14
+
15
@contextlib.contextmanager
def load(name, mode="rb"):
    """Open a data file stored alongside this module.

    Arguments:
        name: File name, resolved against ``files(__name__)``.
        mode: Mode forwarded to ``zopen``.

    Yields:
        The stream produced by ``zopen`` for the resolved file.

    Raises:
        RuntimeError: When ``files`` is ``None``, i.e. neither
            ``importlib.resources.files`` nor the ``importlib_resources``
            backport could be imported at module load time.
    """
    # The import fallback at the top of the module leaves `files = None`;
    # calling it would fail with an opaque "'NoneType' is not callable".
    if files is None:
        raise RuntimeError(
            "cannot locate test data: `importlib.resources.files` is not "
            "available, install the `importlib-resources` package"
        )
    with zopen(files(__name__).joinpath(name), mode=mode) as src:
        yield src
19
+
20
def load_record(name):
    """Return the first record parsed from the data file ``name``.

    The file is opened through ``load`` in ``"r"`` mode and handed to
    ``parse``; only the first item it produces is consumed.
    """
    with load(name, mode="r") as handle:
        records = parse(handle)
        return next(records)
23
+
24
def load_records(name):
    """Return every record parsed from the data file ``name`` as a list.

    The file is opened through ``load`` in ``"r"`` mode and fully consumed
    by ``parse`` before it is closed.
    """
    with load(name, mode="r") as handle:
        records = list(parse(handle))
    return records
27
+
28
def load_text(name):
    """Return the whole content of the data file ``name``.

    The file is opened through ``load`` in ``"r"`` mode and read in a
    single call.
    """
    with load(name, mode="r") as handle:
        content = handle.read()
    return content
@@ -0,0 +1,86 @@
1
+ import bz2
2
+ import collections
3
+ import contextlib
4
+ import io
5
+ import os
6
+
7
+
8
+
9
# Leading bytes used to recognise each supported compression format.
_BZ2_MAGIC = b"BZh"
_GZIP_MAGIC = b"\x1f\x8b"
_XZ_MAGIC = b"\xfd7zXZ"
_LZ4_MAGIC = b"\x04\x22\x4d\x18"
_ZSTD_MAGIC = b"\x28\xb5\x2f\xfd"


@contextlib.contextmanager
def zopen(file, mode='r', encoding=None, errors=None, newline=None):
    """Open a file for reading, transparently decompressing it.

    The file is always opened in binary mode and its first bytes are
    compared with the gzip, bzip2, xz, LZ4 and Zstandard magic numbers; on
    a match the stream is wrapped in the corresponding decompressor.
    Anything else is read as-is.

    Arguments:
        file: Path of the file to open (anything ``os.fsencode`` accepts).
        mode: ``"r"`` or ``"rt"`` to get a text stream; any other value
            yields the binary stream.
        encoding, errors, newline: Forwarded to ``io.TextIOWrapper`` when a
            text stream is requested.

    Yields:
        The opened stream. Every handle registered here is closed when the
        context exits.

    Raises:
        TypeError: If ``file`` is not a path (raised by ``os.fsencode``).
        RuntimeError: If the file is LZ4- or Zstandard-compressed and the
            matching third-party module cannot be imported.
    """
    with contextlib.ExitStack() as ctx:
        path = os.fsencode(file)
        file = ctx.enter_context(open(path, mode="rb"))

        # `peek` does not advance the stream, so the decompressor still
        # sees the file from its very first byte.
        peek = file.peek()
        if peek.startswith(_GZIP_MAGIC):
            try:
                from isal import igzip as gzip
            except ImportError:
                import gzip  # type: ignore
            file = ctx.enter_context(gzip.open(file, mode="rb"))
        elif peek.startswith(_BZ2_MAGIC):
            import bz2
            file = ctx.enter_context(bz2.open(file, mode="rb"))
        elif peek.startswith(_XZ_MAGIC):
            import lzma
            file = ctx.enter_context(lzma.open(file, mode="rb"))
        elif peek.startswith(_LZ4_MAGIC):
            try:
                import lz4.frame
            except ImportError as err:
                raise RuntimeError("File compression is LZ4 but lz4 is not installed") from err
            file = ctx.enter_context(lz4.frame.open(file))
        elif peek.startswith(_ZSTD_MAGIC):
            try:
                import zstandard
            except ImportError as err:
                raise RuntimeError("File compression is ZSTD but zstandard is not installed") from err
            decompressor = zstandard.ZstdDecompressor()
            # Register the reader so it is closed like the other wrappers.
            file = ctx.enter_context(decompressor.stream_reader(file))

        # "rt" is the explicit spelling of "r" and must yield text as well.
        if mode in ("r", "rt"):
            file = io.TextIOWrapper(file, encoding=encoding, errors=errors, newline=newline)
        yield file
55
+
56
+
57
class Record(collections.namedtuple("Record", ["id", "seq", "description"])):
    """A FASTA record: identifier, sequence and header description."""


def parse(path):
    """Iterate over the records of a FASTA file.

    Arguments:
        path: Either a path (opened through ``zopen`` in text mode) or an
            already opened iterable of text lines, which is used as-is.

    Yields:
        Record: One record per ``>`` header. ``id`` is the first
        whitespace-delimited token of the header, ``description`` the rest
        of the header line with surrounding whitespace removed, and ``seq``
        the concatenation of the stripped, non-empty lines that follow.

    Raises:
        ValueError: If sequence data is found before any header line.
    """
    with contextlib.ExitStack() as ctx:
        # Only the path conversion decides between "path" and "file-like";
        # errors raised while opening the file must not be masked.
        try:
            path = os.fsencode(path)
        except TypeError:
            file = path
        else:
            file = ctx.enter_context(zopen(path, "r"))

        # parse file
        id_ = None
        desc = ""
        seq = []
        for line in file:
            l = line.strip()
            if line.startswith(">"):
                if id_ is not None:
                    yield Record(id_, "".join(seq), desc)
                # Split the stripped line: with `maxsplit`, `str.split`
                # keeps trailing whitespace (including the newline) in the
                # last field.
                fields = l[1:].split(maxsplit=1)
                id_ = fields[0] if len(fields) > 0 else ""
                desc = fields[1] if len(fields) > 1 else ""
                seq = []
            elif l:
                if id_ is None:
                    # Sequence data with no header cannot belong to any
                    # record; reject it instead of silently dropping it.
                    raise ValueError("not in FASTA format")
                seq.append(l)
        if id_ is not None:
            yield Record(id_, "".join(seq), desc)
@@ -0,0 +1 @@
1
+ importlib-resources ; python_version < '3.9'
@@ -0,0 +1,93 @@
1
+ # coding: utf-8
2
+ """Test doctest contained tests in every file of the module.
3
+ """
4
+
5
+ import configparser
6
+ import doctest
7
+ import importlib
8
+ import json
9
+ import gzip
10
+ import os
11
+ import pkgutil
12
+ import re
13
+ import shutil
14
+ import sys
15
+ import types
16
+ import warnings
17
+ from unittest import mock
18
+
19
+ import pyaragorn
20
+
21
+ from .fasta import parse
22
+
23
+
24
+ def _load_tests_from_module(tests, module, globs, setUp=None, tearDown=None):
25
+ """Load tests from module, iterating through submodules."""
26
+ for attr in (getattr(module, x) for x in dir(module) if not x.startswith("_")):
27
+ if isinstance(attr, types.ModuleType):
28
+ suite = doctest.DocTestSuite(
29
+ attr,
30
+ globs,
31
+ setUp=setUp,
32
+ tearDown=tearDown,
33
+ optionflags=+doctest.ELLIPSIS,
34
+ )
35
+ tests.addTests(suite)
36
+ return tests
37
+
38
+
39
def load_tests(loader, tests, ignore):
    """`load_tests` function used by unittest to find the doctests.

    Walks every submodule of ``pyaragorn`` (skipping ``__main__`` and names
    starting with ``tests`` or ``cli``) and adds a ``doctest.DocTestSuite``
    for each of them to ``tests``. The doctests run from the ``data``
    folder next to this file, with a mocked ``Bio`` package and a sample
    ``record`` available as globals.

    Returns:
        The ``tests`` suite that was passed in.
    """
    # doctests are not compatible with `green`, so bail out before doing
    # any work (such as reading the sample data) if `green` is running
    if sys.argv[0].endswith("green"):
        return tests

    _current_cwd = os.getcwd()
    # demonstrate how to load sequences with Biopython without requiring Biopython
    Bio = mock.Mock()
    Bio.SeqIO = mock.Mock()
    Bio.SeqIO.read = lambda file, format: next(parse(file))

    # load sample record
    data_folder = os.path.realpath(os.path.join(__file__, os.path.pardir, "data"))
    with gzip.open(os.path.join(data_folder, "CP001621.fna.gz"), "rt") as f:
        record = next(parse(f))

    def setUp(self):
        warnings.simplefilter("ignore")
        os.chdir(data_folder)
        sys.modules["Bio"] = Bio
        sys.modules["Bio.SeqIO"] = Bio.SeqIO

    def tearDown(self):
        os.chdir(_current_cwd)
        warnings.simplefilter(warnings.defaultaction)
        # tolerate a doctest that already removed the mocked modules
        sys.modules.pop("Bio", None)
        sys.modules.pop("Bio.SeqIO", None)

    # names made available to every doctest on top of the module globals
    helpers = dict(pyaragorn=pyaragorn, json=json, gzip=gzip, Bio=Bio, record=record)

    # recursively traverse all library submodules and load tests from them
    packages = [None, pyaragorn]
    for pkg in iter(packages.pop, None):
        for (_, subpkgname, subispkg) in pkgutil.walk_packages(pkg.__path__):
            # do not import __main__ module to avoid side effects!
            if subpkgname == "__main__" or subpkgname.startswith(("tests", "cli")):
                continue
            # import the submodule and add it to the tests
            module = importlib.import_module(".".join([pkg.__name__, subpkgname]))
            # merge instead of `dict(name=..., **module.__dict__)`, which
            # raises TypeError as soon as the module defines one of the
            # helper names itself (e.g. after `import gzip`)
            globs = dict(module.__dict__)
            globs.update(helpers)
            tests.addTests(
                doctest.DocTestSuite(
                    module,
                    globs=globs,
                    setUp=setUp,
                    tearDown=tearDown,
                    optionflags=doctest.ELLIPSIS,
                )
            )
            # if the submodule is a package, we need to process its submodules
            # as well, so we add it to the package queue
            if subispkg and subpkgname != "tests":
                packages.append(module)

    return tests
@@ -0,0 +1,69 @@
1
+ import itertools
2
+ import re
3
+ import unittest
4
+
5
+ from .. import RNAFinder, TMRNAGene, TRNAGene
6
+ from . import data
7
+
8
+
9
# Summary line of a tRNA gene in the expected output. Groups, in order:
# index, amino acid, complement flag ("c" or empty), begin, end, energy,
# anticodon offset, anticodon.
_TRNA_RX = re.compile(
    r"^(\d+)\s+tRNA-([A-Za-z]{3})\s+(c?)\[(\d+),(\d+)\]\s+([\d.]+)\s+(\d+)\s+\(([a-z]{2,4})\)"
)
# Summary line of a tmRNA gene in the expected output. Groups, in order:
# index, complement flag ("c" or empty), begin, end, energy, CDS start,
# CDS end, peptide.
_TMRNA_RX = re.compile(
    r"^(\d+)\s+tmRNA\s+(c?)\[(\d+),(\d+)]\s+([\d.]+)\s+(\d+),(\d+)\s+([A-Z\*]+)"
)
11
+
12
def batched(iterable, n, *, strict=False):
    """Yield successive tuples of at most ``n`` items from ``iterable``.

    For instance ``batched('ABCDEFG', 3)`` produces ``ABC``, ``DEF``, ``G``.

    Raises:
        ValueError: If ``n`` is smaller than one, or if ``strict`` is true
            and a batch holds fewer than ``n`` items.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    source = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(source, n))
        if not chunk:
            return
        if strict and len(chunk) != n:
            raise ValueError('batched(): incomplete batch')
        yield chunk
21
+
22
class TestRNAFinder(unittest.TestCase):
    """Check ``RNAFinder`` predictions on the CP001621 sample genome."""

    def _check_trna(self, gene, result, seq):
        """Compare a tRNA gene with its expected summary line and sequence."""
        matched = _TRNA_RX.match(result)
        # Fail with a readable message rather than an AttributeError on
        # `None.groups()` when the expected line cannot be parsed.
        self.assertIsNotNone(matched, "cannot parse expected tRNA line: {!r}".format(result))
        _, aa, complement, begin, end, energy, offset, anticodon = matched.groups()
        self.assertEqual(gene.amino_acid, aa)
        self.assertEqual(gene.begin, int(begin))
        self.assertEqual(gene.end, int(end))
        self.assertEqual(gene.anticodon_offset, int(offset))
        self.assertEqual(gene.anticodon_length, len(anticodon))
        self.assertEqual(gene.anticodon, anticodon)
        self.assertEqual(gene.strand, -1 if complement == "c" else +1)
        self.assertAlmostEqual(gene.energy, float(energy), places=1)
        self.assertEqual(gene.sequence().lower(), seq)

    def _check_tmrna(self, gene, result):
        """Compare a tmRNA gene with its expected summary line."""
        matched = _TMRNA_RX.match(result)
        self.assertIsNotNone(matched, "cannot parse expected tmRNA line: {!r}".format(result))
        _, complement, begin, end, energy, cds_start, cds_end, peptide = matched.groups()
        self.assertEqual(gene.begin, int(begin))
        self.assertEqual(gene.end, int(end))
        self.assertEqual(gene.cds_offset, int(cds_start))
        self.assertEqual(gene.cds_offset + gene.cds_length, int(cds_end))
        self.assertEqual(gene.peptide(), peptide)
        self.assertEqual(gene.strand, -1 if complement == "c" else +1)
        self.assertAlmostEqual(gene.energy, float(energy), places=1)
        # self.assertEqual(gene.sequence().lower(), seq)  # TODO

    def test_default(self):
        """Genes found with default settings match the stored reference."""
        record = data.load_record("CP001621.fna.gz")
        lines = data.load_text("CP001621.default.txt").splitlines()

        finder = RNAFinder(translation_table=11)
        genes = finder.find_rna(str(record.seq))

        # The first two lines of the reference are skipped; each gene then
        # takes three lines (summary, sequence, secondary structure).
        # `zip_longest` pads the shorter side with None, so a count
        # mismatch trips the `assertIsNotNone` checks below.
        for gene, expected in itertools.zip_longest(genes, batched(lines[2:], 3)):
            self.assertIsNotNone(gene)
            self.assertIsNotNone(expected)
            result, seq, ss = expected
            if gene.type == "tRNA":
                self._check_trna(gene, result, seq)
            elif gene.type == "tmRNA":
                self._check_tmrna(gene, result)

    def test_trna(self):
        """With ``tmrna=False`` every reported gene is a ``TRNAGene``."""
        record = data.load_record("CP001621.fna.gz")
        finder = RNAFinder(translation_table=11, tmrna=False, trna=True)
        for gene in finder.find_rna(str(record.seq)):
            self.assertIsInstance(gene, TRNAGene)

    def test_tmrna(self):
        """With ``trna=False`` every reported gene is a ``TMRNAGene``."""
        record = data.load_record("CP001621.fna.gz")
        finder = RNAFinder(translation_table=11, tmrna=True, trna=False)
        for gene in finder.find_rna(str(record.seq)):
            self.assertIsInstance(gene, TMRNAGene)