biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -6,31 +6,22 @@ __name__ = "biotite.application.muscle"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["Muscle5App"]
|
|
8
8
|
|
|
9
|
-
import
|
|
10
|
-
import
|
|
11
|
-
from
|
|
12
|
-
from ..localapp import cleanup_tempfile
|
|
13
|
-
from ..msaapp import MSAApp
|
|
14
|
-
from ..application import AppState, VersionError, requires_state
|
|
15
|
-
from ...sequence.sequence import Sequence
|
|
16
|
-
from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
17
|
-
from ...sequence.align.matrix import SubstitutionMatrix
|
|
18
|
-
from ...sequence.align.alignment import Alignment
|
|
19
|
-
from ...sequence.phylo.tree import Tree
|
|
20
|
-
from .app3 import get_version
|
|
9
|
+
from biotite.application.application import AppState, VersionError, requires_state
|
|
10
|
+
from biotite.application.msaapp import MSAApp
|
|
11
|
+
from biotite.application.muscle.app3 import get_version
|
|
21
12
|
|
|
22
13
|
|
|
23
14
|
class Muscle5App(MSAApp):
|
|
24
15
|
"""
|
|
25
16
|
Perform a multiple sequence alignment using MUSCLE version 5.
|
|
26
|
-
|
|
17
|
+
|
|
27
18
|
Parameters
|
|
28
19
|
----------
|
|
29
20
|
sequences : list of Sequence
|
|
30
21
|
The sequences to be aligned.
|
|
31
22
|
bin_path : str, optional
|
|
32
23
|
Path of the MUSCLE binary.
|
|
33
|
-
|
|
24
|
+
|
|
34
25
|
See also
|
|
35
26
|
--------
|
|
36
27
|
MuscleApp
|
|
@@ -38,7 +29,7 @@ class Muscle5App(MSAApp):
|
|
|
38
29
|
Notes
|
|
39
30
|
-----
|
|
40
31
|
Alignment ensemble generation is not supported, yet.
|
|
41
|
-
|
|
32
|
+
|
|
42
33
|
Examples
|
|
43
34
|
--------
|
|
44
35
|
|
|
@@ -56,14 +47,14 @@ class Muscle5App(MSAApp):
|
|
|
56
47
|
BI-SMITE
|
|
57
48
|
-I-QLITE
|
|
58
49
|
"""
|
|
59
|
-
|
|
50
|
+
|
|
60
51
|
def __init__(self, sequences, bin_path="muscle"):
|
|
61
52
|
major_version = get_version(bin_path)[0]
|
|
62
53
|
if major_version < 5:
|
|
63
54
|
raise VersionError(
|
|
64
55
|
f"At least Muscle 5 is required, got version {major_version}"
|
|
65
56
|
)
|
|
66
|
-
|
|
57
|
+
|
|
67
58
|
super().__init__(sequences, bin_path)
|
|
68
59
|
self._mode = "align"
|
|
69
60
|
self._consiters = None
|
|
@@ -86,7 +77,7 @@ class Muscle5App(MSAApp):
|
|
|
86
77
|
self._consiters = consistency
|
|
87
78
|
if refinement is not None:
|
|
88
79
|
self._refineiters = refinement
|
|
89
|
-
|
|
80
|
+
|
|
90
81
|
@requires_state(AppState.CREATED)
|
|
91
82
|
def set_thread_number(self, number):
|
|
92
83
|
"""
|
|
@@ -110,48 +101,49 @@ class Muscle5App(MSAApp):
|
|
|
110
101
|
args = [
|
|
111
102
|
f"-{self._mode}",
|
|
112
103
|
self.get_input_file_path(),
|
|
113
|
-
"-output",
|
|
104
|
+
"-output",
|
|
105
|
+
self.get_output_file_path(),
|
|
114
106
|
]
|
|
115
107
|
if self.get_seqtype() == "protein":
|
|
116
108
|
args += ["-amino"]
|
|
117
109
|
else:
|
|
118
110
|
args += ["-nt"]
|
|
119
111
|
if self._n_threads is not None:
|
|
120
|
-
|
|
112
|
+
args += ["-threads", str(self._n_threads)]
|
|
121
113
|
if self._consiters is not None:
|
|
122
|
-
|
|
114
|
+
args += ["-consiters", str(self._consiters)]
|
|
123
115
|
if self._refineiters is not None:
|
|
124
|
-
|
|
116
|
+
args += ["-refineiters", str(self._refineiters)]
|
|
125
117
|
self.set_arguments(args)
|
|
126
118
|
super().run()
|
|
127
|
-
|
|
119
|
+
|
|
128
120
|
def clean_up(self):
|
|
129
121
|
super().clean_up()
|
|
130
|
-
|
|
122
|
+
|
|
131
123
|
@staticmethod
|
|
132
124
|
def supports_nucleotide():
|
|
133
125
|
return True
|
|
134
|
-
|
|
126
|
+
|
|
135
127
|
@staticmethod
|
|
136
128
|
def supports_protein():
|
|
137
129
|
return True
|
|
138
|
-
|
|
130
|
+
|
|
139
131
|
@staticmethod
|
|
140
132
|
def supports_custom_nucleotide_matrix():
|
|
141
133
|
return False
|
|
142
|
-
|
|
134
|
+
|
|
143
135
|
@staticmethod
|
|
144
136
|
def supports_custom_protein_matrix():
|
|
145
137
|
return False
|
|
146
|
-
|
|
138
|
+
|
|
147
139
|
@classmethod
|
|
148
140
|
def align(cls, sequences, bin_path="muscle"):
|
|
149
141
|
"""
|
|
150
142
|
Perform a multiple sequence alignment.
|
|
151
|
-
|
|
143
|
+
|
|
152
144
|
This is a convenience function, that wraps the :class:`Muscle5App`
|
|
153
145
|
execution.
|
|
154
|
-
|
|
146
|
+
|
|
155
147
|
Parameters
|
|
156
148
|
----------
|
|
157
149
|
sequences : iterable object of Sequence
|
|
@@ -159,7 +151,7 @@ class Muscle5App(MSAApp):
|
|
|
159
151
|
bin_path : str, optional
|
|
160
152
|
Path of the MSA software binary. By default, the default path
|
|
161
153
|
will be used.
|
|
162
|
-
|
|
154
|
+
|
|
163
155
|
Returns
|
|
164
156
|
-------
|
|
165
157
|
alignment : Alignment
|
biotite/application/sra/app.py
CHANGED
|
@@ -7,17 +7,21 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["FastaDumpApp", "FastqDumpApp"]
|
|
8
8
|
|
|
9
9
|
import abc
|
|
10
|
-
from os.path import join
|
|
11
|
-
from subprocess import Popen, SubprocessError, PIPE, TimeoutExpired
|
|
12
10
|
import glob
|
|
11
|
+
from os.path import join
|
|
12
|
+
from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
|
|
13
13
|
from tempfile import TemporaryDirectory
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
from
|
|
14
|
+
from biotite.application.application import (
|
|
15
|
+
Application,
|
|
16
|
+
AppState,
|
|
17
|
+
AppStateError,
|
|
18
|
+
requires_state,
|
|
19
|
+
)
|
|
20
|
+
from biotite.sequence.io.fasta.convert import get_sequences
|
|
21
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
22
|
+
from biotite.sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
|
|
23
|
+
from biotite.sequence.io.fastq.file import FastqFile
|
|
24
|
+
from biotite.sequence.seqtypes import NucleotideSequence
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
# Do not use LocalApp, as two programs are executed
|
|
@@ -48,8 +52,13 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
48
52
|
the score format.
|
|
49
53
|
"""
|
|
50
54
|
|
|
51
|
-
def __init__(
|
|
52
|
-
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
uid,
|
|
58
|
+
output_path_prefix=None,
|
|
59
|
+
prefetch_path="prefetch",
|
|
60
|
+
fasterq_dump_path="fasterq-dump",
|
|
61
|
+
):
|
|
53
62
|
super().__init__()
|
|
54
63
|
self._prefetch_path = prefetch_path
|
|
55
64
|
self._fasterq_dump_path = fasterq_dump_path
|
|
@@ -62,21 +71,16 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
62
71
|
self._prefetch_process = None
|
|
63
72
|
self._fasterq_dump_process = None
|
|
64
73
|
|
|
65
|
-
|
|
66
74
|
@requires_state(AppState.RUNNING | AppState.FINISHED)
|
|
67
75
|
def join(self, timeout=None):
|
|
68
76
|
# Override method as repetitive calls of 'is_finished()'
|
|
69
77
|
# are not necessary as 'communicate()' already waits for the
|
|
70
78
|
# finished application
|
|
71
79
|
try:
|
|
72
|
-
_, self._stderr = self._process.communicate(
|
|
73
|
-
timeout=timeout
|
|
74
|
-
)
|
|
80
|
+
_, self._stderr = self._process.communicate(timeout=timeout)
|
|
75
81
|
except TimeoutExpired:
|
|
76
82
|
self.cancel()
|
|
77
|
-
raise TimeoutError(
|
|
78
|
-
f"The application expired its timeout ({timeout:.1f} s)"
|
|
79
|
-
)
|
|
83
|
+
raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
|
|
80
84
|
self._state = AppState.FINISHED
|
|
81
85
|
|
|
82
86
|
try:
|
|
@@ -90,7 +94,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
90
94
|
self._state = AppState.JOINED
|
|
91
95
|
self.clean_up()
|
|
92
96
|
|
|
93
|
-
|
|
94
97
|
def run(self):
|
|
95
98
|
# Prefetch into a temp directory with file name equaling UID
|
|
96
99
|
# This ensures that the ID in the header is not the temp prefix
|
|
@@ -105,16 +108,14 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
105
108
|
command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
|
|
106
109
|
)
|
|
107
110
|
|
|
108
|
-
|
|
109
111
|
def is_finished(self):
|
|
110
112
|
code = self._process.poll()
|
|
111
|
-
if code
|
|
113
|
+
if code is None:
|
|
112
114
|
return False
|
|
113
115
|
else:
|
|
114
|
-
_,
|
|
116
|
+
_, self._stderr = self._process.communicate()
|
|
115
117
|
return True
|
|
116
118
|
|
|
117
|
-
|
|
118
119
|
def evaluate(self):
|
|
119
120
|
super().evaluate()
|
|
120
121
|
# Check if applicaion terminated correctly
|
|
@@ -128,26 +129,24 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
128
129
|
|
|
129
130
|
self._file_names = (
|
|
130
131
|
# For entries with one read per spot
|
|
131
|
-
glob.glob(self._prefix +
|
|
132
|
+
glob.glob(self._prefix + ".fastq")
|
|
133
|
+
+
|
|
132
134
|
# For entries with multiple reads per spot
|
|
133
135
|
glob.glob(self._prefix + "_*.fastq")
|
|
134
136
|
)
|
|
135
137
|
# Only load FASTQ files into memory when needed
|
|
136
138
|
self._fastq_files = None
|
|
137
139
|
|
|
138
|
-
|
|
139
140
|
def wait_interval(self):
|
|
140
141
|
# Not used in this implementation of 'join()'
|
|
141
142
|
raise NotImplementedError()
|
|
142
143
|
|
|
143
|
-
|
|
144
144
|
def clean_up(self):
|
|
145
145
|
if self.get_app_state() == AppState.CANCELLED:
|
|
146
146
|
self._process.kill()
|
|
147
147
|
# Directory with temp files does not need to be deleted,
|
|
148
148
|
# as temp dir is automatically deleted upon object destruction
|
|
149
149
|
|
|
150
|
-
|
|
151
150
|
@requires_state(AppState.CREATED)
|
|
152
151
|
def get_prefetch_options(self):
|
|
153
152
|
"""
|
|
@@ -176,7 +175,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
176
175
|
"""
|
|
177
176
|
return ""
|
|
178
177
|
|
|
179
|
-
|
|
180
178
|
@requires_state(AppState.JOINED)
|
|
181
179
|
def get_file_paths(self):
|
|
182
180
|
"""
|
|
@@ -189,7 +187,6 @@ class _DumpApp(Application, metaclass=abc.ABCMeta):
|
|
|
189
187
|
"""
|
|
190
188
|
return self._file_names
|
|
191
189
|
|
|
192
|
-
|
|
193
190
|
@requires_state(AppState.JOINED)
|
|
194
191
|
@abc.abstractmethod
|
|
195
192
|
def get_sequences(self):
|
|
@@ -236,15 +233,18 @@ class FastqDumpApp(_DumpApp):
|
|
|
236
233
|
the score format.
|
|
237
234
|
"""
|
|
238
235
|
|
|
239
|
-
def __init__(
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
236
|
+
def __init__(
|
|
237
|
+
self,
|
|
238
|
+
uid,
|
|
239
|
+
output_path_prefix=None,
|
|
240
|
+
prefetch_path="prefetch",
|
|
241
|
+
fasterq_dump_path="fasterq-dump",
|
|
242
|
+
offset="Sanger",
|
|
243
|
+
):
|
|
244
|
+
super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
|
|
244
245
|
self._offset = offset
|
|
245
246
|
self._fastq_files = None
|
|
246
247
|
|
|
247
|
-
|
|
248
248
|
@requires_state(AppState.JOINED)
|
|
249
249
|
def get_fastq(self):
|
|
250
250
|
"""
|
|
@@ -265,20 +265,16 @@ class FastqDumpApp(_DumpApp):
|
|
|
265
265
|
]
|
|
266
266
|
return self._fastq_files
|
|
267
267
|
|
|
268
|
-
|
|
269
268
|
@requires_state(AppState.JOINED)
|
|
270
269
|
def get_sequences(self):
|
|
271
270
|
return [
|
|
272
271
|
{
|
|
273
|
-
header: NucleotideSequence(
|
|
274
|
-
seq_str.replace("U","T").replace("X","N")
|
|
275
|
-
)
|
|
272
|
+
header: NucleotideSequence(seq_str.replace("U", "T").replace("X", "N"))
|
|
276
273
|
for header, (seq_str, _) in fastq_file.items()
|
|
277
274
|
}
|
|
278
275
|
for fastq_file in self.get_fastq()
|
|
279
276
|
]
|
|
280
277
|
|
|
281
|
-
|
|
282
278
|
@requires_state(AppState.JOINED)
|
|
283
279
|
def get_sequences_and_scores(self):
|
|
284
280
|
"""
|
|
@@ -294,15 +290,17 @@ class FastqDumpApp(_DumpApp):
|
|
|
294
290
|
Each item in the list is a dictionary mapping identifiers to its
|
|
295
291
|
corresponding sequence and score values.
|
|
296
292
|
"""
|
|
297
|
-
return [
|
|
298
|
-
get_sequences_and_scores(fastq_file)
|
|
299
|
-
for fastq_file in self.get_fastq()
|
|
300
|
-
]
|
|
301
|
-
|
|
293
|
+
return [get_sequences_and_scores(fastq_file) for fastq_file in self.get_fastq()]
|
|
302
294
|
|
|
303
295
|
@classmethod
|
|
304
|
-
def fetch(
|
|
305
|
-
|
|
296
|
+
def fetch(
|
|
297
|
+
cls,
|
|
298
|
+
uid,
|
|
299
|
+
output_path_prefix=None,
|
|
300
|
+
prefetch_path="prefetch",
|
|
301
|
+
fasterq_dump_path="fasterq-dump",
|
|
302
|
+
offset="Sanger",
|
|
303
|
+
):
|
|
306
304
|
"""
|
|
307
305
|
Get the sequences belonging to the UID from the
|
|
308
306
|
*NCBI sequence read archive* (SRA).
|
|
@@ -338,9 +336,7 @@ class FastqDumpApp(_DumpApp):
|
|
|
338
336
|
Each item in the list is a dictionary mapping identifiers to its
|
|
339
337
|
corresponding sequence.
|
|
340
338
|
"""
|
|
341
|
-
app = cls(
|
|
342
|
-
uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset
|
|
343
|
-
)
|
|
339
|
+
app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset)
|
|
344
340
|
app.start()
|
|
345
341
|
app.join()
|
|
346
342
|
return app.get_sequences()
|
|
@@ -368,14 +364,16 @@ class FastaDumpApp(_DumpApp):
|
|
|
368
364
|
respectively.
|
|
369
365
|
"""
|
|
370
366
|
|
|
371
|
-
def __init__(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
367
|
+
def __init__(
|
|
368
|
+
self,
|
|
369
|
+
uid,
|
|
370
|
+
output_path_prefix=None,
|
|
371
|
+
prefetch_path="prefetch",
|
|
372
|
+
fasterq_dump_path="fasterq-dump",
|
|
373
|
+
):
|
|
374
|
+
super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
|
|
376
375
|
self._fasta_files = None
|
|
377
376
|
|
|
378
|
-
|
|
379
377
|
@requires_state(AppState.CREATED)
|
|
380
378
|
def get_prefetch_options(self):
|
|
381
379
|
return
|
|
@@ -383,12 +381,10 @@ class FastaDumpApp(_DumpApp):
|
|
|
383
381
|
# when https://github.com/ncbi/sra-tools/issues/883 is resolved
|
|
384
382
|
# return "--eliminate-quals"
|
|
385
383
|
|
|
386
|
-
|
|
387
384
|
@requires_state(AppState.CREATED)
|
|
388
385
|
def get_fastq_dump_options(self):
|
|
389
386
|
return "--fasta"
|
|
390
387
|
|
|
391
|
-
|
|
392
388
|
@requires_state(AppState.JOINED)
|
|
393
389
|
def get_fasta(self):
|
|
394
390
|
"""
|
|
@@ -404,20 +400,22 @@ class FastaDumpApp(_DumpApp):
|
|
|
404
400
|
"""
|
|
405
401
|
if self._fasta_files is None:
|
|
406
402
|
self._fasta_files = [
|
|
407
|
-
FastaFile.read(file_name)
|
|
408
|
-
for file_name in self.get_file_paths()
|
|
403
|
+
FastaFile.read(file_name) for file_name in self.get_file_paths()
|
|
409
404
|
]
|
|
410
405
|
return self._fasta_files
|
|
411
406
|
|
|
412
|
-
|
|
413
407
|
@requires_state(AppState.JOINED)
|
|
414
408
|
def get_sequences(self):
|
|
415
409
|
return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]
|
|
416
410
|
|
|
417
|
-
|
|
418
411
|
@classmethod
|
|
419
|
-
def fetch(
|
|
420
|
-
|
|
412
|
+
def fetch(
|
|
413
|
+
cls,
|
|
414
|
+
uid,
|
|
415
|
+
output_path_prefix=None,
|
|
416
|
+
prefetch_path="prefetch",
|
|
417
|
+
fasterq_dump_path="fasterq-dump",
|
|
418
|
+
):
|
|
421
419
|
"""
|
|
422
420
|
Get the sequences belonging to the UID from the
|
|
423
421
|
*NCBI sequence read archive* (SRA).
|
|
@@ -448,9 +446,7 @@ class FastaDumpApp(_DumpApp):
|
|
|
448
446
|
Each item in the list is a dictionary mapping identifiers to its
|
|
449
447
|
corresponding sequence.
|
|
450
448
|
"""
|
|
451
|
-
app = cls(
|
|
452
|
-
uid, output_path_prefix, prefetch_path, fasterq_dump_path
|
|
453
|
-
)
|
|
449
|
+
app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
|
|
454
450
|
app.start()
|
|
455
451
|
app.join()
|
|
456
|
-
return app.get_sequences()
|
|
452
|
+
return app.get_sequences()
|
|
@@ -6,17 +6,15 @@ __name__ = "biotite.application.tantan"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["TantanApp"]
|
|
8
8
|
|
|
9
|
-
from collections.abc import Sequence as SequenceABC
|
|
10
9
|
import io
|
|
10
|
+
from collections.abc import Sequence as SequenceABC
|
|
11
11
|
from tempfile import NamedTemporaryFile
|
|
12
12
|
import numpy as np
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from ..util import map_sequence, map_matrix
|
|
19
|
-
|
|
13
|
+
from biotite.application.application import AppState, requires_state
|
|
14
|
+
from biotite.application.localapp import LocalApp, cleanup_tempfile
|
|
15
|
+
from biotite.sequence.alphabet import common_alphabet
|
|
16
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
17
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
20
18
|
|
|
21
19
|
MASKING_LETTER = "!"
|
|
22
20
|
|
|
@@ -43,7 +41,7 @@ class TantanApp(LocalApp):
|
|
|
43
41
|
|
|
44
42
|
References
|
|
45
43
|
----------
|
|
46
|
-
|
|
44
|
+
|
|
47
45
|
.. footbibliography::
|
|
48
46
|
|
|
49
47
|
Examples
|
|
@@ -59,10 +57,10 @@ class TantanApp(LocalApp):
|
|
|
59
57
|
True True True True True True True True False False False False
|
|
60
58
|
False]
|
|
61
59
|
>>> print(sequence, "\n" + "".join(["^" if e else " " for e in repeat_mask]))
|
|
62
|
-
GGCATCGATATATATATATAGTCAA
|
|
63
|
-
^^^^^^^^^^^
|
|
60
|
+
GGCATCGATATATATATATAGTCAA
|
|
61
|
+
^^^^^^^^^^^
|
|
64
62
|
"""
|
|
65
|
-
|
|
63
|
+
|
|
66
64
|
def __init__(self, sequence, matrix=None, bin_path="tantan"):
|
|
67
65
|
super().__init__(bin_path)
|
|
68
66
|
|
|
@@ -93,59 +91,43 @@ class TantanApp(LocalApp):
|
|
|
93
91
|
)
|
|
94
92
|
self._is_protein = True
|
|
95
93
|
else:
|
|
96
|
-
raise TypeError(
|
|
97
|
-
|
|
98
|
-
)
|
|
99
|
-
|
|
94
|
+
raise TypeError("A NucleotideSequence or ProteinSequence is required")
|
|
95
|
+
|
|
100
96
|
if matrix is None:
|
|
101
97
|
self._matrix_file = None
|
|
102
98
|
else:
|
|
103
|
-
common_alph = common_alphabet(
|
|
104
|
-
(seq.alphabet for seq in self._sequences)
|
|
105
|
-
)
|
|
99
|
+
common_alph = common_alphabet((seq.alphabet for seq in self._sequences))
|
|
106
100
|
if common_alph is None:
|
|
107
|
-
raise ValueError(
|
|
108
|
-
"There is no common alphabet within the sequences"
|
|
109
|
-
)
|
|
101
|
+
raise ValueError("There is no common alphabet within the sequences")
|
|
110
102
|
if not matrix.get_alphabet1().extends(common_alph):
|
|
111
103
|
raise ValueError(
|
|
112
104
|
"The alphabet of the sequence(s) do not fit the matrix"
|
|
113
105
|
)
|
|
114
106
|
if not matrix.is_symmetric():
|
|
115
107
|
raise ValueError("A symmetric matrix is required")
|
|
116
|
-
self._matrix_file = NamedTemporaryFile(
|
|
117
|
-
"w", suffix=".mat", delete=False
|
|
118
|
-
)
|
|
108
|
+
self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
|
|
119
109
|
self._matrix = matrix
|
|
120
|
-
|
|
121
|
-
self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
|
|
122
110
|
|
|
111
|
+
self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
|
|
123
112
|
|
|
124
113
|
def run(self):
|
|
125
114
|
FastaFile.write_iter(
|
|
126
115
|
self._in_file,
|
|
127
|
-
(
|
|
128
|
-
(f"sequence_{i:d}", str(seq))
|
|
129
|
-
for i, seq in enumerate(self._sequences)
|
|
130
|
-
)
|
|
116
|
+
((f"sequence_{i:d}", str(seq)) for i, seq in enumerate(self._sequences)),
|
|
131
117
|
)
|
|
132
118
|
self._in_file.flush()
|
|
133
119
|
if self._matrix is not None:
|
|
134
120
|
self._matrix_file.write(str(self._matrix))
|
|
135
121
|
self._matrix_file.flush()
|
|
136
|
-
|
|
122
|
+
|
|
137
123
|
args = []
|
|
138
124
|
if self._matrix is not None:
|
|
139
125
|
args += ["-m", self._matrix_file.name]
|
|
140
126
|
if self._is_protein:
|
|
141
|
-
|
|
142
|
-
args += [
|
|
143
|
-
"-x", MASKING_LETTER,
|
|
144
|
-
self._in_file.name
|
|
145
|
-
]
|
|
127
|
+
args += ["-p"]
|
|
128
|
+
args += ["-x", MASKING_LETTER, self._in_file.name]
|
|
146
129
|
self.set_arguments(args)
|
|
147
130
|
super().run()
|
|
148
|
-
|
|
149
131
|
|
|
150
132
|
def evaluate(self):
|
|
151
133
|
super().evaluate()
|
|
@@ -154,18 +136,14 @@ class TantanApp(LocalApp):
|
|
|
154
136
|
self._masks = []
|
|
155
137
|
encoded_masking_letter = MASKING_LETTER.encode("ASCII")[0]
|
|
156
138
|
for _, masked_seq_string in FastaFile.read_iter(out_file):
|
|
157
|
-
array = np.frombuffer(
|
|
158
|
-
masked_seq_string.encode("ASCII"), dtype=np.ubyte
|
|
159
|
-
)
|
|
139
|
+
array = np.frombuffer(masked_seq_string.encode("ASCII"), dtype=np.ubyte)
|
|
160
140
|
self._masks.append(array == encoded_masking_letter)
|
|
161
|
-
|
|
162
141
|
|
|
163
142
|
def clean_up(self):
|
|
164
143
|
super().clean_up()
|
|
165
144
|
cleanup_tempfile(self._in_file)
|
|
166
145
|
if self._matrix_file is not None:
|
|
167
146
|
cleanup_tempfile(self._matrix_file)
|
|
168
|
-
|
|
169
147
|
|
|
170
148
|
@requires_state(AppState.JOINED)
|
|
171
149
|
def get_mask(self):
|
|
@@ -186,7 +164,6 @@ class TantanApp(LocalApp):
|
|
|
186
164
|
else:
|
|
187
165
|
return self._masks[0]
|
|
188
166
|
|
|
189
|
-
|
|
190
167
|
@staticmethod
|
|
191
168
|
def mask_repeats(sequence, matrix=None, bin_path="tantan"):
|
|
192
169
|
"""
|
|
@@ -219,4 +196,4 @@ class TantanApp(LocalApp):
|
|
|
219
196
|
app = TantanApp(sequence, matrix, bin_path)
|
|
220
197
|
app.start()
|
|
221
198
|
app.join()
|
|
222
|
-
return app.get_mask()
|
|
199
|
+
return app.get_mask()
|
biotite/application/util.py
CHANGED
|
@@ -8,15 +8,15 @@ __all__ = ["map_sequence", "map_matrix"]
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
|
-
from
|
|
12
|
-
from
|
|
11
|
+
from biotite.sequence.align.matrix import SubstitutionMatrix
|
|
12
|
+
from biotite.sequence.seqtypes import ProteinSequence
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def map_sequence(sequence):
|
|
16
16
|
"""
|
|
17
17
|
Map a sequence with an arbitrary alphabet into a
|
|
18
18
|
:class:`ProteinSequence`, in order to support arbitrary sequence
|
|
19
|
-
types in software that can handle protein sequences.
|
|
19
|
+
types in software that can handle protein sequences.
|
|
20
20
|
"""
|
|
21
21
|
if len(sequence.alphabet) > len(ProteinSequence.alphabet):
|
|
22
22
|
# Cannot map into a protein sequence if the alphabet
|
|
@@ -39,12 +39,11 @@ def map_matrix(matrix):
|
|
|
39
39
|
Map a :class:`SubstitutionMatrix` with an arbitrary alphabet into a
|
|
40
40
|
class:`SubstitutionMatrix` for protein sequences, in order to support
|
|
41
41
|
arbitrary sequence types in software that can handle protein
|
|
42
|
-
sequences.
|
|
42
|
+
sequences.
|
|
43
43
|
"""
|
|
44
44
|
if matrix is None:
|
|
45
45
|
raise TypeError(
|
|
46
|
-
"A substitution matrix must be provided for custom "
|
|
47
|
-
"sequence types"
|
|
46
|
+
"A substitution matrix must be provided for custom " "sequence types"
|
|
48
47
|
)
|
|
49
48
|
# Create a protein substitution matrix with the values taken
|
|
50
49
|
# from the original matrix
|
|
@@ -54,6 +53,5 @@ def map_matrix(matrix):
|
|
|
54
53
|
new_score_matrix = np.zeros((new_length, new_length))
|
|
55
54
|
new_score_matrix[:old_length, :old_length] = matrix.score_matrix()
|
|
56
55
|
return SubstitutionMatrix(
|
|
57
|
-
ProteinSequence.alphabet, ProteinSequence.alphabet,
|
|
58
|
-
|
|
59
|
-
)
|
|
56
|
+
ProteinSequence.alphabet, ProteinSequence.alphabet, new_score_matrix
|
|
57
|
+
)
|