biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/application/msaapp.py
CHANGED
|
@@ -7,22 +7,22 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["MSAApp"]
|
|
8
8
|
|
|
9
9
|
import abc
|
|
10
|
-
from tempfile import NamedTemporaryFile
|
|
11
10
|
from collections import OrderedDict
|
|
11
|
+
from tempfile import NamedTemporaryFile
|
|
12
12
|
import numpy as np
|
|
13
|
-
from .
|
|
14
|
-
from .application import
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from .
|
|
13
|
+
from biotite.application.application import AppState, requires_state
|
|
14
|
+
from biotite.application.localapp import LocalApp, cleanup_tempfile
|
|
15
|
+
from biotite.application.util import map_matrix, map_sequence
|
|
16
|
+
from biotite.sequence.align.alignment import Alignment
|
|
17
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
18
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
22
22
|
"""
|
|
23
23
|
This is an abstract base class for multiple sequence alignment
|
|
24
24
|
software.
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
It handles conversion of :class:`Sequence` objects to FASTA input
|
|
27
27
|
and FASTA output to an :class:`Alignment` object.
|
|
28
28
|
Inheriting subclasses only need to incorporate the file path
|
|
@@ -41,10 +41,10 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
41
41
|
sequences are mapped back into the original sequence types.
|
|
42
42
|
The mapping does not work, when the alphabet of the exotic
|
|
43
43
|
sequences is larger than the amino acid alphabet.
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
Internally this creates a :class:`Popen` instance, which handles
|
|
46
46
|
the execution.
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
Parameters
|
|
49
49
|
----------
|
|
50
50
|
sequences : iterable object of Sequence
|
|
@@ -54,10 +54,10 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
54
54
|
matrix : SubstitutionMatrix, optional
|
|
55
55
|
A custom substitution matrix.
|
|
56
56
|
"""
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
def __init__(self, sequences, bin_path, matrix=None):
|
|
59
59
|
super().__init__(bin_path)
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
if len(sequences) < 2:
|
|
62
62
|
raise ValueError("At least two sequences are required")
|
|
63
63
|
# Check if all sequences share the same alphabet
|
|
@@ -68,40 +68,39 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
68
68
|
# Check matrix symmetry
|
|
69
69
|
if matrix is not None and not matrix.is_symmetric():
|
|
70
70
|
raise ValueError(
|
|
71
|
-
"A symmetric matrix is required for "
|
|
72
|
-
"multiple sequence alignments"
|
|
71
|
+
"A symmetric matrix is required for " "multiple sequence alignments"
|
|
73
72
|
)
|
|
74
73
|
|
|
75
|
-
|
|
76
74
|
# Check whether the program supports the alignment for the given
|
|
77
75
|
# sequence type
|
|
78
|
-
if ProteinSequence.alphabet.extends(alphabet)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
and self.supports_nucleotide()
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
76
|
+
if ProteinSequence.alphabet.extends(alphabet) and self.supports_protein():
|
|
77
|
+
self._is_mapped = False
|
|
78
|
+
self._seqtype = "protein"
|
|
79
|
+
if matrix is not None:
|
|
80
|
+
if not self.supports_custom_protein_matrix():
|
|
81
|
+
raise TypeError(
|
|
82
|
+
"The software does not support custom "
|
|
83
|
+
"substitution matrices for protein sequences"
|
|
84
|
+
)
|
|
85
|
+
self._matrix = matrix
|
|
86
|
+
else:
|
|
87
|
+
self._matrix = None
|
|
88
|
+
|
|
89
|
+
elif (
|
|
90
|
+
NucleotideSequence.alphabet_amb.extends(alphabet)
|
|
91
|
+
and self.supports_nucleotide()
|
|
92
|
+
):
|
|
93
|
+
self._is_mapped = False
|
|
94
|
+
self._seqtype = "nucleotide"
|
|
95
|
+
if matrix is not None:
|
|
96
|
+
if not self.supports_custom_nucleotide_matrix():
|
|
97
|
+
raise TypeError(
|
|
98
|
+
"The software does not support custom "
|
|
99
|
+
"substitution matrices for nucleotide sequences"
|
|
100
|
+
)
|
|
101
|
+
self._matrix = matrix
|
|
102
|
+
else:
|
|
103
|
+
self._matrix = None
|
|
105
104
|
|
|
106
105
|
else:
|
|
107
106
|
# For all other sequence types, try to map the sequence into
|
|
@@ -126,26 +125,16 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
126
125
|
self._sequences = sequences
|
|
127
126
|
# Sequence masquerades as protein
|
|
128
127
|
self._seqtype = "protein"
|
|
129
|
-
self._mapped_sequences = [
|
|
130
|
-
map_sequence(sequence) for sequence in sequences
|
|
131
|
-
]
|
|
128
|
+
self._mapped_sequences = [map_sequence(sequence) for sequence in sequences]
|
|
132
129
|
self._matrix = map_matrix(matrix)
|
|
133
130
|
|
|
134
|
-
|
|
135
131
|
self._sequences = sequences
|
|
136
|
-
self._in_file = NamedTemporaryFile(
|
|
137
|
-
|
|
138
|
-
)
|
|
139
|
-
self._out_file = NamedTemporaryFile(
|
|
140
|
-
"r", suffix=".fa", delete=False
|
|
141
|
-
)
|
|
142
|
-
self._matrix_file = NamedTemporaryFile(
|
|
143
|
-
"w", suffix=".mat", delete=False
|
|
144
|
-
)
|
|
132
|
+
self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
|
|
133
|
+
self._out_file = NamedTemporaryFile("r", suffix=".fa", delete=False)
|
|
134
|
+
self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
|
|
145
135
|
|
|
146
136
|
def run(self):
|
|
147
|
-
sequences = self._sequences if not self._is_mapped
|
|
148
|
-
else self._mapped_sequences
|
|
137
|
+
sequences = self._sequences if not self._is_mapped else self._mapped_sequences
|
|
149
138
|
sequences_file = FastaFile()
|
|
150
139
|
for i, seq in enumerate(sequences):
|
|
151
140
|
sequences_file[str(i)] = str(seq)
|
|
@@ -155,7 +144,7 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
155
144
|
self._matrix_file.write(str(self._matrix))
|
|
156
145
|
self._matrix_file.flush()
|
|
157
146
|
super().run()
|
|
158
|
-
|
|
147
|
+
|
|
159
148
|
def evaluate(self):
|
|
160
149
|
super().evaluate()
|
|
161
150
|
alignment_file = FastaFile.read(self._out_file)
|
|
@@ -169,26 +158,26 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
169
158
|
# Also obtain original order
|
|
170
159
|
self._order = np.zeros(len(seq_dict), dtype=int)
|
|
171
160
|
for i, seq_index in enumerate(seq_dict):
|
|
172
|
-
|
|
173
|
-
|
|
161
|
+
self._order[i] = int(seq_index)
|
|
162
|
+
|
|
174
163
|
def clean_up(self):
|
|
175
164
|
super().clean_up()
|
|
176
165
|
cleanup_tempfile(self._in_file)
|
|
177
166
|
cleanup_tempfile(self._out_file)
|
|
178
167
|
cleanup_tempfile(self._matrix_file)
|
|
179
|
-
|
|
168
|
+
|
|
180
169
|
@requires_state(AppState.JOINED)
|
|
181
170
|
def get_alignment(self):
|
|
182
171
|
"""
|
|
183
172
|
Get the resulting multiple sequence alignment.
|
|
184
|
-
|
|
173
|
+
|
|
185
174
|
Returns
|
|
186
175
|
-------
|
|
187
176
|
alignment : Alignment
|
|
188
177
|
The global multiple sequence alignment.
|
|
189
178
|
"""
|
|
190
179
|
return self._alignment
|
|
191
|
-
|
|
180
|
+
|
|
192
181
|
@requires_state(AppState.JOINED)
|
|
193
182
|
def get_alignment_order(self):
|
|
194
183
|
"""
|
|
@@ -202,12 +191,12 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
202
191
|
order.
|
|
203
192
|
This method returns the order of the sequences intended by the
|
|
204
193
|
MSA software.
|
|
205
|
-
|
|
194
|
+
|
|
206
195
|
Returns
|
|
207
196
|
-------
|
|
208
197
|
order : ndarray, dtype=int
|
|
209
198
|
The sequence order intended by the MSA software.
|
|
210
|
-
|
|
199
|
+
|
|
211
200
|
Examples
|
|
212
201
|
--------
|
|
213
202
|
Align sequences and restore the original order:
|
|
@@ -220,39 +209,39 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
220
209
|
alignment = alignment[:, order]
|
|
221
210
|
"""
|
|
222
211
|
return self._order
|
|
223
|
-
|
|
212
|
+
|
|
224
213
|
def get_input_file_path(self):
|
|
225
214
|
"""
|
|
226
215
|
Get input file path (FASTA format).
|
|
227
|
-
|
|
216
|
+
|
|
228
217
|
PROTECTED: Do not call from outside.
|
|
229
|
-
|
|
218
|
+
|
|
230
219
|
Returns
|
|
231
220
|
-------
|
|
232
221
|
path : str
|
|
233
222
|
Path of input file.
|
|
234
223
|
"""
|
|
235
224
|
return self._in_file.name
|
|
236
|
-
|
|
225
|
+
|
|
237
226
|
def get_output_file_path(self):
|
|
238
227
|
"""
|
|
239
228
|
Get output file path (FASTA format).
|
|
240
|
-
|
|
229
|
+
|
|
241
230
|
PROTECTED: Do not call from outside.
|
|
242
|
-
|
|
231
|
+
|
|
243
232
|
Returns
|
|
244
233
|
-------
|
|
245
234
|
path : str
|
|
246
235
|
Path of output file.
|
|
247
236
|
"""
|
|
248
237
|
return self._out_file.name
|
|
249
|
-
|
|
238
|
+
|
|
250
239
|
def get_matrix_file_path(self):
|
|
251
240
|
"""
|
|
252
241
|
Get file path for custom substitution matrix.
|
|
253
|
-
|
|
242
|
+
|
|
254
243
|
PROTECTED: Do not call from outside.
|
|
255
|
-
|
|
244
|
+
|
|
256
245
|
Returns
|
|
257
246
|
-------
|
|
258
247
|
path : str or None
|
|
@@ -260,7 +249,7 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
260
249
|
None if no matrix was given.
|
|
261
250
|
"""
|
|
262
251
|
return self._matrix_file.name if self._matrix is not None else None
|
|
263
|
-
|
|
252
|
+
|
|
264
253
|
def get_seqtype(self):
|
|
265
254
|
"""
|
|
266
255
|
Get the type of aligned sequences.
|
|
@@ -268,16 +257,16 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
268
257
|
When a custom sequence type (neither nucleotide nor protein)
|
|
269
258
|
is mapped onto a protein sequence, the return value is also
|
|
270
259
|
``'protein'``.
|
|
271
|
-
|
|
260
|
+
|
|
272
261
|
PROTECTED: Do not call from outside.
|
|
273
|
-
|
|
262
|
+
|
|
274
263
|
Returns
|
|
275
264
|
-------
|
|
276
265
|
seqtype : {'nucleotide', 'protein'}
|
|
277
266
|
Type of sequences to be aligned.
|
|
278
267
|
"""
|
|
279
268
|
return self._seqtype
|
|
280
|
-
|
|
269
|
+
|
|
281
270
|
@staticmethod
|
|
282
271
|
@abc.abstractmethod
|
|
283
272
|
def supports_nucleotide():
|
|
@@ -289,11 +278,11 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
289
278
|
-------
|
|
290
279
|
support : bool
|
|
291
280
|
True, if the class has support, false otherwise.
|
|
292
|
-
|
|
281
|
+
|
|
293
282
|
PROTECTED: Override when inheriting.
|
|
294
283
|
"""
|
|
295
284
|
pass
|
|
296
|
-
|
|
285
|
+
|
|
297
286
|
@staticmethod
|
|
298
287
|
@abc.abstractmethod
|
|
299
288
|
def supports_protein():
|
|
@@ -305,11 +294,11 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
305
294
|
-------
|
|
306
295
|
support : bool
|
|
307
296
|
True, if the class has support, false otherwise.
|
|
308
|
-
|
|
297
|
+
|
|
309
298
|
PROTECTED: Override when inheriting.
|
|
310
299
|
"""
|
|
311
300
|
pass
|
|
312
|
-
|
|
301
|
+
|
|
313
302
|
@staticmethod
|
|
314
303
|
@abc.abstractmethod
|
|
315
304
|
def supports_custom_nucleotide_matrix():
|
|
@@ -321,11 +310,11 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
321
310
|
-------
|
|
322
311
|
support : bool
|
|
323
312
|
True, if the class has support, false otherwise.
|
|
324
|
-
|
|
313
|
+
|
|
325
314
|
PROTECTED: Override when inheriting.
|
|
326
315
|
"""
|
|
327
316
|
pass
|
|
328
|
-
|
|
317
|
+
|
|
329
318
|
@staticmethod
|
|
330
319
|
@abc.abstractmethod
|
|
331
320
|
def supports_custom_protein_matrix():
|
|
@@ -337,19 +326,19 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
337
326
|
-------
|
|
338
327
|
support : bool
|
|
339
328
|
True, if the class has support, false otherwise.
|
|
340
|
-
|
|
329
|
+
|
|
341
330
|
PROTECTED: Override when inheriting.
|
|
342
331
|
"""
|
|
343
332
|
pass
|
|
344
|
-
|
|
333
|
+
|
|
345
334
|
@classmethod
|
|
346
335
|
def align(cls, sequences, bin_path=None, matrix=None):
|
|
347
336
|
"""
|
|
348
337
|
Perform a multiple sequence alignment.
|
|
349
|
-
|
|
338
|
+
|
|
350
339
|
This is a convenience function, that wraps the :class:`MSAApp`
|
|
351
340
|
execution.
|
|
352
|
-
|
|
341
|
+
|
|
353
342
|
Parameters
|
|
354
343
|
----------
|
|
355
344
|
sequences : iterable object of Sequence
|
|
@@ -359,7 +348,7 @@ class MSAApp(LocalApp, metaclass=abc.ABCMeta):
|
|
|
359
348
|
path will be used.
|
|
360
349
|
matrix : SubstitutionMatrix, optional
|
|
361
350
|
A custom substitution matrix.
|
|
362
|
-
|
|
351
|
+
|
|
363
352
|
Returns
|
|
364
353
|
-------
|
|
365
354
|
alignment : Alignment
|
|
@@ -6,25 +6,22 @@ __name__ = "biotite.application.muscle"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["MuscleApp"]
|
|
8
8
|
|
|
9
|
-
import re
|
|
10
9
|
import numbers
|
|
11
|
-
import
|
|
10
|
+
import re
|
|
12
11
|
import subprocess
|
|
12
|
+
import warnings
|
|
13
|
+
from collections.abc import Sequence
|
|
13
14
|
from tempfile import NamedTemporaryFile
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
19
|
-
from ...sequence.align.matrix import SubstitutionMatrix
|
|
20
|
-
from ...sequence.align.alignment import Alignment
|
|
21
|
-
from ...sequence.phylo.tree import Tree
|
|
15
|
+
from biotite.application.application import AppState, VersionError, requires_state
|
|
16
|
+
from biotite.application.localapp import cleanup_tempfile
|
|
17
|
+
from biotite.application.msaapp import MSAApp
|
|
18
|
+
from biotite.sequence.phylo.tree import Tree
|
|
22
19
|
|
|
23
20
|
|
|
24
21
|
class MuscleApp(MSAApp):
|
|
25
22
|
"""
|
|
26
23
|
Perform a multiple sequence alignment using MUSCLE version 3.
|
|
27
|
-
|
|
24
|
+
|
|
28
25
|
Parameters
|
|
29
26
|
----------
|
|
30
27
|
sequences : list of Sequence
|
|
@@ -33,11 +30,11 @@ class MuscleApp(MSAApp):
|
|
|
33
30
|
Path of the MUSCLE binary.
|
|
34
31
|
matrix : SubstitutionMatrix, optional
|
|
35
32
|
A custom substitution matrix.
|
|
36
|
-
|
|
33
|
+
|
|
37
34
|
See also
|
|
38
35
|
--------
|
|
39
36
|
Muscle5App
|
|
40
|
-
|
|
37
|
+
|
|
41
38
|
Examples
|
|
42
39
|
--------
|
|
43
40
|
|
|
@@ -55,34 +52,32 @@ class MuscleApp(MSAApp):
|
|
|
55
52
|
BISM-ITE
|
|
56
53
|
-IQL-ITE
|
|
57
54
|
"""
|
|
58
|
-
|
|
55
|
+
|
|
59
56
|
def __init__(self, sequences, bin_path="muscle", matrix=None):
|
|
60
57
|
major_version = get_version(bin_path)[0]
|
|
61
58
|
if major_version != 3:
|
|
62
|
-
raise VersionError(
|
|
63
|
-
|
|
64
|
-
)
|
|
65
|
-
|
|
59
|
+
raise VersionError(f"Muscle 3 is required, got version {major_version}")
|
|
60
|
+
|
|
66
61
|
super().__init__(sequences, bin_path, matrix)
|
|
67
62
|
self._gap_open = None
|
|
68
63
|
self._gap_ext = None
|
|
69
64
|
self._terminal_penalty = None
|
|
70
65
|
self._tree1 = None
|
|
71
66
|
self._tree2 = None
|
|
72
|
-
self._out_tree1_file = NamedTemporaryFile(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
self._out_tree2_file = NamedTemporaryFile(
|
|
76
|
-
"r", suffix=".tree", delete=False
|
|
77
|
-
)
|
|
78
|
-
|
|
67
|
+
self._out_tree1_file = NamedTemporaryFile("r", suffix=".tree", delete=False)
|
|
68
|
+
self._out_tree2_file = NamedTemporaryFile("r", suffix=".tree", delete=False)
|
|
69
|
+
|
|
79
70
|
def run(self):
|
|
80
71
|
args = [
|
|
81
72
|
"-quiet",
|
|
82
|
-
"-in",
|
|
83
|
-
|
|
84
|
-
"-
|
|
85
|
-
|
|
73
|
+
"-in",
|
|
74
|
+
self.get_input_file_path(),
|
|
75
|
+
"-out",
|
|
76
|
+
self.get_output_file_path(),
|
|
77
|
+
"-tree1",
|
|
78
|
+
self._out_tree1_file.name,
|
|
79
|
+
"-tree2",
|
|
80
|
+
self._out_tree2_file.name,
|
|
86
81
|
]
|
|
87
82
|
if self.get_seqtype() == "protein":
|
|
88
83
|
args += ["-seqtype", "protein"]
|
|
@@ -91,7 +86,7 @@ class MuscleApp(MSAApp):
|
|
|
91
86
|
if self.get_matrix_file_path() is not None:
|
|
92
87
|
args += ["-matrix", self.get_matrix_file_path()]
|
|
93
88
|
if self._gap_open is not None and self._gap_ext is not None:
|
|
94
|
-
args += ["-gapopen",
|
|
89
|
+
args += ["-gapopen", f"{self._gap_open:.1f}"]
|
|
95
90
|
args += ["-gapextend", f"{self._gap_ext:.1f}"]
|
|
96
91
|
# When the gap penalty is set,
|
|
97
92
|
# use the penalty also for hydrophobic regions
|
|
@@ -100,7 +95,7 @@ class MuscleApp(MSAApp):
|
|
|
100
95
|
args += ["-center", "0.0"]
|
|
101
96
|
self.set_arguments(args)
|
|
102
97
|
super().run()
|
|
103
|
-
|
|
98
|
+
|
|
104
99
|
def evaluate(self):
|
|
105
100
|
super().evaluate()
|
|
106
101
|
|
|
@@ -108,23 +103,19 @@ class MuscleApp(MSAApp):
|
|
|
108
103
|
if len(newick) > 0:
|
|
109
104
|
self._tree1 = Tree.from_newick(newick)
|
|
110
105
|
else:
|
|
111
|
-
warnings.warn(
|
|
112
|
-
|
|
113
|
-
)
|
|
114
|
-
|
|
106
|
+
warnings.warn("MUSCLE did not write a tree file from the first iteration")
|
|
107
|
+
|
|
115
108
|
newick = self._out_tree2_file.read().replace("\n", "")
|
|
116
109
|
if len(newick) > 0:
|
|
117
110
|
self._tree2 = Tree.from_newick(newick)
|
|
118
111
|
else:
|
|
119
|
-
warnings.warn(
|
|
120
|
-
|
|
121
|
-
)
|
|
122
|
-
|
|
112
|
+
warnings.warn("MUSCLE did not write a tree file from the second iteration")
|
|
113
|
+
|
|
123
114
|
def clean_up(self):
|
|
124
115
|
super().clean_up()
|
|
125
116
|
cleanup_tempfile(self._out_tree1_file)
|
|
126
117
|
cleanup_tempfile(self._out_tree2_file)
|
|
127
|
-
|
|
118
|
+
|
|
128
119
|
@requires_state(AppState.CREATED)
|
|
129
120
|
def set_gap_penalty(self, gap_penalty):
|
|
130
121
|
"""
|
|
@@ -145,20 +136,20 @@ class MuscleApp(MSAApp):
|
|
|
145
136
|
if gap_penalty > 0:
|
|
146
137
|
raise ValueError("Gap penalty must be negative")
|
|
147
138
|
self._gap_open = gap_penalty
|
|
148
|
-
self._gap_ext= gap_penalty
|
|
149
|
-
elif
|
|
139
|
+
self._gap_ext = gap_penalty
|
|
140
|
+
elif isinstance(gap_penalty, Sequence):
|
|
150
141
|
if gap_penalty[0] > 0 or gap_penalty[1] > 0:
|
|
151
|
-
|
|
142
|
+
raise ValueError("Gap penalty must be negative")
|
|
152
143
|
self._gap_open = gap_penalty[0]
|
|
153
144
|
self._gap_ext = gap_penalty[1]
|
|
154
145
|
else:
|
|
155
146
|
raise TypeError("Gap penalty must be either float or tuple")
|
|
156
|
-
|
|
147
|
+
|
|
157
148
|
@requires_state(AppState.JOINED)
|
|
158
149
|
def get_guide_tree(self, iteration="identity"):
|
|
159
150
|
"""
|
|
160
151
|
Get the guide tree created for the progressive alignment.
|
|
161
|
-
|
|
152
|
+
|
|
162
153
|
Parameters
|
|
163
154
|
----------
|
|
164
155
|
iteration : {'kmer', 'identity'}
|
|
@@ -168,7 +159,7 @@ class MuscleApp(MSAApp):
|
|
|
168
159
|
If 'identity' the second iteration tree is returned.
|
|
169
160
|
This tree uses distances based on the pairwise sequence
|
|
170
161
|
identity after the first progressive alignment iteration.
|
|
171
|
-
|
|
162
|
+
|
|
172
163
|
Returns
|
|
173
164
|
-------
|
|
174
165
|
tree : Tree
|
|
@@ -180,32 +171,31 @@ class MuscleApp(MSAApp):
|
|
|
180
171
|
return self._tree2
|
|
181
172
|
else:
|
|
182
173
|
raise ValueError("Iteration must be 'kmer' or 'identity'")
|
|
183
|
-
|
|
174
|
+
|
|
184
175
|
@staticmethod
|
|
185
176
|
def supports_nucleotide():
|
|
186
177
|
return True
|
|
187
|
-
|
|
178
|
+
|
|
188
179
|
@staticmethod
|
|
189
180
|
def supports_protein():
|
|
190
181
|
return True
|
|
191
|
-
|
|
182
|
+
|
|
192
183
|
@staticmethod
|
|
193
184
|
def supports_custom_nucleotide_matrix():
|
|
194
185
|
return False
|
|
195
|
-
|
|
186
|
+
|
|
196
187
|
@staticmethod
|
|
197
188
|
def supports_custom_protein_matrix():
|
|
198
189
|
return True
|
|
199
|
-
|
|
190
|
+
|
|
200
191
|
@classmethod
|
|
201
|
-
def align(cls, sequences, bin_path=None, matrix=None,
|
|
202
|
-
gap_penalty=None):
|
|
192
|
+
def align(cls, sequences, bin_path=None, matrix=None, gap_penalty=None):
|
|
203
193
|
"""
|
|
204
194
|
Perform a multiple sequence alignment.
|
|
205
|
-
|
|
195
|
+
|
|
206
196
|
This is a convenience function, that wraps the :class:`MuscleApp`
|
|
207
197
|
execution.
|
|
208
|
-
|
|
198
|
+
|
|
209
199
|
Parameters
|
|
210
200
|
----------
|
|
211
201
|
sequences : iterable object of Sequence
|
|
@@ -222,7 +212,7 @@ class MuscleApp(MSAApp):
|
|
|
222
212
|
The first value in the tuple is the gap opening penalty,
|
|
223
213
|
the second value is the gap extension penalty.
|
|
224
214
|
The values need to be negative.
|
|
225
|
-
|
|
215
|
+
|
|
226
216
|
Returns
|
|
227
217
|
-------
|
|
228
218
|
alignment : Alignment
|
|
@@ -240,15 +230,11 @@ class MuscleApp(MSAApp):
|
|
|
240
230
|
|
|
241
231
|
|
|
242
232
|
def get_version(bin_path="muscle"):
|
|
243
|
-
output = subprocess.run(
|
|
244
|
-
[bin_path, "-version"], capture_output=True, text=True
|
|
245
|
-
)
|
|
233
|
+
output = subprocess.run([bin_path, "-version"], capture_output=True, text=True)
|
|
246
234
|
# Find matches for version string containing major and minor version
|
|
247
|
-
match = re.search("\d+\.\d+", output.stdout)
|
|
235
|
+
match = re.search(r"\d+\.\d+", output.stdout)
|
|
248
236
|
if match is None:
|
|
249
|
-
raise subprocess.SubprocessError(
|
|
250
|
-
"Could not determine Muscle version"
|
|
251
|
-
)
|
|
237
|
+
raise subprocess.SubprocessError("Could not determine Muscle version")
|
|
252
238
|
version_string = match.group(0)
|
|
253
239
|
splitted = version_string.split(".")
|
|
254
|
-
return int(splitted[0]), int(splitted[1])
|
|
240
|
+
return int(splitted[0]), int(splitted[1])
|