biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/sequence/align/matrix.py
CHANGED
|
@@ -5,11 +5,9 @@
|
|
|
5
5
|
__name__ = "biotite.sequence.align"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
|
|
8
|
-
from ..sequence import Sequence
|
|
9
|
-
from ..seqtypes import NucleotideSequence, ProteinSequence
|
|
10
|
-
from ..alphabet import Alphabet
|
|
11
|
-
import numpy as np
|
|
12
8
|
import os
|
|
9
|
+
import numpy as np
|
|
10
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
13
11
|
|
|
14
12
|
__all__ = ["SubstitutionMatrix"]
|
|
15
13
|
|
|
@@ -21,54 +19,54 @@ class SubstitutionMatrix(object):
|
|
|
21
19
|
A :class:`SubstitutionMatrix` maps each possible pairing of a symbol
|
|
22
20
|
of a first alphabet with a symbol of a second alphabet to a score
|
|
23
21
|
(integer).
|
|
24
|
-
|
|
22
|
+
|
|
25
23
|
The class uses a 2-D (m x n) :class:`ndarray`
|
|
26
24
|
(dtype=:attr:`numpy.int32`),
|
|
27
25
|
where each element stores the score for a symbol pairing, indexed
|
|
28
26
|
by the symbol codes of the respective symbols in an *m*-length
|
|
29
27
|
alphabet 1 and an *n*-length alphabet 2.
|
|
30
|
-
|
|
28
|
+
|
|
31
29
|
There are 3 ways to create instances:
|
|
32
|
-
|
|
30
|
+
|
|
33
31
|
At first a 2-D :class:`ndarray` containing the scores can be
|
|
34
32
|
directly provided.
|
|
35
|
-
|
|
33
|
+
|
|
36
34
|
Secondly a dictionary can be provided, where the keys are pairing
|
|
37
35
|
tuples and values are the corresponding scores.
|
|
38
36
|
The pairing tuples consist of a symbol of alphabet 1 as first
|
|
39
37
|
element and a symbol of alphabet 2 as second element. Pairings have
|
|
40
38
|
to be provided for each possible combination.
|
|
41
|
-
|
|
39
|
+
|
|
42
40
|
At last a valid matrix name can be given, which is loaded from the
|
|
43
41
|
internal matrix database. The following matrices are available:
|
|
44
|
-
|
|
42
|
+
|
|
45
43
|
- Nucleotide substitution matrices from NCBI database
|
|
46
44
|
- **NUC** - Also usable with ambiguous alphabet
|
|
47
|
-
|
|
45
|
+
|
|
48
46
|
- Protein substitution matrices from NCBI database
|
|
49
|
-
|
|
47
|
+
|
|
50
48
|
- **PAM<n>**
|
|
51
49
|
- **BLOSUM<n>**
|
|
52
50
|
- **MATCH** - Only differentiates between match and mismatch
|
|
53
51
|
- **IDENTITY** - Strongly penalizes mismatches
|
|
54
52
|
- **GONNET** - Not usable with default protein alphabet
|
|
55
53
|
- **DAYHOFF**
|
|
56
|
-
|
|
54
|
+
|
|
57
55
|
- Corrected protein substitution matrices :footcite:`Hess2016`,
|
|
58
56
|
**<BLOCKS>** is the BLOCKS version, the matrix is based on
|
|
59
|
-
|
|
57
|
+
|
|
60
58
|
- **BLOSUM<n>_<BLOCKS>**
|
|
61
59
|
- **RBLOSUM<n>_<BLOCKS>**
|
|
62
60
|
- **CorBLOSUM<n>_<BLOCKS>**
|
|
63
|
-
|
|
61
|
+
|
|
64
62
|
A list of all available matrix names is returned by
|
|
65
63
|
:meth:`list_db()`.
|
|
66
|
-
|
|
64
|
+
|
|
67
65
|
Since this class can handle two different alphabets, it is possible
|
|
68
66
|
to align two different types of sequences.
|
|
69
|
-
|
|
67
|
+
|
|
70
68
|
Objects of this class are immutable.
|
|
71
|
-
|
|
69
|
+
|
|
72
70
|
Parameters
|
|
73
71
|
----------
|
|
74
72
|
alphabet1 : Alphabet, length=m
|
|
@@ -79,23 +77,23 @@ class SubstitutionMatrix(object):
|
|
|
79
77
|
Either a symbol code indexed :class:`ndarray` containing the scores,
|
|
80
78
|
or a dictionary mapping the symbol pairing to scores,
|
|
81
79
|
or a string referencing a matrix in the internal database.
|
|
82
|
-
|
|
80
|
+
|
|
83
81
|
Raises
|
|
84
82
|
------
|
|
85
83
|
KeyError
|
|
86
84
|
If the matrix dictionary misses a symbol given in the alphabet.
|
|
87
|
-
|
|
85
|
+
|
|
88
86
|
References
|
|
89
87
|
----------
|
|
90
|
-
|
|
88
|
+
|
|
91
89
|
.. footbibliography::
|
|
92
|
-
|
|
90
|
+
|
|
93
91
|
Examples
|
|
94
92
|
--------
|
|
95
|
-
|
|
93
|
+
|
|
96
94
|
Creating a matrix for two different (nonsense) alphabets
|
|
97
95
|
via a matrix dictionary:
|
|
98
|
-
|
|
96
|
+
|
|
99
97
|
>>> alph1 = Alphabet(["foo","bar"])
|
|
100
98
|
>>> alph2 = Alphabet([1,2,3])
|
|
101
99
|
>>> matrix_dict = {("foo",1):5, ("foo",2):10, ("foo",3):15,
|
|
@@ -119,17 +117,16 @@ class SubstitutionMatrix(object):
|
|
|
119
117
|
C 0 1 0 0
|
|
120
118
|
G 0 0 1 0
|
|
121
119
|
T 0 0 0 1
|
|
122
|
-
|
|
120
|
+
|
|
123
121
|
Creating a matrix via database name:
|
|
124
|
-
|
|
122
|
+
|
|
125
123
|
>>> alph = ProteinSequence.alphabet
|
|
126
124
|
>>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
|
|
127
125
|
"""
|
|
128
|
-
|
|
126
|
+
|
|
129
127
|
# Directory of matrix files
|
|
130
|
-
_db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
|
|
131
|
-
|
|
132
|
-
|
|
128
|
+
_db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
|
|
129
|
+
|
|
133
130
|
def __init__(self, alphabet1, alphabet2, score_matrix):
|
|
134
131
|
self._alph1 = alphabet1
|
|
135
132
|
self._alph2 = alphabet2
|
|
@@ -147,16 +144,19 @@ class SubstitutionMatrix(object):
|
|
|
147
144
|
matrix_dict = SubstitutionMatrix.dict_from_db(score_matrix)
|
|
148
145
|
self._fill_with_matrix_dict(matrix_dict)
|
|
149
146
|
else:
|
|
150
|
-
raise TypeError(
|
|
151
|
-
|
|
147
|
+
raise TypeError(
|
|
148
|
+
"Matrix must be either a dictionary, " "an 2-D ndarray or a string"
|
|
149
|
+
)
|
|
152
150
|
# This class is immutable and has a getter function for the
|
|
153
151
|
# score matrix -> make the score matrix read-only
|
|
154
152
|
self._matrix.setflags(write=False)
|
|
155
153
|
|
|
156
154
|
def __repr__(self):
|
|
157
155
|
"""Represent SubstitutionMatrix as a string for debugging."""
|
|
158
|
-
return
|
|
159
|
-
|
|
156
|
+
return (
|
|
157
|
+
f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, "
|
|
158
|
+
f"np.{np.array_repr(self._matrix)})"
|
|
159
|
+
)
|
|
160
160
|
|
|
161
161
|
def __eq__(self, item):
|
|
162
162
|
if not isinstance(item, SubstitutionMatrix):
|
|
@@ -173,40 +173,39 @@ class SubstitutionMatrix(object):
|
|
|
173
173
|
return not self == item
|
|
174
174
|
|
|
175
175
|
def _fill_with_matrix_dict(self, matrix_dict):
|
|
176
|
-
self._matrix = np.zeros((
|
|
177
|
-
dtype=np.int32)
|
|
176
|
+
self._matrix = np.zeros((len(self._alph1), len(self._alph2)), dtype=np.int32)
|
|
178
177
|
for i in range(len(self._alph1)):
|
|
179
178
|
for j in range(len(self._alph2)):
|
|
180
179
|
sym1 = self._alph1.decode(i)
|
|
181
180
|
sym2 = self._alph2.decode(j)
|
|
182
|
-
self._matrix[i,j] = int(matrix_dict[sym1, sym2])
|
|
183
|
-
|
|
181
|
+
self._matrix[i, j] = int(matrix_dict[sym1, sym2])
|
|
182
|
+
|
|
184
183
|
def get_alphabet1(self):
|
|
185
184
|
"""
|
|
186
|
-
Get the first alphabet.
|
|
187
|
-
|
|
185
|
+
Get the first alphabet.
|
|
186
|
+
|
|
188
187
|
Returns
|
|
189
188
|
-------
|
|
190
189
|
alphabet : Alphabet
|
|
191
190
|
The first alphabet.
|
|
192
191
|
"""
|
|
193
192
|
return self._alph1
|
|
194
|
-
|
|
193
|
+
|
|
195
194
|
def get_alphabet2(self):
|
|
196
195
|
"""
|
|
197
|
-
Get the second alphabet.
|
|
198
|
-
|
|
196
|
+
Get the second alphabet.
|
|
197
|
+
|
|
199
198
|
Returns
|
|
200
199
|
-------
|
|
201
200
|
alphabet : Alphabet
|
|
202
201
|
The second alphabet.
|
|
203
202
|
"""
|
|
204
203
|
return self._alph2
|
|
205
|
-
|
|
204
|
+
|
|
206
205
|
def score_matrix(self):
|
|
207
206
|
"""
|
|
208
207
|
Get the 2-D :class:`ndarray` containing the score values.
|
|
209
|
-
|
|
208
|
+
|
|
210
209
|
Returns
|
|
211
210
|
-------
|
|
212
211
|
matrix : ndarray, shape=(m,n), dtype=np.int32
|
|
@@ -214,12 +213,12 @@ class SubstitutionMatrix(object):
|
|
|
214
213
|
The array is read-only.
|
|
215
214
|
"""
|
|
216
215
|
return self._matrix
|
|
217
|
-
|
|
216
|
+
|
|
218
217
|
def transpose(self):
|
|
219
218
|
"""
|
|
220
219
|
Get a copy of this instance, where the alphabets are
|
|
221
220
|
interchanged.
|
|
222
|
-
|
|
221
|
+
|
|
223
222
|
Returns
|
|
224
223
|
-------
|
|
225
224
|
transposed : SubstitutionMatrix
|
|
@@ -229,7 +228,7 @@ class SubstitutionMatrix(object):
|
|
|
229
228
|
new_alph2 = self._alph1
|
|
230
229
|
new_matrix = np.transpose(self._matrix)
|
|
231
230
|
return SubstitutionMatrix(new_alph1, new_alph2, new_matrix)
|
|
232
|
-
|
|
231
|
+
|
|
233
232
|
def is_symmetric(self):
|
|
234
233
|
"""
|
|
235
234
|
Check whether the substitution matrix is symmetric,
|
|
@@ -242,35 +241,36 @@ class SubstitutionMatrix(object):
|
|
|
242
241
|
True, if both alphabets are identical and the score matrix
|
|
243
242
|
is symmetric, false otherwise.
|
|
244
243
|
"""
|
|
245
|
-
return
|
|
246
|
-
|
|
247
|
-
|
|
244
|
+
return self._alph1 == self._alph2 and np.array_equal(
|
|
245
|
+
self._matrix, np.transpose(self._matrix)
|
|
246
|
+
)
|
|
247
|
+
|
|
248
248
|
def get_score_by_code(self, code1, code2):
|
|
249
249
|
"""
|
|
250
250
|
Get the substitution score of two symbols,
|
|
251
251
|
represented by their code.
|
|
252
|
-
|
|
252
|
+
|
|
253
253
|
Parameters
|
|
254
254
|
----------
|
|
255
255
|
code1, code2 : int
|
|
256
256
|
Symbol codes of the two symbols to be aligned.
|
|
257
|
-
|
|
257
|
+
|
|
258
258
|
Returns
|
|
259
259
|
-------
|
|
260
260
|
score : int
|
|
261
261
|
The substitution / alignment score.
|
|
262
262
|
"""
|
|
263
263
|
return self._matrix[code1, code2]
|
|
264
|
-
|
|
264
|
+
|
|
265
265
|
def get_score(self, symbol1, symbol2):
|
|
266
266
|
"""
|
|
267
267
|
Get the substitution score of two symbols.
|
|
268
|
-
|
|
268
|
+
|
|
269
269
|
Parameters
|
|
270
270
|
----------
|
|
271
271
|
symbol1, symbol2 : object
|
|
272
272
|
Symbols to be aligned.
|
|
273
|
-
|
|
273
|
+
|
|
274
274
|
Returns
|
|
275
275
|
-------
|
|
276
276
|
score : int
|
|
@@ -279,19 +279,19 @@ class SubstitutionMatrix(object):
|
|
|
279
279
|
code1 = self._alph1.encode(symbol1)
|
|
280
280
|
code2 = self._alph2.encode(symbol2)
|
|
281
281
|
return self._matrix[code1, code2]
|
|
282
|
-
|
|
282
|
+
|
|
283
283
|
def shape(self):
|
|
284
284
|
"""
|
|
285
285
|
Get the shape (i.e. the length of both alphabets)
|
|
286
286
|
of the substitution matrix.
|
|
287
|
-
|
|
287
|
+
|
|
288
288
|
Returns
|
|
289
289
|
-------
|
|
290
290
|
shape : tuple
|
|
291
291
|
Matrix shape.
|
|
292
292
|
"""
|
|
293
293
|
return (len(self._alph1), len(self._alph2))
|
|
294
|
-
|
|
294
|
+
|
|
295
295
|
def __str__(self):
|
|
296
296
|
# Create matrix in NCBI format
|
|
297
297
|
string = " "
|
|
@@ -306,18 +306,18 @@ class SubstitutionMatrix(object):
|
|
|
306
306
|
# Remove terminal line break
|
|
307
307
|
string = string[:-1]
|
|
308
308
|
return string
|
|
309
|
-
|
|
309
|
+
|
|
310
310
|
@staticmethod
|
|
311
311
|
def dict_from_str(string):
|
|
312
312
|
"""
|
|
313
313
|
Create a matrix dictionary from a string in NCBI matrix format.
|
|
314
|
-
|
|
314
|
+
|
|
315
315
|
Symbols of the first alphabet are taken from the left column,
|
|
316
316
|
symbols of the second alphabet are taken from the top row.
|
|
317
|
-
|
|
317
|
+
|
|
318
318
|
The keys of the dictionary consist of tuples containing the
|
|
319
319
|
aligned symbols and the values are the corresponding scores.
|
|
320
|
-
|
|
320
|
+
|
|
321
321
|
Returns
|
|
322
322
|
-------
|
|
323
323
|
matrix_dict : dict
|
|
@@ -329,22 +329,22 @@ class SubstitutionMatrix(object):
|
|
|
329
329
|
symbols2 = [e for e in lines[0].split()]
|
|
330
330
|
scores = np.array([line.split()[1:] for line in lines[1:]]).astype(int)
|
|
331
331
|
scores = np.transpose(scores)
|
|
332
|
-
|
|
332
|
+
|
|
333
333
|
matrix_dict = {}
|
|
334
334
|
for i in range(len(symbols1)):
|
|
335
335
|
for j in range(len(symbols2)):
|
|
336
|
-
matrix_dict[(symbols1[i], symbols2[j])] = scores[i,j]
|
|
336
|
+
matrix_dict[(symbols1[i], symbols2[j])] = scores[i, j]
|
|
337
337
|
return matrix_dict
|
|
338
|
-
|
|
338
|
+
|
|
339
339
|
@staticmethod
|
|
340
340
|
def dict_from_db(matrix_name):
|
|
341
341
|
"""
|
|
342
342
|
Create a matrix dictionary from a valid matrix name in the
|
|
343
343
|
internal matrix database.
|
|
344
|
-
|
|
344
|
+
|
|
345
345
|
The keys of the dictionary consist of tuples containing the
|
|
346
346
|
aligned symbols and the values are the corresponding scores.
|
|
347
|
-
|
|
347
|
+
|
|
348
348
|
Returns
|
|
349
349
|
-------
|
|
350
350
|
matrix_dict : dict
|
|
@@ -353,12 +353,12 @@ class SubstitutionMatrix(object):
|
|
|
353
353
|
filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
|
|
354
354
|
with open(filename, "r") as f:
|
|
355
355
|
return SubstitutionMatrix.dict_from_str(f.read())
|
|
356
|
-
|
|
356
|
+
|
|
357
357
|
@staticmethod
|
|
358
358
|
def list_db():
|
|
359
359
|
"""
|
|
360
360
|
List all matrix names in the internal database.
|
|
361
|
-
|
|
361
|
+
|
|
362
362
|
Returns
|
|
363
363
|
-------
|
|
364
364
|
db_list : list
|
|
@@ -367,27 +367,26 @@ class SubstitutionMatrix(object):
|
|
|
367
367
|
files = os.listdir(SubstitutionMatrix._db_dir)
|
|
368
368
|
# Remove '.mat' from files
|
|
369
369
|
return [file[:-4] for file in sorted(files)]
|
|
370
|
-
|
|
371
|
-
|
|
370
|
+
|
|
372
371
|
@staticmethod
|
|
373
372
|
def std_protein_matrix():
|
|
374
373
|
"""
|
|
375
374
|
Get the default :class:`SubstitutionMatrix` for protein sequence
|
|
376
375
|
alignments, which is BLOSUM62.
|
|
377
|
-
|
|
376
|
+
|
|
378
377
|
Returns
|
|
379
378
|
-------
|
|
380
379
|
matrix : SubstitutionMatrix
|
|
381
380
|
Default matrix.
|
|
382
381
|
"""
|
|
383
382
|
return _matrix_blosum62
|
|
384
|
-
|
|
383
|
+
|
|
385
384
|
@staticmethod
|
|
386
385
|
def std_nucleotide_matrix():
|
|
387
386
|
"""
|
|
388
387
|
Get the default :class:`SubstitutionMatrix` for DNA sequence
|
|
389
388
|
alignments.
|
|
390
|
-
|
|
389
|
+
|
|
391
390
|
Returns
|
|
392
391
|
-------
|
|
393
392
|
matrix : SubstitutionMatrix
|
|
@@ -395,11 +394,11 @@ class SubstitutionMatrix(object):
|
|
|
395
394
|
"""
|
|
396
395
|
return _matrix_nuc
|
|
397
396
|
|
|
398
|
-
# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
|
|
399
|
-
_matrix_blosum62 = SubstitutionMatrix(ProteinSequence.alphabet,
|
|
400
|
-
ProteinSequence.alphabet,
|
|
401
|
-
"BLOSUM62")
|
|
402
|
-
_matrix_nuc = SubstitutionMatrix(NucleotideSequence.alphabet_amb,
|
|
403
|
-
NucleotideSequence.alphabet_amb,
|
|
404
|
-
"NUC")
|
|
405
397
|
|
|
398
|
+
# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
|
|
399
|
+
_matrix_blosum62 = SubstitutionMatrix(
|
|
400
|
+
ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
|
|
401
|
+
)
|
|
402
|
+
_matrix_nuc = SubstitutionMatrix(
|
|
403
|
+
NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
|
|
404
|
+
)
|
|
Binary file
|
|
@@ -236,7 +236,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
|
|
|
236
236
|
# Create new matrix with neutral gap symbol
|
|
237
237
|
gap_symbol = GapSymbol.instance()
|
|
238
238
|
new_alphabet = Alphabet(
|
|
239
|
-
matrix.get_alphabet1().get_symbols() +
|
|
239
|
+
matrix.get_alphabet1().get_symbols() + (gap_symbol,)
|
|
240
240
|
)
|
|
241
241
|
new_score_matrix = np.zeros(
|
|
242
242
|
(len(new_alphabet), len(new_alphabet)), dtype=np.int32
|
|
Binary file
|
|
Binary file
|
|
@@ -85,7 +85,7 @@ class RandomPermutation(Permutation):
|
|
|
85
85
|
This class uses a simple full-period *linear congruential generator*
|
|
86
86
|
(LCG) to provide pseudo-randomized values:
|
|
87
87
|
|
|
88
|
-
.. math:: \text{order} = (a c_\text{k-mer} + 1) \mod 2^64.
|
|
88
|
+
.. math:: \text{order} = (a \, c_\text{k-mer} + 1) \mod 2^{64}.
|
|
89
89
|
|
|
90
90
|
The factor :math:`a` is taken from :footcite:`Steele2021` to ensure
|
|
91
91
|
full periodicity and good random behavior.
|
|
@@ -186,6 +186,9 @@ class FrequencyPermutation(Permutation):
|
|
|
186
186
|
The minimum and maximum value, the permutated value
|
|
187
187
|
(i.e. the return value of :meth:`permute()`)
|
|
188
188
|
can take.
|
|
189
|
+
kmer_alphabet : KmerAlphabet
|
|
190
|
+
The *k-mer* alphabet that defines the range of possible *k-mers*
|
|
191
|
+
that should be permuted.
|
|
189
192
|
|
|
190
193
|
Notes
|
|
191
194
|
-----
|
|
@@ -226,11 +229,11 @@ class FrequencyPermutation(Permutation):
|
|
|
226
229
|
>>> permutation = FrequencyPermutation.from_table(kmer_table)
|
|
227
230
|
>>> order = permutation.permute(kmer_codes)
|
|
228
231
|
>>> print(order)
|
|
229
|
-
[ 0
|
|
232
|
+
[ 0 22 18 19 1 2 3 4 5 23 20 6 7 8 9 21 10 11 12 13 24 14 15 16
|
|
230
233
|
17]
|
|
231
234
|
>>> kmer_codes = kmer_codes[np.argsort(order)]
|
|
232
235
|
>>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
|
|
233
|
-
['...', '
|
|
236
|
+
['...', 'rc', 'rd', 'rr', 'ac', 'ad', 'ca', 'da', 'ab', 'br', 'ra']
|
|
234
237
|
"""
|
|
235
238
|
|
|
236
239
|
def __init__(self, kmer_alphabet, counts):
|
|
@@ -240,7 +243,9 @@ class FrequencyPermutation(Permutation):
|
|
|
240
243
|
f"but {len(counts)} counts were given"
|
|
241
244
|
)
|
|
242
245
|
# 'order' maps a permutation to a k-mer
|
|
243
|
-
|
|
246
|
+
# Stability is important to get the same k-mer subset selection
|
|
247
|
+
# on different architectures
|
|
248
|
+
order = np.argsort(counts, kind="stable")
|
|
244
249
|
# '_permutation_table' should perform the reverse mapping
|
|
245
250
|
self._permutation_table = _invert_mapping(order)
|
|
246
251
|
self._kmer_alph = kmer_alphabet
|
|
@@ -259,8 +264,11 @@ class FrequencyPermutation(Permutation):
|
|
|
259
264
|
return self._kmer_alph
|
|
260
265
|
|
|
261
266
|
|
|
267
|
+
@staticmethod
|
|
262
268
|
def from_table(kmer_table):
|
|
263
269
|
"""
|
|
270
|
+
from_table(kmer_table)
|
|
271
|
+
|
|
264
272
|
Create a :class:`FrequencyPermutation` from the *k-mer* counts
|
|
265
273
|
of a :class:`KmerTable`.
|
|
266
274
|
|
|
Binary file
|