biotite 0.41.2__cp312-cp312-win_amd64.whl → 1.0.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -5,11 +5,9 @@
5
5
  __name__ = "biotite.sequence.align"
6
6
  __author__ = "Patrick Kunzmann"
7
7
 
8
- from ..sequence import Sequence
9
- from ..seqtypes import NucleotideSequence, ProteinSequence
10
- from ..alphabet import Alphabet
11
- import numpy as np
12
8
  import os
9
+ import numpy as np
10
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
13
11
 
14
12
  __all__ = ["SubstitutionMatrix"]
15
13
 
@@ -21,54 +19,54 @@ class SubstitutionMatrix(object):
21
19
  A :class:`SubstitutionMatrix` maps each possible pairing of a symbol
22
20
  of a first alphabet with a symbol of a second alphabet to a score
23
21
  (integer).
24
-
22
+
25
23
  The class uses a 2-D (m x n) :class:`ndarray`
26
24
  (dtype=:attr:`numpy.int32`),
27
25
  where each element stores the score for a symbol pairing, indexed
28
26
  by the symbol codes of the respective symbols in an *m*-length
29
27
  alphabet 1 and an *n*-length alphabet 2.
30
-
28
+
31
29
  There are 3 ways to create instances:
32
-
30
+
33
31
  At first a 2-D :class:`ndarray` containing the scores can be
34
32
  directly provided.
35
-
33
+
36
34
  Secondly a dictionary can be provided, where the keys are pairing
37
35
  tuples and values are the corresponding scores.
38
36
  The pairing tuples consist of a symbol of alphabet 1 as first
39
37
  element and a symbol of alphabet 2 as second element. Pairings have
40
38
  to be provided for each possible combination.
41
-
39
+
42
40
  At last a valid matrix name can be given, which is loaded from the
43
41
  internal matrix database. The following matrices are available:
44
-
42
+
45
43
  - Nucleotide substitution matrices from NCBI database
46
44
  - **NUC** - Also usable with ambiguous alphabet
47
-
45
+
48
46
  - Protein substitution matrices from NCBI database
49
-
47
+
50
48
  - **PAM<n>**
51
49
  - **BLOSUM<n>**
52
50
  - **MATCH** - Only differentiates between match and mismatch
53
51
  - **IDENTITY** - Strongly penalizes mismatches
54
52
  - **GONNET** - Not usable with default protein alphabet
55
53
  - **DAYHOFF**
56
-
54
+
57
55
  - Corrected protein substitution matrices :footcite:`Hess2016`,
58
56
  **<BLOCKS>** is the BLOCKS version, the matrix is based on
59
-
57
+
60
58
  - **BLOSUM<n>_<BLOCKS>**
61
59
  - **RBLOSUM<n>_<BLOCKS>**
62
60
  - **CorBLOSUM<n>_<BLOCKS>**
63
-
61
+
64
62
  A list of all available matrix names is returned by
65
63
  :meth:`list_db()`.
66
-
64
+
67
65
  Since this class can handle two different alphabets, it is possible
68
66
  to align two different types of sequences.
69
-
67
+
70
68
  Objects of this class are immutable.
71
-
69
+
72
70
  Parameters
73
71
  ----------
74
72
  alphabet1 : Alphabet, length=m
@@ -79,23 +77,23 @@ class SubstitutionMatrix(object):
79
77
  Either a symbol code indexed :class:`ndarray` containing the scores,
80
78
  or a dictionary mapping the symbol pairing to scores,
81
79
  or a string referencing a matrix in the internal database.
82
-
80
+
83
81
  Raises
84
82
  ------
85
83
  KeyError
86
84
  If the matrix dictionary misses a symbol given in the alphabet.
87
-
85
+
88
86
  References
89
87
  ----------
90
-
88
+
91
89
  .. footbibliography::
92
-
90
+
93
91
  Examples
94
92
  --------
95
-
93
+
96
94
  Creating a matrix for two different (nonsense) alphabets
97
95
  via a matrix dictionary:
98
-
96
+
99
97
  >>> alph1 = Alphabet(["foo","bar"])
100
98
  >>> alph2 = Alphabet([1,2,3])
101
99
  >>> matrix_dict = {("foo",1):5, ("foo",2):10, ("foo",3):15,
@@ -119,17 +117,16 @@ class SubstitutionMatrix(object):
119
117
  C 0 1 0 0
120
118
  G 0 0 1 0
121
119
  T 0 0 0 1
122
-
120
+
123
121
  Creating a matrix via database name:
124
-
122
+
125
123
  >>> alph = ProteinSequence.alphabet
126
124
  >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
127
125
  """
128
-
126
+
129
127
  # Directory of matrix files
130
- _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
131
- "matrix_data")
132
-
128
+ _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
129
+
133
130
  def __init__(self, alphabet1, alphabet2, score_matrix):
134
131
  self._alph1 = alphabet1
135
132
  self._alph2 = alphabet2
@@ -147,16 +144,19 @@ class SubstitutionMatrix(object):
147
144
  matrix_dict = SubstitutionMatrix.dict_from_db(score_matrix)
148
145
  self._fill_with_matrix_dict(matrix_dict)
149
146
  else:
150
- raise TypeError("Matrix must be either a dictionary, "
151
- "an 2-D ndarray or a string")
147
+ raise TypeError(
148
+ "Matrix must be either a dictionary, " "an 2-D ndarray or a string"
149
+ )
152
150
  # This class is immutable and has a getter function for the
153
151
  # score matrix -> make the score matrix read-only
154
152
  self._matrix.setflags(write=False)
155
153
 
156
154
  def __repr__(self):
157
155
  """Represent SubstitutionMatrix as a string for debugging."""
158
- return f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, " \
159
- f"np.{np.array_repr(self._matrix)})"
156
+ return (
157
+ f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, "
158
+ f"np.{np.array_repr(self._matrix)})"
159
+ )
160
160
 
161
161
  def __eq__(self, item):
162
162
  if not isinstance(item, SubstitutionMatrix):
@@ -173,40 +173,39 @@ class SubstitutionMatrix(object):
173
173
  return not self == item
174
174
 
175
175
  def _fill_with_matrix_dict(self, matrix_dict):
176
- self._matrix = np.zeros(( len(self._alph1), len(self._alph2) ),
177
- dtype=np.int32)
176
+ self._matrix = np.zeros((len(self._alph1), len(self._alph2)), dtype=np.int32)
178
177
  for i in range(len(self._alph1)):
179
178
  for j in range(len(self._alph2)):
180
179
  sym1 = self._alph1.decode(i)
181
180
  sym2 = self._alph2.decode(j)
182
- self._matrix[i,j] = int(matrix_dict[sym1, sym2])
183
-
181
+ self._matrix[i, j] = int(matrix_dict[sym1, sym2])
182
+
184
183
  def get_alphabet1(self):
185
184
  """
186
- Get the first alphabet.
187
-
185
+ Get the first alphabet.
186
+
188
187
  Returns
189
188
  -------
190
189
  alphabet : Alphabet
191
190
  The first alphabet.
192
191
  """
193
192
  return self._alph1
194
-
193
+
195
194
  def get_alphabet2(self):
196
195
  """
197
- Get the second alphabet.
198
-
196
+ Get the second alphabet.
197
+
199
198
  Returns
200
199
  -------
201
200
  alphabet : Alphabet
202
201
  The second alphabet.
203
202
  """
204
203
  return self._alph2
205
-
204
+
206
205
  def score_matrix(self):
207
206
  """
208
207
  Get the 2-D :class:`ndarray` containing the score values.
209
-
208
+
210
209
  Returns
211
210
  -------
212
211
  matrix : ndarray, shape=(m,n), dtype=np.int32
@@ -214,12 +213,12 @@ class SubstitutionMatrix(object):
214
213
  The array is read-only.
215
214
  """
216
215
  return self._matrix
217
-
216
+
218
217
  def transpose(self):
219
218
  """
220
219
  Get a copy of this instance, where the alphabets are
221
220
  interchanged.
222
-
221
+
223
222
  Returns
224
223
  -------
225
224
  transposed : SubstitutionMatrix
@@ -229,7 +228,7 @@ class SubstitutionMatrix(object):
229
228
  new_alph2 = self._alph1
230
229
  new_matrix = np.transpose(self._matrix)
231
230
  return SubstitutionMatrix(new_alph1, new_alph2, new_matrix)
232
-
231
+
233
232
  def is_symmetric(self):
234
233
  """
235
234
  Check whether the substitution matrix is symmetric,
@@ -242,35 +241,36 @@ class SubstitutionMatrix(object):
242
241
  True, if both alphabets are identical and the score matrix
243
242
  is symmetric, false otherwise.
244
243
  """
245
- return self._alph1 == self._alph2 \
246
- and np.array_equal(self._matrix, np.transpose(self._matrix))
247
-
244
+ return self._alph1 == self._alph2 and np.array_equal(
245
+ self._matrix, np.transpose(self._matrix)
246
+ )
247
+
248
248
  def get_score_by_code(self, code1, code2):
249
249
  """
250
250
  Get the substitution score of two symbols,
251
251
  represented by their code.
252
-
252
+
253
253
  Parameters
254
254
  ----------
255
255
  code1, code2 : int
256
256
  Symbol codes of the two symbols to be aligned.
257
-
257
+
258
258
  Returns
259
259
  -------
260
260
  score : int
261
261
  The substitution / alignment score.
262
262
  """
263
263
  return self._matrix[code1, code2]
264
-
264
+
265
265
  def get_score(self, symbol1, symbol2):
266
266
  """
267
267
  Get the substitution score of two symbols.
268
-
268
+
269
269
  Parameters
270
270
  ----------
271
271
  symbol1, symbol2 : object
272
272
  Symbols to be aligned.
273
-
273
+
274
274
  Returns
275
275
  -------
276
276
  score : int
@@ -279,19 +279,19 @@ class SubstitutionMatrix(object):
279
279
  code1 = self._alph1.encode(symbol1)
280
280
  code2 = self._alph2.encode(symbol2)
281
281
  return self._matrix[code1, code2]
282
-
282
+
283
283
  def shape(self):
284
284
  """
285
285
  Get the shape (i.e. the length of both alphabets)
286
286
  of the substitution matrix.
287
-
287
+
288
288
  Returns
289
289
  -------
290
290
  shape : tuple
291
291
  Matrix shape.
292
292
  """
293
293
  return (len(self._alph1), len(self._alph2))
294
-
294
+
295
295
  def __str__(self):
296
296
  # Create matrix in NCBI format
297
297
  string = " "
@@ -306,18 +306,18 @@ class SubstitutionMatrix(object):
306
306
  # Remove terminal line break
307
307
  string = string[:-1]
308
308
  return string
309
-
309
+
310
310
  @staticmethod
311
311
  def dict_from_str(string):
312
312
  """
313
313
  Create a matrix dictionary from a string in NCBI matrix format.
314
-
314
+
315
315
  Symbols of the first alphabet are taken from the left column,
316
316
  symbols of the second alphabet are taken from the top row.
317
-
317
+
318
318
  The keys of the dictionary consist of tuples containing the
319
319
  aligned symbols and the values are the corresponding scores.
320
-
320
+
321
321
  Returns
322
322
  -------
323
323
  matrix_dict : dict
@@ -329,22 +329,22 @@ class SubstitutionMatrix(object):
329
329
  symbols2 = [e for e in lines[0].split()]
330
330
  scores = np.array([line.split()[1:] for line in lines[1:]]).astype(int)
331
331
  scores = np.transpose(scores)
332
-
332
+
333
333
  matrix_dict = {}
334
334
  for i in range(len(symbols1)):
335
335
  for j in range(len(symbols2)):
336
- matrix_dict[(symbols1[i], symbols2[j])] = scores[i,j]
336
+ matrix_dict[(symbols1[i], symbols2[j])] = scores[i, j]
337
337
  return matrix_dict
338
-
338
+
339
339
  @staticmethod
340
340
  def dict_from_db(matrix_name):
341
341
  """
342
342
  Create a matrix dictionary from a valid matrix name in the
343
343
  internal matrix database.
344
-
344
+
345
345
  The keys of the dictionary consist of tuples containing the
346
346
  aligned symbols and the values are the corresponding scores.
347
-
347
+
348
348
  Returns
349
349
  -------
350
350
  matrix_dict : dict
@@ -353,12 +353,12 @@ class SubstitutionMatrix(object):
353
353
  filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
354
354
  with open(filename, "r") as f:
355
355
  return SubstitutionMatrix.dict_from_str(f.read())
356
-
356
+
357
357
  @staticmethod
358
358
  def list_db():
359
359
  """
360
360
  List all matrix names in the internal database.
361
-
361
+
362
362
  Returns
363
363
  -------
364
364
  db_list : list
@@ -367,27 +367,26 @@ class SubstitutionMatrix(object):
367
367
  files = os.listdir(SubstitutionMatrix._db_dir)
368
368
  # Remove '.mat' from files
369
369
  return [file[:-4] for file in sorted(files)]
370
-
371
-
370
+
372
371
  @staticmethod
373
372
  def std_protein_matrix():
374
373
  """
375
374
  Get the default :class:`SubstitutionMatrix` for protein sequence
376
375
  alignments, which is BLOSUM62.
377
-
376
+
378
377
  Returns
379
378
  -------
380
379
  matrix : SubstitutionMatrix
381
380
  Default matrix.
382
381
  """
383
382
  return _matrix_blosum62
384
-
383
+
385
384
  @staticmethod
386
385
  def std_nucleotide_matrix():
387
386
  """
388
387
  Get the default :class:`SubstitutionMatrix` for DNA sequence
389
388
  alignments.
390
-
389
+
391
390
  Returns
392
391
  -------
393
392
  matrix : SubstitutionMatrix
@@ -395,11 +394,11 @@ class SubstitutionMatrix(object):
395
394
  """
396
395
  return _matrix_nuc
397
396
 
398
- # Preformatted BLOSUM62 and NUC substitution matrix from NCBI
399
- _matrix_blosum62 = SubstitutionMatrix(ProteinSequence.alphabet,
400
- ProteinSequence.alphabet,
401
- "BLOSUM62")
402
- _matrix_nuc = SubstitutionMatrix(NucleotideSequence.alphabet_amb,
403
- NucleotideSequence.alphabet_amb,
404
- "NUC")
405
397
 
398
+ # Preformatted BLOSUM62 and NUC substitution matrix from NCBI
399
+ _matrix_blosum62 = SubstitutionMatrix(
400
+ ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
401
+ )
402
+ _matrix_nuc = SubstitutionMatrix(
403
+ NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
404
+ )
@@ -236,7 +236,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
236
236
  # Create new matrix with neutral gap symbol
237
237
  gap_symbol = GapSymbol.instance()
238
238
  new_alphabet = Alphabet(
239
- matrix.get_alphabet1().get_symbols() + [gap_symbol]
239
+ matrix.get_alphabet1().get_symbols() + (gap_symbol,)
240
240
  )
241
241
  new_score_matrix = np.zeros(
242
242
  (len(new_alphabet), len(new_alphabet)), dtype=np.int32
@@ -85,7 +85,7 @@ class RandomPermutation(Permutation):
85
85
  This class uses a simple full-period *linear congruential generator*
86
86
  (LCG) to provide pseudo-randomized values:
87
87
 
88
- .. math:: \text{order} = (a c_\text{k-mer} + 1) \mod 2^64.
88
+ .. math:: \text{order} = (a \, c_\text{k-mer} + 1) \mod 2^{64}.
89
89
 
90
90
  The factor :math:`a` is taken from :footcite:`Steele2021` to ensure
91
91
  full periodicity and good random behavior.
@@ -186,6 +186,9 @@ class FrequencyPermutation(Permutation):
186
186
  The minimum and maximum value, the permutated value
187
187
  (i.e. the return value of :meth:`permute()`)
188
188
  can take.
189
+ kmer_alphabet : KmerAlphabet
190
+ The *k-mer* alphabet that defines the range of possible *k-mers*
191
+ that should be permuted.
189
192
 
190
193
  Notes
191
194
  -----
@@ -226,11 +229,11 @@ class FrequencyPermutation(Permutation):
226
229
  >>> permutation = FrequencyPermutation.from_table(kmer_table)
227
230
  >>> order = permutation.permute(kmer_codes)
228
231
  >>> print(order)
229
- [ 0 24 20 19 16 15 14 13 12 22 21 10 11 8 7 18 6 5 4 3 23 2 1 9
232
+ [ 0 22 18 19 1 2 3 4 5 23 20 6 7 8 9 21 10 11 12 13 24 14 15 16
230
233
  17]
231
234
  >>> kmer_codes = kmer_codes[np.argsort(order)]
232
235
  >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
233
- ['...', 'ba', 'ar', 'rr', 'da', 'ad', 'ac', 'ca', 'br', 'ra', 'ab']
236
+ ['...', 'rc', 'rd', 'rr', 'ac', 'ad', 'ca', 'da', 'ab', 'br', 'ra']
234
237
  """
235
238
 
236
239
  def __init__(self, kmer_alphabet, counts):
@@ -240,7 +243,9 @@ class FrequencyPermutation(Permutation):
240
243
  f"but {len(counts)} counts were given"
241
244
  )
242
245
  # 'order' maps a permutation to a k-mer
243
- order = np.argsort(counts)
246
+ # Stability is important to get the same k-mer subset selection
247
+ # on different architectures
248
+ order = np.argsort(counts, kind="stable")
244
249
  # '_permutation_table' should perform the reverse mapping
245
250
  self._permutation_table = _invert_mapping(order)
246
251
  self._kmer_alph = kmer_alphabet
@@ -259,8 +264,11 @@ class FrequencyPermutation(Permutation):
259
264
  return self._kmer_alph
260
265
 
261
266
 
267
+ @staticmethod
262
268
  def from_table(kmer_table):
263
269
  """
270
+ from_table(kmer_table)
271
+
264
272
  Create a :class:`FrequencyPermutation` from the *k-mer* counts
265
273
  of a :class:`KmerTable`.
266
274