biotite 0.41.1__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +36 -10
  3. biotite/application/application.py +22 -11
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +16 -5
  52. biotite/sequence/align/__init__.py +160 -6
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +35 -35
  67. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +112 -126
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +64 -64
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +226 -240
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +88 -100
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +21 -7
  130. biotite/structure/info/groups.py +10 -15
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -52
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
  186. biotite/structure/io/ctab.py +0 -72
  187. biotite/structure/io/mmtf/__init__.py +0 -21
  188. biotite/structure/io/mmtf/assembly.py +0 -214
  189. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  190. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  191. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  192. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  193. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  194. biotite/structure/io/mmtf/decode.pyx +0 -152
  195. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  196. biotite/structure/io/mmtf/encode.pyx +0 -183
  197. biotite/structure/io/mmtf/file.py +0 -233
  198. biotite/structure/io/npz/__init__.py +0 -20
  199. biotite/structure/io/npz/file.py +0 -152
  200. biotite/structure/io/pdbx/legacy.py +0 -267
  201. biotite/structure/io/tng/__init__.py +0 -13
  202. biotite/structure/io/tng/file.py +0 -46
  203. biotite/temp.py +0 -86
  204. biotite-0.41.1.dist-info/RECORD +0 -340
  205. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -5,11 +5,9 @@
5
5
  __name__ = "biotite.sequence.align"
6
6
  __author__ = "Patrick Kunzmann"
7
7
 
8
- from ..sequence import Sequence
9
- from ..seqtypes import NucleotideSequence, ProteinSequence
10
- from ..alphabet import Alphabet
11
- import numpy as np
12
8
  import os
9
+ import numpy as np
10
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
13
11
 
14
12
  __all__ = ["SubstitutionMatrix"]
15
13
 
@@ -21,54 +19,54 @@ class SubstitutionMatrix(object):
21
19
  A :class:`SubstitutionMatrix` maps each possible pairing of a symbol
22
20
  of a first alphabet with a symbol of a second alphabet to a score
23
21
  (integer).
24
-
22
+
25
23
  The class uses a 2-D (m x n) :class:`ndarray`
26
24
  (dtype=:attr:`numpy.int32`),
27
25
  where each element stores the score for a symbol pairing, indexed
28
26
  by the symbol codes of the respective symbols in an *m*-length
29
27
  alphabet 1 and an *n*-length alphabet 2.
30
-
28
+
31
29
  There are 3 ways to create instances:
32
-
30
+
33
31
  At first a 2-D :class:`ndarray` containing the scores can be
34
32
  directly provided.
35
-
33
+
36
34
  Secondly a dictionary can be provided, where the keys are pairing
37
35
  tuples and values are the corresponding scores.
38
36
  The pairing tuples consist of a symbol of alphabet 1 as first
39
37
  element and a symbol of alphabet 2 as second element. Pairings have
40
38
  to be provided for each possible combination.
41
-
39
+
42
40
  At last a valid matrix name can be given, which is loaded from the
43
41
  internal matrix database. The following matrices are available:
44
-
42
+
45
43
  - Nucleotide substitution matrices from NCBI database
46
44
  - **NUC** - Also usable with ambiguous alphabet
47
-
45
+
48
46
  - Protein substitution matrices from NCBI database
49
-
47
+
50
48
  - **PAM<n>**
51
49
  - **BLOSUM<n>**
52
50
  - **MATCH** - Only differentiates between match and mismatch
53
51
  - **IDENTITY** - Strongly penalizes mismatches
54
52
  - **GONNET** - Not usable with default protein alphabet
55
53
  - **DAYHOFF**
56
-
54
+
57
55
  - Corrected protein substitution matrices :footcite:`Hess2016`,
58
56
  **<BLOCKS>** is the BLOCKS version, the matrix is based on
59
-
57
+
60
58
  - **BLOSUM<n>_<BLOCKS>**
61
59
  - **RBLOSUM<n>_<BLOCKS>**
62
60
  - **CorBLOSUM<n>_<BLOCKS>**
63
-
61
+
64
62
  A list of all available matrix names is returned by
65
63
  :meth:`list_db()`.
66
-
64
+
67
65
  Since this class can handle two different alphabets, it is possible
68
66
  to align two different types of sequences.
69
-
67
+
70
68
  Objects of this class are immutable.
71
-
69
+
72
70
  Parameters
73
71
  ----------
74
72
  alphabet1 : Alphabet, length=m
@@ -79,23 +77,23 @@ class SubstitutionMatrix(object):
79
77
  Either a symbol code indexed :class:`ndarray` containing the scores,
80
78
  or a dictionary mapping the symbol pairing to scores,
81
79
  or a string referencing a matrix in the internal database.
82
-
80
+
83
81
  Raises
84
82
  ------
85
83
  KeyError
86
84
  If the matrix dictionary misses a symbol given in the alphabet.
87
-
85
+
88
86
  References
89
87
  ----------
90
-
88
+
91
89
  .. footbibliography::
92
-
90
+
93
91
  Examples
94
92
  --------
95
-
93
+
96
94
  Creating a matrix for two different (nonsense) alphabets
97
95
  via a matrix dictionary:
98
-
96
+
99
97
  >>> alph1 = Alphabet(["foo","bar"])
100
98
  >>> alph2 = Alphabet([1,2,3])
101
99
  >>> matrix_dict = {("foo",1):5, ("foo",2):10, ("foo",3):15,
@@ -119,17 +117,16 @@ class SubstitutionMatrix(object):
119
117
  C 0 1 0 0
120
118
  G 0 0 1 0
121
119
  T 0 0 0 1
122
-
120
+
123
121
  Creating a matrix via database name:
124
-
122
+
125
123
  >>> alph = ProteinSequence.alphabet
126
124
  >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
127
125
  """
128
-
126
+
129
127
  # Directory of matrix files
130
- _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
131
- "matrix_data")
132
-
128
+ _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
129
+
133
130
  def __init__(self, alphabet1, alphabet2, score_matrix):
134
131
  self._alph1 = alphabet1
135
132
  self._alph2 = alphabet2
@@ -147,16 +144,19 @@ class SubstitutionMatrix(object):
147
144
  matrix_dict = SubstitutionMatrix.dict_from_db(score_matrix)
148
145
  self._fill_with_matrix_dict(matrix_dict)
149
146
  else:
150
- raise TypeError("Matrix must be either a dictionary, "
151
- "an 2-D ndarray or a string")
147
+ raise TypeError(
148
+ "Matrix must be either a dictionary, " "an 2-D ndarray or a string"
149
+ )
152
150
  # This class is immutable and has a getter function for the
153
151
  # score matrix -> make the score matrix read-only
154
152
  self._matrix.setflags(write=False)
155
153
 
156
154
  def __repr__(self):
157
155
  """Represent SubstitutionMatrix as a string for debugging."""
158
- return f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, " \
159
- f"np.{np.array_repr(self._matrix)})"
156
+ return (
157
+ f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, "
158
+ f"np.{np.array_repr(self._matrix)})"
159
+ )
160
160
 
161
161
  def __eq__(self, item):
162
162
  if not isinstance(item, SubstitutionMatrix):
@@ -173,40 +173,39 @@ class SubstitutionMatrix(object):
173
173
  return not self == item
174
174
 
175
175
  def _fill_with_matrix_dict(self, matrix_dict):
176
- self._matrix = np.zeros(( len(self._alph1), len(self._alph2) ),
177
- dtype=np.int32)
176
+ self._matrix = np.zeros((len(self._alph1), len(self._alph2)), dtype=np.int32)
178
177
  for i in range(len(self._alph1)):
179
178
  for j in range(len(self._alph2)):
180
179
  sym1 = self._alph1.decode(i)
181
180
  sym2 = self._alph2.decode(j)
182
- self._matrix[i,j] = int(matrix_dict[sym1, sym2])
183
-
181
+ self._matrix[i, j] = int(matrix_dict[sym1, sym2])
182
+
184
183
  def get_alphabet1(self):
185
184
  """
186
- Get the first alphabet.
187
-
185
+ Get the first alphabet.
186
+
188
187
  Returns
189
188
  -------
190
189
  alphabet : Alphabet
191
190
  The first alphabet.
192
191
  """
193
192
  return self._alph1
194
-
193
+
195
194
  def get_alphabet2(self):
196
195
  """
197
- Get the second alphabet.
198
-
196
+ Get the second alphabet.
197
+
199
198
  Returns
200
199
  -------
201
200
  alphabet : Alphabet
202
201
  The second alphabet.
203
202
  """
204
203
  return self._alph2
205
-
204
+
206
205
  def score_matrix(self):
207
206
  """
208
207
  Get the 2-D :class:`ndarray` containing the score values.
209
-
208
+
210
209
  Returns
211
210
  -------
212
211
  matrix : ndarray, shape=(m,n), dtype=np.int32
@@ -214,12 +213,12 @@ class SubstitutionMatrix(object):
214
213
  The array is read-only.
215
214
  """
216
215
  return self._matrix
217
-
216
+
218
217
  def transpose(self):
219
218
  """
220
219
  Get a copy of this instance, where the alphabets are
221
220
  interchanged.
222
-
221
+
223
222
  Returns
224
223
  -------
225
224
  transposed : SubstitutionMatrix
@@ -229,7 +228,7 @@ class SubstitutionMatrix(object):
229
228
  new_alph2 = self._alph1
230
229
  new_matrix = np.transpose(self._matrix)
231
230
  return SubstitutionMatrix(new_alph1, new_alph2, new_matrix)
232
-
231
+
233
232
  def is_symmetric(self):
234
233
  """
235
234
  Check whether the substitution matrix is symmetric,
@@ -242,35 +241,36 @@ class SubstitutionMatrix(object):
242
241
  True, if both alphabets are identical and the score matrix
243
242
  is symmetric, false otherwise.
244
243
  """
245
- return self._alph1 == self._alph2 \
246
- and np.array_equal(self._matrix, np.transpose(self._matrix))
247
-
244
+ return self._alph1 == self._alph2 and np.array_equal(
245
+ self._matrix, np.transpose(self._matrix)
246
+ )
247
+
248
248
  def get_score_by_code(self, code1, code2):
249
249
  """
250
250
  Get the substitution score of two symbols,
251
251
  represented by their code.
252
-
252
+
253
253
  Parameters
254
254
  ----------
255
255
  code1, code2 : int
256
256
  Symbol codes of the two symbols to be aligned.
257
-
257
+
258
258
  Returns
259
259
  -------
260
260
  score : int
261
261
  The substitution / alignment score.
262
262
  """
263
263
  return self._matrix[code1, code2]
264
-
264
+
265
265
  def get_score(self, symbol1, symbol2):
266
266
  """
267
267
  Get the substitution score of two symbols.
268
-
268
+
269
269
  Parameters
270
270
  ----------
271
271
  symbol1, symbol2 : object
272
272
  Symbols to be aligned.
273
-
273
+
274
274
  Returns
275
275
  -------
276
276
  score : int
@@ -279,19 +279,19 @@ class SubstitutionMatrix(object):
279
279
  code1 = self._alph1.encode(symbol1)
280
280
  code2 = self._alph2.encode(symbol2)
281
281
  return self._matrix[code1, code2]
282
-
282
+
283
283
  def shape(self):
284
284
  """
285
285
  Get the shape (i.e. the length of both alphabets)
286
286
  of the substitution matrix.
287
-
287
+
288
288
  Returns
289
289
  -------
290
290
  shape : tuple
291
291
  Matrix shape.
292
292
  """
293
293
  return (len(self._alph1), len(self._alph2))
294
-
294
+
295
295
  def __str__(self):
296
296
  # Create matrix in NCBI format
297
297
  string = " "
@@ -306,18 +306,18 @@ class SubstitutionMatrix(object):
306
306
  # Remove terminal line break
307
307
  string = string[:-1]
308
308
  return string
309
-
309
+
310
310
  @staticmethod
311
311
  def dict_from_str(string):
312
312
  """
313
313
  Create a matrix dictionary from a string in NCBI matrix format.
314
-
314
+
315
315
  Symbols of the first alphabet are taken from the left column,
316
316
  symbols of the second alphabet are taken from the top row.
317
-
317
+
318
318
  The keys of the dictionary consist of tuples containing the
319
319
  aligned symbols and the values are the corresponding scores.
320
-
320
+
321
321
  Returns
322
322
  -------
323
323
  matrix_dict : dict
@@ -329,22 +329,22 @@ class SubstitutionMatrix(object):
329
329
  symbols2 = [e for e in lines[0].split()]
330
330
  scores = np.array([line.split()[1:] for line in lines[1:]]).astype(int)
331
331
  scores = np.transpose(scores)
332
-
332
+
333
333
  matrix_dict = {}
334
334
  for i in range(len(symbols1)):
335
335
  for j in range(len(symbols2)):
336
- matrix_dict[(symbols1[i], symbols2[j])] = scores[i,j]
336
+ matrix_dict[(symbols1[i], symbols2[j])] = scores[i, j]
337
337
  return matrix_dict
338
-
338
+
339
339
  @staticmethod
340
340
  def dict_from_db(matrix_name):
341
341
  """
342
342
  Create a matrix dictionary from a valid matrix name in the
343
343
  internal matrix database.
344
-
344
+
345
345
  The keys of the dictionary consist of tuples containing the
346
346
  aligned symbols and the values are the corresponding scores.
347
-
347
+
348
348
  Returns
349
349
  -------
350
350
  matrix_dict : dict
@@ -353,12 +353,12 @@ class SubstitutionMatrix(object):
353
353
  filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
354
354
  with open(filename, "r") as f:
355
355
  return SubstitutionMatrix.dict_from_str(f.read())
356
-
356
+
357
357
  @staticmethod
358
358
  def list_db():
359
359
  """
360
360
  List all matrix names in the internal database.
361
-
361
+
362
362
  Returns
363
363
  -------
364
364
  db_list : list
@@ -367,27 +367,26 @@ class SubstitutionMatrix(object):
367
367
  files = os.listdir(SubstitutionMatrix._db_dir)
368
368
  # Remove '.mat' from files
369
369
  return [file[:-4] for file in sorted(files)]
370
-
371
-
370
+
372
371
  @staticmethod
373
372
  def std_protein_matrix():
374
373
  """
375
374
  Get the default :class:`SubstitutionMatrix` for protein sequence
376
375
  alignments, which is BLOSUM62.
377
-
376
+
378
377
  Returns
379
378
  -------
380
379
  matrix : SubstitutionMatrix
381
380
  Default matrix.
382
381
  """
383
382
  return _matrix_blosum62
384
-
383
+
385
384
  @staticmethod
386
385
  def std_nucleotide_matrix():
387
386
  """
388
387
  Get the default :class:`SubstitutionMatrix` for DNA sequence
389
388
  alignments.
390
-
389
+
391
390
  Returns
392
391
  -------
393
392
  matrix : SubstitutionMatrix
@@ -395,11 +394,11 @@ class SubstitutionMatrix(object):
395
394
  """
396
395
  return _matrix_nuc
397
396
 
398
- # Preformatted BLOSUM62 and NUC substitution matrix from NCBI
399
- _matrix_blosum62 = SubstitutionMatrix(ProteinSequence.alphabet,
400
- ProteinSequence.alphabet,
401
- "BLOSUM62")
402
- _matrix_nuc = SubstitutionMatrix(NucleotideSequence.alphabet_amb,
403
- NucleotideSequence.alphabet_amb,
404
- "NUC")
405
397
 
398
+ # Preformatted BLOSUM62 and NUC substitution matrix from NCBI
399
+ _matrix_blosum62 = SubstitutionMatrix(
400
+ ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
401
+ )
402
+ _matrix_nuc = SubstitutionMatrix(
403
+ NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
404
+ )
@@ -39,9 +39,9 @@ cdef float32 MAX_FLOAT = np.finfo(np.float32).max
39
39
 
40
40
 
41
41
  class GapSymbol:
42
-
42
+
43
43
  _instance = None
44
-
44
+
45
45
  def __init__(self):
46
46
  if GapSymbol._instance is not None:
47
47
  raise ValueError(
@@ -49,16 +49,16 @@ class GapSymbol:
49
49
  )
50
50
  else:
51
51
  GapSymbol._instance = self
52
-
52
+
53
53
  @staticmethod
54
54
  def instance():
55
55
  if GapSymbol._instance is None:
56
56
  GapSymbol._instance = GapSymbol()
57
57
  return GapSymbol._instance
58
-
58
+
59
59
  def __str__(self):
60
60
  return "-"
61
-
61
+
62
62
  def __hash__(self):
63
63
  return 0
64
64
 
@@ -69,13 +69,13 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
69
69
  align_multiple(sequences, matrix, gap_penalty=-10,
70
70
  terminal_penalty=True, distances=None,
71
71
  guide_tree=None)
72
-
72
+
73
73
  Perform a multiple sequence alignment using a progressive
74
74
  alignment algorithm. :footcite:`Feng1987`
75
75
 
76
76
  Based on pairwise sequence distances a guide tree is constructed.
77
77
  The sequences are progressively aligned according to the tree,
78
- following the rule 'Once a gap, always a gap'.
78
+ following the rule 'Once a gap, always a gap'.
79
79
 
80
80
  Parameters
81
81
  ----------
@@ -124,7 +124,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
124
124
  distance_matrix : ndarray, shape=(n,n), dtype=float32
125
125
  The pairwise distance matrix used to construct the guide tree.
126
126
  Equal to `distances` if provided.
127
-
127
+
128
128
  Notes
129
129
  -----
130
130
  The similarity to distance conversion is performed according to the
@@ -137,14 +137,14 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
137
137
  \right)
138
138
 
139
139
  .. math:: S_{a,b}^{max} = \frac{ S_{a,a} + S_{b,b} }{ 2 }
140
-
140
+
141
141
  .. math:: S_{a,b}^{rand} = \frac{1}{L_{a,b}}
142
142
  \left(
143
143
  \sum_{x \in \Omega} \sum_{y \in \Omega}
144
144
  s_{x,y} \cdot N_a(x) \cdot N_b(y)
145
145
  \right)
146
146
  + N_{a,b}^{open} \cdot p^{open} + N_{a,b}^{ext} \cdot p^{ext}
147
-
147
+
148
148
  :math:`D_{a,b}` - The distance between the sequences *a* and *b*.
149
149
 
150
150
  :math:`S_{a,b}` - The similarity score between the sequences *a* and *b*.
@@ -164,17 +164,17 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
164
164
 
165
165
  In rare cases of extremely unrelated sequences, :math:`S_{a,b}`
166
166
  can be lower than :math:`S_{a,b}^{rand}`.
167
- In this case the logaritmus cannot be calculated and a
167
+ In this case the logarithm cannot be calculated and a
168
168
  :class:`ValueError` is raised.
169
169
 
170
170
  References
171
171
  ----------
172
-
172
+
173
173
  .. footbibliography::
174
174
 
175
175
  Examples
176
176
  --------
177
-
177
+
178
178
  >>> seq1 = ProteinSequence("BIQTITE")
179
179
  >>> seq2 = ProteinSequence("TITANITE")
180
180
  >>> seq3 = ProteinSequence("BISMITE")
@@ -232,11 +232,11 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
232
232
  else:
233
233
  # Assure that every node in the guide tree is binary
234
234
  guide_tree = as_binary(guide_tree)
235
-
235
+
236
236
  # Create new matrix with neutral gap symbol
237
237
  gap_symbol = GapSymbol.instance()
238
238
  new_alphabet = Alphabet(
239
- matrix.get_alphabet1().get_symbols() + [gap_symbol]
239
+ matrix.get_alphabet1().get_symbols() + (gap_symbol,)
240
240
  )
241
241
  new_score_matrix = np.zeros(
242
242
  (len(new_alphabet), len(new_alphabet)), dtype=np.int32
@@ -275,7 +275,7 @@ def align_multiple(sequences, matrix, gap_penalty=-10, terminal_penalty=True,
275
275
  ]
276
276
  for i in range(len(aligned_seqs)):
277
277
  aligned_seqs[i].code = aligned_seq_codes[i]
278
-
278
+
279
279
  # Reorder aligned sequences into the original sequence order
280
280
  new_order = np.argsort(order)
281
281
  aligned_seqs = [aligned_seqs[pos] for pos in new_order]
@@ -290,7 +290,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
290
290
  Create all pairwise alignments for the given sequences and use the
291
291
  method proposed by Feng & Doolittle to calculate the pairwise
292
292
  distance matrix
293
-
293
+
294
294
  Parameters
295
295
  ----------
296
296
  _T : ndarray, dtype=VARIABLE
@@ -306,7 +306,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
306
306
  terminal_penalty : bool
307
307
  Whether to or not count terminal gap penalties for the
308
308
  alignments.
309
-
309
+
310
310
  Returns
311
311
  -------
312
312
  distances : ndarray, shape=(n,n), dtype=float32
@@ -332,7 +332,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
332
332
  )[0]
333
333
  scores[i,j] = alignment.score
334
334
  alignments[i,j] = alignment
335
-
335
+
336
336
  ### Distance calculation from similarity scores ###
337
337
  # Calculate the occurrences of each symbol code in each sequence
338
338
  # This is used later for the random score
@@ -364,7 +364,7 @@ def _get_distance_matrix(CodeType[:] _T, sequences, matrix,
364
364
  cdef CodeType[:] seq_code1, seq_code2
365
365
  cdef CodeType code1, code2
366
366
  cdef float32 score_rand, score_max
367
-
367
+
368
368
  # Calculate distance
369
369
  # i and j are indicating the alignment between the sequences i and j
370
370
  for i in range(scores_v.shape[0]):
@@ -405,14 +405,14 @@ def _count_gaps(int64[:,:] trace_v, bint terminal_penalty):
405
405
  """
406
406
  Count the number of gap openings and gap extensions in an alignment
407
407
  trace.
408
-
408
+
409
409
  Parameters
410
410
  ----------
411
411
  trace_v : ndarray, shape=(n,2), dtype=int
412
412
  The alignment trace.
413
413
  terminal_penalty : bool
414
414
  Whether to or not count terminal gap penalties.
415
-
415
+
416
416
  Returns
417
417
  -------
418
418
  gap_open_count, gap_ext_count: int
@@ -440,7 +440,7 @@ def _count_gaps(int64[:,:] trace_v, bint terminal_penalty):
440
440
  if start_index == -1 or stop_index == -1:
441
441
  return 0, 0
442
442
  trace_v = trace_v[start_index : stop_index]
443
-
443
+
444
444
  if trace_v[0,0] == -1:
445
445
  gap_open_count += 1
446
446
  if trace_v[0,1] == -1:
@@ -471,7 +471,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
471
471
  The gaps inserted in this pairwise alignment are also inserted
472
472
  into all other sequences in the respective sub-MSA at the same
473
473
  position.
474
-
474
+
475
475
  Parameters
476
476
  ----------
477
477
  _T : ndarray, dtype=VARIABLE
@@ -490,13 +490,13 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
490
490
  matrix : SubstitutionMatrix
491
491
  The substitution matrix used for the alignments.
492
492
  gap_symbol_code : int
493
- The symbol code for the gap symbol.
493
+ The symbol code for the gap symbol.
494
494
  gap_penalty : int or tuple(int, int)
495
495
  A linear or affine gap penalty for the alignments.
496
496
  terminal_penalty : bool
497
497
  Whether to or not count terminal gap penalties for the
498
498
  alignments.
499
-
499
+
500
500
  Returns
501
501
  -------
502
502
  order : ndarray, shape=(m,), dtype=int
@@ -515,7 +515,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
515
515
  cdef int32[:] indices1_v, indices2_v
516
516
  cdef np.ndarray incides1, incides2
517
517
  cdef list aligned_seqs1, aligned_seqs2
518
-
518
+
519
519
  if tree_node.is_leaf():
520
520
  # Child node -> Cannot do an alignment
521
521
  # -> Just return the sequence corresponding to the leaf node
@@ -523,7 +523,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
523
523
  # when neutral gap character is inserted
524
524
  return np.array([tree_node.index], dtype=np.int32), \
525
525
  [sequences[tree_node.index].copy()]
526
-
526
+
527
527
  else:
528
528
  # Multiple alignment of sequences corresponding to both child nodes
529
529
  child1, child2 = tree_node.children
@@ -537,7 +537,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
537
537
  gap_symbol_code, gap_penalty, terminal_penalty
538
538
  )
539
539
  indices2_v = incides2
540
-
540
+
541
541
  # Find sequence pair with lowest distance
542
542
  dist_min = MAX_FLOAT
543
543
  for i in range(indices1_v.shape[0]):
@@ -554,7 +554,7 @@ def _progressive_align(CodeType[:] _T, sequences, tree_node,
554
554
  gap_penalty, terminal_penalty, max_number=1
555
555
  )[0]
556
556
  # Place neutral gap symbol for position of new gaps
557
- # in both sequence groups
557
+ # in both sequence groups
558
558
  for i in range(len(aligned_seqs1)):
559
559
  seq = aligned_seqs1[i]
560
560
  seq.code = _replace_gaps(
@@ -580,7 +580,7 @@ def _replace_gaps(CodeType[:] _T,
580
580
 
581
581
  The replacement is required by the progressive alignment algorithm
582
582
  to be able to align gapped sequences with each other.
583
-
583
+
584
584
  Parameters
585
585
  ----------
586
586
  _T : ndarray, dtype=VARIABLE
@@ -592,8 +592,8 @@ def _replace_gaps(CodeType[:] _T,
592
592
  seq_code : ndarray, shape=(n,)
593
593
  The sequence code representing the given sequence.
594
594
  gap_symbol_code : int
595
- The symbol code for the gap symbol.
596
-
595
+ The symbol code for the gap symbol.
596
+
597
597
  Returns
598
598
  -------
599
599
  new_seq_code : ndarray, shape=(m,)
@@ -609,12 +609,12 @@ def _replace_gaps(CodeType[:] _T,
609
609
  partial_trace_v.shape[0], dtype=seq_code.dtype
610
610
  )
611
611
  cdef CodeType[:] new_seq_code_v = new_seq_code
612
-
612
+
613
613
  for i in range(partial_trace_v.shape[0]):
614
614
  index = partial_trace_v[i]
615
615
  if index == -1:
616
616
  new_seq_code_v[i] = gap_symbol_code
617
617
  else:
618
618
  new_seq_code_v[i] = seq_code[index]
619
-
619
+
620
620
  return new_seq_code