biotite 0.41.2__cp312-cp312-macosx_11_0_arm64.whl → 1.0.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  60. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  68. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  102. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  103. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cpython-312-darwin.so +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -5,16 +5,22 @@
5
5
  __name__ = "biotite.sequence.align"
6
6
  __author__ = "Patrick Kunzmann"
7
7
 
8
- import numpy as np
9
8
  import numbers
10
- import copy
11
9
  import textwrap
12
- from ..alphabet import LetterAlphabet
13
-
10
+ from collections.abc import Sequence
11
+ import numpy as np
12
+ from biotite.sequence.alphabet import LetterAlphabet
14
13
 
15
- __all__ = ["Alignment", "get_codes", "get_symbols",
16
- "get_sequence_identity", "get_pairwise_sequence_identity",
17
- "score", "find_terminal_gaps", "remove_terminal_gaps"]
14
+ __all__ = [
15
+ "Alignment",
16
+ "get_codes",
17
+ "get_symbols",
18
+ "get_sequence_identity",
19
+ "get_pairwise_sequence_identity",
20
+ "score",
21
+ "find_terminal_gaps",
22
+ "remove_terminal_gaps",
23
+ ]
18
24
 
19
25
 
20
26
  class Alignment(object):
@@ -22,7 +28,7 @@ class Alignment(object):
22
28
  An :class:`Alignment` object stores information about which symbols
23
29
  of *n* sequences are aligned to each other and it stores the
24
30
  corresponding alignment score.
25
-
31
+
26
32
  Instead of saving a list of aligned symbols, this class saves the
27
33
  original *n* sequences, that were aligned, and a so called *trace*,
28
34
  which indicate the aligned symbols of these sequences.
@@ -31,16 +37,16 @@ class Alignment(object):
31
37
  Each element of the trace is the index in the corresponding
32
38
  sequence.
33
39
  A gap is represented by the value -1.
34
-
40
+
35
41
  Furthermore this class provides multiple utility functions for
36
42
  conversion into strings in order to make the alignment human
37
43
  readable.
38
-
44
+
39
45
  Unless an :class:`Alignment` object is the result of an multiple
40
46
  sequence alignment, the object will contain only two sequences.
41
-
47
+
42
48
  All attributes of this class are publicly accessible.
43
-
49
+
44
50
  Parameters
45
51
  ----------
46
52
  sequences : list
@@ -49,7 +55,7 @@ class Alignment(object):
49
55
  The alignment trace.
50
56
  score : int, optional
51
57
  Alignment score.
52
-
58
+
53
59
  Attributes
54
60
  ----------
55
61
  sequences : list
@@ -58,10 +64,10 @@ class Alignment(object):
58
64
  The alignment trace.
59
65
  score : int
60
66
  Alignment score.
61
-
67
+
62
68
  Examples
63
69
  --------
64
-
70
+
65
71
  >>> seq1 = NucleotideSequence("CGTCAT")
66
72
  >>> seq2 = NucleotideSequence("TCATGC")
67
73
  >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -95,8 +101,10 @@ class Alignment(object):
95
101
 
96
102
  def __repr__(self):
97
103
  """Represent Alignment a string for debugging."""
98
- return f"Alignment([{', '.join([seq.__repr__() for seq in self.sequences])}], " \
99
- f"np.{np.array_repr(self.trace)}, score={self.score})"
104
+ return (
105
+ f"Alignment([{', '.join([seq.__repr__() for seq in self.sequences])}], "
106
+ f"np.{np.array_repr(self.trace)}, score={self.score})"
107
+ )
100
108
 
101
109
  def _gapped_str(self, seq_index):
102
110
  seq_str = ""
@@ -107,11 +115,11 @@ class Alignment(object):
107
115
  else:
108
116
  seq_str += "-"
109
117
  return seq_str
110
-
118
+
111
119
  def get_gapped_sequences(self):
112
120
  """
113
121
  Get a the string representation of the gapped sequences.
114
-
122
+
115
123
  Returns
116
124
  -------
117
125
  sequences : list of str
@@ -119,7 +127,7 @@ class Alignment(object):
119
127
  as in `Alignment.sequences`.
120
128
  """
121
129
  return [self._gapped_str(i) for i in range(len(self.sequences))]
122
-
130
+
123
131
  def __str__(self):
124
132
  # Check if any of the sequences
125
133
  # has an non-single letter alphabet
@@ -143,32 +151,33 @@ class Alignment(object):
143
151
  return ali_str[:-2]
144
152
  else:
145
153
  return super().__str__()
146
-
154
+
147
155
  def __getitem__(self, index):
148
156
  if isinstance(index, tuple):
149
157
  if len(index) > 2:
150
158
  raise IndexError("Only 1D or 2D indices are allowed")
151
- if isinstance(index[0], numbers.Integral) or \
152
- isinstance(index[0], numbers.Integral):
153
- raise IndexError(
154
- "Integers are invalid indices for alignments, "
155
- "a single sequence or alignment column cannot be "
156
- "selected"
157
- )
159
+ if isinstance(index[0], numbers.Integral) or isinstance(
160
+ index[0], numbers.Integral
161
+ ):
162
+ raise IndexError(
163
+ "Integers are invalid indices for alignments, "
164
+ "a single sequence or alignment column cannot be "
165
+ "selected"
166
+ )
158
167
  return Alignment(
159
168
  Alignment._index_sequences(self.sequences, index[1]),
160
169
  self.trace[index],
161
- self.score
170
+ self.score,
162
171
  )
163
172
  else:
164
173
  return Alignment(self.sequences, self.trace[index], self.score)
165
-
174
+
166
175
  def __iter__(self):
167
176
  raise TypeError("'Alignment' object is not iterable")
168
-
177
+
169
178
  def __len__(self):
170
179
  return len(self.trace)
171
-
180
+
172
181
  def __eq__(self, item):
173
182
  if not isinstance(item, Alignment):
174
183
  return False
@@ -179,45 +188,41 @@ class Alignment(object):
179
188
  if self.score != item.score:
180
189
  return False
181
190
  return True
182
-
191
+
183
192
  @staticmethod
184
193
  def _index_sequences(sequences, index):
185
- if isinstance(index, (list, tuple)) or \
186
- (isinstance(index, np.ndarray) and index.dtype != bool):
187
- return [sequences[i] for i in index]
194
+ if isinstance(index, (list, tuple)) or (
195
+ isinstance(index, np.ndarray) and index.dtype != bool
196
+ ):
197
+ return [sequences[i] for i in index]
188
198
  elif isinstance(index, np.ndarray) and index.dtype == bool:
189
199
  return [seq for seq, mask in zip(sequences, index) if mask]
190
200
  if isinstance(index, slice):
191
201
  return sequences[index]
192
202
  else:
193
- raise IndexError(
194
- f"Invalid alignment index type '{type(index).__name__}'"
195
- )
196
-
203
+ raise IndexError(f"Invalid alignment index type '{type(index).__name__}'")
204
+
197
205
  @staticmethod
198
206
  def trace_from_strings(seq_str_list):
199
207
  """
200
208
  Create a trace from strings that represent aligned sequences.
201
-
209
+
202
210
  Parameters
203
211
  ----------
204
212
  seq_str_list : list of str
205
213
  The strings, where each each one represents a sequence
206
214
  (with gaps) in an alignment.
207
215
  A ``-`` is interpreted as gap.
208
-
216
+
209
217
  Returns
210
218
  -------
211
219
  trace : ndarray, dtype=int, shape=(n,2)
212
220
  The created trace.
213
221
  """
214
222
  if len(seq_str_list) < 2:
215
- raise ValueError(
216
- "An alignment must contain at least two sequences"
217
- )
223
+ raise ValueError("An alignment must contain at least two sequences")
218
224
  seq_i = np.zeros(len(seq_str_list))
219
- trace = np.full(( len(seq_str_list[0]), len(seq_str_list) ),
220
- -1, dtype=int)
225
+ trace = np.full((len(seq_str_list[0]), len(seq_str_list)), -1, dtype=int)
221
226
  # Get length of string (same length for all strings)
222
227
  # rather than length of list
223
228
  for pos_i in range(len(seq_str_list[0])):
@@ -238,22 +243,22 @@ def get_codes(alignment):
238
243
  Instead of the indices of the aligned symbols (trace), the return
239
244
  value contains the corresponding symbol codes for each index.
240
245
  Gaps are still represented by *-1*.
241
-
246
+
242
247
  Parameters
243
248
  ----------
244
249
  alignment : Alignment
245
250
  The alignment to get the sequence codes for.
246
-
251
+
247
252
  Returns
248
253
  -------
249
254
  codes : ndarray, dtype=int, shape=(n,m)
250
255
  The sequence codes for the alignment.
251
256
  The shape is *(n,m)* for *n* sequences and *m* alignment cloumn.
252
257
  The array uses *-1* values for gaps.
253
-
258
+
254
259
  Examples
255
260
  --------
256
-
261
+
257
262
  >>> seq1 = NucleotideSequence("CGTCAT")
258
263
  >>> seq2 = NucleotideSequence("TCATGC")
259
264
  >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -267,14 +272,17 @@ def get_codes(alignment):
267
272
  """
268
273
  trace = alignment.trace
269
274
  sequences = alignment.sequences
270
-
275
+
271
276
  # The number of sequences is the first dimension
272
- codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=int)
277
+ codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64)
273
278
  for i in range(len(sequences)):
279
+ # Mark -1 explicitly as int64 to avoid that the unsigned dtype
280
+ # of the sequence code is used
281
+ # (https://numpy.org/neps/nep-0050-scalar-promotion.html)
274
282
  codes[i] = np.where(
275
- trace[:,i] != -1, sequences[i].code[trace[:,i]], -1
283
+ trace[:, i] != -1, sequences[i].code[trace[:, i]], np.int64(-1)
276
284
  )
277
-
285
+
278
286
  return np.stack(codes)
279
287
 
280
288
 
@@ -283,24 +291,24 @@ def get_symbols(alignment):
283
291
  Similar to :func:`get_codes()`, but contains the decoded symbols
284
292
  instead of codes.
285
293
  Gaps are still represented by *None* values.
286
-
294
+
287
295
  Parameters
288
296
  ----------
289
297
  alignment : Alignment
290
298
  The alignment to get the symbols for.
291
-
299
+
292
300
  Returns
293
301
  -------
294
302
  symbols : list of list
295
303
  The nested list of symbols.
296
-
304
+
297
305
  See Also
298
306
  --------
299
307
  get_codes
300
308
 
301
309
  Examples
302
310
  --------
303
-
311
+
304
312
  >>> seq1 = NucleotideSequence("CGTCAT")
305
313
  >>> seq2 = NucleotideSequence("TCATGC")
306
314
  >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -317,8 +325,8 @@ def get_symbols(alignment):
317
325
  alphabet = alignment.sequences[i].get_alphabet()
318
326
  codes_wo_gaps = codes[i, codes[i] != -1]
319
327
  symbols_wo_gaps = alphabet.decode_multiple(codes_wo_gaps)
320
- if not isinstance(symbols_wo_gaps, list):
321
- symbols_wo_gaps = list(symbols_wo_gaps)
328
+ if isinstance(symbols_wo_gaps, np.ndarray):
329
+ symbols_wo_gaps = symbols_wo_gaps.tolist()
322
330
  symbols_for_seq = np.full(len(codes[i]), None, dtype=object)
323
331
  symbols_for_seq[codes[i] != -1] = symbols_wo_gaps
324
332
  symbols[i] = symbols_for_seq.tolist()
@@ -331,7 +339,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
331
339
 
332
340
  The identity is equal to the matches divided by a measure for the
333
341
  length of the alignment that depends on the `mode` parameter.
334
-
342
+
335
343
  Parameters
336
344
  ----------
337
345
  alignment : Alignment
@@ -348,12 +356,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
348
356
  length of the shortest sequence.
349
357
 
350
358
  Default is *not_terminal*.
351
-
359
+
352
360
  Returns
353
361
  -------
354
362
  identity : float
355
363
  The sequence identity, ranging between 0 and 1.
356
-
364
+
357
365
  See also
358
366
  --------
359
367
  get_pairwise_sequence_identity
@@ -363,12 +371,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
363
371
  # Count matches
364
372
  matches = 0
365
373
  for i in range(codes.shape[1]):
366
- column = codes[:,i]
374
+ column = codes[:, i]
367
375
  # One unique value -> all symbols match
368
376
  unique_symbols = np.unique(column)
369
377
  if len(unique_symbols) == 1 and unique_symbols[0] != -1:
370
378
  matches += 1
371
-
379
+
372
380
  # Calculate length
373
381
  if mode == "all":
374
382
  length = len(alignment)
@@ -394,7 +402,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
394
402
 
395
403
  The identity is equal to the matches divided by a measure for the
396
404
  length of the alignment that depends on the `mode` parameter.
397
-
405
+
398
406
  Parameters
399
407
  ----------
400
408
  alignment : Alignment, length=n
@@ -411,12 +419,12 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
411
419
  length of the shortest one of the two sequences.
412
420
 
413
421
  Default is *not_terminal*.
414
-
422
+
415
423
  Returns
416
424
  -------
417
425
  identity : ndarray, dtype=float, shape=(n,n)
418
426
  The pairwise sequence identity, ranging between 0 and 1.
419
-
427
+
420
428
  See also
421
429
  --------
422
430
  get_sequence_identity
@@ -427,9 +435,11 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
427
435
  # Count matches
428
436
  # Calculate at which positions the sequences are identical
429
437
  # and are not gaps
430
- equality_matrix = (codes[:, np.newaxis, :] == codes[np.newaxis, :, :]) \
431
- & (codes[:, np.newaxis, :] != -1) \
432
- & (codes[np.newaxis, :, :] != -1) \
438
+ equality_matrix = (
439
+ (codes[:, np.newaxis, :] == codes[np.newaxis, :, :])
440
+ & (codes[:, np.newaxis, :] != -1)
441
+ & (codes[np.newaxis, :, :] != -1)
442
+ )
433
443
  # Sum these positions up
434
444
  matches = np.count_nonzero(equality_matrix, axis=-1)
435
445
 
@@ -441,24 +451,23 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
441
451
  for i in range(n_seq):
442
452
  for j in range(n_seq):
443
453
  # Find latest start and earliest stop of all sequences
444
- start, stop = find_terminal_gaps(alignment[:, [i,j]])
454
+ start, stop = find_terminal_gaps(alignment[:, [i, j]])
445
455
  if stop <= start:
446
456
  raise ValueError(
447
457
  "Cannot calculate non-terminal identity, "
448
458
  "as the two sequences have no overlap"
449
459
  )
450
- length[i,j] = stop - start
460
+ length[i, j] = stop - start
451
461
  elif mode == "shortest":
452
462
  length = np.zeros((n_seq, n_seq))
453
463
  for i in range(n_seq):
454
464
  for j in range(n_seq):
455
- length[i,j] = min([
456
- len(alignment.sequences[i]),
457
- len(alignment.sequences[j])
458
- ])
465
+ length[i, j] = min(
466
+ [len(alignment.sequences[i]), len(alignment.sequences[j])]
467
+ )
459
468
  else:
460
469
  raise ValueError(f"'{mode}' is an invalid calculation mode")
461
-
470
+
462
471
  return matches / length
463
472
 
464
473
 
@@ -468,7 +477,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
468
477
 
469
478
  If the alignment contains more than two sequences,
470
479
  all pairwise scores are counted.
471
-
480
+
472
481
  Parameters
473
482
  ----------
474
483
  alignment : Alignment
@@ -485,7 +494,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
485
494
  terminal_penalty : bool, optional
486
495
  If true, gap penalties are applied to terminal gaps.
487
496
  (Default: True)
488
-
497
+
489
498
  Returns
490
499
  -------
491
500
  score : int
@@ -503,18 +512,18 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
503
512
  # Do not count self-similarity
504
513
  # and do not count similarity twice (not S(i,j) and S(j,i))
505
514
  for i in range(codes.shape[0]):
506
- for j in range(i+1, codes.shape[0]):
515
+ for j in range(i + 1, codes.shape[0]):
507
516
  code_i = column[i]
508
517
  code_j = column[j]
509
518
  # Ignore gaps
510
519
  if code_i != -1 and code_j != -1:
511
520
  score += matrix[code_i, code_j]
512
-
521
+
513
522
  # Sum gap penalties
514
- if type(gap_penalty) == int:
523
+ if isinstance(gap_penalty, numbers.Real):
515
524
  gap_open = gap_penalty
516
525
  gap_ext = gap_penalty
517
- elif type(gap_penalty) == tuple:
526
+ elif isinstance(gap_penalty, Sequence):
518
527
  gap_open = gap_penalty[0]
519
528
  gap_ext = gap_penalty[1]
520
529
  else:
@@ -590,15 +599,15 @@ def find_terminal_gaps(alignment):
590
599
  """
591
600
  trace = alignment.trace
592
601
  # Find for each sequence the positions of non-gap symbols
593
- no_gap_pos = [np.where(trace[:,i] != -1)[0] for i in range(trace.shape[1])]
602
+ no_gap_pos = [np.where(trace[:, i] != -1)[0] for i in range(trace.shape[1])]
594
603
  # Find for each sequence the positions of the sequence start and end
595
604
  # in the alignment
596
- firsts = [no_gap_pos[i][0 ] for i in range(trace.shape[1])]
597
- lasts = [no_gap_pos[i][-1] for i in range(trace.shape[1])]
605
+ firsts = [no_gap_pos[i][0] for i in range(trace.shape[1])]
606
+ lasts = [no_gap_pos[i][-1] for i in range(trace.shape[1])]
598
607
  # The terminal gaps are before all sequences start and after any
599
608
  # sequence ends
600
609
  # Use exclusive stop -> -1
601
- return np.max(firsts), np.min(lasts) + 1
610
+ return np.max(firsts).item(), np.min(lasts).item() + 1
602
611
 
603
612
 
604
613
  def remove_terminal_gaps(alignment):
@@ -655,4 +664,4 @@ def remove_terminal_gaps(alignment):
655
664
  "Cannot remove terminal gaps, since at least two sequences have "
656
665
  "no overlap and the resulting alignment would be empty"
657
666
  )
658
- return alignment[start : stop]
667
+ return alignment[start:stop]
@@ -6,11 +6,12 @@ __name__ = "biotite.sequence.align"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["bucket_number"]
8
8
 
9
- from os.path import realpath, dirname, join
9
+ from os.path import dirname, join, realpath
10
10
  import numpy as np
11
11
 
12
-
13
12
  _primes = None
13
+
14
+
14
15
  def bucket_number(n_kmers, load_factor=0.8):
15
16
  """
16
17
  Find an appropriate number of buckets for a :class:`BucketKmerTable`
@@ -54,16 +55,17 @@ def bucket_number(n_kmers, load_factor=0.8):
54
55
  """
55
56
  global _primes
56
57
  if _primes is None:
57
- with open(
58
- join(dirname(realpath(__file__)), "primes.txt")
59
- ) as file:
60
- _primes = np.array([
61
- int(line) for line in file.read().splitlines()
62
- if len(line) != 0 and line[0] != "#"
63
- ])
58
+ with open(join(dirname(realpath(__file__)), "primes.txt")) as file:
59
+ _primes = np.array(
60
+ [
61
+ int(line)
62
+ for line in file.read().splitlines()
63
+ if len(line) != 0 and line[0] != "#"
64
+ ]
65
+ )
64
66
 
65
67
  number = int(n_kmers / load_factor)
66
68
  index = np.searchsorted(_primes, number, side="left")
67
69
  if index == len(_primes):
68
70
  raise ValueError("Number of buckets too large")
69
- return _primes[index]
71
+ return _primes[index]