biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  60. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  68. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  102. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  103. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +246 -236
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cpython-310-darwin.so +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +83 -78
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +140 -110
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +260 -258
  162. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  163. biotite/structure/io/trajfile.py +90 -107
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
  184. biotite-1.0.1.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
@@ -10,10 +10,8 @@ __all__ = ["MinimizerSelector", "SyncmerSelector", "CachedSyncmerSelector",
10
10
  cimport cython
11
11
  cimport numpy as np
12
12
 
13
- from numbers import Integral
14
13
  import numpy as np
15
14
  from .kmeralphabet import KmerAlphabet
16
- from ..alphabet import AlphabetError
17
15
 
18
16
 
19
17
  ctypedef np.int64_t int64
@@ -21,7 +19,7 @@ ctypedef np.uint32_t uint32
21
19
 
22
20
 
23
21
  # Obtained from 'np.iinfo(np.int64).max'
24
- DEF MAX_INT_64 = 9223372036854775807
22
+ cdef int64 MAX_INT_64 = 9223372036854775807
25
23
 
26
24
 
27
25
  class MinimizerSelector:
@@ -54,7 +52,7 @@ class MinimizerSelector:
54
52
  This standard order is often the lexicographical order, which is
55
53
  known to yield suboptimal *density* in many cases
56
54
  :footcite:`Roberts2004`.
57
-
55
+
58
56
  Attributes
59
57
  ----------
60
58
  kmer_alphabet : KmerAlphabet
@@ -73,7 +71,7 @@ class MinimizerSelector:
73
71
 
74
72
  References
75
73
  ----------
76
-
74
+
77
75
  .. footbibliography::
78
76
 
79
77
  Examples
@@ -122,12 +120,12 @@ class MinimizerSelector:
122
120
  self._window = window
123
121
  self._kmer_alph = kmer_alphabet
124
122
  self._permutation = permutation
125
-
123
+
126
124
 
127
125
  @property
128
126
  def kmer_alphabet(self):
129
127
  return self._kmer_alph
130
-
128
+
131
129
  @property
132
130
  def window(self):
133
131
  return self._window
@@ -135,7 +133,7 @@ class MinimizerSelector:
135
133
  @property
136
134
  def permutation(self):
137
135
  return self._permutation
138
-
136
+
139
137
 
140
138
  def select(self, sequence, bint alphabet_check=True):
141
139
  """
@@ -154,7 +152,7 @@ class MinimizerSelector:
154
152
  of the sequence and the alphabet of the
155
153
  :class:`MinimizerSelector`
156
154
  is not checked to gain additional performance.
157
-
155
+
158
156
  Returns
159
157
  -------
160
158
  minimizer_indices : ndarray, dtype=np.uint32
@@ -162,7 +160,7 @@ class MinimizerSelector:
162
160
  minimizers : ndarray, dtype=np.int64
163
161
  The *k-mers* that are the selected minimizers, returned as
164
162
  *k-mer* code.
165
-
163
+
166
164
  Notes
167
165
  -----
168
166
  Duplicate minimizers are omitted, i.e. if two windows have the
@@ -176,7 +174,7 @@ class MinimizerSelector:
176
174
  )
177
175
  kmers = self._kmer_alph.create_kmers(sequence.code)
178
176
  return self.select_from_kmers(kmers)
179
-
177
+
180
178
 
181
179
  def select_from_kmers(self, kmers):
182
180
  """
@@ -191,7 +189,7 @@ class MinimizerSelector:
191
189
  minimizers in.
192
190
  The *k-mer* codes correspond to the *k-mers* encoded by the
193
191
  given `kmer_alphabet`.
194
-
192
+
195
193
  Returns
196
194
  -------
197
195
  minimizer_indices : ndarray, dtype=np.uint32
@@ -199,7 +197,7 @@ class MinimizerSelector:
199
197
  appears.
200
198
  minimizers : ndarray, dtype=np.int64
201
199
  The corresponding *k-mers* codes of the minimizers.
202
-
200
+
203
201
  Notes
204
202
  -----
205
203
  Duplicate minimizers are omitted, i.e. if two windows have the
@@ -267,7 +265,7 @@ class SyncmerSelector:
267
265
  *k-mer*.
268
266
  By default, the minimum position needs to be at the start of the
269
267
  *k-mer*, which is termed *open syncmer*.
270
-
268
+
271
269
  Attributes
272
270
  ----------
273
271
  alphabet : Alphabet
@@ -276,7 +274,7 @@ class SyncmerSelector:
276
274
  The :class:`KmerAlphabet` for *k* and *s*, respectively.
277
275
  permutation : Permutation
278
276
  The permutation.
279
-
277
+
280
278
  See also
281
279
  --------
282
280
  CachedSyncmerSelector
@@ -291,7 +289,7 @@ class SyncmerSelector:
291
289
 
292
290
  References
293
291
  ----------
294
-
292
+
295
293
  .. footbibliography::
296
294
 
297
295
  Examples
@@ -337,7 +335,7 @@ class SyncmerSelector:
337
335
  self._alphabet = alphabet
338
336
  self._kmer_alph = KmerAlphabet(alphabet, k)
339
337
  self._smer_alph = KmerAlphabet(alphabet, s)
340
-
338
+
341
339
  self._permutation = permutation
342
340
 
343
341
  self._offset = np.asarray(offset, dtype=np.int64)
@@ -353,7 +351,7 @@ class SyncmerSelector:
353
351
  )
354
352
  if len(np.unique(self._offset)) != len(self._offset):
355
353
  raise ValueError("Offset must contain unique values")
356
-
354
+
357
355
 
358
356
  @property
359
357
  def alphabet(self):
@@ -362,7 +360,7 @@ class SyncmerSelector:
362
360
  @property
363
361
  def kmer_alphabet(self):
364
362
  return self._kmer_alph
365
-
363
+
366
364
  @property
367
365
  def smer_alphabet(self):
368
366
  return self._smer_alph
@@ -370,7 +368,7 @@ class SyncmerSelector:
370
368
  @property
371
369
  def permutation(self):
372
370
  return self._permutation
373
-
371
+
374
372
 
375
373
  def select(self, sequence, bint alphabet_check=True):
376
374
  """
@@ -389,7 +387,7 @@ class SyncmerSelector:
389
387
  of the sequence and the alphabet of the
390
388
  :class:`SyncmerSelector`
391
389
  is not checked to gain additional performance.
392
-
390
+
393
391
  Returns
394
392
  -------
395
393
  syncmer_indices : ndarray, dtype=np.uint32
@@ -428,7 +426,7 @@ class SyncmerSelector:
428
426
  relative_min_pos = min_pos - np.arange(len(kmers))
429
427
  syncmer_pos = self._filter_syncmer_pos(relative_min_pos)
430
428
  return syncmer_pos, kmers[syncmer_pos]
431
-
429
+
432
430
 
433
431
  def select_from_kmers(self, kmers):
434
432
  """
@@ -442,7 +440,7 @@ class SyncmerSelector:
442
440
  ----------
443
441
  kmers : ndarray, dtype=np.int64
444
442
  The *k-mer* codes to select the syncmers from.
445
-
443
+
446
444
  Returns
447
445
  -------
448
446
  syncmer_indices : ndarray, dtype=np.uint32
@@ -459,9 +457,9 @@ class SyncmerSelector:
459
457
  :class:`Sequence` objects.
460
458
  """
461
459
  cdef int64 i
462
-
460
+
463
461
  symbol_codes_for_each_kmer = self._kmer_alph.split(kmers)
464
-
462
+
465
463
  cdef int64[:] min_pos = np.zeros(
466
464
  len(symbol_codes_for_each_kmer), dtype=np.int64
467
465
  )
@@ -477,10 +475,10 @@ class SyncmerSelector:
477
475
  f"sort keys for {len(smers)} s-mers"
478
476
  )
479
477
  min_pos[i] = np.argmin(ordering)
480
-
478
+
481
479
  syncmer_pos = self._filter_syncmer_pos(min_pos)
482
480
  return syncmer_pos, kmers[syncmer_pos]
483
-
481
+
484
482
 
485
483
  def _filter_syncmer_pos(self, min_pos):
486
484
  """
@@ -538,7 +536,7 @@ class CachedSyncmerSelector(SyncmerSelector):
538
536
  *k-mer*.
539
537
  By default, the minimum position needs to be at the start of the
540
538
  *k-mer*, which is termed *open syncmer*.
541
-
539
+
542
540
  Attributes
543
541
  ----------
544
542
  alphabet : Alphabet
@@ -547,7 +545,7 @@ class CachedSyncmerSelector(SyncmerSelector):
547
545
  The :class:`KmerAlphabet` for *k* and *s*, respectively.
548
546
  permutation : Permutation
549
547
  The permutation.
550
-
548
+
551
549
  See also
552
550
  --------
553
551
  SyncmerSelector
@@ -562,7 +560,7 @@ class CachedSyncmerSelector(SyncmerSelector):
562
560
 
563
561
  References
564
562
  ----------
565
-
563
+
566
564
  .. footbibliography::
567
565
 
568
566
  Examples
@@ -584,7 +582,7 @@ class CachedSyncmerSelector(SyncmerSelector):
584
582
  >>> print(["".join(kmer_alph.decode(kmer)) for kmer in syncmers])
585
583
  ['GGCAA', 'AAGTG', 'AGTGA', 'GTGAC']
586
584
  """
587
-
585
+
588
586
  def __init__(self, alphabet, k, s, permutation=None, offset=(0,)):
589
587
  super().__init__(alphabet, k, s, permutation, offset)
590
588
  # Check for all possible *k-mers*, whether they are syncmers
@@ -593,7 +591,7 @@ class CachedSyncmerSelector(SyncmerSelector):
593
591
  # Convert the index array into a boolean mask
594
592
  self._syncmer_mask = np.zeros(len(self.kmer_alphabet), dtype=bool)
595
593
  self._syncmer_mask[syncmer_indices] = True
596
-
594
+
597
595
 
598
596
  def select(self, sequence, bint alphabet_check=True):
599
597
  """
@@ -612,7 +610,7 @@ class CachedSyncmerSelector(SyncmerSelector):
612
610
  of the sequence and the alphabet of the
613
611
  :class:`CachedSyncmerSelector`
614
612
  is not checked to gain additional performance.
615
-
613
+
616
614
  Returns
617
615
  -------
618
616
  syncmer_indices : ndarray, dtype=np.uint32
@@ -628,7 +626,7 @@ class CachedSyncmerSelector(SyncmerSelector):
628
626
  )
629
627
  kmers = self.kmer_alphabet.create_kmers(sequence.code)
630
628
  return self.select_from_kmers(kmers)
631
-
629
+
632
630
 
633
631
  def select_from_kmers(self, kmers):
634
632
  """
@@ -642,7 +640,7 @@ class CachedSyncmerSelector(SyncmerSelector):
642
640
  ----------
643
641
  kmers : ndarray, dtype=np.int64
644
642
  The *k-mer* codes to select the syncmers from.
645
-
643
+
646
644
  Returns
647
645
  -------
648
646
  syncmer_indices : ndarray, dtype=np.uint32
@@ -660,7 +658,7 @@ class MincodeSelector:
660
658
 
661
659
  Selects the :math:`1/\text{compression}` *smallest* *k-mers* from
662
660
  :class:`KmerAlphabet`. :footcite:`Edgar2021`
663
-
661
+
664
662
  '*Small*' refers to the lexicographical order, or alternatively a
665
663
  custom order if `permutation` is given.
666
664
  The *Mincode* approach tries to reduce the number of *k-mers* from a
@@ -682,7 +680,7 @@ class MincodeSelector:
682
680
  By default, the standard order of the :class:`KmerAlphabet` is
683
681
  used.
684
682
  This standard order is often the lexicographical order.
685
-
683
+
686
684
  Attributes
687
685
  ----------
688
686
  kmer_alphabet : KmerAlphabet
@@ -695,10 +693,10 @@ class MincodeSelector:
695
693
  All *k-mers*, that are smaller than this value are selected.
696
694
  permutation : Permutation
697
695
  The permutation.
698
-
696
+
699
697
  References
700
698
  ----------
701
-
699
+
702
700
  .. footbibliography::
703
701
 
704
702
  Examples
@@ -735,12 +733,12 @@ class MincodeSelector:
735
733
  permutation_offset = permutation.min
736
734
  permutation_range = permutation.max - permutation.min + 1
737
735
  self._threshold = permutation_offset + permutation_range / compression
738
-
736
+
739
737
 
740
738
  @property
741
739
  def kmer_alphabet(self):
742
740
  return self._kmer_alph
743
-
741
+
744
742
  @property
745
743
  def compression(self):
746
744
  return self._compression
@@ -752,7 +750,7 @@ class MincodeSelector:
752
750
  @property
753
751
  def permutation(self):
754
752
  return self._permutation
755
-
753
+
756
754
 
757
755
  def select(self, sequence, bint alphabet_check=True):
758
756
  """
@@ -771,7 +769,7 @@ class MincodeSelector:
771
769
  of the sequence and the alphabet of the
772
770
  :class:`MincodeSelector`
773
771
  is not checked to gain additional performance.
774
-
772
+
775
773
  Returns
776
774
  -------
777
775
  mincode_indices : ndarray, dtype=np.uint32
@@ -786,7 +784,7 @@ class MincodeSelector:
786
784
  )
787
785
  kmers = self._kmer_alph.create_kmers(sequence.code)
788
786
  return self.select_from_kmers(kmers)
789
-
787
+
790
788
 
791
789
  def select_from_kmers(self, kmers):
792
790
  """
@@ -800,7 +798,7 @@ class MincodeSelector:
800
798
  ----------
801
799
  kmers : ndarray, dtype=np.int64
802
800
  The *k-mer* codes to select the *Mincode k-mers* from.
803
-
801
+
804
802
  Returns
805
803
  -------
806
804
  mincode_indices : ndarray, dtype=np.uint32
@@ -820,7 +818,7 @@ class MincodeSelector:
820
818
 
821
819
  mincode_pos = ordering < self._threshold
822
820
  return mincode_pos, kmers[mincode_pos]
823
-
821
+
824
822
 
825
823
  @cython.boundscheck(False)
826
824
  @cython.wraparound(False)
@@ -835,7 +833,7 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window,
835
833
  instead of 'x - (window-1)/2' to 'x + (window-1)/2'.
836
834
  """
837
835
  cdef uint32 seq_i
838
-
836
+
839
837
  cdef uint32 n_windows = kmers.shape[0] - (window - 1)
840
838
  # Pessimistic array allocation size
841
839
  # -> Expect that every window has a new minimizer
@@ -865,14 +863,14 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window,
865
863
  reverse_argcummin = reverse_argcummins[seq_i]
866
864
  forward_cummin = ordering[forward_argcummin]
867
865
  reverse_cummin = ordering[reverse_argcummin]
868
-
866
+
869
867
  # At ties the leftmost position is taken,
870
868
  # which stems from the reverse pass
871
869
  if forward_cummin < reverse_cummin:
872
870
  combined_argcummin = forward_argcummin
873
871
  else:
874
872
  combined_argcummin = reverse_argcummin
875
-
873
+
876
874
  # If the same minimizer position was observed before, the
877
875
  # duplicate is simply ignored, if 'include_duplicates' is false
878
876
  if include_duplicates or combined_argcummin != prev_argcummin:
@@ -899,7 +897,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size):
899
897
  cdef uint32 current_min_i = 0
900
898
  cdef int64 current_min, current_val
901
899
  cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32)
902
-
900
+
903
901
  # Any actual value will be smaller than this placeholder
904
902
  current_min = MAX_INT_64
905
903
  for seq_i in range(values.shape[0]):
@@ -911,7 +909,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size):
911
909
  current_min_i = seq_i
912
910
  current_min = current_val
913
911
  min_pos[seq_i] = current_min_i
914
-
912
+
915
913
  return min_pos
916
914
 
917
915
  @cython.boundscheck(False)
@@ -930,7 +928,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
930
928
  - There are issues in selecting the leftmost argument
931
929
  - An offset is necessary to ensure alignment of chunks with forward
932
930
  pass
933
-
931
+
934
932
  Hence, a separate 'reverse' variant of the function was implemented.
935
933
  """
936
934
  cdef uint32 seq_i
@@ -938,7 +936,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
938
936
  cdef uint32 current_min_i = 0
939
937
  cdef int64 current_min, current_val
940
938
  cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32)
941
-
939
+
942
940
  current_min = MAX_INT_64
943
941
  for seq_i in reversed(range(values.shape[0])):
944
942
  # The chunk beginning is a small difference to forward
@@ -952,5 +950,5 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
952
950
  current_min_i = seq_i
953
951
  current_min = current_val
954
952
  min_pos[seq_i] = current_min_i
955
-
953
+
956
954
  return min_pos
@@ -7,8 +7,8 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["EValueEstimator"]
8
8
 
9
9
  import numpy as np
10
- from ..seqtypes import GeneralSequence
11
- from .pairwise import align_optimal
10
+ from biotite.sequence.align.pairwise import align_optimal
11
+ from biotite.sequence.seqtypes import GeneralSequence
12
12
 
13
13
 
14
14
  class EValueEstimator:
@@ -29,7 +29,7 @@ class EValueEstimator:
29
29
  of random sequence alignments in :meth:`from_samples()`
30
30
  :footcite:`Altschul1986`, which may be time consuming.
31
31
  If these parameters are known, the constructor can be used instead.
32
-
32
+
33
33
  Based on the sampled parameters, the decadic logarithm of the
34
34
  E-value can be quickly calculated via :meth:`log_evalue()`.
35
35
 
@@ -39,7 +39,7 @@ class EValueEstimator:
39
39
  The :math:`\lambda` parameter.
40
40
  k : float
41
41
  The :math:`K` parameter.
42
-
42
+
43
43
  Notes
44
44
  -----
45
45
  The calculated E-value is a rough estimation that gets more
@@ -102,8 +102,9 @@ class EValueEstimator:
102
102
  self._k = k
103
103
 
104
104
  @staticmethod
105
- def from_samples(alphabet, matrix, gap_penalty, frequencies,
106
- sample_length=1000, sample_size=1000):
105
+ def from_samples(
106
+ alphabet, matrix, gap_penalty, frequencies, sample_length=1000, sample_size=1000
107
+ ):
107
108
  r"""
108
109
  Create an :class:`EValueEstimator` with :math:`\lambda` and
109
110
  :math:`K` estimated via sampling alignments of random sequences
@@ -137,13 +138,13 @@ class EValueEstimator:
137
138
  The number of sampled sequences.
138
139
  The accuracy of the estimated parameters and E-values,
139
140
  but also the runtime increases with the sample size.
140
-
141
+
141
142
  Returns
142
143
  -------
143
144
  estimator : EValueEstimator
144
145
  A :class:`EValueEstimator` with sampled :math:`\lambda` and
145
146
  :math:`K` parameters.
146
-
147
+
147
148
  Notes
148
149
  -----
149
150
  The sampling process generates random sequences based on
@@ -167,15 +168,15 @@ class EValueEstimator:
167
168
  raise ValueError("A symmetric substitution matrix is required")
168
169
  if not matrix.get_alphabet1().extends(alphabet):
169
170
  raise ValueError(
170
- "The substitution matrix is not compatible "
171
- "with the given alphabet"
171
+ "The substitution matrix is not compatible " "with the given alphabet"
172
172
  )
173
- score_matrix = matrix.score_matrix()[:len(alphabet), :len(alphabet)]
174
- if np.sum(
175
- score_matrix \
176
- * frequencies[np.newaxis, :] \
177
- * frequencies[:, np.newaxis]
178
- ) >= 0:
173
+ score_matrix = matrix.score_matrix()[: len(alphabet), : len(alphabet)]
174
+ if (
175
+ np.sum(
176
+ score_matrix * frequencies[np.newaxis, :] * frequencies[:, np.newaxis]
177
+ )
178
+ >= 0
179
+ ):
179
180
  raise ValueError(
180
181
  "Invalid substitution matrix, the expected similarity "
181
182
  "score between two random symbols is not negative"
@@ -183,9 +184,7 @@ class EValueEstimator:
183
184
 
184
185
  # Generate the sequence code for the random sequences
185
186
  random_sequence_code = np.random.choice(
186
- len(alphabet),
187
- size=(sample_size, 2, sample_length),
188
- p=frequencies
187
+ len(alphabet), size=(sample_size, 2, sample_length), p=frequencies
189
188
  )
190
189
 
191
190
  # Sample the alignments of random sequences
@@ -193,28 +192,27 @@ class EValueEstimator:
193
192
  for i in range(sample_size):
194
193
  seq1 = GeneralSequence(alphabet)
195
194
  seq2 = GeneralSequence(alphabet)
196
- seq1.code = random_sequence_code[i,0]
197
- seq2.code = random_sequence_code[i,1]
195
+ seq1.code = random_sequence_code[i, 0]
196
+ seq2.code = random_sequence_code[i, 1]
198
197
  sample_scores[i] = align_optimal(
199
- seq1, seq2, matrix,
200
- local=True, gap_penalty=gap_penalty, max_number=1
198
+ seq1, seq2, matrix, local=True, gap_penalty=gap_penalty, max_number=1
201
199
  )[0].score
202
-
200
+
203
201
  # Use method of moments to estimate parameters
204
202
  lam = np.pi / np.sqrt(6 * np.var(sample_scores))
205
203
  u = np.mean(sample_scores) - np.euler_gamma / lam
206
204
  k = np.exp(lam * u) / sample_length**2
207
-
205
+
208
206
  return EValueEstimator(lam, k)
209
207
 
210
208
  @property
211
209
  def lam(self):
212
210
  return self._lam
213
-
211
+
214
212
  @property
215
213
  def k(self):
216
214
  return self._k
217
-
215
+
218
216
  def log_evalue(self, score, seq1_length, seq2_length):
219
217
  r"""
220
218
  Calculate the decadic logarithm of the E-value for a given
@@ -223,11 +221,11 @@ class EValueEstimator:
223
221
  The E-value and the logarithm of the E-value is calculated as
224
222
 
225
223
  .. math::
226
-
224
+
227
225
  E = Kmn e^{-\lambda s}
228
226
 
229
227
  \log_{10} E = (\log_{10} Kmn) - \frac{\lambda s}{\ln 10},
230
-
228
+
231
229
  where :math:`s` is the similarity score and :math:`m` and
232
230
  :math:`n` are the lengths of the aligned sequences.
233
231
 
@@ -245,12 +243,12 @@ class EValueEstimator:
245
243
  this is usually either the combined length of all sequences
246
244
  in the database or the length of the hit sequence multiplied
247
245
  by the number of sequences in the database.
248
-
246
+
249
247
  Returns
250
248
  -------
251
249
  log_e : float
252
250
  The decadic logarithm of the E-value.
253
-
251
+
254
252
  Notes
255
253
  -----
256
254
  This method returns the logarithm of the E-value instead of
@@ -261,5 +259,6 @@ class EValueEstimator:
261
259
  seq1_length = np.asarray(seq1_length)
262
260
  seq2_length = np.asarray(seq2_length)
263
261
 
264
- return np.log10(self._k * seq1_length * seq2_length) \
265
- - self._lam * score / np.log(10)
262
+ return np.log10(
263
+ self._k * seq1_length * seq2_length
264
+ ) - self._lam * score / np.log(10)