PyPI - biotite - Versions diffs - 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl - Mend

biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205)

biotite/__init__.py +2 -3
biotite/application/__init__.py +1 -1
biotite/application/application.py +20 -10
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +3 -3
biotite/sequence/align/__init__.py +2 -2
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +1 -1
biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
biotite/sequence/alphabet.py +51 -65
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cpython-310-darwin.so +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +15 -17
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +246 -236
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cpython-310-darwin.so +0 -0
biotite/structure/bonds.pyx +29 -32
biotite/structure/box.py +67 -71
biotite/structure/celllist.cpython-310-darwin.so +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cpython-310-darwin.so +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +83 -78
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +4 -5
biotite/structure/info/groups.py +1 -3
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -44
biotite/structure/io/pdbx/cif.py +140 -110
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +260 -258
biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
biotite/structure/io/trajfile.py +90 -107
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cpython-310-darwin.so +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
biotite-1.0.1.dist-info/RECORD +322 -0
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.2.dist-info/RECORD +0 -340
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/align/selector.pyx CHANGED Viewed

@@ -10,10 +10,8 @@ __all__ = ["MinimizerSelector", "SyncmerSelector", "CachedSyncmerSelector",
 cimport cython
 cimport numpy as np
-from numbers import Integral
 import numpy as np
 from .kmeralphabet import KmerAlphabet
-from ..alphabet import AlphabetError
 ctypedef np.int64_t int64
@@ -21,7 +19,7 @@ ctypedef np.uint32_t uint32
 # Obtained from 'np.iinfo(np.int64).max'
-DEF MAX_INT_64 = 9223372036854775807
+cdef int64 MAX_INT_64 = 9223372036854775807
 class MinimizerSelector:
@@ -54,7 +52,7 @@ class MinimizerSelector:
         This standard order is often the lexicographical order, which is
         known to yield suboptimal *density* in many cases
         :footcite:`Roberts2004`.
     Attributes
     ----------
     kmer_alphabet : KmerAlphabet
@@ -73,7 +71,7 @@ class MinimizerSelector:
     References
     ----------
     .. footbibliography::
     Examples
@@ -122,12 +120,12 @@ class MinimizerSelector:
         self._window = window
         self._kmer_alph = kmer_alphabet
         self._permutation = permutation
     @property
     def kmer_alphabet(self):
         return self._kmer_alph
     @property
     def window(self):
         return self._window
@@ -135,7 +133,7 @@ class MinimizerSelector:
     @property
     def permutation(self):
         return self._permutation
     def select(self, sequence, bint alphabet_check=True):
         """
@@ -154,7 +152,7 @@ class MinimizerSelector:
             of the sequence and the alphabet of the
             :class:`MinimizerSelector`
             is not checked to gain additional performance.
         Returns
         -------
         minimizer_indices : ndarray, dtype=np.uint32
@@ -162,7 +160,7 @@ class MinimizerSelector:
         minimizers : ndarray, dtype=np.int64
             The *k-mers* that are the selected minimizers, returned as
             *k-mer* code.
         Notes
         -----
         Duplicate minimizers are omitted, i.e. if two windows have the
@@ -176,7 +174,7 @@ class MinimizerSelector:
                 )
         kmers = self._kmer_alph.create_kmers(sequence.code)
         return self.select_from_kmers(kmers)
     def select_from_kmers(self, kmers):
         """
@@ -191,7 +189,7 @@ class MinimizerSelector:
             minimizers in.
             The *k-mer* codes correspond to the *k-mers* encoded by the
             given `kmer_alphabet`.
         Returns
         -------
         minimizer_indices : ndarray, dtype=np.uint32
@@ -199,7 +197,7 @@ class MinimizerSelector:
             appears.
         minimizers : ndarray, dtype=np.int64
             The corresponding *k-mers* codes of the minimizers.
         Notes
         -----
         Duplicate minimizers are omitted, i.e. if two windows have the
@@ -267,7 +265,7 @@ class SyncmerSelector:
         *k-mer*.
         By default, the minimum position needs to be at the start of the
         *k-mer*, which is termed *open syncmer*.
     Attributes
     ----------
     alphabet : Alphabet
@@ -276,7 +274,7 @@ class SyncmerSelector:
         The :class:`KmerAlphabet` for *k* and *s*, respectively.
     permutation : Permutation
         The permutation.
     See also
     --------
     CachedSyncmerSelector
@@ -291,7 +289,7 @@ class SyncmerSelector:
     References
     ----------
     .. footbibliography::
     Examples
@@ -337,7 +335,7 @@ class SyncmerSelector:
         self._alphabet = alphabet
         self._kmer_alph = KmerAlphabet(alphabet, k)
         self._smer_alph = KmerAlphabet(alphabet, s)
         self._permutation = permutation
         self._offset = np.asarray(offset, dtype=np.int64)
@@ -353,7 +351,7 @@ class SyncmerSelector:
             )
         if len(np.unique(self._offset)) != len(self._offset):
             raise ValueError("Offset must contain unique values")
     @property
     def alphabet(self):
@@ -362,7 +360,7 @@ class SyncmerSelector:
     @property
     def kmer_alphabet(self):
         return self._kmer_alph
     @property
     def smer_alphabet(self):
         return self._smer_alph
@@ -370,7 +368,7 @@ class SyncmerSelector:
     @property
     def permutation(self):
         return self._permutation
     def select(self, sequence, bint alphabet_check=True):
         """
@@ -389,7 +387,7 @@ class SyncmerSelector:
             of the sequence and the alphabet of the
             :class:`SyncmerSelector`
             is not checked to gain additional performance.
         Returns
         -------
         syncmer_indices : ndarray, dtype=np.uint32
@@ -428,7 +426,7 @@ class SyncmerSelector:
         relative_min_pos = min_pos - np.arange(len(kmers))
         syncmer_pos = self._filter_syncmer_pos(relative_min_pos)
         return syncmer_pos, kmers[syncmer_pos]
     def select_from_kmers(self, kmers):
         """
@@ -442,7 +440,7 @@ class SyncmerSelector:
         ----------
         kmers : ndarray, dtype=np.int64
             The *k-mer* codes to select the syncmers from.
         Returns
         -------
         syncmer_indices : ndarray, dtype=np.uint32
@@ -459,9 +457,9 @@ class SyncmerSelector:
         :class:`Sequence` objects.
         """
         cdef int64 i
         symbol_codes_for_each_kmer = self._kmer_alph.split(kmers)
         cdef int64[:] min_pos = np.zeros(
             len(symbol_codes_for_each_kmer), dtype=np.int64
         )
@@ -477,10 +475,10 @@ class SyncmerSelector:
                         f"sort keys for {len(smers)} s-mers"
                     )
             min_pos[i] = np.argmin(ordering)
         syncmer_pos = self._filter_syncmer_pos(min_pos)
         return syncmer_pos, kmers[syncmer_pos]
     def _filter_syncmer_pos(self, min_pos):
         """
@@ -538,7 +536,7 @@ class CachedSyncmerSelector(SyncmerSelector):
         *k-mer*.
         By default, the minimum position needs to be at the start of the
         *k-mer*, which is termed *open syncmer*.
     Attributes
     ----------
     alphabet : Alphabet
@@ -547,7 +545,7 @@ class CachedSyncmerSelector(SyncmerSelector):
         The :class:`KmerAlphabet` for *k* and *s*, respectively.
     permutation : Permutation
         The permutation.
     See also
     --------
     SyncmerSelector
@@ -562,7 +560,7 @@ class CachedSyncmerSelector(SyncmerSelector):
     References
     ----------
     .. footbibliography::
     Examples
@@ -584,7 +582,7 @@ class CachedSyncmerSelector(SyncmerSelector):
     >>> print(["".join(kmer_alph.decode(kmer)) for kmer in syncmers])
     ['GGCAA', 'AAGTG', 'AGTGA', 'GTGAC']
     """
     def __init__(self, alphabet, k, s, permutation=None, offset=(0,)):
         super().__init__(alphabet, k, s, permutation, offset)
         # Check for all possible *k-mers*, whether they are syncmers
@@ -593,7 +591,7 @@ class CachedSyncmerSelector(SyncmerSelector):
         # Convert the index array into a boolean mask
         self._syncmer_mask = np.zeros(len(self.kmer_alphabet), dtype=bool)
         self._syncmer_mask[syncmer_indices] = True
     def select(self, sequence, bint alphabet_check=True):
         """
@@ -612,7 +610,7 @@ class CachedSyncmerSelector(SyncmerSelector):
             of the sequence and the alphabet of the
             :class:`CachedSyncmerSelector`
             is not checked to gain additional performance.
         Returns
         -------
         syncmer_indices : ndarray, dtype=np.uint32
@@ -628,7 +626,7 @@ class CachedSyncmerSelector(SyncmerSelector):
                 )
         kmers = self.kmer_alphabet.create_kmers(sequence.code)
         return self.select_from_kmers(kmers)
     def select_from_kmers(self, kmers):
         """
@@ -642,7 +640,7 @@ class CachedSyncmerSelector(SyncmerSelector):
         ----------
         kmers : ndarray, dtype=np.int64
             The *k-mer* codes to select the syncmers from.
         Returns
         -------
         syncmer_indices : ndarray, dtype=np.uint32
@@ -660,7 +658,7 @@ class MincodeSelector:
     Selects the :math:`1/\text{compression}` *smallest* *k-mers* from
     :class:`KmerAlphabet`. :footcite:`Edgar2021`
     '*Small*' refers to the lexicographical order, or alternatively a
     custom order if `permutation` is given.
     The *Mincode* approach tries to reduce the number of *k-mers* from a
@@ -682,7 +680,7 @@ class MincodeSelector:
         By default, the standard order of the :class:`KmerAlphabet` is
         used.
         This standard order is often the lexicographical order.
     Attributes
     ----------
     kmer_alphabet : KmerAlphabet
@@ -695,10 +693,10 @@ class MincodeSelector:
         All *k-mers*, that are smaller than this value are selected.
     permutation : Permutation
         The permutation.
     References
     ----------
     .. footbibliography::
     Examples
@@ -735,12 +733,12 @@ class MincodeSelector:
             permutation_offset = permutation.min
             permutation_range = permutation.max - permutation.min + 1
         self._threshold = permutation_offset + permutation_range / compression
     @property
     def kmer_alphabet(self):
         return self._kmer_alph
     @property
     def compression(self):
         return self._compression
@@ -752,7 +750,7 @@ class MincodeSelector:
     @property
     def permutation(self):
         return self._permutation
     def select(self, sequence, bint alphabet_check=True):
         """
@@ -771,7 +769,7 @@ class MincodeSelector:
             of the sequence and the alphabet of the
             :class:`MincodeSelector`
             is not checked to gain additional performance.
         Returns
         -------
         mincode_indices : ndarray, dtype=np.uint32
@@ -786,7 +784,7 @@ class MincodeSelector:
                 )
         kmers = self._kmer_alph.create_kmers(sequence.code)
         return self.select_from_kmers(kmers)
     def select_from_kmers(self, kmers):
         """
@@ -800,7 +798,7 @@ class MincodeSelector:
         ----------
         kmers : ndarray, dtype=np.int64
             The *k-mer* codes to select the *Mincode k-mers* from.
         Returns
         -------
         mincode_indices : ndarray, dtype=np.uint32
@@ -820,7 +818,7 @@ class MincodeSelector:
         mincode_pos = ordering < self._threshold
         return mincode_pos, kmers[mincode_pos]
 @cython.boundscheck(False)
 @cython.wraparound(False)
@@ -835,7 +833,7 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window,
     instead of 'x - (window-1)/2' to 'x + (window-1)/2'.
     """
     cdef uint32 seq_i
     cdef uint32 n_windows = kmers.shape[0] - (window - 1)
     # Pessimistic array allocation size
     # -> Expect that every window has a new minimizer
@@ -865,14 +863,14 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window,
         reverse_argcummin = reverse_argcummins[seq_i]
         forward_cummin = ordering[forward_argcummin]
         reverse_cummin = ordering[reverse_argcummin]
         # At ties the leftmost position is taken,
         # which stems from the reverse pass
         if forward_cummin < reverse_cummin:
             combined_argcummin = forward_argcummin
         else:
             combined_argcummin = reverse_argcummin
         # If the same minimizer position was observed before, the
         # duplicate is simply ignored, if 'include_duplicates' is false
         if include_duplicates or combined_argcummin != prev_argcummin:
@@ -899,7 +897,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size):
     cdef uint32 current_min_i = 0
     cdef int64 current_min, current_val
     cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32)
     # Any actual value will be smaller than this placeholder
     current_min = MAX_INT_64
     for seq_i in range(values.shape[0]):
@@ -911,7 +909,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size):
             current_min_i = seq_i
             current_min = current_val
         min_pos[seq_i] = current_min_i
     return min_pos
 @cython.boundscheck(False)
@@ -930,7 +928,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
     - There are issues in selecting the leftmost argument
     - An offset is necessary to ensure alignment of chunks with forward
       pass
     Hence, a separate 'reverse' variant of the function was implemented.
     """
     cdef uint32 seq_i
@@ -938,7 +936,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
     cdef uint32 current_min_i = 0
     cdef int64 current_min, current_val
     cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32)
     current_min = MAX_INT_64
     for seq_i in reversed(range(values.shape[0])):
         # The chunk beginning is a small difference to forward
@@ -952,5 +950,5 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
             current_min_i = seq_i
             current_min = current_val
         min_pos[seq_i] = current_min_i
     return min_pos

biotite/sequence/align/statistics.py CHANGED Viewed

@@ -7,8 +7,8 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["EValueEstimator"]
 import numpy as np
-from ..seqtypes import GeneralSequence
-from .pairwise import align_optimal
+from biotite.sequence.align.pairwise import align_optimal
+from biotite.sequence.seqtypes import GeneralSequence
 class EValueEstimator:
@@ -29,7 +29,7 @@ class EValueEstimator:
     of random sequence alignments in :meth:`from_samples()`
     :footcite:`Altschul1986`, which may be time consuming.
     If these parameters are known, the constructor can be used instead.
     Based on the sampled parameters, the decadic logarithm of the
     E-value can be quickly calculated via :meth:`log_evalue()`.
@@ -39,7 +39,7 @@ class EValueEstimator:
         The :math:`\lambda` parameter.
     k : float
         The :math:`K` parameter.
     Notes
     -----
     The calculated E-value is a rough estimation that gets more
@@ -102,8 +102,9 @@ class EValueEstimator:
         self._k = k
     @staticmethod
-    def from_samples(alphabet, matrix, gap_penalty, frequencies,
-                     sample_length=1000, sample_size=1000):
+    def from_samples(
+        alphabet, matrix, gap_penalty, frequencies, sample_length=1000, sample_size=1000
+    ):
         r"""
         Create an :class:`EValueEstimator` with :math:`\lambda` and
         :math:`K` estimated via sampling alignments of random sequences
@@ -137,13 +138,13 @@ class EValueEstimator:
             The number of sampled sequences.
             The accuracy of the estimated parameters and E-values,
             but also the runtime increases with the sample size.
         Returns
         -------
         estimator : EValueEstimator
             A :class:`EValueEstimator` with sampled :math:`\lambda` and
             :math:`K` parameters.
         Notes
         -----
         The sampling process generates random sequences based on
@@ -167,15 +168,15 @@ class EValueEstimator:
             raise ValueError("A symmetric substitution matrix is required")
         if not matrix.get_alphabet1().extends(alphabet):
             raise ValueError(
-                "The substitution matrix is not compatible "
-                "with the given alphabet"
+                "The substitution matrix is not compatible " "with the given alphabet"
             )
-        score_matrix = matrix.score_matrix()[:len(alphabet), :len(alphabet)]
-        if np.sum(
-            score_matrix \
-            * frequencies[np.newaxis, :] \
-            * frequencies[:, np.newaxis]
-        ) >= 0:
+        score_matrix = matrix.score_matrix()[: len(alphabet), : len(alphabet)]
+        if (
+            np.sum(
+                score_matrix * frequencies[np.newaxis, :] * frequencies[:, np.newaxis]
+            )
+            >= 0
+        ):
             raise ValueError(
                 "Invalid substitution matrix, the expected similarity "
                 "score between two random symbols is not negative"
@@ -183,9 +184,7 @@ class EValueEstimator:
         # Generate the sequence code for the random sequences
         random_sequence_code = np.random.choice(
-            len(alphabet),
-            size=(sample_size, 2, sample_length),
-            p=frequencies
+            len(alphabet), size=(sample_size, 2, sample_length), p=frequencies
         )
         # Sample the alignments of random sequences
@@ -193,28 +192,27 @@ class EValueEstimator:
         for i in range(sample_size):
             seq1 = GeneralSequence(alphabet)
             seq2 = GeneralSequence(alphabet)
-            seq1.code = random_sequence_code[i,0]
-            seq2.code = random_sequence_code[i,1]
+            seq1.code = random_sequence_code[i, 0]
+            seq2.code = random_sequence_code[i, 1]
             sample_scores[i] = align_optimal(
-                seq1, seq2, matrix,
-                local=True, gap_penalty=gap_penalty, max_number=1
+                seq1, seq2, matrix, local=True, gap_penalty=gap_penalty, max_number=1
             )[0].score
         # Use method of moments to estimate parameters
         lam = np.pi / np.sqrt(6 * np.var(sample_scores))
         u = np.mean(sample_scores) - np.euler_gamma / lam
         k = np.exp(lam * u) / sample_length**2
         return EValueEstimator(lam, k)
     @property
     def lam(self):
         return self._lam
     @property
     def k(self):
         return self._k
     def log_evalue(self, score, seq1_length, seq2_length):
         r"""
         Calculate the decadic logarithm of the E-value for a given
@@ -223,11 +221,11 @@ class EValueEstimator:
         The E-value and the logarithm of the E-value is calculated as
         .. math::
             E = Kmn e^{-\lambda s}
             \log_{10} E = (\log_{10} Kmn) - \frac{\lambda s}{\ln 10},
         where :math:`s` is the similarity score and :math:`m` and
         :math:`n` are the lengths of the aligned sequences.
@@ -245,12 +243,12 @@ class EValueEstimator:
             this is usually either the combined length of all sequences
             in the database or the length of the hit sequence multiplied
             by the number of sequences in the database.
         Returns
         -------
         log_e : float
             The decadic logarithm of the E-value.
         Notes
         -----
         This method returns the logarithm of the E-value instead of
@@ -261,5 +259,6 @@ class EValueEstimator:
         seq1_length = np.asarray(seq1_length)
         seq2_length = np.asarray(seq2_length)
-        return np.log10(self._k * seq1_length * seq2_length) \
-            - self._lam * score / np.log(10)
+        return np.log10(
+            self._k * seq1_length * seq2_length
+        ) - self._lam * score / np.log(10)

biotite/sequence/align/tracetable.cpython-310-darwin.so CHANGED Viewed

Binary file