PyPI - biotite - Versions diffs - 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl - Mend

biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205)

biotite/__init__.py +2 -3
biotite/application/__init__.py +1 -1
biotite/application/application.py +20 -10
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +3 -3
biotite/sequence/align/__init__.py +2 -2
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -1
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +51 -65
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +15 -17
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +246 -236
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +29 -32
biotite/structure/box.py +67 -71
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +83 -78
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +4 -5
biotite/structure/info/groups.py +1 -3
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -44
biotite/structure/io/pdbx/cif.py +140 -110
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +260 -258
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +90 -107
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
biotite-1.0.1.dist-info/RECORD +322 -0
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.2.dist-info/RECORD +0 -340
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
{biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/align/alignment.py CHANGED Viewed

@@ -5,16 +5,22 @@
 __name__ = "biotite.sequence.align"
 __author__ = "Patrick Kunzmann"
-import numpy as np
 import numbers
-import copy
 import textwrap
-from ..alphabet import LetterAlphabet
+from collections.abc import Sequence
+import numpy as np
+from biotite.sequence.alphabet import LetterAlphabet
-__all__ = ["Alignment", "get_codes", "get_symbols",
-           "get_sequence_identity", "get_pairwise_sequence_identity",
-           "score", "find_terminal_gaps", "remove_terminal_gaps"]
+__all__ = [
+    "Alignment",
+    "get_codes",
+    "get_symbols",
+    "get_sequence_identity",
+    "get_pairwise_sequence_identity",
+    "score",
+    "find_terminal_gaps",
+    "remove_terminal_gaps",
+]
 class Alignment(object):
@@ -22,7 +28,7 @@ class Alignment(object):
     An :class:`Alignment` object stores information about which symbols
     of *n* sequences are aligned to each other and it stores the
     corresponding alignment score.
     Instead of saving a list of aligned symbols, this class saves the
     original *n* sequences, that were aligned, and a so called *trace*,
     which indicates the aligned symbols of these sequences.
@@ -31,16 +37,16 @@ class Alignment(object):
     Each element of the trace is the index in the corresponding
     sequence.
     A gap is represented by the value -1.
     Furthermore this class provides multiple utility functions for
     conversion into strings in order to make the alignment human
     readable.
     Unless an :class:`Alignment` object is the result of a multiple
     sequence alignment, the object will contain only two sequences.
     All attributes of this class are publicly accessible.
     Parameters
     ----------
     sequences : list
@@ -49,7 +55,7 @@ class Alignment(object):
         The alignment trace.
     score : int, optional
         Alignment score.
     Attributes
     ----------
     sequences : list
@@ -58,10 +64,10 @@ class Alignment(object):
         The alignment trace.
     score : int
         Alignment score.
     Examples
     --------
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -95,8 +101,10 @@ class Alignment(object):
     def __repr__(self):
         """Represent Alignment as a string for debugging."""
-        return f"Alignment([{', '.join([seq.__repr__() for seq in self.sequences])}], " \
-               f"np.{np.array_repr(self.trace)}, score={self.score})"
+        return (
+            f"Alignment([{', '.join([seq.__repr__() for seq in self.sequences])}], "
+            f"np.{np.array_repr(self.trace)}, score={self.score})"
+        )
     def _gapped_str(self, seq_index):
         seq_str = ""
@@ -107,11 +115,11 @@ class Alignment(object):
             else:
                 seq_str += "-"
         return seq_str
     def get_gapped_sequences(self):
         """
         Get the string representation of the gapped sequences.
         Returns
         -------
         sequences : list of str
@@ -119,7 +127,7 @@ class Alignment(object):
             as in `Alignment.sequences`.
         """
         return [self._gapped_str(i) for i in range(len(self.sequences))]
     def __str__(self):
         # Check if any of the sequences
         # has a non-single letter alphabet
@@ -143,32 +151,33 @@ class Alignment(object):
             return ali_str[:-2]
         else:
             return super().__str__()
     def __getitem__(self, index):
         if isinstance(index, tuple):
             if len(index) > 2:
                 raise IndexError("Only 1D or 2D indices are allowed")
-            if isinstance(index[0], numbers.Integral) or \
-               isinstance(index[0], numbers.Integral):
-                    raise IndexError(
-                        "Integers are invalid indices for alignments, "
-                        "a single sequence or alignment column cannot be "
-                        "selected"
-                    )
+            if isinstance(index[0], numbers.Integral) or isinstance(
+                index[0], numbers.Integral
+            ):
+                raise IndexError(
+                    "Integers are invalid indices for alignments, "
+                    "a single sequence or alignment column cannot be "
+                    "selected"
+                )
             return Alignment(
                 Alignment._index_sequences(self.sequences, index[1]),
                 self.trace[index],
-                self.score
+                self.score,
             )
         else:
             return Alignment(self.sequences, self.trace[index], self.score)
     def __iter__(self):
         raise TypeError("'Alignment' object is not iterable")
     def __len__(self):
         return len(self.trace)
     def __eq__(self, item):
         if not isinstance(item, Alignment):
             return False
@@ -179,45 +188,41 @@ class Alignment(object):
         if self.score != item.score:
             return False
         return True
     @staticmethod
     def _index_sequences(sequences, index):
-        if isinstance(index, (list, tuple)) or \
-            (isinstance(index, np.ndarray) and index.dtype != bool):
-                return [sequences[i] for i in index]
+        if isinstance(index, (list, tuple)) or (
+            isinstance(index, np.ndarray) and index.dtype != bool
+        ):
+            return [sequences[i] for i in index]
         elif isinstance(index, np.ndarray) and index.dtype == bool:
             return [seq for seq, mask in zip(sequences, index) if mask]
         if isinstance(index, slice):
             return sequences[index]
         else:
-            raise IndexError(
-                f"Invalid alignment index type '{type(index).__name__}'"
-            )
+            raise IndexError(f"Invalid alignment index type '{type(index).__name__}'")
     @staticmethod
     def trace_from_strings(seq_str_list):
         """
         Create a trace from strings that represent aligned sequences.
         Parameters
         ----------
         seq_str_list : list of str
             The strings, where each one represents a sequence
             (with gaps) in an alignment.
             A ``-`` is interpreted as gap.
         Returns
         -------
         trace : ndarray, dtype=int, shape=(n,2)
             The created trace.
         """
         if len(seq_str_list) < 2:
-            raise ValueError(
-                "An alignment must contain at least two sequences"
-            )
+            raise ValueError("An alignment must contain at least two sequences")
         seq_i = np.zeros(len(seq_str_list))
-        trace = np.full(( len(seq_str_list[0]), len(seq_str_list) ),
-                        -1, dtype=int)
+        trace = np.full((len(seq_str_list[0]), len(seq_str_list)), -1, dtype=int)
         # Get length of string (same length for all strings)
         # rather than length of list
         for pos_i in range(len(seq_str_list[0])):
@@ -238,22 +243,22 @@ def get_codes(alignment):
     Instead of the indices of the aligned symbols (trace), the return
     value contains the corresponding symbol codes for each index.
     Gaps are still represented by *-1*.
     Parameters
     ----------
     alignment : Alignment
         The alignment to get the sequence codes for.
     Returns
     -------
     codes : ndarray, dtype=int, shape=(n,m)
         The sequence codes for the alignment.
         The shape is *(n,m)* for *n* sequences and *m* alignment columns.
         The array uses *-1* values for gaps.
     Examples
     --------
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -267,14 +272,17 @@ def get_codes(alignment):
     """
     trace = alignment.trace
     sequences = alignment.sequences
     # The number of sequences is the first dimension
-    codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=int)
+    codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64)
     for i in range(len(sequences)):
+        # Mark -1 explicitly as int64 to avoid that the unsigned dtype
+        # of the sequence code is used
+        # (https://numpy.org/neps/nep-0050-scalar-promotion.html)
         codes[i] = np.where(
-            trace[:,i] != -1, sequences[i].code[trace[:,i]], -1
+            trace[:, i] != -1, sequences[i].code[trace[:, i]], np.int64(-1)
         )
     return np.stack(codes)
@@ -283,24 +291,24 @@ def get_symbols(alignment):
     Similar to :func:`get_codes()`, but contains the decoded symbols
     instead of codes.
     Gaps are still represented by *None* values.
     Parameters
     ----------
     alignment : Alignment
         The alignment to get the symbols for.
     Returns
     -------
     symbols : list of list
         The nested list of symbols.
     See Also
     --------
     get_codes
     Examples
     --------
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -317,8 +325,8 @@ def get_symbols(alignment):
         alphabet = alignment.sequences[i].get_alphabet()
         codes_wo_gaps = codes[i, codes[i] != -1]
         symbols_wo_gaps = alphabet.decode_multiple(codes_wo_gaps)
-        if not isinstance(symbols_wo_gaps, list):
-            symbols_wo_gaps = list(symbols_wo_gaps)
+        if isinstance(symbols_wo_gaps, np.ndarray):
+            symbols_wo_gaps = symbols_wo_gaps.tolist()
         symbols_for_seq = np.full(len(codes[i]), None, dtype=object)
         symbols_for_seq[codes[i] != -1] = symbols_wo_gaps
         symbols[i] = symbols_for_seq.tolist()
@@ -331,7 +339,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
     The identity is equal to the matches divided by a measure for the
     length of the alignment that depends on the `mode` parameter.
     Parameters
     ----------
     alignment : Alignment
@@ -348,12 +356,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
               length of the shortest sequence.
         Default is *not_terminal*.
     Returns
     -------
     identity : float
         The sequence identity, ranging between 0 and 1.
     See also
     --------
     get_pairwise_sequence_identity
@@ -363,12 +371,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
     # Count matches
     matches = 0
     for i in range(codes.shape[1]):
-        column = codes[:,i]
+        column = codes[:, i]
         # One unique value -> all symbols match
         unique_symbols = np.unique(column)
         if len(unique_symbols) == 1 and unique_symbols[0] != -1:
             matches += 1
     # Calculate length
     if mode == "all":
         length = len(alignment)
@@ -394,7 +402,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
     The identity is equal to the matches divided by a measure for the
     length of the alignment that depends on the `mode` parameter.
     Parameters
     ----------
     alignment : Alignment, length=n
@@ -411,12 +419,12 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
               length of the shortest one of the two sequences.
         Default is *not_terminal*.
     Returns
     -------
     identity : ndarray, dtype=float, shape=(n,n)
         The pairwise sequence identity, ranging between 0 and 1.
     See also
     --------
     get_sequence_identity
@@ -427,9 +435,11 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
     # Count matches
     # Calculate at which positions the sequences are identical
     # and are not gaps
-    equality_matrix = (codes[:, np.newaxis, :] == codes[np.newaxis, :, :]) \
-                    & (codes[:, np.newaxis, :] != -1) \
-                    & (codes[np.newaxis, :, :] != -1) \
+    equality_matrix = (
+        (codes[:, np.newaxis, :] == codes[np.newaxis, :, :])
+        & (codes[:, np.newaxis, :] != -1)
+        & (codes[np.newaxis, :, :] != -1)
+    )
     # Sum these positions up
     matches = np.count_nonzero(equality_matrix, axis=-1)
@@ -441,24 +451,23 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
         for i in range(n_seq):
             for j in range(n_seq):
                 # Find latest start and earliest stop of all sequences
-                start, stop = find_terminal_gaps(alignment[:, [i,j]])
+                start, stop = find_terminal_gaps(alignment[:, [i, j]])
                 if stop <= start:
                     raise ValueError(
                         "Cannot calculate non-terminal identity, "
                         "as the two sequences have no overlap"
                     )
-                length[i,j] = stop - start
+                length[i, j] = stop - start
     elif mode == "shortest":
         length = np.zeros((n_seq, n_seq))
         for i in range(n_seq):
             for j in range(n_seq):
-                length[i,j] = min([
-                    len(alignment.sequences[i]),
-                    len(alignment.sequences[j])
-                ])
+                length[i, j] = min(
+                    [len(alignment.sequences[i]), len(alignment.sequences[j])]
+                )
     else:
         raise ValueError(f"'{mode}' is an invalid calculation mode")
     return matches / length
@@ -468,7 +477,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
     If the alignment contains more than two sequences,
     all pairwise scores are counted.
     Parameters
     ----------
     alignment : Alignment
@@ -485,7 +494,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
     terminal_penalty : bool, optional
         If true, gap penalties are applied to terminal gaps.
         (Default: True)
     Returns
     -------
     score : int
@@ -503,18 +512,18 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
         # Do not count self-similarity
         # and do not count similarity twice (not S(i,j) and S(j,i))
         for i in range(codes.shape[0]):
-            for j in range(i+1, codes.shape[0]):
+            for j in range(i + 1, codes.shape[0]):
                 code_i = column[i]
                 code_j = column[j]
                 # Ignore gaps
                 if code_i != -1 and code_j != -1:
                     score += matrix[code_i, code_j]
     # Sum gap penalties
-    if type(gap_penalty) == int:
+    if isinstance(gap_penalty, numbers.Real):
         gap_open = gap_penalty
         gap_ext = gap_penalty
-    elif type(gap_penalty) == tuple:
+    elif isinstance(gap_penalty, Sequence):
         gap_open = gap_penalty[0]
         gap_ext = gap_penalty[1]
     else:
@@ -590,15 +599,15 @@ def find_terminal_gaps(alignment):
     """
     trace = alignment.trace
     # Find for each sequence the positions of non-gap symbols
-    no_gap_pos = [np.where(trace[:,i] != -1)[0] for i in range(trace.shape[1])]
+    no_gap_pos = [np.where(trace[:, i] != -1)[0] for i in range(trace.shape[1])]
     # Find for each sequence the positions of the sequence start and end
     # in the alignment
-    firsts = [no_gap_pos[i][0 ] for i in range(trace.shape[1])]
-    lasts  = [no_gap_pos[i][-1] for i in range(trace.shape[1])]
+    firsts = [no_gap_pos[i][0] for i in range(trace.shape[1])]
+    lasts = [no_gap_pos[i][-1] for i in range(trace.shape[1])]
     # The terminal gaps are before all sequences start and after any
     # sequence ends
     # Use exclusive stop -> -1
-    return np.max(firsts), np.min(lasts) + 1
+    return np.max(firsts).item(), np.min(lasts).item() + 1
 def remove_terminal_gaps(alignment):
@@ -655,4 +664,4 @@ def remove_terminal_gaps(alignment):
             "Cannot remove terminal gaps, since at least two sequences have "
             "no overlap and the resulting alignment would be empty"
         )
-    return alignment[start : stop]
+    return alignment[start:stop]

biotite/sequence/align/banded.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/buckets.py CHANGED Viewed

@@ -6,11 +6,12 @@ __name__ = "biotite.sequence.align"
 __author__ = "Patrick Kunzmann"
 __all__ = ["bucket_number"]
-from os.path import realpath, dirname, join
+from os.path import dirname, join, realpath
 import numpy as np
 _primes = None
 def bucket_number(n_kmers, load_factor=0.8):
     """
     Find an appropriate number of buckets for a :class:`BucketKmerTable`
@@ -54,16 +55,17 @@ def bucket_number(n_kmers, load_factor=0.8):
     """
     global _primes
     if _primes is None:
-        with open(
-            join(dirname(realpath(__file__)), "primes.txt")
-        ) as file:
-            _primes = np.array([
-                int(line) for line in file.read().splitlines()
-                if len(line) != 0 and line[0] != "#"
-            ])
+        with open(join(dirname(realpath(__file__)), "primes.txt")) as file:
+            _primes = np.array(
+                [
+                    int(line)
+                    for line in file.read().splitlines()
+                    if len(line) != 0 and line[0] != "#"
+                ]
+            )
     number = int(n_kmers / load_factor)
     index = np.searchsorted(_primes, number, side="left")
     if index == len(_primes):
         raise ValueError("Number of buckets too large")
-    return _primes[index]
+    return _primes[index]