PyPI - biotite - Versions diffs - 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl - Mend

biotite 1.0.1__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-311-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +129 -40
biotite/structure/bonds.cpython-311-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-311-darwin.so +0 -0
biotite/structure/charges.cpython-311-darwin.so +0 -0
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +72 -59
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +194 -48
biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-311-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/align/matrix.py CHANGED Viewed

@@ -2,14 +2,21 @@
 # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
 # information.
+__all__ = ["SubstitutionMatrix"]
 __name__ = "biotite.sequence.align"
 __author__ = "Patrick Kunzmann"
-import os
+import functools
+from pathlib import Path
 import numpy as np
-from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
+from biotite.sequence.seqtypes import (
+    NucleotideSequence,
+    PositionalSequence,
+    ProteinSequence,
+)
-__all__ = ["SubstitutionMatrix"]
+# Directory of matrix files
+_DB_DIR = Path(__file__).parent / "matrix_data"
 class SubstitutionMatrix(object):
@@ -59,6 +66,11 @@ class SubstitutionMatrix(object):
             - **RBLOSUM<n>_<BLOCKS>**
             - **CorBLOSUM<n>_<BLOCKS>**
+        - Structural alphabet substitution matrices
+            - **3Di** - For 3Di alphabet from ``foldseek`` :footcite:`VanKempen2024`
+            - **PB** - For Protein Blocks alphabet from *PBxplore* :footcite:`Barnoud2017`
     A list of all available matrix names is returned by
     :meth:`list_db()`.
@@ -78,6 +90,11 @@ class SubstitutionMatrix(object):
         or a dictionary mapping the symbol pairing to scores,
         or a string referencing a matrix in the internal database.
+    Attributes
+    ----------
+    shape : tuple
+        The shape of the substitution matrix.
     Raises
     ------
     KeyError
@@ -110,7 +127,7 @@ class SubstitutionMatrix(object):
     Creating an identity substitution matrix via the score matrix:
     >>> alph = NucleotideSequence.alphabet_unamb
-    >>> matrix = SubstitutionMatrix(alph, alph, np.identity(len(alph)))
+    >>> matrix = SubstitutionMatrix(alph, alph, np.identity(len(alph), dtype=int))
     >>> print(matrix)
         A   C   G   T
     A   1   0   0   0
@@ -124,9 +141,6 @@ class SubstitutionMatrix(object):
     >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
     """
-    # Directory of matrix files
-    _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
     def __init__(self, alphabet1, alphabet2, score_matrix):
         self._alph1 = alphabet1
         self._alph2 = alphabet2
@@ -139,7 +153,21 @@ class SubstitutionMatrix(object):
                     f"Matrix has shape {score_matrix.shape}, "
                     f"but {alph_shape} is required"
                 )
+            if not np.issubdtype(score_matrix.dtype, np.integer):
+                raise TypeError("Score matrix must be an integer ndarray")
             self._matrix = score_matrix.astype(np.int32)
+            # If the score matrix was converted from a float matrix,
+            # inf values would be converted to 2**31,
+            # which is probably undesired and gives overflow issues in the alignment
+            # functions
+            if (
+                np.any(self._matrix == np.iinfo(np.int32).max) or
+                np.any(self._matrix == np.iinfo(np.int32).min)
+            ):  # fmt: skip
+                raise ValueError(
+                    "Score values are too large. "
+                    "Maybe it was converted from a float matrix containing inf values?"
+                )
         elif isinstance(score_matrix, str):
             matrix_dict = SubstitutionMatrix.dict_from_db(score_matrix)
             self._fill_with_matrix_dict(matrix_dict)
@@ -151,34 +179,18 @@ class SubstitutionMatrix(object):
         # score matrix -> make the score matrix read-only
         self._matrix.setflags(write=False)
-    def __repr__(self):
-        """Represent SubstitutionMatrix as a string for debugging."""
-        return (
-            f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, "
-            f"np.{np.array_repr(self._matrix)})"
-        )
-    def __eq__(self, item):
-        if not isinstance(item, SubstitutionMatrix):
-            return False
-        if self._alph1 != item.get_alphabet1():
-            return False
-        if self._alph2 != item.get_alphabet2():
-            return False
-        if not np.array_equal(self.score_matrix(), item.score_matrix()):
-            return False
-        return True
-    def __ne__(self, item):
-        return not self == item
+    @property
+    def shape(self):
+        """
+        Get the shape (i.e. the length of both alphabets)
+        of the substitution matrix.
-    def _fill_with_matrix_dict(self, matrix_dict):
-        self._matrix = np.zeros((len(self._alph1), len(self._alph2)), dtype=np.int32)
-        for i in range(len(self._alph1)):
-            for j in range(len(self._alph2)):
-                sym1 = self._alph1.decode(i)
-                sym2 = self._alph2.decode(j)
-                self._matrix[i, j] = int(matrix_dict[sym1, sym2])
+        Returns
+        -------
+        shape : tuple
+            Matrix shape.
+        """
+        return (len(self._alph1), len(self._alph2))
     def get_alphabet1(self):
         """
@@ -280,26 +292,155 @@ class SubstitutionMatrix(object):
         code2 = self._alph2.encode(symbol2)
         return self._matrix[code1, code2]
-    def shape(self):
+    def as_positional(self, sequence1, sequence2):
         """
-        Get the shape (i.e. the length of both alphabets)
-        of the subsitution matrix.
+        Transform this substitution matrix and two sequences into positional
+        equivalents.
+        This means the new substitution matrix is position-specific: It has the lengths
+        of the sequences instead of the lengths of their alphabets.
+        Its scores represent the same scores as the original matrix, but now mapped
+        onto the positions of the sequences.
+        Parameters
+        ----------
+        sequence1, sequence2 : seq.Sequence, length=m and length=n
+            The sequences to create the positional equivalents from.
         Returns
         -------
-        shape : tuple
-            Matrix shape.
+        pos_matrix : align.SubstitutionMatrix, shape=(m, n)
+            The position-specific substitution matrix.
+        pos_sequence1, pos_sequence2 : PositionalSequence, length=m and length=n
+            The positional sequences.
+        Notes
+        -----
+        After the transformation the substitution scores remain the same, i.e.
+        `substitution_matrix.get_score(sequence1[i], sequence2[j])` is equal to
+        `pos_matrix.get_score(pos_sequence1[i], pos_sequence2[j])`.
+        Examples
+        --------
+        Run an alignment with the usual substitution matrix:
+        >>> seq1 = ProteinSequence("BIQTITE")
+        >>> seq2 = ProteinSequence("IQLITE")
+        >>> matrix = SubstitutionMatrix.std_protein_matrix()
+        >>> print(matrix)
+            A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y   B   Z   X   *
+        A   4   0  -2  -1  -2   0  -2  -1  -1  -1  -1  -2  -1  -1  -1   1   0   0  -3  -2  -2  -1   0  -4
+        C   0   9  -3  -4  -2  -3  -3  -1  -3  -1  -1  -3  -3  -3  -3  -1  -1  -1  -2  -2  -3  -3  -2  -4
+        D  -2  -3   6   2  -3  -1  -1  -3  -1  -4  -3   1  -1   0  -2   0  -1  -3  -4  -3   4   1  -1  -4
+        E  -1  -4   2   5  -3  -2   0  -3   1  -3  -2   0  -1   2   0   0  -1  -2  -3  -2   1   4  -1  -4
+        F  -2  -2  -3  -3   6  -3  -1   0  -3   0   0  -3  -4  -3  -3  -2  -2  -1   1   3  -3  -3  -1  -4
+        G   0  -3  -1  -2  -3   6  -2  -4  -2  -4  -3   0  -2  -2  -2   0  -2  -3  -2  -3  -1  -2  -1  -4
+        H  -2  -3  -1   0  -1  -2   8  -3  -1  -3  -2   1  -2   0   0  -1  -2  -3  -2   2   0   0  -1  -4
+        I  -1  -1  -3  -3   0  -4  -3   4  -3   2   1  -3  -3  -3  -3  -2  -1   3  -3  -1  -3  -3  -1  -4
+        K  -1  -3  -1   1  -3  -2  -1  -3   5  -2  -1   0  -1   1   2   0  -1  -2  -3  -2   0   1  -1  -4
+        L  -1  -1  -4  -3   0  -4  -3   2  -2   4   2  -3  -3  -2  -2  -2  -1   1  -2  -1  -4  -3  -1  -4
+        M  -1  -1  -3  -2   0  -3  -2   1  -1   2   5  -2  -2   0  -1  -1  -1   1  -1  -1  -3  -1  -1  -4
+        N  -2  -3   1   0  -3   0   1  -3   0  -3  -2   6  -2   0   0   1   0  -3  -4  -2   3   0  -1  -4
+        P  -1  -3  -1  -1  -4  -2  -2  -3  -1  -3  -2  -2   7  -1  -2  -1  -1  -2  -4  -3  -2  -1  -2  -4
+        Q  -1  -3   0   2  -3  -2   0  -3   1  -2   0   0  -1   5   1   0  -1  -2  -2  -1   0   3  -1  -4
+        R  -1  -3  -2   0  -3  -2   0  -3   2  -2  -1   0  -2   1   5  -1  -1  -3  -3  -2  -1   0  -1  -4
+        S   1  -1   0   0  -2   0  -1  -2   0  -2  -1   1  -1   0  -1   4   1  -2  -3  -2   0   0   0  -4
+        T   0  -1  -1  -1  -2  -2  -2  -1  -1  -1  -1   0  -1  -1  -1   1   5   0  -2  -2  -1  -1   0  -4
+        V   0  -1  -3  -2  -1  -3  -3   3  -2   1   1  -3  -2  -2  -3  -2   0   4  -3  -1  -3  -2  -1  -4
+        W  -3  -2  -4  -3   1  -2  -2  -3  -3  -2  -1  -4  -4  -2  -3  -3  -2  -3  11   2  -4  -3  -2  -4
+        Y  -2  -2  -3  -2   3  -3   2  -1  -2  -1  -1  -2  -3  -1  -2  -2  -2  -1   2   7  -3  -2  -1  -4
+        B  -2  -3   4   1  -3  -1   0  -3   0  -4  -3   3  -2   0  -1   0  -1  -3  -4  -3   4   1  -1  -4
+        Z  -1  -3   1   4  -3  -2   0  -3   1  -3  -1   0  -1   3   0   0  -1  -2  -3  -2   1   4  -1  -4
+        X   0  -2  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -2  -1  -1   0   0  -1  -2  -1  -1  -1  -1  -4
+        *  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4  -4   1
+        >>> alignment = align_optimal(seq1, seq2, matrix, gap_penalty=-10)[0]
+        >>> print(alignment)
+        BIQTITE
+        -IQLITE
+        Running the alignment with positional equivalents gives the same result:
+        >>> pos_matrix, pos_seq1, pos_seq2 = matrix.as_positional(seq1, seq2)
+        >>> print(pos_matrix)
+            I   Q   L   I   T   E
+        B  -3   0  -4  -3  -1   1
+        I   4  -3   2   4  -1  -3
+        Q  -3   5  -2  -3  -1   2
+        T  -1  -1  -1  -1   5  -1
+        I   4  -3   2   4  -1  -3
+        T  -1  -1  -1  -1   5  -1
+        E  -3   2  -3  -3  -1   5
+        >>> pos_alignment = align_optimal(pos_seq1, pos_seq2, pos_matrix, gap_penalty=-10)[0]
+        >>> print(pos_alignment)
+        BIQTITE
+        -IQLITE
+        Increase the substitution score for the first symbols in both sequences to align
+        to each other:
+        >>> score_matrix = pos_matrix.score_matrix().copy()
+        >>> score_matrix[0, 0] = 100
+        >>> biased_matrix = SubstitutionMatrix(
+        ...     pos_matrix.get_alphabet1(), pos_matrix.get_alphabet2(), score_matrix
+        ... )
+        >>> print(biased_matrix)
+            I   Q   L   I   T   E
+        B 100   0  -4  -3  -1   1
+        I   4  -3   2   4  -1  -3
+        Q  -3   5  -2  -3  -1   2
+        T  -1  -1  -1  -1   5  -1
+        I   4  -3   2   4  -1  -3
+        T  -1  -1  -1  -1   5  -1
+        E  -3   2  -3  -3  -1   5
+        >>> biased_alignment = align_optimal(pos_seq1, pos_seq2, biased_matrix, gap_penalty=-10)[0]
+        >>> print(biased_alignment)
+        BIQTITE
+        I-QLITE
         """
-        return (len(self._alph1), len(self._alph2))
+        pos_sequence1 = PositionalSequence(sequence1)
+        pos_sequence2 = PositionalSequence(sequence2)
+        pos_score_matrix = self._matrix[
+            tuple(_cartesian_product(sequence1.code, sequence2.code).T)
+        ].reshape(len(sequence1), len(sequence2))
+        pos_matrix = SubstitutionMatrix(
+            pos_sequence1.get_alphabet(),
+            pos_sequence2.get_alphabet(),
+            pos_score_matrix,
+        )
+        return pos_matrix, pos_sequence1, pos_sequence2
+    def __repr__(self):
+        """Represent SubstitutionMatrix as a string for debugging."""
+        return (
+            f"SubstitutionMatrix({self._alph1.__repr__()}, {self._alph2.__repr__()}, "
+            f"np.{np.array_repr(self._matrix)})"
+        )
+    def __eq__(self, item):
+        if not isinstance(item, SubstitutionMatrix):
+            return False
+        if self._alph1 != item.get_alphabet1():
+            return False
+        if self._alph2 != item.get_alphabet2():
+            return False
+        if not np.array_equal(self.score_matrix(), item.score_matrix()):
+            return False
+        return True
+    def __ne__(self, item):
+        return not self == item
     def __str__(self):
         # Create matrix in NCBI format
         string = " "
         for symbol in self._alph2:
-            string += f" {symbol:>3}"
+            string += f" {str(symbol):>3}"
         string += "\n"
         for i, symbol in enumerate(self._alph1):
-            string += f"{symbol:>1}"
+            string += f"{str(symbol):>1}"
             for j in range(len(self._alph2)):
                 string += f" {int(self._matrix[i,j]):>3d}"
             string += "\n"
@@ -350,7 +491,7 @@ class SubstitutionMatrix(object):
         matrix_dict : dict
             A dictionary representing the substitution matrix.
         """
-        filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
+        filename = _DB_DIR / f"{matrix_name}.mat"
         with open(filename, "r") as f:
             return SubstitutionMatrix.dict_from_str(f.read())
@@ -364,11 +505,10 @@ class SubstitutionMatrix(object):
         db_list : list
             List of matrix names in the internal database.
         """
-        files = os.listdir(SubstitutionMatrix._db_dir)
-        # Remove '.mat' from files
-        return [file[:-4] for file in sorted(files)]
+        return [path.stem for path in _DB_DIR.glob("*.mat")]
     @staticmethod
+    @functools.cache
     def std_protein_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for protein sequence
@@ -379,9 +519,12 @@ class SubstitutionMatrix(object):
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_blosum62
+        return SubstitutionMatrix(
+            ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
+        )
     @staticmethod
+    @functools.cache
     def std_nucleotide_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for DNA sequence
@@ -392,13 +535,88 @@ class SubstitutionMatrix(object):
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_nuc
+        return SubstitutionMatrix(
+            NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
+        )
+    @staticmethod
+    @functools.cache
+    def std_3di_matrix():
+        """
+        Get the default :class:`SubstitutionMatrix` for 3Di sequence
+        alignments.
+        :footcite:`VanKempen2024`
-# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
-_matrix_blosum62 = SubstitutionMatrix(
-    ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
-)
-_matrix_nuc = SubstitutionMatrix(
-    NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
-)
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+        """
+        # Import inside function to avoid circular import
+        from biotite.structure.alphabet.i3d import I3DSequence
+        return SubstitutionMatrix(I3DSequence.alphabet, I3DSequence.alphabet, "3Di")
+    @staticmethod
+    @functools.cache
+    def std_protein_blocks_matrix(undefined_match=200, undefined_mismatch=-200):
+        """
+        Get the default :class:`SubstitutionMatrix` for Protein Blocks sequences.
+        The matrix is adapted from *PBxplore* :footcite:`Barnoud2017`.
+        Parameters
+        ----------
+        undefined_match, undefined_mismatch : int, optional
+            The match and mismatch score for undefined symbols.
+            The default values were chosen arbitrarily, but are in the order of
+            magnitude of the other score values.
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+        References
+        ----------
+        .. footbibliography::
+        """
+        from biotite.structure.alphabet.pb import ProteinBlocksSequence
+        alphabet = ProteinBlocksSequence.alphabet
+        undefined_symbol = ProteinBlocksSequence.undefined_symbol
+        matrix_dict = SubstitutionMatrix.dict_from_db("PB")
+        # Add match/mismatch scores for the undefined symbol
+        for symbol in alphabet:
+            if symbol == undefined_symbol:
+                continue
+            matrix_dict[symbol, undefined_symbol] = undefined_mismatch
+            matrix_dict[undefined_symbol, symbol] = undefined_mismatch
+        matrix_dict[undefined_symbol, undefined_symbol] = undefined_match
+        return SubstitutionMatrix(
+            alphabet,
+            alphabet,
+            matrix_dict,
+        )
+    def _fill_with_matrix_dict(self, matrix_dict):
+        self._matrix = np.zeros((len(self._alph1), len(self._alph2)), dtype=np.int32)
+        for i in range(len(self._alph1)):
+            for j in range(len(self._alph2)):
+                sym1 = self._alph1.decode(i)
+                sym2 = self._alph2.decode(j)
+                self._matrix[i, j] = int(matrix_dict[sym1, sym2])
+def _cartesian_product(array1, array2):
+    """
+    Create all combinations of elements from two arrays.
+    """
+    return np.transpose(
+        [
+            np.repeat(array1, len(array2)),
+            np.tile(array2, len(array1)),
+        ]
+    )

biotite/sequence/align/matrix_data/3Di.mat ADDED Viewed

@@ -0,0 +1,24 @@
+# 3Di bit/2
+# Background (precomputed optional): 0.0489372 0.0306991 0.101049 0.0329671 0.0276149 0.0416262 0.0452521 0.030876 0.0297251 0.0607036 0.0150238 0.0215826 0.0783843 0.0512926 0.0264886 0.0610702 0.0201311 0.215998 0.0310265 0.0295417 0.00001
+# Lambda     (precomputed optional): 0.351568
+    a   c   d   e   f   g   h   i   k   l   m   n   p   q   r   s   t   v   w   y
+a   6  -3   1   2   3  -2  -2  -7  -3  -3 -10  -5  -1   1  -4  -7  -5  -6   0  -2
+c  -3   6  -2  -8  -5  -4  -4 -12 -13   1 -14   0   0   1  -1   0  -8   1  -7  -9
+d   1  -2   4  -3   0   1   1  -3  -5  -4  -5  -2   1  -1  -1  -4  -2  -3  -2  -2
+e   2  -8  -3   9  -2  -7  -4 -12 -10  -7 -17  -8  -6  -3  -8 -10 -10 -13  -6  -3
+f   3  -5   0  -2   7  -3  -3  -5   1  -3  -9  -5  -2   2  -5  -8  -3  -7   4  -4
+g  -2  -4   1  -7  -3   6   3   0  -7  -7  -1  -2  -2  -4   3  -3   4  -6  -4  -2
+h  -2  -4   1  -4  -3   3   6  -4  -7  -6  -6   0  -1  -3   1  -3  -1  -5  -5   3
+i  -7 -12  -3 -12  -5   0  -4   8  -5 -11   7  -7  -6  -6  -3  -9   6 -12  -5  -8
+k  -3 -13  -5 -10   1  -7  -7  -5   9 -11  -8 -12  -6  -5  -9 -14  -5 -15   5  -8
+l  -3   1  -4  -7  -3  -7  -6 -11 -11   6 -16  -3  -2   2  -4  -4  -9   0  -8  -9
+m -10 -14  -5 -17  -9  -1  -6   7  -8 -16  10  -9  -9 -10  -5 -10   3 -16  -6  -9
+n  -5   0  -2  -8  -5  -2   0  -7 -12  -3  -9   7   0  -2   2   3  -4   0  -8  -5
+p  -1   0   1  -6  -2  -2  -1  -6  -6  -2  -9   0   4   0   0  -2  -4   0  -4  -5
+q   1   1  -1  -3   2  -4  -3  -6  -5   2 -10  -2   0   5  -2  -4  -5  -1  -2  -5
+r  -4  -1  -1  -8  -5   3   1  -3  -9  -4  -5   2   0  -2   6   2   0  -1  -6  -3
+s  -7   0  -4 -10  -8  -3  -3  -9 -14  -4 -10   3  -2  -4   2   6  -6   0 -11  -9
+t  -5  -8  -2 -10  -3   4  -1   6  -5  -9   3  -4  -4  -5   0  -6   8  -9  -5  -5
+v  -6   1  -3 -13  -7  -6  -5 -12 -15   0 -16   0   0  -1  -1   0  -9   3 -10 -11
+w   0  -7  -2  -6   4  -4  -5  -5   5  -8  -6  -8  -4  -2  -6 -11  -5 -10   8  -6
+y  -2  -9  -2  -3  -4  -2   3  -8  -8  -9  -9  -5  -5  -5  -3  -9  -5 -11  -6   9

biotite/sequence/align/matrix_data/PB.license ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2013 Poulain, A. G. de Brevern
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

biotite/sequence/align/matrix_data/PB.mat ADDED Viewed

@@ -0,0 +1,18 @@
+# PB substitution matrix, adapted from PBxplore
+   a     b     c     d     e     f     g     h     i     j     k     l     m     n     o     p
+a  516   -59   113  -105  -411  -177   -27  -361    47  -103  -644  -259  -599  -372  -124   -83
+b  -59   541  -146  -210  -155  -310   -97    90   182  -128   -30    29  -745  -242  -165    22
+c  113  -146   360   -14  -333  -240    49  -438  -269  -282  -688  -682  -608  -455  -147     6
+d -105  -210   -14   221     5  -131  -349  -278  -253  -173  -585  -670 -1573 -1048  -691  -497
+e -411  -155  -333     5   520   185   186   138  -378   -70  -112  -514 -1136  -469  -617  -632
+f -177  -310  -240  -131   185   459   -99   -45  -445    83  -214   -88  -547  -629  -406  -552
+g  -27   -97    49  -349   186   -99   665   -99   -89  -118  -409  -138  -124   172   128   254
+h -361    90  -438  -278   138   -45   -99   632  -205   316   192  -108  -712  -359    95  -399
+i   47   182  -269  -253  -378  -445   -89  -205   696   186     8    15  -709  -269  -169   226
+j -103  -128  -282  -173   -70    83  -118   316   186   768   196     5  -398  -340  -117  -104
+k -644   -30  -688  -585  -112  -214  -409   192     8   196   568   -65  -270  -231  -471  -382
+l -259    29  -682  -670  -514   -88  -138  -108    15     5   -65   533  -131     8   -11  -316
+m -599  -745  -608 -1573 -1136  -547  -124  -712  -709  -398  -270  -131   241    -4  -190  -155
+n -372  -242  -455 -1048  -469  -629   172  -359  -269  -340  -231     8    -4   703    88   146
+o -124  -165  -147  -691  -617  -406   128    95  -169  -117  -471   -11  -190    88   716    58
+p  -83    22     6  -497  -632  -552   254  -399   226  -104  -382  -316  -155   146    58   609

biotite/sequence/align/multiple.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/pairwise.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/permutation.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/selector.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/tracetable.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/alphabet.py CHANGED Viewed

@@ -410,6 +410,9 @@ class LetterAlphabet(Alphabet):
             symbols = symbols.astype("U1")
         return symbols
+    def is_letter_alphabet(self):
+        return True
     def __contains__(self, symbol):
         if not isinstance(symbol, (str, bytes)):
             return False

biotite/sequence/codec.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/graphics/color_schemes/3di_flower.json ADDED Viewed

@@ -0,0 +1,48 @@
+{
+    "comment": "Generated with 'gecos --matrix 3Di --name flower --lmin 60 --lmax 80 -f 3di_flower.json'",
+    "name": "flower",
+    "alphabet": [
+        "a",
+        "c",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "k",
+        "l",
+        "m",
+        "n",
+        "p",
+        "q",
+        "r",
+        "s",
+        "t",
+        "v",
+        "w",
+        "y"
+    ],
+    "colors": {
+        "a": "#a189a1",
+        "c": "#ff5806",
+        "d": "#ab9a93",
+        "e": "#e754d5",
+        "f": "#8191b5",
+        "g": "#cbc7ae",
+        "h": "#dac1bc",
+        "i": "#5eaf6e",
+        "k": "#04c1fd",
+        "l": "#ff544b",
+        "m": "#07e560",
+        "n": "#f28d05",
+        "p": "#b68767",
+        "q": "#bc8277",
+        "r": "#eebe86",
+        "s": "#ffa103",
+        "t": "#a4c49a",
+        "v": "#ed6903",
+        "w": "#3a97d8",
+        "y": "#f7adfd"
+    }
+}

biotite/sequence/graphics/color_schemes/pb_flower.json CHANGED Viewed

@@ -16,7 +16,8 @@
         "m",
         "n",
         "o",
-        "p"
+        "p",
+        "z"
     ],
     "colors": {
         "a": "#31b5fc",

biotite/sequence/graphics/colorschemes.py CHANGED Viewed

@@ -94,27 +94,32 @@ def get_color_scheme(name, alphabet, default="#FFFFFF"):
     >>> print(color_scheme)
     ['#3737f5', '#37f537', '#f5f537', '#f53737']
     """
+    # Try exact alphabet match first
+    for scheme in _color_schemes:
+        if scheme["name"] == name and scheme["alphabet"] == alphabet:
+            return _fit_color_scheme(alphabet, scheme, default)
+    # If no exact match was found, try to find a scheme for an alphabet
+    # that extends the given alphabet
     for scheme in _color_schemes:
         if scheme["name"] == name and scheme["alphabet"].extends(alphabet):
-            colors = scheme["colors"]
-            # Replace None values with default color
-            colors = [color if color is not None else default for color in colors]
-            # Only return colors that are in scope of this alphabet
-            # and not the extended alphabet
-            return colors[: len(alphabet)]
+            return _fit_color_scheme(alphabet, scheme, default)
     raise ValueError(f"Unkown scheme '{name}' for given alphabet")
-def list_color_scheme_names(alphabet):
+def list_color_scheme_names(alphabet, strict=False):
     """
     Get a list of available color scheme names for a given alphabet.
     Parameters
     ----------
     alphabet : Alphabet
-        The alphbet to get the color scheme names for.
-        The alphabet of the scheme must equal or extend this parameter,
-        to be included in the list.
+        The alphabet to get the color scheme names for.
+    strict : bool, optional
+        If set to true, only schemes with an exact match to the given
+        alphabet are included in the list.
+        If set to false, schemes with an alphabet that extends the given
+        alphabet are also included.
     Returns
     -------
@@ -123,7 +128,9 @@ def list_color_scheme_names(alphabet):
     """
     scheme_list = []
     for scheme in _color_schemes:
-        if scheme["alphabet"].extends(alphabet):
+        if strict and scheme["alphabet"] == alphabet:
+            scheme_list.append(scheme["name"])
+        if not strict and scheme["alphabet"].extends(alphabet):
             scheme_list.append(scheme["name"])
     return scheme_list
@@ -135,3 +142,29 @@ _color_schemes = []
 for file_name in glob.glob(_scheme_dir + os.sep + "*.json"):
     scheme = load_color_scheme(file_name)
     _color_schemes.append(scheme)
+def _fit_color_scheme(alphabet, color_scheme, default_color):
+    """
+    Fit a color scheme to the given alphabet.
+    Parameters
+    ----------
+    alphabet : Alphabet
+        The alphabet to get the color scheme for.
+    color_scheme : dict
+        The color scheme.
+    default_color : str or tuple
+        The default color.
+    Returns
+    -------
+    scheme : list of str
+        The colors from the scheme.
+    """
+    colors = color_scheme["colors"]
+    # Replace None values with default color
+    colors = [color if color is not None else default_color for color in colors]
+    # Only return colors that are in scope of this alphabet
+    # and not the extended alphabet
+    return colors[: len(alphabet)]

biotite/sequence/phylo/nj.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/phylo/tree.cpython-311-darwin.so CHANGED Viewed

Binary file

biotite/sequence/phylo/upgma.cpython-311-darwin.so CHANGED Viewed

Binary file