PyPI - biotite - Versions diffs - 0.41.1__cp312-cp312-win_amd64.whl → 1.0.0__cp312-cp312-win_amd64.whl - Mend

biotite 0.41.1__cp312-cp312-win_amd64.whl → 1.0.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show

biotite/__init__.py +2 -3
biotite/application/__init__.py +36 -10
biotite/application/application.py +22 -11
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +16 -5
biotite/sequence/align/__init__.py +160 -6
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +35 -35
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +112 -126
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +64 -64
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +226 -240
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +88 -100
biotite/structure/box.py +67 -71
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +82 -77
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +21 -7
biotite/structure/info/groups.py +10 -15
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -52
biotite/structure/io/pdbx/cif.py +64 -62
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +235 -246
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +76 -93
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
biotite-1.0.0.dist-info/RECORD +322 -0
{biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.1.dist-info/RECORD +0 -340
{biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/align/cigar.py CHANGED Viewed

@@ -8,13 +8,14 @@ __all__ = ["CigarOp", "read_alignment_from_cigar", "write_alignment_to_cigar"]
 import enum
 import numpy as np
-from .alignment import Alignment, get_codes
+from biotite.sequence.align.alignment import Alignment, get_codes
 class CigarOp(enum.IntEnum):
     """
     An enum for the different CIGAR operations.
     """
     MATCH = 0
     INSERTION = 1
     DELETION = 2
@@ -46,23 +47,23 @@ class CigarOp(enum.IntEnum):
     def to_cigar_symbol(self):
         return _op_to_str[self]
 _str_to_op = {
-        "M" : CigarOp.MATCH,
-        "I" : CigarOp.INSERTION,
-        "D" : CigarOp.DELETION,
-        "N" : CigarOp.INTRON,
-        "S" : CigarOp.SOFT_CLIP,
-        "H" : CigarOp.HARD_CLIP,
-        "P" : CigarOp.PADDING,
-        "=" : CigarOp.EQUAL,
-        "X" : CigarOp.DIFFERENT,
-        "B" : CigarOp.BACK
-    }
+    "M": CigarOp.MATCH,
+    "I": CigarOp.INSERTION,
+    "D": CigarOp.DELETION,
+    "N": CigarOp.INTRON,
+    "S": CigarOp.SOFT_CLIP,
+    "H": CigarOp.HARD_CLIP,
+    "P": CigarOp.PADDING,
+    "=": CigarOp.EQUAL,
+    "X": CigarOp.DIFFERENT,
+    "B": CigarOp.BACK,
+}
 _op_to_str = {v: k for k, v in _str_to_op.items()}
-def read_alignment_from_cigar(cigar, position,
-                              reference_sequence, segment_sequence):
+def read_alignment_from_cigar(cigar, position, reference_sequence, segment_sequence):
     """
     Create an :class:`Alignment` from a CIGAR string.
@@ -147,20 +148,16 @@ def read_alignment_from_cigar(cigar, position,
     else:
         operations = np.asarray(cigar, dtype=int)
         if operations.ndim != 2:
-            raise ValueError(
-                "Expected array with shape (n,2)"
-            )
+            raise ValueError("Expected array with shape (n,2)")
         if operations.shape[1] != 2:
-            raise ValueError(
-                "Expected (operation, length) pairs"
-            )
+            raise ValueError("Expected (operation, length) pairs")
     if len(operations) == 0:
         return Alignment(
             [reference_sequence, segment_sequence], np.zeros((0, 2), dtype=int)
         )
-    trace = np.zeros((np.sum(operations[:,1]), 2), dtype=int)
+    trace = np.zeros((np.sum(operations[:, 1]), 2), dtype=int)
     clip_mask = np.ones(trace.shape[0], dtype=bool)
     i = 0
@@ -187,19 +184,23 @@ def read_alignment_from_cigar(cigar, position,
         elif op == CigarOp.HARD_CLIP:
             clip_mask[i : i + length] = False
         else:
-            raise ValueError(
-                f"CIGAR operation {op} is not implemented"
-            )
+            raise ValueError(f"CIGAR operation {op} is not implemented")
         i += length
     # Remove clipped positions
     trace = trace[clip_mask]
     return Alignment([reference_sequence, segment_sequence], trace)
-def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
-                             introns=(), distinguish_matches=False,
-                             hard_clip=False, include_terminal_gaps=False,
-                             as_string=True):
+def write_alignment_to_cigar(
+    alignment,
+    reference_index=0,
+    segment_index=1,
+    introns=(),
+    distinguish_matches=False,
+    hard_clip=False,
+    include_terminal_gaps=False,
+    as_string=True,
+):
     """
     Convert an :class:`Alignment` into a CIGAR string.
@@ -293,10 +294,10 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     >>> op_tuples = write_alignment_to_cigar(semiglobal_alignment, as_string=False)
     >>> for op, length in op_tuples:
-    ...     print(CigarOp(op), length)
-    CigarOp.MATCH 9
-    CigarOp.DELETION 2
-    CigarOp.MATCH 12
+    ...     print(CigarOp(op).name, length)
+    MATCH 9
+    DELETION 2
+    MATCH 12
     """
     if not include_terminal_gaps:
         alignment = _remove_terminal_segment_gaps(alignment, segment_index)
@@ -305,8 +306,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
     seg_trace = alignment.trace[:, segment_index]
     operations = np.full(alignment.trace.shape[0], CigarOp.MATCH, dtype=int)
-    insertion_mask = (ref_trace == -1)
-    deletion_mask = (seg_trace == -1)
+    insertion_mask = ref_trace == -1
+    deletion_mask = seg_trace == -1
     if np.any(insertion_mask & deletion_mask):
         raise ValueError(
             "Alignment contains insertion and deletion at the same position"
@@ -318,35 +319,27 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
         intron_mask = np.zeros(operations.shape[0], dtype=bool)
         for start, stop in introns:
             if start >= stop:
-                raise ValueError(
-                    "Intron start must be smaller than intron stop"
-                )
+                raise ValueError("Intron start must be smaller than intron stop")
             if start < 0:
-                raise ValueError(
-                    "Intron start must not be negative"
-                )
+                raise ValueError("Intron start must not be negative")
             intron_mask[(ref_trace >= start) & (ref_trace < stop)] = True
         if np.any(intron_mask & ~deletion_mask):
-            raise ValueError(
-                "Introns must be within gaps in the reference sequence"
-            )
+            raise ValueError("Introns must be within gaps in the reference sequence")
         operations[intron_mask] = CigarOp.INTRON
     if distinguish_matches:
         symbol_codes = get_codes(alignment)
         ref_codes = symbol_codes[reference_index, :]
         seg_codes = symbol_codes[segment_index, :]
-        equal_mask = (ref_codes == seg_codes)
-        match_mask = (operations == CigarOp.MATCH)
+        equal_mask = ref_codes == seg_codes
+        match_mask = operations == CigarOp.MATCH
         operations[equal_mask & match_mask] = CigarOp.EQUAL
         operations[~equal_mask & match_mask] = CigarOp.DIFFERENT
     op_tuples = _aggregate_consecutive(operations)
     clip_op = CigarOp.HARD_CLIP if hard_clip else CigarOp.SOFT_CLIP
-    start_clip_length, end_clip_length = _find_clipped_bases(
-        alignment, segment_index
-    )
+    start_clip_length, end_clip_length = _find_clipped_bases(alignment, segment_index)
     if start_clip_length != 0:
         start_clip = [(clip_op, start_clip_length)]
     else:
@@ -386,9 +379,7 @@ def _find_clipped_bases(alignment, segment_index):
     # all previous bases are clipped...
     start_clip_length = seg_trace[0]
     # ...and the same applies for the last base
-    end_clip_length = (
-        len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
-    )
+    end_clip_length = len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
     return start_clip_length, end_clip_length
@@ -431,4 +422,4 @@ def _op_tuples_from_cigar(cigar):
             op = CigarOp.from_cigar_symbol(char)
             op_tuples.append((op, count))
             count = ""
-    return np.array(op_tuples, dtype=int)
+    return np.array(op_tuples, dtype=int)

biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmeralphabet.pyx CHANGED Viewed

@@ -33,7 +33,7 @@ class KmerAlphabet(Alphabet):
     This type of alphabet uses *k-mers* as symbols, i.e. all
     combinations of *k* symbols from its *base alphabet*.
     Its primary use is its :meth:`create_kmers()` method, that iterates
     over all overlapping *k-mers* in a :class:`Sequence` and encodes
     each one into its corresponding *k-mer* symbol code
@@ -68,7 +68,7 @@ class KmerAlphabet(Alphabet):
         integers, that indicate the *informative* positions.
         For a continuous *k-mer* the `spacing` would be
         ``[0, 1, 2,...]``.
     Attributes
     ----------
     base_alphabet : Alphabet
@@ -79,7 +79,7 @@ class KmerAlphabet(Alphabet):
     spacing : None or ndarray, dtype=int
         The *k-mer* model in array form, if spaced *k-mers* are used,
         ``None`` otherwise.
     Notes
     -----
     The symbol code for a *k-mer* :math:`s` calculates as
@@ -94,7 +94,7 @@ class KmerAlphabet(Alphabet):
     References
     ----------
     .. footbibliography::
     Examples
@@ -103,11 +103,11 @@ class KmerAlphabet(Alphabet):
     >>> base_alphabet = NucleotideSequence.unambiguous_alphabet()
     >>> print(base_alphabet.get_symbols())
-    ['A', 'C', 'G', 'T']
+    ('A', 'C', 'G', 'T')
     >>> kmer_alphabet = KmerAlphabet(base_alphabet, 2)
     >>> print(kmer_alphabet.get_symbols())
-    ['AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
+    ('AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT')
     Encode and decode *k-mers*:
     >>> print(kmer_alphabet.encode("TC"))
@@ -127,7 +127,7 @@ class KmerAlphabet(Alphabet):
     [3 1]
     Encode all overlapping continuous k-mers of a sequence:
     >>> sequence = NucleotideSequence("ATTGCT")
     >>> kmer_codes = kmer_alphabet.create_kmers(sequence.code)
     >>> print(kmer_codes)
@@ -146,7 +146,7 @@ class KmerAlphabet(Alphabet):
     >>> print([s[0] + s[1] + "_" + s[2] for s in strings])
     ['BI_T', 'IQ_I', 'QT_T', 'TI_E']
     """
     def __init__(self, base_alphabet, k, spacing=None):
         if not isinstance(base_alphabet, Alphabet):
             raise TypeError(
@@ -157,7 +157,7 @@ class KmerAlphabet(Alphabet):
             raise ValueError("k must be at least 2")
         self._base_alph = base_alphabet
         self._k = k
         base_alph_len = len(self._base_alph)
         self._radix_multiplier = np.array(
             [base_alph_len**n for n in reversed(range(0, self._k))],
@@ -166,10 +166,10 @@ class KmerAlphabet(Alphabet):
         if spacing is None:
             self._spacing = None
         elif isinstance(spacing, str):
             self._spacing = _to_array_form(spacing)
         else:
             self._spacing = np.array(spacing, dtype=np.int64)
             self._spacing.sort()
@@ -181,13 +181,13 @@ class KmerAlphabet(Alphabet):
                 raise ValueError(
                     "Spacing model contains duplicate values"
                 )
         if spacing is not None and len(self._spacing) != self._k:
             raise ValueError(
                 f"Expected {self._k} informative positions, "
                 f"but got {len(self._spacing)} positions in spacing"
             )
     @property
     def base_alphabet(self):
@@ -196,11 +196,11 @@ class KmerAlphabet(Alphabet):
     @property
     def k(self):
         return self._k
     @property
     def spacing(self):
         return None if self._spacing is None else self._spacing.copy()
     def get_symbols(self):
         """
@@ -210,10 +210,10 @@ class KmerAlphabet(Alphabet):
         Returns
         -------
-        symbols : list
-            A list of all *k-mer* symbols, i.e. all possible
+        symbols : tuple
+            A tuple of all *k-mer* symbols, i.e. all possible
             combinations of *k* symbols from its *base alphabet*.
         Notes
         -----
         In contrast to the base :class:`Alphabet` and
@@ -224,10 +224,10 @@ class KmerAlphabet(Alphabet):
         to be created first.
         """
         if isinstance(self._base_alph, LetterAlphabet):
-            return ["".join(self.decode(code)) for code in range(len(self))]
+            return tuple(["".join(self.decode(code)) for code in range(len(self))])
         else:
-            return [list(self.decode(code)) for code in range(len(self))]
+            return tuple([list(self.decode(code)) for code in range(len(self))])
     def extends(self, alphabet):
         # A KmerAlphabet cannot really extend another KmerAlphabet:
@@ -237,15 +237,15 @@ class KmerAlphabet(Alphabet):
         # A KmerAlphabet can only 'extend' another KmerAlphabet,
         # if the two alphabets are equal
         return alphabet == self
     def encode(self, symbol):
         return self.fuse(self._base_alph.encode_multiple(symbol))
     def decode(self, code):
         return self._base_alph.decode_multiple(self.split(code))
     def fuse(self, codes):
         """
@@ -261,7 +261,7 @@ class KmerAlphabet(Alphabet):
         ----------
         codes : ndarray, dtype=int, shape=(k,) or shape=(n,k)
             The symbol codes from the base alphabet to be fused.
         Returns
         -------
         kmer_codes : int or ndarray, dtype=np.int64, shape=(n,)
@@ -292,13 +292,13 @@ class KmerAlphabet(Alphabet):
             )
         if np.any(codes > len(self._base_alph)):
             raise AlphabetError("Given k-mer(s) contains invalid symbol code")
         orig_shape = codes.shape
         codes = np.atleast_2d(codes)
         kmer_code = np.sum(self._radix_multiplier * codes, axis=-1)
         # The last dimension is removed since it collapsed in np.sum
         return kmer_code.reshape(orig_shape[:-1])
     def split(self, kmer_code):
         """
         split(kmer_code)
@@ -313,7 +313,7 @@ class KmerAlphabet(Alphabet):
         ----------
         kmer_code : int or ndarray, dtype=int, shape=(n,)
             The *k-mer* code(s).
         Returns
         -------
         codes : ndarray, dtype=np.uint64, shape=(k,) or shape=(n,k)
@@ -341,13 +341,13 @@ class KmerAlphabet(Alphabet):
             raise AlphabetError(
                 f"Given k-mer symbol code is invalid for this alphabet"
             )
         orig_shape = np.shape(kmer_code)
         split_codes = self._split(
             np.atleast_1d(kmer_code).astype(np.int64, copy=False)
         )
         return split_codes.reshape(orig_shape + (self._k,))
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.cdivision(True)
@@ -360,7 +360,7 @@ class KmerAlphabet(Alphabet):
         cdef uint64[:,:] split_codes = np.empty(
             (codes.shape[0], self._k), dtype=np.uint64
         )
         cdef int k = self._k
         for i in range(codes.shape[0]):
             code = codes[i]
@@ -369,9 +369,9 @@ class KmerAlphabet(Alphabet):
                 symbol_code = code // val
                 split_codes[i,n] = symbol_code
                 code -= symbol_code * val
         return np.asarray(split_codes)
     def kmer_array_length(self, int64 length):
         """
@@ -385,7 +385,7 @@ class KmerAlphabet(Alphabet):
         ----------
         length : int
             The length of the hypothetical sequence
         Returns
         -------
         kmer_length : int
@@ -400,7 +400,7 @@ class KmerAlphabet(Alphabet):
             spacing = self._spacing
             max_offset = self._spacing[len(spacing)-1] + 1
             return length - max_offset + 1
     def create_kmers(self, seq_code):
         """
@@ -418,7 +418,7 @@ class KmerAlphabet(Alphabet):
         -------
         kmer_codes : ndarray, dtype=int64
             The symbol codes for the *k-mers*.
         Examples
         --------
@@ -435,7 +435,7 @@ class KmerAlphabet(Alphabet):
             return self._create_continuous_kmers(seq_code)
         else:
             return self._create_spaced_kmers(seq_code)
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def _create_continuous_kmers(self, CodeType[:] seq_code not None):
@@ -460,7 +460,7 @@ class KmerAlphabet(Alphabet):
         cdef int64[:] kmers = np.empty(
             self.kmer_array_length(len(seq_code)), dtype=np.int64
         )
         cdef CodeType code
         cdef int64 kmer, prev_kmer
         # Compute first k-mer using naive approach
@@ -471,7 +471,7 @@ class KmerAlphabet(Alphabet):
                 raise AlphabetError(f"Symbol code {code} is out of range")
             kmer += radix_multiplier[i] * code
         kmers[0] = kmer
         # Compute all following k-mers from the previous one
         prev_kmer = kmer
         for i in range(1, kmers.shape[0]):
@@ -481,7 +481,7 @@ class KmerAlphabet(Alphabet):
             kmer = (
                 (
                     # Remove first symbol
-                    (prev_kmer - seq_code[i - 1] * end_radix_multiplier)
+                    (prev_kmer - seq_code[i - 1] * end_radix_multiplier)
                     # Shift k-mer to left
                     * alphabet_length
                 )
@@ -490,9 +490,9 @@ class KmerAlphabet(Alphabet):
             )
             kmers[i] = kmer
             prev_kmer = kmer
         return np.asarray(kmers)
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def _create_spaced_kmers(self, CodeType[:] seq_code not None):
@@ -515,7 +515,7 @@ class KmerAlphabet(Alphabet):
         cdef int64[:] kmers = np.empty(
             self.kmer_array_length(len(seq_code)), dtype=np.int64
         )
         cdef CodeType code
         cdef int64 kmer
         cdef int64 offset
@@ -528,18 +528,18 @@ class KmerAlphabet(Alphabet):
                     raise AlphabetError(f"Symbol code {code} is out of range")
                 kmer += radix_multiplier[j] * code
             kmers[i] = kmer
         return np.asarray(kmers)
     def __str__(self):
         return str(self.get_symbols())
     def __repr__(self):
         return f"KmerAlphabet({repr(self._base_alph)}, " \
                f"{self._k}, {repr(self._spacing)})"
     def __eq__(self, item):
         if item is self:
@@ -550,15 +550,19 @@ class KmerAlphabet(Alphabet):
             return False
         if self._k != item._k:
             return False
         if self._spacing is None:
             if item._spacing is not None:
                 return False
         elif np.any(self._spacing != item._spacing):
             return False
         return True
+    def __hash__(self):
+        return hash((self._base_alph, self._k, tuple(self._spacing.tolist())))
     def __len__(self):
         return int(len(self._base_alph) ** self._k)

biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.pyx CHANGED Viewed

@@ -1352,7 +1352,8 @@ cdef class KmerTable:
     def __iter__(self):
-        return iter(self.get_kmers())
+        for kmer in self.get_kmers():
+            yield kmer.item()
     def __reversed__(self):
@@ -3394,7 +3395,7 @@ def _to_string(table):
         else:
             symbols = str(tuple(symbols))
         line = symbols + ": " + ", ".join(
-            [str(tuple(pos)) for pos in table[kmer]]
+            [str((ref_id.item(), pos.item())) for ref_id, pos in table[kmer]]
         )
         lines.append(line)
     return "\n".join(lines)

biotite/sequence/align/localgapped.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/localungapped.cp312-win_amd64.pyd CHANGED Viewed

Binary file