PyPI - biotite - Versions diffs - 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl - Mend

biotite 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +156 -43
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/filter.py +1 -1
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +148 -107
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +227 -68
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +16 -16
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/application/dssp/app.py CHANGED Viewed

@@ -6,10 +6,11 @@ __name__ = "biotite.application.dssp"
 __author__ = "Patrick Kunzmann"
 __all__ = ["DsspApp"]
+from subprocess import SubprocessError
 from tempfile import NamedTemporaryFile
 import numpy as np
 from biotite.application.application import AppState, requires_state
-from biotite.application.localapp import LocalApp, cleanup_tempfile
+from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
 from biotite.structure.io.pdbx.cif import CIFFile
 from biotite.structure.io.pdbx.convert import set_structure
@@ -72,7 +73,13 @@ class DsspApp(LocalApp):
             self._array.set_annotation(
                 "occupancy", np.ones(self._array.array_length(), dtype=float)
             )
+        try:
+            # The parameters have changed in version 4
+            self._new_cli = get_version(bin_path)[0] >= 4
+        except SubprocessError:
+            # In older versions, no version is returned with `--version`
+            # -> a SubprocessError is raised
+            self._new_cli = False
         self._in_file = NamedTemporaryFile("w", suffix=".cif", delete=False)
         self._out_file = NamedTemporaryFile("r", suffix=".dssp", delete=False)
@@ -81,7 +88,10 @@ class DsspApp(LocalApp):
         set_structure(in_file, self._array)
         in_file.write(self._in_file)
         self._in_file.flush()
-        self.set_arguments(["-i", self._in_file.name, "-o", self._out_file.name])
+        if self._new_cli:
+            self.set_arguments([self._in_file.name, self._out_file.name])
+        else:
+            self.set_arguments(["-i", self._in_file.name, "-o", self._out_file.name])
         super().run()
     def evaluate(self):

biotite/application/localapp.py CHANGED Viewed

@@ -8,7 +8,10 @@ __all__ = ["LocalApp"]
 import abc
 import copy
+import re
+import subprocess
 from os import chdir, getcwd, remove
+from pathlib import Path
 from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
 from biotite.application.application import (
     Application,
@@ -306,3 +309,34 @@ def cleanup_tempfile(temp_file):
     except FileNotFoundError:
         # File was already deleted, e.g. due to `TemporaryFile(delete=True)`
         pass
+def get_version(bin_path, version_option="--version"):
+    """
+    Get the version of a locally installed application.
+    Parameters
+    ----------
+    bin_path : str or Path
+        Path of the application.
+    version_option : str, optional
+        The command line option to get the version.
+    Returns
+    -------
+    major, minor : int
+        The major and minor version number.
+    """
+    output = subprocess.run(
+        [bin_path, version_option], capture_output=True, text=True
+    ).stdout
+    # Find matches for version string containing major and minor version
+    match = re.search(r"\d+\.\d+", output)
+    if match is None:
+        raise subprocess.SubprocessError(
+            f"Could not determine '{Path(bin_path).name}' version "
+            f"from the string '{output}'"
+        )
+    version_string = match.group(0)
+    splitted = version_string.split(".")
+    return int(splitted[0]), int(splitted[1])

biotite/application/muscle/app3.py CHANGED Viewed

@@ -7,13 +7,11 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["MuscleApp"]
 import numbers
-import re
-import subprocess
 import warnings
 from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from biotite.application.application import AppState, VersionError, requires_state
-from biotite.application.localapp import cleanup_tempfile
+from biotite.application.localapp import cleanup_tempfile, get_version
 from biotite.application.msaapp import MSAApp
 from biotite.sequence.phylo.tree import Tree
@@ -54,7 +52,7 @@ class MuscleApp(MSAApp):
     """
     def __init__(self, sequences, bin_path="muscle", matrix=None):
-        major_version = get_version(bin_path)[0]
+        major_version = get_version(bin_path, "-version")[0]
         if major_version != 3:
             raise VersionError(f"Muscle 3 is required, got version {major_version}")
@@ -227,14 +225,3 @@ class MuscleApp(MSAApp):
         app.start()
         app.join()
         return app.get_alignment()
-def get_version(bin_path="muscle"):
-    output = subprocess.run([bin_path, "-version"], capture_output=True, text=True)
-    # Find matches for version string containing major and minor version
-    match = re.search(r"\d+\.\d+", output.stdout)
-    if match is None:
-        raise subprocess.SubprocessError("Could not determine Muscle version")
-    version_string = match.group(0)
-    splitted = version_string.split(".")
-    return int(splitted[0]), int(splitted[1])

biotite/application/muscle/app5.py CHANGED Viewed

@@ -7,8 +7,8 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["Muscle5App"]
 from biotite.application.application import AppState, VersionError, requires_state
+from biotite.application.localapp import get_version
 from biotite.application.msaapp import MSAApp
-from biotite.application.muscle.app3 import get_version
 class Muscle5App(MSAApp):
@@ -49,7 +49,7 @@ class Muscle5App(MSAApp):
     """
     def __init__(self, sequences, bin_path="muscle"):
-        major_version = get_version(bin_path)[0]
+        major_version = get_version(bin_path, "-version")[0]
         if major_version < 5:
             raise VersionError(
                 f"At least Muscle 5 is required, got version {major_version}"

biotite/application/util.py CHANGED Viewed

@@ -50,7 +50,7 @@ def map_matrix(matrix):
     # All trailing symbols are filled with zeros
     old_length = len(matrix.get_alphabet1())
     new_length = len(ProteinSequence.alphabet)
-    new_score_matrix = np.zeros((new_length, new_length))
+    new_score_matrix = np.zeros((new_length, new_length), dtype=np.int32)
     new_score_matrix[:old_length, :old_length] = matrix.score_matrix()
     return SubstitutionMatrix(
         ProteinSequence.alphabet, ProteinSequence.alphabet, new_score_matrix

biotite/application/viennarna/rnaplot.py CHANGED Viewed

@@ -99,8 +99,12 @@ class RNAplotApp(LocalApp):
         self._in_file.write(self._dot_bracket)
         self._in_file.flush()
         self.set_arguments(
-            ["-i", self._in_file.name, "-o", "xrna", "-t", self._layout_type]
-        )
+            [
+                "-i", self._in_file.name,
+                "--output-format", "xrna",
+                "-t", self._layout_type,
+            ]
+        )  # fmt: skip
         super().run()
     def evaluate(self):

biotite/database/rcsb/query.py CHANGED Viewed

@@ -146,9 +146,9 @@ class BasicQuery(SingleQuery):
     Examples
     --------
-    >>> query = BasicQuery("tc5b")
+    >>> query = BasicQuery("Miniprotein Construct")
     >>> print(sorted(search(query)))
-    ['1L2Y', '8ANG', '8ANH', '8ANI', '8ANM', '8QWW']
+    ['1L2Y']
     """
     def __init__(self, term):
@@ -346,9 +346,9 @@ class SequenceQuery(SingleQuery):
     --------
     >>> sequence = "NLYIQWLKDGGPSSGRPPPS"
-    >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.8)
+    >>> query = SequenceQuery(sequence, scope="protein", min_identity=0.95)
     >>> print(sorted(search(query)))
-    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2LL5', '2MJ9', '3UC7', '3UC8']
+    ['1L2Y', '2LDJ', '9G22', '9G2N', '9G2O', '9G31', '9G32', '9GDL', '9GDN', '9GDT', '9GDU', '9GE1']
     """
     def __init__(self, sequence, scope, min_identity=0.0, max_expect_value=10000000.0):
@@ -441,7 +441,7 @@ class StructureQuery(SingleQuery):
     >>> query = StructureQuery("1L2Y", chain="A")
     >>> print(sorted(search(query)))
-    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS']
+    ['1L2Y', '1RIJ', '2JOF', '2LDJ', '2M7D', '7MQS', '9DPF']
     """
     def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
@@ -868,7 +868,7 @@ def search(
     ...     query, return_type="polymer_entity", return_groups=True,
     ...     group_by=UniprotGrouping(sort_by="rcsb_accession_info.initial_release_date"),
     ... ))
-     {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['1EJG_1', '3NIR_1']}
+    {'P24297': ['5NW3_1'], 'P27707': ['4JLJ_1'], 'P80176': ['5D8V_1'], 'O29777': ['7R0H_1'], 'P01542': ['3NIR_1', '1EJG_1']}
     """
     query_dict = _initialize_query_dict(query, return_type, group_by, content_types)

biotite/database/uniprot/check.py CHANGED Viewed

@@ -10,7 +10,7 @@ from biotite.database.error import RequestError
 # Taken from https://www.uniprot.org/help/api_retrieve_entries
-def assert_valid_response(response_status_code):
+def assert_valid_response(response):
     """
     Checks whether the response is valid.
@@ -19,17 +19,22 @@ def assert_valid_response(response_status_code):
     response: requests.Response
         Response object returned by requests.get.
     """
-    if response_status_code == 400:
-        raise RequestError("Bad request. There is a problem with your input.")
-    elif response_status_code == 404:
-        raise RequestError("Not found. The resource you requested doesn't exist.")
-    elif response_status_code == 410:
-        raise RequestError("Gone. The resource you requested was removed.")
-    elif response_status_code == 500:
-        raise RequestError(
-            "Internal server error. Most likely a temporary problem, but if the problem persists please contact UniProt team."
-        )
-    elif response_status_code == 503:
-        raise RequestError(
-            "Service not available. The server is being updated, try again later."
-        )
+    if len(response.content) == 0:
+        raise RequestError("No content returned")
+    match response.status_code:
+        case 400:
+            raise RequestError("Bad request. There is a problem with your input.")
+        case 404:
+            raise RequestError("Not found. The resource you requested doesn't exist.")
+        case 410:
+            raise RequestError("Gone. The resource you requested was removed.")
+        case 500:
+            raise RequestError(
+                "Internal server error. "
+                "Most likely a temporary problem, "
+                "but if the problem persists please contact UniProt team."
+            )
+        case 503:
+            raise RequestError(
+                "Service not available. The server is being updated, try again later."
+            )

biotite/database/uniprot/download.py CHANGED Viewed

@@ -111,7 +111,7 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
             if format in ["fasta", "gff", "txt", "xml", "rdf", "tab"]:
                 r = requests.get(_fetch_url + db_name + "/" + id + "." + format)
                 content = r.text
-                assert_valid_response(r.status_code)
+                assert_valid_response(r)
             else:
                 raise ValueError(f"Format '{format}' is not supported")
             if file is None:

biotite/database/uniprot/query.py CHANGED Viewed

@@ -289,5 +289,5 @@ def search(query, number=500):
     params = {"query": str(query), "format": "list", "size": str(number)}
     r = requests.get(_base_url, params=params)
     content = r.text
-    assert_valid_response(r.status_code)
+    assert_valid_response(r)
     return content.split("\n")[:-1]

biotite/sequence/align/alignment.py CHANGED Viewed

@@ -9,7 +9,6 @@ import numbers
 import textwrap
 from collections.abc import Sequence
 import numpy as np
-from biotite.sequence.alphabet import LetterAlphabet
 __all__ = [
     "Alignment",
@@ -111,7 +110,7 @@ class Alignment(object):
         for i in range(len(self.trace)):
             j = self.trace[i][seq_index]
             if j != -1:
-                seq_str += self.sequences[seq_index][j]
+                seq_str += str(self.sequences[seq_index][j])
             else:
                 seq_str += "-"
         return seq_str
@@ -133,7 +132,7 @@ class Alignment(object):
         # has an non-single letter alphabet
         all_single_letter = True
         for seq in self.sequences:
-            if not isinstance(seq.get_alphabet(), LetterAlphabet):
+            if not _is_single_letter(seq.alphabet):
                 all_single_letter = False
         if all_single_letter:
             # First dimension: sequence number,
@@ -665,3 +664,17 @@ def remove_terminal_gaps(alignment):
             "no overlap and the resulting alignment would be empty"
         )
     return alignment[start:stop]
+def _is_single_letter(alphabet):
+    """
+    More relaxed version of :func:`biotite.sequence.alphabet.is_letter_alphabet()`:
+    It is sufficient that the string representation of each symbol is only
+    a single character.
+    """
+    if alphabet.is_letter_alphabet():
+        return True
+    for symbol in alphabet:
+        if len(str(symbol)) != 1:
+            return False
+    return True

biotite/sequence/align/banded.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/banded.pyx CHANGED Viewed

@@ -214,9 +214,6 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     else:
         is_swapped = False
     lower_diag, upper_diag = min(band), max(band)
-    band_width = upper_diag - lower_diag + 1
-    if band_width < 1:
-        raise ValueError("The width of the band is 0")
     if len(seq1) + upper_diag <= 0 or lower_diag >= len(seq2):
         raise ValueError(
             "Alignment band is out of range, the band allows no overlap "
@@ -226,6 +223,9 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     # covers the search space of an unbanded alignment
     lower_diag = max(lower_diag, -len(seq1)+1)
     upper_diag = min(upper_diag,  len(seq2)-1)
+    band_width = upper_diag - lower_diag + 1
+    if band_width < 1:
+        raise ValueError("The width of the band is 0")
     # This implementation uses transposed tables in comparison
     # to the common visualization
@@ -249,12 +249,12 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     ###############
     # A score value that signals that the respective direction in the
-    # dynamic programming matrix should not be used since, it would be
+    # dynamic programming matrix should not be used, since it would be
     # outside the band
     # It is the 'worst' score available, so the trace table will never
     # include such a direction
     neg_inf = np.iinfo(np.int32).min
-    # Correct the 'negative infinity' integer, by making it more positve
+    # Correct the 'negative infinity' integer, by making it more positive
     # This prevents an integer underflow when the gap penalty or
     # match score is added to this value
     neg_inf -= min(gap_penalty) if affine_penalty else gap_penalty

biotite/sequence/align/kmeralphabet.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmeralphabet.pyx CHANGED Viewed

@@ -568,6 +568,23 @@ class KmerAlphabet(Alphabet):
         return int(len(self._base_alph) ** self._k)
+    def __iter__(self):
+        # Creating all symbols is expensive
+        # -> Use a generator instead
+        if isinstance(self._base_alph, LetterAlphabet):
+            return ("".join(self.decode(code)) for code in range(len(self)))
+        else:
+            return (list(self.decode(code)) for code in range(len(self)))
+    def __contains__(self, symbol):
+        try:
+            self.fuse(self._base_alph.encode_multiple(symbol))
+            return True
+        except AlphabetError:
+            return False
 def _to_array_form(model_string):
     """
     Convert the common string representation of a *k-mer* spacing

biotite/sequence/align/kmersimilarity.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.pyx CHANGED Viewed

@@ -1384,8 +1384,7 @@ cdef class KmerTable:
     def __getstate__(self):
-        relevant_kmers = self.get_kmers()
-        return _pickle_c_arrays(self._ptr_array, relevant_kmers)
+        return _pickle_c_arrays(self._ptr_array)
     def __setstate__(self, state):
@@ -2836,12 +2835,7 @@ cdef class BucketKmerTable:
     def __getstate__(self):
-        cdef int64[:] relevant_buckets = np.where(
-            np.asarray(self._ptr_array) != 0
-        )[0]
-        return _pickle_c_arrays(self._ptr_array, relevant_buckets)
+        return _pickle_c_arrays(self._ptr_array)
     def __setstate__(self, state):
         _unpickle_c_arrays(self._ptr_array, state)
@@ -3097,27 +3091,44 @@ def _append_entries(ptr[:] trg_ptr_array, ptr[:] src_ptr_array):
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def _pickle_c_arrays(ptr[:] ptr_array, int64[:] relevant_buckets):
+def _pickle_c_arrays(ptr[:] ptr_array):
     """
-    Pickle the `relevant_buckets` (i.e. the buckets that actualy point
-    to an array) of the `ptr_array` into a list of bytes.
+    Pickle the C arrays into a single concatenated :class:`ndarray`.
+    The length of each C-array in this concatenated array is saved as well.
     """
-    cdef int64 i
-    cdef int64 bucket
+    cdef int64 pointer_i, bucket_i, concat_i
     cdef int64 length
     cdef uint32* bucket_ptr
-    cdef list pickled_arrays = [b""] * relevant_buckets.shape[0]
-    for i in range(relevant_buckets.shape[0]):
-        bucket = relevant_buckets[i]
-        bucket_ptr = <uint32*>ptr_array[bucket]
-        length = (<int64*>bucket_ptr)[0]
-        # Get directly the bytes coding for each C-array
-        pickled_arrays[i] \
-            = <bytes>(<char*>bucket_ptr)[:sizeof(uint32) * length]
+    # First pass: Count the total concatenated size
+    cdef int64 total_length = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        bucket_ptr = <uint32*>ptr_array[pointer_i]
+        if bucket_ptr != NULL:
+            # The first element of the C-array is the length
+            # of the array
+            total_length += (<int64*>bucket_ptr)[0]
+    # Second pass: Copy the C-arrays into a single concatenated array
+    # and track the start position of each C-array
+    cdef uint32[:] concatenated_array = np.empty(total_length, dtype=np.uint32)
+    cdef int64[:] lengths = np.empty(ptr_array.shape[0], dtype=np.int64)
+    concat_i = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        bucket_ptr = <uint32*>ptr_array[pointer_i]
+        if bucket_ptr != NULL:
+            length = (<int64*>bucket_ptr)[0]
+            lengths[pointer_i] = length
+            memcpy(
+                &concatenated_array[concat_i],
+                bucket_ptr,
+                length * sizeof(uint32),
+            )
+            concat_i += length
+        else:
+            lengths[pointer_i] = 0
-    return np.asarray(relevant_buckets), pickled_arrays
+    return np.asarray(concatenated_array), np.asarray(lengths)
 @cython.boundscheck(False)
@@ -3126,28 +3137,27 @@ def _unpickle_c_arrays(ptr[:] ptr_array, state):
     """
     Unpickle the pickled `state` into the given `ptr_array`.
     """
-    cdef int64 i
-    cdef int64 bucket
-    cdef int64 byte_length
+    cdef int64 pointer_i, concat_i
+    cdef int64 length
     cdef uint32* bucket_ptr
-    cdef bytes pickled_bytes
-    cdef int64[:] relevant_buckets = state[0]
-    cdef list pickled_pointers = state[1]
-    for i in range(relevant_buckets.shape[0]):
-        bucket = relevant_buckets[i]
-        if bucket < 0 or bucket >= ptr_array.shape[0]:
-            raise ValueError("Invalid bucket found while unpickling")
-        pickled_bytes = pickled_pointers[i]
-        byte_length = len(pickled_bytes)
-        if byte_length != 0:
-            bucket_ptr = <uint32*>malloc(byte_length)
+    cdef uint32[:] concatenated_array = state[0]
+    cdef int64[:] lengths = state[1]
+    concat_i = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        length = lengths[pointer_i]
+        if length != 0:
+            bucket_ptr = <uint32*>malloc(length * sizeof(uint32))
             if not bucket_ptr:
                 raise MemoryError
-            # Convert bytes back into C-array
-            memcpy(bucket_ptr, <char*>pickled_bytes, byte_length)
-            ptr_array[bucket] = <ptr>bucket_ptr
+            memcpy(
+                bucket_ptr,
+                &concatenated_array[concat_i],
+                length * sizeof(uint32),
+            )
+            concat_i += length
+            ptr_array[pointer_i] = <ptr>bucket_ptr
 cdef inline void _deallocate_ptrs(ptr[:] ptrs):

biotite/sequence/align/localgapped.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/localungapped.cpython-312-darwin.so CHANGED Viewed

Binary file