PyPI - biotite - Versions diffs - 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.2.0__cp312-cp312-macosx_11_0_arm64.whl - Mend

biotite 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.2.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +36 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +5 -18
biotite/application/muscle/app5.py +5 -5
biotite/application/sra/app.py +0 -5
biotite/application/util.py +22 -2
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +9 -3
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +8 -9
biotite/database/uniprot/check.py +22 -17
biotite/database/uniprot/download.py +3 -6
biotite/database/uniprot/query.py +4 -5
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +49 -14
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +26 -26
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +19 -2
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +58 -48
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +284 -57
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/alphabet.py +5 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/profile.py +105 -29
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +136 -8
biotite/sequence/sequence.py +1 -2
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +6 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +109 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +170 -0
biotite/structure/alphabet/unkerasify.py +128 -0
biotite/structure/atoms.py +163 -66
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +79 -25
biotite/structure/box.py +19 -21
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +74 -127
biotite/structure/hbond.py +17 -19
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +24 -15
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -34
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +62 -19
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -22
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +4 -4
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +80 -53
biotite/structure/io/pdb/convert.py +4 -3
biotite/structure/io/pdb/file.py +85 -25
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +36 -36
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +54 -15
biotite/structure/io/pdbx/cif.py +92 -66
biotite/structure/io/pdbx/component.py +15 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +410 -75
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +141 -156
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +69 -11
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +76 -4
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/align/alignment.py CHANGED Viewed

@@ -9,7 +9,6 @@ import numbers
 import textwrap
 from collections.abc import Sequence
 import numpy as np
-from biotite.sequence.alphabet import LetterAlphabet
 __all__ = [
     "Alignment",
@@ -20,6 +19,7 @@ __all__ = [
     "score",
     "find_terminal_gaps",
     "remove_terminal_gaps",
+    "remove_gaps",
 ]
@@ -111,7 +111,7 @@ class Alignment(object):
         for i in range(len(self.trace)):
             j = self.trace[i][seq_index]
             if j != -1:
-                seq_str += self.sequences[seq_index][j]
+                seq_str += str(self.sequences[seq_index][j])
             else:
                 seq_str += "-"
         return seq_str
@@ -133,7 +133,7 @@ class Alignment(object):
         # has a non-single letter alphabet
         all_single_letter = True
         for seq in self.sequences:
-            if not isinstance(seq.get_alphabet(), LetterAlphabet):
+            if not _is_single_letter(seq.alphabet):
                 all_single_letter = False
         if all_single_letter:
             # First dimension: sequence number,
@@ -304,7 +304,7 @@ def get_symbols(alignment):
     See Also
     --------
-    get_codes
+    get_codes : Get the sequence codes of the sequences in the alignment.
     Examples
     --------
@@ -362,9 +362,9 @@ def get_sequence_identity(alignment, mode="not_terminal"):
     identity : float
         The sequence identity, ranging between 0 and 1.
-    See also
+    See Also
     --------
-    get_pairwise_sequence_identity
+    get_pairwise_sequence_identity : Get sequence identity for each pair of alignment rows.
     """
     codes = get_codes(alignment)
@@ -425,9 +425,9 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
     identity : ndarray, dtype=float, shape=(n,n)
         The pairwise sequence identity, ranging between 0 and 1.
-    See also
+    See Also
     --------
-    get_sequence_identity
+    get_sequence_identity : Get sequence identity over all alignment rows.
     """
     codes = get_codes(alignment)
     n_seq = len(codes)
@@ -490,10 +490,9 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
         penalty is used. The first integer in the tuple is the gap
         opening penalty, the second integer is the gap extension
         penalty.
-        The values need to be negative. (Default: *-10*)
+        The values need to be negative.
     terminal_penalty : bool, optional
         If true, gap penalties are applied to terminal gaps.
-        (Default: True)
     Returns
     -------
@@ -570,9 +569,9 @@ def find_terminal_gaps(alignment):
         When these indices are used as slice index for an alignment or
         trace, the index would remove terminal gaps.
-    See also
+    See Also
     --------
-    remove_terminal_gaps
+    remove_terminal_gaps : Remove terminal gap columns directly.
     Examples
     --------
@@ -628,9 +627,9 @@ def remove_terminal_gaps(alignment):
         A shallow copy of the input `alignment` with a truncated trace,
         that does not contain alignment columns with terminal gaps.
-    See also
+    See Also
     --------
-    find_terminal_gaps
+    find_terminal_gaps : Only find terminal gap columns.
     Examples
     --------
@@ -665,3 +664,39 @@ def remove_terminal_gaps(alignment):
             "no overlap and the resulting alignment would be empty"
         )
     return alignment[start:stop]
+def remove_gaps(alignment):
+    """
+    Remove all gap columns from an alignment.
+    Parameters
+    ----------
+    alignment : Alignment
+        The alignment to be modified.
+    Returns
+    -------
+    truncated_alignment : Alignment
+        The alignment without gap columns.
+    See Also
+    --------
+    remove_terminal_gaps : Remove only terminal gap columns.
+    """
+    non_gap_mask = (alignment.trace != -1).all(axis=1)
+    return alignment[non_gap_mask]
+def _is_single_letter(alphabet):
+    """
+    More relaxed version of :func:`biotite.sequence.alphabet.is_letter_alphabet()`:
+    It is sufficient that the string representation of each symbol is only
+    a single character.
+    """
+    if alphabet.is_letter_alphabet():
+        return True
+    for symbol in alphabet:
+        if len(str(symbol)) != 1:
+            return False
+    return True

biotite/sequence/align/banded.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/banded.pyx CHANGED Viewed

@@ -54,7 +54,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     aligned to each other, if :math:`D_L \leq j - i \leq D_U`.
     With increasing width of the diagonal band, the probability to find
     the optimal alignment, but also the computation time increases.
     Parameters
     ----------
     seq1, seq2 : Sequence
@@ -84,15 +84,15 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
         The maximum number of alignments returned.
         When the number of branches exceeds this value in the traceback
         step, no further branches are created.
     Returns
     -------
     alignments : list of Alignment
         The generated alignments.
         Each alignment in the list has the same similarity score,
         which is the maximum score possible within the defined band.
-    See also
+    See Also
     --------
     align_optimal
         Guarantees to find the optimal alignment at the cost of greater
@@ -110,7 +110,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     yield a more optimal alignment.
     Considerations on how to find a suitable band width are discussed in
     :footcite:`Gibrat2018`.
     The restriction to a limited band is the central difference between
     the banded alignment heuristic and the optimal alignment
     algorithms :footcite:`Needleman1970, Smith1981`.
@@ -151,12 +151,12 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     Filled cells, i.e. cells within the band, are indicated by ``x``.
     The shorter sequence is always represented by the first dimension
     of the table in this implementation.
     References
     ----------
     .. footbibliography::
     Examples
     --------
@@ -203,7 +203,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
         raise ValueError(
             "Maximum number of returned alignments must be at least 1"
         )
     # The shorter sequence is the one on the left of the matrix
     # -> shorter sequence is 'seq1'
     if len(seq2) < len(seq1):
@@ -214,9 +214,6 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     else:
         is_swapped = False
     lower_diag, upper_diag = min(band), max(band)
-    band_width = upper_diag - lower_diag + 1
-    if band_width < 1:
-        raise ValueError("The width of the band is 0")
     if len(seq1) + upper_diag <= 0 or lower_diag >= len(seq2):
         raise ValueError(
             "Alignment band is out of range, the band allows no overlap "
@@ -226,6 +223,9 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     # covers the search space of an unbanded alignment
     lower_diag = max(lower_diag, -len(seq1)+1)
     upper_diag = min(upper_diag,  len(seq2)-1)
+    band_width = upper_diag - lower_diag + 1
+    if band_width < 1:
+        raise ValueError("The width of the band is 0")
     # This implementation uses transposed tables in comparison
     # to the common visualization
@@ -243,18 +243,18 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
     trace_table = np.zeros((len(seq1)+1, band_width+2), dtype=np.uint8)
     code1 = seq1.code
     code2 = seq2.code
     # Table filling
     ###############
     # A score value that signals that the respective direction in the
-    # dynamic programming matrix should not be used since, it would be
+    # dynamic programming matrix should not be used, since it would be
     # outside the band
     # It is the 'worst' score available, so the trace table will never
     # include such a direction
     neg_inf = np.iinfo(np.int32).min
-    # Correct the 'negative infinity' integer, by making it more positve
+    # Correct the 'negative infinity' integer, by making it more positive
     # This prevents an integer underflow when the gap penalty or
     # match score is added to this value
     neg_inf -= min(gap_penalty) if affine_penalty else gap_penalty
@@ -294,7 +294,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
             code1, code2, matrix.score_matrix(), trace_table, score_table,
             lower_diag, upper_diag, gap_penalty, local
         )
     # Traceback
     ###########
@@ -383,7 +383,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
             state_list = np.full(
                 len(i_list), TraceState.NO_STATE, dtype=int
             )
     # Follow the traces specified in state and indices lists
     cdef int curr_trace_count
     for k in range(len(i_list)):
@@ -401,7 +401,7 @@ def align_banded(seq1, seq2, matrix, band, gap_penalty=-10, local=False,
             curr_trace_count=&curr_trace_count, max_trace_count=max_number,
             lower_diag=lower_diag, upper_diag=upper_diag
         )
     # Replace gap entries in trace with -1
     for i, trace in enumerate(trace_list):
         trace = np.flip(trace, axis=0)
@@ -459,7 +459,7 @@ def _fill_align_table(CodeType1[:] code1 not None,
     local
         Indicates, whether a local alignment should be performed.
     """
     cdef int i, j
     cdef int seq_i, seq_j
     cdef int32 from_diag, from_left, from_top
@@ -488,7 +488,7 @@ def _fill_align_table(CodeType1[:] code1 not None,
             from_top  = score_table[i-1, j+1] + gap_penalty
             trace = get_trace_linear(from_diag, from_left, from_top, &score)
             # Local alignment specialty:
             # If score is less than or equal to 0,
             # then 0 is saved on the field and the trace ends here
@@ -541,7 +541,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
     local
         Indicates, whether a local alignment should be performed.
     """
     cdef int i, j
     cdef int seq_i, seq_j
     cdef int32 mm_score, g1m_score, g2m_score
@@ -550,7 +550,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
     cdef uint8 trace
     cdef int32 m_score, g1_score, g2_score
     cdef int32 similarity_score
     # Starts at 1 since the first row and column are already filled
     for seq_i in range(0, code1.shape[0]):
         i = seq_i + 1
@@ -572,7 +572,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
             g1g1_score = g1_table[i, j-1] + gap_ext
             mg2_score  =  m_table[i-1, j+1] + gap_open
             g2g2_score = g2_table[i-1, j+1] + gap_ext
             trace = get_trace_affine(
                 mm_score, g1m_score, g2m_score,
                 mg1_score, g1g1_score,
@@ -600,7 +600,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
                     m_table[i,j] = m_score
                 if g1_score <= 0:
                     trace &= ~(
-                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
+                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
                         TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
                     )
                     # g1_table[i,j] remains negative infinity
@@ -623,7 +623,7 @@ def _fill_align_table_affine(CodeType1[:] code1 not None,
 def get_global_trace_starts(seq1_len, seq2_len, lower_diag, upper_diag):
     band_width = upper_diag - lower_diag + 1
     j = np.arange(1, band_width + 1)
     seq_j = j + (seq1_len-1) + lower_diag - 1
     # Start from the end of the first (shorter) sequence,

biotite/sequence/align/cigar.py CHANGED Viewed

@@ -86,7 +86,7 @@ def read_alignment_from_cigar(cigar, position, reference_sequence, segment_seque
     See Also
     --------
-    write_alignment_to_cigar
+    write_alignment_to_cigar : The reverse operation.
     Notes
     -----
@@ -253,7 +253,7 @@ def write_alignment_to_cigar(
     See Also
     --------
-    read_alignment_from_cigar
+    read_alignment_from_cigar : The reverse operation.
     Notes
     -----

biotite/sequence/align/kmeralphabet.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmeralphabet.pyx CHANGED Viewed

@@ -267,7 +267,7 @@ class KmerAlphabet(Alphabet):
         kmer_codes : int or ndarray, dtype=np.int64, shape=(n,)
             The fused *k-mer* code(s).
-        See also
+        See Also
         --------
         split
             The reverse operation.
@@ -319,7 +319,7 @@ class KmerAlphabet(Alphabet):
         codes : ndarray, dtype=np.uint64, shape=(k,) or shape=(n,k)
             The split symbol codes from the base alphabet.
-        See also
+        See Also
         --------
         fuse
             The reverse operation.
@@ -568,6 +568,23 @@ class KmerAlphabet(Alphabet):
         return int(len(self._base_alph) ** self._k)
+    def __iter__(self):
+        # Creating all symbols is expensive
+        # -> Use a generator instead
+        if isinstance(self._base_alph, LetterAlphabet):
+            return ("".join(self.decode(code)) for code in range(len(self)))
+        else:
+            return (list(self.decode(code)) for code in range(len(self)))
+    def __contains__(self, symbol):
+        try:
+            self.fuse(self._base_alph.encode_multiple(symbol))
+            return True
+        except AlphabetError:
+            return False
 def _to_array_form(model_string):
     """
     Convert the common string representation of a *k-mer* spacing

biotite/sequence/align/kmersimilarity.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.pyx CHANGED Viewed

@@ -102,7 +102,7 @@ cdef class KmerTable:
     k : int
         The length of the *k-mers*.
-    See also
+    See Also
     --------
     BucketKmerTable
@@ -308,7 +308,7 @@ cdef class KmerTable:
             The number of *informative* positions must equal *k*.
             Refer to :class:`KmerAlphabet` for more details.
-        See also
+        See Also
         --------
         from_kmers : The same functionality based on already created *k-mers*
@@ -413,7 +413,7 @@ cdef class KmerTable:
             is false, is not added to the table.
             By default, all positions are added.
-        See also
+        See Also
         --------
         from_sequences : The same functionality based on undecomposed sequences
@@ -1384,8 +1384,7 @@ cdef class KmerTable:
     def __getstate__(self):
-        relevant_kmers = self.get_kmers()
-        return _pickle_c_arrays(self._ptr_array, relevant_kmers)
+        return _pickle_c_arrays(self._ptr_array)
     def __setstate__(self, state):
@@ -1549,7 +1548,7 @@ cdef class BucketKmerTable:
     n_buckets : int
         The number of buckets, the *k-mers* are divided into.
-    See also
+    See Also
     --------
     KmerTable
@@ -1775,7 +1774,7 @@ cdef class BucketKmerTable:
             purpose.
             By default, a load factor of approximately 0.8 is used.
-        See also
+        See Also
         --------
         from_kmers : The same functionality based on already created *k-mers*
@@ -1893,7 +1892,7 @@ cdef class BucketKmerTable:
             purpose.
             By default, a load factor of approximately 0.8 is used.
-        See also
+        See Also
         --------
         from_sequences : The same functionality based on undecomposed sequences
@@ -2836,12 +2835,7 @@ cdef class BucketKmerTable:
     def __getstate__(self):
-        cdef int64[:] relevant_buckets = np.where(
-            np.asarray(self._ptr_array) != 0
-        )[0]
-        return _pickle_c_arrays(self._ptr_array, relevant_buckets)
+        return _pickle_c_arrays(self._ptr_array)
     def __setstate__(self, state):
         _unpickle_c_arrays(self._ptr_array, state)
@@ -3097,27 +3091,44 @@ def _append_entries(ptr[:] trg_ptr_array, ptr[:] src_ptr_array):
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def _pickle_c_arrays(ptr[:] ptr_array, int64[:] relevant_buckets):
+def _pickle_c_arrays(ptr[:] ptr_array):
     """
-    Pickle the `relevant_buckets` (i.e. the buckets that actualy point
-    to an array) of the `ptr_array` into a list of bytes.
+    Pickle the C arrays into a single concatenated :class:`ndarray`.
+    The length of each C-array within this concatenated array is saved as well.
     """
-    cdef int64 i
-    cdef int64 bucket
+    cdef int64 pointer_i, bucket_i, concat_i
     cdef int64 length
     cdef uint32* bucket_ptr
-    cdef list pickled_arrays = [b""] * relevant_buckets.shape[0]
-    for i in range(relevant_buckets.shape[0]):
-        bucket = relevant_buckets[i]
-        bucket_ptr = <uint32*>ptr_array[bucket]
-        length = (<int64*>bucket_ptr)[0]
-        # Get directly the bytes coding for each C-array
-        pickled_arrays[i] \
-            = <bytes>(<char*>bucket_ptr)[:sizeof(uint32) * length]
+    # First pass: Count the total concatenated size
+    cdef int64 total_length = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        bucket_ptr = <uint32*>ptr_array[pointer_i]
+        if bucket_ptr != NULL:
+            # The first element of the C-array is the length
+            # of the array
+            total_length += (<int64*>bucket_ptr)[0]
+    # Second pass: Copy the C-arrays into a single concatenated array
+    # and record the length of each C-array
+    cdef uint32[:] concatenated_array = np.empty(total_length, dtype=np.uint32)
+    cdef int64[:] lengths = np.empty(ptr_array.shape[0], dtype=np.int64)
+    concat_i = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        bucket_ptr = <uint32*>ptr_array[pointer_i]
+        if bucket_ptr != NULL:
+            length = (<int64*>bucket_ptr)[0]
+            lengths[pointer_i] = length
+            memcpy(
+                &concatenated_array[concat_i],
+                bucket_ptr,
+                length * sizeof(uint32),
+            )
+            concat_i += length
+        else:
+            lengths[pointer_i] = 0
-    return np.asarray(relevant_buckets), pickled_arrays
+    return np.asarray(concatenated_array), np.asarray(lengths)
 @cython.boundscheck(False)
@@ -3126,28 +3137,27 @@ def _unpickle_c_arrays(ptr[:] ptr_array, state):
     """
     Unpickle the pickled `state` into the given `ptr_array`.
     """
-    cdef int64 i
-    cdef int64 bucket
-    cdef int64 byte_length
+    cdef int64 pointer_i, concat_i
+    cdef int64 length
     cdef uint32* bucket_ptr
-    cdef bytes pickled_bytes
-    cdef int64[:] relevant_buckets = state[0]
-    cdef list pickled_pointers = state[1]
-    for i in range(relevant_buckets.shape[0]):
-        bucket = relevant_buckets[i]
-        if bucket < 0 or bucket >= ptr_array.shape[0]:
-            raise ValueError("Invalid bucket found while unpickling")
-        pickled_bytes = pickled_pointers[i]
-        byte_length = len(pickled_bytes)
-        if byte_length != 0:
-            bucket_ptr = <uint32*>malloc(byte_length)
+    cdef uint32[:] concatenated_array = state[0]
+    cdef int64[:] lengths = state[1]
+    concat_i = 0
+    for pointer_i in range(ptr_array.shape[0]):
+        length = lengths[pointer_i]
+        if length != 0:
+            bucket_ptr = <uint32*>malloc(length * sizeof(uint32))
             if not bucket_ptr:
                 raise MemoryError
-            # Convert bytes back into C-array
-            memcpy(bucket_ptr, <char*>pickled_bytes, byte_length)
-            ptr_array[bucket] = <ptr>bucket_ptr
+            memcpy(
+                bucket_ptr,
+                &concatenated_array[concat_i],
+                length * sizeof(uint32),
+            )
+            concat_i += length
+            ptr_array[pointer_i] = <ptr>bucket_ptr
 cdef inline void _deallocate_ptrs(ptr[:] ptrs):

biotite/sequence/align/localgapped.cpython-312-darwin.so CHANGED Viewed

Binary file