biotite 1.0.1__cp311-cp311-win_amd64.whl → 1.1.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/application/dssp/app.py +13 -3
  2. biotite/application/localapp.py +34 -0
  3. biotite/application/muscle/app3.py +2 -15
  4. biotite/application/muscle/app5.py +2 -2
  5. biotite/application/util.py +1 -1
  6. biotite/application/viennarna/rnaplot.py +6 -2
  7. biotite/database/rcsb/query.py +6 -6
  8. biotite/database/uniprot/check.py +20 -15
  9. biotite/database/uniprot/download.py +1 -1
  10. biotite/database/uniprot/query.py +1 -1
  11. biotite/sequence/align/alignment.py +16 -3
  12. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  13. biotite/sequence/align/banded.pyx +5 -5
  14. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmeralphabet.pyx +17 -0
  16. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  17. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/kmertable.pyx +52 -42
  19. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  21. biotite/sequence/align/matrix.py +273 -55
  22. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  23. biotite/sequence/align/matrix_data/PB.license +21 -0
  24. biotite/sequence/align/matrix_data/PB.mat +18 -0
  25. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  26. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  27. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  28. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  29. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  30. biotite/sequence/alphabet.py +3 -0
  31. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  32. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  33. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  34. biotite/sequence/graphics/colorschemes.py +44 -11
  35. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  37. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  38. biotite/sequence/profile.py +86 -4
  39. biotite/sequence/seqtypes.py +124 -3
  40. biotite/setup_ccd.py +197 -0
  41. biotite/structure/__init__.py +4 -3
  42. biotite/structure/alphabet/__init__.py +25 -0
  43. biotite/structure/alphabet/encoder.py +332 -0
  44. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  45. biotite/structure/alphabet/i3d.py +110 -0
  46. biotite/structure/alphabet/layers.py +86 -0
  47. biotite/structure/alphabet/pb.license +21 -0
  48. biotite/structure/alphabet/pb.py +171 -0
  49. biotite/structure/alphabet/unkerasify.py +122 -0
  50. biotite/structure/atoms.py +129 -40
  51. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  52. biotite/structure/bonds.pyx +72 -21
  53. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  54. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  55. biotite/structure/geometry.py +60 -113
  56. biotite/structure/info/__init__.py +1 -0
  57. biotite/structure/info/atoms.py +13 -13
  58. biotite/structure/info/bonds.py +12 -6
  59. biotite/structure/info/ccd.py +125 -32
  60. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  61. biotite/structure/info/groups.py +63 -17
  62. biotite/structure/info/masses.py +9 -6
  63. biotite/structure/info/misc.py +15 -21
  64. biotite/structure/info/standardize.py +3 -2
  65. biotite/structure/io/mol/sdf.py +41 -40
  66. biotite/structure/io/pdb/convert.py +2 -0
  67. biotite/structure/io/pdb/file.py +74 -3
  68. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  69. biotite/structure/io/pdbqt/file.py +32 -32
  70. biotite/structure/io/pdbx/__init__.py +1 -0
  71. biotite/structure/io/pdbx/bcif.py +32 -8
  72. biotite/structure/io/pdbx/cif.py +72 -59
  73. biotite/structure/io/pdbx/component.py +9 -4
  74. biotite/structure/io/pdbx/compress.py +321 -0
  75. biotite/structure/io/pdbx/convert.py +194 -48
  76. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  77. biotite/structure/io/pdbx/encoding.pyx +98 -17
  78. biotite/structure/molecules.py +141 -141
  79. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  80. biotite/structure/segments.py +1 -2
  81. biotite/structure/util.py +73 -1
  82. biotite/version.py +2 -2
  83. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
  84. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
  85. biotite/structure/info/ccd/README.rst +0 -8
  86. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  87. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  88. biotite/structure/info/ccd/nucleotides.txt +0 -798
  89. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
  90. {biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -7,7 +7,6 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["CIFFile", "CIFBlock", "CIFCategory", "CIFColumn", "CIFData"]
8
8
 
9
9
  import itertools
10
- import re
11
10
  from collections.abc import MutableMapping, Sequence
12
11
  import numpy as np
13
12
  from biotite.file import (
@@ -357,7 +356,7 @@ class CIFCategory(_Component, MutableMapping):
357
356
  return CIFBlock
358
357
 
359
358
  @staticmethod
360
- def deserialize(text, expect_whitespace=True):
359
+ def deserialize(text):
361
360
  lines = [line.strip() for line in text.splitlines() if not _is_empty(line)]
362
361
 
363
362
  if _is_loop_start(lines[0]):
@@ -372,7 +371,7 @@ class CIFCategory(_Component, MutableMapping):
372
371
 
373
372
  lines = _to_single(lines)
374
373
  if is_looped:
375
- category_dict = CIFCategory._deserialize_looped(lines, expect_whitespace)
374
+ category_dict = CIFCategory._deserialize_looped(lines)
376
375
  else:
377
376
  category_dict = CIFCategory._deserialize_single(lines)
378
377
  return CIFCategory(category_dict, category_name)
@@ -416,6 +415,9 @@ class CIFCategory(_Component, MutableMapping):
416
415
  raise ValueError("At least one column must remain")
417
416
  del self._columns[key]
418
417
 
418
+ def __contains__(self, key):
419
+ return key in self._columns
420
+
419
421
  def __iter__(self):
420
422
  return iter(self._columns)
421
423
 
@@ -442,7 +444,7 @@ class CIFCategory(_Component, MutableMapping):
442
444
  line_i = 0
443
445
  while line_i < len(lines):
444
446
  line = lines[line_i]
445
- parts = _split_one_line(line)
447
+ parts = list(_split_one_line(line))
446
448
  if len(parts) == 2:
447
449
  # Standard case -> name and value in one line
448
450
  name_part, value_part = parts
@@ -450,7 +452,7 @@ class CIFCategory(_Component, MutableMapping):
450
452
  elif len(parts) == 1:
451
453
  # Value is a multiline value on the next line
452
454
  name_part = parts[0]
453
- parts = _split_one_line(lines[line_i + 1])
455
+ parts = list(_split_one_line(lines[line_i + 1]))
454
456
  if len(parts) == 1:
455
457
  value_part = parts[0]
456
458
  else:
@@ -464,7 +466,7 @@ class CIFCategory(_Component, MutableMapping):
464
466
  return category_dict
465
467
 
466
468
  @staticmethod
467
- def _deserialize_looped(lines, expect_whitespace):
469
+ def _deserialize_looped(lines):
468
470
  """
469
471
  Process a category where each field has multiple values
470
472
  (category is a table).
@@ -487,20 +489,7 @@ class CIFCategory(_Component, MutableMapping):
487
489
  # row-line-alignment at all and simply cycle through columns
488
490
  column_indices = itertools.cycle(range(len(column_names)))
489
491
  for data_line in data_lines:
490
- # If whitespace is expected in quote protected values,
491
- # use regex-based _split_one_line() to split
492
- # Otherwise use much more faster whitespace split
493
- # and quote removal if applicable.
494
- if expect_whitespace:
495
- values = _split_one_line(data_line)
496
- else:
497
- values = data_line.split()
498
- for k in range(len(values)):
499
- # Remove quotes
500
- if (values[k][0] == '"' and values[k][-1] == '"') or (
501
- values[k][0] == "'" and values[k][-1] == "'"
502
- ):
503
- values[k] = values[k][1:-1]
492
+ values = _split_one_line(data_line)
504
493
  for val in values:
505
494
  column_index = next(column_indices)
506
495
  column_name = column_names[column_index]
@@ -569,6 +558,17 @@ class CIFBlock(_Component, MutableMapping):
569
558
  The keys are the category names and the values are the
570
559
  :class:`CIFCategory` objects.
571
560
  By default, an empty block is created.
561
+ name : str, optional
562
+ The name of the block.
563
+ This is only used for serialization and is automatically set,
564
+ when the :class:`CIFBlock` is added to a :class:`CIFFile`.
565
+ It only needs to be set manually, when the block is directly
566
+ serialized.
567
+
568
+ Attributes
569
+ ----------
570
+ name : str
571
+ The name of the block.
572
572
 
573
573
  Notes
574
574
  -----
@@ -580,13 +580,15 @@ class CIFBlock(_Component, MutableMapping):
580
580
  --------
581
581
 
582
582
  >>> # Add category on creation
583
- >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})})
583
+ >>> block = CIFBlock({"foo": CIFCategory({"some_column": 1})}, name="baz")
584
584
  >>> # Add category later on
585
585
  >>> block["bar"] = CIFCategory({"another_column": [2, 3]})
586
586
  >>> # Access a column
587
587
  >>> print(block["bar"]["another_column"].as_array())
588
588
  ['2' '3']
589
589
  >>> print(block.serialize())
590
+ data_baz
591
+ #
590
592
  _foo.some_column 1
591
593
  #
592
594
  loop_
@@ -596,11 +598,20 @@ class CIFBlock(_Component, MutableMapping):
596
598
  #
597
599
  """
598
600
 
599
- def __init__(self, categories=None):
601
+ def __init__(self, categories=None, name=None):
602
+ self._name = name
600
603
  if categories is None:
601
604
  categories = {}
602
605
  self._categories = categories
603
606
 
607
+ @property
608
+ def name(self):
609
+ return self._name
610
+
611
+ @name.setter
612
+ def name(self, name):
613
+ self._name = name
614
+
604
615
  @staticmethod
605
616
  def subcomponent_class():
606
617
  return CIFCategory
@@ -634,7 +645,10 @@ class CIFBlock(_Component, MutableMapping):
634
645
  return CIFBlock(_create_element_dict(lines, category_names, category_starts))
635
646
 
636
647
  def serialize(self):
637
- text_blocks = []
648
+ if self._name is None:
649
+ raise SerializationError("Block name is required")
650
+ # The block starts with the block name line followed by a comment line
651
+ text_blocks = ["data_" + self._name + "\n#\n"]
638
652
  for category_name, category in self._categories.items():
639
653
  if isinstance(category, str):
640
654
  # Category is already stored as lines
@@ -657,15 +671,7 @@ class CIFBlock(_Component, MutableMapping):
657
671
  # Element is stored in serialized form
658
672
  # -> must be deserialized first
659
673
  try:
660
- # Special optimization for "atom_site":
661
- # Even if the values are quote protected,
662
- # no whitespace is expected in escaped values
663
- # Therefore slow regex-based _split_one_line() call is not necessary
664
- if key == "atom_site":
665
- expect_whitespace = False
666
- else:
667
- expect_whitespace = True
668
- category = CIFCategory.deserialize(category, expect_whitespace)
674
+ category = CIFCategory.deserialize(category)
669
675
  except Exception:
670
676
  raise DeserializationError(f"Failed to deserialize category '{key}'")
671
677
  # Update with deserialized object
@@ -683,6 +689,9 @@ class CIFBlock(_Component, MutableMapping):
683
689
  def __delitem__(self, key):
684
690
  del self._categories[key]
685
691
 
692
+ def __contains__(self, key):
693
+ return key in self._categories
694
+
686
695
  def __iter__(self):
687
696
  return iter(self._categories)
688
697
 
@@ -806,14 +815,12 @@ class CIFFile(_Component, File, MutableMapping):
806
815
  def serialize(self):
807
816
  text_blocks = []
808
817
  for block_name, block in self._blocks.items():
809
- text_blocks.append("data_" + block_name + "\n")
810
- # A comment line is set after the block indicator
811
- text_blocks.append("#\n")
812
818
  if isinstance(block, str):
813
819
  # Block is already stored as text
814
820
  text_blocks.append(block)
815
821
  else:
816
822
  try:
823
+ block.name = block_name
817
824
  text_blocks.append(block.serialize())
818
825
  except Exception:
819
826
  raise SerializationError(
@@ -884,11 +891,15 @@ class CIFFile(_Component, File, MutableMapping):
884
891
  def __setitem__(self, key, block):
885
892
  if not isinstance(block, CIFBlock):
886
893
  raise TypeError(f"Expected 'CIFBlock', but got '{type(block).__name__}'")
894
+ block.name = key
887
895
  self._blocks[key] = block
888
896
 
889
897
  def __delitem__(self, key):
890
898
  del self._blocks[key]
891
899
 
900
+ def __contains__(self, key):
901
+ return key in self._blocks
902
+
892
903
  def __iter__(self):
893
904
  return iter(self._blocks)
894
905
 
@@ -921,7 +932,7 @@ def _create_element_dict(lines, element_names, element_starts):
921
932
  # Lazy deserialization
922
933
  # -> keep as text for now and deserialize later if needed
923
934
  return {
924
- element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]])
935
+ element_name: "\n".join(lines[element_starts[i] : element_starts[i + 1]]) + "\n"
925
936
  for i, element_name in enumerate(element_names)
926
937
  }
927
938
 
@@ -1029,29 +1040,31 @@ def _split_one_line(line):
1029
1040
  """
1030
1041
  # Special case of multiline value, where the line starts with ';'
1031
1042
  if line[0] == ";":
1032
- return [line[1:]]
1033
-
1034
- # Define the patterns for different types of fields
1035
- single_quote_pattern = r"('(?:'(?! )|[^'])*')(?:\s|$)"
1036
- double_quote_pattern = r'("(?:"(?! )|[^"])*")(?:\s|$)'
1037
- unquoted_pattern = r"([^\s]+)"
1038
-
1039
- # Combine the patterns using alternation
1040
- combined_pattern = (
1041
- f"{single_quote_pattern}|{double_quote_pattern}|{unquoted_pattern}"
1042
- )
1043
-
1044
- # Find all matches
1045
- matches = re.findall(combined_pattern, line)
1046
-
1047
- # Extract non-empty groups from the matches
1048
- fields = []
1049
- for match in matches:
1050
- field = next(group for group in match if group)
1051
- if field[0] == field[-1] == "'" or field[0] == field[-1] == '"':
1052
- field = field[1:-1]
1053
- fields.append(field)
1054
- return fields
1043
+ yield line[1:]
1044
+ elif "'" in line or '"' in line:
1045
+ # Quoted values in the line
1046
+ while line:
1047
+ # Strip leading whitespace(s)
1048
+ stripped_line = line.lstrip()
1049
+ # Split the line on whitespace
1050
+ word, _, line = stripped_line.partition(" ")
1051
+ # Handle the case where the word starts with a quote
1052
+ if word.startswith(("'", '"')):
1053
+ # Set the separator to the quote found
1054
+ separator = word[0]
1055
+ # Handle the case of a quoted word without space
1056
+ if word.endswith(separator) and len(word) > 1:
1057
+ # Yield the word without the opening and closing quotes
1058
+ yield word[1:-1]
1059
+ continue
1060
+ # Split the word on the separator
1061
+ word, _, line = stripped_line[1:].partition(separator)
1062
+
1063
+ yield word
1064
+ else:
1065
+ # No quoted values in the line -> simple whitespace split
1066
+ for line in line.split():
1067
+ yield line
1055
1068
 
1056
1069
 
1057
1070
  def _arrayfy(data):
@@ -171,10 +171,10 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
171
171
  Parameters
172
172
  ----------
173
173
  store_key_in: str, optional
174
- If given, the key of each element is stored as value in the
175
- serialized element.
176
- This is basically the reverse operation of `take_key_from` in
177
- :meth:`_deserialize_elements()`.
174
+ If given, the key of each element is stored as value in the
175
+ serialized element.
176
+ This is basically the reverse operation of `take_key_from` in
177
+ :meth:`_deserialize_elements()`.
178
178
  """
179
179
  serialized_elements = []
180
180
  for key, element in self._elements.items():
@@ -223,6 +223,11 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
223
223
  def __delitem__(self, key):
224
224
  del self._elements[key]
225
225
 
226
+ # Implement `__contains__()` explicitly,
227
+ # because the mixin method unnecessarily deserializes the value, if available
228
+ def __contains__(self, key):
229
+ return key in self._elements
230
+
226
231
  def __iter__(self):
227
232
  return iter(self._elements)
228
233
 
@@ -0,0 +1,321 @@
1
+ __all__ = ["compress"]
2
+ __name__ = "biotite.structure.io.pdbx"
3
+ __author__ = "Patrick Kunzmann"
4
+
5
+ import itertools
6
+ import msgpack
7
+ import numpy as np
8
+ import biotite.structure.io.pdbx.bcif as bcif
9
+ from biotite.structure.io.pdbx.bcif import _encode_numpy as encode_numpy
10
+ from biotite.structure.io.pdbx.encoding import (
11
+ ByteArrayEncoding,
12
+ DeltaEncoding,
13
+ FixedPointEncoding,
14
+ IntegerPackingEncoding,
15
+ RunLengthEncoding,
16
+ StringArrayEncoding,
17
+ )
18
+
19
+
20
+ def compress(data, float_tolerance=1e-6):
21
+ """
22
+ Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
23
+ different data encodings for each data array and selecting the one that results in
24
+ the smallest size.
25
+
26
+ Parameters
27
+ ----------
28
+ data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
29
+ The data to compress.
30
+
31
+ Returns
32
+ -------
33
+ compressed_file : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
34
+ The compressed data with the same type as the input data.
35
+ If no improved compression is found for a :class:`BinaryCIFData` array,
36
+ the input data is kept.
37
+ Hence, the return value is not a deep copy of the input data.
38
+ float_tolerance : float, optional
39
+ The relative error that is accepted when compressing floating point numbers.
40
+
41
+ Examples
42
+ --------
43
+
44
+ >>> from io import BytesIO
45
+ >>> pdbx_file = BinaryCIFFile()
46
+ >>> set_structure(pdbx_file, atom_array_stack)
47
+ >>> # Write uncompressed file
48
+ >>> uncompressed_file = BytesIO()
49
+ >>> pdbx_file.write(uncompressed_file)
50
+ >>> _ = uncompressed_file.seek(0)
51
+ >>> print(f"{len(uncompressed_file.read()) // 1000} KB")
52
+ 927 KB
53
+ >>> # Write compressed file
54
+ >>> pdbx_file = compress(pdbx_file)
55
+ >>> compressed_file = BytesIO()
56
+ >>> pdbx_file.write(compressed_file)
57
+ >>> _ = compressed_file.seek(0)
58
+ >>> print(f"{len(compressed_file.read()) // 1000} KB")
59
+ 111 KB
60
+ """
61
+ match type(data):
62
+ case bcif.BinaryCIFFile:
63
+ return _compress_file(data, float_tolerance)
64
+ case bcif.BinaryCIFBlock:
65
+ return _compress_block(data, float_tolerance)
66
+ case bcif.BinaryCIFCategory:
67
+ return _compress_category(data, float_tolerance)
68
+ case bcif.BinaryCIFColumn:
69
+ return _compress_column(data, float_tolerance)
70
+ case bcif.BinaryCIFData:
71
+ return _compress_data(data, float_tolerance)
72
+ case _:
73
+ raise TypeError(f"Unsupported type {type(data).__name__}")
74
+
75
+
76
+ def _compress_file(bcif_file, float_tolerance):
77
+ compressed_file = bcif.BinaryCIFFile()
78
+ for block_name, bcif_block in bcif_file.items():
79
+ compressed_block = _compress_block(bcif_block, float_tolerance)
80
+ compressed_file[block_name] = compressed_block
81
+ return compressed_file
82
+
83
+
84
+ def _compress_block(bcif_block, float_tolerance):
85
+ compressed_block = bcif.BinaryCIFBlock()
86
+ for category_name, bcif_category in bcif_block.items():
87
+ compressed_category = _compress_category(bcif_category, float_tolerance)
88
+ compressed_block[category_name] = compressed_category
89
+ return compressed_block
90
+
91
+
92
+ def _compress_category(bcif_category, float_tolerance):
93
+ compressed_category = bcif.BinaryCIFCategory()
94
+ for column_name, bcif_column in bcif_category.items():
95
+ compressed_column = _compress_column(bcif_column, float_tolerance)
96
+ compressed_category[column_name] = compressed_column
97
+ return compressed_category
98
+
99
+
100
+ def _compress_column(bcif_column, float_tolerance):
101
+ data = _compress_data(bcif_column.data, float_tolerance)
102
+ if bcif_column.mask is not None:
103
+ mask = _compress_data(bcif_column.mask, float_tolerance)
104
+ else:
105
+ mask = None
106
+ return bcif.BinaryCIFColumn(data, mask)
107
+
108
+
109
+ def _compress_data(bcif_data, float_tolerance):
110
+ array = bcif_data.array
111
+ if len(array) == 1:
112
+ # No need to compress a single value -> Use default uncompressed encoding
113
+ return bcif.BinaryCIFData(array)
114
+
115
+ if np.issubdtype(array.dtype, np.str_):
116
+ # Leave encoding empty for now, as it is explicitly set later
117
+ encoding = StringArrayEncoding(data_encoding=[], offset_encoding=[])
118
+ # Run encode to initialize the data and offset arrays
119
+ indices = encoding.encode(array)
120
+ offsets = np.cumsum([0] + [len(s) for s in encoding.strings])
121
+ encoding.data_encoding, _ = _find_best_integer_compression(indices)
122
+ encoding.offset_encoding, _ = _find_best_integer_compression(offsets)
123
+ return bcif.BinaryCIFData(array, [encoding])
124
+
125
+ elif np.issubdtype(array.dtype, np.floating):
126
+ to_integer_encoding = FixedPointEncoding(
127
+ 10 ** _get_decimal_places(array, float_tolerance)
128
+ )
129
+ integer_array = to_integer_encoding.encode(array)
130
+ best_encoding, size_compressed = _find_best_integer_compression(integer_array)
131
+ if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
132
+ return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
133
+ else:
134
+ # The float array is smaller -> encode it directly as bytes
135
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
136
+
137
+ elif np.issubdtype(array.dtype, np.integer):
138
+ array = _to_smallest_integer_type(array)
139
+ encodings, _ = _find_best_integer_compression(array)
140
+ return bcif.BinaryCIFData(array, encodings)
141
+
142
+ else:
143
+ raise TypeError(f"Unsupported data type {array.dtype}")
144
+
145
+
146
+ def _find_best_integer_compression(array):
147
+ """
148
+ Try different data encodings on an integer array and return the one that results in
149
+ the smallest size.
150
+ """
151
+ best_encoding_sequence = None
152
+ smallest_size = np.inf
153
+
154
+ for use_delta in [False, True]:
155
+ if use_delta:
156
+ encoding = DeltaEncoding()
157
+ array_after_delta = encoding.encode(array)
158
+ encodings_after_delta = [encoding]
159
+ else:
160
+ encodings_after_delta = []
161
+ array_after_delta = array
162
+ for use_run_length in [False, True]:
163
+ # Use encoded data from previous step to save time
164
+ if use_run_length:
165
+ encoding = RunLengthEncoding()
166
+ array_after_rle = encoding.encode(array_after_delta)
167
+ encodings_after_rle = encodings_after_delta + [encoding]
168
+ else:
169
+ encodings_after_rle = encodings_after_delta
170
+ array_after_rle = array_after_delta
171
+ for packed_byte_count in [None, 1, 2]:
172
+ if packed_byte_count is not None:
173
+ # Quickly check this heuristic
174
+ # to avoid computing an exploding packed data array
175
+ if (
176
+ _estimate_packed_length(array_after_rle, packed_byte_count)
177
+ >= array_after_rle.nbytes
178
+ ):
179
+ # Packing would not reduce the size
180
+ continue
181
+ encoding = IntegerPackingEncoding(packed_byte_count)
182
+ array_after_packing = encoding.encode(array_after_rle)
183
+ encodings_after_packing = encodings_after_rle + [encoding]
184
+ else:
185
+ encodings_after_packing = encodings_after_rle
186
+ array_after_packing = array_after_rle
187
+ encoding = ByteArrayEncoding()
188
+ encoded_array = encoding.encode(array_after_packing)
189
+ encodings = encodings_after_packing + [encoding]
190
+ # Pack data directly instead of using the BinaryCIFData class
191
+ # to avoid the unnecessary re-encoding of the array,
192
+ # as it is already available in 'encoded_array'
193
+ serialized_encoding = [enc.serialize() for enc in encodings]
194
+ serialized_data = {
195
+ "data": encoded_array,
196
+ "encoding": serialized_encoding,
197
+ }
198
+ size = _data_size_in_file(serialized_data)
199
+ if size < smallest_size:
200
+ best_encoding_sequence = encodings
201
+ smallest_size = size
202
+ return best_encoding_sequence, smallest_size
203
+
204
+
205
+ def _estimate_packed_length(array, packed_byte_count):
206
+ """
207
+ Estimate the length of an integer array after packing it with a given number of
208
+ bytes.
209
+
210
+ Parameters
211
+ ----------
212
+ array : numpy.ndarray
213
+ The array to pack.
214
+ packed_byte_count : int
215
+ The number of bytes used for packing.
216
+
217
+ Returns
218
+ -------
219
+ length : int
220
+ The estimated length of the packed array.
221
+ """
222
+ # Use int64 to avoid integer overflow in the following line
223
+ max_val_per_element = np.int64(2 ** (8 * packed_byte_count))
224
+ n_bytes_per_element = packed_byte_count * (np.abs(array // max_val_per_element) + 1)
225
+ return np.sum(n_bytes_per_element, dtype=np.int64)
226
+
227
+
228
+ def _to_smallest_integer_type(array):
229
+ """
230
+ Convert an integer array to the smallest possible integer type, that is still able
231
+ to represent all values in the array.
232
+
233
+ Parameters
234
+ ----------
235
+ array : numpy.ndarray
236
+ The array to convert.
237
+
238
+ Returns
239
+ -------
240
+ array : numpy.ndarray
241
+ The converted array.
242
+ """
243
+ if array.min() >= 0:
244
+ for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
245
+ if np.all(array <= np.iinfo(dtype).max):
246
+ return array.astype(dtype)
247
+ for dtype in [np.int8, np.int16, np.int32, np.int64]:
248
+ if np.all(array >= np.iinfo(dtype).min) and np.all(
249
+ array <= np.iinfo(dtype).max
250
+ ):
251
+ return array.astype(dtype)
252
+ raise ValueError("Array is out of bounds for all integer types")
253
+
254
+
255
+ def _data_size_in_file(data):
256
+ """
257
+ Get the size the data would have when written into a *BinaryCIF* file.
258
+
259
+ Parameters
260
+ ----------
261
+ data : BinaryCIFData or dict
262
+ The data array whose size is measured.
263
+ Can be either a :class:`BinaryCIFData` object or already serialized data.
264
+
265
+ Returns
266
+ -------
267
+ size : int
268
+ The size of the data array in the file in bytes.
269
+ """
270
+ if isinstance(data, bcif.BinaryCIFData):
271
+ data = data.serialize()
272
+ bytes_in_file = msgpack.packb(data, use_bin_type=True, default=encode_numpy)
273
+ return len(bytes_in_file)
274
+
275
+
276
+ def _get_decimal_places(array, tol):
277
+ """
278
+ Get the number of decimal places in a floating point array.
279
+
280
+ Parameters
281
+ ----------
282
+ array : numpy.ndarray
283
+ The array to analyze.
284
+ tol : float
285
+ The relative tolerance allowed when the values are cut off after the returned
286
+ number of decimal places.
287
+
288
+ Returns
289
+ -------
290
+ decimals : int
291
+ The number of decimal places.
292
+ """
293
+ # Decimals of NaN or infinite values do not make sense
294
+ # and 0 would give NaN when rounding on decimals
295
+ array = array[np.isfinite(array) & (array != 0)]
296
+ for decimals in itertools.count(start=-_order_magnitude(array)):
297
+ error = np.abs(np.round(array, decimals) - array)
298
+ if np.all(error < tol * np.abs(array)):
299
+ return decimals
300
+
301
+
302
+ def _order_magnitude(array):
303
+ """
304
+ Get the order of magnitude of floating point values.
305
+
306
+ Parameters
307
+ ----------
308
+ array : ndarray, dtype=float
309
+ The array to analyze.
310
+
311
+ Returns
312
+ -------
313
+ magnitude : int
314
+ The order of magnitude, i.e. the maximum exponent a number in the array would
315
+ have in scientific notation, if only one digit is left of the decimal point.
316
+ """
317
+ array = array[array != 0]
318
+ if len(array) == 0:
319
+ # No non-zero values -> define order of magnitude as 0
320
+ return 0
321
+ return int(np.max(np.floor(np.log10(np.abs(array)))).item())