PyPI - biotite - Version diff - 1.2.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl - Mend

biotite 1.2.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite has been flagged as potentially problematic. (The "more details" link from the original page is not preserved in this text.)

Files changed (56)

biotite/application/viennarna/rnaplot.py +7 -7
biotite/interface/openmm/__init__.py +4 -0
biotite/interface/pymol/__init__.py +3 -0
biotite/interface/rdkit/__init__.py +4 -0
biotite/interface/version.py +23 -0
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +1 -1
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -2
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +2 -4
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/structure/basepairs.py +13 -14
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/box.py +140 -2
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +0 -1
biotite/structure/chains.py +15 -21
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/dotbracket.py +4 -4
biotite/structure/graphics/rna.py +19 -16
biotite/structure/hbond.py +1 -2
biotite/structure/info/components.bcif +0 -0
biotite/structure/io/pdb/convert.py +84 -2
biotite/structure/io/pdb/file.py +79 -2
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/compress.py +69 -32
biotite/structure/io/pdbx/convert.py +207 -44
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +39 -23
biotite/structure/pseudoknots.py +6 -6
biotite/structure/residues.py +10 -27
biotite/structure/rings.py +1 -1
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +28 -29
biotite/structure/segments.py +55 -0
biotite/structure/spacegroups.json +1567 -0
biotite/structure/spacegroups.license +26 -0
biotite/structure/superimpose.py +1 -191
biotite/structure/transform.py +220 -1
biotite/version.py +2 -2
{biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/METADATA +4 -34
{biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/RECORD +56 -54
{biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
{biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdb/convert.py CHANGED Viewed

@@ -15,9 +15,11 @@ __all__ = [
     "set_structure",
     "list_assemblies",
     "get_assembly",
-    "get_symmetry_mates",
+    "get_unit_cell",
 ]
+import warnings
 def get_model_count(pdb_file):
     """
@@ -232,6 +234,80 @@ def get_assembly(
     )
+def get_unit_cell(
+    pdb_file, model=None, altloc="first", extra_fields=[], include_bonds=False
+):
+    """
+    Build a structure model containing all symmetric copies
+    of the structure within a single unit cell, given by the space
+    group.
+    This function receives the data from ``REMARK 290`` records in
+    the file.
+    Consequently, this remark must be present in the file, which is
+    usually only true for crystal structures.
+    Parameters
+    ----------
+    pdb_file : PDBFile
+        The file object.
+    model : int, optional
+        If this parameter is given, the function will return an
+        :class:`AtomArray` from the atoms corresponding to the given
+        model number (starting at 1).
+        Negative values are used to index models starting from the
+        last model instead of the first model.
+        If this parameter is omitted, an :class:`AtomArrayStack`
+        containing all models will be returned, even if the
+        structure contains only one model.
+    altloc : {'first', 'occupancy', 'all'}
+        This parameter defines how *altloc* IDs are handled:
+            - ``'first'`` - Use atoms that have the first
+                *altloc* ID appearing in a residue.
+            - ``'occupancy'`` - Use atoms that have the *altloc* ID
+                with the highest occupancy for a residue.
+            - ``'all'`` - Use all atoms.
+                Note that this leads to duplicate atoms.
+                When this option is chosen, the ``altloc_id``
+                annotation array is added to the returned structure.
+    extra_fields : list of str, optional
+        The strings in the list are optional annotation categories
+        that should be stored in the output array or stack.
+        These are valid values:
+        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
+        ``'charge'``.
+    include_bonds : bool, optional
+        If set to true, a :class:`BondList` will be created for the
+        resulting :class:`AtomArray` containing the bond information
+        from the file.
+        Bonds, whose order could not be determined from the
+        *Chemical Component Dictionary*
+        (e.g. especially inter-residue bonds),
+        have :attr:`BondType.ANY`, since the PDB format itself does
+        not support bond orders.
+    Returns
+    -------
+    symmetry_mates : AtomArray or AtomArrayStack
+        All atoms within a single unit cell.
+        The return type depends on the `model` parameter.
+    Notes
+    -----
+    To expand the structure beyond a single unit cell, use
+    :func:`repeat_box()` with the return value as its
+    input.
+    Examples
+    --------
+    >>> import os.path
+    >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
+    >>> atoms_in_unit_cell = get_unit_cell(file, model=1)
+    """
+    return pdb_file.get_unit_cell(model, altloc, extra_fields, include_bonds)
 def get_symmetry_mates(
     pdb_file, model=None, altloc="first", extra_fields=[], include_bonds=False
 ):
@@ -245,6 +321,8 @@ def get_symmetry_mates(
     Consequently, this remark must be present in the file, which is
     usually only true for crystal structures.
+    DEPRECATED: Use :func:`get_unit_cell()` instead.
     Parameters
     ----------
     pdb_file : PDBFile
@@ -303,4 +381,8 @@ def get_symmetry_mates(
     >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
     >>> atoms_in_unit_cell = get_symmetry_mates(file, model=1)
     """
-    return pdb_file.get_symmetry_mates(model, altloc, extra_fields, include_bonds)
+    warnings.warn(
+        "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
+        DeprecationWarning,
+    )
+    return pdb_file.get_unit_cell(model, altloc, extra_fields, include_bonds)

biotite/structure/io/pdb/file.py CHANGED Viewed

@@ -954,7 +954,7 @@ class PDBFile(TextFile):
         return assembly
-    def get_symmetry_mates(
+    def get_unit_cell(
         self, model=None, altloc="first", extra_fields=[], include_bonds=False
     ):
         """
@@ -1021,7 +1021,7 @@ class PDBFile(TextFile):
         >>> import os.path
         >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
-        >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
+        >>> atoms_in_unit_cell = file.get_unit_cell(model=1)
         """
         # Get base structure
         structure = self.get_structure(
@@ -1041,6 +1041,83 @@ class PDBFile(TextFile):
         rotations, translations = _parse_transformations(transform_lines)
         return _apply_transformations(structure, rotations, translations)
+    def get_symmetry_mates(
+        self, model=None, altloc="first", extra_fields=[], include_bonds=False
+    ):
+        """
+        Build a structure model containing all symmetric copies
+        of the structure within a single unit cell, given by the space
+        group.
+        This function receives the data from ``REMARK 290`` records in
+        the file.
+        Consequently, this remark must be present in the file, which is
+        usually only true for crystal structures.
+        DEPRECATED: Use :meth:`get_unit_cell()` instead.
+        Parameters
+        ----------
+        model : int, optional
+            If this parameter is given, the function will return an
+            :class:`AtomArray` from the atoms corresponding to the given
+            model number (starting at 1).
+            Negative values are used to index models starting from the
+            last model instead of the first model.
+            If this parameter is omitted, an :class:`AtomArrayStack`
+            containing all models will be returned, even if the
+            structure contains only one model.
+        altloc : {'first', 'occupancy', 'all'}
+            This parameter defines how *altloc* IDs are handled:
+                - ``'first'`` - Use atoms that have the first
+                  *altloc* ID appearing in a residue.
+                - ``'occupancy'`` - Use atoms that have the *altloc* ID
+                  with the highest occupancy for a residue.
+                - ``'all'`` - Use all atoms.
+                  Note that this leads to duplicate atoms.
+                  When this option is chosen, the ``altloc_id``
+                  annotation array is added to the returned structure.
+        extra_fields : list of str, optional
+            The strings in the list are optional annotation categories
+            that should be stored in the output array or stack.
+            These are valid values:
+            ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
+            ``'charge'``.
+        include_bonds : bool, optional
+            If set to true, a :class:`BondList` will be created for the
+            resulting :class:`AtomArray` containing the bond information
+            from the file.
+            Bonds, whose order could not be determined from the
+            *Chemical Component Dictionary*
+            (e.g. especially inter-residue bonds),
+            have :attr:`BondType.ANY`, since the PDB format itself does
+            not support bond orders.
+        Returns
+        -------
+        symmetry_mates : AtomArray or AtomArrayStack
+            All atoms within a single unit cell.
+            The return type depends on the `model` parameter.
+        Notes
+        -----
+        To expand the structure beyond a single unit cell, use
+        :func:`repeat_box()` with the return value as its
+        input.
+        Examples
+        --------
+        >>> import os.path
+        >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
+        >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
+        """
+        warnings.warn(
+            "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
+            DeprecationWarning,
+        )
+        return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
     def _index_models_and_atoms(self):
         # Line indices where a new model starts
         self._model_start_i = np.array(

biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/io/pdbx/compress.py CHANGED Viewed

@@ -3,6 +3,7 @@ __name__ = "biotite.structure.io.pdbx"
 __author__ = "Patrick Kunzmann"
 import itertools
+import warnings
 import msgpack
 import numpy as np
 import biotite.structure.io.pdbx.bcif as bcif
@@ -17,7 +18,7 @@ from biotite.structure.io.pdbx.encoding import (
 )
-def compress(data, float_tolerance=1e-6):
+def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
     """
     Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
     different data encodings for each data array and selecting the one, which results in
@@ -29,6 +30,12 @@ def compress(data, float_tolerance=1e-6):
         The data to compress.
     float_tolerance : float, optional
         The relative error that is accepted when compressing floating point numbers.
+        DEPRECATED: Use `rtol` instead.
+    rtol, atol : float, optional
+        The compression factor of floating point numbers is chosen such that
+        either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
+        for each value, i.e. the difference between the compressed and uncompressed
+        value is smaller than the tolerance.
     Returns
     -------
@@ -58,55 +65,70 @@ def compress(data, float_tolerance=1e-6):
     >>> print(f"{len(compressed_file.read()) // 1000} KB")
     111 KB
     """
+    if float_tolerance is not None:
+        warnings.warn(
+            "The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
+            DeprecationWarning,
+        )
     match type(data):
         case bcif.BinaryCIFFile:
-            return _compress_file(data, float_tolerance)
+            return _compress_file(data, rtol, atol)
         case bcif.BinaryCIFBlock:
-            return _compress_block(data, float_tolerance)
+            return _compress_block(data, rtol, atol)
         case bcif.BinaryCIFCategory:
-            return _compress_category(data, float_tolerance)
+            return _compress_category(data, rtol, atol)
         case bcif.BinaryCIFColumn:
-            return _compress_column(data, float_tolerance)
+            return _compress_column(data, rtol, atol)
         case bcif.BinaryCIFData:
-            return _compress_data(data, float_tolerance)
+            return _compress_data(data, rtol, atol)
         case _:
             raise TypeError(f"Unsupported type {type(data).__name__}")
-def _compress_file(bcif_file, float_tolerance):
+def _compress_file(bcif_file, rtol, atol):
     compressed_file = bcif.BinaryCIFFile()
     for block_name, bcif_block in bcif_file.items():
-        compressed_block = _compress_block(bcif_block, float_tolerance)
+        try:
+            compressed_block = _compress_block(bcif_block, rtol, atol)
+        except Exception:
+            raise ValueError(f"Failed to compress block '{block_name}'")
         compressed_file[block_name] = compressed_block
     return compressed_file
-def _compress_block(bcif_block, float_tolerance):
+def _compress_block(bcif_block, rtol, atol):
     compressed_block = bcif.BinaryCIFBlock()
     for category_name, bcif_category in bcif_block.items():
-        compressed_category = _compress_category(bcif_category, float_tolerance)
+        try:
+            compressed_category = _compress_category(bcif_category, rtol, atol)
+        except Exception:
+            raise ValueError(f"Failed to compress category '{category_name}'")
         compressed_block[category_name] = compressed_category
     return compressed_block
-def _compress_category(bcif_category, float_tolerance):
+def _compress_category(bcif_category, rtol, atol):
     compressed_category = bcif.BinaryCIFCategory()
     for column_name, bcif_column in bcif_category.items():
-        compressed_column = _compress_column(bcif_column, float_tolerance)
+        try:
+            compressed_column = _compress_column(bcif_column, rtol, atol)
+        except Exception:
+            raise ValueError(f"Failed to compress column '{column_name}'")
         compressed_category[column_name] = compressed_column
     return compressed_category
-def _compress_column(bcif_column, float_tolerance):
-    data = _compress_data(bcif_column.data, float_tolerance)
+def _compress_column(bcif_column, rtol, atol):
+    data = _compress_data(bcif_column.data, rtol, atol)
     if bcif_column.mask is not None:
-        mask = _compress_data(bcif_column.mask, float_tolerance)
+        mask = _compress_data(bcif_column.mask, rtol, atol)
     else:
         mask = None
     return bcif.BinaryCIFColumn(data, mask)
-def _compress_data(bcif_data, float_tolerance):
+def _compress_data(bcif_data, rtol, atol):
     array = bcif_data.array
     if len(array) == 1:
         # No need to compress a single value -> Use default uncompressed encoding
@@ -123,16 +145,28 @@ def _compress_data(bcif_data, float_tolerance):
         return bcif.BinaryCIFData(array, [encoding])
     elif np.issubdtype(array.dtype, np.floating):
+        if not np.isfinite(array).all():
+            # NaN/inf values cannot be represented by integers
+            # -> do not use integer encoding
+            return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
         to_integer_encoding = FixedPointEncoding(
-            10 ** _get_decimal_places(array, float_tolerance)
+            10 ** _get_decimal_places(array, rtol, atol)
         )
-        integer_array = to_integer_encoding.encode(array)
-        best_encoding, size_compressed = _find_best_integer_compression(integer_array)
-        if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
-            return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
-        else:
-            # The float array is smaller -> encode it directly as bytes
+        try:
+            integer_array = to_integer_encoding.encode(array)
+        except ValueError:
+            # With the given tolerances integer underflow/overflow would occur
+            # -> do not use integer encoding
             return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
+        else:
+            best_encoding, size_compressed = _find_best_integer_compression(
+                integer_array
+            )
+            if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
+                return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
+            else:
+                # The float array is smaller -> encode it directly as bytes
+                return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
     elif np.issubdtype(array.dtype, np.integer):
         array = _to_smallest_integer_type(array)
@@ -273,7 +307,7 @@ def _data_size_in_file(data):
     return len(bytes_in_file)
-def _get_decimal_places(array, tol):
+def _get_decimal_places(array, rtol, atol):
     """
     Get the number of decimal places in a floating point array.
@@ -281,21 +315,24 @@ def _get_decimal_places(array, tol):
     ----------
     array : numpy.ndarray
         The array to analyze.
-    tol : float, optional
-        The relative tolerance allowed when the values are cut off after the returned
-        number of decimal places.
+    rtol, atol : float, optional
+        The relative and absolute tolerance allowed when the values are cut off after
+        the returned number of decimal places.
     Returns
     -------
     decimals : int
         The number of decimal places.
     """
-    # Decimals of NaN or infinite values do not make sense
-    # and 0 would give NaN when rounding on decimals
-    array = array[np.isfinite(array) & (array != 0)]
-    for decimals in itertools.count(start=-_order_magnitude(array)):
+    if rtol <= 0 and atol <= 0:
+        raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
+    # 0 would give NaN when rounding on decimals
+    array = array[array != 0]
+    for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
         error = np.abs(np.round(array, decimals) - array)
-        if np.all(error < tol * np.abs(array)):
+        if decimals == 100:
+            raise
+        if np.all((error < rtol * np.abs(array)) | (error < atol)):
             return decimals