biotite 1.2.0__cp311-cp311-macosx_11_0_arm64.whl → 1.3.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)
  1. biotite/application/viennarna/rnaplot.py +7 -7
  2. biotite/interface/openmm/__init__.py +4 -0
  3. biotite/interface/pymol/__init__.py +3 -0
  4. biotite/interface/rdkit/__init__.py +4 -0
  5. biotite/interface/version.py +23 -0
  6. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  7. biotite/sequence/align/banded.pyx +1 -1
  8. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  9. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  10. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  11. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  12. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  13. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  14. biotite/sequence/align/multiple.pyx +1 -2
  15. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  16. biotite/sequence/align/pairwise.pyx +2 -4
  17. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  19. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  20. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  21. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  22. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  23. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  24. biotite/structure/basepairs.py +13 -14
  25. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  26. biotite/structure/box.py +140 -2
  27. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  28. biotite/structure/celllist.pyx +0 -1
  29. biotite/structure/chains.py +15 -21
  30. biotite/structure/charges.cpython-311-darwin.so +0 -0
  31. biotite/structure/dotbracket.py +4 -4
  32. biotite/structure/graphics/rna.py +19 -16
  33. biotite/structure/hbond.py +1 -2
  34. biotite/structure/info/components.bcif +0 -0
  35. biotite/structure/io/pdb/convert.py +84 -2
  36. biotite/structure/io/pdb/file.py +79 -2
  37. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  38. biotite/structure/io/pdbx/compress.py +69 -32
  39. biotite/structure/io/pdbx/convert.py +207 -44
  40. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  41. biotite/structure/io/pdbx/encoding.pyx +39 -23
  42. biotite/structure/pseudoknots.py +6 -6
  43. biotite/structure/residues.py +10 -27
  44. biotite/structure/rings.py +1 -1
  45. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  46. biotite/structure/sasa.pyx +28 -29
  47. biotite/structure/segments.py +55 -0
  48. biotite/structure/spacegroups.json +1567 -0
  49. biotite/structure/spacegroups.license +26 -0
  50. biotite/structure/superimpose.py +1 -191
  51. biotite/structure/transform.py +220 -1
  52. biotite/version.py +2 -2
  53. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/METADATA +4 -34
  54. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/RECORD +56 -54
  55. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +3 -1
  56. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdbx/convert.py

@@ -13,17 +13,30 @@ __all__ = [
     "set_component",
     "list_assemblies",
     "get_assembly",
+    "get_unit_cell",
     "get_sse",
 ]

 import itertools
 import warnings
+from collections import defaultdict
 import numpy as np
 from biotite.file import InvalidFileError
 from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
-from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
+from biotite.structure.atoms import (
+    AtomArray,
+    AtomArrayStack,
+    concatenate,
+    repeat,
+)
 from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
-from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
+from biotite.structure.box import (
+    coord_to_fraction,
+    fraction_to_coord,
+    space_group_transforms,
+    unitcell_from_vectors,
+    vectors_from_unitcell,
+)
 from biotite.structure.error import BadStructureError
 from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
 from biotite.structure.filter import (
@@ -33,6 +46,7 @@ from biotite.structure.filter import (
     filter_first_altloc,
     filter_highest_occupancy_altloc,
 )
+from biotite.structure.geometry import centroid
 from biotite.structure.io.pdbx.bcif import (
     BinaryCIFBlock,
     BinaryCIFColumn,
@@ -46,7 +60,7 @@ from biotite.structure.residues import (
     get_residue_positions,
     get_residue_starts_for,
 )
-from biotite.structure.util import matrix_rotate
+from biotite.structure.transform import AffineTransformation

 # Bond types in `struct_conn` category that refer to covalent bonds
 PDBX_BOND_TYPE_ID_TO_TYPE = {
@@ -125,8 +139,7 @@ _other_type_list = [

 def _filter(category, index):
     """
-    Reduce the ``atom_site`` category to the values for the given
-    model.
+    Reduce the given category to the values selected by the given index.
     """
     Category = type(category)
     Column = Category.subcomponent_class()
@@ -391,7 +404,16 @@ def get_structure(

     # The below part is the same for both, AtomArray and AtomArrayStack
     _fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
+
+    atoms, altloc_filtered_atom_site = _filter_altloc(atoms, model_atom_site, altloc)
+
     if include_bonds:
+        if altloc == "all":
+            raise ValueError(
+                "Bond computation is not supported with `altloc='all'`, consider using "
+                "'connect_via_residue_names()' afterwards"
+            )
+
         if "chem_comp_bond" in block:
             try:
                 custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
@@ -407,10 +429,13 @@
             bonds = connect_via_residue_names(atoms)
         if "struct_conn" in block:
             bonds = bonds.merge(
-                _parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
+                _parse_inter_residue_bonds(
+                    altloc_filtered_atom_site,
+                    block["struct_conn"],
+                    atom_count=atoms.array_length(),
+                )
             )
         atoms.bonds = bonds
-    atoms = _filter_altloc(atoms, model_atom_site, altloc)

     return atoms

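Altloc filtering now happens before bond computation, so the bond list is built only for atoms that survive filtering, and the incompatible combination is rejected up front. A minimal usage sketch of the new behavior, assuming a local mmCIF file named `1aki.cif` (the file name is a placeholder):

    import biotite.structure.io.pdbx as pdbx
    from biotite.structure import connect_via_residue_names

    cif_file = pdbx.CIFFile.read("1aki.cif")
    # altloc="all" combined with include_bonds=True now raises a ValueError
    # atoms = pdbx.get_structure(cif_file, model=1, altloc="all", include_bonds=True)

    # Workaround suggested by the error message: derive bonds afterwards
    atoms = pdbx.get_structure(cif_file, model=1, altloc="all")
    atoms.bonds = connect_via_residue_names(atoms)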
@@ -570,11 +595,12 @@ def _parse_intra_residue_bonds(chem_comp_bond):
     return custom_bond_dict


-def _parse_inter_residue_bonds(atom_site, struct_conn):
+def _parse_inter_residue_bonds(atom_site, struct_conn, atom_count=None):
     """
     Create inter-residue bonds by parsing the ``struct_conn`` category.
     The atom indices of each bond are found by matching the bond labels
     to the ``atom_site`` category.
+    If `atom_count` is None, it will be inferred from the ``atom_site`` category.
     """
     # Identity symmetry operation
     IDENTITY = "1_555"
@@ -643,7 +669,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]

     return BondList(
-        atom_site.row_count,
+        atom_count if atom_count is not None else atom_site.row_count,
         np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
     )

@@ -739,25 +765,28 @@ def _get_struct_conn_col_name(col_name, partner):


 def _filter_altloc(array, atom_site, altloc):
+    """
+    Filter the given :class:`AtomArray` and ``atom_site`` category to the rows
+    specified by the given *altloc* identifier.
+    """
     altloc_ids = atom_site.get("label_alt_id")
     occupancy = atom_site.get("occupancy")

-    # Filter altloc IDs and return
-    if altloc_ids is None:
-        return array
+    if altloc == "all":
+        array.set_annotation("altloc_id", altloc_ids.as_array(str))
+        return array, atom_site
+    elif altloc_ids is None or (altloc_ids.mask.array != MaskValue.PRESENT).all():
+        # No altlocs in atom_site category
+        return array, atom_site
     elif altloc == "occupancy" and occupancy is not None:
-        return array[
-            ...,
-            filter_highest_occupancy_altloc(
-                array, altloc_ids.as_array(str), occupancy.as_array(float)
-            ),
-        ]
+        mask = filter_highest_occupancy_altloc(
+            array, altloc_ids.as_array(str), occupancy.as_array(float)
+        )
+        return array[..., mask], _filter(atom_site, mask)
     # 'first' is also fallback if file has no occupancy information
     elif altloc == "first":
-        return array[..., filter_first_altloc(array, altloc_ids.as_array(str))]
-    elif altloc == "all":
-        array.set_annotation("altloc_id", altloc_ids.as_array(str))
-        return array
+        mask = filter_first_altloc(array, altloc_ids.as_array(str))
+        return array[..., mask], _filter(atom_site, mask)
     else:
         raise ValueError(f"'{altloc}' is not a valid 'altloc' option")

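`_filter_altloc` now returns the filtered ``atom_site`` category alongside the filtered atoms, because `_parse_inter_residue_bonds` resolves bond partners by row position in ``atom_site``; the two views must stay index-aligned. A toy NumPy sketch of that invariant (all names are illustrative, not biotite API):

    import numpy as np

    atom_names = np.array(["N", "CA", "CA", "C"])  # stand-in for the AtomArray
    alt_ids = np.array([".", "A", "B", "."])       # stand-in for label_alt_id
    keep = np.isin(alt_ids, [".", "A"])            # e.g. a 'first' altloc policy

    # Applying the same mask to both views keeps row i of one view
    # describing the same atom as row i of the other
    filtered_names = atom_names[keep]
    filtered_alt_ids = alt_ids[keep]
    assert filtered_names.shape == filtered_alt_ids.shape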
@@ -1686,7 +1715,7 @@ def get_assembly(
     )

     ### Get transformations and apply them to the affected asym IDs
-    assembly = None
+    chain_ops = defaultdict(list)
     for id, op_expr, asym_id_expr in zip(
         assembly_gen_category["assembly_id"].as_array(str),
         assembly_gen_category["oper_expression"].as_array(str),
@@ -1695,19 +1724,22 @@
         # Find the operation expressions for given assembly ID
         # We already asserted that the ID is actually present
         if id == assembly_id:
-            operations = _parse_operation_expression(op_expr)
-            asym_ids = asym_id_expr.split(",")
-            # Filter affected asym IDs
-            sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
-            sub_assembly = _apply_transformations(
-                sub_structure, transformations, operations
-            )
-            # Merge the chains with asym IDs for this operation
-            # with chains from other operations
-            if assembly is None:
-                assembly = sub_assembly
-            else:
-                assembly += sub_assembly
+            for chain_id in asym_id_expr.split(","):
+                chain_ops[chain_id].extend(_parse_operation_expression(op_expr))
+
+    sub_assemblies = []
+    for asym_id, op_list in chain_ops.items():
+        sub_struct = structure[..., structure.label_asym_id == asym_id]
+        sub_assembly = _apply_transformations(sub_struct, transformations, op_list)
+        # Merge the chain's sub_assembly into the rest of the assembly
+        sub_assemblies.append(sub_assembly)
+    assembly = concatenate(sub_assemblies)
+
+    # Sort AtomArray or AtomArrayStack by 'sym_id'
+    max_sym_id = assembly.sym_id.max()
+    assembly = concatenate(
+        [assembly[..., assembly.sym_id == sym_id] for sym_id in range(max_sym_id + 1)]
+    )

     # Remove 'label_asym_id', if it was not included in the original
     # user-supplied 'extra_fields'
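The rewritten assembly building first gathers all operations per chain and then applies them chain by chain, instead of transforming asym ID groups as they appear in the file. A self-contained sketch of the grouping step, with made-up operation names:

    from collections import defaultdict

    # (assembly_id, parsed operations, asym ID expression) per generator row
    gen_rows = [
        ("1", ["op1"], "A,B"),
        ("1", ["op2"], "B"),
    ]
    chain_ops = defaultdict(list)
    for assembly_id, operations, asym_id_expr in gen_rows:
        for chain_id in asym_id_expr.split(","):
            chain_ops[chain_id].extend(operations)

    print(dict(chain_ops))  # {'A': ['op1'], 'B': ['op1', 'op2']}

The final `concatenate` over `sym_id` then regroups the atoms so that each symmetric copy forms a contiguous block.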
@@ -1730,11 +1762,7 @@ def _apply_transformations(structure, transformation_dict, operations):
         # Execute for each transformation step
         # in the operation expression
         for op_step in operation:
-            rotation_matrix, translation_vector = transformation_dict[op_step]
-            # Rotate
-            coord = matrix_rotate(coord, rotation_matrix)
-            # Translate
-            coord += translation_vector
+            coord = transformation_dict[op_step].apply(coord)
         assembly_coord[i] = coord

     assembly = repeat(structure, assembly_coord)
@@ -1746,8 +1774,7 @@


 def _get_transformations(struct_oper):
     """
-    Get transformation operation in terms of rotation matrix and
-    translation for each operation ID in ``pdbx_struct_oper_list``.
+    Get affine transformation for each operation ID in ``pdbx_struct_oper_list``.
     """
     transformation_dict = {}
     for index, id in enumerate(struct_oper["id"].as_array(str)):
@@ -1763,7 +1790,9 @@ def _get_transformations(struct_oper):
         translation_vector = np.array(
             [struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
         )
-        transformation_dict[id] = (rotation_matrix, translation_vector)
+        transformation_dict[id] = AffineTransformation(
+            np.zeros(3), rotation_matrix, translation_vector
+        )
     return transformation_dict

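`AffineTransformation` folds the former rotate-then-translate pair into a single object. A quick numerical check of the equivalence, assuming the constructor signature used above (center translation, rotation matrix, target translation) and that `apply()` accepts a raw coordinate array, as the hunk itself does:

    import numpy as np
    from biotite.structure import AffineTransformation

    rng = np.random.default_rng(0)
    coord = rng.random((5, 3))
    # 90 degree rotation about the z-axis
    rotation = np.array([
        [0.0, -1.0, 0.0],
        [1.0,  0.0, 0.0],
        [0.0,  0.0, 1.0],
    ])
    translation = np.array([1.0, 2.0, 3.0])

    transform = AffineTransformation(np.zeros(3), rotation, translation)
    manual = coord @ rotation.T + translation  # the old matrix_rotate() + vector
    assert np.allclose(transform.apply(coord), manual)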
@@ -1820,6 +1849,140 @@ def _convert_string_to_sequence(string, stype):
         raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)


+def get_unit_cell(
+    pdbx_file,
+    center=True,
+    model=None,
+    data_block=None,
+    altloc="first",
+    extra_fields=None,
+    use_author_fields=True,
+    include_bonds=False,
+):
+    """
+    Build a structure model containing all symmetric copies of the structure within a
+    single unit cell.
+
+    This function receives the data from the ``symmetry`` and ``atom_site`` categories
+    in the file.
+    Consequently, these categories must be present in the file.
+
+    Parameters
+    ----------
+    pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
+        The file object.
+    center : bool, optional
+        If set to true, each symmetric copy will be moved inside the unit cell
+        dimensions, if its centroid is outside.
+        By default, the copies are created using the raw space group
+        transformations, which may put them one unit cell length further away.
+    model : int, optional
+        If this parameter is given, the function will return an
+        :class:`AtomArray` from the atoms corresponding to the given
+        model number (starting at 1).
+        Negative values are used to index models starting from the last
+        model instead of the first model.
+        If this parameter is omitted, an :class:`AtomArrayStack`
+        containing all models will be returned, even if the structure
+        contains only one model.
+    data_block : str, optional
+        The name of the data block.
+        Default is the first (and most times only) data block of the
+        file.
+        If the data block object is passed directly to `pdbx_file`,
+        this parameter is ignored.
+    altloc : {'first', 'occupancy', 'all'}
+        This parameter defines how *altloc* IDs are handled:
+        - ``'first'`` - Use atoms that have the first *altloc* ID
+          appearing in a residue.
+        - ``'occupancy'`` - Use atoms that have the *altloc* ID
+          with the highest occupancy for a residue.
+        - ``'all'`` - Use all atoms.
+          Note that this leads to duplicate atoms.
+          When this option is chosen, the ``altloc_id`` annotation
+          array is added to the returned structure.
+    extra_fields : list of str, optional
+        The strings in the list are entry names that are
+        additionally added as annotation arrays.
+        The annotation category name will be the same as the PDBx
+        subcategory name.
+        The array type is always `str`.
+        An exception are the special field identifiers:
+        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
+        These will convert the fitting subcategory into an
+        annotation array with reasonable type.
+    use_author_fields : bool, optional
+        Some fields can be read from two alternative sources,
+        for example both ``label_seq_id`` and ``auth_seq_id`` describe
+        the ID of the residue.
+        While the ``label_xxx`` fields can be used as official pointers
+        to other categories in the file, the ``auth_xxx``
+        fields are set by the author(s) of the structure and are
+        consistent with the corresponding values in PDB files.
+        If `use_author_fields` is true, the annotation arrays will be
+        read from the ``auth_xxx`` fields (if applicable),
+        otherwise from the ``label_xxx`` fields.
+    include_bonds : bool, optional
+        If set to true, a :class:`BondList` will be created for the
+        resulting :class:`AtomArray` containing the bond information
+        from the file.
+        Bonds, whose order could not be determined from the
+        *Chemical Component Dictionary*
+        (e.g. especially inter-residue bonds),
+        have :attr:`BondType.ANY`, since the PDB format itself does
+        not support bond orders.
+
+    Returns
+    -------
+    unit_cell : AtomArray or AtomArrayStack
+        The structure representing the unit cell.
+        The return type depends on the `model` parameter.
+        Contains the `sym_id` annotation, which enumerates the copies of the
+        asymmetric unit in the unit cell.
+
+    Examples
+    --------
+
+    >>> import os.path
+    >>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
+    >>> unit_cell = get_unit_cell(file, model=1)
+    """
+    block = _get_block(pdbx_file, data_block)
+
+    try:
+        space_group = block["symmetry"]["space_group_name_H-M"].as_item()
+    except KeyError:
+        raise InvalidFileError("File has no 'symmetry.space_group_name_H-M' field")
+    transforms = space_group_transforms(space_group)
+
+    asym = get_structure(
+        pdbx_file,
+        model,
+        data_block,
+        altloc,
+        extra_fields,
+        use_author_fields,
+        include_bonds,
+    )
+
+    fractional_asym_coord = coord_to_fraction(asym.coord, asym.box)
+    unit_cell_copies = []
+    for transform in transforms:
+        fractional_coord = transform.apply(fractional_asym_coord)
+        if center:
+            # If the centroid is outside the box, move the copy inside the box
+            orig_centroid = centroid(fractional_coord)
+            new_centroid = orig_centroid % 1
+            fractional_coord += (new_centroid - orig_centroid)[..., np.newaxis, :]
+        unit_cell_copies.append(fraction_to_coord(fractional_coord, asym.box))
+
+    unit_cell = repeat(asym, np.stack(unit_cell_copies, axis=0))
+    unit_cell.set_annotation(
+        "sym_id", np.repeat(np.arange(len(transforms)), asym.array_length())
+    )
+    return unit_cell
+
+
 def get_sse(pdbx_file, data_block=None, match_model=None):
     """
     Get the secondary structure from a PDBx file.
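A short usage sketch for the new `get_unit_cell()` above, mirroring its docstring example; it assumes a crystallographic mmCIF file `1f2n.cif` is available locally:

    import numpy as np
    import biotite.structure.io.pdbx as pdbx

    cif_file = pdbx.CIFFile.read("1f2n.cif")
    unit_cell = pdbx.get_unit_cell(cif_file, model=1)
    # One copy of the asymmetric unit per space group transformation
    print(np.unique(unit_cell.sym_id))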
biotite/structure/io/pdbx/encoding.pyx

@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
         # since the file content may be invalid/malicious.
         raise NotImplementedError()

+    def __str__(self):
+        # Restore original behavior, as the `__str__()` implementation of
+        # `_Component` may require serialization, which is not possible for some
+        # encodings prior to the first encoding pass
+        return object.__str__(self)
+

 @dataclass
 class ByteArrayEncoding(Encoding):
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
         )

         # Round to avoid wrong values due to floating point inaccuracies
-        return np.round(data * self.factor).astype(np.int32)
+        scaled_data = np.round(data * self.factor)
+        return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)

     def decode(self, data):
         return (data / self.factor).astype(
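The checked cast matters because `ndarray.astype(np.int32)` wraps out-of-range values silently instead of failing. A pure NumPy demonstration of the failure mode that `_safe_cast` now turns into an explicit error:

    import numpy as np

    data = np.array([3.0e10])  # exceeds the int32 range after scaling
    wrapped = np.round(data).astype(np.int32)
    print(wrapped)  # a wrapped, meaningless value instead of an error

    info = np.iinfo(np.int32)
    if data.max() > info.max or data.min() < info.min:
        raise ValueError("Values do not fit into the given dtype")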
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
             self.min, self.max, self.num_steps, dtype=data.dtype
         )
         indices = np.searchsorted(steps, data, side="left")
-        return indices.astype(np.int32, copy=False)
+        return _safe_cast(indices, np.int32)

     def decode(self, data):
         output = data * (self.max - self.min) / (self.num_steps - 1)
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
         if self.origin is None:
             self.origin = data[0]

+        # Differences (including `np.diff`) return an array with the same dtype as the
+        # input array
+        # As the input dtype may be unsigned, the output dtype could underflow,
+        # if the difference is negative
+        # -> cast to int64 to avoid this
+        data = data.astype(np.int64, copy=False)
         data = data - self.origin
-        return np.diff(data, prepend=0).astype(np.int32, copy=False)
+        return _safe_cast(np.diff(data, prepend=0), np.int32)

     def decode(self, data):
         output = np.cumsum(data, dtype=self.src_type.to_dtype())
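The unsigned underflow mentioned in the new comments is easy to reproduce, since `np.diff` keeps the input dtype. A minimal demonstration of the bug and the applied fix:

    import numpy as np

    data = np.array([5, 3], dtype=np.uint32)
    print(np.diff(data))                   # [4294967294] -- wrapped around
    print(np.diff(data.astype(np.int64)))  # [-2] -- correct after the int64 cast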
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
         # Only positive values -> use unsigned integers
         self.is_unsigned = data.min().item() >= 0

-        data = data.astype(np.int32, copy=False)
+        data = _safe_cast(data, np.int32)
         return self._encode(
             data, np.empty(0, dtype=self._determine_packed_dtype())
         )
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
         else:
             check_present = True

-        string_order = np.argsort(self.strings).astype(np.int32)
+        string_order = _safe_cast(np.argsort(self.strings), np.int32)
         sorted_strings = self.strings[string_order]
         sorted_indices = np.searchsorted(sorted_strings, data)
         indices = string_order[sorted_indices]
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
     return attribute_name[0].lower() + attribute_name[1:]


-def _safe_cast(array, dtype):
-    dtype = np.dtype(dtype)
-    if dtype == array.dtype:
+def _safe_cast(array, dtype, allow_decimal_loss=False):
+    source_dtype = array.dtype
+    target_dtype = np.dtype(dtype)
+
+    if target_dtype == source_dtype:
         return array
-    if np.issubdtype(dtype, np.integer):
-        if not np.issubdtype(array.dtype, np.integer):
-            raise ValueError("Cannot cast floating point to integer")
-        dtype_info = np.iinfo(dtype)
-        if np.any(array < dtype_info.min) or np.any(array > dtype_info.max):
-            raise ValueError("Integer values do not fit into the given dtype")
-    return array.astype(dtype)
-
-
-def _get_n_decimals(value, tolerance):
-    MAX_DECIMALS = 10
-    for n in range(MAX_DECIMALS):
-        if abs(value - round(value, n)) < tolerance:
-            return n
-    return MAX_DECIMALS
+
+    if np.issubdtype(target_dtype, np.integer):
+        if np.issubdtype(source_dtype, np.floating):
+            if not allow_decimal_loss:
+                raise ValueError("Cannot cast floating point to integer")
+            if not np.isfinite(array).all():
+                raise ValueError("Data contains non-finite values")
+        elif not np.issubdtype(source_dtype, np.integer):
+            # Neither float, nor integer -> cannot cast
+            raise ValueError(f"Cannot cast '{source_dtype}' to integer")
+        dtype_info = np.iinfo(target_dtype)
+        # Check if an integer underflow/overflow would occur during conversion
+        if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
+            raise ValueError("Values do not fit into the given dtype")
+
+    return array.astype(target_dtype)
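Condensed, the new rules are: equal dtypes pass through; float input needs `allow_decimal_loss=True` and must be finite; anything that is neither float nor integer is rejected; the integer range is always checked. A self-contained restatement with example calls (`safe_cast` is a local stand-in for the private `_safe_cast`):

    import numpy as np

    def safe_cast(array, dtype, allow_decimal_loss=False):
        # Local restatement of the new `_safe_cast` semantics
        target = np.dtype(dtype)
        if target == array.dtype:
            return array
        if np.issubdtype(target, np.integer):
            if np.issubdtype(array.dtype, np.floating):
                if not allow_decimal_loss:
                    raise ValueError("Cannot cast floating point to integer")
                if not np.isfinite(array).all():
                    raise ValueError("Data contains non-finite values")
            elif not np.issubdtype(array.dtype, np.integer):
                raise ValueError(f"Cannot cast '{array.dtype}' to integer")
            info = np.iinfo(target)
            if np.max(array) > info.max or np.min(array) < info.min:
                raise ValueError("Values do not fit into the given dtype")
        return array.astype(target)

    print(safe_cast(np.array([1, 2], dtype=np.int64), np.int32))          # [1 2]
    print(safe_cast(np.array([1.9]), np.int32, allow_decimal_loss=True))  # [1]
    # safe_cast(np.array([1.9]), np.int32)    -> ValueError (decimal loss)
    # safe_cast(np.array([2**40]), np.int32)  -> ValueError (out of range)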
biotite/structure/pseudoknots.py

@@ -148,7 +148,7 @@ class _Region:
     region_pairs : ndarray, dtype=int
         The indices of the base pairs in ``base_pairs`` that are part of
         the region.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.
     """

@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
     base_pairs : ndarray, dtype=int, shape=(n, 2)
         Each row is equivalent to one base pair and contains the first
         indices of the residues corresponding to each base.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.

     Returns
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
         return i


-def _get_region_array_for(regions, content=[], dtype=[]):
+def _get_region_array_for(regions, content=(), dtype=()):
     """
     Get a :class:`ndarray` of region objects. Each object occurs twice,
     representing its start and end point. The regions positions in the
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
     ----------
     regions : set {_region, ...}
         The regions to be considered
-    content : list [function, ...] (default: [])
+    content : list [function, ...]
         The functions to be considered for custom outputs. For a given
         region they must return a tuple of which the first value is
         placed at the start position and the second value at the end
         position of the region relative to the other regions.
-    dtype : list [str, ...] (default: [])
+    dtype : list [str, ...]
         The data type of the output of the custom functions.

     Returns
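Swapping the `[]` defaults for `()` sidesteps Python's mutable default argument pitfall: defaults are evaluated once at definition time, so a mutated list default leaks state across calls. A classic demonstration:

    def append_bad(item, bucket=[]):   # one shared list for all calls
        bucket.append(item)
        return bucket

    def append_good(item, bucket=()):  # immutable default, fresh list per call
        return list(bucket) + [item]

    print(append_bad(1), append_bad(2))    # [1, 2] [1, 2] -- shared state
    print(append_good(1), append_good(2))  # [1] [2]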
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
         The maximum pseudoknot order to be found. If a base pair would
         be of a higher order, its order is specified as -1. If ``None``
         is given, all base pairs are evaluated.
-    order : int (default: 0)
+    order : int
         The order that is currently evaluated.

     Returns
biotite/structure/residues.py

@@ -21,23 +21,23 @@ __all__ = [
     "residue_iter",
 ]

-import numpy as np
 from biotite.structure.segments import (
     apply_segment_wise,
     get_segment_masks,
     get_segment_positions,
+    get_segment_starts,
     get_segment_starts_for,
     segment_iter,
     spread_segment_wise,
 )


-def get_residue_starts(array, add_exclusive_stop=False):
+def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
     """
     Get indices for an atom array, each indicating the beginning of
     a residue.

-    A new residue starts, either when the chain ID, residue ID,
+    A new residue starts, either when the chain ID, sym ID, residue ID,
     insertion code or residue name changes from one to the next atom.

     Parameters
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
         If true, the exclusive stop of the input atom array, i.e.
         ``array.array_length()``, is added to the returned array of
         start indices as last element.
+    extra_categories : tuple of str, optional
+        Additional annotation categories that induce the start of a new residue,
+        when their value changes from one atom to the next.

     Returns
     -------
@@ -69,30 +72,10 @@
     [  0  16  35  56  75  92 116 135 157 169 176 183 197 208 219 226 250 264
      278 292 304]
     """
-    if array.array_length() == 0:
-        return np.array([], dtype=int)
-
-    # These mask are 'true' at indices where the value changes
-    chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
-    res_id_changes = array.res_id[1:] != array.res_id[:-1]
-    ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
-    res_name_changes = array.res_name[1:] != array.res_name[:-1]
-
-    # If any of these annotation arrays change, a new residue starts
-    residue_change_mask = (
-        chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
-    )
-
-    # Convert mask to indices
-    # Add 1, to shift the indices from the end of a residue
-    # to the start of a new residue
-    residue_starts = np.where(residue_change_mask)[0] + 1
-
-    # The first residue is not included yet -> Insert '[0]'
-    if add_exclusive_stop:
-        return np.concatenate(([0], residue_starts, [array.array_length()]))
-    else:
-        return np.concatenate(([0], residue_starts))
+    categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
+    if "sym_id" in array.get_annotation_categories():
+        categories.append("sym_id")
+    return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)


 def apply_residue_wise(array, data, function, axis=None):
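With the logic delegated to `get_segment_starts`, the new `extra_categories` parameter lets callers split residues on additional annotations. A small runnable sketch (the two-atom array and the `altloc_id` values are contrived for illustration):

    import numpy as np
    import biotite.structure as struc

    array = struc.array([
        struc.Atom([0.0, 0.0, 0.0], chain_id="A", res_id=1,
                   res_name="ALA", atom_name="CA"),
        struc.Atom([1.5, 0.0, 0.0], chain_id="A", res_id=1,
                   res_name="ALA", atom_name="CB"),
    ])
    array.set_annotation("altloc_id", np.array(["A", "B"]))

    # A change in 'altloc_id' now also starts a new residue
    print(struc.get_residue_starts(array, extra_categories=("altloc_id",)))  # [0 1]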
biotite/structure/rings.py

@@ -149,7 +149,7 @@ def find_stacking_interactions(

     The conditions for pi-stacking are :footcite:`Wojcikowski2015` :

-    - The ring centroids must be within cutoff distance (default: 6.5 Å).
+    - The ring centroids must be within `centroid_cutoff` distance.
       While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
       adopted from :footcite:`Bouysset2021` to better identify perpendicular
       stacking interactions.
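For completeness, a hedged usage sketch of the documented parameter; the keyword name `centroid_cutoff` is taken from the hunk above, and fetching `1bna` from the RCSB is only an illustrative way to obtain a nucleic acid structure:

    import biotite.database.rcsb as rcsb
    import biotite.structure.io.pdbx as pdbx
    from biotite.structure import find_stacking_interactions

    cif_file = pdbx.CIFFile.read(rcsb.fetch("1bna", "cif"))
    atoms = pdbx.get_structure(cif_file, model=1)
    interactions = find_stacking_interactions(atoms, centroid_cutoff=6.5)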