PyPI - biotite - Versions diffs - 1.2.0__cp312-cp312-win_amd64.whl → 1.4.0__cp312-cp312-win_amd64.whl - Mend

biotite 1.2.0__cp312-cp312-win_amd64.whl → 1.4.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

biotite/application/viennarna/rnaplot.py +7 -7
biotite/interface/openmm/__init__.py +4 -0
biotite/interface/pymol/__init__.py +3 -0
biotite/interface/pymol/object.py +3 -1
biotite/interface/rdkit/__init__.py +4 -0
biotite/interface/rdkit/mol.py +5 -5
biotite/interface/version.py +23 -0
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +1 -1
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -2
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +2 -4
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/structure/basepairs.py +13 -14
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +67 -6
biotite/structure/box.py +141 -3
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +0 -1
biotite/structure/chains.py +15 -21
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +2 -0
biotite/structure/dotbracket.py +4 -4
biotite/structure/graphics/rna.py +19 -16
biotite/structure/hbond.py +1 -2
biotite/structure/info/components.bcif +0 -0
biotite/structure/io/pdb/convert.py +84 -2
biotite/structure/io/pdb/file.py +94 -7
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/bcif.py +6 -3
biotite/structure/io/pdbx/cif.py +5 -2
biotite/structure/io/pdbx/compress.py +71 -34
biotite/structure/io/pdbx/convert.py +226 -58
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +39 -23
biotite/structure/pseudoknots.py +6 -6
biotite/structure/residues.py +10 -27
biotite/structure/rings.py +118 -2
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +28 -29
biotite/structure/segments.py +55 -0
biotite/structure/spacegroups.json +1567 -0
biotite/structure/spacegroups.license +26 -0
biotite/structure/superimpose.py +1 -191
biotite/structure/transform.py +220 -1
biotite/version.py +2 -2
{biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/METADATA +4 -34
{biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/RECORD +62 -60
{biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/WHEEL +1 -1
{biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/encoding.pyx CHANGED Viewed

@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
         # since the file content may be invalid/malicious.
         raise NotImplementedError()
+    def __str__(self):
+        # Restore original behavior, as `__str__()` implementation of `_Component`
+        # may require serialization, which is not possible for some encodings prior
+        # to the first encoding pass
+        return object.__str__(self)
 @dataclass
 class ByteArrayEncoding(Encoding):
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
                 )
         # Round to avoid wrong values due to floating point inaccuracies
-        return np.round(data * self.factor).astype(np.int32)
+        scaled_data = np.round(data * self.factor)
+        return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)
     def decode(self, data):
         return (data / self.factor).astype(
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
             self.min, self.max, self.num_steps, dtype=data.dtype
         )
         indices = np.searchsorted(steps, data, side="left")
-        return indices.astype(np.int32, copy=False)
+        return _safe_cast(indices, np.int32)
     def decode(self, data):
         output = data * (self.max - self.min) / (self.num_steps - 1)
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
         if self.origin is None:
             self.origin = data[0]
+        # Differences (including `np.diff`) return an array with the same dtype as the
+        # input array
+        # As the input dtype may be unsigned, the output dtype could underflow,
+        # if the difference is negative
+        # -> cast to int64 to avoid this
+        data = data.astype(np.int64, copy=False)
         data = data - self.origin
-        return np.diff(data, prepend=0).astype(np.int32, copy=False)
+        return _safe_cast(np.diff(data, prepend=0), np.int32)
     def decode(self, data):
         output = np.cumsum(data, dtype=self.src_type.to_dtype())
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
             # Only positive values -> use unsigned integers
             self.is_unsigned = data.min().item() >= 0
-        data = data.astype(np.int32, copy=False)
+        data = _safe_cast(data, np.int32)
         return self._encode(
             data, np.empty(0, dtype=self._determine_packed_dtype())
         )
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
         else:
             check_present = True
-        string_order = np.argsort(self.strings).astype(np.int32)
+        string_order = _safe_cast(np.argsort(self.strings), np.int32)
         sorted_strings = self.strings[string_order]
         sorted_indices = np.searchsorted(sorted_strings, data)
         indices = string_order[sorted_indices]
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
     return attribute_name[0].lower() + attribute_name[1:]
-def _safe_cast(array, dtype):
-    dtype = np.dtype(dtype)
-    if dtype == array.dtype:
+def _safe_cast(array, dtype, allow_decimal_loss=False):
+    source_dtype = array.dtype
+    target_dtype = np.dtype(dtype)
+    if target_dtype == source_dtype:
         return array
-    if np.issubdtype(dtype, np.integer):
-        if not np.issubdtype(array.dtype, np.integer):
-            raise ValueError("Cannot cast floating point to integer")
-        dtype_info = np.iinfo(dtype)
-        if np.any(array < dtype_info.min) or np.any(array > dtype_info.max):
-            raise ValueError("Integer values do not fit into the given dtype")
-    return array.astype(dtype)
-def _get_n_decimals(value, tolerance):
-    MAX_DECIMALS = 10
-    for n in range(MAX_DECIMALS):
-        if abs(value - round(value, n)) < tolerance:
-            return n
-    return MAX_DECIMALS
+    if np.issubdtype(target_dtype, np.integer):
+        if np.issubdtype(source_dtype, np.floating):
+            if not allow_decimal_loss:
+                raise ValueError("Cannot cast floating point to integer")
+            if not np.isfinite(array).all():
+                raise ValueError("Data contains non-finite values")
+        elif not np.issubdtype(source_dtype, np.integer):
+            # Neither float, nor integer -> cannot cast
+            raise ValueError(f"Cannot cast '{source_dtype}' to integer")
+        dtype_info = np.iinfo(target_dtype)
+        # Check if an integer underflow/overflow would occur during conversion
+        if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
+            raise ValueError("Values do not fit into the given dtype")
+    return array.astype(target_dtype)

biotite/structure/pseudoknots.py CHANGED Viewed

@@ -148,7 +148,7 @@ class _Region:
     region_pairs : ndarray, dtype=int
         The indices of the base pairs in ``base_pairs`` that are part of
         the region.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.
     """
@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
     base_pairs : ndarray, dtype=int, shape=(n, 2)
         Each row is equivalent to one base pair and contains the first
         indices of the residues corresponding to each base.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.
     Returns
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
             return i
-def _get_region_array_for(regions, content=[], dtype=[]):
+def _get_region_array_for(regions, content=(), dtype=()):
     """
     Get a :class:`ndarray` of region objects. Each object occurs twice,
     representing its start and end point. The regions positions in the
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
     ----------
     regions : set {_region, ...}
         The regions to be considered
-    content : list [function, ...] (default: [])
+    content : list [function, ...]
         The functions to be considered for custom outputs. For a given
         region they must return a tuple of which the first value is
         placed at the start position and the second value at the end
         position of the region relative to the other regions.
-    dtype : list [str, ...] (default: [])
+    dtype : list [str, ...]
         The data type of the output of the custom functions.
     Returns
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
         The maximum pseudoknot order to be found. If a base pair would
         be of a higher order, its order is specified as -1. If ``None``
         is given, all base pairs are evaluated.
-    order : int (default: 0)
+    order : int
         The order that is currently evaluated.
     Returns

biotite/structure/residues.py CHANGED Viewed

@@ -21,23 +21,23 @@ __all__ = [
     "residue_iter",
 ]
-import numpy as np
 from biotite.structure.segments import (
     apply_segment_wise,
     get_segment_masks,
     get_segment_positions,
+    get_segment_starts,
     get_segment_starts_for,
     segment_iter,
     spread_segment_wise,
 )
-def get_residue_starts(array, add_exclusive_stop=False):
+def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
     """
     Get indices for an atom array, each indicating the beginning of
     a residue.
-    A new residue starts, either when the chain ID, residue ID,
+    A new residue starts, either when the chain ID, sym ID, residue ID,
     insertion code or residue name changes from one to the next atom.
     Parameters
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
         If true, the exclusive stop of the input atom array, i.e.
         ``array.array_length()``, is added to the returned array of
         start indices as last element.
+    extra_categories : tuple of str, optional
+        Additional annotation categories that induce the start of a new residue,
+        when their value changes from one atom to the next.
     Returns
     -------
@@ -69,30 +72,10 @@ def get_residue_starts(array, add_exclusive_stop=False):
     [  0  16  35  56  75  92 116 135 157 169 176 183 197 208 219 226 250 264
      278 292 304]
     """
-    if array.array_length() == 0:
-        return np.array([], dtype=int)
-    # These mask are 'true' at indices where the value changes
-    chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
-    res_id_changes = array.res_id[1:] != array.res_id[:-1]
-    ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
-    res_name_changes = array.res_name[1:] != array.res_name[:-1]
-    # If any of these annotation arrays change, a new residue starts
-    residue_change_mask = (
-        chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
-    )
-    # Convert mask to indices
-    # Add 1, to shift the indices from the end of a residue
-    # to the start of a new residue
-    residue_starts = np.where(residue_change_mask)[0] + 1
-    # The first residue is not included yet -> Insert '[0]'
-    if add_exclusive_stop:
-        return np.concatenate(([0], residue_starts, [array.array_length()]))
-    else:
-        return np.concatenate(([0], residue_starts))
+    categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
+    if "sym_id" in array.get_annotation_categories():
+        categories.append("sym_id")
+    return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
 def apply_residue_wise(array, data, function, axis=None):

biotite/structure/rings.py CHANGED Viewed

@@ -8,7 +8,12 @@ This module provides functions related to aromatic rings.
 __name__ = "biotite.structure"
 __author__ = "Patrick Kunzmann"
-__all__ = ["find_aromatic_rings", "find_stacking_interactions", "PiStacking"]
+__all__ = [
+    "find_aromatic_rings",
+    "find_stacking_interactions",
+    "find_pi_cation_interactions",
+    "PiStacking",
+]
 from enum import IntEnum
@@ -149,7 +154,7 @@ def find_stacking_interactions(
     The conditions for pi-stacking are :footcite:`Wojcikowski2015` :
-        - The ring centroids must be within cutoff distance (default: 6.5 Å).
+        - The ring centroids must be within the `centroid_cutoff` distance.
           While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
           adopted from :footcite:`Bouysset2021` to better identify perpendicular
           stacking interactions.
@@ -268,6 +273,117 @@ def find_stacking_interactions(
     ]
+def find_pi_cation_interactions(
+    atoms,
+    distance_cutoff=5.0,
+    angle_tol=np.deg2rad(30.0),
+):
+    """
+    Find pi-cation interactions between aromatic rings and cations.
+    Parameters
+    ----------
+    atoms : AtomArray
+        The atoms to be searched for pi-cation interactions.
+        Requires an associated :class:`BondList` and ``charge`` annotation.
+    distance_cutoff : float, optional
+        The cutoff distance between ring centroid and cation.
+    angle_tol : float, optional
+        The tolerance for the angle between the ring plane normal
+        and the centroid-cation vector. Perfect pi-cation interaction
+        has 0° angle (perpendicular to ring plane).
+        Given in radians.
+    Returns
+    -------
+    interactions : list of tuple(ndarray, int)
+        The pi-cation interactions between aromatic rings and cations.
+        Each element in the list represents one pi-cation interaction.
+        The first element of each tuple represents atom indices of the
+        aromatic ring, the second element is the atom index of the cation.
+    See Also
+    --------
+    find_aromatic_rings : Used for finding the aromatic rings in this function.
+    find_stacking_interactions : Find pi-stacking interactions between rings.
+    Notes
+    -----
+    The conditions for pi-cation interactions are:
+        - The distance between ring centroid and cation must be within
+          `distance_cutoff`. :footcite:`Wojcikowski2015` uses 5.0 Å,
+          whereas :footcite:`Bouysset2021` uses 4.5 Å.
+        - The angle between the ring plane normal and the centroid-cation
+          vector must be within `angle_tol` of 0° (perpendicular to plane).
+    Examples
+    --------
+    >>> from os.path import join
+    >>> structure = load_structure(join(path_to_structures, "3wip.cif"), include_bonds=True, extra_fields=["charge"])
+    >>> interactions = find_pi_cation_interactions(structure)
+    >>> for ring_indices, cation_index in interactions:
+    ...     print(
+    ...         structure.res_name[ring_indices[0]],
+    ...         structure.res_name[cation_index]
+    ...     )
+    TYR ACH
+    TRP ACH
+    """
+    if atoms.bonds is None:
+        raise BadStructureError("Structure must have an associated BondList")
+    if atoms.charge is None:
+        raise BadStructureError(
+            "Structure must have a 'charge' annotation to identify cations."
+        )
+    rings = find_aromatic_rings(atoms)
+    if len(rings) == 0:
+        return []
+    cation_mask = atoms.charge > 0
+    cation_indices = np.where(cation_mask)[0]
+    if len(cation_indices) == 0:
+        return []
+    # Calculate ring centroids and normals
+    ring_centroids = np.array(
+        [atoms.coord[atom_indices].mean(axis=0) for atom_indices in rings]
+    )
+    ring_normals = np.array(
+        [_get_ring_normal(atoms.coord[atom_indices]) for atom_indices in rings]
+    )
+    cation_coords = atoms.coord[cation_indices]
+    # Create an index array that contains the Cartesian product of all rings and cations
+    indices = np.stack(
+        [
+            np.repeat(np.arange(len(rings)), len(cation_indices)),
+            np.tile(np.arange(len(cation_indices)), len(rings)),
+        ],
+        axis=-1,
+    )
+    ## Condition 1: Ring centroids and cations are close enough to each other
+    diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
+    # Use squared distance to avoid time consuming sqrt computation
+    sq_distance = vector_dot(diff, diff)
+    is_interacting = sq_distance < distance_cutoff**2
+    indices = indices[is_interacting]
+    ## Condition 2: Angle between ring normal and centroid-cation vector
+    diff = displacement(ring_centroids[indices[:, 0]], cation_coords[indices[:, 1]])
+    norm_vector(diff)
+    angles = _minimum_angle(ring_normals[indices[:, 0]], diff)
+    is_interacting = _is_within_tolerance(angles, 0, angle_tol)
+    indices = indices[is_interacting]
+    # Only return pairs where all conditions were fulfilled
+    return [(rings[ring_i], cation_indices[cation_j]) for ring_i, cation_j in indices]
 def _get_ring_normal(ring_coord):
     """
     Get the normal vector perpendicular to the ring plane.

biotite/structure/sasa.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/sasa.pyx CHANGED Viewed

@@ -35,39 +35,38 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
          point_number=1000, point_distr="Fibonacci", vdw_radii="ProtOr")
     Calculate the Solvent Accessible Surface Area (SASA) of a protein.
     This function uses the Shrake-Rupley ("rolling probe")
     algorithm :footcite:`Shrake1973`:
     Every atom is occupied by an evenly distributed point mesh. The
     points that can be reached by the "rolling probe", are surface
     accessible.
     Parameters
     ----------
     array : AtomArray
         The protein model to calculate the SASA for.
     probe_radius : float, optional
-        The VdW-radius of the solvent molecules (default: 1.4).
+        The VdW-radius of the solvent molecules.
     atom_filter : ndarray, dtype=bool, optional
         If this parameter is given, SASA is only calculated for the
         filtered atoms.
     ignore_ions : bool, optional
-        If true, all monoatomic ions are removed before SASA calculation
-        (default: True).
+        If true, all monoatomic ions are removed before SASA calculation.
     point_number : int, optional
         The number of points in the mesh occupying each atom for SASA
-        calculation (default: 100). The SASA calculation time is
-        proportional to the amount of sphere points.
+        calculation.
+        The SASA calculation time is proportional to the amount of sphere points.
     point_distr : str or function, optional
         If a function is given, the function is used to calculate the
         point distribution for the mesh (the function must take `float`
         *n* as parameter and return a *(n x 3)* :class:`ndarray`).
         Alternatively a string can be given to choose a built-in
         distribution:
             - **Fibonacci** - Distribute points using a golden section
               spiral.
         By default *Fibonacci* is used.
     vdw_radii : str or ndarray, dtype=float, optional
         Indicates the set of VdW radii to be used. If an `array`-length
@@ -76,7 +75,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
         SASA calculation (e.g. solvent atoms) can have arbitrary values
         (e.g. `NaN`). If instead a string is given, one of the
         built-in sets is used:
             - **ProtOr** - A set, which does not require hydrogen atoms
               in the model. Suitable for crystal structures.
               :footcite:`Tsai1999`
@@ -85,25 +84,25 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
               in the model (e.g. NMR elucidated structures).
               Values for main group elements are taken from :footcite:`Mantina2009`,
               and for relevant transition metals from the :footcite:`RDKit`.
         By default *ProtOr* is used.
     Returns
     -------
     sasa : ndarray, dtype=float, shape=(n,)
-        Atom-wise SASA. `NaN` for atoms where SASA has not been
+        Atom-wise SASA. `NaN` for atoms where SASA has not been
         calculated
         (solvent atoms, hydrogen atoms (ProtOr), atoms not in `filter`).
     References
     ----------
     .. footbibliography::
     """
     cdef int i=0, j=0, k=0, adj_atom_i=0, rel_atom_i=0
     cdef np.ndarray sasa_filter
     cdef np.ndarray occl_filter
     if atom_filter is not None:
@@ -122,7 +121,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
         filter = ~filter_monoatomic_ions(array)
         sasa_filter = sasa_filter & filter
         occl_filter = occl_filter & filter
     cdef np.ndarray sphere_points
     if callable(point_distr):
         sphere_points = point_distr(point_number)
@@ -131,7 +130,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
     else:
         raise ValueError(f"'{point_distr}' is not a valid point distribution")
     sphere_points = sphere_points.astype(np.float32)
     cdef np.ndarray radii
     if isinstance(vdw_radii, np.ndarray):
         radii = vdw_radii.astype(np.float32)
@@ -159,17 +158,17 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
         raise KeyError(f"'{vdw_radii}' is not a valid radii set")
     # Increase atom radii by probe size ("rolling probe")
     radii += probe_radius
     # Memoryview for filter
     # Problem with creating boolean memoryviews
     # -> Type uint8 is used
     cdef np_bool[:] sasa_filter_view = np.frombuffer(sasa_filter,
                                                      dtype=np.uint8)
     cdef np.ndarray occl_r = radii[occl_filter]
     # Atom array containing occluding atoms
     occl_array = array[occl_filter]
     # Memoryviews for coordinates of entire (main) array
     # and for coordinates of occluding atom array
     cdef float32[:,:] main_coord = array.coord.astype(np.float32,
@@ -191,10 +190,10 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
     cdef float32[:] occl_radii_sq = occl_r * occl_r
     # Memoryview for atomwise SASA
     cdef float32[:] sasa = np.full(len(array), np.nan, dtype=np.float32)
     # Area of a sphere point on a unit sphere
     cdef float32 area_per_point = 4.0 * np.pi / point_number
     # Define further statically typed variables
     # that are needed for SASA calculation
     cdef int n_accesible = 0
@@ -213,8 +212,8 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
     cdef float32 occl_y = 0
     cdef float32 occl_z = 0
     cdef float32[:,:] relevant_occl_coord = None
-    # Cell size is as large as the maximum distance,
+    # Cell size is as large as the maximum distance,
     # where two atoms can intersect.
     # Therefore intersecting atoms are always in the same or adjacent cell.
     cell_list = CellList(occl_array, np.max(radii[occl_filter])*2)
@@ -227,7 +226,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
     cell_indices = cell_list.get_atoms_in_cells(array.coord)
     cell_indices_view = cell_indices
     max_adj_list_length = cell_indices.shape[0]
     # Later on, this array stores coordinates for actual
     # occluding atoms for a certain atom to calculate the
     # SASA for
@@ -237,7 +236,7 @@ def sasa(array, float probe_radius=1.4, np.ndarray atom_filter=None,
     # adjacent atoms
     relevant_occl_coord = np.zeros((max_adj_list_length, 4),
                                    dtype=np.float32)
     # Actual SASA calculation
     for i in range(array_length):
         # First level: The atoms to calculate SASA for

biotite/structure/segments.py CHANGED Viewed

@@ -5,6 +5,7 @@
 __name__ = "biotite.structure"
 __author__ = "Patrick Kunzmann"
 __all__ = [
+    "get_segment_starts",
     "apply_segment_wise",
     "spread_segment_wise",
     "get_segment_masks",
@@ -16,6 +17,60 @@ __all__ = [
 import numpy as np
+def get_segment_starts(
+    array, add_exclusive_stop, continuous_categories=(), equal_categories=()
+):
+    """
+    Generalized version of :func:`get_residue_starts()` for residues and chains.
+    The starts are determined from value changes in the given annotations.
+    Parameters
+    ----------
+    array : AtomArray or AtomArrayStack
+        The atom array (stack) to get the segment starts from.
+    add_exclusive_stop : bool
+        If true, the exclusive stop of the input atom array,
+        i.e. ``array.array_length()``, is added to the returned array of start indices
+        as last element.
+    continuous_categories : tuple of str, optional
+        Annotation categories that are expected to be continuously increasing within a
+        segment.
+        This means if the value of such an annotation decreases from one atom to
+        another, a new segment is started.
+    equal_categories : tuple of str, optional
+        Annotation categories that are expected to be equal within a segment.
+        This means if the value of such an annotation changes from one atom to
+        another, a new segment is started.
+    Returns
+    -------
+    starts : ndarray, dtype=int
+        The start indices of segments in `array`.
+    """
+    if array.array_length() == 0:
+        return np.array([], dtype=int)
+    segment_start_mask = np.zeros(array.array_length() - 1, dtype=bool)
+    for annot_name in continuous_categories:
+        annotation = array.get_annotation(annot_name)
+        segment_start_mask |= np.diff(annotation) < 0
+    for annot_name in equal_categories:
+        annotation = array.get_annotation(annot_name)
+        segment_start_mask |= annotation[1:] != annotation[:-1]
+    # Convert mask to indices
+    # Add 1, to shift the indices from the end of a segment
+    # to the start of a new segment
+    chain_starts = np.where(segment_start_mask)[0] + 1
+    # The first segment is not included yet -> Insert '[0]'
+    if add_exclusive_stop:
+        return np.concatenate(([0], chain_starts, [array.array_length()]))
+    else:
+        return np.concatenate(([0], chain_starts))
 def apply_segment_wise(starts, data, function, axis=None):
     """
     Generalized version of :func:`apply_residue_wise()` for