PyPI - biotite - Versions diffs - 1.1.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl - Mend

biotite 1.1.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (160) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/localapp.py +2 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +3 -3
biotite/application/muscle/app5.py +3 -3
biotite/application/sra/app.py +0 -5
biotite/application/util.py +21 -1
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +10 -8
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +2 -3
biotite/database/uniprot/check.py +2 -2
biotite/database/uniprot/download.py +2 -5
biotite/database/uniprot/query.py +3 -4
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +20 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +201 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +19 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +94 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +33 -11
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +22 -22
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +2 -2
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +6 -6
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +12 -3
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -2
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +37 -39
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +2 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/sequence/profile.py +19 -25
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +12 -5
biotite/sequence/sequence.py +1 -2
biotite/structure/__init__.py +2 -0
biotite/structure/alphabet/i3d.py +1 -2
biotite/structure/alphabet/pb.py +1 -2
biotite/structure/alphabet/unkerasify.py +8 -2
biotite/structure/atoms.py +35 -27
biotite/structure/basepairs.py +39 -40
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +8 -5
biotite/structure/box.py +159 -23
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -68
biotite/structure/chains.py +17 -55
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +31 -32
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +15 -15
biotite/structure/graphics/rna.py +19 -16
biotite/structure/hbond.py +18 -21
biotite/structure/info/atoms.py +11 -2
biotite/structure/info/ccd.py +0 -2
biotite/structure/info/components.bcif +0 -0
biotite/structure/info/groups.py +0 -3
biotite/structure/info/misc.py +0 -1
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +1 -2
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +39 -13
biotite/structure/io/pdb/convert.py +86 -5
biotite/structure/io/pdb/file.py +90 -24
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +4 -4
biotite/structure/io/pdbx/bcif.py +22 -7
biotite/structure/io/pdbx/cif.py +20 -7
biotite/structure/io/pdbx/component.py +6 -0
biotite/structure/io/pdbx/compress.py +71 -34
biotite/structure/io/pdbx/convert.py +429 -77
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +39 -23
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +0 -15
biotite/structure/pseudoknots.py +13 -19
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +20 -48
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +30 -30
biotite/structure/segments.py +123 -9
biotite/structure/sequence.py +0 -1
biotite/structure/spacegroups.json +1567 -0
biotite/structure/spacegroups.license +26 -0
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +75 -253
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +232 -26
biotite/structure/util.py +3 -3
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
{biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
{biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
{biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/compare.py CHANGED Viewed

@@ -9,11 +9,16 @@ comparing multiple structures with each other.
 __name__ = "biotite.structure"
 __author__ = "Patrick Kunzmann"
-__all__ = ["rmsd", "rmspd", "rmsf", "average"]
+__all__ = ["rmsd", "rmspd", "rmsf", "average", "lddt"]
+import collections.abc
+import warnings
 import numpy as np
-from biotite.structure.atoms import AtomArrayStack, coord
+from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
+from biotite.structure.celllist import CellList
+from biotite.structure.chains import get_chain_count, get_chain_positions
 from biotite.structure.geometry import index_distance
+from biotite.structure.residues import get_residue_count, get_residue_positions
 from biotite.structure.util import vector_dot
@@ -21,7 +26,7 @@ def rmsd(reference, subject):
     r"""
     Calculate the RMSD between two structures.
-    The *root-mean-square-deviation* (RMSD) indicates the overall
+    The *root mean square deviation* (RMSD) indicates the overall
     deviation of each model of a structure to a reference structure.
     It is defined as:
@@ -48,7 +53,7 @@ def rmsd(reference, subject):
     See Also
     --------
-    rmsf
+    rmsf : The *root mean square fluctuation*.
     Notes
     -----
@@ -121,11 +126,9 @@ def rmspd(reference, subject, periodic=False, box=None):
     to ensure correct results.
     (e.g. with :func:`remove_pbc()`).
-    See also
+    See Also
     --------
-    index_distance
-    remove_pbc
-    rmsd
+    rmsd : The *root mean square deviation*.
     """
     # Compute index pairs in reference structure -> pair_ij for j < i
     reflen = reference.array_length()
@@ -173,7 +176,8 @@ def rmsf(reference, subject):
     See Also
     --------
-    rmsd
+    rmsd : The *root mean square deviation*.
+    average : Average the structure over the models to be used as reference in this function.
     Notes
     -----
@@ -218,10 +222,6 @@ def average(atoms):
         If `atoms` is a :class:`ndarray`, an :class:`ndarray` is also
         returned.
-    See Also
-    --------
-    rmsd, rmsf
     Notes
     -----
     The calculated average structure is not suitable for visualization
@@ -242,6 +242,244 @@ def average(atoms):
         return mean_coords
+def lddt(
+    reference,
+    subject,
+    aggregation="all",
+    atom_mask=None,
+    partner_mask=None,
+    inclusion_radius=15,
+    distance_bins=(0.5, 1.0, 2.0, 4.0),
+    exclude_same_residue=True,
+    exclude_same_chain=False,
+    filter_function=None,
+    symmetric=False,
+):
+    """
+    Calculate the *local Distance Difference Test* (lDDT) score of a structure with
+    respect to its reference.
+    :footcite:`Mariani2013`
+    Parameters
+    ----------
+    reference : AtomArray
+        The reference structure.
+    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
+        The structure(s) to evaluate with respect to `reference`.
+        The number of atoms must be the same as in `reference`.
+        Alternatively, coordinates can be provided directly as
+        :class:`ndarray`.
+    aggregation : {'all', 'chain', 'residue', 'atom'} or ndarray, shape=(n,), dtype=int, optional
+        Defines on which scale the lDDT score is calculated.
+        - `'all'`: The score is computed over all contacts.
+        - `'chain'`: The score is calculated for each chain separately.
+        - `'residue'`: The score is calculated for each residue separately.
+        - `'atom'`: The score is calculated for each atom separately.
+        Alternatively, an array of aggregation bins can be provided, i.e. each contact
+        is assigned to the corresponding bin.
+    atom_mask : ndarray, shape=(n,), dtype=bool, optional
+        If given, the contacts are only computed for the masked atoms.
+        Atoms excluded by the mask do not have any contacts and their *lDDT* would
+        be NaN in case of ``aggregation="atom"``.
+        Providing this mask can significantly speed up the computation, if
+        only for certain chains/residues/atoms the *lDDT* is of interest.
+    partner_mask : ndarray, shape=(n,), dtype=bool, optional
+        If given, only contacts **to** the masked atoms are considered.
+        While `atom_mask` does not alter the *lDDT* for the masked atoms,
+        `partner_mask` does, as for each atom only the masked atoms are considered
+        as potential contact partners.
+    inclusion_radius : float, optional
+        Pairwise atom distances are considered within this radius in `reference`.
+    distance_bins : list of float, optional
+        The distance bins for the score calculation, i.e. if a distance deviation is
+        within the first bin, the score is 1, if it is outside all bins, the score is 0.
+    exclude_same_residue : bool, optional
+        If true, only atom distances between different residues are considered.
+        Otherwise, also atom distances within the same residue are included.
+    exclude_same_chain : bool, optional
+        If true, only atom distances between different chains are considered.
+        Otherwise, also atom distances within the same chain are included.
+    filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
+        Used for custom contact filtering, if the other parameters are not sufficient.
+        A function that takes an array of contact atom indices and returns a mask that
+        is ``True`` for all contacts that should be retained.
+        All other contacts are not considered for lDDT computation.
+    symmetric : bool, optional
+        If set to true, the *lDDT* score is computed symmetrically.
+        This means both contacts found in the `reference` and `subject` structure are
+        considered.
+        Hence the score is independent of which structure is given as `reference` and
+        `subject`.
+        Note that in this case `subject` must be an :class:`AtomArray` as well.
+        By default, only contacts in the `reference` are considered.
+    Returns
+    -------
+    lddt : float or ndarray, dtype=float
+        The lDDT score for each model and aggregation bin.
+        The shape depends on `subject` and `aggregation`:
+        If `subject` is an :class:`AtomArrayStack` (or equivalent coordinate
+        :class:`ndarray`), a dimension depicting each model is added.
+        If `aggregation` is not ``'all'``, a second dimension with the length equal to
+        the number of aggregation bins is added (i.e. number of chains, residues, etc.).
+        If both an :class:`AtomArray` as `subject` and ``aggregation='all'`` are passed,
+        a float is returned.
+    Notes
+    -----
+    The lDDT score measures how well the pairwise atom distances in a model match the
+    corresponding distances in a reference.
+    Hence, like :func:`rmspd()` it works superimposition-free, but instead of capturing
+    the global deviation, only the local environment within the `inclusion_radius` is
+    considered.
+    Note that by default, also hydrogen atoms are considered in the distance
+    calculation.
+    If this is undesired, the hydrogen atoms can be removed prior to the calculation.
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    Calculate the global lDDT of all models to the first model:
+    >>> reference = atom_array_stack[0]
+    >>> subject = atom_array_stack[1:]
+    >>> print(lddt(reference, subject))
+    [0.799 0.769 0.792 0.836 0.799 0.752 0.860 0.769 0.825 0.777 0.760 0.787
+     0.790 0.783 0.804 0.842 0.769 0.797 0.757 0.852 0.811 0.786 0.805 0.755
+     0.734 0.794 0.771 0.778 0.842 0.772 0.815 0.789 0.828 0.750 0.826 0.739
+     0.760]
+    Calculate the residue-wise lDDT for a single model:
+    >>> subject = atom_array_stack[1]
+    >>> print(lddt(reference, subject, aggregation="residue"))
+    [0.599 0.692 0.870 0.780 0.830 0.881 0.872 0.658 0.782 0.901 0.888 0.885
+     0.856 0.795 0.847 0.603 0.895 0.878 0.871 0.789]
+    As example for custom aggregation, calculate the lDDT for each chemical element:
+    >>> unique_elements = np.unique(reference.element)
+    >>> element_bins = np.array(
+    ...     [np.where(unique_elements == element)[0][0] for element in reference.element]
+    ... )
+    >>> element_lddt = lddt(reference, subject, aggregation=element_bins)
+    >>> for element, lddt_for_element in zip(unique_elements, element_lddt):
+    ...     print(f"{element}: {lddt_for_element:.3f}")
+    C: 0.837
+    H: 0.770
+    N: 0.811
+    O: 0.808
+    If the reference structure has more atoms resolved than the subject structure,
+    the missing atoms can be indicated with *NaN* values:
+    >>> reference = atom_array_stack[0]
+    >>> subject = atom_array_stack[1].copy()
+    >>> # Simulate the situation where the first residue is missing in the subject
+    >>> subject.coord[subject.res_id == 1] = np.nan
+    >>> global_lddt = lddt(reference, subject)
+    >>> print(f"{global_lddt:.3f}")
+    0.751
+    """
+    reference_coord = coord(reference)
+    subject_coord = coord(subject)
+    if subject_coord.shape[-2] != reference_coord.shape[-2]:
+        raise IndexError(
+            f"The given reference has {reference_coord.shape[-2]} atoms, but the "
+            f"subject has {subject_coord.shape[-2]} atoms"
+        )
+    contacts = _find_contacts(
+        reference,
+        atom_mask,
+        partner_mask,
+        inclusion_radius,
+        exclude_same_residue,
+        exclude_same_chain,
+        filter_function,
+    )
+    if symmetric:
+        if not isinstance(subject, AtomArray):
+            raise TypeError(
+                "Expected 'AtomArray' as subject, as symmetric lDDT is enabled, "
+                f"but got '{type(subject).__name__}'"
+            )
+        subject_contacts = _find_contacts(
+            subject,
+            atom_mask,
+            partner_mask,
+            inclusion_radius,
+            exclude_same_residue,
+            exclude_same_chain,
+            filter_function,
+        )
+        contacts = np.concatenate((contacts, subject_contacts), axis=0)
+        # Adding additional contacts may introduce duplicates between the existing and
+        # new ones -> filter them out
+        contacts = np.unique(contacts, axis=0)
+    if (
+        isinstance(aggregation, str)
+        and aggregation == "all"
+        and atom_mask is None
+        and partner_mask is None
+    ):
+        # Remove duplicate pairs as each pair appears twice
+        # (if i is in threshold distance to j, j is also in threshold distance to i)
+        # keep only the pair where i < j
+        # This improves performance because fewer distances need to be computed
+        # The assumption also only works when no atoms are masked
+        contacts = contacts[contacts[:, 0] < contacts[:, 1]]
+    reference_distances = index_distance(reference_coord, contacts)
+    subject_distances = index_distance(subject_coord, contacts)
+    deviations = np.abs(subject_distances - reference_distances)
+    distance_bins = np.asarray(distance_bins)
+    fraction_preserved_bins = np.count_nonzero(
+        deviations[..., np.newaxis] <= distance_bins[np.newaxis, :], axis=-1
+    ) / len(distance_bins)
+    # Aggregate the fractions over the desired level
+    if isinstance(aggregation, str) and aggregation == "all":
+        # Average over all contacts
+        return np.mean(fraction_preserved_bins, axis=-1)
+    else:
+        # A string is also a 'Sequence'
+        # -> distinguish between string and array, list, etc.
+        if isinstance(
+            aggregation, (np.ndarray, collections.abc.Sequence)
+        ) and not isinstance(aggregation, str):
+            return _average_over_indices(
+                fraction_preserved_bins,
+                bins=np.asarray(aggregation)[contacts[:, 0]],
+            )
+        elif aggregation == "chain":
+            return _average_over_indices(
+                fraction_preserved_bins,
+                bins=get_chain_positions(reference, contacts[:, 0]),
+                n_bins=get_chain_count(reference),
+            )
+        elif aggregation == "residue":
+            return _average_over_indices(
+                fraction_preserved_bins,
+                bins=get_residue_positions(reference, contacts[:, 0]),
+                n_bins=get_residue_count(reference),
+            )
+        elif aggregation == "atom":
+            return _average_over_indices(
+                fraction_preserved_bins, contacts[:, 0], reference.array_length()
+            )
+        else:
+            raise ValueError(f"Invalid aggregation level '{aggregation}'")
 def _sq_euclidian(reference, subject):
     """
     Calculate squared euclidian distance between atoms in two
@@ -272,3 +510,172 @@ def _sq_euclidian(reference, subject):
         )
     dif = subject_coord - reference_coord
     return vector_dot(dif, dif)
+def _to_sparse_indices(all_contacts):
+    """
+    Create tuples of contact indices from the :meth:`CellList.get_atoms()` return value.
+    In other words, they would mark the non-zero elements in a dense contact matrix.
+    Parameters
+    ----------
+    all_contacts : ndarray, dtype=int, shape=(m,n)
+        The contact indices as returned by :meth:`CellList.get_atoms()`.
+        Padded with -1, in the second dimension.
+        Dimension *m* marks the query atoms, dimension *n* marks the contact atoms.
+    Returns
+    -------
+    combined_indices : ndarray, dtype=int, shape=(l,2)
+        The contact indices.
+        Each row is a pair of query atom index and contact atom index.
+    """
+    # Find rows where a query atom has at least one contact
+    non_empty_indices = np.where(np.any(all_contacts != -1, axis=1))[0]
+    # Take those rows and flatten them
+    contact_indices = all_contacts[non_empty_indices].flatten()
+    # For each row the corresponding query atom is the same
+    # Hence in the flattened form the query atom index is simply repeated
+    query_indices = np.repeat(non_empty_indices, all_contacts.shape[1])
+    combined_indices = np.stack([query_indices, contact_indices], axis=1)
+    # Remove the padding values
+    return combined_indices[contact_indices != -1]
+def _find_contacts(
+    atoms=None,
+    atom_mask=None,
+    partner_mask=None,
+    inclusion_radius=15,
+    exclude_same_residue=False,
+    exclude_same_chain=True,
+    filter_function=None,
+):
+    """
+    Find contacts between the atoms in the given structure.
+    Parameters
+    ----------
+    atoms : AtomArray
+        The structure to find the contacts for.
+    atom_mask : ndarray, shape=(n,), dtype=bool, optional
+        If given, the contacts are only computed for the masked atoms.
+        Atoms excluded by the mask do not have any contacts and their *lDDT* would
+        be NaN in case of ``aggregation="atom"``.
+        Providing this mask can significantly speed up the computation, if
+        only for certain chains/residues/atoms the *lDDT* is of interest.
+    partner_mask : ndarray, shape=(n,), dtype=bool, optional
+        If given, only contacts **to** the masked atoms are considered.
+        While `atom_mask` does not alter the *lDDT* for the masked atoms,
+        `partner_mask` does, as for each atom only the masked atoms are considered
+        as potential contact partners.
+    inclusion_radius : float, optional
+        Pairwise atom distances are considered within this radius.
+    exclude_same_residue : bool, optional
+        If true, only atom distances between different residues are considered.
+        Otherwise, also atom distances within the same residue are included.
+    exclude_same_chain : bool, optional
+        If true, only atom distances between different chains are considered.
+        Otherwise, also atom distances within the same chain are included.
+    filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
+        Used for custom contact filtering, if the other parameters are not sufficient.
+        A function that takes an array of contact atom indices and returns a mask that
+        is ``True`` for all contacts that should be retained.
+        All other contacts are not considered for lDDT computation.
+    Returns
+    -------
+    contacts : ndarray, shape=(n,2), dtype=int
+        The array of contacts.
+        Each element represents a pair of atom indices that are in contact.
+    """
+    coords = coord(atoms)
+    selection = ~np.isnan(coords).any(axis=-1)
+    if partner_mask is not None:
+        selection &= partner_mask
+    # Use a cell list to find atoms within inclusion radius in O(n) time complexity
+    cell_list = CellList(coords, inclusion_radius, selection=selection)
+    # Pairs of indices for atoms within the inclusion radius
+    if atom_mask is None:
+        all_contacts = cell_list.get_atoms(coords, inclusion_radius)
+    else:
+        filtered_contacts = cell_list.get_atoms(coords[atom_mask], inclusion_radius)
+        # Map the contacts for the masked atoms to the original coordinates
+        # Rows that were filtered out by the mask are fully padded with -1
+        # consistent with the padding of `get_atoms()`
+        all_contacts = np.full(
+            (coords.shape[0], filtered_contacts.shape[-1]),
+            -1,
+            dtype=filtered_contacts.dtype,
+        )
+        all_contacts[atom_mask] = filtered_contacts
+    # Convert into pairs of indices
+    contacts = _to_sparse_indices(all_contacts)
+    if exclude_same_chain:
+        # Find the index of the chain for each atom and drop same-chain contacts
+        chain_indices = get_chain_positions(atoms, contacts.flatten()).reshape(
+            contacts.shape
+        )
+        contacts = contacts[chain_indices[:, 0] != chain_indices[:, 1]]
+    elif exclude_same_residue:
+        # Find the index of the residue for each atom
+        residue_indices = get_residue_positions(atoms, contacts.flatten()).reshape(
+            contacts.shape
+        )
+        # Remove contacts between atoms of the same residue
+        contacts = contacts[residue_indices[:, 0] != residue_indices[:, 1]]
+    else:
+        # In any case self-contacts should not be considered
+        contacts = contacts[contacts[:, 0] != contacts[:, 1]]
+    if filter_function is not None:
+        mask = filter_function(contacts)
+        if mask.shape != (contacts.shape[0],):
+            raise IndexError(
+                f"Mask returned from filter function has shape {mask.shape}, "
+                f"but expected ({contacts.shape[0]},)"
+            )
+        contacts = contacts[mask, :]
+    return contacts
+def _average_over_indices(values, bins, n_bins=None):
+    """
+    For each unique index in `bins`, average the corresponding values in `values`.
+    Based on
+    https://stackoverflow.com/questions/79140661/how-to-sum-values-based-on-a-second-index-array-in-a-vectorized-manner
+    Parameters
+    ----------
+    values : ndarray, shape=(..., n)
+        The values to average.
+    bins : ndarray, shape=(n,) dtype=int
+        Associates each value from `values` with a bin.
+    n_bins : int
+        The total number of bins.
+        This is necessary as some bins in `bins` may be empty.
+        By default the number of bins is determined from `bins`.
+    Returns
+    -------
+    averaged : ndarray, shape=(..., k)
+        The averaged values.
+        *k* is `n_bins` if given, otherwise the maximum value in `bins` + 1.
+    """
+    if n_bins is None:
+        n_elements_per_bin = np.bincount(bins)
+        n_bins = len(n_elements_per_bin)
+    else:
+        n_elements_per_bin = np.bincount(bins, minlength=n_bins)
+    # The last dimension is replaced by the number of bins
+    # Broadcasting in 'np.add.at()' requires the replaced dimension to be the first
+    aggregated = np.zeros((n_bins, *values.shape[:-1]), dtype=values.dtype)
+    np.add.at(aggregated, bins, np.swapaxes(values, 0, -1))
+    # If an atom has no contacts, the corresponding value is NaN
+    # This result is expected, hence the warning is ignored
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        # Bring the bin dimension into the last dimension again
+        return np.swapaxes(aggregated, 0, -1) / n_elements_per_bin

biotite/structure/density.py CHANGED Viewed

@@ -49,7 +49,7 @@ def density(atoms, selection=None, delta=1.0, bins=None, density=False, weights=
         If False, the number of samples in each bin is returned.
         Otherwise, returns the probability density function of each bin.
         See :func:`numpy.histogramdd()` for further details.
-    weights: ndarray, shape=(n,) or shape=(m,n), optional
+    weights : ndarray, shape=(n,) or shape=(m,n), optional
         An array of values to weight the contribution of *n* atoms in
         *m* models.
         If the shape is *(n,)*, the weights will be interpreted as

biotite/structure/dotbracket.py CHANGED Viewed

@@ -31,12 +31,12 @@ def dot_bracket_from_structure(
     Parameters
     ----------
-    atom_array : AtomArray
+    nucleic_acid_strand : AtomArray
         The nucleic acid strand to be represented in DBL-notation.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair, which is passed on to
         :func:`pseudoknots()`.
-    max_pseudoknot_order : int (default: None)
+    max_pseudoknot_order : int
         The maximum pseudoknot order to be found. If a base pair would
         be of a higher order, it is represented as unpaired. If ``None``
         is given, all base pairs are evaluated.
@@ -48,8 +48,9 @@ def dot_bracket_from_structure(
     See Also
     --------
-    base_pairs
-    pseudoknots
+    base_pairs : Compute the base pairs from a structure as passed to this function.
+    dot_bracket : Compute the dot bracket notation directly from base pairs.
+    pseudoknots : Get the pseudoknot order for each base pair.
     References
     ----------
@@ -81,10 +82,9 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
         strand.
     length : int
         The number of bases in the strand.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
-        The score for each base pair, which is passed on to
-        :func:`pseudoknots()`
-    max_pseudoknot_order : int (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
+        The score for each base pair, which is passed on to :func:`pseudoknots()`.
+    max_pseudoknot_order : int
         The maximum pseudoknot order to be found. If a base pair would
         be of a higher order, it is represented as unpaired. If ``None``
         is given, all pseudoknot orders are evaluated.
@@ -94,6 +94,18 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
     notations : list [str, ...]
         The DBL-notation for each solution from :func:`pseudoknots()`.
+    See Also
+    --------
+    base_pairs_from_dot_bracket : The reverse operation.
+    dot_bracket_from_structure : Compute the dot bracket notation from a structure.
+    base_pairs : Compute the base pairs from a structure as passed to this function.
+    pseudoknots : Get the pseudoknot order for each base pair.
+    References
+    ----------
+    .. footbibliography::
     Examples
     --------
     The sequence ``ACGTC`` has a length of 5. If there was to be a
@@ -107,18 +119,6 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
     >>> dot_bracket(basepairs, 5)[0]
     '(..).'
-    See Also
-    --------
-    dot_bracket_from_structure
-    base_pairs
-    pseudoknots
-    References
-    ----------
-    .. footbibliography::
     """
     # Make sure the lower residue is on the left for each row
     basepairs = np.sort(basepairs, axis=1)
@@ -159,6 +159,15 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
         Each row corresponds to the positions of the bases in the
         sequence.
+    See Also
+    --------
+    dot_bracket : The reverse operation.
+    References
+    ----------
+    .. footbibliography::
     Examples
     --------
     The notation string ``'(..).'`` contains a base pair between the
@@ -167,15 +176,6 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
     >>> base_pairs_from_dot_bracket('(..).')
     array([[0, 3]])
-    See Also
-    --------
-    dot_bracket
-    References
-    ----------
-    .. footbibliography::
     """
     basepairs = []
     opened_brackets = [[] for _ in range(len(_OPENING_BRACKETS))]
@@ -203,8 +203,7 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
     for not_closed in opened_brackets:
         if not_closed != []:
             raise ValueError(
-                "Invalid DBL-notation, not all opening brackets have a "
-                "closing bracket"
+                "Invalid DBL-notation, not all opening brackets have a closing bracket"
             )
     # Sort the base pair indices in ascending order

biotite/structure/filter.py CHANGED Viewed

@@ -294,7 +294,9 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
     lies within the provided boundaries.
     The result will depend on the atoms' order.
-    For instance, consider a molecule::
+    For instance, consider a molecule:
+    .. code-block:: none
            C3
            |
@@ -306,12 +308,12 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
     Parameters
     ----------
-    array: AtomArray
+    array : AtomArray
         The array to filter.
-    min_len: float
-        Minmum bond length
-    max_len: float
-        Maximum bond length
+    min_len : float
+        Minimum bond length.
+    max_len : float
+        Maximum bond length.
     Returns
     -------
@@ -364,7 +366,6 @@ def filter_polymer(array, min_size=2, pol_type="peptide"):
     filter : ndarray, dtype=bool
         This array is `True` for all indices in `array`, where atoms belong to
         consecutive polymer entity having at least `min_size` monomers.
     """
     # Import `check_res_id_continuity` here to avoid circular imports
     from biotite.structure.integrity import check_res_id_continuity
@@ -412,7 +413,6 @@ def filter_intersection(array, intersect):
     >>> array1 = array1[filter_intersection(array1, array2)]
     >>> print(array1.chain_id)
     ['B' 'C' 'D']
     """
     filter = np.full(array.array_length(), True, dtype=bool)
     intersect_categories = intersect.get_annotation_categories()