PyPI - biotite - Versions diffs - 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl - Mend

biotite 1.0.0__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +156 -43
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/filter.py +1 -1
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +148 -107
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +227 -68
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +16 -16
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/bonds.pyx CHANGED Viewed

@@ -17,6 +17,7 @@ cimport cython
 cimport numpy as np
 from libc.stdlib cimport free, realloc
+from collections.abc import Sequence
 import itertools
 import numbers
 from enum import IntEnum
@@ -59,6 +60,7 @@ class BondType(IntEnum):
         - `AROMATIC_SINGLE` - Aromatic bond with a single formal bond
         - `AROMATIC_DOUBLE` - Aromatic bond with a double formal bond
         - `AROMATIC_TRIPLE` - Aromatic bond with a triple formal bond
+        - `COORDINATION` - Coordination complex involving a metal atom
     """
     ANY = 0
     SINGLE = 1
@@ -68,6 +70,7 @@ class BondType(IntEnum):
     AROMATIC_SINGLE = 5
     AROMATIC_DOUBLE = 6
     AROMATIC_TRIPLE = 7
+    COORDINATION = 8
     def without_aromaticity(self):
@@ -88,10 +91,12 @@ class BondType(IntEnum):
         >>> print(BondType.AROMATIC_DOUBLE.without_aromaticity().name)
         DOUBLE
         """
-        difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
-        if self >= BondType.AROMATIC_SINGLE:
-            difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
-            return BondType(self - difference)
+        if self == BondType.AROMATIC_SINGLE:
+            return BondType.SINGLE
+        elif self == BondType.AROMATIC_DOUBLE:
+            return BondType.DOUBLE
+        elif self == BondType.AROMATIC_TRIPLE:
+            return BondType.TRIPLE
         else:
             return self
@@ -305,6 +310,61 @@ class BondList(Copyable):
             self._bonds = np.zeros((0, 3), dtype=np.uint32)
             self._max_bonds_per_atom = 0
+    @staticmethod
+    def concatenate(bonds_lists):
+        """
+        Concatenate multiple :class:`BondList` objects into a single
+        :class:`BondList`, respectively.
+        Parameters
+        ----------
+        bonds_lists : iterable object of BondList
+            The bond lists to be concatenated.
+        Returns
+        -------
+        concatenated_bonds : BondList
+            The concatenated bond lists.
+        Examples
+        --------
+        >>> bonds1 = BondList(2, np.array([(0, 1)]))
+        >>> bonds2 = BondList(3, np.array([(0, 1), (0, 2)]))
+        >>> merged_bonds = BondList.concatenate([bonds1, bonds2])
+        >>> print(merged_bonds.get_atom_count())
+        5
+        >>> print(merged_bonds.as_array()[:, :2])
+        [[0 1]
+         [2 3]
+         [2 4]]
+        """
+        # Ensure that the bonds_lists can be iterated over multiple times
+        if not isinstance(bonds_lists, Sequence):
+            bonds_lists = list(bonds_lists)
+        cdef np.ndarray merged_bonds = np.concatenate(
+            [bond_list._bonds for bond_list in bonds_lists]
+        )
+        # Offset the indices of appended bonds list
+        # (consistent with addition of AtomArray)
+        cdef int start = 0, stop = 0
+        cdef int cum_atom_count = 0
+        for bond_list in bonds_lists:
+            stop = start + bond_list._bonds.shape[0]
+            merged_bonds[start : stop, :2] += cum_atom_count
+            cum_atom_count += bond_list._atom_count
+            start = stop
+        cdef merged_bond_list = BondList(cum_atom_count)
+        # Array is not used in constructor to prevent unnecessary
+        # maximum and redundant bond calculation
+        merged_bond_list._bonds = merged_bonds
+        merged_bond_list._max_bonds_per_atom = max(
+            [bond_list._max_bonds_per_atom for bond_list in bonds_lists]
+        )
+        return merged_bond_list
     def __copy_create__(self):
         # Create empty bond list to prevent
         # unnecessary removal of redundant atoms
@@ -453,9 +513,13 @@ class BondList(Copyable):
         0 1 SINGLE
         1 2 DOUBLE
         """
-        bonds = self._bonds
-        difference = BondType.AROMATIC_SINGLE - BondType.SINGLE
-        bonds[bonds[:, 2] >= BondType.AROMATIC_SINGLE, 2] -= difference
+        bond_types = self._bonds[:,2]
+        for aromatic_type, non_aromatic_type in [
+            (BondType.AROMATIC_SINGLE, BondType.SINGLE),
+            (BondType.AROMATIC_DOUBLE, BondType.DOUBLE),
+            (BondType.AROMATIC_TRIPLE, BondType.TRIPLE)
+        ]:
+            bond_types[bond_types == aromatic_type] = non_aromatic_type
     def remove_bond_order(self):
         """
@@ -994,20 +1058,7 @@ class BondList(Copyable):
         )
     def __add__(self, bond_list):
-        cdef np.ndarray merged_bonds \
-            = np.concatenate([self._bonds, bond_list._bonds])
-        # Offset the indices of appended bonds list
-        # (consistent with addition of AtomArray)
-        merged_bonds[len(self._bonds):, :2] += self._atom_count
-        cdef uint32 merged_count = self._atom_count + bond_list._atom_count
-        cdef merged_bond_list = BondList(merged_count)
-        # Array is not used in constructor to prevent unnecessary
-        # maximum and redundant bond calculation
-        merged_bond_list._bonds = merged_bonds
-        merged_bond_list._max_bonds_per_atom = max(
-            self._max_bonds_per_atom, bond_list._max_bonds_per_atom
-        )
-        return merged_bond_list
+        return BondList.concatenate([self, bond_list])
     def __getitem__(self, index):
         ## Variables for both, integer and boolean index arrays

biotite/structure/celllist.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/structure/charges.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/structure/filter.py CHANGED Viewed

@@ -577,7 +577,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
         if len(letter_altloc_ids) > 0:
             highest = -1.0
             highest_id = None
-            for id in set(letter_altloc_ids):
+            for id in sorted(set(letter_altloc_ids)):
                 occupancy_sum = np.sum(occupancies_in_res[altloc_ids_in_res == id])
                 if occupancy_sum > highest:
                     highest = occupancy_sum

biotite/structure/geometry.py CHANGED Viewed

@@ -25,10 +25,12 @@ __all__ = [
 import numpy as np
 from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
 from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
-from biotite.structure.chains import chain_iter
-from biotite.structure.error import BadStructureError
-from biotite.structure.filter import filter_peptide_backbone
-from biotite.structure.util import norm_vector, vector_dot
+from biotite.structure.filter import filter_amino_acids
+from biotite.structure.util import (
+    coord_for_atom_name_per_residue,
+    norm_vector,
+    vector_dot,
+)
 def displacement(atoms1, atoms2, box=None):
@@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):
 def dihedral_backbone(atom_array):
     """
-    Measure the characteristic backbone dihedral angles of a protein
-    structure.
+    Measure the characteristic backbone dihedral angles of a chain.
     Parameters
     ----------
-    atom_array: AtomArray or AtomArrayStack
-        The protein structure. A complete backbone, without gaps,
-        is required here.
-        Chain transitions are allowed, the angles at the transition are
-        `NaN`.
-        The order of the backbone atoms for each residue must be
-        (N, CA, C).
+    atoms: AtomArray or AtomArrayStack
+        The protein structure to measure the dihedral angles for.
+        For missing backbone atoms the corresponding angles are `NaN`.
     Returns
     -------
     phi, psi, omega : ndarray
-        An array containing the 3 backbone dihedral angles for every
-        CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega'
-        are not defined at the C-terminus. In these places the arrays
-        have *NaN* values. If an :class:`AtomArrayStack` is given, the
-        output angles are 2-dimensional, the first dimension corresponds
-        to the model number.
-    Raises
-    ------
-    BadStructureError
-        If the amount of backbone atoms is not equal to amount of
-        residues times 3 (for N, CA and C).
-    See Also
-    --------
-    dihedral
-    Examples
-    --------
-    >>> phi, psi, omega = dihedral_backbone(atom_array)
-    >>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
-    [[     nan  -56.145]
-     [ -43.980  -51.309]
-     [ -66.466  -30.898]
-     [ -65.219  -45.945]
-     [ -64.747  -30.346]
-     [ -73.136  -43.425]
-     [ -64.882  -43.255]
-     [ -59.509  -25.698]
-     [ -77.989   -8.823]
-     [ 110.784    8.079]
-     [  55.244 -124.371]
-     [ -57.983  -28.766]
-     [ -81.834   19.125]
-     [-124.057   13.401]
-     [  67.931   25.218]
-     [-143.952  131.297]
-     [ -70.100  160.068]
-     [ -69.484  145.669]
-     [ -77.264  124.223]
-     [ -78.100      nan]]
+        An array containing the 3 backbone dihedral angles for every CA atom.
+        `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
+        C-terminus.
+        In these places the arrays have *NaN* values.
+        If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
+        the first dimension corresponds to the model number.
     """
-    bb_filter = filter_peptide_backbone(atom_array)
-    backbone = atom_array[..., bb_filter]
-    if (
-        backbone.array_length() % 3 != 0
-        or (backbone.atom_name[0::3] != "N").any()
-        or (backbone.atom_name[1::3] != "CA").any()
-        or (backbone.atom_name[2::3] != "C").any()
-    ):
-        raise BadStructureError(
-            "The backbone is invalid, must be repeats of (N, CA, C), "
-            "maybe a backbone atom is missing"
-        )
-    phis = []
-    psis = []
-    omegas = []
-    for chain_bb in chain_iter(backbone):
-        phi, psi, omega = _dihedral_backbone(chain_bb)
-        phis.append(phi)
-        psis.append(psi)
-        omegas.append(omega)
-    return (
-        np.concatenate(phis, axis=-1),
-        np.concatenate(psis, axis=-1),
-        np.concatenate(omegas, axis=-1),
-    )
+    amino_acid_mask = filter_amino_acids(atom_array)
+    # Coordinates for dihedral angle calculation
+    coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
+        atom_array,
+        ("N", "CA", "C"),
+        amino_acid_mask,
+    )
+    n_residues = coord_n.shape[-2]
-def _dihedral_backbone(chain_bb):
-    bb_coord = chain_bb.coord
     # Coordinates for dihedral angle calculation
     # Dim 0: Model index (only for atom array stacks)
     # Dim 1: Angle index
     # Dim 2: X, Y, Z coordinates
     # Dim 3: Atoms involved in dihedral angle
-    if isinstance(chain_bb, AtomArray):
-        angle_coord_shape = (len(bb_coord) // 3, 3, 4)
-    elif isinstance(chain_bb, AtomArrayStack):
-        angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4)
-    phi_coord = np.full(angle_coord_shape, np.nan)
-    psi_coord = np.full(angle_coord_shape, np.nan)
-    omega_coord = np.full(angle_coord_shape, np.nan)
-    # Indices for coordinates of CA atoms
-    ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
+    if isinstance(atom_array, AtomArray):
+        angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
+    elif isinstance(atom_array, AtomArrayStack):
+        angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
+    coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)
     # fmt: off
-    phi_coord  [..., 1:,  :, 0] = bb_coord[..., ca_i[1: ]-2, :]
-    phi_coord  [..., 1:,  :, 1] = bb_coord[..., ca_i[1: ]-1, :]
-    phi_coord  [..., 1:,  :, 2] = bb_coord[..., ca_i[1: ],   :]
-    phi_coord  [..., 1:,  :, 3] = bb_coord[..., ca_i[1: ]+1, :]
-    psi_coord  [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :]
-    psi_coord  [..., :-1, :, 1] = bb_coord[..., ca_i[:-1],   :]
-    psi_coord  [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :]
-    psi_coord  [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1],   :]
-    omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :]
-    omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :]
+    coord_for_phi[..., 1:,   :, 0] =  coord_c[..., 0:-1, :]
+    coord_for_phi[..., 1:,   :, 1] =  coord_n[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 2] = coord_ca[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 3] =  coord_c[..., 1:,   :]
+    coord_for_psi[..., 0:-1, :, 0] =  coord_n[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 2] =  coord_c[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 3] =  coord_n[..., 1:,   :]
+    coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 1] =  coord_c[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 2] =  coord_n[..., 1:,   :]
+    coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:,   :]
     # fmt: on
     phi = dihedral(
-        phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3]
+        coord_for_phi[..., 0],
+        coord_for_phi[..., 1],
+        coord_for_phi[..., 2],
+        coord_for_phi[..., 3],
     )
     psi = dihedral(
-        psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3]
+        coord_for_psi[..., 0],
+        coord_for_psi[..., 1],
+        coord_for_psi[..., 2],
+        coord_for_psi[..., 3],
     )
-    omega = dihedral(
-        omega_coord[..., 0],
-        omega_coord[..., 1],
-        omega_coord[..., 2],
-        omega_coord[..., 3],
+    omg = dihedral(
+        coord_for_omg[..., 0],
+        coord_for_omg[..., 1],
+        coord_for_omg[..., 2],
+        coord_for_omg[..., 3],
     )
-    return phi, psi, omega
+    return phi, psi, omg
 def centroid(atoms):

biotite/structure/info/__init__.py CHANGED Viewed

@@ -16,6 +16,7 @@ __author__ = "Patrick Kunzmann, Tom David Müller"
 from .atoms import *
 from .bonds import *
+from .ccd import *
 from .groups import *
 from .masses import *
 from .misc import *

biotite/structure/info/atoms.py CHANGED Viewed

@@ -42,19 +42,19 @@ def residue(res_name):
     >>> alanine = residue("ALA")
     >>> # Atoms and geometry
     >>> print(alanine)
-                0  ALA N      N        -0.970    0.490    1.500
-                0  ALA CA     C         0.260    0.420    0.690
-                0  ALA C      C        -0.090    0.020   -0.720
-                0  ALA O      O        -1.060   -0.680   -0.920
-                0  ALA CB     C         1.200   -0.620    1.300
-                0  ALA OXT    O         0.660    0.440   -1.740
-                0  ALA H      H        -1.380   -0.420    1.480
-                0  ALA H2     H        -0.680    0.660    2.450
-                0  ALA HA     H         0.750    1.390    0.680
-                0  ALA HB1    H         1.460   -0.330    2.320
-                0  ALA HB2    H         0.720   -1.590    1.310
-                0  ALA HB3    H         2.110   -0.680    0.700
-                0  ALA HXT    H         0.440    0.180   -2.650
+                0  ALA N      N        -0.966    0.493    1.500
+                0  ALA CA     C         0.257    0.418    0.692
+                0  ALA C      C        -0.094    0.017   -0.716
+                0  ALA O      O        -1.056   -0.682   -0.923
+                0  ALA CB     C         1.204   -0.620    1.296
+                0  ALA OXT    O         0.661    0.439   -1.742
+                0  ALA H      H        -1.383   -0.425    1.482
+                0  ALA H2     H        -0.676    0.661    2.452
+                0  ALA HA     H         0.746    1.392    0.682
+                0  ALA HB1    H         1.459   -0.330    2.316
+                0  ALA HB2    H         0.715   -1.594    1.307
+                0  ALA HB3    H         2.113   -0.676    0.697
+                0  ALA HXT    H         0.435    0.182   -2.647
     >>> # Bonds
     >>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
     [['N' 'CA']

biotite/structure/info/bonds.py CHANGED Viewed

@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
 __all__ = ["bond_type", "bonds_in_residue"]
+import functools
 from biotite.structure.bonds import BondType
 from biotite.structure.info.ccd import get_from_ccd
@@ -69,6 +70,7 @@ def bond_type(res_name, atom_name1, atom_name2):
         return None
+@functools.cache
 def bonds_in_residue(res_name):
     """
     Get a dictionary containing all atoms inside a given residue
@@ -94,6 +96,10 @@ def bonds_in_residue(res_name):
     In other functionalities throughout *Biotite* that uses this
     function.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
     Examples
     --------
     >>> bonds = bonds_in_residue("PHE")
@@ -126,16 +132,16 @@ def bonds_in_residue(res_name):
     """
     global _intra_bonds
     if res_name not in _intra_bonds:
-        chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
-        if chem_comp_bond_dict is None:
+        chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
+        if chem_comp_bond is None:
             _intra_bonds[res_name] = {}
         else:
             bonds_for_residue = {}
             for atom1, atom2, order, aromatic_flag in zip(
-                chem_comp_bond_dict["atom_id_1"],
-                chem_comp_bond_dict["atom_id_2"],
-                chem_comp_bond_dict["value_order"],
-                chem_comp_bond_dict["pdbx_aromatic_flag"],
+                chem_comp_bond["atom_id_1"].as_array(),
+                chem_comp_bond["atom_id_2"].as_array(),
+                chem_comp_bond["value_order"].as_array(),
+                chem_comp_bond["pdbx_aromatic_flag"].as_array(),
             ):
                 bond_type = BOND_TYPES[order, aromatic_flag]
                 bonds_for_residue[atom1.item(), atom2.item()] = bond_type

biotite/structure/info/ccd.py CHANGED Viewed

@@ -4,23 +4,23 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["get_ccd", "get_from_ccd"]
+__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
+import functools
+import importlib
+import inspect
+import pkgutil
 from pathlib import Path
 import numpy as np
-CCD_DIR = Path(__file__).parent / "ccd"
-INDEX_COLUMN_NAME = {
+_CCD_FILE = Path(__file__).parent / "components.bcif"
+_SPECIAL_ID_COLUMN_NAMES = {
     "chem_comp": "id",
-    "chem_comp_atom": "comp_id",
-    "chem_comp_bond": "comp_id",
 }
-_ccd_block = None
-# For each category this index gives the start and stop for each residue
-_residue_index = {}
+_DEFAULT_ID_COLUMN_NAME = "comp_id"
+@functools.cache
 def get_ccd():
     """
     Get the internal subset of the PDB
@@ -29,8 +29,16 @@ def get_ccd():
     Returns
     -------
-    ccd : BinaryCIFFile
+    ccd : BinaryCIFBlock
         The CCD.
+        It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
+    Warnings
+    --------
+    Consider the return value as read-only.
+    As other functions cache data from it, changing data may lead to undefined
+    behavior.
     References
     ----------
@@ -41,13 +49,49 @@ def get_ccd():
     # Avoid circular import
     from biotite.structure.io.pdbx.bcif import BinaryCIFFile
-    global _ccd_block
-    if _ccd_block is None:
-        # Load CCD once and cache it for subsequent calls
-        _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
-    return _ccd_block
+    try:
+        return BinaryCIFFile.read(_CCD_FILE).block
+    except FileNotFoundError:
+        raise RuntimeError(
+            "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
+        )
+def set_ccd_path(ccd_path):
+    """
+    Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
+    This function also clears the cache of functions depending on the CCD to ensure
+    that the new CCD is used.
+    Parameters
+    ----------
+    ccd_path : path-like
+        The path to the custom CCD in BinaryCIF format, prepared with the
+        ``setup_ccd.py`` module.
+    Notes
+    -----
+    This function is intended for advanced users who need to add information for
+    compounds, which are not part of the internal CCD.
+    The reason might be that an updated version already exists upstream or that
+    the user wants to add custom compounds to the CCD.
+    """
+    global _CCD_FILE
+    _CCD_FILE = Path(ccd_path)
+    # Clear caches in all functions in biotite.structure.info
+    info_modules = [
+        importlib.import_module(f"biotite.structure.info.{mod_name}")
+        for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
+    ]
+    for module in info_modules:
+        for _, function in inspect.getmembers(module, callable):
+            if hasattr(function, "cache_clear"):
+                function.cache_clear()
+@functools.cache
 def get_from_ccd(category_name, comp_id, column_name=None):
     """
     Get the rows for the given residue in the given category from the
@@ -67,9 +111,13 @@ def get_from_ccd(category_name, comp_id, column_name=None):
     Returns
     -------
-    value : ndarray or dict or None
-        The array of the given column or all columns as dictionary.
-        ``None`` if the `comp_id` is not found in the category.
+    slice : BinaryCIFCategory or BinaryCIFColumn
+        The category or column (if `column_name` is provided) containing only the rows
+        for the given residue.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
     References
     ----------
@@ -77,28 +125,41 @@ def get_from_ccd(category_name, comp_id, column_name=None):
     .. footbibliography::
     """
-    global _residue_index
-    ccd = get_ccd()
-    category = ccd[category_name]
-    if category_name not in _residue_index:
-        _residue_index[category_name] = _index_residues(
-            category[INDEX_COLUMN_NAME[category_name]].as_array()
-        )
     try:
-        start, stop = _residue_index[category_name][comp_id]
+        start, stop = _residue_index(category_name)[comp_id]
     except KeyError:
         return None
+    category = get_ccd()[category_name]
     if column_name is None:
-        return {
-            col_name: category[col_name].as_array()[start:stop]
-            for col_name in category.keys()
-        }
+        return _filter_category(category, slice(start, stop))
     else:
-        return category[column_name].as_array()[start:stop]
+        return _filter_column(category[column_name], slice(start, stop))
+@functools.cache
+def _residue_index(category_name):
+    """
+    Get the start and stop index for each component name in the given
+    CCD category.
+    Parameters
+    ----------
+    category_name : str
+        The category to determine start and stop indices for each component in.
+    Returns
+    -------
+    index : dict (str -> (int, int))
+        The index maps each present component name to the corresponding
+        start and exclusive stop index in `id_column`.
+    """
+    category = get_ccd()[category_name]
+    id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
+        category_name, _DEFAULT_ID_COLUMN_NAME
+    )
+    id_column = category[id_column_name].as_array()
-def _index_residues(id_column):
     residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
     # The final start is the exclusive stop of last residue
     residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
@@ -107,3 +168,35 @@ def _index_residues(id_column):
         comp_id = id_column[residue_starts[i]].item()
         index[comp_id] = (residue_starts[i], residue_starts[i + 1])
     return index
+def _filter_category(category, index):
+    """
+    Reduce the category to the values for the given index.∂
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
+    return BinaryCIFCategory(
+        {key: _filter_column(column, index) for key, column in category.items()}
+    )
+def _filter_column(column, index):
+    """
+    Reduce the column to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
+    from biotite.structure.io.pdbx.component import MaskValue
+    data_array = column.data.array[index]
+    mask_array = column.mask.array[index] if column.mask is not None else None
+    return BinaryCIFColumn(
+        BinaryCIFData(data_array),
+        (
+            BinaryCIFData(mask_array)
+            if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
+            else None
+        ),
+    )

biotite/structure/info/{ccd/components.bcif → components.bcif} RENAMED Viewed

Binary file