PyPI - biotite - Versions diffs - 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl - Mend

biotite 1.0.1__cp312-cp312-macosx_11_0_arm64.whl → 1.1.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show

biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +34 -0
biotite/application/muscle/app3.py +2 -15
biotite/application/muscle/app5.py +2 -2
biotite/application/util.py +1 -1
biotite/application/viennarna/rnaplot.py +6 -2
biotite/database/rcsb/query.py +6 -6
biotite/database/uniprot/check.py +20 -15
biotite/database/uniprot/download.py +1 -1
biotite/database/uniprot/query.py +1 -1
biotite/sequence/align/alignment.py +16 -3
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +5 -5
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +17 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +52 -42
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/matrix.py +273 -55
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/alphabet.py +3 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/profile.py +86 -4
biotite/sequence/seqtypes.py +124 -3
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +4 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +129 -40
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +72 -21
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/geometry.py +60 -113
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +13 -13
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -32
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +63 -17
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -21
biotite/structure/info/standardize.py +3 -2
biotite/structure/io/mol/sdf.py +41 -40
biotite/structure/io/pdb/convert.py +2 -0
biotite/structure/io/pdb/file.py +74 -3
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +32 -8
biotite/structure/io/pdbx/cif.py +72 -59
biotite/structure/io/pdbx/component.py +9 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +194 -48
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/molecules.py +141 -141
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/segments.py +1 -2
biotite/structure/util.py +73 -1
biotite/version.py +2 -2
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/METADATA +3 -1
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/RECORD +86 -76
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/geometry.py CHANGED Viewed

@@ -25,10 +25,12 @@ __all__ = [
 import numpy as np
 from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
 from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
-from biotite.structure.chains import chain_iter
-from biotite.structure.error import BadStructureError
-from biotite.structure.filter import filter_peptide_backbone
-from biotite.structure.util import norm_vector, vector_dot
+from biotite.structure.filter import filter_amino_acids
+from biotite.structure.util import (
+    coord_for_atom_name_per_residue,
+    norm_vector,
+    vector_dot,
+)
 def displacement(atoms1, atoms2, box=None):
@@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):
 def dihedral_backbone(atom_array):
     """
-    Measure the characteristic backbone dihedral angles of a protein
-    structure.
+    Measure the characteristic backbone dihedral angles of a chain.
     Parameters
     ----------
-    atom_array: AtomArray or AtomArrayStack
-        The protein structure. A complete backbone, without gaps,
-        is required here.
-        Chain transitions are allowed, the angles at the transition are
-        `NaN`.
-        The order of the backbone atoms for each residue must be
-        (N, CA, C).
+    atom_array: AtomArray or AtomArrayStack
+        The protein structure to measure the dihedral angles for.
+        For missing backbone atoms the corresponding angles are `NaN`.
     Returns
     -------
     phi, psi, omega : ndarray
-        An array containing the 3 backbone dihedral angles for every
-        CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega'
-        are not defined at the C-terminus. In these places the arrays
-        have *NaN* values. If an :class:`AtomArrayStack` is given, the
-        output angles are 2-dimensional, the first dimension corresponds
-        to the model number.
-    Raises
-    ------
-    BadStructureError
-        If the amount of backbone atoms is not equal to amount of
-        residues times 3 (for N, CA and C).
-    See Also
-    --------
-    dihedral
-    Examples
-    --------
-    >>> phi, psi, omega = dihedral_backbone(atom_array)
-    >>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
-    [[     nan  -56.145]
-     [ -43.980  -51.309]
-     [ -66.466  -30.898]
-     [ -65.219  -45.945]
-     [ -64.747  -30.346]
-     [ -73.136  -43.425]
-     [ -64.882  -43.255]
-     [ -59.509  -25.698]
-     [ -77.989   -8.823]
-     [ 110.784    8.079]
-     [  55.244 -124.371]
-     [ -57.983  -28.766]
-     [ -81.834   19.125]
-     [-124.057   13.401]
-     [  67.931   25.218]
-     [-143.952  131.297]
-     [ -70.100  160.068]
-     [ -69.484  145.669]
-     [ -77.264  124.223]
-     [ -78.100      nan]]
+        An array containing the 3 backbone dihedral angles for every CA atom.
+        `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
+        C-terminus.
+        In these places the arrays have *NaN* values.
+        If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
+        the first dimension corresponds to the model number.
     """
-    bb_filter = filter_peptide_backbone(atom_array)
-    backbone = atom_array[..., bb_filter]
-    if (
-        backbone.array_length() % 3 != 0
-        or (backbone.atom_name[0::3] != "N").any()
-        or (backbone.atom_name[1::3] != "CA").any()
-        or (backbone.atom_name[2::3] != "C").any()
-    ):
-        raise BadStructureError(
-            "The backbone is invalid, must be repeats of (N, CA, C), "
-            "maybe a backbone atom is missing"
-        )
-    phis = []
-    psis = []
-    omegas = []
-    for chain_bb in chain_iter(backbone):
-        phi, psi, omega = _dihedral_backbone(chain_bb)
-        phis.append(phi)
-        psis.append(psi)
-        omegas.append(omega)
-    return (
-        np.concatenate(phis, axis=-1),
-        np.concatenate(psis, axis=-1),
-        np.concatenate(omegas, axis=-1),
-    )
+    amino_acid_mask = filter_amino_acids(atom_array)
+    # Coordinates for dihedral angle calculation
+    coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
+        atom_array,
+        ("N", "CA", "C"),
+        amino_acid_mask,
+    )
+    n_residues = coord_n.shape[-2]
-def _dihedral_backbone(chain_bb):
-    bb_coord = chain_bb.coord
     # Coordinates for dihedral angle calculation
     # Dim 0: Model index (only for atom array stacks)
     # Dim 1: Angle index
     # Dim 2: X, Y, Z coordinates
     # Dim 3: Atoms involved in dihedral angle
-    if isinstance(chain_bb, AtomArray):
-        angle_coord_shape = (len(bb_coord) // 3, 3, 4)
-    elif isinstance(chain_bb, AtomArrayStack):
-        angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4)
-    phi_coord = np.full(angle_coord_shape, np.nan)
-    psi_coord = np.full(angle_coord_shape, np.nan)
-    omega_coord = np.full(angle_coord_shape, np.nan)
-    # Indices for coordinates of CA atoms
-    ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
+    if isinstance(atom_array, AtomArray):
+        angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
+    elif isinstance(atom_array, AtomArrayStack):
+        angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
+    coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)
     # fmt: off
-    phi_coord  [..., 1:,  :, 0] = bb_coord[..., ca_i[1: ]-2, :]
-    phi_coord  [..., 1:,  :, 1] = bb_coord[..., ca_i[1: ]-1, :]
-    phi_coord  [..., 1:,  :, 2] = bb_coord[..., ca_i[1: ],   :]
-    phi_coord  [..., 1:,  :, 3] = bb_coord[..., ca_i[1: ]+1, :]
-    psi_coord  [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :]
-    psi_coord  [..., :-1, :, 1] = bb_coord[..., ca_i[:-1],   :]
-    psi_coord  [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :]
-    psi_coord  [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1],   :]
-    omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :]
-    omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :]
+    coord_for_phi[..., 1:,   :, 0] =  coord_c[..., 0:-1, :]
+    coord_for_phi[..., 1:,   :, 1] =  coord_n[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 2] = coord_ca[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 3] =  coord_c[..., 1:,   :]
+    coord_for_psi[..., 0:-1, :, 0] =  coord_n[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 2] =  coord_c[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 3] =  coord_n[..., 1:,   :]
+    coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 1] =  coord_c[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 2] =  coord_n[..., 1:,   :]
+    coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:,   :]
     # fmt: on
     phi = dihedral(
-        phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3]
+        coord_for_phi[..., 0],
+        coord_for_phi[..., 1],
+        coord_for_phi[..., 2],
+        coord_for_phi[..., 3],
     )
     psi = dihedral(
-        psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3]
+        coord_for_psi[..., 0],
+        coord_for_psi[..., 1],
+        coord_for_psi[..., 2],
+        coord_for_psi[..., 3],
     )
-    omega = dihedral(
-        omega_coord[..., 0],
-        omega_coord[..., 1],
-        omega_coord[..., 2],
-        omega_coord[..., 3],
+    omg = dihedral(
+        coord_for_omg[..., 0],
+        coord_for_omg[..., 1],
+        coord_for_omg[..., 2],
+        coord_for_omg[..., 3],
     )
-    return phi, psi, omega
+    return phi, psi, omg
 def centroid(atoms):

biotite/structure/info/__init__.py CHANGED Viewed

@@ -16,6 +16,7 @@ __author__ = "Patrick Kunzmann, Tom David Müller"
 from .atoms import *
 from .bonds import *
+from .ccd import *
 from .groups import *
 from .masses import *
 from .misc import *

biotite/structure/info/atoms.py CHANGED Viewed

@@ -42,19 +42,19 @@ def residue(res_name):
     >>> alanine = residue("ALA")
     >>> # Atoms and geometry
     >>> print(alanine)
-                0  ALA N      N        -0.970    0.490    1.500
-                0  ALA CA     C         0.260    0.420    0.690
-                0  ALA C      C        -0.090    0.020   -0.720
-                0  ALA O      O        -1.060   -0.680   -0.920
-                0  ALA CB     C         1.200   -0.620    1.300
-                0  ALA OXT    O         0.660    0.440   -1.740
-                0  ALA H      H        -1.380   -0.420    1.480
-                0  ALA H2     H        -0.680    0.660    2.450
-                0  ALA HA     H         0.750    1.390    0.680
-                0  ALA HB1    H         1.460   -0.330    2.320
-                0  ALA HB2    H         0.720   -1.590    1.310
-                0  ALA HB3    H         2.110   -0.680    0.700
-                0  ALA HXT    H         0.440    0.180   -2.650
+                0  ALA N      N        -0.966    0.493    1.500
+                0  ALA CA     C         0.257    0.418    0.692
+                0  ALA C      C        -0.094    0.017   -0.716
+                0  ALA O      O        -1.056   -0.682   -0.923
+                0  ALA CB     C         1.204   -0.620    1.296
+                0  ALA OXT    O         0.661    0.439   -1.742
+                0  ALA H      H        -1.383   -0.425    1.482
+                0  ALA H2     H        -0.676    0.661    2.452
+                0  ALA HA     H         0.746    1.392    0.682
+                0  ALA HB1    H         1.459   -0.330    2.316
+                0  ALA HB2    H         0.715   -1.594    1.307
+                0  ALA HB3    H         2.113   -0.676    0.697
+                0  ALA HXT    H         0.435    0.182   -2.647
     >>> # Bonds
     >>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
     [['N' 'CA']

biotite/structure/info/bonds.py CHANGED Viewed

@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
 __all__ = ["bond_type", "bonds_in_residue"]
+import functools
 from biotite.structure.bonds import BondType
 from biotite.structure.info.ccd import get_from_ccd
@@ -69,6 +70,7 @@ def bond_type(res_name, atom_name1, atom_name2):
         return None
+@functools.cache
 def bonds_in_residue(res_name):
     """
     Get a dictionary containing all atoms inside a given residue
@@ -94,6 +96,10 @@ def bonds_in_residue(res_name):
     In other functionalities throughout *Biotite* that uses this
     function.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
     Examples
     --------
     >>> bonds = bonds_in_residue("PHE")
@@ -126,16 +132,16 @@ def bonds_in_residue(res_name):
     """
     global _intra_bonds
     if res_name not in _intra_bonds:
-        chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
-        if chem_comp_bond_dict is None:
+        chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
+        if chem_comp_bond is None:
             _intra_bonds[res_name] = {}
         else:
             bonds_for_residue = {}
             for atom1, atom2, order, aromatic_flag in zip(
-                chem_comp_bond_dict["atom_id_1"],
-                chem_comp_bond_dict["atom_id_2"],
-                chem_comp_bond_dict["value_order"],
-                chem_comp_bond_dict["pdbx_aromatic_flag"],
+                chem_comp_bond["atom_id_1"].as_array(),
+                chem_comp_bond["atom_id_2"].as_array(),
+                chem_comp_bond["value_order"].as_array(),
+                chem_comp_bond["pdbx_aromatic_flag"].as_array(),
             ):
                 bond_type = BOND_TYPES[order, aromatic_flag]
                 bonds_for_residue[atom1.item(), atom2.item()] = bond_type

biotite/structure/info/ccd.py CHANGED Viewed

@@ -4,23 +4,23 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["get_ccd", "get_from_ccd"]
+__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
+import functools
+import importlib
+import inspect
+import pkgutil
 from pathlib import Path
 import numpy as np
-CCD_DIR = Path(__file__).parent / "ccd"
-INDEX_COLUMN_NAME = {
+_CCD_FILE = Path(__file__).parent / "components.bcif"
+_SPECIAL_ID_COLUMN_NAMES = {
     "chem_comp": "id",
-    "chem_comp_atom": "comp_id",
-    "chem_comp_bond": "comp_id",
 }
-_ccd_block = None
-# For each category this index gives the start and stop for each residue
-_residue_index = {}
+_DEFAULT_ID_COLUMN_NAME = "comp_id"
+@functools.cache
 def get_ccd():
     """
     Get the internal subset of the PDB
@@ -29,8 +29,16 @@ def get_ccd():
     Returns
     -------
-    ccd : BinaryCIFFile
+    ccd : BinaryCIFBlock
         The CCD.
+        It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
+    Warnings
+    --------
+    Consider the return value as read-only.
+    As other functions cache data from it, changing data may lead to undefined
+    behavior.
     References
     ----------
@@ -41,13 +49,49 @@ def get_ccd():
     # Avoid circular import
     from biotite.structure.io.pdbx.bcif import BinaryCIFFile
-    global _ccd_block
-    if _ccd_block is None:
-        # Load CCD once and cache it for subsequent calls
-        _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
-    return _ccd_block
+    try:
+        return BinaryCIFFile.read(_CCD_FILE).block
+    except FileNotFoundError:
+        raise RuntimeError(
+            "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
+        )
+def set_ccd_path(ccd_path):
+    """
+    Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
+    This function also clears the cache of functions depending on the CCD to ensure
+    that the new CCD is used.
+    Parameters
+    ----------
+    ccd_path : path-like
+        The path to the custom CCD in BinaryCIF format, prepared with the
+        ``setup_ccd.py`` module.
+    Notes
+    -----
+    This function is intended for advanced users who need to add information for
+    compounds, which are not part of the internal CCD.
+    The reason might be that an updated version already exists upstream or that
+    the user wants to add custom compounds to the CCD.
+    """
+    global _CCD_FILE
+    _CCD_FILE = Path(ccd_path)
+    # Clear caches in all functions in biotite.structure.info
+    info_modules = [
+        importlib.import_module(f"biotite.structure.info.{mod_name}")
+        for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
+    ]
+    for module in info_modules:
+        for _, function in inspect.getmembers(module, callable):
+            if hasattr(function, "cache_clear"):
+                function.cache_clear()
+@functools.cache
 def get_from_ccd(category_name, comp_id, column_name=None):
     """
     Get the rows for the given residue in the given category from the
@@ -67,9 +111,13 @@ def get_from_ccd(category_name, comp_id, column_name=None):
     Returns
     -------
-    value : ndarray or dict or None
-        The array of the given column or all columns as dictionary.
-        ``None`` if the `comp_id` is not found in the category.
+    slice : BinaryCIFCategory or BinaryCIFColumn
+        The category or column (if `column_name` is provided) containing only the rows
+        for the given residue.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
     References
     ----------
@@ -77,28 +125,41 @@ def get_from_ccd(category_name, comp_id, column_name=None):
     .. footbibliography::
     """
-    global _residue_index
-    ccd = get_ccd()
-    category = ccd[category_name]
-    if category_name not in _residue_index:
-        _residue_index[category_name] = _index_residues(
-            category[INDEX_COLUMN_NAME[category_name]].as_array()
-        )
     try:
-        start, stop = _residue_index[category_name][comp_id]
+        start, stop = _residue_index(category_name)[comp_id]
     except KeyError:
         return None
+    category = get_ccd()[category_name]
     if column_name is None:
-        return {
-            col_name: category[col_name].as_array()[start:stop]
-            for col_name in category.keys()
-        }
+        return _filter_category(category, slice(start, stop))
     else:
-        return category[column_name].as_array()[start:stop]
+        return _filter_column(category[column_name], slice(start, stop))
+@functools.cache
+def _residue_index(category_name):
+    """
+    Get the start and stop index for each component name in the given
+    CCD category.
+    Parameters
+    ----------
+    category_name : str
+        The category to determine start and stop indices for each component in.
+    Returns
+    -------
+    index : dict (str -> (int, int))
+        The index maps each present component name to the corresponding
+        start and exclusive stop index in `id_column`.
+    """
+    category = get_ccd()[category_name]
+    id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
+        category_name, _DEFAULT_ID_COLUMN_NAME
+    )
+    id_column = category[id_column_name].as_array()
-def _index_residues(id_column):
     residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
     # The final start is the exclusive stop of last residue
     residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
@@ -107,3 +168,35 @@ def _index_residues(id_column):
         comp_id = id_column[residue_starts[i]].item()
         index[comp_id] = (residue_starts[i], residue_starts[i + 1])
     return index
+def _filter_category(category, index):
+    """
+    Reduce the category to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
+    return BinaryCIFCategory(
+        {key: _filter_column(column, index) for key, column in category.items()}
+    )
+def _filter_column(column, index):
+    """
+    Reduce the column to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
+    from biotite.structure.io.pdbx.component import MaskValue
+    data_array = column.data.array[index]
+    mask_array = column.mask.array[index] if column.mask is not None else None
+    return BinaryCIFColumn(
+        BinaryCIFData(data_array),
+        (
+            BinaryCIFData(mask_array)
+            if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
+            else None
+        ),
+    )

biotite/structure/info/{ccd/components.bcif → components.bcif} RENAMED Viewed

Binary file

biotite/structure/info/groups.py CHANGED Viewed

@@ -6,14 +6,45 @@ __name__ = "biotite.structure.info"
 __author__ = "Tom David Müller, Patrick Kunzmann"
 __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
-from pathlib import Path
-CCD_DIR = Path(__file__).parent / "ccd"
-group_lists = {}
+import functools
+import numpy as np
+from biotite.structure.info.ccd import get_ccd
+_AMINO_ACID_TYPES = [
+    "D-beta-peptide, C-gamma linking",
+    "D-gamma-peptide, C-delta linking",
+    "D-peptide COOH carboxy terminus",
+    "D-peptide NH3 amino terminus",
+    "D-peptide linking",
+    "L-beta-peptide, C-gamma linking",
+    "L-gamma-peptide, C-delta linking",
+    "L-peptide COOH carboxy terminus",
+    "L-peptide NH3 amino terminus",
+    "L-peptide linking",
+    "peptide linking",
+]
+_NUCLEOTIDE_TYPES = [
+    "DNA OH 3 prime terminus",
+    "DNA OH 5 prime terminus",
+    "DNA linking",
+    "L-DNA linking",
+    "L-RNA linking",
+    "RNA OH 3 prime terminus",
+    "RNA OH 5 prime terminus",
+    "RNA linking",
+]
+_CARBOHYDRATE_TYPES = [
+    "D-saccharide",
+    "D-saccharide, alpha linking",
+    "D-saccharide, beta linking",
+    "L-saccharide",
+    "L-saccharide, alpha linking",
+    "L-saccharide, beta linking",
+    "saccharide",
+]
+@functools.cache
 def amino_acid_names():
     """
     Get a tuple of amino acid three-letter codes according to the
@@ -32,9 +63,10 @@ def amino_acid_names():
     .. footbibliography::
     """
-    return _get_group_members("amino_acids")
+    return _get_group_members(_AMINO_ACID_TYPES)
+@functools.cache
 def nucleotide_names():
     """
     Get a tuple of nucleotide three-letter codes according to the
@@ -53,9 +85,10 @@ def nucleotide_names():
     .. footbibliography::
     """
-    return _get_group_members("nucleotides")
+    return _get_group_members(_NUCLEOTIDE_TYPES)
+@functools.cache
 def carbohydrate_names():
     """
     Get a tuple of carbohydrate three-letter codes according to the
@@ -74,12 +107,25 @@ def carbohydrate_names():
     .. footbibliography::
     """
-    return _get_group_members("carbohydrates")
+    return _get_group_members(_CARBOHYDRATE_TYPES)
+def _get_group_members(match_types):
+    """
+    Identify component IDs that match a given component *type* from the CCD.
+    Parameters
+    ----------
+    match_types : list of str
+        The component types to extract.
-def _get_group_members(group_name):
-    global group_lists
-    if group_name not in group_lists:
-        with open(CCD_DIR / f"{group_name}.txt", "r") as file:
-            group_lists[group_name] = tuple(file.read().split())
-    return group_lists[group_name]
+    Returns
+    -------
+    comp_ids : list of str
+        The extracted component IDs.
+    """
+    category = get_ccd()["chem_comp"]
+    comp_ids = category["id"].as_array()
+    types = category["type"].as_array()
+    # Ignore case
+    return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()

biotite/structure/info/masses.py CHANGED Viewed

@@ -95,15 +95,11 @@ def mass(item, is_residue=None):
         if is_residue is None:
             result_mass = _atom_masses.get(item.upper())
             if result_mass is None:
-                result_mass = get_from_ccd(
-                    "chem_comp", item.upper(), "formula_weight"
-                ).item()
+                result_mass = _mass_for_residue(item)
         elif not is_residue:
             result_mass = _atom_masses.get(item.upper())
         else:
-            result_mass = get_from_ccd(
-                "chem_comp", item.upper(), "formula_weight"
-            ).item()
+            result_mass = _mass_for_residue(item)
     elif isinstance(item, Atom):
         result_mass = mass(item.element, is_residue=False)
@@ -116,3 +112,10 @@ def mass(item, is_residue=None):
     if result_mass is None:
         raise KeyError(f"{item} is not known")
     return result_mass
+def _mass_for_residue(res_name):
+    column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
+    if column is None:
+        raise KeyError(f"Residue '{res_name}' is not known")
+    return column.as_item()