PyPI - biotite - Versions diffs - 0.39.0__cp312-cp312-macosx_11_0_arm64.whl → 0.41.0__cp312-cp312-macosx_11_0_arm64.whl - Mend

biotite 0.39.0__cp312-cp312-macosx_11_0_arm64.whl → 0.41.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show

biotite/__init__.py +3 -3
biotite/application/dssp/app.py +18 -18
biotite/database/pubchem/download.py +23 -23
biotite/database/pubchem/query.py +7 -7
biotite/database/rcsb/download.py +19 -14
biotite/file.py +17 -9
biotite/sequence/align/banded.c +256 -235
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/cigar.py +60 -15
biotite/sequence/align/kmeralphabet.c +241 -220
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.c +213 -194
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.cpp +231 -203
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/localgapped.c +256 -235
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.c +233 -212
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/multiple.c +253 -232
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.c +272 -251
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.c +213 -194
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.c +215 -195
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.c +213 -193
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/annotation.py +2 -2
biotite/sequence/codec.c +233 -212
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/io/fasta/convert.py +27 -24
biotite/sequence/phylo/nj.c +213 -194
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.c +225 -200
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.c +213 -194
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/structure/__init__.py +2 -0
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +1435 -1277
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/celllist.c +215 -195
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/charges.c +1050 -1099
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/dotbracket.py +2 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +31 -68
biotite/structure/info/bonds.py +47 -101
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +78 -25
biotite/structure/info/standardize.py +17 -12
biotite/structure/integrity.py +19 -70
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +12 -106
biotite/structure/io/general.py +167 -181
biotite/structure/io/gro/file.py +16 -16
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +217 -196
biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.c +215 -195
biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/decode.c +223 -202
biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/encode.c +213 -194
biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/mol/__init__.py +4 -2
biotite/structure/io/mol/convert.py +71 -7
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/{file.py → mol.py} +69 -82
biotite/structure/io/mol/sdf.py +909 -0
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +89 -34
biotite/structure/io/pdb/hybrid36.c +63 -43
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +12 -6
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +858 -386
biotite/structure/io/pdbx/encoding.c +112803 -0
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/repair.py +253 -0
biotite/structure/sasa.c +213 -194
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/sequence.py +112 -0
biotite/structure/superimpose.py +618 -116
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0

biotite/structure/charges.cpython-312-darwin.so CHANGED Viewed

Binary file

biotite/structure/dotbracket.py CHANGED Viewed

@@ -57,6 +57,8 @@ def dot_bracket_from_structure(
     .. footbibliography::
     """
     basepairs = base_pairs(nucleic_acid_strand)
+    if len(basepairs) == 0:
+        return ['']
     basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
     length = get_residue_count(nucleic_acid_strand)
     return dot_bracket(basepairs, length, scores=scores,

biotite/structure/filter.py CHANGED Viewed

@@ -10,9 +10,9 @@ arrays and atom array stacks.
 __name__ = "biotite.structure"
 __author__ = "Patrick Kunzmann, Tom David Müller"
 __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
-           "filter_canonical_nucleotides", "filter_amino_acids",
-           "filter_canonical_amino_acids", "filter_carbohydrates",
-           "filter_backbone", "filter_intersection", "filter_first_altloc",
+           "filter_canonical_nucleotides", "filter_amino_acids",
+           "filter_canonical_amino_acids", "filter_carbohydrates",
+           "filter_backbone", "filter_intersection", "filter_first_altloc",
            "filter_highest_occupancy_altloc", "filter_peptide_backbone",
            "filter_phosphate_backbone", "filter_linear_bond_continuity",
            "filter_polymer"]
@@ -20,13 +20,10 @@ __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
 import warnings
 import numpy as np
-import operator as op
-from functools import partial, reduce
-from .atoms import Atom, AtomArray, AtomArrayStack, array as atom_array
+from functools import partial
+from .atoms import array as atom_array
 from .residues import get_residue_starts, get_residue_count
-from .info.nucleotides import nucleotide_names
-from .info.amino_acids import amino_acid_names
-from .info.carbohydrates import carbohydrate_names
+from .info.groups import amino_acid_names, carbohydrate_names, nucleotide_names
 _canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
@@ -34,10 +31,6 @@ _canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
                       "TRP","TYR","VAL", "SEC"]
 _canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
-_nucleotide_list = nucleotide_names()
-_amino_acid_list = amino_acid_names()
-_carbohydrate_list = carbohydrate_names()
 _solvent_list = ["HOH","SOL"]
 _peptide_backbone_atoms = ['N', 'CA', 'C']
@@ -118,22 +111,22 @@ def filter_nucleotides(array):
     Notes
     -----
-    Nucleotides are identified according to the PDB chemical component
+    Nucleotides are identified according to the PDB chemical component
     dictionary. A residue is considered a nucleotide if its
     ``_chem_comp.type`` property has one of the following values (case
     insensitive):
-    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
-    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
+    ``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
+    ``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
     ``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
     ``RNA OH 5 PRIME TERMINUS``
     """
-    return np.isin(array.res_name, _nucleotide_list)
+    return np.isin(array.res_name, nucleotide_names())
 def filter_canonical_amino_acids(array):
     """
-    Filter all atoms of one array that belong to canonical amino acid
+    Filter all atoms of one array that belong to canonical amino acid
     residues.
     Parameters
@@ -164,23 +157,23 @@ def filter_amino_acids(array):
     filter : ndarray, dtype=bool
         This array is `True` for all indices in `array`, where the atom
         belongs to an amino acid residue.
     Notes
     -----
-    Amino acids are identified according to the PDB chemical component
+    Amino acids are identified according to the PDB chemical component
     dictionary. A residue is considered an amino acid if its
     ``_chem_comp.type`` property has one of the following values (case
     insensitive):
-    ``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
-    ``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
-    ``D-PEPTIDE NH3 AMINO TERMINUS``,
-    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
-    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
-    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
+    ``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
+    ``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
+    ``D-PEPTIDE NH3 AMINO TERMINUS``,
+    ``L-BETA-PEPTIDE, C-GAMMA LINKING``,
+    ``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
+    ``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
     ``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
     """
-    return np.isin(array.res_name, _amino_acid_list)
+    return np.isin(array.res_name, amino_acid_names())
 def filter_carbohydrates(array):
@@ -197,20 +190,20 @@ def filter_carbohydrates(array):
     filter : ndarray, dtype=bool
         This array is `True` for all indices in `array`, where the atom
         belongs to a carbohydrate.
     Notes
     -----
-    Carbohydrates are identified according to the PDB chemical component
+    Carbohydrates are identified according to the PDB chemical component
     dictionary. A residue is considered a carbohydrate if its
     ``_chem_comp.type`` property has one of the following values (case
     insensitive):
-    ``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
-    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
-    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
+    ``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
+    ``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
+    ``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
     ``SACCHARIDE``
     """
-    return np.isin(array.res_name, _carbohydrate_list)
+    return np.isin(array.res_name, carbohydrate_names())
 def filter_backbone(array):
@@ -299,7 +292,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
     The result will depend on the atoms' order.
     For instance, consider a molecule::
            C3
            |
         C1-C2-C4
@@ -323,7 +316,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
         This array is `True` for all indices in `array`, where an atom
         has a bond length with the next atom within [`min_len`, `max_len`]
         boundaries.
     Notes
     -----
     Note that this function purely uses distances between consecutive atoms.
@@ -438,7 +431,7 @@ def filter_first_altloc(atoms, altloc_ids):
     Filter all atoms, that have the first *altloc* ID appearing in a
     residue.
-    Structure files (PDB, PDBx, MMTF) allow for duplicate atom records,
+    Structure files (PDB, PDBx) allow for duplicate atom records,
     in case a residue is found in multiple alternate locations
     (*altloc*).
     This function is used to remove such duplicate atoms by choosing a
@@ -507,7 +500,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
     For each residue, filter all atoms, that have the *altloc* ID
     with the highest occupancy for this residue.
-    Structure files (PDB, PDBx, MMTF) allow for duplicate atom records,
+    Structure files (PDB, PDBx) allow for duplicate atom records,
     in case a residue is found in multiple alternate locations
     (*altloc*).
     This function is used to remove such duplicate atoms by choosing a

biotite/structure/info/__init__.py CHANGED Viewed

@@ -6,23 +6,20 @@
 A subpackage for obtaining all kinds of chemical information about atoms
 and residues, including masses, radii, bonds, etc.
-Most information is extracted from the chemical compound dictionary
+Most information is extracted from the *Chemical Component Dictionary*
 of the
-`wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_
-via tools from the
-`biotite-util <https://github.com/biotite-dev/biotite-util>`_
-repository.
+`wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_.
 """
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann, Tom David Müller"
+from .groups import *
 from .atoms import *
 from .bonds import *
+from .groups import *
 from .masses import *
 from .misc import *
 from .radii import *
 from .standardize import *
-from .nucleotides import *
-from .amino_acids import *
-from .carbohydrates import *

biotite/structure/info/atoms.py CHANGED Viewed

@@ -6,36 +6,15 @@ __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
 __all__ = ["residue"]
-from os.path import join, dirname, realpath
-import msgpack
-import numpy as np
-from ..atoms import AtomArray
-from ..bonds import BondList
+from .ccd import get_ccd
-_residues = None
-def _init_dataset():
-    """
-    Load the residue dataset from MessagePack file.
-    Since loading the database is computationally expensive,
-    this is only done, when the residue database is actually required.
-    """
-    global _residues
-    if _residues is not None:
-        # Database is already initialized
-        return
-    # Residue data is taken from
-    # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
-    # (2019/01/27)
-    _info_dir = dirname(realpath(__file__))
-    with open(join(_info_dir, "residues.msgpack"), "rb") as file:
-        _residues = msgpack.unpack(
-            file, use_list=False, raw=False
-        )
+non_hetero_residues = set([
+    "ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
+    "ILE","LEU","LYS","MET","PHE","PRO","PYL","SER","THR",
+    "TRP","TYR","VAL", "SEC",
+    "A", "DA", "G", "DG", "C", "DC", "U", "DT",
+])
 def residue(res_name):
@@ -62,19 +41,19 @@ def residue(res_name):
     >>> alanine = residue("ALA")
     >>> # Atoms and geometry
     >>> print(alanine)
-                0  ALA N      N        -0.966    0.493    1.500
-                0  ALA CA     C         0.257    0.418    0.692
-                0  ALA C      C        -0.094    0.017   -0.716
-                0  ALA O      O        -1.056   -0.682   -0.923
-                0  ALA CB     C         1.204   -0.620    1.296
-                0  ALA OXT    O         0.661    0.439   -1.742
-                0  ALA H      H        -1.383   -0.425    1.482
-                0  ALA H2     H        -0.676    0.661    2.452
-                0  ALA HA     H         0.746    1.392    0.682
-                0  ALA HB1    H         1.459   -0.330    2.316
-                0  ALA HB2    H         0.715   -1.594    1.307
-                0  ALA HB3    H         2.113   -0.676    0.697
-                0  ALA HXT    H         0.435    0.182   -2.647
+                0  ALA N      N        -0.970    0.490    1.500
+                0  ALA CA     C         0.260    0.420    0.690
+                0  ALA C      C        -0.090    0.020   -0.720
+                0  ALA O      O        -1.060   -0.680   -0.920
+                0  ALA CB     C         1.200   -0.620    1.300
+                0  ALA OXT    O         0.660    0.440   -1.740
+                0  ALA H      H        -1.380   -0.420    1.480
+                0  ALA H2     H        -0.680    0.660    2.450
+                0  ALA HA     H         0.750    1.390    0.680
+                0  ALA HB1    H         1.460   -0.330    2.320
+                0  ALA HB2    H         0.720   -1.590    1.310
+                0  ALA HB3    H         2.110   -0.680    0.700
+                0  ALA HXT    H         0.440    0.180   -2.650
     >>> # Bonds
     >>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
     [['N' 'CA']
@@ -90,30 +69,14 @@ def residue(res_name):
      ['CB' 'HB3']
      ['OXT' 'HXT']]
     """
-    _init_dataset()
-    array_dict = _residues[res_name]
-    array = AtomArray(len(array_dict["res_name"]))
-    array.add_annotation("charge", int)
-    array.res_name = array_dict["res_name"]
-    array.atom_name = array_dict["atom_name"]
-    array.element = array_dict["element"]
-    array.charge = array_dict["charge"]
-    array.hetero = array_dict["hetero"]
-    array.coord[:,0] = array_dict["coord_x"]
-    array.coord[:,1] = array_dict["coord_y"]
-    array.coord[:,2] = array_dict["coord_z"]
-    array.bonds = BondList(
-        array.array_length(),
-        bonds = np.stack([
-            array_dict["bond_i"],
-            array_dict["bond_j"],
-            array_dict["bond_type"]
-        ]).T
-    )
-    return array
+    # Avoid circular import
+    from ..io.pdbx import get_component
+    try:
+        component = get_component(get_ccd(), res_name=res_name)
+    except KeyError:
+        raise KeyError(
+            f"No atom information found for residue '{res_name}' in CCD"
+        )
+    component.hetero[:] = res_name not in non_hetero_residues
+    return component

biotite/structure/info/bonds.py CHANGED Viewed

@@ -4,98 +4,23 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["bond_dataset", "bond_order", "bond_type", "bonds_in_residue"]
+__all__ = ["bond_type", "bonds_in_residue"]
-import warnings
-import copy
-from os.path import join, dirname, realpath
-import msgpack
 from ..bonds import BondType
+from .ccd import get_from_ccd
-_intra_bonds = None
+BOND_TYPES = {
+    ("SING", "N") : BondType.SINGLE,
+    ("DOUB", "N") : BondType.DOUBLE,
+    ("TRIP", "N") : BondType.TRIPLE,
+    ("QUAD", "N") : BondType.QUADRUPLE,
+    ("SING", "Y") : BondType.AROMATIC_SINGLE,
+    ("DOUB", "Y") : BondType.AROMATIC_DOUBLE,
+    ("TRIP", "Y") : BondType.AROMATIC_TRIPLE,
+}
-def _init_dataset():
-    """
-    Load the bond dataset from MessagePack file.
-    Since loading the database is computationally expensive,
-    this is only done, when the bond database is actually required.
-    """
-    global _intra_bonds
-    if _intra_bonds is not None:
-        # Database is already initialized
-        return
-    # Bonds are taken from
-    # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
-    # (2019/01/27)
-    _info_dir = dirname(realpath(__file__))
-    with open(join(_info_dir, "intra_bonds.msgpack"), "rb") as file:
-        _intra_bonds= msgpack.unpack(
-            file, use_list=False, raw=False, strict_map_key=False
-        )
-def bond_dataset():
-    """
-    Get a copy of the complete bond dataset extracted from the chemical
-    components dictionary.
-    This dataset does only contain intra-residue bonds.
-    Returns
-    -------
-    bonds : dict (str -> dict ((str, str) -> int))
-        The bonds as nested dictionary.
-        It maps residue names (up to 3-letters, upper case) to
-        inner dictionaries.
-        Each of these dictionary contains the bond information for the
-        given residue.
-        Specifically, it uses a set of two atom names, that are bonded,
-        as keys and the respective :class:`BondType`
-        (represented by an integer) as values.
-    """
-    _init_dataset()
-    return copy.copy(_intra_bonds)
-def bond_order(res_name, atom_name1, atom_name2):
-    """
-    Get the bond order for two atoms of the same residue, based
-    on the PDB chemical components dictionary.
-    DEPRECATED: Please use :func:`bond_type()` instead.
-    Parameters
-    ----------
-    res_name : str
-        The up to 3-letter name of the residue
-        `atom_name1` and `atom_name2` belong to.
-    atom_name1, atom_name2 : str
-        The names of the two atoms to get the bond order from.
-    Returns
-    -------
-    order : int or None
-        The order of the bond between `atom_name1` and `atom_name2`.
-        If the atoms form no bond, if any of the two atoms does not
-        exist in the context of the residue or if the residue is unknown
-        to the chemical components dictionary, `None` is returned.
-    """
-    warnings.warn("Please use `bond_type()` instead", DeprecationWarning)
-    _init_dataset()
-    btype = bond_type(res_name, atom_name1, atom_name2)
-    if btype is None:
-        return None
-    elif btype == BondType.AROMATIC_SINGLE:
-        return 1
-    elif btype == BondType.AROMATIC_DOUBLE:
-        return 2
-    else:
-        return int(btype)
+_intra_bonds = {}
 def bond_type(res_name, atom_name1, atom_name2):
@@ -110,7 +35,7 @@ def bond_type(res_name, atom_name1, atom_name2):
         `atom_name1` and `atom_name2` belong to.
     atom_name1, atom_name2 : str
         The names of the two atoms to get the bond order from.
     Returns
     -------
     order : BondType or None
@@ -119,7 +44,7 @@ def bond_type(res_name, atom_name1, atom_name2):
         If the atoms form no bond, if any of the two atoms does not
         exist in the context of the residue or if the residue is unknown
         to the chemical components dictionary, `None` is returned.
     Examples
     --------
@@ -132,14 +57,13 @@ def bond_type(res_name, atom_name1, atom_name2):
     >>> print(bond_type("PHE", "FOO", "BAR"))
     None
     """
-    _init_dataset()
-    group_bonds = _intra_bonds.get(res_name.upper())
-    if group_bonds is None:
+    bonds_for_residue = bonds_in_residue(res_name)
+    if bonds_for_residue is None:
         return None
-    # Try both atom aroders
-    bond_type_int = group_bonds.get(
+    # Try both atom orders
+    bond_type_int = bonds_for_residue.get(
         (atom_name1, atom_name2),
-        group_bonds.get((atom_name2, atom_name1))
+        bonds_for_residue.get((atom_name2, atom_name1))
     )
     if bond_type_int is not None:
         return BondType(bond_type_int)
@@ -156,15 +80,22 @@ def bonds_in_residue(res_name):
     ----------
     res_name : str
         The up to 3-letter name of the residue to get the bonds for.
     Returns
     -------
-    bonds : dict (str -> int)
+    bonds : dict ((str, str) -> int)
         A dictionary that maps tuples of two atom names to their
         respective bond types (represented as integer).
-        `None` if the residue is unknown to the
+        Empty, if the residue is unknown to the
         chemical components dictionary.
+    Warnings
+    --------
+    Treat the returned dictionary as immutable.
+    Modifying the dictionary may lead to unexpected behavior
+    in other functionalities throughout *Biotite* that use this
+    function.
     Examples
     --------
     >>> bonds = bonds_in_residue("PHE")
@@ -195,5 +126,20 @@ def bonds_in_residue(res_name):
     H2  + N   -> BondType.SINGLE
     HXT + OXT -> BondType.SINGLE
     """
-    _init_dataset()
-    return copy.copy(_intra_bonds.get(res_name.upper()))
+    global _intra_bonds
+    if res_name not in _intra_bonds:
+        chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
+        if chem_comp_bond_dict is None:
+            _intra_bonds[res_name] = {}
+        else:
+            bonds_for_residue = {}
+            for atom1, atom2, order, aromatic_flag in zip(
+                chem_comp_bond_dict["atom_id_1"],
+                chem_comp_bond_dict["atom_id_2"],
+                chem_comp_bond_dict["value_order"],
+                chem_comp_bond_dict["pdbx_aromatic_flag"]
+            ):
+                bond_type = BOND_TYPES[order, aromatic_flag]
+                bonds_for_residue[atom1.item(), atom2.item()] = bond_type
+            _intra_bonds[res_name] = bonds_for_residue
+    return _intra_bonds[res_name]

biotite/structure/info/ccd/README.rst ADDED Viewed

@@ -0,0 +1,8 @@
+These files are based on the
+`Chemical Component Dictionary <https://www.wwpdb.org/data/ccd>`_
+and were created using ``setup_ccd.py``.
+To keep the size of the repository small, the original commit should be
+rewritten, if the formats of the affected files are compatible with the
+original ones.
+The name of the commit is ``Add CCD dataset``.