PyPI - biotite - Versions diffs - 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl - Mend

biotite 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +36 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +5 -18
biotite/application/muscle/app5.py +5 -5
biotite/application/sra/app.py +0 -5
biotite/application/util.py +22 -2
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +9 -3
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +8 -9
biotite/database/uniprot/check.py +22 -17
biotite/database/uniprot/download.py +3 -6
biotite/database/uniprot/query.py +4 -5
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +49 -14
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +26 -26
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +19 -2
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +58 -48
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +284 -57
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +5 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/sequence/profile.py +105 -29
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +136 -8
biotite/sequence/sequence.py +1 -2
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +6 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +109 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +170 -0
biotite/structure/alphabet/unkerasify.py +128 -0
biotite/structure/atoms.py +163 -66
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +79 -25
biotite/structure/box.py +19 -21
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +74 -127
biotite/structure/hbond.py +17 -19
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +24 -15
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -34
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +62 -19
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -22
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +4 -4
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +80 -53
biotite/structure/io/pdb/convert.py +4 -3
biotite/structure/io/pdb/file.py +85 -25
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +36 -36
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +54 -15
biotite/structure/io/pdbx/cif.py +92 -66
biotite/structure/io/pdbx/component.py +15 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +410 -75
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +141 -156
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +69 -11
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +76 -4
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/info/ccd.py CHANGED Viewed

@@ -4,23 +4,23 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["get_ccd", "get_from_ccd"]
+__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
+import functools
+import importlib
+import inspect
+import pkgutil
 from pathlib import Path
 import numpy as np
-CCD_DIR = Path(__file__).parent / "ccd"
-INDEX_COLUMN_NAME = {
+_CCD_FILE = Path(__file__).parent / "components.bcif"
+_SPECIAL_ID_COLUMN_NAMES = {
     "chem_comp": "id",
-    "chem_comp_atom": "comp_id",
-    "chem_comp_bond": "comp_id",
 }
-_ccd_block = None
-# For each category this index gives the start and stop for each residue
-_residue_index = {}
+_DEFAULT_ID_COLUMN_NAME = "comp_id"
+@functools.cache
 def get_ccd():
     """
     Get the internal subset of the PDB
@@ -29,25 +29,68 @@ def get_ccd():
     Returns
     -------
-    ccd : BinaryCIFFile
+    ccd : BinaryCIFBlock
         The CCD.
+        It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
+    Warnings
+    --------
+    Consider the return value as read-only.
+    As other functions cache data from it, changing data may lead to undefined
+    behavior.
     References
     ----------
     .. footbibliography::
     """
     # Avoid circular import
     from biotite.structure.io.pdbx.bcif import BinaryCIFFile
-    global _ccd_block
-    if _ccd_block is None:
-        # Load CCD once and cache it for subsequent calls
-        _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
-    return _ccd_block
+    try:
+        return BinaryCIFFile.read(_CCD_FILE).block
+    except FileNotFoundError:
+        raise RuntimeError(
+            "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
+        )
+def set_ccd_path(ccd_path):
+    """
+    Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
+    This function also clears the cache of functions depending on the CCD to ensure
+    that the new CCD is used.
+    Parameters
+    ----------
+    ccd_path : path-like
+        The path to the custom CCD in BinaryCIF format, prepared with the
+        ``setup_ccd.py`` module.
+    Notes
+    -----
+    This function is intended for advanced users who need to add information for
+    compounds, which are not part of the internal CCD.
+    The reason might be that an updated version already exists upstream or that
+    the user wants to add custom compounds to the CCD.
+    """
+    global _CCD_FILE
+    _CCD_FILE = Path(ccd_path)
+    # Clear caches in all functions in biotite.structure.info
+    info_modules = [
+        importlib.import_module(f"biotite.structure.info.{mod_name}")
+        for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
+    ]
+    for module in info_modules:
+        for _, function in inspect.getmembers(module, callable):
+            if hasattr(function, "cache_clear"):
+                function.cache_clear()
+@functools.cache
 def get_from_ccd(category_name, comp_id, column_name=None):
     """
     Get the rows for the given residue in the given category from the
@@ -67,38 +110,54 @@ def get_from_ccd(category_name, comp_id, column_name=None):
     Returns
     -------
-    value : ndarray or dict or None
-        The array of the given column or all columns as dictionary.
-        ``None`` if the `comp_id` is not found in the category.
+    slice : BinaryCIFCategory or BinaryCIFColumn
+        The category or column (if `column_name` is provided) containing only the rows
+        for the given residue.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
     References
     ----------
     .. footbibliography::
     """
-    global _residue_index
-    ccd = get_ccd()
-    category = ccd[category_name]
-    if category_name not in _residue_index:
-        _residue_index[category_name] = _index_residues(
-            category[INDEX_COLUMN_NAME[category_name]].as_array()
-        )
     try:
-        start, stop = _residue_index[category_name][comp_id]
+        start, stop = _residue_index(category_name)[comp_id]
     except KeyError:
         return None
+    category = get_ccd()[category_name]
     if column_name is None:
-        return {
-            col_name: category[col_name].as_array()[start:stop]
-            for col_name in category.keys()
-        }
+        return _filter_category(category, slice(start, stop))
     else:
-        return category[column_name].as_array()[start:stop]
+        return _filter_column(category[column_name], slice(start, stop))
-def _index_residues(id_column):
+@functools.cache
+def _residue_index(category_name):
+    """
+    Get the start and stop index for each component name in the given
+    CCD category.
+    Parameters
+    ----------
+    category_name : str
+        The category to determine start and stop indices for each component in.
+    Returns
+    -------
+    index : dict (str -> (int, int))
+        The index maps each present component name to the corresponding
+        start and exclusive stop index in the ID column of the category.
+    """
+    category = get_ccd()[category_name]
+    id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
+        category_name, _DEFAULT_ID_COLUMN_NAME
+    )
+    id_column = category[id_column_name].as_array()
     residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
     # The final start is the exclusive stop of last residue
     residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
@@ -107,3 +166,35 @@ def _index_residues(id_column):
         comp_id = id_column[residue_starts[i]].item()
         index[comp_id] = (residue_starts[i], residue_starts[i + 1])
     return index
+def _filter_category(category, index):
+    """
+    Reduce the category to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
+    return BinaryCIFCategory(
+        {key: _filter_column(column, index) for key, column in category.items()}
+    )
+def _filter_column(column, index):
+    """
+    Reduce the column to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
+    from biotite.structure.io.pdbx.component import MaskValue
+    data_array = column.data.array[index]
+    mask_array = column.mask.array[index] if column.mask is not None else None
+    return BinaryCIFColumn(
+        BinaryCIFData(data_array),
+        (
+            BinaryCIFData(mask_array)
+            if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
+            else None
+        ),
+    )

biotite/structure/info/{ccd/components.bcif → components.bcif} RENAMED Viewed

Binary file

biotite/structure/info/groups.py CHANGED Viewed

@@ -6,14 +6,45 @@ __name__ = "biotite.structure.info"
 __author__ = "Tom David Müller, Patrick Kunzmann"
 __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
-from pathlib import Path
-CCD_DIR = Path(__file__).parent / "ccd"
-group_lists = {}
+import functools
+import numpy as np
+from biotite.structure.info.ccd import get_ccd
+_AMINO_ACID_TYPES = [
+    "D-beta-peptide, C-gamma linking",
+    "D-gamma-peptide, C-delta linking",
+    "D-peptide COOH carboxy terminus",
+    "D-peptide NH3 amino terminus",
+    "D-peptide linking",
+    "L-beta-peptide, C-gamma linking",
+    "L-gamma-peptide, C-delta linking",
+    "L-peptide COOH carboxy terminus",
+    "L-peptide NH3 amino terminus",
+    "L-peptide linking",
+    "peptide linking",
+]
+_NUCLEOTIDE_TYPES = [
+    "DNA OH 3 prime terminus",
+    "DNA OH 5 prime terminus",
+    "DNA linking",
+    "L-DNA linking",
+    "L-RNA linking",
+    "RNA OH 3 prime terminus",
+    "RNA OH 5 prime terminus",
+    "RNA linking",
+]
+_CARBOHYDRATE_TYPES = [
+    "D-saccharide",
+    "D-saccharide, alpha linking",
+    "D-saccharide, beta linking",
+    "L-saccharide",
+    "L-saccharide, alpha linking",
+    "L-saccharide, beta linking",
+    "saccharide",
+]
+@functools.cache
 def amino_acid_names():
     """
     Get a tuple of amino acid three-letter codes according to the
@@ -30,11 +61,11 @@ def amino_acid_names():
     ----------
     .. footbibliography::
     """
-    return _get_group_members("amino_acids")
+    return _get_group_members(_AMINO_ACID_TYPES)
+@functools.cache
 def nucleotide_names():
     """
     Get a tuple of nucleotide three-letter codes according to the
@@ -51,11 +82,11 @@ def nucleotide_names():
     ----------
     .. footbibliography::
     """
-    return _get_group_members("nucleotides")
+    return _get_group_members(_NUCLEOTIDE_TYPES)
+@functools.cache
 def carbohydrate_names():
     """
     Get a tuple of carbohydrate three-letter codes according to the
@@ -72,14 +103,26 @@ def carbohydrate_names():
     ----------
     .. footbibliography::
+    """
+    return _get_group_members(_CARBOHYDRATE_TYPES)
+def _get_group_members(match_types):
     """
-    return _get_group_members("carbohydrates")
+    Identify component IDs that match a given component *type* from the CCD.
+    Parameters
+    ----------
+    match_types : list of str
+        The component types to extract.
-def _get_group_members(group_name):
-    global group_lists
-    if group_name not in group_lists:
-        with open(CCD_DIR / f"{group_name}.txt", "r") as file:
-            group_lists[group_name] = tuple(file.read().split())
-    return group_lists[group_name]
+    Returns
+    -------
+    comp_ids : list of str
+        The extracted component IDs.
+    """
+    category = get_ccd()["chem_comp"]
+    comp_ids = category["id"].as_array()
+    types = category["type"].as_array()
+    # Ignore case
+    return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()

biotite/structure/info/masses.py CHANGED Viewed

@@ -95,15 +95,11 @@ def mass(item, is_residue=None):
         if is_residue is None:
             result_mass = _atom_masses.get(item.upper())
             if result_mass is None:
-                result_mass = get_from_ccd(
-                    "chem_comp", item.upper(), "formula_weight"
-                ).item()
+                result_mass = _mass_for_residue(item)
         elif not is_residue:
             result_mass = _atom_masses.get(item.upper())
         else:
-            result_mass = get_from_ccd(
-                "chem_comp", item.upper(), "formula_weight"
-            ).item()
+            result_mass = _mass_for_residue(item)
     elif isinstance(item, Atom):
         result_mass = mass(item.element, is_residue=False)
@@ -116,3 +112,10 @@ def mass(item, is_residue=None):
     if result_mass is None:
         raise KeyError(f"{item} is not known")
     return result_mass
+def _mass_for_residue(res_name):
+    column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
+    if column is None:
+        raise KeyError(f"Residue '{res_name}' is not known")
+    return column.as_item()

biotite/structure/info/misc.py CHANGED Viewed

@@ -11,19 +11,13 @@ from biotite.structure.info.ccd import get_ccd, get_from_ccd
 def all_residues():
     """
-    Get a list of all residues/compound names in the
-    PDB chemical components dictionary.
+    Get a list of all residues/compound names in the PDB
+    *Chemical Component Dictionary* (CCD).
     Returns
     -------
     residues : list of str
-        A list of all available The up to 3-letter residue names.
-    Examples
-    --------
-    >>> print(all_residues()[1000 : 1010])
-    ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
+        A list of all available residue names.
     """
     return get_ccd()["chem_comp"]["id"].as_array().tolist()
@@ -51,10 +45,10 @@ def full_name(res_name):
     >>> print(full_name("MAN"))
     alpha-D-mannopyranose
     """
-    array = get_from_ccd("chem_comp", res_name.upper(), "name")
-    if array is None:
+    column = get_from_ccd("chem_comp", res_name.upper(), "name")
+    if column is None:
         return None
-    return array.item()
+    return column.as_item()
 def link_type(res_name):
@@ -84,10 +78,10 @@ def link_type(res_name):
     >>> print(link_type("HOH"))
     NON-POLYMER
     """
-    array = get_from_ccd("chem_comp", res_name.upper(), "type")
-    if array is None:
+    column = get_from_ccd("chem_comp", res_name.upper(), "type")
+    if column is None:
         return None
-    return array.item()
+    return column.as_item()
 def one_letter_code(res_name):
@@ -107,7 +101,7 @@ def one_letter_code(res_name):
     -------
     one_letter_code : str or None
         The one-letter code.
-        None if the compound is not present in the CCD or if no
+        ``None`` if the compound is not present in the CCD or if no
         one-letter code is defined for this compound.
     Examples
@@ -133,12 +127,11 @@ def one_letter_code(res_name):
     alpha-D-mannopyranose
     >>> print(one_letter_code("MAN"))
     None
     """
-    array = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
-    if array is None:
+    column = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
+    if column is None:
         return None
-    item = array.item()
-    if item == "":
+    if column.mask is not None:
+        # Value is masked, i.e. inapplicable or missing
         return None
-    return item
+    return column.as_item()

biotite/structure/info/radii.py CHANGED Viewed

@@ -26,37 +26,106 @@ _PROTOR_RADII = {
     ("S",  1, 0) : 1.77,
     ("S",  2, 0) : 1.77, # Not official, added for completeness (MET)
     ("S",  2, 1) : 1.77,
-    ("F",  1, 0) : 1.47, # Taken from _SINGLE_RADII
-    ("CL", 1, 0) : 1.75, # Taken from _SINGLE_RADII
-    ("BR", 1, 0) : 1.85, # Taken from _SINGLE_RADII
+    ("F",  1, 0) : 1.47, # Taken from _SINGLE_ATOM_VDW_RADII
+    ("CL", 1, 0) : 1.75, # Taken from _SINGLE_ATOM_VDW_RADII
+    ("BR", 1, 0) : 1.85, # Taken from _SINGLE_ATOM_VDW_RADII
     ("I",  1, 0) : 1.98, # Taken from _SINGLE_ATOM_VDW_RADII
 }
-_SINGLE_RADII = {
-    "H":  1.20,
+_SINGLE_ATOM_VDW_RADII = {
+    # Main group
+    # Row 1 (Period 1)
+    "H":  1.10,
     "HE": 1.40,
+    # Row 2 (Period 2)
+    "LI": 1.81,
+    "BE": 1.53,
+    "B":  1.92,
     "C":  1.70,
     "N":  1.55,
     "O":  1.52,
     "F":  1.47,
     "NE": 1.54,
+    # Row 3 (Period 3)
+    "NA": 2.27,
+    "MG": 1.73,
+    "AL": 1.84,
     "SI": 2.10,
     "P":  1.80,
     "S":  1.80,
     "CL": 1.75,
     "AR": 1.88,
+    # Row 4 (Period 4)
+    "K":  2.75,
+    "CA": 2.31,
+    "GA": 1.87,
+    "GE": 2.11,
     "AS": 1.85,
     "SE": 1.90,
-    "BR": 1.85,
+    "BR": 1.83,
     "KR": 2.02,
+    # Row 5 (Period 5)
+    "RB": 3.03,
+    "SR": 2.49,
+    "IN": 1.93,
+    "SN": 2.17,
+    "SB": 2.06,
     "TE": 2.06,
     "I":  1.98,
     "XE": 2.16,
+    # Row 6 (Period 6)
+    "CS": 3.43,
+    "BA": 2.68,
+    "TL": 1.96,
+    "PB": 2.02,
+    "BI": 2.07,
+    "PO": 1.97,
+    "AT": 2.02,
+    "RN": 2.20,
+    # Row 7 (Period 7)
+    "FR": 3.48,
+    "RA": 2.83,
+    # Transition metals (relevant ones only)
+    # Row 1
+    "FE": 2.05,
+    "CU": 2.00,
+    "ZN": 2.10,
+    "MN": 2.05,
+    "CO": 2.00,
+    "NI": 2.00,
+    # Row 2
+    'MO': 2.10,
+    'RU': 2.05,
+    # Row 3
+    'W': 2.10,
+    'PT': 2.05,
+    'AU': 2.10,
 }
+"""
+Van der Waals radii for main group and transition elements.
+Main group:
+Source: https://pubs.acs.org/doi/10.1021/jp8111556, Table 12 (Mantina et al. 2009)
+Transition metals:
+Source: RDKit, 2024.9.4 Release
+https://github.com/rdkit/rdkit/blob/af6347963f25cfe8fe4db0638410b2f3a8e8bd89/Code/GraphMol/atomic_data.cpp#L51
+Where available, these values were cross-checked vs the CRC Handbook of
+Chemistry and Physics (105th edition) and verified that they are closely
+in line (barring very minor discrepancies, usually < 0.05 Å).
+We cannot use the CRC values directly as they are not permissively licensed.
+"""
 # fmt: on
 # A dictionary that caches radii for each residue
@@ -65,16 +134,15 @@ _protor_radii = {}
 def vdw_radius_protor(res_name, atom_name):
     """
-    Estimate the Van-der-Waals radius of an non-hydrogen atom,
+    Estimate the Van-der-Waals radius of a heavy atom,
     that includes the radius added by potential bonded hydrogen atoms.
     The respective radii are taken from the ProtOr dataset.
     :footcite:`Tsai1999`
     This is especially useful for macromolecular structures where no
     hydrogen atoms are resolved, e.g. crystal structures.
-    The valency of the non-hydrogen atom and the amount of normally
-    bonded hydrogen atoms is taken from the chemical compound dictionary
-    dataset.
+    The valency of the heavy atom and the amount of normally
+    bonded hydrogen atoms is taken from the *Chemical Component Dictionary*.
     Parameters
     ----------
@@ -86,12 +154,13 @@ def vdw_radius_protor(res_name, atom_name):
     Returns
     -------
-    The Van-der-Waals radius of the given atom.
-    If the radius cannot be estimated for the atom, `None` is returned.
+    radius : float
+        The Van-der-Waals radius of the given atom.
+        If the radius cannot be estimated for the atom, `None` is returned.
-    See also
+    See Also
     --------
-    vdw_radius_single
+    vdw_radius_single : *Van-der-Waals* radii for structures with annotated hydrogen atoms.
     References
     ----------
@@ -114,7 +183,7 @@ def vdw_radius_protor(res_name, atom_name):
         # Use cached radii for the residue, if already calculated
         if atom_name not in _protor_radii[res_name]:
             raise KeyError(
-                f"Residue '{res_name}' does not contain an atom named " f"'{atom_name}'"
+                f"Residue '{res_name}' does not contain an atom named '{atom_name}'"
             )
         return _protor_radii[res_name].get(atom_name)
     else:
@@ -166,8 +235,8 @@ def _calculate_protor_radii(res_name):
 def vdw_radius_single(element):
     """
-    Get the Van-der-Waals radius of an atom from the given element.
-    :footcite:`Bondi1964`
+    Get the *Van-der-Waals* radius of an atom from the given element.
+    :footcite:`Mantina2009`
     Parameters
     ----------
@@ -176,12 +245,13 @@ def vdw_radius_single(element):
     Returns
     -------
-    The Van-der-Waals radius of the atom.
-    If the radius is unknown for the element, `None` is returned.
+    radius : float
+        The Van-der-Waals radius of the atom.
+        If the radius is unknown for the element, `None` is returned.
-    See also
+    See Also
     --------
-    vdw_radius_protor
+    vdw_radius_protor : *Van-der-Waals* radii for structures without annotated hydrogen atoms.
     References
     ----------
@@ -194,4 +264,4 @@ def vdw_radius_single(element):
     >>> print(vdw_radius_single("C"))
     1.7
     """
-    return _SINGLE_RADII.get(element.upper())
+    return _SINGLE_ATOM_VDW_RADII.get(element.upper())

biotite/structure/info/standardize.py CHANGED Viewed

@@ -121,16 +121,16 @@ def standardize_order(atoms):
         stop = starts[i + 1]
         res_name = atoms.res_name[start]
-        standard_atom_names = get_from_ccd("chem_comp_atom", res_name, "atom_id")
-        if standard_atom_names is None:
+        chem_comp_atom = get_from_ccd("chem_comp_atom", res_name, "atom_id")
+        if chem_comp_atom is None:
             # If the residue is not in the CCD, keep the current order
             warnings.warn(
-                f"Residue '{res_name}' is not in the CCD, "
-                f"keeping current atom order"
+                f"Residue '{res_name}' is not in the CCD, keeping current atom order"
             )
             reordered_indices[start:stop] = np.arange(start, stop)
             continue
+        standard_atom_names = chem_comp_atom.as_array()
         reordered_indices[start:stop] = (
             _reorder(atoms.atom_name[start:stop], standard_atom_names) + start
         )

biotite/structure/integrity.py CHANGED Viewed

@@ -47,7 +47,7 @@ def check_atom_id_continuity(array):
     Returns
     -------
     discontinuity : ndarray, dtype=int
-        Contains the indices of atoms after a discontinuity
+        Contains the indices of atoms after a discontinuity.
     """
     ids = array.atom_id
     return _check_continuity(ids)
@@ -69,7 +69,7 @@ def check_res_id_continuity(array):
     Returns
     -------
     discontinuity : ndarray, dtype=int
-        Contains the indices of atoms after a discontinuity
+        Contains the indices of atoms after a discontinuity.
     """
     ids = array.res_id
     return _check_continuity(ids)
@@ -96,10 +96,8 @@ def check_linear_continuity(array, min_len=1.2, max_len=1.8):
     See Also
     --------
-    biotite.structure.filter.filter_linear_bond_continuity :
-        A function to filter for atoms preserving the continuity (used here).
-    biotite.structure.bonds.BondList :
-        A class that doesn't depend on the atoms' order to identify bonds.
+    filter_linear_bond_continuity : A function to filter for atoms preserving the continuity (used here).
+    BondList : A class that doesn't depend on the atoms' order to identify bonds.
     """
     con_mask = filter_linear_bond_continuity(array, min_len, max_len)
     # The continuity mask `con_mask` points to atoms for which the next atom is continuous.