PyPI - biotite - Versions diffs - 1.1.0__cp313-cp313-macosx_10_13_x86_64.whl - Mend

biotite 1.1.0__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show

biotite/__init__.py +18 -0
biotite/application/__init__.py +69 -0
biotite/application/application.py +276 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +500 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +92 -0
biotite/application/blast/webapp.py +428 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +223 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +159 -0
biotite/application/localapp.py +342 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +116 -0
biotite/application/msaapp.py +363 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +227 -0
biotite/application/muscle/app5.py +163 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +452 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +199 -0
biotite/application/util.py +57 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +310 -0
biotite/application/viennarna/rnafold.py +254 -0
biotite/application/viennarna/rnaplot.py +206 -0
biotite/application/viennarna/util.py +77 -0
biotite/application/webapp.py +76 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +60 -0
biotite/database/entrez/dbnames.py +91 -0
biotite/database/entrez/download.py +229 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +262 -0
biotite/database/error.py +16 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +258 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +830 -0
biotite/database/pubchem/throttle.py +98 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +159 -0
biotite/database/rcsb/query.py +964 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +40 -0
biotite/database/uniprot/download.py +129 -0
biotite/database/uniprot/query.py +293 -0
biotite/file.py +232 -0
biotite/sequence/__init__.py +84 -0
biotite/sequence/align/__init__.py +203 -0
biotite/sequence/align/alignment.py +680 -0
biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +71 -0
biotite/sequence/align/cigar.py +425 -0
biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +595 -0
biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3411 -0
biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +622 -0
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +313 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
biotite/sequence/align/selector.pyx +954 -0
biotite/sequence/align/statistics.py +264 -0
biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +555 -0
biotite/sequence/annotation.py +830 -0
biotite/sequence/codec.cpython-313-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +477 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1115 -0
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +170 -0
biotite/sequence/graphics/dendrogram.py +229 -0
biotite/sequence/graphics/features.py +544 -0
biotite/sequence/graphics/logo.py +104 -0
biotite/sequence/graphics/plasmid.py +712 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +284 -0
biotite/sequence/io/fasta/file.py +265 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +117 -0
biotite/sequence/io/fastq/file.py +507 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +269 -0
biotite/sequence/io/genbank/file.py +573 -0
biotite/sequence/io/genbank/metadata.py +336 -0
biotite/sequence/io/genbank/sequence.py +171 -0
biotite/sequence/io/general.py +201 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +128 -0
biotite/sequence/io/gff/file.py +450 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +567 -0
biotite/sequence/search.py +118 -0
biotite/sequence/seqtypes.py +713 -0
biotite/sequence/sequence.py +374 -0
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +133 -0
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +110 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +171 -0
biotite/structure/alphabet/unkerasify.py +122 -0
biotite/structure/atoms.py +1554 -0
biotite/structure/basepairs.py +1404 -0
biotite/structure/bonds.cpython-313-darwin.so +0 -0
biotite/structure/bonds.pyx +1972 -0
biotite/structure/box.py +588 -0
biotite/structure/celllist.cpython-313-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +314 -0
biotite/structure/charges.cpython-313-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +109 -0
biotite/structure/dotbracket.py +214 -0
biotite/structure/error.py +39 -0
biotite/structure/filter.py +590 -0
biotite/structure/geometry.py +655 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +243 -0
biotite/structure/graphics/rna.py +295 -0
biotite/structure/hbond.py +428 -0
biotite/structure/info/__init__.py +24 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +81 -0
biotite/structure/info/bonds.py +149 -0
biotite/structure/info/ccd.py +202 -0
biotite/structure/info/components.bcif +0 -0
biotite/structure/info/groups.py +131 -0
biotite/structure/info/masses.py +121 -0
biotite/structure/info/misc.py +138 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +186 -0
biotite/structure/integrity.py +215 -0
biotite/structure/io/__init__.py +29 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +67 -0
biotite/structure/io/general.py +243 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +344 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +112 -0
biotite/structure/io/mol/ctab.py +415 -0
biotite/structure/io/mol/header.py +120 -0
biotite/structure/io/mol/mol.py +149 -0
biotite/structure/io/mol/sdf.py +914 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +64 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +307 -0
biotite/structure/io/pdb/file.py +1290 -0
biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +113 -0
biotite/structure/io/pdbqt/file.py +688 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +656 -0
biotite/structure/io/pdbx/cif.py +1075 -0
biotite/structure/io/pdbx/component.py +245 -0
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +1745 -0
biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +1031 -0
biotite/structure/io/trajfile.py +693 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +43 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +43 -0
biotite/structure/mechanics.py +73 -0
biotite/structure/molecules.py +352 -0
biotite/structure/pseudoknots.py +628 -0
biotite/structure/rdf.py +245 -0
biotite/structure/repair.py +304 -0
biotite/structure/residues.py +572 -0
biotite/structure/sasa.cpython-313-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/segments.py +178 -0
biotite/structure/sequence.py +111 -0
biotite/structure/sse.py +308 -0
biotite/structure/superimpose.py +689 -0
biotite/structure/transform.py +530 -0
biotite/structure/util.py +168 -0
biotite/version.py +16 -0
biotite/visualize.py +265 -0
biotite-1.1.0.dist-info/METADATA +190 -0
biotite-1.1.0.dist-info/RECORD +332 -0
biotite-1.1.0.dist-info/WHEEL +4 -0
biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0

biotite/structure/info/bonds.py ADDED Viewed

@@ -0,0 +1,149 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["bond_type", "bonds_in_residue"]
+import functools
+from biotite.structure.bonds import BondType
+from biotite.structure.info.ccd import get_from_ccd
+# Translates the bond description of a CCD ``chem_comp_bond`` row, given as
+# the tuple ``(value_order, pdbx_aromatic_flag)``, into a `BondType`
+BOND_TYPES = {
+    ("SING", "N"): BondType.SINGLE,
+    ("DOUB", "N"): BondType.DOUBLE,
+    ("TRIP", "N"): BondType.TRIPLE,
+    ("QUAD", "N"): BondType.QUADRUPLE,
+    ("SING", "Y"): BondType.AROMATIC_SINGLE,
+    ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
+    ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
+}
+_intra_bonds = {}
+def bond_type(res_name, atom_name1, atom_name2):
+    """
+    Get the :class:`BondType` for two atoms of the same residue, based
+    on the PDB chemical components dictionary.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter name of the residue
+        `atom_name1` and `atom_name2` belong to.
+    atom_name1, atom_name2 : str
+        The names of the two atoms to get the bond order from.
+    Returns
+    -------
+    order : BondType or None
+        The :class:`BondType` of the bond between `atom_name1` and
+        `atom_name2`.
+        If the atoms form no bond, if any of the two atoms does not
+        exist in the context of the residue or if the residue is unknown
+        to the chemical components dictionary, `None` is returned.
+    Examples
+    --------
+    >>> print(repr(bond_type("PHE", "CA", "CB")))
+    <BondType.SINGLE: 1>
+    >>> print(repr(bond_type("PHE", "CG", "CD1")))
+    <BondType.AROMATIC_DOUBLE: 6>
+    >>> print(repr(bond_type("PHE", "CA", "CG")))
+    None
+    >>> print(repr(bond_type("PHE", "FOO", "BAR")))
+    None
+    """
+    bonds_for_residue = bonds_in_residue(res_name)
+    if bonds_for_residue is None:
+        return None
+    # Try both atom orders
+    bond_type_int = bonds_for_residue.get(
+        (atom_name1, atom_name2), bonds_for_residue.get((atom_name2, atom_name1))
+    )
+    if bond_type_int is not None:
+        return BondType(bond_type_int)
+    else:
+        return None
+@functools.cache
+def bonds_in_residue(res_name):
+    """
+    Get a dictionary containing all atoms inside a given residue
+    that form a bond.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter name of the residue to get the bonds for.
+    Returns
+    -------
+    bonds : dict ((str, str) -> int)
+        A dictionary that maps tuples of two atom names to their
+        respective bond types (represented as integer).
+        Empty, if the residue is unknown to the
+        chemical components dictionary.
+    Warnings
+    --------
+    Treat the returned dictionary as immutable.
+    Modifying the dictionary may lead to unexpected behavior.
+    In other functionalities throughout *Biotite* that uses this
+    function.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
+    Examples
+    --------
+    >>> bonds = bonds_in_residue("PHE")
+    >>> for atoms, bond_type_int in sorted(bonds.items()):
+    ...     atom1, atom2 = sorted(atoms)
+    ...     print(f"{atom1:3} + {atom2:3} -> {BondType(bond_type_int).name}")
+    C   + O   -> DOUBLE
+    C   + OXT -> SINGLE
+    C   + CA  -> SINGLE
+    CA  + CB  -> SINGLE
+    CA  + HA  -> SINGLE
+    CB  + CG  -> SINGLE
+    CB  + HB2 -> SINGLE
+    CB  + HB3 -> SINGLE
+    CD1 + CE1 -> AROMATIC_SINGLE
+    CD1 + HD1 -> SINGLE
+    CD2 + CE2 -> AROMATIC_DOUBLE
+    CD2 + HD2 -> SINGLE
+    CE1 + CZ  -> AROMATIC_DOUBLE
+    CE1 + HE1 -> SINGLE
+    CE2 + CZ  -> AROMATIC_SINGLE
+    CE2 + HE2 -> SINGLE
+    CD1 + CG  -> AROMATIC_DOUBLE
+    CD2 + CG  -> AROMATIC_SINGLE
+    CZ  + HZ  -> SINGLE
+    CA  + N   -> SINGLE
+    H   + N   -> SINGLE
+    H2  + N   -> SINGLE
+    HXT + OXT -> SINGLE
+    """
+    global _intra_bonds
+    if res_name not in _intra_bonds:
+        chem_comp_bond = get_from_ccd("chem_comp_bond", res_name)
+        if chem_comp_bond is None:
+            _intra_bonds[res_name] = {}
+        else:
+            bonds_for_residue = {}
+            for atom1, atom2, order, aromatic_flag in zip(
+                chem_comp_bond["atom_id_1"].as_array(),
+                chem_comp_bond["atom_id_2"].as_array(),
+                chem_comp_bond["value_order"].as_array(),
+                chem_comp_bond["pdbx_aromatic_flag"].as_array(),
+            ):
+                bond_type = BOND_TYPES[order, aromatic_flag]
+                bonds_for_residue[atom1.item(), atom2.item()] = bond_type
+            _intra_bonds[res_name] = bonds_for_residue
+    return _intra_bonds[res_name]

biotite/structure/info/ccd.py ADDED Viewed

@@ -0,0 +1,202 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
+import functools
+import importlib
+import inspect
+import pkgutil
+from pathlib import Path
+import numpy as np
+# Location of the CCD file read by `get_ccd()`, replaced by `set_ccd_path()`
+_CCD_FILE = Path(__file__).parent / "components.bcif"
+# Categories whose component ID is stored in a column other than the default
+_SPECIAL_ID_COLUMN_NAMES = {
+    "chem_comp": "id",
+}
+# Name of the component ID column for all categories not listed above
+_DEFAULT_ID_COLUMN_NAME = "comp_id"
+@functools.cache
+def get_ccd():
+    """
+    Get the internal subset of the PDB
+    *Chemical Component Dictionary* (CCD).
+    :footcite:`Westbrook2015`
+    Returns
+    -------
+    ccd : BinaryCIFBlock
+        The CCD.
+        It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
+    Warnings
+    --------
+    Consider the return value as read-only.
+    As other functions cache data from it, changing data may lead to undefined
+    behavior.
+    References
+    ----------
+    .. footbibliography::
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFFile
+    try:
+        return BinaryCIFFile.read(_CCD_FILE).block
+    except FileNotFoundError:
+        raise RuntimeError(
+            "Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
+        )
+def set_ccd_path(ccd_path):
+    """
+    Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
+    This function also clears the cache of functions depending on the CCD to ensure
+    that the new CCD is used.
+    Parameters
+    ----------
+    ccd_path : path-like
+        The path to the custom CCD in BinaryCIF format, prepared with the
+        ``setup_ccd.py`` module.
+    Notes
+    -----
+    This function is intended for advanced users who need to add information for
+    compounds, which are not part of the internal CCD.
+    The reason might be that an updated version already exists upstream or that
+    the user wants to add custom compounds to the CCD.
+    """
+    global _CCD_FILE
+    _CCD_FILE = Path(ccd_path)
+    # Clear caches in all functions in biotite.structure.info
+    info_modules = [
+        importlib.import_module(f"biotite.structure.info.{mod_name}")
+        for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
+    ]
+    for module in info_modules:
+        for _, function in inspect.getmembers(module, callable):
+            if hasattr(function, "cache_clear"):
+                function.cache_clear()
+@functools.cache
+def get_from_ccd(category_name, comp_id, column_name=None):
+    """
+    Get the rows for the given residue in the given category from the
+    internal subset of the PDB *Chemical Component Dictionary* (CCD).
+    :footcite:`Westbrook2015`
+    Parameters
+    ----------
+    category_name : str
+        The category in the CCD.
+    comp_id : str
+        The residue identifier, i.e. the ``res_name``.
+    column_name : str, optional
+        The name of the column to be retrieved.
+        If None, all columns are returned as dictionary.
+        By default None.
+    Returns
+    -------
+    slice : BinaryCIFCategory or BinaryCIFColumn
+        The category or column (if `column_name` is provided) containing only the rows
+        for the given residue.
+    Notes
+    -----
+    The returned values are cached for faster access in subsequent calls.
+    References
+    ----------
+    .. footbibliography::
+    """
+    try:
+        start, stop = _residue_index(category_name)[comp_id]
+    except KeyError:
+        return None
+    category = get_ccd()[category_name]
+    if column_name is None:
+        return _filter_category(category, slice(start, stop))
+    else:
+        return _filter_column(category[column_name], slice(start, stop))
+@functools.cache
+def _residue_index(category_name):
+    """
+    Get the start and stop index for each component name in the given
+    CCD category.
+    Parameters
+    ----------
+    category_name : str
+        The category to determine start and stop indices for each component in.
+    Returns
+    -------
+    index : dict (str -> (int, int))
+        The index maps each present component name to the corresponding
+        start and exclusive stop index in `id_column`.
+    """
+    category = get_ccd()[category_name]
+    id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
+        category_name, _DEFAULT_ID_COLUMN_NAME
+    )
+    id_column = category[id_column_name].as_array()
+    residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
+    # The final start is the exclusive stop of last residue
+    residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
+    index = {}
+    for i in range(len(residue_starts) - 1):
+        comp_id = id_column[residue_starts[i]].item()
+        index[comp_id] = (residue_starts[i], residue_starts[i + 1])
+    return index
+def _filter_category(category, index):
+    """
+    Reduce the category to the values for the given index.∂
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
+    return BinaryCIFCategory(
+        {key: _filter_column(column, index) for key, column in category.items()}
+    )
+def _filter_column(column, index):
+    """
+    Reduce the column to the values for the given index.
+    """
+    # Avoid circular import
+    from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
+    from biotite.structure.io.pdbx.component import MaskValue
+    data_array = column.data.array[index]
+    mask_array = column.mask.array[index] if column.mask is not None else None
+    return BinaryCIFColumn(
+        BinaryCIFData(data_array),
+        (
+            BinaryCIFData(mask_array)
+            if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
+            else None
+        ),
+    )

biotite/structure/info/components.bcif ADDED Viewed

Binary file

biotite/structure/info/groups.py ADDED Viewed

@@ -0,0 +1,131 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Tom David Müller, Patrick Kunzmann"
+__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
+import functools
+import numpy as np
+from biotite.structure.info.ccd import get_ccd
+# The lists below contain values of the ``type`` column of the CCD
+# ``chem_comp`` category; they are matched case-insensitively
+# Component types reported by `amino_acid_names()`
+_AMINO_ACID_TYPES = [
+    "D-beta-peptide, C-gamma linking",
+    "D-gamma-peptide, C-delta linking",
+    "D-peptide COOH carboxy terminus",
+    "D-peptide NH3 amino terminus",
+    "D-peptide linking",
+    "L-beta-peptide, C-gamma linking",
+    "L-gamma-peptide, C-delta linking",
+    "L-peptide COOH carboxy terminus",
+    "L-peptide NH3 amino terminus",
+    "L-peptide linking",
+    "peptide linking",
+]
+# Component types reported by `nucleotide_names()`
+_NUCLEOTIDE_TYPES = [
+    "DNA OH 3 prime terminus",
+    "DNA OH 5 prime terminus",
+    "DNA linking",
+    "L-DNA linking",
+    "L-RNA linking",
+    "RNA OH 3 prime terminus",
+    "RNA OH 5 prime terminus",
+    "RNA linking",
+]
+# Component types reported by `carbohydrate_names()`
+_CARBOHYDRATE_TYPES = [
+    "D-saccharide",
+    "D-saccharide, alpha linking",
+    "D-saccharide, beta linking",
+    "L-saccharide",
+    "L-saccharide, alpha linking",
+    "L-saccharide, beta linking",
+    "saccharide",
+]
+@functools.cache
+def amino_acid_names():
+    """
+    Get a tuple of amino acid three-letter codes according to the
+    PDB *Chemical Component Dictionary*.
+    :footcite:`Westbrook2015`
+    Returns
+    -------
+    amino_acid_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        peptide monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members(_AMINO_ACID_TYPES)
+@functools.cache
+def nucleotide_names():
+    """
+    Get a tuple of nucleotide three-letter codes according to the
+    PDB *Chemical Component Dictionary*.
+    :footcite:`Westbrook2015`
+    Returns
+    -------
+    nucleotide_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        DNA/RNA monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members(_NUCLEOTIDE_TYPES)
+@functools.cache
+def carbohydrate_names():
+    """
+    Get a tuple of carbohydrate three-letter codes according to the
+    PDB *Chemical Component Dictionary*.
+    :footcite:`Westbrook2015`
+    Returns
+    -------
+    carbohydrate_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        saccharide monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members(_CARBOHYDRATE_TYPES)
+def _get_group_members(match_types):
+    """
+    Identify component IDs that matches a given component *type* from the CCD.
+    Parameters
+    ----------
+    match_types : list of str
+        The component types to extract.
+    Returns
+    -------
+    comp_ids : list of str
+        The extracted component IDs.
+    """
+    category = get_ccd()["chem_comp"]
+    comp_ids = category["id"].as_array()
+    types = category["type"].as_array()
+    # Ignore case
+    return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()

biotite/structure/info/masses.py ADDED Viewed

@@ -0,0 +1,121 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["mass"]
+import json
+from pathlib import Path
+from biotite.structure.atoms import Atom, AtomArray, AtomArrayStack
+from biotite.structure.info.ccd import get_from_ccd
+# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
+# JSON file that is read by `mass()` to obtain the atom masses
+ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
+# Holds the parsed content of ATOM_MASSES_FILE, once `mass()` has loaded it
+_atom_masses = None
+def mass(item, is_residue=None):
+    """
+    Calculate the mass for the given object.
+    :footcite:`Meija2016`
+    If a residue name is given, the mass values refer to the masses of
+    the complete molecule without additional or missing protons.
+    In case of residues in a longer chain, some atoms might be missing
+    from the molecule.
+    For example non-terminal residues in a protein or nucleotide chain
+    miss the mass of a water molecule.
+    Parameters
+    ----------
+    item : str or Atom or AtomArray or AtomArrayStack
+        The atom or molecule to get the mass for.
+        If a string is given, it is interpreted as residue name or
+        chemical element.
+        If an :class:`Atom` is given the mass is taken from its element.
+        If an :class:`AtomArray` or :class:`AtomArrayStack` is given the
+        mass is the sum of the mass of its atoms.
+    is_residue : bool, optional
+        If set to true and a string is given for `item`, the string
+        will be strictly interpreted as residue.
+        If set to false, the string is strictly interpreted as element.
+        By default the string will be interpreted as element at first
+        and secondly as residue name, if the element is unknown.
+    Returns
+    -------
+    mass : float or None
+        The mass of the given object in *u*. None if the mass is unknown.
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> print(mass(atom_array))
+    2170.438
+    >>> first_residue = list(residue_iter(atom_array))[0]
+    >>> print(first_residue)
+        A       1  ASN N      N        -8.901    4.127   -0.555
+        A       1  ASN CA     C        -8.608    3.135   -1.618
+        A       1  ASN C      C        -7.117    2.964   -1.897
+        A       1  ASN O      O        -6.634    1.849   -1.758
+        A       1  ASN CB     C        -9.437    3.396   -2.889
+        A       1  ASN CG     C       -10.915    3.130   -2.611
+        A       1  ASN OD1    O       -11.269    2.700   -1.524
+        A       1  ASN ND2    N       -11.806    3.406   -3.543
+        A       1  ASN H1     H        -8.330    3.957    0.261
+        A       1  ASN H2     H        -8.740    5.068   -0.889
+        A       1  ASN H3     H        -9.877    4.041   -0.293
+        A       1  ASN HA     H        -8.930    2.162   -1.239
+        A       1  ASN HB2    H        -9.310    4.417   -3.193
+        A       1  ASN HB3    H        -9.108    2.719   -3.679
+        A       1  ASN HD21   H       -11.572    3.791   -4.444
+        A       1  ASN HD22   H       -12.757    3.183   -3.294
+    >>> print(mass("ASN"))
+    132.118
+    >>> first_atom = first_residue[0]
+    >>> print(first_atom)
+        A       1  ASN N      N        -8.901    4.127   -0.555
+    >>> print(mass(first_atom))
+    14.007
+    >>> print(mass("N"))
+    14.007
+    """
+    global _atom_masses
+    with open(ATOM_MASSES_FILE, "r") as file:
+        _atom_masses = json.load(file)
+    if isinstance(item, str):
+        if is_residue is None:
+            result_mass = _atom_masses.get(item.upper())
+            if result_mass is None:
+                result_mass = _mass_for_residue(item)
+        elif not is_residue:
+            result_mass = _atom_masses.get(item.upper())
+        else:
+            result_mass = _mass_for_residue(item)
+    elif isinstance(item, Atom):
+        result_mass = mass(item.element, is_residue=False)
+    elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
+        result_mass = sum((mass(element, is_residue=False) for element in item.element))
+    else:
+        raise TypeError(f"Cannot calculate mass for {type(item).__name__} objects")
+    if result_mass is None:
+        raise KeyError(f"{item} is not known")
+    return result_mass
+def _mass_for_residue(res_name):
+    column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
+    if column is None:
+        raise KeyError(f"Residue '{res_name}' is not known")
+    return column.as_item()