PyPI - biotite - Versions diffs - 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl - Mend

biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show

biotite/__init__.py +19 -0
biotite/application/__init__.py +43 -0
biotite/application/application.py +265 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +505 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +83 -0
biotite/application/blast/webapp.py +421 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +238 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +152 -0
biotite/application/localapp.py +306 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +122 -0
biotite/application/msaapp.py +374 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +254 -0
biotite/application/muscle/app5.py +171 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +456 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +222 -0
biotite/application/util.py +59 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +304 -0
biotite/application/viennarna/rnafold.py +269 -0
biotite/application/viennarna/rnaplot.py +187 -0
biotite/application/viennarna/util.py +72 -0
biotite/application/webapp.py +77 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +61 -0
biotite/database/entrez/dbnames.py +89 -0
biotite/database/entrez/download.py +223 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +223 -0
biotite/database/error.py +15 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +260 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +827 -0
biotite/database/pubchem/throttle.py +99 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +167 -0
biotite/database/rcsb/query.py +959 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +32 -0
biotite/database/uniprot/download.py +134 -0
biotite/database/uniprot/query.py +209 -0
biotite/file.py +251 -0
biotite/sequence/__init__.py +73 -0
biotite/sequence/align/__init__.py +49 -0
biotite/sequence/align/alignment.py +658 -0
biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +434 -0
biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +574 -0
biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3400 -0
biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +405 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +305 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
biotite/sequence/align/selector.pyx +956 -0
biotite/sequence/align/statistics.py +265 -0
biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +566 -0
biotite/sequence/annotation.py +829 -0
biotite/sequence/codec.cpython-310-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +466 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1034 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +139 -0
biotite/sequence/graphics/dendrogram.py +184 -0
biotite/sequence/graphics/features.py +510 -0
biotite/sequence/graphics/logo.py +110 -0
biotite/sequence/graphics/plasmid.py +661 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +273 -0
biotite/sequence/io/fasta/file.py +278 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +120 -0
biotite/sequence/io/fastq/file.py +551 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +277 -0
biotite/sequence/io/genbank/file.py +575 -0
biotite/sequence/io/genbank/metadata.py +324 -0
biotite/sequence/io/genbank/sequence.py +172 -0
biotite/sequence/io/general.py +192 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +133 -0
biotite/sequence/io/gff/file.py +434 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +456 -0
biotite/sequence/search.py +116 -0
biotite/sequence/seqtypes.py +556 -0
biotite/sequence/sequence.py +374 -0
biotite/structure/__init__.py +132 -0
biotite/structure/atoms.py +1455 -0
biotite/structure/basepairs.py +1415 -0
biotite/structure/bonds.cpython-310-darwin.so +0 -0
biotite/structure/bonds.pyx +1933 -0
biotite/structure/box.py +592 -0
biotite/structure/celllist.cpython-310-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +298 -0
biotite/structure/charges.cpython-310-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +114 -0
biotite/structure/dotbracket.py +216 -0
biotite/structure/error.py +31 -0
biotite/structure/filter.py +585 -0
biotite/structure/geometry.py +697 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +226 -0
biotite/structure/graphics/rna.py +282 -0
biotite/structure/hbond.py +409 -0
biotite/structure/info/__init__.py +25 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +82 -0
biotite/structure/info/bonds.py +145 -0
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +123 -0
biotite/structure/info/misc.py +144 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +196 -0
biotite/structure/integrity.py +268 -0
biotite/structure/io/__init__.py +30 -0
biotite/structure/io/ctab.py +72 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +65 -0
biotite/structure/io/general.py +257 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +343 -0
biotite/structure/io/mmtf/__init__.py +21 -0
biotite/structure/io/mmtf/assembly.py +214 -0
biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +341 -0
biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +501 -0
biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +152 -0
biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +183 -0
biotite/structure/io/mmtf/file.py +233 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +115 -0
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/mol.py +193 -0
biotite/structure/io/mol/sdf.py +916 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +63 -0
biotite/structure/io/npz/__init__.py +20 -0
biotite/structure/io/npz/file.py +152 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +293 -0
biotite/structure/io/pdb/file.py +1240 -0
biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +107 -0
biotite/structure/io/pdbqt/file.py +640 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +1597 -0
biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +950 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/io/tng/__init__.py +13 -0
biotite/structure/io/tng/file.py +46 -0
biotite/structure/io/trajfile.py +710 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +46 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +46 -0
biotite/structure/mechanics.py +75 -0
biotite/structure/molecules.py +353 -0
biotite/structure/pseudoknots.py +642 -0
biotite/structure/rdf.py +243 -0
biotite/structure/repair.py +253 -0
biotite/structure/residues.py +562 -0
biotite/structure/resutil.py +178 -0
biotite/structure/sasa.cpython-310-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/sequence.py +112 -0
biotite/structure/sse.py +327 -0
biotite/structure/superimpose.py +727 -0
biotite/structure/transform.py +504 -0
biotite/structure/util.py +98 -0
biotite/temp.py +86 -0
biotite/version.py +16 -0
biotite/visualize.py +251 -0
biotite-0.41.1.dist-info/METADATA +187 -0
biotite-0.41.1.dist-info/RECORD +340 -0
biotite-0.41.1.dist-info/WHEEL +4 -0
biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0

biotite/structure/info/ccd.py ADDED Viewed

@@ -0,0 +1,95 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["get_ccd", "get_from_ccd"]
+from pathlib import Path
+import numpy as np
+# Directory next to this module that holds the bundled CCD file
+# (``components.bcif`` is read from here by `get_ccd()`)
+CCD_DIR = Path(__file__).parent / "ccd"
+# For each supported CCD category: the name of the column that holds the
+# component ID, which `get_from_ccd()` uses to index the rows by residue
+INDEX_COLUMN_NAME = {
+    "chem_comp": "id",
+    "chem_comp_atom": "comp_id",
+    "chem_comp_bond": "comp_id",
+}
+# Module-level cache for the CCD, filled on the first call of `get_ccd()`
+_ccd_block = None
+# For each category this index gives the start and stop for each residue
+# (filled lazily per category by `get_from_ccd()`)
+_residue_index = {}
+def get_ccd():
+    """
+    Get the PDB *Chemical Component Dictionary* (CCD).
+    Returns
+    -------
+    ccd : BinaryCIFFile
+        The CCD.
+    """
+    # Avoid circular import
+    from ..io.pdbx.bcif import BinaryCIFFile
+    global _ccd_block
+    if _ccd_block is None:
+        # Load CCD once and cache it for subsequent calls
+        _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
+    return _ccd_block
+def get_from_ccd(category_name, comp_id, column_name=None):
+    """
+    Get the rows for the given residue in the given category from the
+    PDB *Chemical Component Dictionary* (CCD).
+    Parameters
+    ----------
+    category_name : str
+        The category in the CCD.
+    comp_id : str
+        The residue identifier, i.e. the ``res_name``.
+    column_name : str, optional
+        The name of the column to be retrieved.
+        If None, all columns are returned as dictionary.
+        By default None.
+    Returns
+    -------
+    value : ndarray or dict or None
+        The array of the given column or all columns as dictionary.
+        ``None`` if the `comp_id` is not found in the category.
+    """
+    global _residue_index
+    ccd = get_ccd()
+    category = ccd[category_name]
+    if category_name not in _residue_index:
+        _residue_index[category_name] = _index_residues(
+            category[INDEX_COLUMN_NAME[category_name]].as_array()
+        )
+    try:
+        start, stop = _residue_index[category_name][comp_id]
+    except KeyError:
+        return None
+    if column_name is None:
+        return {
+            col_name: category[col_name].as_array()[start:stop]
+            for col_name in category.keys()
+        }
+    else:
+        return category[column_name].as_array()[start:stop]
+def _index_residues(id_column):
+    residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
+    # The final start is the exclusive stop of last residue
+    residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
+    index = {}
+    for i in range(len(residue_starts)-1):
+        comp_id = id_column[residue_starts[i]].item()
+        index[comp_id] = (residue_starts[i], residue_starts[i+1])
+    return index

biotite/structure/info/groups.py ADDED Viewed

@@ -0,0 +1,90 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Tom David Müller, Patrick Kunzmann"
+__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
+from pathlib import Path
+import copy
+# Directory next to this module that contains one ``<group name>.txt``
+# file per residue group
+CCD_DIR = Path(__file__).parent / "ccd"
+# Cache: maps a group name to the tuple of residue names read from its
+# file, filled lazily by `_get_group_members()`
+group_lists = {}
+def amino_acid_names():
+    """
+    Get a tuple of amino acid three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    Returns
+    -------
+    amino_acid_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        peptide monomers.
+    Notes
+    -----
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("amino_acids")
+def nucleotide_names():
+    """
+    Get a tuple of nucleotide three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    Returns
+    -------
+    nucleotide_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        DNA/RNA monomers.
+    Notes
+    -----
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("nucleotides")
+def carbohydrate_names():
+    """
+    Get a tuple of carbohydrate three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    Returns
+    -------
+    carbohydrate_names : tuple of str
+        A list of three-letter-codes containing residues that are
+        saccharide monomers.
+    Notes
+    -----
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("carbohydrates")
+def _get_group_members(group_name):
+    global group_lists
+    if group_name not in group_lists:
+        with open(CCD_DIR / f"{group_name}.txt", "r") as file:
+            group_lists[group_name] = tuple(file.read().split())
+    return group_lists[group_name]

biotite/structure/info/masses.py ADDED Viewed

@@ -0,0 +1,123 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["mass"]
+import json
+from pathlib import Path
+from ..atoms import Atom, AtomArray, AtomArrayStack
+from .ccd import get_from_ccd
+# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
+# JSON file next to this module that `mass()` loads the masses from
+ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
+# Holds the parsed content of `ATOM_MASSES_FILE` after `mass()` loaded it
+_atom_masses = None
+def mass(item, is_residue=None):
+    """
+    Calculate the mass for the given object.
+    :footcite:`Meija2016`
+    If a residue name is given, the mass values refer to the masses of
+    the complete molecule without additional or missing protons.
+    In case of residues in a longer chain, some atoms might be missing
+    from the molecule.
+    For example non-terminal residues in a protein or nucleotide chain
+    miss the mass of a water molecule.
+    Parameters
+    ----------
+    item : str or Atom or AtomArray or AtomArrayStack
+        The atom or molecule to get the mass for.
+        If a string is given, it is interpreted as residue name or
+        chemical element.
+        If an :class:`Atom` is given the mass is taken from its element.
+        If an :class:`AtomArray` or :class:`AtomArrayStack` is given the
+        mass is the sum of the mass of its atoms.
+    is_residue : bool, optional
+        If set to true and a string is given for `item`, the string
+        will be strictly interpreted as residue.
+        If set to false, the string is strictly interpreted as element.
+        By default the string will be interpreted as element at first
+        and secondly as residue name, if the element is unknown.
+    Returns
+    -------
+    mass : float or None
+        The mass of the given object in *u*. None if the mass is unknown.
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> print(mass(atom_array))
+    2170.438
+    >>> first_residue = list(residue_iter(atom_array))[0]
+    >>> print(first_residue)
+        A       1  ASN N      N        -8.901    4.127   -0.555
+        A       1  ASN CA     C        -8.608    3.135   -1.618
+        A       1  ASN C      C        -7.117    2.964   -1.897
+        A       1  ASN O      O        -6.634    1.849   -1.758
+        A       1  ASN CB     C        -9.437    3.396   -2.889
+        A       1  ASN CG     C       -10.915    3.130   -2.611
+        A       1  ASN OD1    O       -11.269    2.700   -1.524
+        A       1  ASN ND2    N       -11.806    3.406   -3.543
+        A       1  ASN H1     H        -8.330    3.957    0.261
+        A       1  ASN H2     H        -8.740    5.068   -0.889
+        A       1  ASN H3     H        -9.877    4.041   -0.293
+        A       1  ASN HA     H        -8.930    2.162   -1.239
+        A       1  ASN HB2    H        -9.310    4.417   -3.193
+        A       1  ASN HB3    H        -9.108    2.719   -3.679
+        A       1  ASN HD21   H       -11.572    3.791   -4.444
+        A       1  ASN HD22   H       -12.757    3.183   -3.294
+    >>> print(mass("ASN"))
+    132.118
+    >>> first_atom = first_residue[0]
+    >>> print(first_atom)
+        A       1  ASN N      N        -8.901    4.127   -0.555
+    >>> print(mass(first_atom))
+    14.007
+    >>> print(mass("N"))
+    14.007
+    """
+    global _atom_masses
+    with open(ATOM_MASSES_FILE, "r") as file:
+        _atom_masses = json.load(file)
+    if isinstance(item, str):
+        if is_residue is None:
+            result_mass = _atom_masses.get(item.upper())
+            if result_mass is None:
+                result_mass = get_from_ccd(
+                    "chem_comp", item.upper(), "formula_weight"
+                ).item()
+        elif not is_residue:
+            result_mass = _atom_masses.get(item.upper())
+        else:
+            result_mass = get_from_ccd(
+                "chem_comp", item.upper(), "formula_weight"
+            ).item()
+    elif isinstance(item, Atom):
+        result_mass = mass(item.element, is_residue=False)
+    elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
+        result_mass = sum(
+            (mass(element, is_residue=False) for element in item.element)
+        )
+    else:
+        raise TypeError(
+            f"Cannot calculate mass for {type(item).__name__} objects"
+        )
+    if result_mass is None:
+        raise KeyError(f"{item} is not known")
+    return result_mass

biotite/structure/info/misc.py ADDED Viewed

@@ -0,0 +1,144 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
+from .ccd import get_ccd, get_from_ccd
+def all_residues():
+    """
+    Get a list of all residues/compound names in the
+    PDB chemical components dictionary.
+    Returns
+    -------
+    residues : list of str
+        A list of all available The up to 3-letter residue names.
+    Examples
+    --------
+    >>> print(all_residues()[1000 : 1010])
+    ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
+    """
+    return get_ccd()["chem_comp"]["id"].as_array().tolist()
+def full_name(res_name):
+    """
+    Get the full name of a residue/compound from the up to 3-letter
+    residue name, based on the PDB chemical components dictionary.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name.
+    Returns
+    -------
+    name : str or None
+        The full name of the residue.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
+    Examples
+    --------
+    >>> print(full_name("MAN"))
+    alpha-D-mannopyranose
+    """
+    array = get_from_ccd("chem_comp", res_name.upper(), "name")
+    if array is None:
+        return None
+    return array.item()
+def link_type(res_name):
+    """
+    Get the linking type of a residue/compound,
+    based on the PDB chemical components dictionary.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name.
+    Returns
+    -------
+    link_type : str or None
+        The link type.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
+    Examples
+    --------
+    >>> print(link_type("MAN"))
+    D-saccharide, alpha linking
+    >>> print(link_type("TRP"))
+    L-PEPTIDE LINKING
+    >>> print(link_type("HOH"))
+    NON-POLYMER
+    """
+    array = get_from_ccd("chem_comp", res_name.upper(), "type")
+    if array is None:
+        return None
+    return array.item()
+def one_letter_code(res_name):
+    """
+    Get the one-letter code of a residue/compound,
+    based on the PDB chemical components dictionary.
+    The one-letter code is only defined for amino acids and nucleotides
+    and for compounds that are structurally similar to them.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name.
+    Returns
+    -------
+    one_letter_code : str or None
+        The one-letter code.
+        None if the compound is not present in the CCD or if no
+        one-letter code is defined for this compound.
+    Examples
+    --------
+    Get the one letter code for an amino acid (or a nucleotide).
+    >>> print(full_name("ALA"))
+    ALANINE
+    >>> print(one_letter_code("ALA"))
+    A
+    For similar compounds, the one-letter code is also defined.
+    >>> print(full_name("DAL"))
+    D-ALANINE
+    >>> print(one_letter_code("DAL"))
+    A
+    For other compounds, the one-letter code is not defined.
+    >>> print(full_name("MAN"))
+    alpha-D-mannopyranose
+    >>> print(one_letter_code("MAN"))
+    None
+    """
+    array = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
+    if array is None:
+        return None
+    item = array.item()
+    if item == "":
+        return None
+    return item

biotite/structure/info/radii.py ADDED Viewed

@@ -0,0 +1,197 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["vdw_radius_protor", "vdw_radius_single"]
+from .bonds import bonds_in_residue
+# Contains tuples for the different ProtOr groups:
+# Tuple contains: element, valency, H count
+# `_calculate_protor_radii()` builds such a tuple for each atom of a
+# residue and looks up the radius here
+# NOTE(review): the unit is presumably Angstrom - confirm
+_PROTOR_RADII = {
+    ("C",  3, 0) : 1.61,
+    ("C",  3, 1) : 1.76,
+    ("C",  4, 1) : 1.88,
+    ("C",  4, 2) : 1.88,
+    ("C",  4, 3) : 1.88,
+    ("N",  3, 0) : 1.64,
+    ("N",  3, 1) : 1.64,
+    ("N",  3, 2) : 1.64,
+    ("N",  4, 3) : 1.64,
+    ("O",  1, 0) : 1.42,
+    ("O",  2, 1) : 1.46,
+    ("S",  1, 0) : 1.77,
+    ("S",  2, 0) : 1.77, # Not official, added for completeness (MET)
+    ("S",  2, 1) : 1.77,
+    ("F",  1, 0) : 1.47, # Taken from _SINGLE_RADII
+    ("CL", 1, 0) : 1.75, # Taken from _SINGLE_RADII
+    ("BR", 1, 0) : 1.85, # Taken from _SINGLE_RADII
+    ("I",  1, 0) : 1.98, # Taken from _SINGLE_RADII
+}
+# Radii per element, keyed by the upper case element symbol;
+# used by `vdw_radius_single()`
+_SINGLE_RADII = {
+    "H":  1.20,
+    "HE": 1.40,
+    "C":  1.70,
+    "N":  1.55,
+    "O":  1.52,
+    "F":  1.47,
+    "NE": 1.54,
+    "SI": 2.10,
+    "P":  1.80,
+    "S":  1.80,
+    "CL": 1.75,
+    "AR": 1.88,
+    "AS": 1.85,
+    "SE": 1.90,
+    "BR": 1.85,
+    "KR": 2.02,
+    "TE": 2.06,
+    "I":  1.98,
+    "XE": 2.16,
+}
+# A dictionary that caches radii for each residue:
+# upper case residue name -> {atom name -> radius or None}
+_protor_radii = {}
+def vdw_radius_protor(res_name, atom_name):
+    """
+    Estimate the Van-der-Waals radius of an non-hydrogen atom,
+    that includes the radius added by potential bonded hydrogen atoms.
+    The respective radii are taken from the ProtOr dataset.
+    :footcite:`Tsai1999`
+    This is especially useful for macromolecular structures where no
+    hydrogen atoms are resolved, e.g. crystal structures.
+    The valency of the non-hydrogen atom and the amount of normally
+    bonded hydrogen atoms is taken from the chemical compound dictionary
+    dataset.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name the non-hydrogen atom belongs
+        to.
+    atom_name : str
+        The name of the non-hydrogen atom.
+    Returns
+    -------
+    The Van-der-Waals radius of the given atom.
+    If the radius cannot be estimated for the atom, `None` is returned.
+    See also
+    --------
+    vdw_radius_single
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> print(vdw_radius_protor("GLY", "CA"))
+    1.88
+    """
+    res_name = res_name.upper()
+    if atom_name[0] == "H":
+        raise ValueError(
+            f"Calculating the ProtOr radius for the hydrogen atom "
+            f"'{atom_name}' is not meaningful"
+        )
+    if res_name in _protor_radii:
+        # Use cached radii for the residue, if already calculated
+        if atom_name not in _protor_radii[res_name]:
+            raise KeyError(
+                f"Residue '{res_name}' does not contain an atom named "
+                f"'{atom_name}'"
+            )
+        return _protor_radii[res_name].get(atom_name)
+    else:
+        # Otherwise calculate radii for the given residue and cache
+        _protor_radii[res_name] = _calculate_protor_radii(res_name)
+        # Recursive call, but this time the radii for the given residue
+        # are cached
+        return vdw_radius_protor(res_name, atom_name)
+def _calculate_protor_radii(res_name):
+    """
+    Calculate the ProtOr VdW radii for all atoms (atom names) in
+    a residue.
+    """
+    bonds = bonds_in_residue(res_name)
+    # Maps atom names to a ProtOr group
+    # -> tuple(element, valency, H count)
+    # Based on the group the radius is chosen from _PROTOR_RADII
+    groups = {}
+    for atom1, atom2 in bonds:
+        # Process each bond two times:
+        # One time the first atom is the one to get valency and H count
+        # for and the other time vice versa
+        for main_atom, bound_atom in ((atom1, atom2), (atom2, atom1)):
+            element = main_atom[0]
+            # Calculating ProtOr radii for hydrogens in not meaningful
+            if element == "H":
+                continue
+            # Only for these elements ProtOr groups exist
+            # Calculation of group for all other elements would be
+            # pointless
+            if element not in ["C", "N", "O", "S"]:
+                # Empty tuple to indicate nonexistent entry
+                groups[main_atom] = ()
+                continue
+            # Update existing entry if already existing
+            group = groups.get(main_atom, [element, 0, 0])
+            # Increase valency by one, since the bond entry exists
+            group[1] += 1
+            # If the atom is bonded to hydrogen, increase H count
+            if bound_atom[0] == "H":
+                group[2] += 1
+            groups[main_atom] = group
+    # Get radii based on ProtOr groups
+    radii = {atom : _PROTOR_RADII.get(tuple(group))
+             for atom, group in groups.items()}
+    return radii
+def vdw_radius_single(element):
+    """
+    Get the Van-der-Waals radius of an atom from the given element.
+    :footcite:`Bondi1964`
+    Parameters
+    ----------
+    element : str
+        The chemical element of the atoms.
+    Returns
+    -------
+    The Van-der-Waals radius of the atom.
+    If the radius is unknown for the element, `None` is returned.
+    See also
+    --------
+    vdw_radius_protor
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> print(vdw_radius_single("C"))
+    1.7
+    """
+    return _SINGLE_RADII.get(element.upper())