biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
|
|
8
|
+
|
|
9
|
+
from biotite.structure.info.ccd import get_ccd, get_from_ccd
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def all_residues():
    """
    List the names of all residues/compounds that are part of the PDB
    *Chemical Component Dictionary* (CCD).

    Returns
    -------
    residues : list of str
        A list of all available residue names.
    """
    # The 'id' column of the 'chem_comp' category holds the names
    chem_comp = get_ccd()["chem_comp"]
    residue_ids = chem_comp["id"].as_array()
    return residue_ids.tolist()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def full_name(res_name):
    """
    Look up the full name of a residue/compound in the PDB chemical
    components dictionary, given its up to 3-letter residue name.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        It is converted to upper case before the lookup.

    Returns
    -------
    name : str or None
        The full name of the residue.
        If the residue is unknown to the chemical components dictionary,
        ``None`` is returned.

    Examples
    --------

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    """
    name_column = get_from_ccd("chem_comp", res_name.upper(), "name")
    # No column means that the lookup did not find the residue
    return None if name_column is None else name_column.as_item()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def link_type(res_name):
    """
    Look up the linking type of a residue/compound in the PDB chemical
    components dictionary.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        It is converted to upper case before the lookup.

    Returns
    -------
    link_type : str or None
        The link type.
        If the residue is unknown to the chemical components dictionary,
        ``None`` is returned.

    Examples
    --------

    >>> print(link_type("MAN"))
    D-saccharide, alpha linking
    >>> print(link_type("TRP"))
    L-PEPTIDE LINKING
    >>> print(link_type("HOH"))
    NON-POLYMER
    """
    type_column = get_from_ccd("chem_comp", res_name.upper(), "type")
    # No column means that the lookup did not find the residue
    return None if type_column is None else type_column.as_item()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def one_letter_code(res_name):
    """
    Look up the one-letter code of a residue/compound in the PDB
    chemical components dictionary.

    The one-letter code is only defined for amino acids and nucleotides
    and for compounds that are structurally similar to them.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        It is converted to upper case before the lookup.

    Returns
    -------
    one_letter_code : str or None
        The one-letter code.
        ``None`` if the compound is not present in the CCD or if no
        one-letter code is defined for this compound.

    Examples
    --------

    Get the one letter code for an amino acid (or a nucleotide).

    >>> print(full_name("ALA"))
    ALANINE
    >>> print(one_letter_code("ALA"))
    A

    For similar compounds, the one-letter code is also defined.

    >>> print(full_name("DAL"))
    D-ALANINE
    >>> print(one_letter_code("DAL"))
    A

    For other compounds, the one-letter code is not defined.

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    >>> print(one_letter_code("MAN"))
    None
    """
    code_column = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
    # Two reasons for a missing code:
    # - no column at all, i.e. the lookup did not find the compound
    # - the column carries a mask, i.e. the value is inapplicable or missing
    if code_column is None or code_column.mask is not None:
        return None
    return code_column.as_item()
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["vdw_radius_protor", "vdw_radius_single"]
|
|
8
|
+
|
|
9
|
+
from biotite.structure.info.bonds import bonds_in_residue
|
|
10
|
+
|
|
11
|
+
# fmt: off
|
|
12
|
+
# Radius lookup table for the different ProtOr groups.
# Each key is a tuple describing one group: (element, valency, H count)
# NOTE(review): _calculate_protor_radii() takes the element from the first
# character of the atom name and only builds groups for C, N, O and S,
# so the halogen keys at the end look unreachable from there - confirm.
_PROTOR_RADII = {
    ("C", 3, 0): 1.61,
    ("C", 3, 1): 1.76,
    ("C", 4, 1): 1.88,
    ("C", 4, 2): 1.88,
    ("C", 4, 3): 1.88,
    ("N", 3, 0): 1.64,
    ("N", 3, 1): 1.64,
    ("N", 3, 2): 1.64,
    ("N", 4, 3): 1.64,
    ("O", 1, 0): 1.42,
    ("O", 2, 1): 1.46,
    ("S", 1, 0): 1.77,
    ("S", 2, 0): 1.77,  # Not official, added for completeness (MET)
    ("S", 2, 1): 1.77,
    ("F", 1, 0): 1.47,  # Taken from _SINGLE_ATOM_VDW_RADII
    ("CL", 1, 0): 1.75,  # Taken from _SINGLE_ATOM_VDW_RADII
    # NOTE(review): marked as taken from _SINGLE_ATOM_VDW_RADII,
    # but that table lists 1.83 for "BR" - confirm which value is intended
    ("BR", 1, 0): 1.85,
    ("I", 1, 0): 1.98,  # Taken from _SINGLE_ATOM_VDW_RADII
}
|
|
34
|
+
|
|
35
|
+
_SINGLE_ATOM_VDW_RADII = {
    # --- Main group elements, one line per period ---
    # Period 1
    "H": 1.10, "HE": 1.40,
    # Period 2
    "LI": 1.81, "BE": 1.53, "B": 1.92, "C": 1.70,
    "N": 1.55, "O": 1.52, "F": 1.47, "NE": 1.54,
    # Period 3
    "NA": 2.27, "MG": 1.73, "AL": 1.84, "SI": 2.10,
    "P": 1.80, "S": 1.80, "CL": 1.75, "AR": 1.88,
    # Period 4
    "K": 2.75, "CA": 2.31, "GA": 1.87, "GE": 2.11,
    "AS": 1.85, "SE": 1.90, "BR": 1.83, "KR": 2.02,
    # Period 5
    "RB": 3.03, "SR": 2.49, "IN": 1.93, "SN": 2.17,
    "SB": 2.06, "TE": 2.06, "I": 1.98, "XE": 2.16,
    # Period 6
    "CS": 3.43, "BA": 2.68, "TL": 1.96, "PB": 2.02,
    "BI": 2.07, "PO": 1.97, "AT": 2.02, "RN": 2.20,
    # Period 7
    "FR": 3.48, "RA": 2.83,

    # --- Transition metals (relevant ones only) ---
    # Row 1
    "FE": 2.05, "CU": 2.00, "ZN": 2.10,
    "MN": 2.05, "CO": 2.00, "NI": 2.00,
    # Row 2
    "MO": 2.10, "RU": 2.05,
    # Row 3
    "W": 2.10, "PT": 2.05, "AU": 2.10,
}
|
|
113
|
+
"""
|
|
114
|
+
Van der Waals radii for main group and transition elements.
|
|
115
|
+
|
|
116
|
+
Main group:
|
|
117
|
+
Source: https://pubs.acs.org/doi/10.1021/jp8111556, Table 12 (Mantina et al. 2009)
|
|
118
|
+
|
|
119
|
+
Transition metals:
|
|
120
|
+
Source: RDKit, 2024.9.4 Release
|
|
121
|
+
https://github.com/rdkit/rdkit/blob/af6347963f25cfe8fe4db0638410b2f3a8e8bd89/Code/GraphMol/atomic_data.cpp#L51
|
|
122
|
+
|
|
123
|
+
Where available, these values were cross-checked vs the CRC Handbook of
|
|
124
|
+
Chemistry and Physics (105th edition) and verified that they are closely
|
|
125
|
+
in line (barring very minor discrepancies, usually < 0.05 Å).
|
|
126
|
+
We cannot use the CRC values directly as they are not permissively licensed.
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
# fmt: on
|
|
130
|
+
|
|
131
|
+
# Cache of already calculated ProtOr radii:
# maps an upper case residue name to the dictionary
# returned by _calculate_protor_radii() for that residue
_protor_radii = {}


def vdw_radius_protor(res_name, atom_name):
    """
    Estimate the Van-der-Waals radius of a heavy atom,
    that includes the radius added by potential bonded hydrogen atoms.
    The respective radii are taken from the ProtOr dataset.
    :footcite:`Tsai1999`

    This is especially useful for macromolecular structures where no
    hydrogen atoms are resolved, e.g. crystal structures.
    The valency of the heavy atom and the amount of normally
    bonded hydrogen atoms is taken from the *Chemical Component Dictionary*.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name the non-hydrogen atom belongs
        to.
        It is converted to upper case before the lookup.
    atom_name : str
        The name of the non-hydrogen atom.

    Returns
    -------
    radius : float
        The Van-der-Waals radius of the given atom.
        If the radius cannot be estimated for the atom, `None` is returned.

    Raises
    ------
    ValueError
        If `atom_name` starts with ``"H"``.
    KeyError
        If no entry for `atom_name` exists in the radii calculated for
        the residue.

    See Also
    --------
    vdw_radius_single : *Van-der-Waals* radii for structures with annotated hydrogen atoms.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> print(vdw_radius_protor("GLY", "CA"))
    1.88
    """
    res_name = res_name.upper()
    if atom_name[0] == "H":
        raise ValueError(
            f"Calculating the ProtOr radius for the hydrogen atom "
            f"'{atom_name}' is not meaningful"
        )

    # Calculate the radii of a residue only on its first request,
    # afterwards the cached dictionary is reused
    residue_radii = _protor_radii.get(res_name)
    if residue_radii is None:
        residue_radii = _calculate_protor_radii(res_name)
        _protor_radii[res_name] = residue_radii

    if atom_name not in residue_radii:
        raise KeyError(
            f"Residue '{res_name}' does not contain an atom named '{atom_name}'"
        )
    # The stored value may be None, if no ProtOr group matched the atom
    return residue_radii[atom_name]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _calculate_protor_radii(res_name):
    """
    Determine the ProtOr VdW radii for the atoms (atom names) of a
    residue, based on the bonds reported by :func:`bonds_in_residue()`.

    Parameters
    ----------
    res_name : str
        The residue name passed on to :func:`bonds_in_residue()`.

    Returns
    -------
    radii : dict (str -> float or None)
        Maps the name of each non-hydrogen atom that appears in at
        least one bond to its radius from ``_PROTOR_RADII``.
        The value is ``None``, if no entry matches the atom.
    """
    # Maps atom name -> [valency, H count]
    # or to None, if no ProtOr groups exist for the element of the atom
    tallies = {}
    for first_atom, second_atom in bonds_in_residue(res_name):
        # Look at each bond from both sides,
        # so that both bond partners get their counts updated
        for center, partner in (
            (first_atom, second_atom),
            (second_atom, first_atom),
        ):
            # The first character of the atom name is used as element
            symbol = center[0]
            if symbol == "H":
                # ProtOr radii for hydrogen atoms are not meaningful
                continue
            if symbol not in ("C", "N", "O", "S"):
                # ProtOr groups exist only for the above elements
                tallies[center] = None
                continue
            tally = tallies.setdefault(center, [0, 0])
            # Every bond entry contributes to the valency...
            tally[0] += 1
            # ...and bonds to hydrogen additionally to the H count
            if partner[0] == "H":
                tally[1] += 1

    # Translate the counts into ProtOr groups
    # -> tuple(element, valency, H count)
    # and pick the corresponding radius from _PROTOR_RADII
    radii = {}
    for atom, tally in tallies.items():
        if tally is None:
            # The empty tuple is not a key, hence the lookup gives None
            group = ()
        else:
            group = (atom[0], tally[0], tally[1])
        radii[atom] = _PROTOR_RADII.get(group)
    return radii
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def vdw_radius_single(element):
    """
    Look up the *Van-der-Waals* radius of an atom of the given element.
    :footcite:`Mantina2009`

    Parameters
    ----------
    element : str
        The chemical element of the atoms.
        It is converted to upper case before the lookup.

    Returns
    -------
    radius : float
        The Van-der-Waals radius of the atom.
        If the radius is unknown for the element, `None` is returned.

    See Also
    --------
    vdw_radius_protor : *Van-der-Waals* radii for structures without annotated hydrogen atoms.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> print(vdw_radius_single("C"))
    1.7
    """
    # The keys of the lookup table are upper case element symbols
    symbol = element.upper()
    return _SINGLE_ATOM_VDW_RADII.get(symbol)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["standardize_order"]
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
import numpy as np
|
|
11
|
+
from biotite.structure.error import BadStructureError
|
|
12
|
+
from biotite.structure.info.ccd import get_from_ccd
|
|
13
|
+
from biotite.structure.residues import get_residue_starts
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def standardize_order(atoms):
    """
    Compute an index array that brings the atoms of each residue in an
    :class:`AtomArray` or :class:`AtomArrayStack` into the standard
    *RCSB PDB* atom order.

    The standard atom order of a residue is taken from the corresponding
    reference residue in the official *Chemical Component Dictionary*.
    Atoms of the input structure that do not appear in the reference
    residue are not dropped:
    The indices of these atoms are appended to the end of the respective
    residue.
    An example for such atoms are optional hydrogen atoms, that appear
    due to protonation.
    If a residue is not part of the *Chemical Component Dictionary*,
    a warning is raised and its current atom order is kept.

    Parameters
    ----------
    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
        Input structure with atoms that are potentially not in the
        *standard* order.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(n,)
        When this index array is applied on the input `atoms`,
        the atoms for each residue are reordered to obtain the
        standard *RCSB PDB* atom order.

    Raises
    ------
    BadStructureError
        If the input `atoms` have duplicate atoms (same atom name)
        within a residue.

    Examples
    --------

    Use a single residue as example.

    >>> residue = atom_array[atom_array.res_id == 1]
    >>> print(residue)
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294

    Reverse the atom array.
    Consequently, this also changes the atom order within the residue.

    >>> reordered = residue[np.arange(len(residue))[::-1]]
    >>> print(reordered)
    A 1 ASN HD22 H -12.757 3.183 -3.294
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN H1 H -8.330 3.957 0.261
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN N N -8.901 4.127 -0.555

    The order is restored with the exception of the N-terminus protonation.

    >>> restored = reordered[info.standardize_order(reordered)]
    >>> print(restored)
    A 1 ASN N N -8.901 4.127 -0.555
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN C C -7.117 2.964 -1.897
    A 1 ASN O O -6.634 1.849 -1.758
    A 1 ASN CB C -9.437 3.396 -2.889
    A 1 ASN CG C -10.915 3.130 -2.611
    A 1 ASN OD1 O -11.269 2.700 -1.524
    A 1 ASN ND2 N -11.806 3.406 -3.543
    A 1 ASN H2 H -8.740 5.068 -0.889
    A 1 ASN HA H -8.930 2.162 -1.239
    A 1 ASN HB2 H -9.310 4.417 -3.193
    A 1 ASN HB3 H -9.108 2.719 -3.679
    A 1 ASN HD21 H -11.572 3.791 -4.444
    A 1 ASN HD22 H -12.757 3.183 -3.294
    A 1 ASN H3 H -9.877 4.041 -0.293
    A 1 ASN H1 H -8.330 3.957 0.261
    """
    order = np.zeros(atoms.array_length(), dtype=int)

    # The exclusive stop makes consecutive entries delimit one residue each
    boundaries = get_residue_starts(atoms, add_exclusive_stop=True)
    for first, end in zip(boundaries[:-1], boundaries[1:]):
        res_name = atoms.res_name[first]
        reference = get_from_ccd("chem_comp_atom", res_name, "atom_id")

        if reference is not None:
            # `_reorder()` gives indices relative to the residue slice
            # -> shift them to indices pointing into the entire structure
            residue_order = _reorder(
                atoms.atom_name[first:end], reference.as_array()
            )
            order[first:end] = residue_order + first
        else:
            # Without a reference in the CCD the residue is left untouched
            warnings.warn(
                f"Residue '{res_name}' is not in the CCD, keeping current atom order"
            )
            order[first:end] = np.arange(first, end)

    return order
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _reorder(origin, target):
    """
    Create indices to `origin`, that change the order of `origin`,
    so that the order is the same as in `target`.

    Elements of `target` that are not in `origin` are ignored.
    Indices for elements of `origin` that are not in `target`
    are appended to the end of the returned array, in the order they
    appear in `origin`.

    Parameters
    ----------
    origin : ndarray, dtype=str
        The atom names to reorder.
    target : ndarray, dtype=str
        The atom names in target order.

    Returns
    -------
    indices : ndarray, dtype=int
        Indices for `origin` that change the order of `origin`
        to the order of `target`.

    Raises
    ------
    BadStructureError
        If a name from `target` appears more than once in `origin`.
    """
    # All-vs-all comparison: 'np.where()' reports the matches row by row,
    # i.e. 'origin_hits' is already sorted by the order of 'target'
    target_hits, origin_hits = np.where(target[:, np.newaxis] == origin[np.newaxis, :])

    # A target name that matches multiple origin atoms is ambiguous
    counts = np.bincount(target_hits, minlength=len(target))
    duplicate_indices = np.flatnonzero(counts > 1)
    if len(duplicate_indices) > 0:
        # Report the first duplicate atom
        duplicate_name = target[duplicate_indices[0]]
        raise BadStructureError(
            f"Input structure has duplicate atom '{duplicate_name}'"
        )

    if len(origin_hits) >= len(origin):
        # Every origin atom has a counterpart in the target
        return origin_hits

    # The origin structure has additional atoms compared to the target
    # -> Identify the atoms without counterpart in the target
    #    and append them to the end of the residue
    found_mask = np.bincount(origin_hits, minlength=len(origin)).astype(bool)
    return np.concatenate([origin_hits, np.flatnonzero(~found_mask)])
|