biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions for calculation of characteristic values when
|
|
7
|
+
comparing multiple structures with each other.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = ["rmsd", "rmspd", "rmsf", "average", "lddt"]
|
|
13
|
+
|
|
14
|
+
import collections.abc
|
|
15
|
+
import warnings
|
|
16
|
+
import numpy as np
|
|
17
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
|
|
18
|
+
from biotite.structure.celllist import CellList
|
|
19
|
+
from biotite.structure.chains import get_chain_count, get_chain_positions
|
|
20
|
+
from biotite.structure.geometry import index_distance
|
|
21
|
+
from biotite.structure.residues import get_residue_count, get_residue_positions
|
|
22
|
+
from biotite.structure.util import vector_dot
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def rmsd(reference, subject):
    r"""
    Compute the *root mean square deviation* (RMSD) of one or more
    structures with respect to a reference structure.

    The RMSD summarizes the overall deviation of each model from the
    reference in a single value:

    .. math:: RMSD = \sqrt{ \frac{1}{n} \sum\limits_{i=1}^n (x_i - x_{ref,i})^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The structure all deviations are measured against.
        Coordinates may be given directly as :class:`ndarray` instead.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        The structure(s) that are compared with `reference`.
        Coordinates may be given directly as :class:`ndarray` instead.

    Returns
    -------
    rmsd : float or ndarray, dtype=float, shape=(m,)
        The RMSD of `subject` with respect to `reference`.
        A single float for an :class:`AtomArray`, one value per model
        as :class:`ndarray` for an :class:`AtomArrayStack`.

    See Also
    --------
    rmsf : The *root mean square fluctuation*.

    Notes
    -----
    No superimposition is performed by this function.
    Usually :func:`superimpose()` should be applied to the subject
    before the RMSD is calculated.

    Examples
    --------

    Calculate the RMSD of all models to the first model:

    >>> superimposed, _ = superimpose(atom_array, atom_array_stack)
    >>> rms = rmsd(atom_array, superimposed)
    >>> print(np.around(rms, decimals=3))
    [0.000 1.928 2.103 2.209 1.806 2.172 2.704 1.360 2.337 1.818 1.879 2.471
     1.939 2.035 2.167 1.789 1.653 2.348 2.247 2.529 1.583 2.115 2.131 2.050
     2.512 2.666 2.206 2.397 2.328 1.868 2.316 1.984 2.124 1.761 2.642 1.721
     2.571 2.579]
    """
    # The module-private helper provides the squared deviations;
    # the last axis is averaged, i.e. one value remains per model
    squared_deviation = _sq_euclidian(reference, subject)
    mean_squared_deviation = np.mean(squared_deviation, axis=-1)
    return np.sqrt(mean_squared_deviation)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def rmspd(reference, subject, periodic=False, box=None):
    r"""
    Calculate the RMSD of atom pair distances for given structures
    relative to those found in a reference structure.

    Unlike the standard RMSD, the *root-mean-square-pairwise-deviation*
    (RMSPD) is a fit-free method to determine deviations between
    a structure and a preset reference.

    .. math:: RMSPD = \sqrt{ \frac{1}{n^2} \sum\limits_{i=1}^n \sum\limits_{j \neq i}^n (d_{ij} - d_{ref,ij})^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The reference structure.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        Structure(s) to be compared with `reference`.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    periodic : bool, optional
        If set to true, periodic boundary conditions are taken into
        account (minimum-image convention).
        The `box` attribute of the respective structure is used for
        calculation.
        An alternative box can be provided via the `box` parameter.
        By default, periodicity is ignored.
    box : ndarray, shape=(3,3) or shape=(m,3,3), optional
        If this parameter is set, the given box is used instead of the
        `box` attribute of `reference` and `subject`.

    Returns
    -------
    rmspd : float or ndarray, dtype=float, shape=(m,)
        Atom pair distance RMSD between subject and reference.
        If subject is an :class:`AtomArray` a float is returned.
        If subject is an :class:`AtomArrayStack` a :class:`ndarray`
        containing the RMSD for each model is returned.

    Warnings
    --------
    Internally, this function uses :func:`index_distance()`.
    For non-orthorhombic boxes (at least one angle deviates from
    90 degrees), periodic boundary conditions should be corrected
    prior to the computation of RMSPDs with `periodic` set to false
    to ensure correct results.
    (e.g. with :func:`remove_pbc()`).

    See Also
    --------
    rmsd : The *root mean square deviation*.
    """
    # Take the number of atoms from the coordinate shape instead of
    # 'array_length()', so that a plain coordinate ndarray is accepted
    # for 'reference' as documented
    reflen = coord(reference).shape[-2]
    # Build all ordered index pairs (i, j), including i == j:
    # A self pair has a distance of 0 in both structures and hence
    # does not contribute to the sum of squared differences
    index_i = np.repeat(np.arange(reflen), reflen)
    index_j = np.tile(np.arange(reflen), reflen)
    pairs = np.stack([index_i, index_j]).T
    refdist = index_distance(reference, pairs, periodic=periodic, box=box)
    subjdist = index_distance(subject, pairs, periodic=periodic, box=box)

    # sqrt(sum / n^2) is the same as sqrt(sum) / n
    rmspd = np.sqrt(np.sum((subjdist - refdist) ** 2, axis=-1)) / reflen
    return rmspd
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def rmsf(reference, subject):
    r"""
    Compute the *root-mean-square-fluctuation* (RMSF) of a set of
    models with respect to a reference structure.

    The RMSF describes, for each atom, the positional deviation from
    the reference structure, averaged over all models.
    Usually the reference structure is the average over all models.
    The RMSF is defined as:

    .. math:: RMSF(i) = \sqrt{ \frac{1}{T} \sum\limits_{t=1}^T (x_i(t) - x_{ref,i}(t))^2}

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The structure all deviations are measured against.
        Coordinates may be given directly as :class:`ndarray` instead.
    subject : AtomArrayStack or ndarray, dtype=float, shape=(m,n,3)
        The structures that are compared with `reference`.
        The models of the :class:`AtomArrayStack` represent the
        time *t*.
        Coordinates may be given directly as :class:`ndarray` instead.

    Returns
    -------
    rmsf : ndarray, dtype=float, shape=(n,)
        The RMSF of `subject` with respect to `reference`.
        The element at each index is the RMSF of the atom at that
        index.

    See Also
    --------
    rmsd : The *root mean square deviation*.
    average : Average the structure over the models to be used as reference in this function.

    Notes
    -----
    No superimposition is performed by this function.
    Usually :func:`superimpose()` should be applied to the subject
    before the RMSF is calculated.

    Examples
    --------

    Calculate the :math:`C_\alpha` RMSF of all models to the average
    model:

    >>> ca = atom_array_stack[:, atom_array_stack.atom_name == "CA"]
    >>> ca_average = average(ca)
    >>> ca, _ = superimpose(ca_average, ca)
    >>> print(rmsf(ca_average, ca))
    [1.372 0.360 0.265 0.261 0.288 0.204 0.196 0.306 0.353 0.238 0.266 0.317
     0.358 0.448 0.586 0.369 0.332 0.396 0.410 0.968]
    """
    # The module-private helper provides the squared deviations;
    # the second to last axis is averaged, i.e. one value remains per atom
    squared_deviation = _sq_euclidian(reference, subject)
    mean_squared_deviation = np.mean(squared_deviation, axis=-2)
    return np.sqrt(mean_squared_deviation)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def average(atoms):
    """
    Create an average structure from multiple models.

    The coordinates of the returned structure are the mean of the
    coordinates of the input models.

    Parameters
    ----------
    atoms : AtomArrayStack or ndarray, dtype=float, shape=(m,n,3)
        The models whose coordinates are averaged.
        Coordinates may be given directly as :class:`ndarray` instead.

    Returns
    -------
    average : AtomArray or ndarray, dtype=float, shape=(n,3)
        The structure with averaged atom coordinates.
        If `atoms` is an :class:`ndarray`, an :class:`ndarray` is
        returned as well.

    Raises
    ------
    TypeError
        If the coordinates of `atoms` are not three-dimensional,
        i.e. the input does not comprise multiple models.

    Notes
    -----
    Bond lengths and angles of the average structure deviate from
    meaningful values.
    Hence, it should not be used for visualization or geometric
    calculations.
    Its main purpose is to serve as reference structure, e.g. for the
    calculation of the RMSD or RMSF.
    """
    model_coord = coord(atoms)
    if model_coord.ndim != 3:
        raise TypeError("Expected an AtomArrayStack or an ndarray with shape (m,n,3)")
    # Average over the model axis
    averaged_coord = np.mean(model_coord, axis=0)

    if not isinstance(atoms, AtomArrayStack):
        # Bare coordinates in -> bare coordinates out
        return averaged_coord

    # Use the first model as template for the annotations
    # and replace its coordinates with the averaged ones
    template = atoms[0].copy()
    template.coord = averaged_coord
    return template
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def lddt(
    reference,
    subject,
    aggregation="all",
    atom_mask=None,
    partner_mask=None,
    inclusion_radius=15,
    distance_bins=(0.5, 1.0, 2.0, 4.0),
    exclude_same_residue=True,
    exclude_same_chain=False,
    filter_function=None,
    symmetric=False,
):
    """
    Calculate the *local Distance Difference Test* (lDDT) score of a structure with
    respect to its reference.
    :footcite:`Mariani2013`

    Parameters
    ----------
    reference : AtomArray
        The reference structure.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        The structure(s) to evaluate with respect to `reference`.
        The number of atoms must be the same as in `reference`.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    aggregation : {'all', 'chain', 'residue', 'atom'} or ndarray, shape=(n,), dtype=int, optional
        Defines on which scale the lDDT score is calculated.

        - `'all'`: The score is computed over all contacts.
        - `'chain'`: The score is calculated for each chain separately.
        - `'residue'`: The score is calculated for each residue separately.
        - `'atom'`: The score is calculated for each atom separately.

        Alternatively, an array of aggregation bins can be provided, i.e. each contact
        is assigned to the corresponding bin.
    atom_mask : ndarray, shape=(n,), dtype=bool, optional
        If given, the contacts are only computed for the masked atoms.
        Atoms excluded by the mask do not have any contacts and their *lDDT* would
        be NaN in case of ``aggregation="atom"``.
        Providing this mask can significantly speed up the computation, if
        only for certain chains/residues/atoms the *lDDT* is of interest.
    partner_mask : ndarray, shape=(n,), dtype=bool, optional
        If given, only contacts **to** the masked atoms are considered.
        While `atom_mask` does not alter the *lDDT* for the masked atoms,
        `partner_mask` does, as for each atom only the masked atoms are considered
        as potential contact partners.
    inclusion_radius : float, optional
        Pairwise atom distances are considered within this radius in `reference`.
    distance_bins : list of float, optional
        The distance bins for the score calculation, i.e. if a distance deviation is
        within the first bin, the score is 1, if it is outside all bins, the score is 0.
    exclude_same_residue : bool, optional
        If true, only atom distances between different residues are considered.
        Otherwise, also atom distances within the same residue are included.
    exclude_same_chain : bool, optional
        If true, only atom distances between different chains are considered.
        Otherwise, also atom distances within the same chain are included.
    filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
        Used for custom contact filtering, if the other parameters are not sufficient.
        A function that takes an array of contact atom indices and returns a mask that
        is ``True`` for all contacts that should be retained.
        All other contacts are not considered for lDDT computation.
    symmetric : bool, optional
        If set to true, the *lDDT* score is computed symmetrically.
        This means both contacts found in the `reference` and `subject` structure are
        considered.
        Hence the score is independent of which structure is given as `reference` and
        `subject`.
        Note that in this case `subject` must be an :class:`AtomArray` as well.
        By default, only contacts in the `reference` are considered.

    Returns
    -------
    lddt : float or ndarray, dtype=float
        The lDDT score for each model and aggregation bin.
        The shape depends on `subject` and `aggregation`:
        If `subject` is an :class:`AtomArrayStack` (or equivalent coordinate
        :class:`ndarray`), a dimension depicting each model is added.
        If `aggregation` is not ``'all'``, a second dimension with the length equal to
        the number of aggregation bins is added (i.e. number of chains, residues, etc.).
        If both, an :class:`AtomArray` as `subject` and ``aggregation='all'`` is passed,
        a float is returned.
        If no contact is found, the score is *NaN*.

    Raises
    ------
    IndexError
        If `reference` and `subject` have a different number of atoms.
    TypeError
        If `symmetric` is true, but `subject` is not an :class:`AtomArray`.
    ValueError
        If `aggregation` is a string, but not one of the supported levels.

    Notes
    -----
    The lDDT score measures how well the pairwise atom distances in a model match the
    corresponding distances in a reference.
    Hence, like :func:`rmspd()` it works superimposition-free, but instead of capturing
    the global deviation, only the local environment within the `inclusion_radius` is
    considered.

    Note that by default, also hydrogen atoms are considered in the distance
    calculation.
    If this is undesired, the hydrogen atoms can be removed prior to the calculation.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    Calculate the global lDDT of all models to the first model:

    >>> reference = atom_array_stack[0]
    >>> subject = atom_array_stack[1:]
    >>> print(lddt(reference, subject))
    [0.799 0.769 0.792 0.836 0.799 0.752 0.860 0.769 0.825 0.777 0.760 0.787
     0.790 0.783 0.804 0.842 0.769 0.797 0.757 0.852 0.811 0.786 0.805 0.755
     0.734 0.794 0.771 0.778 0.842 0.772 0.815 0.789 0.828 0.750 0.826 0.739
     0.760]

    Calculate the residue-wise lDDT for a single model:

    >>> subject = atom_array_stack[1]
    >>> print(lddt(reference, subject, aggregation="residue"))
    [0.599 0.692 0.870 0.780 0.830 0.881 0.872 0.658 0.782 0.901 0.888 0.885
     0.856 0.795 0.847 0.603 0.895 0.878 0.871 0.789]

    As example for custom aggregation, calculate the lDDT for each chemical element:

    >>> unique_elements = np.unique(reference.element)
    >>> element_bins = np.array(
    ...     [np.where(unique_elements == element)[0][0] for element in reference.element]
    ... )
    >>> element_lddt = lddt(reference, subject, aggregation=element_bins)
    >>> for element, lddt_for_element in zip(unique_elements, element_lddt):
    ...     print(f"{element}: {lddt_for_element:.3f}")
    C: 0.837
    H: 0.770
    N: 0.811
    O: 0.808

    If the reference structure has more atoms resolved than the subject structure,
    the missing atoms can be indicated with *NaN* values:

    >>> reference = atom_array_stack[0]
    >>> subject = atom_array_stack[1].copy()
    >>> # Simulate the situation where the first residue is missing in the subject
    >>> subject.coord[subject.res_id == 1] = np.nan
    >>> global_lddt = lddt(reference, subject)
    >>> print(f"{global_lddt:.3f}")
    0.751
    """
    reference_coord = coord(reference)
    subject_coord = coord(subject)
    if subject_coord.shape[-2] != reference_coord.shape[-2]:
        raise IndexError(
            f"The given reference has {reference_coord.shape[-2]} atoms, but the "
            f"subject has {subject_coord.shape[-2]} atoms"
        )

    contacts = _find_contacts(
        reference,
        atom_mask,
        partner_mask,
        inclusion_radius,
        exclude_same_residue,
        exclude_same_chain,
        filter_function,
    )
    if symmetric:
        if not isinstance(subject, AtomArray):
            raise TypeError(
                "Expected 'AtomArray' as subject, as symmetric lDDT is enabled, "
                f"but got '{type(subject).__name__}'"
            )
        subject_contacts = _find_contacts(
            subject,
            atom_mask,
            partner_mask,
            inclusion_radius,
            exclude_same_residue,
            exclude_same_chain,
            filter_function,
        )
        contacts = np.concatenate((contacts, subject_contacts), axis=0)
        # Adding additional contacts may introduce duplicates between the existing and
        # new ones -> filter them out
        contacts = np.unique(contacts, axis=0)
    if (
        isinstance(aggregation, str)
        and aggregation == "all"
        and atom_mask is None
        and partner_mask is None
    ):
        # Remove duplicate pairs as each pair appears twice
        # (if i is in threshold distance to j, j is also in threshold distance to i)
        # keep only the pair where i < j
        # This improves performance due to less distances that need to be computed
        # The assumption also only works when no atoms are masked
        contacts = contacts[contacts[:, 0] < contacts[:, 1]]

    reference_distances = index_distance(reference_coord, contacts)
    subject_distances = index_distance(subject_coord, contacts)
    deviations = np.abs(subject_distances - reference_distances)
    distance_bins = np.asarray(distance_bins)
    # For each contact: the fraction of distance thresholds that is not exceeded
    # NaN deviations (missing atoms) compare as False and hence count as not preserved
    fraction_preserved_bins = np.count_nonzero(
        deviations[..., np.newaxis] <= distance_bins[np.newaxis, :], axis=-1
    ) / len(distance_bins)

    # Aggregate the fractions over the desired level
    if isinstance(aggregation, str) and aggregation == "all":
        # Average over all contacts
        # The contacts are always in the last dimension,
        # a potential model dimension comes first
        if fraction_preserved_bins.shape[-1] == 0:
            # Without any contact the score is undefined
            if fraction_preserved_bins.ndim == 1:
                return np.float32(np.nan)
            # One NaN per model, avoiding the 'Mean of empty slice' warning
            return np.full(
                fraction_preserved_bins.shape[:-1],
                np.nan,
                dtype=fraction_preserved_bins.dtype,
            )
        return np.mean(fraction_preserved_bins, axis=-1)
    else:
        # A string is also a 'Sequence'
        # -> distinguish between string and array, list, etc.
        if isinstance(
            aggregation, (np.ndarray, collections.abc.Sequence)
        ) and not isinstance(aggregation, str):
            return _average_over_indices(
                fraction_preserved_bins,
                bins=np.asarray(aggregation)[contacts[:, 0]],
            )
        elif aggregation == "chain":
            return _average_over_indices(
                fraction_preserved_bins,
                bins=get_chain_positions(reference, contacts[:, 0]),
                n_bins=get_chain_count(reference),
            )
        elif aggregation == "residue":
            return _average_over_indices(
                fraction_preserved_bins,
                bins=get_residue_positions(reference, contacts[:, 0]),
                n_bins=get_residue_count(reference),
            )
        elif aggregation == "atom":
            return _average_over_indices(
                fraction_preserved_bins, contacts[:, 0], reference.array_length()
            )
        else:
            raise ValueError(f"Invalid aggregation level '{aggregation}'")
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _sq_euclidian(reference, subject):
    """
    Compute the squared euclidian distance of each atom in `subject`
    to its counterpart in `reference`.

    Parameters
    ----------
    reference : AtomArray or ndarray, dtype=float, shape=(n,3)
        The structure the distances are measured to.
    subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
        The structure(s) the distances are measured from.

    Returns
    -------
    ndarray, dtype=float, shape=(n,) or shape=(m,n)
        The squared distance for each atom.
        A single model as `subject` gives a 1-D array, multiple models
        give a 2-D array, whose first dimension indexes the model.

    Raises
    ------
    TypeError
        If the coordinates of `reference` are not two-dimensional.
    """
    ref_xyz = coord(reference)
    sub_xyz = coord(subject)
    if ref_xyz.ndim != 2:
        raise TypeError(
            "Expected an AtomArray or an ndarray with shape (n,3) as reference"
        )
    # Broadcasting handles the optional model dimension of the subject
    displacement = sub_xyz - ref_xyz
    return vector_dot(displacement, displacement)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _to_sparse_indices(all_contacts):
    """
    Convert the padded contact table returned by :meth:`CellList.get_atoms()`
    into a list of ``(query, contact)`` index pairs.

    The pairs correspond to the positions of the non-zero elements in a dense
    contact matrix.

    Parameters
    ----------
    all_contacts : ndarray, dtype=int, shape=(m,n)
        The contact table as returned by :meth:`CellList.get_atoms()`.
        Row *i* lists the indices of the atoms in contact with query atom *i*.
        Unused entries of a row are padded with -1.

    Returns
    -------
    combined_indices : ndarray, dtype=int, shape=(l,2)
        The contact pairs.
        Each row contains the query atom index followed by the contact atom index.
        The pairs are ordered by query atom and, within a query atom, by their
        position in `all_contacts`.
    """
    # Locate all table entries that are actual contacts instead of padding
    # The row of such an entry is the query atom the contact belongs to
    # 'nonzero()' iterates in row-major order, so the pairs are sorted by query atom
    query_indices, slots = np.nonzero(all_contacts != -1)
    partner_indices = all_contacts[query_indices, slots]
    return np.stack([query_indices, partner_indices], axis=1)
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _find_contacts(
    atoms=None,
    atom_mask=None,
    partner_mask=None,
    inclusion_radius=15,
    exclude_same_residue=False,
    exclude_same_chain=True,
    filter_function=None,
):
    """
    Determine the pairs of atoms in the given structure that are in contact.

    Parameters
    ----------
    atoms : AtomArray
        The structure to search for contacts.
    atom_mask : ndarray, shape=(n,), dtype=bool, optional
        If given, contacts are only searched for the masked atoms, i.e. only these
        atoms appear as first atom of a contact.
        Restricting the search this way speeds up the computation.
    partner_mask : ndarray, shape=(n,), dtype=bool, optional
        If given, only the masked atoms are eligible as contact partner, i.e. only
        these atoms appear as second atom of a contact.
    inclusion_radius : float, optional
        Two atoms are in contact, if their distance is within this radius.
    exclude_same_residue : bool, optional
        If true, contacts between atoms of the same residue are removed.
        This parameter has no effect if `exclude_same_chain` is true.
    exclude_same_chain : bool, optional
        If true, contacts between atoms of the same chain are removed.
    filter_function : Callable(ndarray, shape=(n,2), dtype=int -> ndarray, shape=(n,), dtype=bool), optional
        A custom filter applied after all other criteria.
        It takes the array of contact atom indices and returns a mask that
        is ``True`` for all contacts that should be retained.

    Returns
    -------
    contacts : ndarray, shape=(n,2), dtype=int
        The contacts.
        Each row is a pair of atom indices that are in contact.
        A contact of an atom with itself is never included.

    Raises
    ------
    IndexError
        If the mask returned by `filter_function` does not have one element for
        each contact.
    """
    coords = coord(atoms)

    # Atoms with NaN coordinates are never eligible as contact partner
    eligible_partners = ~np.isnan(coords).any(axis=-1)
    if partner_mask is not None:
        eligible_partners &= partner_mask
    # Use a cell list to find atoms within inclusion radius in O(n) time complexity
    cell_list = CellList(coords, inclusion_radius, selection=eligible_partners)

    # Table of partner indices for each atom, rows are padded with -1
    if atom_mask is not None:
        masked_table = cell_list.get_atoms(coords[atom_mask], inclusion_radius)
        # Expand the table to all atoms of the structure:
        # Atoms that are not covered by the mask get a row that consists only of
        # padding, i.e. these atoms have no contacts
        contact_table = np.full(
            (coords.shape[0], masked_table.shape[-1]),
            -1,
            dtype=masked_table.dtype,
        )
        contact_table[atom_mask] = masked_table
    else:
        contact_table = cell_list.get_atoms(coords, inclusion_radius)
    contacts = _to_sparse_indices(contact_table)

    # Assign each atom of each contact to a group (chain, residue or the atom itself)
    # and keep only the contacts whose two atoms belong to different groups
    flat_indices = contacts.flatten()
    if exclude_same_chain:
        group_indices = get_chain_positions(atoms, flat_indices)
    elif exclude_same_residue:
        group_indices = get_residue_positions(atoms, flat_indices)
    else:
        # In any case self-contacts should not be considered
        group_indices = flat_indices
    group_indices = group_indices.reshape(contacts.shape)
    contacts = contacts[group_indices[:, 0] != group_indices[:, 1]]

    if filter_function is None:
        return contacts
    mask = filter_function(contacts)
    if mask.shape != (contacts.shape[0],):
        raise IndexError(
            f"Mask returned from filter function has shape {mask.shape}, "
            f"but expected ({contacts.shape[0]},)"
        )
    return contacts[mask, :]
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _average_over_indices(values, bins, n_bins=None):
    """
    For each unique index in `bins`, average the corresponding values in `values`.

    Based on
    https://stackoverflow.com/questions/79140661/how-to-sum-values-based-on-a-second-index-array-in-a-vectorized-manner

    Parameters
    ----------
    values : ndarray, shape=(..., n)
        The values to average.
        The averaging is performed along the last dimension, any number of
        leading dimensions is supported.
    bins : ndarray, shape=(n,) dtype=int
        Associates each value from `values` with a bin.
    n_bins : int, optional
        The total number of bins.
        This is necessary as some trailing bins may be empty and hence would not
        appear in `bins`.
        By default the number of bins is determined from `bins`.

    Returns
    -------
    averaged : ndarray, shape=(..., k)
        The averaged values.
        *k* is `n_bins` if given, otherwise the maximum value in `bins` + 1.
        Bins without any value are *NaN*.
    """
    if n_bins is None:
        n_elements_per_bin = np.bincount(bins)
        n_bins = len(n_elements_per_bin)
    else:
        n_elements_per_bin = np.bincount(bins, minlength=n_bins)
    # The last dimension is replaced by the number of bins
    # Broadcasting in 'np.add.at()' requires the replaced dimension to be the first
    # 'moveaxis()' keeps the order of the remaining dimensions intact,
    # which 'swapaxes()' would not do for more than two dimensions
    aggregated = np.zeros((n_bins, *values.shape[:-1]), dtype=values.dtype)
    np.add.at(aggregated, bins, np.moveaxis(values, -1, 0))
    # If a bin has no values (e.g. an atom without contacts), the division by zero
    # gives NaN
    # This result is expected, hence only the corresponding floating point
    # warnings are silenced
    with np.errstate(divide="ignore", invalid="ignore"):
        # Bring the bin dimension into the last dimension again
        return np.moveaxis(aggregated, 0, -1) / n_elements_per_bin
|