biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
biotite/structure/tm.py
ADDED
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions for computing the TM-score between two structures and
|
|
7
|
+
for computing the superimposition to do so.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__name__ = "biotite.structure"
|
|
11
|
+
__author__ = "Patrick Kunzmann"
|
|
12
|
+
__all__ = [
|
|
13
|
+
"tm_score",
|
|
14
|
+
"superimpose_structural_homologs",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
import itertools
|
|
18
|
+
import numpy as np
|
|
19
|
+
from biotite.sequence.align.alignment import get_codes, remove_gaps
|
|
20
|
+
from biotite.sequence.align.matrix import SubstitutionMatrix
|
|
21
|
+
from biotite.sequence.align.pairwise import align_optimal
|
|
22
|
+
from biotite.sequence.seqtypes import PurePositionalSequence
|
|
23
|
+
from biotite.structure.filter import filter_amino_acids
|
|
24
|
+
from biotite.structure.geometry import distance
|
|
25
|
+
from biotite.structure.residues import get_residue_count
|
|
26
|
+
from biotite.structure.superimpose import superimpose
|
|
27
|
+
from biotite.structure.util import coord_for_atom_name_per_residue
|
|
28
|
+
|
|
29
|
+
# Minimum value for d0
|
|
30
|
+
# This is not part of the explanation in the paper, but it is implemented in TM-align
|
|
31
|
+
_D0_MIN = 0.5
|
|
32
|
+
# Gap open penalty for hybrid alignment
|
|
33
|
+
_HYBRID_PENALTY = -1
|
|
34
|
+
# Gap open penalty for pure TM-based alignment
|
|
35
|
+
_TM_GAP_PENALTY = -0.6
|
|
36
|
+
# Arbitrary scale factor to avoid rounding errors when converting scores to integer
|
|
37
|
+
_SCORE_SCALING = 100
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def tm_score(
    reference, subject, reference_indices, subject_indices, reference_length="shorter"
):
    """
    Compute the *TM*-score for the given protein structures. :footcite:`Zhang2004`

    Parameters
    ----------
    reference, subject : AtomArray or ndarray, dtype=float
        The protein structures to be compared.
        The number of their atoms may differ from each other.
        Alternatively, coordinates can be provided directly as
        :class:`ndarray`.
    reference_indices, subject_indices : ndarray, dtype=int, shape=(n,)
        The indices of the atoms in the reference and subject, respectively,
        that correspond to each other.
        In consequence, the length of both arrays must be equal.
    reference_length : int or {"shorter", "longer", "reference"}
        The reference length used to normalize the TM-score.
        If "shorter", the number of residues in the smaller structure is used.
        If "longer", the number of residues in the larger structure is used.
        If "reference", the number of residues in the reference structure is used.
        The length can also be provided directly as an integer.

    Returns
    -------
    tm_score : float
        The *TM*-score for the given structure.

    Raises
    ------
    ValueError
        If `reference` or `subject` is not a peptide-only structure, or if
        `reference_indices` and `subject_indices` are integer index arrays
        that select a different number of atoms.

    See Also
    --------
    superimpose_structural_homologs :
        Aims to maximize the *TM*-score between two structures.
        It also returns the corresponding atom indices that can be passed to
        :func:`tm_score()`.

    Notes
    -----
    This function takes the coordinates as they are.
    It is recommended to superimpose them using
    :func:`superimpose_structural_homologs()` before, as that function aims to find a
    superimposition that maximizes the *TM*-score.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> reference = atom_array_stack[0]
    >>> subject = atom_array_stack[1]
    >>> superimposed, _, ref_indices, sub_indices = superimpose_structural_homologs(
    ...     reference, subject, max_iterations=1
    ... )
    >>> print(tm_score(reference, superimposed, ref_indices, sub_indices))
    0.69...
    """
    if not np.all(filter_amino_acids(reference)):
        raise ValueError("Reference structure must be peptide only")
    if not np.all(filter_amino_acids(subject)):
        raise ValueError("Subject structure must be peptide only")
    ref_length = _get_reference_length(
        reference_length, get_residue_count(reference), get_residue_count(subject)
    )

    # Enforce the documented contract that both index arrays pair up one-to-one.
    # Only integer indices are checked: boolean masks (or other index objects)
    # may legitimately differ in length while selecting the same number of atoms.
    # NOTE(review): how `distance()` reacts to a length mismatch is not visible
    # here; this check guarantees a clear error instead of relying on it.
    ref_index_array = np.asarray(reference_indices)
    sub_index_array = np.asarray(subject_indices)
    if (
        ref_index_array.dtype.kind in "iu"
        and sub_index_array.dtype.kind in "iu"
        and ref_index_array.size != sub_index_array.size
    ):
        raise ValueError(
            "'reference_indices' and 'subject_indices' must select the same "
            f"number of atoms, but got {ref_index_array.size} and "
            f"{sub_index_array.size}"
        )

    distances = distance(reference[reference_indices], subject[subject_indices])
    # Sum the per-pair score contributions and normalize by the reference length
    return np.sum(_tm_score(distances, ref_length)).item() / ref_length
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def superimpose_structural_homologs(
    fixed,
    mobile,
    structural_alphabet="3di",
    substitution_matrix=None,
    max_iterations=float("inf"),
    reference_length="shorter",
):
    """
    Superimpose two remotely homologous protein structures.

    The superimposition is driven by the structural similarity of the two given
    structures and is inspired by the *TM-align algorithm*. :footcite:`Zhang2005`.
    This makes the method a good fit for structurally homologous pairs in the
    *twilight zone*, i.e. with low amino acid sequence similarity.

    Parameters
    ----------
    fixed : AtomArray, shape(n,)
        The fixed structure.
        Must contain only peptide chains.
    mobile : AtomArray, shape(n,)
        The structure which is superimposed on the `fixed` structure.
        Must contain only peptide chains.
        Must contain the same number of chains as `fixed`.
    structural_alphabet : {"3di", "pb"}, optional
        The structural alphabet used to find corresponding residues via sequence
        alignment.
        Either *3Di* or *Protein Blocks*.
        The value is matched case-insensitively.
    substitution_matrix : SubstitutionMatrix, optional
        The substitution matrix used to find corresponding residues via sequence
        alignment.
        If omitted, the standard matrix of the chosen structural alphabet is used.
    max_iterations : int, optional
        The maximum number of iterations to perform in the last step.
        At least one iteration is always performed.
    reference_length : int or {"shorter", "longer", "reference"}
        The reference length used to normalize the TM-score and to compute :math:`d_0`.
        If "shorter", the number of residues in the smaller structure is used.
        If "longer", the number of residues in the larger structure is used.
        If "reference", the number of residues in the fixed structure is used.
        The length can also be provided directly as an integer.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack
        A copy of the `mobile` structure, superimposed on the fixed structure.
    transform : AffineTransformation
        This object contains the affine transformation(s) that were
        applied on `mobile`.
        :meth:`AffineTransformation.apply()` can be used to transform
        another AtomArray in the same way.
    fixed_indices, mobile_indices : ndarray, shape(k,), dtype=int
        The indices of the corresponding ``CA`` atoms in the fixed and mobile structure,
        respectively.
        These atoms were used for the superimposition, if their pairwise distance is
        below the :math:`d_0` threshold :footcite:`Zhang2004`.

    Raises
    ------
    ValueError
        If `structural_alphabet` is not one of the supported alphabets.

    See Also
    --------
    superimpose_homologs : Analogous functionality for structures with high sequence similarity.

    Notes
    -----
    The difficult part of aligning two structures with a different number of
    residues is to find out which residues correspond to each other.
    This algorithm inspired by *TM-align* :footcite:`Zhang2005` uses a 3 step heuristic:

    1. Find corresponding residues using a structural alphabet alignment and superimpose
       the chains based on them.
    2. Refine the corresponding residues using a sequence alignment based on a hybrid
       positional substitution matrix:
       The scores are a 50/50 combination of the structural alphabet substitution score
       and the distance-based TM-score between two residues.
       The superimposition is updated based on the new corresponding residues.
    3. Refine the corresponding residues using a sequence alignment with a pure
       TM-score based positional substitution matrix.
       Update the superimposition based on the new corresponding residues.
       Repeat this step until the correspondences are stable.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> fixed = atom_array_stack[0]
    >>> mobile = atom_array_stack[1]
    >>> superimposed, _, fix_indices, mob_indices = superimpose_structural_homologs(
    ...     fixed, mobile, max_iterations=1
    ... )
    >>> print(tm_score(fixed, superimposed, fix_indices, mob_indices))
    0.69...
    >>> print(rmsd(fixed[fix_indices], superimposed[mob_indices]))
    0.83...
    """
    # Imported locally instead of at module level to avoid circular imports
    from biotite.structure.alphabet.i3d import to_3di
    from biotite.structure.alphabet.pb import to_protein_blocks

    # For each supported alphabet: the structure-to-sequence conversion function
    # and the factory for its default substitution matrix
    supported_alphabets = {
        "3di": (to_3di, SubstitutionMatrix.std_3di_matrix),
        "pb": (to_protein_blocks, SubstitutionMatrix.std_protein_blocks_matrix),
    }
    alphabet_key = structural_alphabet.lower()
    if alphabet_key not in supported_alphabets:
        raise ValueError(f"Unsupported structural alphabet: '{structural_alphabet}'")
    to_structural_sequences, default_matrix_factory = supported_alphabets[alphabet_key]
    if substitution_matrix is None:
        substitution_matrix = default_matrix_factory()

    # All chains are merged into a single structural sequence for simplicity:
    # as no gap extension penalty is used, the alignment barely differs from
    # aligning the chains separately
    fixed_seq = _concatenate_sequences(to_structural_sequences(fixed)[0])
    mobile_seq = _concatenate_sequences(to_structural_sequences(mobile)[0])
    fixed_ca_coord = coord_for_atom_name_per_residue(fixed, ["CA"])[0]
    mobile_ca_coord = coord_for_atom_name_per_residue(mobile, ["CA"])[0]

    # Residues without CA atom show up as NaN coordinates and would let the
    # superimposition fail -> drop them from sequences and coordinates alike
    fixed_has_ca = ~np.isnan(fixed_ca_coord).any(axis=-1)
    mobile_has_ca = ~np.isnan(mobile_ca_coord).any(axis=-1)
    fixed_seq, fixed_ca_coord = fixed_seq[fixed_has_ca], fixed_ca_coord[fixed_has_ca]
    mobile_seq, mobile_ca_coord = (
        mobile_seq[mobile_has_ca],
        mobile_ca_coord[mobile_has_ca],
    )
    ref_length = _get_reference_length(
        reference_length, len(fixed_seq), len(mobile_seq)
    )

    def _superimpose_on(anchor_pairs, *distance_filter_args):
        # Fit the mobile CA atoms onto the fixed ones using the given anchors and
        # return the transformation together with the transformed CA coordinates
        _, fit = superimpose(
            *_filter_by_anchors(
                fixed_ca_coord, mobile_ca_coord, anchor_pairs, *distance_filter_args
            )
        )
        return fit, fit.apply(mobile_ca_coord)

    # Step 1: anchors from the structural alphabet alignment
    anchors = _find_anchors_structure_based(fixed_seq, mobile_seq, substitution_matrix)
    transform, superimposed_ca_coord = _superimpose_on(anchors)

    # Step 2: anchors from the hybrid (structural alphabet + TM-score) alignment
    anchors = _find_anchors_hybrid(
        fixed_seq,
        mobile_seq,
        fixed_ca_coord,
        superimposed_ca_coord,
        substitution_matrix,
        ref_length,
    )
    transform, superimposed_ca_coord = _superimpose_on(anchors)

    # Step 3: iterate the pure TM-score based alignment until the anchors no
    # longer change or the iteration limit is hit
    n_iterations = 0
    while True:
        n_iterations += 1
        previous_anchors = anchors
        anchors = _find_anchors_tm_based(
            fixed_ca_coord, superimposed_ca_coord, ref_length
        )
        # In this step the anchors are additionally filtered by distance
        transform, superimposed_ca_coord = _superimpose_on(
            anchors, superimposed_ca_coord, ref_length
        )
        if n_iterations >= max_iterations or np.array_equal(previous_anchors, anchors):
            break

    # Up to here the anchors index into the CA atoms only
    # -> translate them into indices of the complete atom arrays
    fixed_anchors = np.nonzero(fixed.atom_name == "CA")[0][anchors[:, 0]]
    mobile_anchors = np.nonzero(mobile.atom_name == "CA")[0][anchors[:, 1]]
    fitted = transform.apply(mobile)
    return fitted, transform, fixed_anchors, mobile_anchors
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _concatenate_sequences(sequences):
    """
    Join multiple sequences into one sequence.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to join, in the order they should appear in the result.
        Must contain at least one element, as the first element is used to
        create the empty start value.

    Returns
    -------
    sequence : Sequence
        The result of adding up all input sequences.
    """
    # ``sum()`` requires a neutral start value:
    # use an empty instance of the same type as the first sequence
    empty_sequence = type(sequences[0])()
    return sum(sequences, start=empty_sequence)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _filter_by_anchors(
    fixed_ca_coord,
    mobile_ca_coord,
    anchors,
    superimposed_ca_coord=None,
    reference_length=None,
):
    """
    Select the coordinates of the anchored CA atoms.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The CA coordinates of the fixed and the mobile structure, respectively.
    anchors : ndarray, shape=(k,2)
        The anchor indices.
        The first column indexes `fixed_ca_coord`, the second column indexes
        `mobile_ca_coord` (and `superimposed_ca_coord`, if given).
    superimposed_ca_coord : ndarray, shape=(m,3), optional
        The CA coordinates of the superimposed mobile structure.
        If given together with `reference_length`, only those anchors are kept,
        whose distance between the fixed and superimposed coordinates is below
        :math:`d_0`.
    reference_length : int, optional
        The reference length used to compute :math:`d_0`.
        The distance filter is only applied if both, `superimposed_ca_coord`
        and `reference_length`, are given.

    Returns
    -------
    anchor_fixed_coord, anchor_mobile_coord : ndarray, shape=(k,3)
        The anchor coordinates of the fixed and mobile structure.
        If the distance filter is applied, less than *k* rows may remain.
    """
    fixed_indices = anchors[:, 0]
    mobile_indices = anchors[:, 1]
    selected_fixed = fixed_ca_coord[fixed_indices]
    selected_mobile = mobile_ca_coord[mobile_indices]

    # Without both optional parameters there is no distance criterion to apply
    if reference_length is None or superimposed_ca_coord is None:
        return selected_fixed, selected_mobile

    # The distance is measured against the superimposed coordinates,
    # but the returned coordinates are those of the untransformed mobile structure
    close_mask = _mask_by_d0_threshold(
        selected_fixed, superimposed_ca_coord[mobile_indices], reference_length
    )
    return selected_fixed[close_mask], selected_mobile[close_mask]
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _find_anchors_structure_based(fixed_seq, mobile_seq, substitution_matrix):
    """
    Find anchors by aligning the two sequences with the given substitution matrix.

    Parameters
    ----------
    fixed_seq, mobile_seq : Sequence
        The sequences of the fixed and mobile structure, respectively.
        Presumably these are structural alphabet sequences fitting the
        `substitution_matrix` - confirm against the caller.
    substitution_matrix : SubstitutionMatrix
        The matrix used for scoring the alignment.
        Its median match score also defines the gap opening penalty.

    Returns
    -------
    anchors : ndarray
        The rows of the gap-free alignment trace, where the aligned symbols have
        a positive substitution score.
    """
    gap_opening_penalty = -_get_median_match_score(substitution_matrix)
    alignments = align_optimal(
        fixed_seq,
        mobile_seq,
        substitution_matrix,
        gap_penalty=(gap_opening_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )
    # A gap has no partner residue -> it cannot become an anchor
    gapless_alignment = remove_gaps(alignments[0])
    # Only aligned positions with a positive score are considered
    # structurally similar enough to serve as anchor
    codes = get_codes(gapless_alignment)
    pair_scores = substitution_matrix.score_matrix()[codes[0], codes[1]]
    return gapless_alignment.trace[pair_scores > 0]
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _find_anchors_hybrid(
    fixed_seq,
    mobile_seq,
    fixed_ca_coord,
    mobile_ca_coord,
    substitution_matrix,
    reference_length,
):
    """
    Find anchors by an alignment that scores both, the substitution matrix
    and the pairwise TM-score of the given coordinates.

    Parameters
    ----------
    fixed_seq, mobile_seq : Sequence
        The sequences of the fixed and mobile structure, respectively.
    fixed_ca_coord, mobile_ca_coord : ndarray
        The CA coordinates of the fixed and mobile structure, respectively.
        The pairwise TM-scores are computed from these coordinates as given.
    substitution_matrix : SubstitutionMatrix
        The matrix contributing the sequence based part of the score.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    anchors : ndarray
        The trace of the gap-free alignment.
    """
    # Dividing the substitution scores by the median match score brings them
    # into the range of the pairwise TM-scores
    median_match_score = _get_median_match_score(substitution_matrix)

    # The TM-score belongs to a pair of residues and not to a pair of symbols
    # of the alphabet
    # -> A positional matrix is required, whose shape reflects the number of
    # residues instead of the number of symbols, to be able to add the TM-score
    positional_matrix, positional_fixed_seq, positional_mobile_seq = (
        substitution_matrix.as_positional(fixed_seq, mobile_seq)
    )

    structure_scores = _pairwise_tm_score(
        fixed_ca_coord, mobile_ca_coord, reference_length
    )
    sequence_scores = positional_matrix.score_matrix()
    # The matrix is converted to integers below:
    # scaling scores and gap penalty beforehand limits the precision loss
    combined_scores = _SCORE_SCALING * (
        sequence_scores / median_match_score + structure_scores
    )
    combined_matrix = SubstitutionMatrix(
        positional_matrix.get_alphabet1(),
        positional_matrix.get_alphabet2(),
        combined_scores.astype(np.int32),
    )
    scaled_gap_penalty = _SCORE_SCALING * _HYBRID_PENALTY

    best_alignment = align_optimal(
        positional_fixed_seq,
        positional_mobile_seq,
        combined_matrix,
        (scaled_gap_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )[0]
    return remove_gaps(best_alignment).trace
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _find_anchors_tm_based(fixed_ca_coord, mobile_ca_coord, reference_length):
    """
    Find anchors by an alignment that is scored by the pairwise TM-score only.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray
        The CA coordinates of the fixed and mobile structure, respectively.
        The pairwise TM-scores are computed from these coordinates as given.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    anchors : ndarray
        The trace of the gap-free alignment.
    """
    # The score depends only on the positions
    # -> sequences that merely enumerate the positions are sufficient
    fixed_positions = PurePositionalSequence(len(fixed_ca_coord))
    mobile_positions = PurePositionalSequence(len(mobile_ca_coord))

    # Scores and gap penalty are scaled by the same factor
    # before the scores are converted to integers
    scaled_scores = _SCORE_SCALING * _pairwise_tm_score(
        fixed_ca_coord, mobile_ca_coord, reference_length
    )
    scaled_gap_penalty = _SCORE_SCALING * _TM_GAP_PENALTY
    positional_matrix = SubstitutionMatrix(
        fixed_positions.alphabet,
        mobile_positions.alphabet,
        scaled_scores.astype(np.int32),
    )

    best_alignment = align_optimal(
        fixed_positions,
        mobile_positions,
        positional_matrix,
        (scaled_gap_penalty, 0),
        terminal_penalty=False,
        max_number=1,
    )[0]
    return remove_gaps(best_alignment).trace
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _get_median_match_score(substitution_matrix):
    """
    Get the median of the scores for a symbol matching itself.

    Parameters
    ----------
    substitution_matrix : SubstitutionMatrix
        The substitution matrix to take the match scores from.
        The match scores are read from the diagonal of its score matrix,
        so both alphabets of the matrix are expected to be the same.

    Returns
    -------
    score : float
        The median match score as returned by :func:`numpy.median()`.
        NOTE(review): this is a NumPy float and may be fractional for an even
        number of symbols - callers that need an integer must convert it.

    Notes
    -----
    The median is used instead of the mean, as the score range can be quite large,
    especially when the matrix assigns an arbitrary score to the *undefined symbol*.
    """
    match_scores = np.diagonal(substitution_matrix.score_matrix())
    return np.median(match_scores)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _mask_by_d0_threshold(fixed_ca_coord, mobile_ca_coord, reference_length):
    """
    Mark the coordinate pairs whose distance is below the :math:`d_0` threshold.

    Parameters
    ----------
    fixed_ca_coord, mobile_ca_coord : ndarray, shape=(n,3)
        The CA coordinates of the fixed and mobile structure.
        The distance is measured between the rows with the same index.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    mask : ndarray, shape=(n,), dtype=bool
        The mask is true for each distance that is below the :math:`d_0`
        threshold.

    Raises
    ------
    ValueError
        If not a single distance is below the threshold.
    """
    distances = distance(fixed_ca_coord, mobile_ca_coord)
    below_threshold = distances < _d0(reference_length)
    if np.any(below_threshold):
        return below_threshold
    # An empty selection would leave nothing to superimpose
    raise ValueError("No anchors found, the structures are too dissimilar")
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _pairwise_tm_score(reference_coord, subject_coord, reference_length):
    """
    Compute the TM score between each coordinate of the first array and each
    coordinate of the second array.

    Parameters
    ----------
    reference_coord, subject_coord : ndarray, shape=(p,3) or shape=(q,3), dtype=float
        The CA coordinates, between which all pairwise distances are computed.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    tm_score : ndarray, shape=(p,q), dtype=float
        The TM score for each combination of a reference and a subject coordinate.
    """
    # Insert an axis of length 1 into each array, so the distance is computed
    # for the Cartesian product instead of element-wise
    reference_column = reference_coord[:, np.newaxis, :]
    subject_row = subject_coord[np.newaxis, :, :]
    pairwise_distances = distance(reference_column, subject_row)
    return _tm_score(pairwise_distances, reference_length)
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _tm_score(distance, reference_length):
    """
    Convert distances into TM scores.

    Parameters
    ----------
    distance : float or ndarray
        The distance(s) between the CA atoms of two residues.
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    tm_score : float or ndarray
        The TM score for each given distance, i.e.
        :math:`1 / (1 + (d / d_0)^2)`.
    """
    # Express the distance in units of d0
    relative_distance = distance / _d0(reference_length)
    return 1 / (1 + relative_distance**2)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def _d0(reference_length):
    """
    Compute the :math:`d_0` threshold for the given reference length.

    Parameters
    ----------
    reference_length : int
        The reference length used to compute :math:`d_0`.

    Returns
    -------
    d0 : float
        The :math:`d_0` threshold, which is at least ``_D0_MIN``.
    """
    # Constants taken from Zhang2004
    # A negative base would give a complex cube root -> clip to positive values
    # For short sequence lengths _D0_MIN takes precedence anyway
    clipped_length = max((reference_length - 15), 0)
    length_dependent_d0 = 1.24 * clipped_length ** (1 / 3) - 1.8
    return max(length_dependent_d0, _D0_MIN)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _get_reference_length(user_parameter, reference_length, subject_length):
|
|
561
|
+
"""
|
|
562
|
+
Get the reference length to normalize the TM-score and compute :math:`d_0`.
|
|
563
|
+
|
|
564
|
+
Parameters
|
|
565
|
+
----------
|
|
566
|
+
user_parameter : int or {"shorter", "longer", "reference"}
|
|
567
|
+
The value given by the user via the `reference_length` parameter.
|
|
568
|
+
reference_length, subject_length : int
|
|
569
|
+
The lengths of the reference and subject structure, respectively.
|
|
570
|
+
"""
|
|
571
|
+
match user_parameter:
|
|
572
|
+
case "shorter":
|
|
573
|
+
return min(reference_length, subject_length)
|
|
574
|
+
case "longer":
|
|
575
|
+
return max(reference_length, subject_length)
|
|
576
|
+
case "reference":
|
|
577
|
+
return reference_length
|
|
578
|
+
case int(number):
|
|
579
|
+
return number
|
|
580
|
+
case _:
|
|
581
|
+
raise ValueError(f"Unsupported reference length: '{user_parameter}'")
|