biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions for structure superimposition.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure"
|
|
10
|
+
__author__ = "Patrick Kunzmann, Claude J. Rogers"
|
|
11
|
+
__all__ = [
|
|
12
|
+
"superimpose",
|
|
13
|
+
"superimpose_homologs",
|
|
14
|
+
"superimpose_without_outliers",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from biotite.sequence.align.alignment import get_codes, remove_gaps
|
|
20
|
+
from biotite.sequence.align.matrix import SubstitutionMatrix
|
|
21
|
+
from biotite.sequence.align.pairwise import align_optimal
|
|
22
|
+
from biotite.sequence.alphabet import common_alphabet
|
|
23
|
+
from biotite.sequence.seqtypes import ProteinSequence
|
|
24
|
+
from biotite.structure.atoms import coord
|
|
25
|
+
from biotite.structure.chains import chain_iter
|
|
26
|
+
from biotite.structure.filter import filter_amino_acids, filter_nucleotides
|
|
27
|
+
from biotite.structure.geometry import centroid, distance
|
|
28
|
+
from biotite.structure.sequence import to_sequence
|
|
29
|
+
from biotite.structure.transform import AffineTransformation
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def superimpose(fixed, mobile, atom_mask=None):
    """
    Fit one structure onto another by a rigid-body transformation that
    minimizes the RMSD between them.
    :footcite:`Kabsch1976, Kabsch1978`.

    The `fixed` structure stays in place, while `mobile` is rotated and
    translated onto it.

    Parameters
    ----------
    fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The reference structure(s), which are not moved.
        Coordinates may be passed instead of a structure.
    mobile : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The structure(s) to be fitted onto `fixed`.
        The atom at index *i* in `mobile` must correspond to the
        atom at index *i* in `fixed`, otherwise the result is
        meaningless.
        If both `fixed` and `mobile` are :class:`AtomArrayStack`
        objects, their number of models must be equal.
        Coordinates may be passed instead of a structure.
    atom_mask : ndarray, dtype=bool, optional
        Restricts the fit to the atoms selected by this boolean mask,
        i.e. the RMSD is minimized for the selected atoms only.
        The transformation is nevertheless applied to every atom of
        `mobile`, so the returned structure is always complete.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        A transformed copy of `mobile`, superimposed onto `fixed`.
        If `mobile` was given as coordinates, coordinates are
        returned.
    transformation : AffineTransformation
        The affine transformation(s) that were applied on `mobile`.
        Use :meth:`AffineTransformation.apply()` to move another
        AtomArray in the same way.

    See Also
    --------
    superimpose_without_outliers : Superimposition with outlier removal.
    superimpose_homologs : Superimposition of homologous structures.

    Notes
    -----
    The returned `transformation` is useful for superimposing two
    structures with a different number of atoms:
    Usually both structures are filtered first, so that they have the
    same size and matching annotation arrays.
    The transformation obtained from the filtered structures can then
    be applied to the unfiltered structure via
    :meth:`AffineTransformation.apply()`.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    At first two models of a structure are taken and one of them is
    randomly rotated/translated.
    Consequently the RMSD is quite large:

    >>> array1 = atom_array_stack[0]
    >>> array2 = atom_array_stack[1]
    >>> array2 = translate(array2, [1,2,3])
    >>> array2 = rotate(array2, [1,2,3])
    >>> print("{:.3f}".format(rmsd(array1, array2)))
    11.260

    RMSD decreases after superimposition of only CA atoms:

    >>> array2_fit, transformation = superimpose(
    ...     array1, array2, atom_mask=(array2.atom_name == "CA")
    ... )
    >>> print("{:.3f}".format(rmsd(array1, array2_fit)))
    1.961

    RMSD is even lower when all atoms are considered in the
    superimposition:

    >>> array2_fit, transformation = superimpose(array1, array2)
    >>> print("{:.3f}".format(rmsd(array1, array2_fit)))
    1.928
    """
    # Give both inputs the same (three-dimensional) array layout,
    # so single models and stacks are handled by the same code path
    mobile_xyz = _reshape_to_3d(coord(mobile))
    fixed_xyz = _reshape_to_3d(coord(fixed))

    if atom_mask is None:
        # Fit on all atoms
        mobile_subset = np.copy(mobile_xyz)
        fixed_subset = np.copy(fixed_xyz)
    else:
        # Fit on the selected atoms only;
        # the mask indexing implicitly creates array copies
        mobile_subset = mobile_xyz[:, atom_mask, :]
        fixed_subset = fixed_xyz[:, atom_mask, :]

    # The centroids of the fitted atoms define the translational part
    mobile_center = centroid(mobile_subset)
    fixed_center = centroid(fixed_subset)

    # The rotation is determined on coordinates centered at (0,0,0)
    rotation = _get_rotation_matrices(
        fixed_subset - fixed_center[:, np.newaxis, :],
        mobile_subset - mobile_center[:, np.newaxis, :],
    )

    # Shift to origin -> rotate -> shift onto the centroid of 'fixed'
    transform = AffineTransformation(-mobile_center, rotation, fixed_center)
    return transform.apply(mobile), transform
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def superimpose_without_outliers(
    fixed,
    mobile,
    min_anchors=3,
    max_iterations=10,
    quantiles=(0.25, 0.75),
    outlier_threshold=1.5,
):
    r"""
    Superimpose structures onto a fixed structure, ignoring
    conformational outliers.

    This method iteratively superimposes the `mobile` structure onto the
    `fixed` structure, removes conformational outliers and superimposes
    the remaining atoms (called *anchors*) again until no outlier
    remains.

    Parameters
    ----------
    fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The fixed structure(s).
        Alternatively coordinates can be given.
    mobile : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The structure(s) which is/are superimposed on the `fixed`
        structure.
        Each atom at index *i* in `mobile` must correspond to the
        atom at index *i* in `fixed` to obtain correct results.
        Furthermore, if both `fixed` and `mobile` are
        :class:`AtomArrayStack` objects, they must have the same
        number of models.
        Alternatively coordinates can be given.
    min_anchors : int, optional
        The outlier removal is stopped, if less than `min_anchors`
        anchors would be left.
    max_iterations : int, optional
        The maximum number of iterations for removing conformational
        outliers.
        Setting the value to 1 means that no outlier removal is
        conducted.
    quantiles : tuple (float, float), optional
        The lower and upper quantile for the interpercentile range
        (IPR).
        By default the interquartile range is taken.
    outlier_threshold : float, optional
        The threshold for considering a conformational outlier.
        The threshold is given in units of IPR.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack or ndarray
        A copy of the `mobile` structure(s), superimposed on the fixed
        structure.
        Only coordinates are returned, if coordinates were given in
        `mobile`.
    transform : AffineTransformation
        This object contains the affine transformation(s) that were
        applied on `mobile`.
        :meth:`AffineTransformation.apply()` can be used to transform
        another AtomArray in the same way.
    anchor_indices : ndarray, shape(k,), dtype=int
        The indices of the anchor atoms.
        These atoms were used for the superimposition.

    Raises
    ------
    ValueError
        If `max_iterations` is smaller than 1.

    See Also
    --------
    superimpose : Superimposition without outlier removal.
    superimpose_homologs : Superimposition of homologous structures.

    Notes
    -----
    This method runs the following algorithm in iterations:

    1. Superimpose anchor atoms of `mobile` onto `fixed`.
    2. Calculate the squared distance :math:`d^2` between the
       superimposed anchors.
    3. Remove conformational outliers from anchors based on the
       following criterion:

       .. math:: d^2 > P_\text{upper}(d^2) + \left( P_\text{upper}(d^2) - P_\text{lower}(d^2) \right) \cdot T

       In prose this means that an anchor is considered an outlier, if
       it is `outlier_threshold` :math:`T` times the interpercentile
       range (IPR) above the upper percentile.
       By default, this is 1.5 times the interquartile range, which is
       the usual threshold to mark outliers in box plots.

    In the beginning, all atoms are considered as anchors.
    The iteration stops as soon as no current anchor is an outlier,
    the removal would leave less than `min_anchors` anchors or
    `max_iterations` is reached.

    Considering all atoms (not only the anchors), this approach does
    **not** minimize the RMSD, in contrast to :func:`superimpose()`.
    The purpose of this function is to ignore outliers to decrease the
    RMSD in the more conserved parts of the structure.
    """
    if max_iterations < 1:
        raise ValueError("Maximum number of iterations must be at least 1")

    # Ensure that the first quantile is smaller than the second one
    quantiles = sorted(quantiles)

    fixed_coord = coord(fixed)
    mobile_coord = coord(mobile)
    # Before refinement, all anchors are included
    # 'inlier' is the opposite of 'outlier'
    updated_inlier_mask = np.ones(fixed_coord.shape[-2], dtype=bool)

    for _ in range(max_iterations):
        # Run superimposition on the current anchors
        inlier_mask = updated_inlier_mask
        filtered_fixed_coord = fixed_coord[..., inlier_mask, :]
        filtered_mobile_coord = mobile_coord[..., inlier_mask, :]
        superimposed_coord, transform = superimpose(
            filtered_fixed_coord, filtered_mobile_coord
        )

        # Find outliers
        sq_dist = distance(filtered_fixed_coord, superimposed_coord) ** 2
        if sq_dist.ndim == 2:
            # If multiple models are superimposed,
            # use the mean squared distance to determine outliers
            sq_dist = np.mean(sq_dist, axis=0)
        lower_quantile, upper_quantile = np.quantile(sq_dist, quantiles)
        ipr = upper_quantile - lower_quantile
        # One entry per current anchor (not per atom)
        anchor_is_inlier = sq_dist <= upper_quantile + outlier_threshold * ipr

        if np.all(anchor_is_inlier):
            # None of the current anchors is an outlier -> converged.
            # The check is done on the anchors instead of the full atom
            # mask: the full mask can never become all-True again after
            # the first removal, so checking it would make every later
            # iteration repeat the identical superimposition
            break

        # Squared distance was only calculated for the existing inliers
        # -> update the mask only for these atoms
        updated_inlier_mask = inlier_mask.copy()
        updated_inlier_mask[inlier_mask] = anchor_is_inlier
        if np.count_nonzero(updated_inlier_mask) < min_anchors:
            # Less than min_anchors anchors would be left -> early termination
            break

    # 'inlier_mask' (not the updated one) belongs to the last
    # superimposition that was actually run, i.e. to 'transform'
    anchor_indices = np.where(inlier_mask)[0]
    return transform.apply(mobile), transform, anchor_indices
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def superimpose_homologs(
    fixed,
    mobile,
    substitution_matrix=None,
    gap_penalty=-10,
    min_anchors=3,
    terminal_penalty=False,
    **kwargs,
):
    r"""
    Superimpose a protein or nucleotide structure onto another one,
    considering sequence differences and conformational outliers.

    The method finds corresponding residues by sequence alignment and
    selects their :math:`C_{\alpha}` or :math:`P` atoms as
    superimposition *anchors*.
    Then iteratively the anchor atoms are superimposed and outliers are
    removed.

    Parameters
    ----------
    fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n)
        The fixed structure(s).
    mobile : AtomArray, shape(n,) or AtomArrayStack, shape(m,n)
        The structure(s) which is/are superimposed on the `fixed` structure.
        Must contain the same number of chains as `fixed` with corresponding chains
        being in the same order.
        The specific chain IDs can be different.
    substitution_matrix : str or SubstitutionMatrix, optional
        The (name of the) substitution matrix used for sequence
        alignment.
        Must fit the chain type.
        By default, ``"BLOSUM62"`` and ``"NUC"`` are used respectively.
        Only aligned residues with a positive score are considered as
        initial anchors.
    gap_penalty : int or tuple of int, optional
        The gap penalty for sequence alignment.
        A single value indicates a linear penalty, while a tuple
        indicates an affine penalty.
    min_anchors : int, optional
        If less than `min_anchors` anchors are found by sequence
        alignment, the method ditches the alignment and matches all
        anchor atoms.
        If the number of anchor atoms is not equal in `fixed` and
        `mobile` in this fallback case, an exception is raised.
        Furthermore, the outlier removal is stopped, if less than
        `min_anchors` anchors would be left.
    terminal_penalty : bool, optional
        If set to true, gap penalties are applied to terminal gaps in the sequence
        alignment.
    **kwargs
        Additional parameters for
        :func:`superimpose_without_outliers()`.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack
        A copy of the `mobile` structure(s), superimposed on the fixed
        structure(s).
    transform : AffineTransformation
        This object contains the affine transformation(s) that were
        applied on `mobile`.
        :meth:`AffineTransformation.apply()` can be used to transform
        another AtomArray in the same way.
    fixed_anchor_indices, mobile_anchor_indices : ndarray, shape(k,), dtype=int
        The indices of the anchor atoms in the fixed and mobile
        structure, respectively.
        These atoms were used for the superimposition.

    Raises
    ------
    ValueError
        If `fixed` or `mobile` contains fewer than `min_anchors`
        backbone anchor atoms, or if the fallback to all backbone
        anchors is required but the number of backbone anchor atoms
        differs between `fixed` and `mobile`.

    See Also
    --------
    superimpose : Superimposition without outlier removal.
    superimpose_without_outliers : Internally used for outlier removal.
    superimpose_structural_homologs : Better suited for low sequence similarity.

    Notes
    -----
    As this method relies on sequence alignment, it works only for
    proteins/nucleic acids with decent sequence homology.
    """
    fixed_anchor_indices = _get_backbone_anchor_indices(fixed)
    mobile_anchor_indices = _get_backbone_anchor_indices(mobile)
    if (
        len(fixed_anchor_indices) < min_anchors
        or len(mobile_anchor_indices) < min_anchors
    ):
        raise ValueError(
            "Structures have too few backbone atoms for required number of anchors"
        )

    anchor_indices = _find_matching_anchors(
        fixed[..., fixed_anchor_indices],
        mobile[..., mobile_anchor_indices],
        substitution_matrix,
        gap_penalty,
        terminal_penalty,
    )
    if len(anchor_indices) >= min_anchors:
        # The anchor indices point to the backbone atoms
        # -> get the corresponding indices for the whole structure
        fixed_anchor_indices = fixed_anchor_indices[anchor_indices[:, 0]]
        mobile_anchor_indices = mobile_anchor_indices[anchor_indices[:, 1]]
    elif len(fixed_anchor_indices) != len(mobile_anchor_indices):
        # Fallback would match all backbone anchors one-to-one,
        # which is only possible if both structures have equally many
        raise ValueError(
            "Tried fallback due to low anchor number, "
            "but number of backbone atoms does not match"
        )
    # Otherwise (fallback): all backbone anchors are kept as they are

    _, transform, selected_anchor_indices = superimpose_without_outliers(
        fixed[..., fixed_anchor_indices],
        mobile[..., mobile_anchor_indices],
        min_anchors,
        **kwargs,
    )
    # The selected indices refer to the anchor subset passed above
    # -> map them back to indices of the whole structure
    fixed_anchor_indices = fixed_anchor_indices[selected_anchor_indices]
    mobile_anchor_indices = mobile_anchor_indices[selected_anchor_indices]

    return (
        transform.apply(mobile),
        transform,
        fixed_anchor_indices,
        mobile_anchor_indices,
    )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _reshape_to_3d(coord):
    """
    Return the coordinate array with exactly three dimensions.

    A 2D array gets a new leading axis of length one, a 3D array is
    returned as it is.
    Any other dimensionality raises a :class:`ValueError`.
    """
    n_dim = coord.ndim
    if n_dim == 3:
        return coord
    if n_dim == 2:
        # Prepend a length-one axis
        return coord[np.newaxis, ...]
    if n_dim < 2:
        raise ValueError("Coordinates must be at least two-dimensional")
    raise ValueError("Coordinates must be at most three-dimensional")
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _get_rotation_matrices(fixed, mobile):
    """
    Compute the rotation matrices that superimpose the mobile
    coordinates onto the fixed coordinates with minimal RMSD,
    using the *Kabsch* algorithm.

    Both coordinate sets are expected to be centered at the origin
    already.
    The inputs are indexed as three-dimensional arrays and the second
    axis is summed over (presumably shape ``(m, n, 3)`` with ``m``
    models and ``n`` atoms - confirm against the callers).
    One matrix is returned for each entry along the first axis.
    """
    # Cross-covariance: outer product of each coordinate pair,
    # accumulated over the second axis
    outer_products = fixed[:, :, :, np.newaxis] * mobile[:, :, np.newaxis, :]
    cross_cov = outer_products.sum(axis=1)
    left, _, right = np.linalg.svd(cross_cov)
    # A negative determinant product would yield a reflection
    # -> flip the last column of the left factor in these cases
    is_reflection = (np.linalg.det(left) * np.linalg.det(right)) < 0
    left[is_reflection, :, -1] *= -1
    return left @ right
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _get_backbone_anchor_indices(atoms):
    """
    Return the indices of the representative anchor atoms:
    the ``CA`` atom of each amino acid and the ``P`` atom of each
    nucleotide.
    """
    amino_acid_anchor_mask = filter_amino_acids(atoms) & (atoms.atom_name == "CA")
    nucleotide_anchor_mask = filter_nucleotides(atoms) & (atoms.atom_name == "P")
    anchor_mask = amino_acid_anchor_mask | nucleotide_anchor_mask
    return np.nonzero(anchor_mask)[0]
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _find_matching_anchors(
    fixed_anchor_atoms,
    mobile_anchors_atoms,
    substitution_matrix,
    gap_penalty,
    terminal_penalty,
):
    """
    Find corresponding residues using pairwise sequence alignment.

    The chains of both structures are paired in their given order and
    each chain pair is aligned separately.
    Only aligned positions without gaps and with a positive
    substitution score are reported as anchors.

    Parameters
    ----------
    fixed_anchor_atoms, mobile_anchors_atoms : AtomArray or AtomArrayStack
        The anchor atoms of the fixed and mobile structure.
    substitution_matrix : str or SubstitutionMatrix or None
        The (name of the) substitution matrix used for the alignments.
        If ``None``, the standard protein or nucleotide matrix is
        chosen for each chain pair, depending on the sequence type of
        the fixed chain.
    gap_penalty : int or tuple of int
        The gap penalty passed to the alignment.
    terminal_penalty : bool
        Whether terminal gaps are penalized in the alignment.

    Returns
    -------
    anchors : ndarray, shape(k,2)
        The matched sequence positions, the first column for the fixed
        and the second column for the mobile structure.
        Positions are offset by the lengths of the preceding chain
        sequences, so they are consecutive across chains.

    Raises
    ------
    ValueError
        If the sequences of a chain pair share no common alphabet,
        or if the number of chains differs between the two structures
        (raised by the strict ``zip()``).
    """
    anchor_list = []
    fixed_seq_offset = 0
    mobile_seq_offset = 0
    for fixed_chain, mobile_chain in zip(
        chain_iter(fixed_anchor_atoms), chain_iter(mobile_anchors_atoms), strict=True
    ):
        # The input is a single chain -> expect a single sequence
        fixed_seq = to_sequence(fixed_chain, allow_hetero=True)[0][0]
        mobile_seq = to_sequence(mobile_chain, allow_hetero=True)[0][0]

        common_alph = common_alphabet([fixed_seq.alphabet, mobile_seq.alphabet])
        if common_alph is None:
            raise ValueError("Cannot superimpose peptides with nucleic acids")
        # Resolve the matrix into a loop-local variable:
        # Rebinding the 'substitution_matrix' parameter would make the
        # matrix chosen for the first chain pair stick for all following
        # pairs, even if they have a different molecule type
        if substitution_matrix is None:
            if isinstance(fixed_seq, ProteinSequence):
                chain_matrix = SubstitutionMatrix.std_protein_matrix()
            else:
                chain_matrix = SubstitutionMatrix.std_nucleotide_matrix()
        elif isinstance(substitution_matrix, str):
            chain_matrix = SubstitutionMatrix(
                common_alph, common_alph, substitution_matrix
            )
        else:
            chain_matrix = substitution_matrix

        alignment = align_optimal(
            fixed_seq,
            mobile_seq,
            chain_matrix,
            gap_penalty,
            terminal_penalty=terminal_penalty,
            max_number=1,
        )[0]
        # Cannot anchor gaps
        alignment = remove_gaps(alignment)
        ali_codes = get_codes(alignment)
        score_matrix = chain_matrix.score_matrix()
        # Anchors must be similar residues, i.e. have a positive score
        anchors = alignment.trace[score_matrix[ali_codes[0], ali_codes[1]] > 0]

        # Shift the per-chain positions by the lengths of all preceding chains
        anchors += fixed_seq_offset, mobile_seq_offset
        fixed_seq_offset += len(fixed_seq)
        mobile_seq_offset += len(mobile_seq)
        anchor_list.append(anchors)

    return np.concatenate(anchor_list, axis=0)
|