biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,1403 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functions for base pair identification.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Overriding ``__name__`` makes the functions and classes defined below
# report ``biotite.structure`` as their ``__module__`` instead of this
# file's own module path.
__name__ = "biotite.structure"
__author__ = "Tom David Müller"
# Names exported by a star import of this module
__all__ = [
    "base_pairs",
    "map_nucleotide",
    "base_stacking",
    "base_pairs_edge",
    "Edge",
    "base_pairs_glycosidic_bond",
    "GlycosidicBond",
]
|
|
20
|
+
|
|
21
|
+
import warnings
|
|
22
|
+
from enum import IntEnum
|
|
23
|
+
import numpy as np
|
|
24
|
+
from biotite.structure.atoms import Atom, array
|
|
25
|
+
from biotite.structure.celllist import CellList
|
|
26
|
+
from biotite.structure.compare import rmsd
|
|
27
|
+
from biotite.structure.error import (
|
|
28
|
+
BadStructureError,
|
|
29
|
+
IncompleteStructureWarning,
|
|
30
|
+
UnexpectedStructureWarning,
|
|
31
|
+
)
|
|
32
|
+
from biotite.structure.filter import filter_nucleotides
|
|
33
|
+
from biotite.structure.hbond import hbond
|
|
34
|
+
from biotite.structure.info.standardize import standardize_order
|
|
35
|
+
from biotite.structure.residues import get_residue_masks, get_residue_starts_for
|
|
36
|
+
from biotite.structure.superimpose import superimpose
|
|
37
|
+
from biotite.structure.util import distance, norm_vector
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _get_std_adenine():
    """
    Build the reference adenine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The ten base atoms of adenine (``res_name="A"``) with hard-coded
        standard coordinates, named according to the nomenclature of the
        PDB File Format V3.
    coordinates : tuple (ndarray, ndarray, ndarray, dtype=float)
        Three coordinate arrays in this order: the midpoint between the
        ``N1`` and ``C4`` atoms (the center according to the SCHNaP
        paper referenced in the function ``base_pairs``), the center of
        the pyrimidine ring and the center of the imidazole ring.
    """
    n9 = Atom([-1.291, 4.498, 0.000], atom_name="N9", res_name="A")
    c8 = Atom([0.024, 4.897, 0.000], atom_name="C8", res_name="A")
    n7 = Atom([0.877, 3.902, 0.000], atom_name="N7", res_name="A")
    c5 = Atom([0.071, 2.771, 0.000], atom_name="C5", res_name="A")
    c6 = Atom([0.369, 1.398, 0.000], atom_name="C6", res_name="A")
    n6 = Atom([1.611, 0.909, 0.000], atom_name="N6", res_name="A")
    n1 = Atom([-0.668, 0.532, 0.000], atom_name="N1", res_name="A")
    c2 = Atom([-1.912, 1.023, 0.000], atom_name="C2", res_name="A")
    n3 = Atom([-2.320, 2.290, 0.000], atom_name="N3", res_name="A")
    c4 = Atom([-1.267, 3.124, 0.000], atom_name="C4", res_name="A")
    adenine = array([n9, c8, n7, c5, c6, n6, n1, c2, n3, c4])

    # Members of the two fused aromatic rings; C5 and C4 belong to both
    pyrimidine_ring = [c5, c6, n1, c2, n3, c4]
    imidazole_ring = [n9, c8, n7, c5, c4]

    # Base center: midpoint between the N1 and C4 atoms
    midpoint = np.mean([n1.coord, c4.coord], axis=-2)
    # Ring centers: mean position of the respective ring atoms
    pyrimidine_center = np.mean(
        [atom.coord for atom in pyrimidine_ring], axis=-2
    )
    imidazole_center = np.mean(
        [atom.coord for atom in imidazole_ring], axis=-2
    )

    return adenine, (midpoint, pyrimidine_center, imidazole_center)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _get_std_cytosine():
    """
    Build the reference cytosine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The eight base atoms of cytosine (``res_name="C"``) with
        hard-coded standard coordinates, named according to the
        nomenclature of the PDB File Format V3.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        Two coordinate arrays in this order: the midpoint between the
        ``N3`` and ``C6`` atoms (the center according to the SCHNaP
        paper referenced in the function ``base_pairs``) and the center
        of the pyrimidine ring.
    """
    n1 = Atom([-1.285, 4.542, 0.000], atom_name="N1", res_name="C")
    c2 = Atom([-1.472, 3.158, 0.000], atom_name="C2", res_name="C")
    o2 = Atom([-2.628, 2.709, 0.000], atom_name="O2", res_name="C")
    n3 = Atom([-0.391, 2.344, 0.000], atom_name="N3", res_name="C")
    c4 = Atom([0.837, 2.868, 0.000], atom_name="C4", res_name="C")
    n4 = Atom([1.875, 2.027, 0.000], atom_name="N4", res_name="C")
    c5 = Atom([1.056, 4.275, 0.000], atom_name="C5", res_name="C")
    c6 = Atom([-0.023, 5.068, 0.000], atom_name="C6", res_name="C")
    cytosine = array([n1, c2, o2, n3, c4, n4, c5, c6])

    # Members of the aromatic ring (exocyclic O2 and N4 are excluded)
    pyrimidine_ring = [n1, c2, n3, c4, c5, c6]

    # Base center: midpoint between the N3 and C6 atoms
    midpoint = np.mean([n3.coord, c6.coord], axis=-2)
    # Ring center: mean position of the ring atoms
    pyrimidine_center = np.mean(
        [atom.coord for atom in pyrimidine_ring], axis=-2
    )

    return cytosine, (midpoint, pyrimidine_center)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _get_std_guanine():
    """
    Build the reference guanine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The eleven base atoms of guanine (``res_name="G"``) with
        hard-coded standard coordinates, named according to the
        nomenclature of the PDB File Format V3.
    coordinates : tuple (ndarray, ndarray, ndarray, dtype=float)
        Three coordinate arrays in this order: the midpoint between the
        ``N1`` and ``C4`` atoms (the center according to the SCHNaP
        paper referenced in the function ``base_pairs``), the center of
        the pyrimidine ring and the center of the imidazole ring.
    """
    n9 = Atom([-1.289, 4.551, 0.000], atom_name="N9", res_name="G")
    c8 = Atom([0.023, 4.962, 0.000], atom_name="C8", res_name="G")
    n7 = Atom([0.870, 3.969, 0.000], atom_name="N7", res_name="G")
    c5 = Atom([0.071, 2.833, 0.000], atom_name="C5", res_name="G")
    c6 = Atom([0.424, 1.460, 0.000], atom_name="C6", res_name="G")
    o6 = Atom([1.554, 0.955, 0.000], atom_name="O6", res_name="G")
    n1 = Atom([-0.700, 0.641, 0.000], atom_name="N1", res_name="G")
    c2 = Atom([-1.999, 1.087, 0.000], atom_name="C2", res_name="G")
    n2 = Atom([-2.949, 0.139, -0.001], atom_name="N2", res_name="G")
    n3 = Atom([-2.342, 2.364, 0.001], atom_name="N3", res_name="G")
    c4 = Atom([-1.265, 3.177, 0.000], atom_name="C4", res_name="G")
    guanine = array([n9, c8, n7, c5, c6, o6, n1, c2, n2, n3, c4])

    # Members of the two fused aromatic rings; C5 and C4 belong to both
    pyrimidine_ring = [c5, c6, n1, c2, n3, c4]
    imidazole_ring = [n9, c8, n7, c5, c4]

    # Base center: midpoint between the N1 and C4 atoms
    midpoint = np.mean([n1.coord, c4.coord], axis=-2)
    # Ring centers: mean position of the respective ring atoms
    pyrimidine_center = np.mean(
        [atom.coord for atom in pyrimidine_ring], axis=-2
    )
    imidazole_center = np.mean(
        [atom.coord for atom in imidazole_ring], axis=-2
    )

    return guanine, (midpoint, pyrimidine_center, imidazole_center)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _get_std_thymine():
    """
    Build the reference thymine base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The nine base atoms of thymine (``res_name="T"``) with
        hard-coded standard coordinates, named according to the
        nomenclature of the PDB File Format V3.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        Two coordinate arrays in this order: the midpoint between the
        ``N3`` and ``C6`` atoms (the center according to the SCHNaP
        paper referenced in the function ``base_pairs``) and the center
        of the pyrimidine ring.
    """
    n1 = Atom([-1.284, 4.500, 0.000], atom_name="N1", res_name="T")
    c2 = Atom([-1.462, 3.135, 0.000], atom_name="C2", res_name="T")
    o2 = Atom([-2.562, 2.608, 0.000], atom_name="O2", res_name="T")
    n3 = Atom([-0.298, 2.407, 0.000], atom_name="N3", res_name="T")
    c4 = Atom([0.994, 2.897, 0.000], atom_name="C4", res_name="T")
    o4 = Atom([1.944, 2.119, 0.000], atom_name="O4", res_name="T")
    c5 = Atom([1.106, 4.338, 0.000], atom_name="C5", res_name="T")
    c7 = Atom([2.466, 4.961, 0.001], atom_name="C7", res_name="T")
    c6 = Atom([-0.024, 5.057, 0.000], atom_name="C6", res_name="T")
    thymine = array([n1, c2, o2, n3, c4, o4, c5, c7, c6])

    # Members of the aromatic ring (exocyclic O2, O4 and C7 are excluded)
    pyrimidine_ring = [n1, c2, n3, c4, c5, c6]

    # Base center: midpoint between the N3 and C6 atoms
    midpoint = np.mean([n3.coord, c6.coord], axis=-2)
    # Ring center: mean position of the ring atoms
    pyrimidine_center = np.mean(
        [atom.coord for atom in pyrimidine_ring], axis=-2
    )

    return thymine, (midpoint, pyrimidine_center)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _get_std_uracil():
    """
    Build the reference uracil base and its characteristic centers.

    Returns
    -------
    standard_base : AtomArray
        The eight base atoms of uracil (``res_name="U"``) with
        hard-coded standard coordinates, named according to the
        nomenclature of the PDB File Format V3.
    coordinates : tuple (ndarray, ndarray, dtype=float)
        Two coordinate arrays in this order: the midpoint between the
        ``N3`` and ``C6`` atoms (the center according to the SCHNaP
        paper referenced in the function ``base_pairs``) and the center
        of the pyrimidine ring.
    """
    n1 = Atom([-1.284, 4.500, 0.000], atom_name="N1", res_name="U")
    c2 = Atom([-1.462, 3.131, 0.000], atom_name="C2", res_name="U")
    o2 = Atom([-2.563, 2.608, 0.000], atom_name="O2", res_name="U")
    n3 = Atom([-0.302, 2.397, 0.000], atom_name="N3", res_name="U")
    c4 = Atom([0.989, 2.884, 0.000], atom_name="C4", res_name="U")
    o4 = Atom([1.935, 2.094, -0.001], atom_name="O4", res_name="U")
    c5 = Atom([1.089, 4.311, 0.000], atom_name="C5", res_name="U")
    c6 = Atom([-0.024, 5.053, 0.000], atom_name="C6", res_name="U")
    uracil = array([n1, c2, o2, n3, c4, o4, c5, c6])

    # Members of the aromatic ring (exocyclic O2 and O4 are excluded)
    pyrimidine_ring = [n1, c2, n3, c4, c5, c6]

    # Base center: midpoint between the N3 and C6 atoms
    midpoint = np.mean([n3.coord, c6.coord], axis=-2)
    # Ring center: mean position of the ring atoms
    pyrimidine_center = np.mean(
        [atom.coord for atom in pyrimidine_ring], axis=-2
    )

    return uracil, (midpoint, pyrimidine_center)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Reference bases, built once at import time.
# Each builder returns the base atoms plus a tuple of center coordinates:
# (midpoint, pyrimidine center, imidazole center) for the purines A and G,
# (midpoint, pyrimidine center) for the pyrimidines C, T and U.
(_STD_ADENINE, _STD_ADENINE_RING_CENTERS) = _get_std_adenine()
(_STD_CYTOSINE, _STD_CYTOSINE_RING_CENTERS) = _get_std_cytosine()
(_STD_GUANINE, _STD_GUANINE_RING_CENTERS) = _get_std_guanine()
(_STD_THYMINE, _STD_THYMINE_RING_CENTERS) = _get_std_thymine()
(_STD_URACIL, _STD_URACIL_RING_CENTERS) = _get_std_uracil()
|
|
250
|
+
|
|
251
|
+
_ADENINE_CONTAINING_NUCLEOTIDES = ["A", "DA"]
|
|
252
|
+
_THYMINE_CONTAINING_NUCLEOTIDES = ["T", "DT"]
|
|
253
|
+
_CYTOSINE_CONTAINING_NUCLEOTIDES = ["C", "DC"]
|
|
254
|
+
_GUANINE_CONTAINING_NUCLEOTIDES = ["G", "DG"]
|
|
255
|
+
_URACIL_CONTAINING_NUCLEOTIDES = ["U", "DU"]
|
|
256
|
+
_REFERENCE_NUCLEOTIDE_NAMES = (
|
|
257
|
+
_ADENINE_CONTAINING_NUCLEOTIDES
|
|
258
|
+
+ _THYMINE_CONTAINING_NUCLEOTIDES
|
|
259
|
+
+ _CYTOSINE_CONTAINING_NUCLEOTIDES
|
|
260
|
+
+ _GUANINE_CONTAINING_NUCLEOTIDES
|
|
261
|
+
+ _URACIL_CONTAINING_NUCLEOTIDES
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# Atoms that are part of respective base edges according to the
|
|
265
|
+
# Leontis-Westhof nomenclature
|
|
266
|
+
_WATSON_CRICK_EDGE = {
|
|
267
|
+
"A": ["N6", "N1"],
|
|
268
|
+
"G": ["O6", "N1", "N2"],
|
|
269
|
+
"U": ["O4", "N3", "O2"],
|
|
270
|
+
"T": ["O4", "N3", "O2"],
|
|
271
|
+
"C": ["N4", "N3", "O2"],
|
|
272
|
+
}
|
|
273
|
+
_HOOGSTEEN_EDGE = {
|
|
274
|
+
"A": ["N6", "N7"],
|
|
275
|
+
"G": ["O6", "N7"],
|
|
276
|
+
"U": ["O4"],
|
|
277
|
+
"T": ["O4"],
|
|
278
|
+
"C": ["N4"],
|
|
279
|
+
}
|
|
280
|
+
_SUGAR_EDGE = {
|
|
281
|
+
"A": ["N3", "O2'"],
|
|
282
|
+
"G": ["N2", "N3", "O2'"],
|
|
283
|
+
"U": ["O2", "O2'"],
|
|
284
|
+
"T": ["O2", "O2'"],
|
|
285
|
+
"C": ["O2", "O2'"],
|
|
286
|
+
}
|
|
287
|
+
_EDGES = [_WATSON_CRICK_EDGE, _HOOGSTEEN_EDGE, _SUGAR_EDGE]
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class Edge(IntEnum):
    """
    This enum type represents the interacting edge for a given base.

    The members are plain integers, so they compare equal to the
    integer edge codes ``0`` to ``3``.
    """

    # The values are written as plain integers: a one-element tuple such
    # as ``(1,)`` is unpacked by ``IntEnum`` into the same integer, but
    # reads as if the member value were a tuple.

    # No edge assigned
    # NOTE(review): presumably the value reported by `base_pairs_edge()`
    # when no edge can be determined - confirm against that function.
    INVALID = 0
    # Watson-Crick edge
    WATSON_CRICK = 1
    # Hoogsteen edge
    HOOGSTEEN = 2
    # Sugar edge
    SUGAR = 3
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class GlycosidicBond(IntEnum):
    """
    This enum type represents the relative glycosidic bond orientation
    for a given base pair.

    The member values are plain integers, so they can be stored in and
    recovered from integer arrays (``GlycosidicBond(value)``).
    """

    # The orientation could not be determined
    INVALID = 0
    CIS = 1
    TRANS = 2
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def base_pairs_edge(atom_array, base_pairs):
    """
    Determine the interacting edge of each base in the given base pairs
    of an :class:`AtomArray`, following the Leontis-Westhof
    nomenclature.
    :footcite:`Leontis2001`

    Hydrogen bonds are detected with :func:`hbond()`, therefore the
    :class:`AtomArray` must contain hydrogen atoms.

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_pairs : ndarray, dtype=int, shape=(n,2)
        One row per base pair. Each row holds the first atom indices of
        the two residues forming the pair, i.e. the array has the same
        layout as the output of :func:`base_pairs()`.

    Returns
    -------
    results : ndarray, dtype=uint8, shape=(n,2)
        An array with the same dimensions as ``base_pairs``. Each cell
        gives the interacting edge of the base referenced at the same
        position in ``base_pairs``. The edge is stored as an integer
        that is interpreted as a member of the :class:`Edge` enum.

    Raises
    ------
    BadStructureError
        If no hydrogen bond at all is found between the two residues of
        a base pair.

    See Also
    --------
    base_pairs : Get the base pairs required for this function.
    base_pairs_glycosidic_bond : Determine the orientation for each base pair.

    Notes
    -----
    ``0`` (i.e. ``Edge.INVALID``) is reported for a base that is not a
    canonical base (``A``, ``C``, ``G``, ``T``, ``U``), or if none of
    the hydrogen bonds between the bases involves an atom of the edges
    described by Leontis and Westhof.

    Otherwise the reported edge is the one with the most hydrogen
    bonding interactions.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Compute the interacting base edges for the dna helix with the PDB
    id 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> interacting_edges = base_pairs_edge(dna_helix, basepairs)
    >>> print(interacting_edges)
    [[1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]
     [1 1]]

    The resulting integers can be interpreted as :class:`Edge` ``Enum``:

    >>> for interaction in interacting_edges:
    ...     print(f"{Edge(interaction[0]).name} to {Edge(interaction[1]).name}")
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    WATSON_CRICK to WATSON_CRICK
    """
    # Every cell starts out as 0, i.e. ``Edge.INVALID``
    results = np.zeros_like(base_pairs, dtype="uint8")

    # One residue mask per referenced base, regrouped so that the first
    # axis runs over the rows of ``base_pairs``
    n_pairs = base_pairs.shape[0]
    n_atoms = atom_array.shape[0]
    residue_masks = get_residue_masks(atom_array, base_pairs.flatten())
    pair_masks = residue_masks.reshape((n_pairs, 2, n_atoms))

    for pair_index, masks in enumerate(pair_masks):
        # Rows: the two bases, columns: number of bonded atoms per edge
        edge_counts = _get_edge_matrix(atom_array, masks)

        # The edge with the most hydrogen bonded atoms wins; a base
        # without any bonded edge atom keeps the value 0.
        # The column index is shifted by one to obtain the enum value.
        results[pair_index] = np.where(
            np.max(edge_counts, axis=1) != 0,
            np.argmax(edge_counts, axis=1) + 1,
            0,
        )
    return results
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _get_edge_matrix(atom_array, base_masks):
    """
    Count, for both bases of a pair, how many hydrogen bonded atoms
    belong to each Leontis-Westhof edge.

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_masks : ndarray, dtype=bool, shape=(2,n)
        Boolean masks for the interacting bases

    Returns
    -------
    matrix : ndarray, dtype=int, shape=(2,3)
        The edge matrix: one row per base, the columns hold the number
        of Watson-Crick-, Hoogsteen- and Sugar-edge interactions
        respectively.

    Raises
    ------
    BadStructureError
        If no hydrogen bond is found between the two residues.
    """
    hbonds = hbond(atom_array, base_masks[0], base_masks[1])
    if len(hbonds) == 0:
        raise BadStructureError(
            f"No hydrogen bonds between nucleotides with residue start "
            f"indices {np.argmax(base_masks[0])} and "
            f"{np.argmax(base_masks[1])}"
        )

    # Only the donor/acceptor heteroatoms (first and last column) are
    # of interest; flatten them into one list of atom indices
    bonded_atom_indices = hbonds[:, (0, 2)].flatten()

    # Rows: bases, columns: edges in the order given by ``_EDGES``
    matrix = np.zeros((2, 3), dtype="int32")

    for atom, atom_index in zip(atom_array[bonded_atom_indices], bonded_atom_indices):
        if atom.res_name not in _REFERENCE_NUCLEOTIDE_NAMES:
            # Edges are only defined for the reference nucleotides
            continue

        # The last character of the residue name is the base letter
        # used as key in the edge dictionaries
        base_letter = atom.res_name[-1]

        for base_index, base_mask in enumerate(base_masks):
            if not base_mask[atom_index]:
                continue
            # Tally the atom for every edge that lists its name
            for edge_index, edge_atoms in enumerate(_EDGES):
                if atom.atom_name in edge_atoms[base_letter]:
                    matrix[base_index, edge_index] += 1
    return matrix
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def base_pairs_glycosidic_bond(atom_array, base_pairs):
    """
    Calculate the glycosidic bond orientation for given base pairs in an
    :class:`AtomArray` according to the Leontis-Westhof nomenclature.
    :footcite:`Leontis2001`

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` containing the bases.
    base_pairs : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base. The
        structure of the ``ndarray`` is the same as the output of
        :func:`base_pairs()`.

    Returns
    -------
    results : ndarray, dtype=uint8, shape=(n,)
        Each element is equivalent to the respective base pair (row) in
        ``base_pairs``. The glycosidic bond orientation is stored as
        integer that is interpreted as member of the
        :class:`GlycosidicBond` class.

    See Also
    --------
    base_pairs : Get the base pairs required for this function.
    base_pairs_edge : Determine the interacting edge for each base pair.
    GlycosidicBond : The Enum type for interpretation of the return value.

    Notes
    -----
    The orientation is found using the geometric centers of the bases
    and the glycosidic bonds as described in :footcite:`Yang2003`.

    ``0`` (i.e. ``GlycosidicBond.INVALID``) is returned for a base pair
    if one of its bases is not a canonical base (``A``, ``C``, ``G``,
    ``T``, ``U``) or cannot be matched to a standard base.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Compute the glycosidic bond orientations for the dna helix with the
    PDB ID 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> orientations = base_pairs_glycosidic_bond(dna_helix, basepairs)
    >>> print(orientations)
    [1 1 1 1 1 1 1 1 1 1 1 1]

    The resulting integers can be interpreted as :class:`GlycosidicBond`
    ``Enum``:

    >>> for orientation in orientations:
    ...     print(GlycosidicBond(orientation).name)
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    CIS
    """
    results = np.zeros(len(base_pairs), dtype="uint8")

    # Get the residue masks for each residue
    base_pairs_masks = get_residue_masks(atom_array, base_pairs.flatten())

    # Group every two masks together for easy iteration (each 'row' is
    # respective to a row in ``base_pairs``)
    base_pairs_masks = base_pairs_masks.reshape(
        (base_pairs.shape[0], 2, atom_array.shape[0])
    )

    for i, pair_masks in enumerate(base_pairs_masks):
        # position vectors of each bases geometric center
        geometric_centers = np.zeros((2, 3))
        # direction vectors of the glycosidic bonds
        glycosidic_bonds = np.zeros((2, 3))

        for base_index, base_mask in enumerate(pair_masks):
            base = atom_array[base_mask]

            # ``_match_base()`` gives ``None`` for bases it cannot
            # match; such a pair has no determinable orientation
            matched_base = _match_base(base, 3)
            if matched_base is None:
                results[i] = GlycosidicBond.INVALID
                break
            # Everything from the fourth row on are ring centers
            ring_center = matched_base[3:]

            res_name = base.res_name[0]

            # For Purines the glycosidic bond is between the C1' and the
            # N9 atoms, for pyrimidines it is between the C1' atom and
            # the N1 atom
            if (
                res_name in _ADENINE_CONTAINING_NUCLEOTIDES
                or res_name in _GUANINE_CONTAINING_NUCLEOTIDES
            ):
                # Purines have two rings: use the midpoint of both
                # ring centers
                geometric_centers[base_index] = (ring_center[0] + ring_center[1]) / 2
                base_atom = base[base.atom_name == "N9"][0]

            elif (
                res_name in _THYMINE_CONTAINING_NUCLEOTIDES
                or res_name in _URACIL_CONTAINING_NUCLEOTIDES
                or res_name in _CYTOSINE_CONTAINING_NUCLEOTIDES
            ):
                geometric_centers[base_index] = ring_center[0]
                base_atom = base[base.atom_name == "N1"][0]

            else:
                results[i] = GlycosidicBond.INVALID
                break

            sugar_atom = base[base.atom_name == "C1'"][0]

            # Calculate the glycosidic bond direction vector
            glycosidic_bonds[base_index] = sugar_atom.coord - base_atom.coord

        # if the bond is not invalid compute the orientation
        # (the ``else`` branch runs only if the loop did not ``break``)
        else:
            # Calculate the direction vector between the geometric centers
            geometric_centers_dir = geometric_centers[1] - geometric_centers[0]

            # Check the orientation of the glycosidic bonds: if they
            # point to opposite sides of the line connecting the base
            # centers, the dot product of the cross products is negative
            if (
                np.dot(
                    np.cross(geometric_centers_dir, glycosidic_bonds[0]),
                    np.cross(geometric_centers_dir, glycosidic_bonds[1]),
                )
                < 0
            ):
                results[i] = GlycosidicBond.TRANS

            else:
                results[i] = GlycosidicBond.CIS

    return results
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def base_stacking(atom_array, min_atoms_per_base=3):
    """
    Find pi-stacking interactions between aromatic rings
    in nucleic acids.

    The presence of base stacking is assumed if the following criteria
    are met :footcite:`Gabb1996`:

    (i) Distance between aromatic ring centers <=4.5 Å

    (ii) Angle between the ring normal vectors <=23°

    (iii) Angle between normalized distance vector between two ring
          centers and both bases' normal vectors <=40°

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` to find stacked bases in.
    min_atoms_per_base : integer, optional
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a stacking interaction.

    Returns
    -------
    stacked_bases : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one pair of stacked bases and
        contains the indices to the first atom for each one of both
        paired residues.
        If no stacking interaction is found, an empty array with shape
        ``(0,2)`` is returned.

    Notes
    -----
    Please note that ring normal vectors are assumed to be equal to the
    base normal vectors.

    Candidate pairs containing a base that cannot be matched to a
    standard base are skipped.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Compute the stacking interactions for a DNA-double-helix (PDB ID
    1BNA):

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1bna.pdb")
    ... )
    >>> stacking_interactions = base_stacking(dna_helix)
    >>> print(dna_helix[stacking_interactions].res_id)
    [[ 1  2]
     [ 2  3]
     [ 3  4]
     [ 4  5]
     [ 5  6]
     [ 6  7]
     [ 7  8]
     [ 8  9]
     [ 9 10]
     [11 12]
     [14 15]
     [15 16]
     [16 17]
     [17 18]
     [18 19]
     [19 20]
     [20 21]
     [21 22]
     [22 23]
     [23 24]]
    """
    # Get the stacking candidates according to a cutoff distance, where
    # each base is identified as the first index of its respective
    # residue.
    # The diameter from the C1'-sugar-atom across a purine base is ~5Å
    # and the distance between the base centers can be at most 4.5Å.
    # Thus, accounting for buffer, a cutoff of 15Å between the
    # nucleotides' C1'-atoms was chosen.
    c1_mask = filter_nucleotides(atom_array) & (atom_array.atom_name == "C1'")
    stacking_candidates, _ = _get_proximate_residues(atom_array, c1_mask, 15)

    # Contains the plausible pairs of stacked bases
    stacked_bases = []

    # Get the residue masks for each residue
    base_masks = get_residue_masks(atom_array, stacking_candidates.flatten())

    # Group every two masks together for easy iteration (each 'row' is
    # respective to a row in ``stacking_candidates``)
    base_masks = base_masks.reshape(
        (stacking_candidates.shape[0], 2, atom_array.shape[0])
    )

    for (base1_index, base2_index), (base1_mask, base2_mask) in zip(
        stacking_candidates, base_masks
    ):
        bases = (atom_array[base1_mask], atom_array[base2_mask])

        # A list containing ndarray for each base with transformed
        # vectors from the standard base reference frame to the
        # structures' coordinates. The layout is as follows:
        #
        # [Origin coordinates]
        # [Base normal vector]
        # [SCHNAaP origin coordinates]
        # [Aromatic Ring Center coordinates]
        transformed_std_vectors = []

        # Generate the data necessary for analysis of each base.
        for base in bases:
            base_tuple = _match_base(base, min_atoms_per_base)
            if base_tuple is None:
                break
            transformed_std_vectors.append(base_tuple)

        # Without reference vectors for both bases the stacking
        # criteria cannot be evaluated -> skip this candidate instead
        # of subscripting ``None`` below
        if len(transformed_std_vectors) != 2:
            continue

        normal_vectors = np.vstack(
            (transformed_std_vectors[0][1], transformed_std_vectors[1][1])
        )
        aromatic_ring_centers = [
            transformed_std_vectors[0][3:],
            transformed_std_vectors[1][3:],
        ]

        # If a stacking interaction is found, append the first indices
        # of the bases' residues to the output.
        if _check_base_stacking(aromatic_ring_centers, normal_vectors):
            stacked_bases.append((base1_index, base2_index))

    if not stacked_bases:
        # ``np.array([])`` would be a float array with shape (0,), which
        # can neither be used as index nor has the documented shape
        return np.empty((0, 2), dtype=int)
    return np.array(stacked_bases)
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def base_pairs(atom_array, min_atoms_per_base=3, unique=True):
    """
    Use DSSR criteria to find the base pairs in an :class:`AtomArray`.

    The algorithm is able to identify canonical and non-canonical
    base pairs between the 5 common bases Adenine, Guanine, Thymine,
    Cytosine, and Uracil bound to Deoxyribose and Ribose.
    Each base is mapped to one of these 5 common bases in a standard
    reference frame described in :footcite:`Olson2001` using
    :func:`map_nucleotide()`.

    The DSSR Criteria are as follows :footcite:`Lu2015`:

    (i) Distance between base origins <=15 Å

    (ii) Vertical separation between the base planes <=2.5 Å

    (iii) Angle between the base normal vectors <=65°

    (iv) Absence of stacking between the two bases

    (v) Presence of at least one hydrogen bond involving a base atom

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` to find base pairs in.
    min_atoms_per_base : integer, optional
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a base pair.
    unique : bool, optional
        If ``True``, each base is assumed to be only paired with one
        other base. If multiple pairings are plausible, the pairing with
        the most hydrogen bonds is selected.

    Returns
    -------
    basepairs : ndarray, dtype=int, shape=(n,2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.

    Notes
    -----
    The bases from the standard reference frame described in
    :footcite:`Olson2001` were modified such that only the base atoms
    are implemented.
    Sugar atoms (specifically C1') were disregarded, as nucleosides such
    as PSU do not possess the usual N-glycosidic linkage, thus leading
    to inaccurate results.

    The vertical separation is implemented as the scalar
    projection of the distance vectors between the base origins
    according to :footcite:`Lu1997` onto the averaged base normal
    vectors.

    The presence of base stacking is assumed if the following criteria
    are met :footcite:`Gabb1996`:

    (i) Distance between aromatic ring centers <=4.5 Å

    (ii) Angle between the ring normal vectors <=23°

    (iii) Angle between normalized distance vector between two ring
          centers and both bases' normal vectors <=40°

    Please note that ring normal vectors are assumed to be equal to the
    base normal vectors.

    For structures without hydrogens the accuracy of the algorithm is
    limited as the hydrogen bonds can only be checked for
    plausibility.
    A hydrogen bond is considered as plausible if a cutoff of 3.6 Å
    between N/O atom pairs is met. 3.6Å was chosen as hydrogen bonds are
    typically 1.5-2.5Å in length. N-H and O-H bonds have a length of
    1.00Å and 0.96Å respectively. Thus, including some buffer, a 3.6Å
    cutoff should cover all hydrogen bonds.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Compute the base pairs for the structure with the PDB ID 1QXB:

    >>> from os.path import join
    >>> dna_helix = load_structure(
    ...     join(path_to_structures, "base_pairs", "1qxb.cif")
    ... )
    >>> basepairs = base_pairs(dna_helix)
    >>> print(dna_helix[basepairs].res_name)
    [['DC' 'DG']
     ['DG' 'DC']
     ['DC' 'DG']
     ['DG' 'DC']
     ['DA' 'DT']
     ['DA' 'DT']
     ['DT' 'DA']
     ['DT' 'DA']
     ['DC' 'DG']
     ['DG' 'DC']
     ['DC' 'DG']
     ['DG' 'DC']]
    """
    # Restrict the structure to nucleosides: nucleotide atoms that are
    # not part of the phosphate backbone
    backbone_atom_names = ["O5'", "P", "OP1", "OP2", "OP3", "HOP2", "HOP3"]
    boolean_mask = filter_nucleotides(atom_array) & ~np.isin(
        atom_array.atom_name, backbone_atom_names
    )
    nucleosides = atom_array[boolean_mask]

    # Candidates are residue pairs with N/O atoms within the cutoff;
    # each base is identified by the first index of its residue
    basepair_candidates, n_o_matches = _get_proximate_residues(
        nucleosides, np.isin(nucleosides.element, ["N", "O"]), 3.6
    )

    # One residue mask per candidate base, regrouped so that the first
    # axis runs over the rows of ``basepair_candidates``
    candidate_masks = get_residue_masks(nucleosides, basepair_candidates.flatten())
    candidate_masks = candidate_masks.reshape(
        (basepair_candidates.shape[0], 2, nucleosides.shape[0])
    )

    # The plausible base pairs and, if ``unique``, their number of
    # hydrogen bonds
    basepairs = []
    basepairs_hbonds = []

    for (first_index, second_index), (first_mask, second_mask), n_o_pairs in zip(
        basepair_candidates, candidate_masks, n_o_matches
    ):
        hbonds = _check_dssr_criteria(
            (nucleosides[first_mask], nucleosides[second_mask]),
            min_atoms_per_base,
            unique,
        )

        if hbonds is None:
            # No hydrogens present: rank by the number of N/O pairs
            # instead. Each N/O-pair is detected twice, hence the
            # division by two.
            hbonds = n_o_pairs / 2

        if hbonds == -1:
            # The DSSR criteria are not fulfilled
            continue

        basepairs.append((first_index, second_index))
        if unique:
            basepairs_hbonds.append(hbonds)

    basepair_array = np.array(basepairs)

    if unique:
        # Rows of ``basepair_array`` that are flagged for removal
        to_remove = []

        base_indices, occurrences = np.unique(basepairs, return_counts=True)
        for base_index, occurrence in zip(base_indices, occurrences):
            if occurrence <= 1:
                continue
            # All rows in which the ambiguously paired base appears
            rows = np.flatnonzero(np.any(basepair_array == base_index, axis=1))
            # Keep only the pairing with the most hydrogen bonds
            # (the first one in case of a tie)
            row_to_keep = max(rows, key=lambda row: basepairs_hbonds[row])
            to_remove += [row for row in rows if row != row_to_keep]

        basepair_array = np.delete(basepair_array, to_remove, axis=0)

    # Map the indices, which point into ``nucleosides``, back to the
    # original atom array and move them to the residue starts
    if len(basepair_array) > 0:
        basepair_array = np.where(boolean_mask)[0][basepair_array]
        for row_index, row in enumerate(basepair_array):
            basepair_array[row_index] = get_residue_starts_for(atom_array, row)
    return basepair_array
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
    """
    Check the DSSR criteria of a potential base pair.

    Parameters
    ----------
    basepair : tuple (AtomArray, AtomArray)
        The two bases to check the criteria for as :class:`AtomArray`.
    min_atoms_per_base : int
        The number of atoms a nucleotides' base must have to be
        considered a candidate for a base pair.
    unique : bool
        Currently unused: the number of hydrogen bonds is returned for
        plausible base pairs regardless of this value.

    Returns
    -------
    satisfied : int or None
        The number of hydrogen bonds (``> 0``) if the base pair
        satisfies all criteria and ``-1`` if it does not.
        ``None`` if the first four criteria are satisfied, but at
        least one of the bases contains no hydrogen atoms, so that the
        presence of hydrogen bonds cannot be checked.
    """

    # A list containing ndarray for each base with transformed
    # vectors from the standard base reference frame to the structures'
    # coordinates. The layout is as follows:
    #
    # [Origin coordinates]
    # [Base normal vector]
    # [SCHNAaP origin coordinates]
    # [Aromatic Ring Center coordinates]
    transformed_std_vectors = [None] * 2

    # Generate the data necessary for analysis of each base.
    for i in range(2):
        transformed_std_vectors[i] = _match_base(basepair[i], min_atoms_per_base)

        # A base that cannot be matched cannot form a base pair
        if transformed_std_vectors[i] is None:
            return -1

    origins = np.vstack((transformed_std_vectors[0][0], transformed_std_vectors[1][0]))
    normal_vectors = np.vstack(
        (transformed_std_vectors[0][1], transformed_std_vectors[1][1])
    )
    schnaap_origins = np.vstack(
        (transformed_std_vectors[0][2], transformed_std_vectors[1][2])
    )
    aromatic_ring_centers = [
        transformed_std_vectors[0][3:],
        transformed_std_vectors[1][3:],
    ]

    # Criterion 1: Distance between origins <=15 Å
    if not (distance(origins[0], origins[1]) <= 15):
        return -1

    # Criterion 2: Vertical separation <=2.5 Å
    #
    # Average the base normal vectors. If the angle between the vectors
    # is >=90°, flip one vector before averaging
    mean_normal_vector = (
        normal_vectors[0]
        + (normal_vectors[1] * np.sign(np.dot(normal_vectors[0], normal_vectors[1])))
    ) / 2
    # The return value is discarded, i.e. this call relies on
    # ``norm_vector()`` normalizing the vector in-place
    norm_vector(mean_normal_vector)
    # Calculate the distance vector between the two SCHNAaP origins
    origin_distance_vector = schnaap_origins[1] - schnaap_origins[0]

    # The scalar projection of the distance vector between the two
    # origins onto the averaged normal vectors is the vertical
    # separation
    if not abs(np.dot(origin_distance_vector, mean_normal_vector)) <= 2.5:
        return -1

    # Criterion 3: Angle between normal vectors <=65°
    #
    # The check is expressed as an angle >=115°, the supplement of 65°.
    # NOTE(review): presumably the normal vectors of paired bases point
    # into opposite directions in the reference frame - confirm.
    #
    # Rounding errors may push the dot product of two unit vectors
    # slightly out of [-1, 1], where ``arccos()`` gives NaN and
    # (anti)parallel normal vectors would be rejected erroneously
    normal_vectors_cos = np.clip(
        np.dot(normal_vectors[0], normal_vectors[1]), -1.0, 1.0
    )
    if not (np.arccos(normal_vectors_cos) >= ((115 * np.pi) / 180)):
        return -1

    # Criterion 4: Absence of stacking
    if _check_base_stacking(aromatic_ring_centers, normal_vectors):
        return -1

    # Criterion 5: Presence of at least one hydrogen bond
    #
    # Check if both bases came with hydrogens.
    if ("H" in basepair[0].element) and ("H" in basepair[1].element):
        # For Structures that contain hydrogens, check for their
        # presence directly.
        #
        # Generate input atom array for ``hbond``
        potential_basepair = basepair[0] + basepair[1]

        # Get the number of hydrogen bonds
        bonds = len(
            hbond(
                potential_basepair,
                np.ones_like(potential_basepair, dtype=bool),
                np.ones_like(potential_basepair, dtype=bool),
            )
        )

        if bonds > 0:
            return bonds
        return -1

    else:
        # If the structure does not contain hydrogens return None
        return None
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
def _check_base_stacking(aromatic_ring_centers, normal_vectors):
    """
    Decide whether two bases are stacked onto each other.

    Three criteria are evaluated in order; stacking is only reported
    if none of them rules it out.

    Parameters
    ----------
    aromatic_ring_centers : list [ndarray, ndarray]
        The aromatic ring centers of the two bases.
        Each list element is an :class:`ndarray`, where each row
        represents one ring center.
    normal_vectors : ndarray shape=(2, 3)
        The normal vectors of the bases.

    Returns
    -------
    base_stacking : bool
        ``True`` if base stacking is detected and ``False`` if not
    """
    # Criterion 1: Distance between aromatic ring centers <=4.5 Å
    #
    # For every pair of ring centers that is close enough, keep the
    # normalized vector pointing from the first to the second center.
    close_center_directions = []
    for first_center in aromatic_ring_centers[0]:
        for second_center in aromatic_ring_centers[1]:
            if distance(first_center, second_center) <= 4.5:
                direction = second_center - first_center
                # ``norm_vector`` normalizes its argument in-place
                norm_vector(direction)
                close_center_directions.append(direction)
    # No ring center pair within the distance threshold -> no stacking
    if not close_center_directions:
        return False

    # Criterion 2: Angle between normal vectors or its supplement <=23°
    angle_between_normals = np.rad2deg(
        np.arccos(np.dot(normal_vectors[0], normal_vectors[1]))
    )
    if 23 <= angle_between_normals <= 157:
        return False

    # Criterion 3: Angle between one normalized distance vector and
    # each of the bases' normal vector or supplement <=40°
    for base_normal in normal_vectors:
        for direction in close_center_directions:
            angle_to_normal = np.rad2deg(np.arccos(np.dot(base_normal, direction)))
            if 40 <= angle_to_normal <= 140:
                return False

    return True
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def _match_base(nucleotide, min_atoms_per_base):
    """
    Match the nucleotide to a corresponding standard base reference
    frame.

    Parameters
    ----------
    nucleotide : AtomArray
        The nucleotide to be matched to a standard base.
    min_atoms_per_base : integer
        The number of atoms a base must have to be considered a
        candidate for a base pair.

    Returns
    -------
    vectors : ndarray, dtype=float, shape=(n,3)
        Transformed standard vectors, origin coordinates, base normal
        vector, aromatic ring center coordinates.
        ``None`` is returned instead, if the nucleotide cannot be
        mapped to a reference base or has fewer than
        `min_atoms_per_base` atoms in common with it.
    """
    # Map the nucleotide to a reference base
    one_letter_code, _ = map_nucleotide(nucleotide, min_atoms_per_base)
    if one_letter_code is None:
        return None

    # Reference base and its aromatic ring centers for each
    # one-letter code
    references = {
        "A": (_STD_ADENINE, _STD_ADENINE_RING_CENTERS),
        "T": (_STD_THYMINE, _STD_THYMINE_RING_CENTERS),
        "C": (_STD_CYTOSINE, _STD_CYTOSINE_RING_CENTERS),
        "G": (_STD_GUANINE, _STD_GUANINE_RING_CENTERS),
        "U": (_STD_URACIL, _STD_URACIL_RING_CENTERS),
    }
    std_base, std_ring_centers = references[one_letter_code]

    # Standard vectors containing the origin and the base normal vector,
    # followed by the ring centers, which are transformed alongside
    vectors = np.vstack(
        (np.array([[0, 0, 0], [0, 0, 1]], dtype=float), std_ring_centers)
    )

    # Select the matching atoms of the nucleotide and the standard base
    nucleotide_matched = nucleotide[np.isin(nucleotide.atom_name, std_base.atom_name)]
    std_base_matched = std_base[np.isin(std_base.atom_name, nucleotide.atom_name)]
    # Ensure the nucleotide does not contain duplicate atom names
    _, unique_indices = np.unique(nucleotide_matched.atom_name, return_index=True)
    nucleotide_matched = nucleotide_matched[unique_indices]
    # Only continue if minimum number of matching atoms is reached
    if len(nucleotide_matched) < min_atoms_per_base:
        # Report the actual threshold instead of a hard-coded number
        warnings.warn(
            f"Nucleotide with res_id {nucleotide.res_id[0]} and "
            f"chain_id {nucleotide.chain_id[0]} has less than "
            f"{min_atoms_per_base} base "
            f"atoms, unable to check for base pair.",
            IncompleteStructureWarning,
        )
        return None
    # Reorder the atoms of the nucleotide to obtain the standard RCSB
    # PDB atom order.
    nucleotide_matched = nucleotide_matched[standardize_order(nucleotide_matched)]

    # Match the selected std_base to the base.
    _, transformation = superimpose(nucleotide_matched, std_base_matched)
    vectors = transformation.apply(vectors)
    # The transformed normal vector end point is converted back into a
    # direction relative to the transformed origin and then normalized
    vectors[1, :] = vectors[1, :] - vectors[0, :]
    norm_vector(vectors[1, :])

    return vectors
|
|
1194
|
+
|
|
1195
|
+
|
|
1196
|
+
def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
    """
    Map a nucleotide to one of the 5 common bases Adenine, Guanine,
    Thymine, Cytosine, and Uracil. If one of those bases bound to
    Deoxyribose and Ribose is detected as input, the corresponding one-
    letter-code (``A``, ``G``, ``T``, ``C``, ``U``) is returned.

    If a different nucleotide is given, it is mapped to the best
    fitting base using the algorithm described below.

    (i) The number of matching atom names with the reference bases is counted.
    If the number of matching atoms with all reference bases is less than the
    specified `min_atoms_per_base` the nucleotide cannot be mapped and ``None`` is
    returned.

    (ii) The bases with maximum number of matching atoms are selected and superimposed
    with each reference.
    The base with lowest RMSD is chosen.
    If the RMSD is more than the specified `rmsd_cutoff`, the nucleotide cannot be
    mapped and ``None`` is returned.

    Parameters
    ----------
    residue : AtomArray
        The nucleotide to be mapped.
    min_atoms_per_base : int, optional
        The number of atoms the residue must have in common with the
        reference.
    rmsd_cutoff : float, optional
        The maximum RMSD that is allowed for a mapping to occur.

    Returns
    -------
    one_letter_code : str
        The one-letter-code of the mapped base. ``None`` if no base can
        be mapped.
    exact_match : bool
        Whether or not the residue name exactly matches one of the common
        bases, i.e. the ``res_name`` of the input `residue` is one of
        ``A``, ``G``, ``T``, ``C``, ``U``, ``DA``, ``DG``, ``DT``,
        ``DC`` or ``DU``.

    Notes
    -----
    The default RMSD cutoff was chosen according to :footcite:`Lu2015`,
    where the same cutoff is used to detect if a given base is a
    nucleotide, by superimposing the base ring atoms onto a reference
    structure.

    References
    ----------

    .. footbibliography::
    """
    # Check if the residue is a 'standard' nucleotide
    res_name = residue.res_name[0]
    if res_name in _REFERENCE_NUCLEOTIDE_NAMES:
        return res_name[-1], True

    # List of the standard bases for easy iteration
    std_base_list = [
        _STD_ADENINE,
        _STD_THYMINE,
        _STD_CYTOSINE,
        _STD_GUANINE,
        _STD_URACIL,
    ]

    # The number of matched atoms for each 'standard' base
    matched_atom_no = [
        np.sum(np.isin(ref_base.atom_name, residue.atom_name))
        for ref_base in std_base_list
    ]
    max_matched_atom_no = np.max(matched_atom_no)

    if max_matched_atom_no < min_atoms_per_base:
        warnings.warn(
            f"Base with res_id {residue.res_id[0]} and chain_id "
            f"{residue.chain_id[0]} has an overlap with the reference "
            f"bases which is less than {min_atoms_per_base} atoms. "
            f"Unable to map nucleotide.",
            IncompleteStructureWarning,
        )
        return None, False

    # The one letter code of the best matching reference base and the
    # RMSD a candidate has to undercut to replace it
    best_base = None
    best_rmsd = rmsd_cutoff

    # Iterate through the reference bases with the maximum number of
    # matching atoms
    for ref_base, atom_no in zip(std_base_list, matched_atom_no):
        if atom_no != max_matched_atom_no:
            continue

        # Copy the residue as the res_name property of the ``AtomArray``
        # has to be modified for later function calls.
        nuc = residue.copy()

        # Select the matching atoms of the nucleotide and the reference
        # base
        nuc = nuc[np.isin(nuc.atom_name, ref_base.atom_name)]
        ref_base_matched = ref_base[np.isin(ref_base.atom_name, nuc.atom_name)]

        # Set the res_name property to the same as the reference base.
        # This is a requirement for ``standardize_order``
        nuc.res_name = ref_base_matched.res_name
        # Reorder the atoms of the nucleotide to obtain the standard
        # RCSB PDB atom order. If a residue contains multiple atoms with
        # the same ``atom_name`` an exception is thrown by
        # ``standardize_order``. The exception is caught and the
        # selected reference is disregarded
        try:
            nuc = nuc[standardize_order(nuc)]
        except Exception:
            continue

        # Superimpose the nucleotide to the reference base
        fitted, _ = superimpose(ref_base_matched, nuc)

        # If the RMSD is lower than the specified cutoff or better than
        # a previous found reference, the current reference is selected
        # as best base
        current_rmsd = rmsd(fitted, ref_base_matched)
        if current_rmsd < best_rmsd:
            best_rmsd = current_rmsd
            best_base = ref_base_matched.res_name[0][-1]

    if best_base is None:
        warnings.warn(
            f"Base Type {residue.res_name[0]} not supported. ",
            UnexpectedStructureWarning,
        )
        # Always return a 2-tuple, so callers can unpack the result
        return None, False

    return best_base, False
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
def _get_proximate_residues(atom_array, boolean_mask, cutoff):
    """
    Find residue pairs whose selected atoms are close to each other.

    Parameters
    ----------
    atom_array : AtomArray, shape=(n,)
        The :class:`AtomArray` to find basepair candidates in.
    boolean_mask : ndarray, dtype=bool, shape=(n,)
        The selection of atoms.
    cutoff : integer
        The maximum distance between the atoms of the two residues.

    Returns
    -------
    pairs : ndarray, dtype=int, shape=(n,2)
        Contains the basepair candidates. Each row is equivalent to one
        potential basepair. Bases are represented as the first indices
        of their corresponding residues.
    count : ndarray, dtype=int, shape=(n,)
        The number of atom pairs between the residues within the
        specified cutoff
    """
    # For each selected atom, look up the selected atoms within the
    # cutoff distance
    cell_list = CellList(atom_array, cutoff, selection=boolean_mask)
    neighbor_indices = cell_list.get_atoms(atom_array.coord[boolean_mask], cutoff)

    # Combine each selected atom with each of its neighbors;
    # entries equal to -1 do not refer to an atom and are skipped
    selected_atom_indices = np.nonzero(boolean_mask)[0]
    atom_pairs = np.array(
        [
            (atom_index, neighbor)
            for atom_index, neighbors in zip(selected_atom_indices, neighbor_indices)
            for neighbor in neighbors
            if neighbor != -1
        ]
    )

    # Replace each atom index by the start index of its residue
    residue_pairs = get_residue_starts_for(atom_array, atom_pairs.flatten()).reshape(
        atom_pairs.shape
    )

    # Drop pairs where both atoms belong to the same residue
    residue_pairs = residue_pairs[residue_pairs[:, 0] != residue_pairs[:, 1]]
    # Bring the two residue starts of each pair into ascending order,
    # so that (a, b) and (b, a) are recognized as the same pair
    residue_pairs = np.sort(residue_pairs, axis=1)
    # List each pair only once and count how often it occurred
    return np.unique(residue_pairs, axis=0, return_counts=True)
|
|
1384
|
+
|
|
1385
|
+
|
|
1386
|
+
def _filter_atom_type(atom_array, atom_names):
    """
    Create a mask selecting all atoms with the specified atom names.

    Parameters
    ----------
    atom_array : AtomArray
        The :class:`AtomArray` to filter.
    atom_names : array_like
        The desired atom names.

    Returns
    -------
    filter : ndarray, dtype=bool
        This array is ``True`` for all indices in the
        :class:`AtomArray`, where the atom has one of the desired atom
        names and a ``res_id`` different from ``-1``.
    """
    has_desired_name = np.isin(atom_array.atom_name, atom_names)
    has_valid_res_id = atom_array.res_id != -1
    return has_desired_name & has_valid_res_id
|