biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/basepairs.py
CHANGED
|
@@ -8,23 +8,33 @@ This module provides functions for base pair identification.
|
|
|
8
8
|
|
|
9
9
|
__name__ = "biotite.structure"
|
|
10
10
|
__author__ = "Tom David Müller"
|
|
11
|
-
__all__ = [
|
|
12
|
-
|
|
11
|
+
__all__ = [
|
|
12
|
+
"base_pairs",
|
|
13
|
+
"map_nucleotide",
|
|
14
|
+
"base_stacking",
|
|
15
|
+
"base_pairs_edge",
|
|
16
|
+
"Edge",
|
|
17
|
+
"base_pairs_glycosidic_bond",
|
|
18
|
+
"GlycosidicBond",
|
|
19
|
+
]
|
|
13
20
|
|
|
14
|
-
import numpy as np
|
|
15
21
|
import warnings
|
|
16
22
|
from enum import IntEnum
|
|
17
|
-
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
from .
|
|
27
|
-
from .
|
|
23
|
+
import numpy as np
|
|
24
|
+
from biotite.structure.atoms import Atom, array
|
|
25
|
+
from biotite.structure.celllist import CellList
|
|
26
|
+
from biotite.structure.compare import rmsd
|
|
27
|
+
from biotite.structure.error import (
|
|
28
|
+
BadStructureError,
|
|
29
|
+
IncompleteStructureWarning,
|
|
30
|
+
UnexpectedStructureWarning,
|
|
31
|
+
)
|
|
32
|
+
from biotite.structure.filter import filter_nucleotides
|
|
33
|
+
from biotite.structure.hbond import hbond
|
|
34
|
+
from biotite.structure.info.standardize import standardize_order
|
|
35
|
+
from biotite.structure.residues import get_residue_masks, get_residue_starts_for
|
|
36
|
+
from biotite.structure.superimpose import superimpose
|
|
37
|
+
from biotite.structure.util import distance, norm_vector
|
|
28
38
|
|
|
29
39
|
|
|
30
40
|
def _get_std_adenine():
|
|
@@ -43,31 +53,29 @@ def _get_std_adenine():
|
|
|
43
53
|
ring center, :class:`ndarray` containing the coordinates of the
|
|
44
54
|
imidazole ring center
|
|
45
55
|
"""
|
|
46
|
-
atom1 =
|
|
47
|
-
atom2 =
|
|
48
|
-
atom3 =
|
|
49
|
-
atom4 =
|
|
50
|
-
atom5 =
|
|
51
|
-
atom6 =
|
|
52
|
-
atom7 =
|
|
53
|
-
atom8 =
|
|
54
|
-
atom9 =
|
|
55
|
-
atom10 = Atom([-1.267, 3.124, 0.000], atom_name="C4",
|
|
56
|
+
atom1 = Atom([-1.291, 4.498, 0.000], atom_name="N9", res_name="A")
|
|
57
|
+
atom2 = Atom([0.024, 4.897, 0.000], atom_name="C8", res_name="A")
|
|
58
|
+
atom3 = Atom([0.877, 3.902, 0.000], atom_name="N7", res_name="A")
|
|
59
|
+
atom4 = Atom([0.071, 2.771, 0.000], atom_name="C5", res_name="A")
|
|
60
|
+
atom5 = Atom([0.369, 1.398, 0.000], atom_name="C6", res_name="A")
|
|
61
|
+
atom6 = Atom([1.611, 0.909, 0.000], atom_name="N6", res_name="A")
|
|
62
|
+
atom7 = Atom([-0.668, 0.532, 0.000], atom_name="N1", res_name="A")
|
|
63
|
+
atom8 = Atom([-1.912, 1.023, 0.000], atom_name="C2", res_name="A")
|
|
64
|
+
atom9 = Atom([-2.320, 2.290, 0.000], atom_name="N3", res_name="A")
|
|
65
|
+
atom10 = Atom([-1.267, 3.124, 0.000], atom_name="C4", res_name="A")
|
|
56
66
|
adenine = array(
|
|
57
|
-
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8,
|
|
58
|
-
atom9, atom10]
|
|
67
|
+
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8, atom9, atom10]
|
|
59
68
|
)
|
|
60
69
|
|
|
61
70
|
# Get the midpoint between the N1 and C4 atoms
|
|
62
71
|
midpoint = np.mean([atom7.coord, atom10.coord], axis=-2)
|
|
63
72
|
# Calculate the coordinates of the aromatic ring centers
|
|
64
73
|
pyrimidine_center = np.mean(
|
|
65
|
-
[atom4.coord, atom5.coord, atom7.coord,
|
|
66
|
-
|
|
74
|
+
[atom4.coord, atom5.coord, atom7.coord, atom8.coord, atom9.coord, atom10.coord],
|
|
75
|
+
axis=-2,
|
|
67
76
|
)
|
|
68
77
|
imidazole_center = np.mean(
|
|
69
|
-
[atom1.coord, atom2.coord, atom3.coord,
|
|
70
|
-
atom4.coord, atom10.coord], axis=-2
|
|
78
|
+
[atom1.coord, atom2.coord, atom3.coord, atom4.coord, atom10.coord], axis=-2
|
|
71
79
|
)
|
|
72
80
|
|
|
73
81
|
return adenine, (midpoint, pyrimidine_center, imidazole_center)
|
|
@@ -75,37 +83,35 @@ def _get_std_adenine():
|
|
|
75
83
|
|
|
76
84
|
def _get_std_cytosine():
|
|
77
85
|
"""
|
|
78
|
-
|
|
86
|
+
Get standard base variables for cytosine.
|
|
79
87
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
standard_base : AtomArray
|
|
91
|
+
Standard coordinates nomenclature of the cytosine base as
|
|
92
|
+
:class:`AtomArray` with nomenclature of PDB File Format V3
|
|
93
|
+
coordinates : tuple (ndarray, ndarray, dtype=float)
|
|
94
|
+
:class:`ndarray` containing the center according to the SCHNaP-
|
|
95
|
+
paper referenced in the function ``base_pairs``,
|
|
96
|
+
:class:`ndarray` containing the coordinates of the pyrimidine
|
|
97
|
+
ring center
|
|
90
98
|
"""
|
|
91
|
-
atom1 = Atom([-1.285, 4.542, 0.000], atom_name="N1",
|
|
92
|
-
atom2 = Atom([-1.472, 3.158, 0.000], atom_name="C2",
|
|
93
|
-
atom3 = Atom([-2.628, 2.709, 0.000], atom_name="O2",
|
|
94
|
-
atom4 = Atom([-0.391, 2.344, 0.000], atom_name="N3",
|
|
95
|
-
atom5 = Atom([0.837, 2.868, 0.000],
|
|
96
|
-
atom6 = Atom([1.875, 2.027, 0.000],
|
|
97
|
-
atom7 = Atom([1.056, 4.275, 0.000],
|
|
98
|
-
atom8 = Atom([-0.023, 5.068, 0.000], atom_name="C6",
|
|
99
|
-
cytosine = array(
|
|
100
|
-
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8]
|
|
101
|
-
)
|
|
99
|
+
atom1 = Atom([-1.285, 4.542, 0.000], atom_name="N1", res_name="C")
|
|
100
|
+
atom2 = Atom([-1.472, 3.158, 0.000], atom_name="C2", res_name="C")
|
|
101
|
+
atom3 = Atom([-2.628, 2.709, 0.000], atom_name="O2", res_name="C")
|
|
102
|
+
atom4 = Atom([-0.391, 2.344, 0.000], atom_name="N3", res_name="C")
|
|
103
|
+
atom5 = Atom([0.837, 2.868, 0.000], atom_name="C4", res_name="C")
|
|
104
|
+
atom6 = Atom([1.875, 2.027, 0.000], atom_name="N4", res_name="C")
|
|
105
|
+
atom7 = Atom([1.056, 4.275, 0.000], atom_name="C5", res_name="C")
|
|
106
|
+
atom8 = Atom([-0.023, 5.068, 0.000], atom_name="C6", res_name="C")
|
|
107
|
+
cytosine = array([atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8])
|
|
102
108
|
|
|
103
109
|
# Get the midpoint between the N3 and C6 atoms
|
|
104
110
|
midpoint = np.mean([atom4.coord, atom8.coord], axis=-2)
|
|
105
111
|
# Calculate the coordinates of the aromatic ring center
|
|
106
112
|
pyrimidine_center = np.mean(
|
|
107
|
-
[atom1.coord, atom2.coord, atom4.coord,
|
|
108
|
-
|
|
113
|
+
[atom1.coord, atom2.coord, atom4.coord, atom5.coord, atom7.coord, atom8.coord],
|
|
114
|
+
axis=-2,
|
|
109
115
|
)
|
|
110
116
|
|
|
111
117
|
return cytosine, (midpoint, pyrimidine_center)
|
|
@@ -127,32 +133,37 @@ def _get_std_guanine():
|
|
|
127
133
|
ring center, :class:`ndarray` containing the coordinates of the
|
|
128
134
|
imidazole ring center
|
|
129
135
|
"""
|
|
130
|
-
atom1 =
|
|
131
|
-
atom2 =
|
|
132
|
-
atom3 =
|
|
133
|
-
atom4 =
|
|
134
|
-
atom5 =
|
|
135
|
-
atom6 =
|
|
136
|
-
atom7 =
|
|
137
|
-
atom8 =
|
|
138
|
-
atom9 =
|
|
139
|
-
atom10 = Atom([-2.342, 2.364, 0.001],
|
|
140
|
-
atom11 = Atom([-1.265, 3.177, 0.000],
|
|
136
|
+
atom1 = Atom([-1.289, 4.551, 0.000], atom_name="N9", res_name="G")
|
|
137
|
+
atom2 = Atom([0.023, 4.962, 0.000], atom_name="C8", res_name="G")
|
|
138
|
+
atom3 = Atom([0.870, 3.969, 0.000], atom_name="N7", res_name="G")
|
|
139
|
+
atom4 = Atom([0.071, 2.833, 0.000], atom_name="C5", res_name="G")
|
|
140
|
+
atom5 = Atom([0.424, 1.460, 0.000], atom_name="C6", res_name="G")
|
|
141
|
+
atom6 = Atom([1.554, 0.955, 0.000], atom_name="O6", res_name="G")
|
|
142
|
+
atom7 = Atom([-0.700, 0.641, 0.000], atom_name="N1", res_name="G")
|
|
143
|
+
atom8 = Atom([-1.999, 1.087, 0.000], atom_name="C2", res_name="G")
|
|
144
|
+
atom9 = Atom([-2.949, 0.139, -0.001], atom_name="N2", res_name="G")
|
|
145
|
+
atom10 = Atom([-2.342, 2.364, 0.001], atom_name="N3", res_name="G")
|
|
146
|
+
atom11 = Atom([-1.265, 3.177, 0.000], atom_name="C4", res_name="G")
|
|
141
147
|
guanine = array(
|
|
142
|
-
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8,
|
|
143
|
-
atom9, atom10, atom11]
|
|
148
|
+
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8, atom9, atom10, atom11]
|
|
144
149
|
)
|
|
145
150
|
|
|
146
151
|
# Get the midpoint between the N1 and C4 atoms
|
|
147
152
|
midpoint = np.mean([atom7.coord, atom11.coord], axis=-2)
|
|
148
153
|
# Calculate the coordinates of the aromatic ring centers
|
|
149
154
|
pyrimidine_center = np.mean(
|
|
150
|
-
[
|
|
151
|
-
|
|
155
|
+
[
|
|
156
|
+
atom4.coord,
|
|
157
|
+
atom5.coord,
|
|
158
|
+
atom7.coord,
|
|
159
|
+
atom8.coord,
|
|
160
|
+
atom10.coord,
|
|
161
|
+
atom11.coord,
|
|
162
|
+
],
|
|
163
|
+
axis=-2,
|
|
152
164
|
)
|
|
153
165
|
imidazole_center = np.mean(
|
|
154
|
-
[atom1.coord, atom2.coord, atom3.coord,
|
|
155
|
-
atom4.coord, atom11.coord], axis=-2
|
|
166
|
+
[atom1.coord, atom2.coord, atom3.coord, atom4.coord, atom11.coord], axis=-2
|
|
156
167
|
)
|
|
157
168
|
|
|
158
169
|
return guanine, (midpoint, pyrimidine_center, imidazole_center)
|
|
@@ -173,25 +184,23 @@ def _get_std_thymine():
|
|
|
173
184
|
:class:`ndarray` containing the coordinates of the pyrimidine
|
|
174
185
|
ring center
|
|
175
186
|
"""
|
|
176
|
-
atom1 = Atom([-1.284, 4.500, 0.000], atom_name="N1",
|
|
177
|
-
atom2 = Atom([-1.462, 3.135, 0.000], atom_name="C2",
|
|
178
|
-
atom3 = Atom([-2.562, 2.608, 0.000], atom_name="O2",
|
|
179
|
-
atom4 = Atom([-0.298, 2.407, 0.000], atom_name="N3",
|
|
180
|
-
atom5 = Atom([0.994, 2.897, 0.000],
|
|
181
|
-
atom6 = Atom([1.944, 2.119, 0.000],
|
|
182
|
-
atom7 = Atom([1.106, 4.338, 0.000],
|
|
183
|
-
atom8 = Atom([2.466, 4.961, 0.001],
|
|
184
|
-
atom9 = Atom([-0.024, 5.057, 0.000], atom_name="C6",
|
|
185
|
-
thymine = array(
|
|
186
|
-
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8, atom9]
|
|
187
|
-
)
|
|
187
|
+
atom1 = Atom([-1.284, 4.500, 0.000], atom_name="N1", res_name="T")
|
|
188
|
+
atom2 = Atom([-1.462, 3.135, 0.000], atom_name="C2", res_name="T")
|
|
189
|
+
atom3 = Atom([-2.562, 2.608, 0.000], atom_name="O2", res_name="T")
|
|
190
|
+
atom4 = Atom([-0.298, 2.407, 0.000], atom_name="N3", res_name="T")
|
|
191
|
+
atom5 = Atom([0.994, 2.897, 0.000], atom_name="C4", res_name="T")
|
|
192
|
+
atom6 = Atom([1.944, 2.119, 0.000], atom_name="O4", res_name="T")
|
|
193
|
+
atom7 = Atom([1.106, 4.338, 0.000], atom_name="C5", res_name="T")
|
|
194
|
+
atom8 = Atom([2.466, 4.961, 0.001], atom_name="C7", res_name="T")
|
|
195
|
+
atom9 = Atom([-0.024, 5.057, 0.000], atom_name="C6", res_name="T")
|
|
196
|
+
thymine = array([atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8, atom9])
|
|
188
197
|
|
|
189
198
|
# Get the midpoint between the N3 and C6 atoms
|
|
190
199
|
midpoint = np.mean([atom4.coord, atom9.coord], axis=-2)
|
|
191
200
|
# Calculate the coordinates of the aromatic ring center
|
|
192
201
|
pyrimidine_center = np.mean(
|
|
193
|
-
[atom1.coord, atom2.coord, atom4.coord,
|
|
194
|
-
|
|
202
|
+
[atom1.coord, atom2.coord, atom4.coord, atom5.coord, atom7.coord, atom9.coord],
|
|
203
|
+
axis=-2,
|
|
195
204
|
)
|
|
196
205
|
|
|
197
206
|
return thymine, (midpoint, pyrimidine_center)
|
|
@@ -212,30 +221,28 @@ def _get_std_uracil():
|
|
|
212
221
|
:class:`ndarray` containing the coordinates of the pyrimidine
|
|
213
222
|
ring center
|
|
214
223
|
"""
|
|
215
|
-
atom1 = Atom([-1.284, 4.500, 0.000], atom_name="N1",
|
|
216
|
-
atom2 = Atom([-1.462, 3.131, 0.000], atom_name="C2",
|
|
217
|
-
atom3 = Atom([-2.563, 2.608, 0.000], atom_name="O2",
|
|
218
|
-
atom4 = Atom([-0.302, 2.397, 0.000], atom_name="N3",
|
|
219
|
-
atom5 = Atom([0.989, 2.884, 0.000],
|
|
220
|
-
atom6 = Atom([1.935, 2.094, -0.001], atom_name="O4",
|
|
221
|
-
atom7 = Atom([1.089, 4.311, 0.000],
|
|
222
|
-
atom8 = Atom([-0.024, 5.053, 0.000], atom_name="C6",
|
|
223
|
-
uracil = array(
|
|
224
|
-
[atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8]
|
|
225
|
-
)
|
|
224
|
+
atom1 = Atom([-1.284, 4.500, 0.000], atom_name="N1", res_name="U")
|
|
225
|
+
atom2 = Atom([-1.462, 3.131, 0.000], atom_name="C2", res_name="U")
|
|
226
|
+
atom3 = Atom([-2.563, 2.608, 0.000], atom_name="O2", res_name="U")
|
|
227
|
+
atom4 = Atom([-0.302, 2.397, 0.000], atom_name="N3", res_name="U")
|
|
228
|
+
atom5 = Atom([0.989, 2.884, 0.000], atom_name="C4", res_name="U")
|
|
229
|
+
atom6 = Atom([1.935, 2.094, -0.001], atom_name="O4", res_name="U")
|
|
230
|
+
atom7 = Atom([1.089, 4.311, 0.000], atom_name="C5", res_name="U")
|
|
231
|
+
atom8 = Atom([-0.024, 5.053, 0.000], atom_name="C6", res_name="U")
|
|
232
|
+
uracil = array([atom1, atom2, atom3, atom4, atom5, atom6, atom7, atom8])
|
|
226
233
|
|
|
227
234
|
# Get the midpoint between the N3 and C6 atoms
|
|
228
235
|
midpoint = np.mean([atom4.coord, atom8.coord], axis=-2)
|
|
229
236
|
# Calculate the coordinates of the aromatic ring center
|
|
230
237
|
pyrimidine_center = np.mean(
|
|
231
|
-
[atom1.coord, atom2.coord, atom4.coord,
|
|
232
|
-
|
|
238
|
+
[atom1.coord, atom2.coord, atom4.coord, atom5.coord, atom7.coord, atom8.coord],
|
|
239
|
+
axis=-2,
|
|
233
240
|
)
|
|
234
241
|
|
|
235
242
|
return uracil, (midpoint, pyrimidine_center)
|
|
236
243
|
|
|
237
244
|
|
|
238
|
-
_STD_ADENINE, _STD_ADENINE_RING_CENTERS
|
|
245
|
+
_STD_ADENINE, _STD_ADENINE_RING_CENTERS = _get_std_adenine()
|
|
239
246
|
_STD_CYTOSINE, _STD_CYTOSINE_RING_CENTERS = _get_std_cytosine()
|
|
240
247
|
_STD_GUANINE, _STD_GUANINE_RING_CENTERS = _get_std_guanine()
|
|
241
248
|
_STD_THYMINE, _STD_THYMINE_RING_CENTERS = _get_std_thymine()
|
|
@@ -247,35 +254,35 @@ _CYTOSINE_CONTAINING_NUCLEOTIDES = ["C", "DC"]
|
|
|
247
254
|
_GUANINE_CONTAINING_NUCLEOTIDES = ["G", "DG"]
|
|
248
255
|
_URACIL_CONTAINING_NUCLEOTIDES = ["U", "DU"]
|
|
249
256
|
_REFERENCE_NUCLEOTIDE_NAMES = (
|
|
250
|
-
_ADENINE_CONTAINING_NUCLEOTIDES
|
|
251
|
-
_THYMINE_CONTAINING_NUCLEOTIDES
|
|
252
|
-
_CYTOSINE_CONTAINING_NUCLEOTIDES
|
|
253
|
-
_GUANINE_CONTAINING_NUCLEOTIDES
|
|
254
|
-
_URACIL_CONTAINING_NUCLEOTIDES
|
|
257
|
+
_ADENINE_CONTAINING_NUCLEOTIDES
|
|
258
|
+
+ _THYMINE_CONTAINING_NUCLEOTIDES
|
|
259
|
+
+ _CYTOSINE_CONTAINING_NUCLEOTIDES
|
|
260
|
+
+ _GUANINE_CONTAINING_NUCLEOTIDES
|
|
261
|
+
+ _URACIL_CONTAINING_NUCLEOTIDES
|
|
255
262
|
)
|
|
256
263
|
|
|
257
264
|
# Atoms that are part of respective base edges according to the
|
|
258
265
|
# Leontis-Westhof nomenclature
|
|
259
266
|
_WATSON_CRICK_EDGE = {
|
|
260
|
-
"A"
|
|
261
|
-
"G"
|
|
262
|
-
"U"
|
|
263
|
-
"T"
|
|
264
|
-
"C"
|
|
267
|
+
"A": ["N6", "N1"],
|
|
268
|
+
"G": ["O6", "N1", "N2"],
|
|
269
|
+
"U": ["O4", "N3", "O2"],
|
|
270
|
+
"T": ["O4", "N3", "O2"],
|
|
271
|
+
"C": ["N4", "N3", "O2"],
|
|
265
272
|
}
|
|
266
273
|
_HOOGSTEEN_EDGE = {
|
|
267
|
-
"A"
|
|
268
|
-
"G"
|
|
269
|
-
"U"
|
|
270
|
-
"T"
|
|
271
|
-
"C"
|
|
274
|
+
"A": ["N6", "N7"],
|
|
275
|
+
"G": ["O6", "N7"],
|
|
276
|
+
"U": ["O4"],
|
|
277
|
+
"T": ["O4"],
|
|
278
|
+
"C": ["N4"],
|
|
272
279
|
}
|
|
273
280
|
_SUGAR_EDGE = {
|
|
274
|
-
"A"
|
|
275
|
-
"G"
|
|
276
|
-
"U"
|
|
277
|
-
"T"
|
|
278
|
-
"C"
|
|
281
|
+
"A": ["N3", "O2'"],
|
|
282
|
+
"G": ["N2", "N3", "O2'"],
|
|
283
|
+
"U": ["O2", "O2'"],
|
|
284
|
+
"T": ["O2", "O2'"],
|
|
285
|
+
"C": ["O2", "O2'"],
|
|
279
286
|
}
|
|
280
287
|
_EDGES = [_WATSON_CRICK_EDGE, _HOOGSTEEN_EDGE, _SUGAR_EDGE]
|
|
281
288
|
|
|
@@ -284,9 +291,10 @@ class Edge(IntEnum):
|
|
|
284
291
|
"""
|
|
285
292
|
This enum type represents the interacting edge for a given base.
|
|
286
293
|
"""
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
294
|
+
|
|
295
|
+
INVALID = (0,)
|
|
296
|
+
WATSON_CRICK = (1,)
|
|
297
|
+
HOOGSTEEN = (2,)
|
|
290
298
|
SUGAR = 3
|
|
291
299
|
|
|
292
300
|
|
|
@@ -295,9 +303,10 @@ class GlycosidicBond(IntEnum):
|
|
|
295
303
|
This enum type represents the relative glycosidic bond orientation
|
|
296
304
|
for a given base pair.
|
|
297
305
|
"""
|
|
306
|
+
|
|
298
307
|
INVALID = 0
|
|
299
|
-
CIS = 1,
|
|
300
|
-
TRANS = 2,
|
|
308
|
+
CIS = (1,)
|
|
309
|
+
TRANS = (2,)
|
|
301
310
|
|
|
302
311
|
|
|
303
312
|
def base_pairs_edge(atom_array, base_pairs):
|
|
@@ -370,19 +379,19 @@ def base_pairs_edge(atom_array, base_pairs):
|
|
|
370
379
|
The resulting integers can be interpreted as :class:`Edge` ``Enum``:
|
|
371
380
|
|
|
372
381
|
>>> for interaction in interacting_edges:
|
|
373
|
-
... print(Edge(interaction[0])
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
382
|
+
... print(f"{Edge(interaction[0]).name} to {Edge(interaction[1]).name}")
|
|
383
|
+
WATSON_CRICK to WATSON_CRICK
|
|
384
|
+
WATSON_CRICK to WATSON_CRICK
|
|
385
|
+
WATSON_CRICK to WATSON_CRICK
|
|
386
|
+
WATSON_CRICK to WATSON_CRICK
|
|
387
|
+
WATSON_CRICK to WATSON_CRICK
|
|
388
|
+
WATSON_CRICK to WATSON_CRICK
|
|
389
|
+
WATSON_CRICK to WATSON_CRICK
|
|
390
|
+
WATSON_CRICK to WATSON_CRICK
|
|
391
|
+
WATSON_CRICK to WATSON_CRICK
|
|
392
|
+
WATSON_CRICK to WATSON_CRICK
|
|
393
|
+
WATSON_CRICK to WATSON_CRICK
|
|
394
|
+
WATSON_CRICK to WATSON_CRICK
|
|
386
395
|
|
|
387
396
|
References
|
|
388
397
|
----------
|
|
@@ -390,7 +399,7 @@ def base_pairs_edge(atom_array, base_pairs):
|
|
|
390
399
|
.. footbibliography::
|
|
391
400
|
"""
|
|
392
401
|
# Result-``ndarray`` matches the dimensions of the input array
|
|
393
|
-
results = np.zeros_like(base_pairs, dtype=
|
|
402
|
+
results = np.zeros_like(base_pairs, dtype="uint8")
|
|
394
403
|
|
|
395
404
|
# Get the residue masks for each residue
|
|
396
405
|
base_pairs_masks = get_residue_masks(atom_array, base_pairs.flatten())
|
|
@@ -441,16 +450,15 @@ def _get_edge_matrix(atom_array, base_masks):
|
|
|
441
450
|
)
|
|
442
451
|
# filter out donor/acceptor heteroatoms and flatten for easy
|
|
443
452
|
# iteration
|
|
444
|
-
hbonds = hbonds[:, (0,2)].flatten()
|
|
453
|
+
hbonds = hbonds[:, (0, 2)].flatten()
|
|
445
454
|
|
|
446
455
|
# ``ndarray`` with one row for each base and the number of
|
|
447
456
|
# bonded edge heteroatoms as in ``_edge`` as columns
|
|
448
|
-
matrix = np.zeros((2, 3), dtype=
|
|
457
|
+
matrix = np.zeros((2, 3), dtype="int32")
|
|
449
458
|
|
|
450
459
|
# Iterate through the atoms and corresponding atoms indices
|
|
451
460
|
# that are part of the hydrogen bonds
|
|
452
461
|
for atom, atom_index in zip(atom_array[hbonds], hbonds):
|
|
453
|
-
|
|
454
462
|
if atom.res_name not in _REFERENCE_NUCLEOTIDE_NAMES:
|
|
455
463
|
continue
|
|
456
464
|
|
|
@@ -460,8 +468,10 @@ def _get_edge_matrix(atom_array, base_masks):
|
|
|
460
468
|
for base_index, base_mask in enumerate(base_masks):
|
|
461
469
|
# If a donor/acceptor atom name matches a name in
|
|
462
470
|
# the corresponding edge list increase the tally
|
|
463
|
-
if (
|
|
464
|
-
|
|
471
|
+
if (
|
|
472
|
+
base_mask[atom_index]
|
|
473
|
+
and atom.atom_name in edge_type[atom.res_name[-1]]
|
|
474
|
+
):
|
|
465
475
|
matrix[base_index, edge_type_index] += 1
|
|
466
476
|
return matrix
|
|
467
477
|
|
|
@@ -521,26 +531,26 @@ def base_pairs_glycosidic_bond(atom_array, base_pairs):
|
|
|
521
531
|
``Enum``:
|
|
522
532
|
|
|
523
533
|
>>> for orientation in orientations:
|
|
524
|
-
... print(GlycosidicBond(orientation))
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
534
|
+
... print(GlycosidicBond(orientation).name)
|
|
535
|
+
CIS
|
|
536
|
+
CIS
|
|
537
|
+
CIS
|
|
538
|
+
CIS
|
|
539
|
+
CIS
|
|
540
|
+
CIS
|
|
541
|
+
CIS
|
|
542
|
+
CIS
|
|
543
|
+
CIS
|
|
544
|
+
CIS
|
|
545
|
+
CIS
|
|
546
|
+
CIS
|
|
537
547
|
|
|
538
548
|
References
|
|
539
549
|
----------
|
|
540
550
|
|
|
541
551
|
.. footbibliography::
|
|
542
552
|
"""
|
|
543
|
-
results = np.zeros(len(base_pairs), dtype=
|
|
553
|
+
results = np.zeros(len(base_pairs), dtype="uint8")
|
|
544
554
|
|
|
545
555
|
# Get the residue masks for each residue
|
|
546
556
|
base_pairs_masks = get_residue_masks(atom_array, base_pairs.flatten())
|
|
@@ -552,7 +562,6 @@ def base_pairs_glycosidic_bond(atom_array, base_pairs):
|
|
|
552
562
|
)
|
|
553
563
|
|
|
554
564
|
for i, pair_masks in enumerate(base_pairs_masks):
|
|
555
|
-
|
|
556
565
|
# position vectors of each bases geometric center
|
|
557
566
|
geometric_centers = np.zeros((2, 3))
|
|
558
567
|
# direction vectors of the glycosidic bonds
|
|
@@ -565,23 +574,22 @@ def base_pairs_glycosidic_bond(atom_array, base_pairs):
|
|
|
565
574
|
# For Purines the glycosidic bond is between the C1' and the
|
|
566
575
|
# N9 atoms, for pyrimidines it is between the C1' atom and
|
|
567
576
|
# the N1 atom
|
|
568
|
-
if (
|
|
569
|
-
base.res_name[0] in
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
)
|
|
577
|
+
if (
|
|
578
|
+
base.res_name[0] in _ADENINE_CONTAINING_NUCLEOTIDES
|
|
579
|
+
or base.res_name[0] in _GUANINE_CONTAINING_NUCLEOTIDES
|
|
580
|
+
):
|
|
581
|
+
geometric_centers[base_index] = (ring_center[0] + ring_center[1]) / 2
|
|
574
582
|
base_atom = base[base.atom_name == "N9"][0]
|
|
575
583
|
|
|
576
|
-
elif (
|
|
577
|
-
base.res_name[0] in
|
|
578
|
-
base.res_name[0] in
|
|
579
|
-
|
|
584
|
+
elif (
|
|
585
|
+
base.res_name[0] in _THYMINE_CONTAINING_NUCLEOTIDES
|
|
586
|
+
or base.res_name[0] in _URACIL_CONTAINING_NUCLEOTIDES
|
|
587
|
+
or base.res_name[0] in _CYTOSINE_CONTAINING_NUCLEOTIDES
|
|
588
|
+
):
|
|
580
589
|
geometric_centers[base_index] = ring_center[0]
|
|
581
590
|
base_atom = base[base.atom_name == "N1"][0]
|
|
582
591
|
|
|
583
592
|
else:
|
|
584
|
-
|
|
585
593
|
results[i] = GlycosidicBond.INVALID
|
|
586
594
|
break
|
|
587
595
|
|
|
@@ -596,15 +604,16 @@ def base_pairs_glycosidic_bond(atom_array, base_pairs):
|
|
|
596
604
|
geometric_centers_dir = geometric_centers[1] - geometric_centers[0]
|
|
597
605
|
|
|
598
606
|
# Check the orientation of the glycosidic bonds
|
|
599
|
-
if
|
|
600
|
-
np.
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
607
|
+
if (
|
|
608
|
+
np.dot(
|
|
609
|
+
np.cross(geometric_centers_dir, glycosidic_bonds[0]),
|
|
610
|
+
np.cross(geometric_centers_dir, glycosidic_bonds[1]),
|
|
611
|
+
)
|
|
612
|
+
< 0
|
|
613
|
+
):
|
|
604
614
|
results[i] = GlycosidicBond.TRANS
|
|
605
615
|
|
|
606
616
|
else:
|
|
607
|
-
|
|
608
617
|
results[i] = GlycosidicBond.CIS
|
|
609
618
|
|
|
610
619
|
return results
|
|
@@ -723,15 +732,18 @@ def base_stacking(atom_array, min_atoms_per_base=3):
|
|
|
723
732
|
for i in range(2):
|
|
724
733
|
base_tuple = _match_base(bases[i], min_atoms_per_base)
|
|
725
734
|
|
|
726
|
-
if
|
|
735
|
+
if base_tuple is None:
|
|
727
736
|
break
|
|
728
737
|
|
|
729
738
|
transformed_std_vectors[i] = base_tuple
|
|
730
739
|
|
|
731
|
-
normal_vectors = np.vstack(
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
740
|
+
normal_vectors = np.vstack(
|
|
741
|
+
(transformed_std_vectors[0][1], transformed_std_vectors[1][1])
|
|
742
|
+
)
|
|
743
|
+
aromatic_ring_centers = [
|
|
744
|
+
transformed_std_vectors[0][3:],
|
|
745
|
+
transformed_std_vectors[1][3:],
|
|
746
|
+
]
|
|
735
747
|
|
|
736
748
|
# Check if the base pairs are stacked.
|
|
737
749
|
stacked = _check_base_stacking(aromatic_ring_centers, normal_vectors)
|
|
@@ -744,7 +756,7 @@ def base_stacking(atom_array, min_atoms_per_base=3):
|
|
|
744
756
|
return np.array(stacked_bases)
|
|
745
757
|
|
|
746
758
|
|
|
747
|
-
def base_pairs(atom_array, min_atoms_per_base
|
|
759
|
+
def base_pairs(atom_array, min_atoms_per_base=3, unique=True):
|
|
748
760
|
"""
|
|
749
761
|
Use DSSR criteria to find the base pairs in an :class:`AtomArray`.
|
|
750
762
|
|
|
@@ -854,11 +866,8 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
|
|
|
854
866
|
nucleotides_boolean = filter_nucleotides(atom_array)
|
|
855
867
|
|
|
856
868
|
# Disregard the phosphate-backbone
|
|
857
|
-
non_phosphate_boolean = (
|
|
858
|
-
|
|
859
|
-
atom_array.atom_name,
|
|
860
|
-
["O5'", "P", "OP1", "OP2", "OP3", "HOP2", "HOP3"]
|
|
861
|
-
)
|
|
869
|
+
non_phosphate_boolean = ~np.isin(
|
|
870
|
+
atom_array.atom_name, ["O5'", "P", "OP1", "OP2", "OP3", "HOP2", "HOP3"]
|
|
862
871
|
)
|
|
863
872
|
|
|
864
873
|
# Combine the two boolean masks
|
|
@@ -867,7 +876,6 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
|
|
|
867
876
|
# Get only nucleosides
|
|
868
877
|
nucleosides = atom_array[boolean_mask]
|
|
869
878
|
|
|
870
|
-
|
|
871
879
|
# Get the base pair candidates according to a N/O cutoff distance,
|
|
872
880
|
# where each base is identified as the first index of its respective
|
|
873
881
|
# residue
|
|
@@ -896,9 +904,7 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
|
|
|
896
904
|
base1 = nucleosides[base1_mask]
|
|
897
905
|
base2 = nucleosides[base2_mask]
|
|
898
906
|
|
|
899
|
-
hbonds =
|
|
900
|
-
(base1, base2), min_atoms_per_base, unique
|
|
901
|
-
)
|
|
907
|
+
hbonds = _check_dssr_criteria((base1, base2), min_atoms_per_base, unique)
|
|
902
908
|
|
|
903
909
|
# If no hydrogens are present use the number N/O pairs to
|
|
904
910
|
# decide between multiple pairing possibilities.
|
|
@@ -906,7 +912,7 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
|
|
|
906
912
|
if hbonds is None:
|
|
907
913
|
# Each N/O-pair is detected twice. Thus, the number of
|
|
908
914
|
# matches must be divided by two.
|
|
909
|
-
hbonds = n_o_pairs/2
|
|
915
|
+
hbonds = n_o_pairs / 2
|
|
910
916
|
if hbonds != -1:
|
|
911
917
|
basepairs.append((base1_index, base2_index))
|
|
912
918
|
if unique:
|
|
@@ -922,20 +928,16 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True):
|
|
|
922
928
|
# Get all bases that have non-unique pairing interactions
|
|
923
929
|
base_indices, occurrences = np.unique(basepairs, return_counts=True)
|
|
924
930
|
for base_index, occurrence in zip(base_indices, occurrences):
|
|
925
|
-
if
|
|
931
|
+
if occurrence > 1:
|
|
926
932
|
# Write the non-unique base pairs to a dictionary as
|
|
927
933
|
# 'index: number of hydrogen bonds'
|
|
928
934
|
remove_candidates = {}
|
|
929
|
-
for i, row in enumerate(
|
|
930
|
-
np.
|
|
931
|
-
):
|
|
932
|
-
if(np.any(row)):
|
|
935
|
+
for i, row in enumerate(np.asarray(basepair_array == base_index)):
|
|
936
|
+
if np.any(row):
|
|
933
937
|
remove_candidates[i] = basepairs_hbonds[i]
|
|
934
938
|
# Flag all non-unique base pairs for removal except the
|
|
935
939
|
# one that has the most hydrogen bonds
|
|
936
|
-
del remove_candidates[
|
|
937
|
-
max(remove_candidates, key=remove_candidates.get)
|
|
938
|
-
]
|
|
940
|
+
del remove_candidates[max(remove_candidates, key=remove_candidates.get)]
|
|
939
941
|
to_remove += list(remove_candidates.keys())
|
|
940
942
|
# Remove all flagged base pairs from the output `ndarray`
|
|
941
943
|
basepair_array = np.delete(basepair_array, to_remove, axis=0)
|
|
@@ -984,21 +986,22 @@ def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
|
|
|
984
986
|
|
|
985
987
|
# Generate the data necessary for analysis of each base.
|
|
986
988
|
for i in range(2):
|
|
987
|
-
transformed_std_vectors[i] = _match_base(
|
|
988
|
-
basepair[i], min_atoms_per_base
|
|
989
|
-
)
|
|
989
|
+
transformed_std_vectors[i] = _match_base(basepair[i], min_atoms_per_base)
|
|
990
990
|
|
|
991
|
-
if
|
|
991
|
+
if transformed_std_vectors[i] is None:
|
|
992
992
|
return -1
|
|
993
993
|
|
|
994
|
-
origins = np.vstack((transformed_std_vectors[0][0],
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
schnaap_origins = np.vstack(
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
994
|
+
origins = np.vstack((transformed_std_vectors[0][0], transformed_std_vectors[1][0]))
|
|
995
|
+
normal_vectors = np.vstack(
|
|
996
|
+
(transformed_std_vectors[0][1], transformed_std_vectors[1][1])
|
|
997
|
+
)
|
|
998
|
+
schnaap_origins = np.vstack(
|
|
999
|
+
(transformed_std_vectors[0][2], transformed_std_vectors[1][2])
|
|
1000
|
+
)
|
|
1001
|
+
aromatic_ring_centers = [
|
|
1002
|
+
transformed_std_vectors[0][3:],
|
|
1003
|
+
transformed_std_vectors[1][3:],
|
|
1004
|
+
]
|
|
1002
1005
|
|
|
1003
1006
|
# Criterion 1: Distance between orgins <=15 Å
|
|
1004
1007
|
if not (distance(origins[0], origins[1]) <= 15):
|
|
@@ -1009,9 +1012,8 @@ def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
|
|
|
1009
1012
|
# Average the base normal vectors. If the angle between the vectors
|
|
1010
1013
|
# is >=90°, flip one vector before averaging
|
|
1011
1014
|
mean_normal_vector = (
|
|
1012
|
-
normal_vectors[0]
|
|
1013
|
-
|
|
1014
|
-
)))
|
|
1015
|
+
normal_vectors[0]
|
|
1016
|
+
+ (normal_vectors[1] * np.sign(np.dot(normal_vectors[0], normal_vectors[1])))
|
|
1015
1017
|
) / 2
|
|
1016
1018
|
norm_vector(mean_normal_vector)
|
|
1017
1019
|
# Calculate the distance vector between the two SCHNAaP origins
|
|
@@ -1024,8 +1026,9 @@ def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
|
|
|
1024
1026
|
return -1
|
|
1025
1027
|
|
|
1026
1028
|
# Criterion 3: Angle between normal vectors <=65°
|
|
1027
|
-
if not (
|
|
1028
|
-
|
|
1029
|
+
if not (
|
|
1030
|
+
np.arccos(np.dot(normal_vectors[0], normal_vectors[1])) >= ((115 * np.pi) / 180)
|
|
1031
|
+
):
|
|
1029
1032
|
return -1
|
|
1030
1033
|
|
|
1031
1034
|
# Criterion 4: Absence of stacking
|
|
@@ -1035,8 +1038,7 @@ def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
|
|
|
1035
1038
|
# Criterion 5: Presence of at least one hydrogen bond
|
|
1036
1039
|
#
|
|
1037
1040
|
# Check if both bases came with hydrogens.
|
|
1038
|
-
if (
|
|
1039
|
-
and ("H" in basepair[1].element)):
|
|
1041
|
+
if ("H" in basepair[0].element) and ("H" in basepair[1].element):
|
|
1040
1042
|
# For Structures that contain hydrogens, check for their
|
|
1041
1043
|
# presence directly.
|
|
1042
1044
|
#
|
|
@@ -1044,11 +1046,13 @@ def _check_dssr_criteria(basepair, min_atoms_per_base, unique):
|
|
|
1044
1046
|
potential_basepair = basepair[0] + basepair[1]
|
|
1045
1047
|
|
|
1046
1048
|
# Get the number of hydrogen bonds
|
|
1047
|
-
bonds = len(
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1049
|
+
bonds = len(
|
|
1050
|
+
hbond(
|
|
1051
|
+
potential_basepair,
|
|
1052
|
+
np.ones_like(potential_basepair, dtype=bool),
|
|
1053
|
+
np.ones_like(potential_basepair, dtype=bool),
|
|
1054
|
+
)
|
|
1055
|
+
)
|
|
1052
1056
|
|
|
1053
1057
|
if bonds > 0:
|
|
1054
1058
|
return bonds
|
|
@@ -1085,7 +1089,7 @@ def _check_base_stacking(aromatic_ring_centers, normal_vectors):
|
|
|
1085
1089
|
wrong_distance = True
|
|
1086
1090
|
for ring_center1 in aromatic_ring_centers[0]:
|
|
1087
1091
|
for ring_center2 in aromatic_ring_centers[1]:
|
|
1088
|
-
if
|
|
1092
|
+
if distance(ring_center1, ring_center2) <= 4.5:
|
|
1089
1093
|
wrong_distance = False
|
|
1090
1094
|
normalized_distance_vectors.append(ring_center2 - ring_center1)
|
|
1091
1095
|
norm_vector(normalized_distance_vectors[-1])
|
|
@@ -1106,8 +1110,7 @@ def _check_base_stacking(aromatic_ring_centers, normal_vectors):
|
|
|
1106
1110
|
dist_normal_vector_angle = np.rad2deg(
|
|
1107
1111
|
np.arccos(np.dot(normal_vector, normalized_dist_vector))
|
|
1108
1112
|
)
|
|
1109
|
-
if (
|
|
1110
|
-
(dist_normal_vector_angle <= 140)):
|
|
1113
|
+
if (dist_normal_vector_angle >= 40) and (dist_normal_vector_angle <= 140):
|
|
1111
1114
|
return False
|
|
1112
1115
|
|
|
1113
1116
|
return True
|
|
@@ -1142,19 +1145,19 @@ def _match_base(nucleotide, min_atoms_per_base):
|
|
|
1142
1145
|
if one_letter_code is None:
|
|
1143
1146
|
return None
|
|
1144
1147
|
|
|
1145
|
-
if
|
|
1148
|
+
if one_letter_code == "A":
|
|
1146
1149
|
std_base = _STD_ADENINE
|
|
1147
1150
|
std_ring_centers = _STD_ADENINE_RING_CENTERS
|
|
1148
|
-
elif
|
|
1151
|
+
elif one_letter_code == "T":
|
|
1149
1152
|
std_base = _STD_THYMINE
|
|
1150
1153
|
std_ring_centers = _STD_THYMINE_RING_CENTERS
|
|
1151
|
-
elif
|
|
1154
|
+
elif one_letter_code == "C":
|
|
1152
1155
|
std_base = _STD_CYTOSINE
|
|
1153
1156
|
std_ring_centers = _STD_CYTOSINE_RING_CENTERS
|
|
1154
|
-
elif
|
|
1157
|
+
elif one_letter_code == "G":
|
|
1155
1158
|
std_base = _STD_GUANINE
|
|
1156
1159
|
std_ring_centers = _STD_GUANINE_RING_CENTERS
|
|
1157
|
-
elif
|
|
1160
|
+
elif one_letter_code == "U":
|
|
1158
1161
|
std_base = _STD_URACIL
|
|
1159
1162
|
std_ring_centers = _STD_URACIL_RING_CENTERS
|
|
1160
1163
|
|
|
@@ -1162,16 +1165,10 @@ def _match_base(nucleotide, min_atoms_per_base):
|
|
|
1162
1165
|
vectors = np.vstack((vectors, std_ring_centers))
|
|
1163
1166
|
|
|
1164
1167
|
# Select the matching atoms of the nucleotide and the standard base
|
|
1165
|
-
nucleotide_matched = nucleotide[
|
|
1166
|
-
|
|
1167
|
-
]
|
|
1168
|
-
std_base_matched = std_base[
|
|
1169
|
-
np.isin(std_base.atom_name, nucleotide.atom_name)
|
|
1170
|
-
]
|
|
1168
|
+
nucleotide_matched = nucleotide[np.isin(nucleotide.atom_name, std_base.atom_name)]
|
|
1169
|
+
std_base_matched = std_base[np.isin(std_base.atom_name, nucleotide.atom_name)]
|
|
1171
1170
|
# Ensure the nucleotide does not contain duplicate atom names
|
|
1172
|
-
_, unique_indices = np.unique(
|
|
1173
|
-
nucleotide_matched.atom_name, return_index=True
|
|
1174
|
-
)
|
|
1171
|
+
_, unique_indices = np.unique(nucleotide_matched.atom_name, return_index=True)
|
|
1175
1172
|
nucleotide_matched = nucleotide_matched[unique_indices]
|
|
1176
1173
|
# Only continue if minimum number of matching atoms is reached
|
|
1177
1174
|
if len(nucleotide_matched) < min_atoms_per_base:
|
|
@@ -1179,21 +1176,19 @@ def _match_base(nucleotide, min_atoms_per_base):
|
|
|
1179
1176
|
f"Nucleotide with res_id {nucleotide.res_id[0]} and "
|
|
1180
1177
|
f"chain_id {nucleotide.chain_id[0]} has less than 3 base "
|
|
1181
1178
|
f"atoms, unable to check for base pair.",
|
|
1182
|
-
IncompleteStructureWarning
|
|
1179
|
+
IncompleteStructureWarning,
|
|
1183
1180
|
)
|
|
1184
1181
|
return None
|
|
1185
1182
|
# Reorder the atoms of the nucleotide to obtain the standard RCSB
|
|
1186
1183
|
# PDB atom order.
|
|
1187
|
-
nucleotide_matched = nucleotide_matched[
|
|
1188
|
-
standardize_order(nucleotide_matched)
|
|
1189
|
-
]
|
|
1184
|
+
nucleotide_matched = nucleotide_matched[standardize_order(nucleotide_matched)]
|
|
1190
1185
|
|
|
1191
1186
|
# Match the selected std_base to the base.
|
|
1192
1187
|
_, transformation = superimpose(nucleotide_matched, std_base_matched)
|
|
1193
1188
|
vectors = transformation.apply(vectors)
|
|
1194
1189
|
# Normalize the base-normal-vector
|
|
1195
|
-
vectors[1
|
|
1196
|
-
norm_vector(vectors[1
|
|
1190
|
+
vectors[1, :] = vectors[1, :] - vectors[0, :]
|
|
1191
|
+
norm_vector(vectors[1, :])
|
|
1197
1192
|
|
|
1198
1193
|
return vectors
|
|
1199
1194
|
|
|
@@ -1259,8 +1254,11 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
|
|
|
1259
1254
|
|
|
1260
1255
|
# List of the standard bases for easy iteration
|
|
1261
1256
|
std_base_list = [
|
|
1262
|
-
_STD_ADENINE,
|
|
1263
|
-
|
|
1257
|
+
_STD_ADENINE,
|
|
1258
|
+
_STD_THYMINE,
|
|
1259
|
+
_STD_CYTOSINE,
|
|
1260
|
+
_STD_GUANINE,
|
|
1261
|
+
_STD_URACIL,
|
|
1264
1262
|
]
|
|
1265
1263
|
|
|
1266
1264
|
# The number of matched atoms for each 'standard' base
|
|
@@ -1275,7 +1273,7 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
|
|
|
1275
1273
|
f"{residue.chain_id[0]} has an overlap with the reference "
|
|
1276
1274
|
f"bases which is less than {min_atoms_per_base} atoms. "
|
|
1277
1275
|
f"Unable to map nucleotide.",
|
|
1278
|
-
IncompleteStructureWarning
|
|
1276
|
+
IncompleteStructureWarning,
|
|
1279
1277
|
)
|
|
1280
1278
|
return None, False
|
|
1281
1279
|
|
|
@@ -1284,7 +1282,7 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
|
|
|
1284
1282
|
|
|
1285
1283
|
# Iterate through the reference bases with the maximum number of
|
|
1286
1284
|
# matching atoms
|
|
1287
|
-
for ref_base in np.array(std_base_list, dtype=
|
|
1285
|
+
for ref_base in np.array(std_base_list, dtype="object")[
|
|
1288
1286
|
np.array(matched_atom_no) == np.max(matched_atom_no)
|
|
1289
1287
|
]:
|
|
1290
1288
|
# Copy the residue as the res_name property of the ``AtomArray``
|
|
@@ -1293,12 +1291,8 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
|
|
|
1293
1291
|
|
|
1294
1292
|
# Select the matching atoms of the nucleotide and the reference
|
|
1295
1293
|
# base
|
|
1296
|
-
nuc = nuc[
|
|
1297
|
-
|
|
1298
|
-
]
|
|
1299
|
-
ref_base_matched = ref_base[
|
|
1300
|
-
np.isin(ref_base.atom_name, nuc.atom_name)
|
|
1301
|
-
]
|
|
1294
|
+
nuc = nuc[np.isin(nuc.atom_name, ref_base.atom_name)]
|
|
1295
|
+
ref_base_matched = ref_base[np.isin(ref_base.atom_name, nuc.atom_name)]
|
|
1302
1296
|
|
|
1303
1297
|
# Set the res_name property to the same as the reference base.
|
|
1304
1298
|
# This is a requirement for ``standardize_order``
|
|
@@ -1319,14 +1313,14 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28):
|
|
|
1319
1313
|
# If the RMSD is lower than the specified cutoff or better than
|
|
1320
1314
|
# a previous found reference, the current reference is selected
|
|
1321
1315
|
# as best base
|
|
1322
|
-
if
|
|
1316
|
+
if rmsd(fitted, ref_base_matched) < rmsd_cutoff:
|
|
1323
1317
|
rmsd_cutoff = rmsd(fitted, ref_base_matched)
|
|
1324
1318
|
best_base = ref_base_matched.res_name[0][-1]
|
|
1325
1319
|
|
|
1326
1320
|
if best_base is None:
|
|
1327
1321
|
warnings.warn(
|
|
1328
1322
|
f"Base Type {residue.res_name[0]} not supported. ",
|
|
1329
|
-
UnexpectedStructureWarning
|
|
1323
|
+
UnexpectedStructureWarning,
|
|
1330
1324
|
)
|
|
1331
1325
|
return None
|
|
1332
1326
|
|
|
@@ -1360,9 +1354,9 @@ def _get_proximate_residues(atom_array, boolean_mask, cutoff):
|
|
|
1360
1354
|
|
|
1361
1355
|
# Get the indices of the atoms that are within the maximum cutoff
|
|
1362
1356
|
# of each other
|
|
1363
|
-
indices = CellList(
|
|
1364
|
-
atom_array, cutoff
|
|
1365
|
-
)
|
|
1357
|
+
indices = CellList(atom_array, cutoff, selection=boolean_mask).get_atoms(
|
|
1358
|
+
atom_array.coord[boolean_mask], cutoff
|
|
1359
|
+
)
|
|
1366
1360
|
|
|
1367
1361
|
# Loop through the indices of potential partners
|
|
1368
1362
|
pairs = []
|
|
@@ -1375,16 +1369,12 @@ def _get_proximate_residues(atom_array, boolean_mask, cutoff):
|
|
|
1375
1369
|
# indices.
|
|
1376
1370
|
pairs = np.array(pairs)
|
|
1377
1371
|
basepair_candidates_shape = pairs.shape
|
|
1378
|
-
pairs = get_residue_starts_for(
|
|
1379
|
-
|
|
1380
|
-
)
|
|
1372
|
+
pairs = get_residue_starts_for(atom_array, pairs.flatten()).reshape(
|
|
1373
|
+
basepair_candidates_shape
|
|
1374
|
+
)
|
|
1381
1375
|
|
|
1382
1376
|
# Remove candidates where the pairs are from the same residue
|
|
1383
|
-
pairs = np.delete(
|
|
1384
|
-
pairs, np.where(
|
|
1385
|
-
pairs[:,0] == pairs[:,1]
|
|
1386
|
-
), axis=0
|
|
1387
|
-
)
|
|
1377
|
+
pairs = np.delete(pairs, np.where(pairs[:, 0] == pairs[:, 1]), axis=0)
|
|
1388
1378
|
# Sort the residue starts for each pair
|
|
1389
1379
|
for i, candidate in enumerate(pairs):
|
|
1390
1380
|
pairs[i] = sorted(candidate)
|
|
@@ -1411,5 +1401,4 @@ def _filter_atom_type(atom_array, atom_names):
|
|
|
1411
1401
|
This array is ``True`` for all indices in the :class:`AtomArray`
|
|
1412
1402
|
, where the atom has the desired atom names.
|
|
1413
1403
|
"""
|
|
1414
|
-
return
|
|
1415
|
-
& (atom_array.res_id != -1))
|
|
1404
|
+
return np.isin(atom_array.atom_name, atom_names) & (atom_array.res_id != -1)
|