biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/hbond.py
CHANGED
|
@@ -11,16 +11,23 @@ __author__ = "Daniel Bauer, Patrick Kunzmann"
|
|
|
11
11
|
__all__ = ["hbond", "hbond_frequency"]
|
|
12
12
|
|
|
13
13
|
import warnings
|
|
14
|
-
from .geometry import distance, angle
|
|
15
14
|
import numpy as np
|
|
16
|
-
from .atoms import AtomArrayStack, stack
|
|
17
|
-
from .celllist import CellList
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
15
|
+
from biotite.structure.atoms import AtomArrayStack, stack
|
|
16
|
+
from biotite.structure.celllist import CellList
|
|
17
|
+
from biotite.structure.geometry import angle, distance
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def hbond(
|
|
21
|
+
atoms,
|
|
22
|
+
selection1=None,
|
|
23
|
+
selection2=None,
|
|
24
|
+
selection1_type="both",
|
|
25
|
+
cutoff_dist=2.5,
|
|
26
|
+
cutoff_angle=120,
|
|
27
|
+
donor_elements=("O", "N", "S"),
|
|
28
|
+
acceptor_elements=("O", "N", "S"),
|
|
29
|
+
periodic=False,
|
|
30
|
+
):
|
|
24
31
|
r"""
|
|
25
32
|
Find hydrogen bonds in a structure using the Baker-Hubbard
|
|
26
33
|
algorithm. :footcite:`Baker1984`
|
|
@@ -31,7 +38,7 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
31
38
|
and :math:`d_{H,A} \le 2.5 \mathring{A}`.
|
|
32
39
|
Consequently, the given structure must contain hydrogen atoms.
|
|
33
40
|
Otherwise, no hydrogen bonds will be found.
|
|
34
|
-
|
|
41
|
+
|
|
35
42
|
Parameters
|
|
36
43
|
----------
|
|
37
44
|
atoms : AtomArray or AtomArrayStack
|
|
@@ -60,7 +67,7 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
60
67
|
boundary conditions.
|
|
61
68
|
The `box` attribute of `atoms` is required in this case.
|
|
62
69
|
(Default: False).
|
|
63
|
-
|
|
70
|
+
|
|
64
71
|
Returns
|
|
65
72
|
-------
|
|
66
73
|
triplets : ndarray, dtype=int, shape=(n,3)
|
|
@@ -74,7 +81,7 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
74
81
|
*m x n* matrix that shows if an interaction with index *n* in
|
|
75
82
|
`triplets` is present in the model *m* of the input `atoms`.
|
|
76
83
|
Only returned if `atoms` is an :class:`AtomArrayStack`.
|
|
77
|
-
|
|
84
|
+
|
|
78
85
|
Notes
|
|
79
86
|
-----
|
|
80
87
|
The result of this function may include false positives:
|
|
@@ -84,19 +91,19 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
84
91
|
For example, a nitrogen atom with positive charge could be
|
|
85
92
|
considered as acceptor atom by this method, although this does
|
|
86
93
|
make sense from a chemical perspective.
|
|
87
|
-
|
|
94
|
+
|
|
88
95
|
Examples
|
|
89
96
|
--------
|
|
90
97
|
Calculate the total number of hydrogen bonds found in each model:
|
|
91
|
-
|
|
98
|
+
|
|
92
99
|
>>> triplets, mask = hbond(atom_array_stack)
|
|
93
100
|
>>> hbonds_per_model = np.count_nonzero(mask, axis=1)
|
|
94
101
|
>>> print(hbonds_per_model)
|
|
95
102
|
[14 14 14 12 11 12 9 13 9 14 13 13 14 11 11 12 11 14 14 13 14 13 15 17
|
|
96
103
|
14 12 15 12 12 13 13 13 12 12 11 14 10 11]
|
|
97
|
-
|
|
104
|
+
|
|
98
105
|
Get hydrogen bond donors of third model:
|
|
99
|
-
|
|
106
|
+
|
|
100
107
|
>>> # Third model -> index 2
|
|
101
108
|
>>> triplets = triplets[mask[2,:]]
|
|
102
109
|
>>> # First column contains donors
|
|
@@ -137,12 +144,12 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
137
144
|
single_model = True
|
|
138
145
|
else:
|
|
139
146
|
single_model = False
|
|
140
|
-
|
|
147
|
+
|
|
141
148
|
if periodic:
|
|
142
149
|
box = atoms.box
|
|
143
150
|
else:
|
|
144
151
|
box = None
|
|
145
|
-
|
|
152
|
+
|
|
146
153
|
# Mask for donor/acceptor elements
|
|
147
154
|
donor_element_mask = np.isin(atoms.element, donor_elements)
|
|
148
155
|
acceptor_element_mask = np.isin(atoms.element, acceptor_elements)
|
|
@@ -152,69 +159,81 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
152
159
|
if selection2 is None:
|
|
153
160
|
selection2 = np.ones(atoms.array_length(), dtype=bool)
|
|
154
161
|
|
|
155
|
-
if selection1_type ==
|
|
162
|
+
if selection1_type == "both":
|
|
156
163
|
# The two selections are separated into three selections:
|
|
157
164
|
# the original ones without the overlaping part
|
|
158
165
|
# and one containing the overlap
|
|
159
|
-
# This prevents redundant triplets and unnecessary computation
|
|
166
|
+
# This prevents redundant triplets and unnecessary computation
|
|
160
167
|
overlap_selection = selection1 & selection2
|
|
161
168
|
# Original selections without overlaping part
|
|
162
169
|
exclusive_selection1 = selection1 & (~overlap_selection)
|
|
163
170
|
exclusive_selection2 = selection2 & (~overlap_selection)
|
|
164
|
-
|
|
171
|
+
|
|
165
172
|
# Put selections to list for cleaner iteration
|
|
166
|
-
selections = [
|
|
167
|
-
exclusive_selection1, exclusive_selection2, overlap_selection
|
|
168
|
-
]
|
|
173
|
+
selections = [exclusive_selection1, exclusive_selection2, overlap_selection]
|
|
169
174
|
selection_combinations = [
|
|
170
|
-
#(0,0), is not included, would be same selection
|
|
175
|
+
# (0,0), is not included, would be same selection
|
|
171
176
|
# as donor and acceptor simultaneously
|
|
172
|
-
(0,1),
|
|
173
|
-
(0,2),
|
|
174
|
-
(1,0),
|
|
175
|
-
#(1,1), # same reason above
|
|
176
|
-
(1,2),
|
|
177
|
-
(2,0),
|
|
178
|
-
(2,1),
|
|
179
|
-
(2,2)
|
|
177
|
+
(0, 1),
|
|
178
|
+
(0, 2),
|
|
179
|
+
(1, 0),
|
|
180
|
+
# (1,1), # same reason above
|
|
181
|
+
(1, 2),
|
|
182
|
+
(2, 0),
|
|
183
|
+
(2, 1),
|
|
184
|
+
(2, 2), # overlaping part, combination is necessary
|
|
180
185
|
]
|
|
181
|
-
|
|
186
|
+
|
|
182
187
|
all_comb_triplets = []
|
|
183
188
|
all_comb_mask = []
|
|
184
189
|
for selection_index1, selection_index2 in selection_combinations:
|
|
185
190
|
donor_mask = selections[selection_index1]
|
|
186
191
|
acceptor_mask = selections[selection_index2]
|
|
187
|
-
if
|
|
188
|
-
np.count_nonzero(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
192
|
+
if (
|
|
193
|
+
np.count_nonzero(donor_mask) != 0
|
|
194
|
+
and np.count_nonzero(acceptor_mask) != 0
|
|
195
|
+
):
|
|
196
|
+
# Calculate triplets and mask
|
|
197
|
+
triplets, mask = _hbond(
|
|
198
|
+
atoms,
|
|
199
|
+
donor_mask,
|
|
200
|
+
acceptor_mask,
|
|
201
|
+
donor_element_mask,
|
|
202
|
+
acceptor_element_mask,
|
|
203
|
+
cutoff_dist,
|
|
204
|
+
cutoff_angle,
|
|
205
|
+
box,
|
|
206
|
+
)
|
|
207
|
+
all_comb_triplets.append(triplets)
|
|
208
|
+
all_comb_mask.append(mask)
|
|
198
209
|
# Merge results from all combinations
|
|
199
210
|
triplets = np.concatenate(all_comb_triplets, axis=0)
|
|
200
211
|
mask = np.concatenate(all_comb_mask, axis=1)
|
|
201
212
|
|
|
202
|
-
elif selection1_type ==
|
|
213
|
+
elif selection1_type == "donor":
|
|
203
214
|
triplets, mask = _hbond(
|
|
204
|
-
atoms,
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
215
|
+
atoms,
|
|
216
|
+
selection1,
|
|
217
|
+
selection2,
|
|
218
|
+
donor_element_mask,
|
|
219
|
+
acceptor_element_mask,
|
|
220
|
+
cutoff_dist,
|
|
221
|
+
cutoff_angle,
|
|
222
|
+
box,
|
|
208
223
|
)
|
|
209
|
-
|
|
210
|
-
elif selection1_type ==
|
|
224
|
+
|
|
225
|
+
elif selection1_type == "acceptor":
|
|
211
226
|
triplets, mask = _hbond(
|
|
212
|
-
atoms,
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
227
|
+
atoms,
|
|
228
|
+
selection2,
|
|
229
|
+
selection1,
|
|
230
|
+
donor_element_mask,
|
|
231
|
+
acceptor_element_mask,
|
|
232
|
+
cutoff_dist,
|
|
233
|
+
cutoff_angle,
|
|
234
|
+
box,
|
|
216
235
|
)
|
|
217
|
-
|
|
236
|
+
|
|
218
237
|
else:
|
|
219
238
|
raise ValueError(f"Unkown selection type '{selection1_type}'")
|
|
220
239
|
|
|
@@ -228,12 +247,18 @@ def hbond(atoms, selection1=None, selection2=None, selection1_type='both',
|
|
|
228
247
|
return triplets, mask
|
|
229
248
|
|
|
230
249
|
|
|
231
|
-
def _hbond(
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
250
|
+
def _hbond(
|
|
251
|
+
atoms,
|
|
252
|
+
donor_mask,
|
|
253
|
+
acceptor_mask,
|
|
254
|
+
donor_element_mask,
|
|
255
|
+
acceptor_element_mask,
|
|
256
|
+
cutoff_dist,
|
|
257
|
+
cutoff_angle,
|
|
258
|
+
box,
|
|
259
|
+
):
|
|
235
260
|
# Filter donor/acceptor elements
|
|
236
|
-
donor_mask
|
|
261
|
+
donor_mask &= donor_element_mask
|
|
237
262
|
acceptor_mask &= acceptor_element_mask
|
|
238
263
|
|
|
239
264
|
first_model_box = box[0] if box is not None else None
|
|
@@ -254,47 +279,43 @@ def _hbond(atoms, donor_mask, acceptor_mask,
|
|
|
254
279
|
if len(donor_h_i) == 0 or len(acceptor_i) == 0:
|
|
255
280
|
# Return empty triplets and mask
|
|
256
281
|
return (
|
|
257
|
-
np.zeros((0,3), dtype=int),
|
|
258
|
-
np.zeros((atoms.stack_depth(),0), dtype=bool)
|
|
282
|
+
np.zeros((0, 3), dtype=int),
|
|
283
|
+
np.zeros((atoms.stack_depth(), 0), dtype=bool),
|
|
259
284
|
)
|
|
260
|
-
|
|
285
|
+
|
|
261
286
|
# Narrow the amount of possible acceptor to donor-H connections
|
|
262
287
|
# down via the distance cutoff parameter using a cell list
|
|
263
288
|
# Save in acceptor-to-hydrogen matrix
|
|
264
289
|
# (true when distance smaller than cutoff)
|
|
265
290
|
coord = atoms.coord
|
|
266
|
-
possible_bonds = np.zeros(
|
|
267
|
-
(len(acceptor_i), len(donor_h_i)),
|
|
268
|
-
dtype=bool
|
|
269
|
-
)
|
|
291
|
+
possible_bonds = np.zeros((len(acceptor_i), len(donor_h_i)), dtype=bool)
|
|
270
292
|
periodic = False if box is None else True
|
|
271
293
|
for model_i in range(atoms.stack_depth()):
|
|
272
294
|
donor_h_coord = coord[model_i, donor_h_mask]
|
|
273
295
|
acceptor_coord = coord[model_i, acceptor_mask]
|
|
274
296
|
box_for_model = box[model_i] if box is not None else None
|
|
275
297
|
cell_list = CellList(
|
|
276
|
-
donor_h_coord, cell_size=cutoff_dist,
|
|
277
|
-
periodic=periodic, box=box_for_model
|
|
278
|
-
)
|
|
279
|
-
possible_bonds |= cell_list.get_atoms_in_cells(
|
|
280
|
-
acceptor_coord, as_mask=True
|
|
298
|
+
donor_h_coord, cell_size=cutoff_dist, periodic=periodic, box=box_for_model
|
|
281
299
|
)
|
|
300
|
+
possible_bonds |= cell_list.get_atoms_in_cells(acceptor_coord, as_mask=True)
|
|
282
301
|
possible_bonds_i = np.where(possible_bonds)
|
|
283
302
|
# Narrow down
|
|
284
303
|
acceptor_i = acceptor_i[possible_bonds_i[0]]
|
|
285
304
|
donor_h_i = donor_h_i[possible_bonds_i[1]]
|
|
286
|
-
|
|
305
|
+
|
|
287
306
|
# Build D-H..A triplets
|
|
288
307
|
donor_i = associated_donor_indices[donor_h_i]
|
|
289
308
|
triplets = np.stack((donor_i, donor_h_i, acceptor_i), axis=1)
|
|
290
309
|
# Remove entries where donor and acceptor are the same
|
|
291
310
|
triplets = triplets[donor_i != acceptor_i]
|
|
292
|
-
|
|
311
|
+
|
|
293
312
|
hbond_mask = _is_hbond(
|
|
294
|
-
coord[:, triplets[:,0]], # donors
|
|
295
|
-
coord[:, triplets[:,1]], # donor hydrogens
|
|
296
|
-
coord[:, triplets[:,2]], # acceptors
|
|
297
|
-
box,
|
|
313
|
+
coord[:, triplets[:, 0]], # donors
|
|
314
|
+
coord[:, triplets[:, 1]], # donor hydrogens
|
|
315
|
+
coord[:, triplets[:, 2]], # acceptors
|
|
316
|
+
box,
|
|
317
|
+
cutoff_dist=cutoff_dist,
|
|
318
|
+
cutoff_angle=cutoff_angle,
|
|
298
319
|
)
|
|
299
320
|
|
|
300
321
|
# Reduce output to contain only triplets counted at least once
|
|
@@ -311,14 +332,14 @@ def _get_bonded_h(array, donor_mask, bonds):
|
|
|
311
332
|
all donors in atoms[donor_mask].
|
|
312
333
|
A `BondsList` is used for detecting bonded hydrogen atoms.
|
|
313
334
|
"""
|
|
314
|
-
hydrogen_mask =
|
|
315
|
-
|
|
335
|
+
hydrogen_mask = array.element == "H"
|
|
336
|
+
|
|
316
337
|
donor_hydrogen_mask = np.zeros(len(array), dtype=bool)
|
|
317
338
|
associated_donor_indices = np.full(len(array), -1, dtype=int)
|
|
318
339
|
|
|
319
340
|
all_bond_indices, _ = bonds.get_all_bonds()
|
|
320
341
|
donor_indices = np.where(donor_mask)[0]
|
|
321
|
-
|
|
342
|
+
|
|
322
343
|
for donor_i in donor_indices:
|
|
323
344
|
bonded_indices = all_bond_indices[donor_i]
|
|
324
345
|
# Remove padding values
|
|
@@ -327,7 +348,7 @@ def _get_bonded_h(array, donor_mask, bonds):
|
|
|
327
348
|
bonded_indices = bonded_indices[hydrogen_mask[bonded_indices]]
|
|
328
349
|
donor_hydrogen_mask[bonded_indices] = True
|
|
329
350
|
associated_donor_indices[bonded_indices] = donor_i
|
|
330
|
-
|
|
351
|
+
|
|
331
352
|
return donor_hydrogen_mask, associated_donor_indices
|
|
332
353
|
|
|
333
354
|
|
|
@@ -342,22 +363,20 @@ def _get_bonded_h_via_distance(array, donor_mask, box):
|
|
|
342
363
|
|
|
343
364
|
coord = array.coord
|
|
344
365
|
res_id = array.res_id
|
|
345
|
-
hydrogen_mask =
|
|
346
|
-
|
|
366
|
+
hydrogen_mask = array.element == "H"
|
|
367
|
+
|
|
347
368
|
donor_hydrogen_mask = np.zeros(len(array), dtype=bool)
|
|
348
369
|
associated_donor_indices = np.full(len(array), -1, dtype=int)
|
|
349
370
|
|
|
350
371
|
donor_indices = np.where(donor_mask)[0]
|
|
351
372
|
for donor_i in donor_indices:
|
|
352
373
|
candidate_mask = hydrogen_mask & (res_id == res_id[donor_i])
|
|
353
|
-
distances = distance(
|
|
354
|
-
coord[donor_i], coord[candidate_mask], box=box
|
|
355
|
-
)
|
|
374
|
+
distances = distance(coord[donor_i], coord[candidate_mask], box=box)
|
|
356
375
|
donor_h_indices = np.where(candidate_mask)[0][distances <= CUTOFF]
|
|
357
376
|
for i in donor_h_indices:
|
|
358
377
|
associated_donor_indices[i] = donor_i
|
|
359
378
|
donor_hydrogen_mask[i] = True
|
|
360
|
-
|
|
379
|
+
|
|
361
380
|
return donor_hydrogen_mask, associated_donor_indices
|
|
362
381
|
|
|
363
382
|
|
|
@@ -378,12 +397,12 @@ def hbond_frequency(mask):
|
|
|
378
397
|
|
|
379
398
|
The frequency is the amount of models, where the respective bond
|
|
380
399
|
exists divided by the total amount of models.
|
|
381
|
-
|
|
400
|
+
|
|
382
401
|
Parameters
|
|
383
402
|
----------
|
|
384
403
|
mask: ndarray, dtype=bool, shape=(m,n)
|
|
385
404
|
Input mask obtained from `hbond` function.
|
|
386
|
-
|
|
405
|
+
|
|
387
406
|
Returns
|
|
388
407
|
-------
|
|
389
408
|
ndarray, dtype=Float
|
|
@@ -406,4 +425,4 @@ def hbond_frequency(mask):
|
|
|
406
425
|
0.132 0.053 0.026 0.158 0.026 0.868 0.211 0.026 0.921 0.316 0.079 0.237
|
|
407
426
|
0.105 0.421 0.079 0.026 1.000 0.053 0.132 0.026 0.184]
|
|
408
427
|
"""
|
|
409
|
-
return mask.sum(axis=0)/len(mask)
|
|
428
|
+
return mask.sum(axis=0) / len(mask)
|
biotite/structure/info/atoms.py
CHANGED
|
@@ -6,15 +6,16 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["residue"]
|
|
8
8
|
|
|
9
|
-
from .ccd import get_ccd
|
|
9
|
+
from biotite.structure.info.ccd import get_ccd
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
"ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
|
|
14
|
-
"ILE","LEU","LYS","MET","PHE","PRO","PYL","SER","THR",
|
|
15
|
-
"TRP","TYR","VAL", "SEC",
|
|
11
|
+
# fmt: off
|
|
12
|
+
NON_HETERO_RESIDUES = set([
|
|
13
|
+
"ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS",
|
|
14
|
+
"ILE", "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SER", "THR",
|
|
15
|
+
"TRP", "TYR", "VAL", "SEC",
|
|
16
16
|
"A", "DA", "G", "DG", "C", "DC", "U", "DT",
|
|
17
17
|
])
|
|
18
|
+
# fmt: on
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
def residue(res_name):
|
|
@@ -70,13 +71,11 @@ def residue(res_name):
|
|
|
70
71
|
['OXT' 'HXT']]
|
|
71
72
|
"""
|
|
72
73
|
# Avoid circular import
|
|
73
|
-
from
|
|
74
|
+
from biotite.structure.io.pdbx import get_component
|
|
74
75
|
|
|
75
76
|
try:
|
|
76
77
|
component = get_component(get_ccd(), res_name=res_name)
|
|
77
78
|
except KeyError:
|
|
78
|
-
raise KeyError(
|
|
79
|
-
|
|
80
|
-
)
|
|
81
|
-
component.hetero[:] = res_name not in non_hetero_residues
|
|
79
|
+
raise KeyError(f"No atom information found for residue '{res_name}' in CCD")
|
|
80
|
+
component.hetero[:] = res_name not in NON_HETERO_RESIDUES
|
|
82
81
|
return component
|
biotite/structure/info/bonds.py
CHANGED
|
@@ -6,18 +6,17 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["bond_type", "bonds_in_residue"]
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
from .ccd import get_from_ccd
|
|
11
|
-
|
|
9
|
+
from biotite.structure.bonds import BondType
|
|
10
|
+
from biotite.structure.info.ccd import get_from_ccd
|
|
12
11
|
|
|
13
12
|
BOND_TYPES = {
|
|
14
|
-
("SING", "N")
|
|
15
|
-
("DOUB", "N")
|
|
16
|
-
("TRIP", "N")
|
|
17
|
-
("QUAD", "N")
|
|
18
|
-
("SING", "Y")
|
|
19
|
-
("DOUB", "Y")
|
|
20
|
-
("TRIP", "Y")
|
|
13
|
+
("SING", "N"): BondType.SINGLE,
|
|
14
|
+
("DOUB", "N"): BondType.DOUBLE,
|
|
15
|
+
("TRIP", "N"): BondType.TRIPLE,
|
|
16
|
+
("QUAD", "N"): BondType.QUADRUPLE,
|
|
17
|
+
("SING", "Y"): BondType.AROMATIC_SINGLE,
|
|
18
|
+
("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
|
|
19
|
+
("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
|
|
21
20
|
}
|
|
22
21
|
|
|
23
22
|
_intra_bonds = {}
|
|
@@ -48,13 +47,13 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
48
47
|
Examples
|
|
49
48
|
--------
|
|
50
49
|
|
|
51
|
-
>>> print(bond_type("PHE", "CA", "CB"))
|
|
52
|
-
BondType.SINGLE
|
|
53
|
-
>>> print(bond_type("PHE", "CG", "CD1"))
|
|
54
|
-
BondType.AROMATIC_DOUBLE
|
|
55
|
-
>>> print(bond_type("PHE", "CA", "CG"))
|
|
50
|
+
>>> print(repr(bond_type("PHE", "CA", "CB")))
|
|
51
|
+
<BondType.SINGLE: 1>
|
|
52
|
+
>>> print(repr(bond_type("PHE", "CG", "CD1")))
|
|
53
|
+
<BondType.AROMATIC_DOUBLE: 6>
|
|
54
|
+
>>> print(repr(bond_type("PHE", "CA", "CG")))
|
|
56
55
|
None
|
|
57
|
-
>>> print(bond_type("PHE", "FOO", "BAR"))
|
|
56
|
+
>>> print(repr(bond_type("PHE", "FOO", "BAR")))
|
|
58
57
|
None
|
|
59
58
|
"""
|
|
60
59
|
bonds_for_residue = bonds_in_residue(res_name)
|
|
@@ -62,8 +61,7 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
62
61
|
return None
|
|
63
62
|
# Try both atom orders
|
|
64
63
|
bond_type_int = bonds_for_residue.get(
|
|
65
|
-
(atom_name1, atom_name2),
|
|
66
|
-
bonds_for_residue.get((atom_name2, atom_name1))
|
|
64
|
+
(atom_name1, atom_name2), bonds_for_residue.get((atom_name2, atom_name1))
|
|
67
65
|
)
|
|
68
66
|
if bond_type_int is not None:
|
|
69
67
|
return BondType(bond_type_int)
|
|
@@ -101,30 +99,30 @@ def bonds_in_residue(res_name):
|
|
|
101
99
|
>>> bonds = bonds_in_residue("PHE")
|
|
102
100
|
>>> for atoms, bond_type_int in sorted(bonds.items()):
|
|
103
101
|
... atom1, atom2 = sorted(atoms)
|
|
104
|
-
... print(f"{atom1:3} + {atom2:3} -> {
|
|
105
|
-
C + O ->
|
|
106
|
-
C + OXT ->
|
|
107
|
-
C + CA ->
|
|
108
|
-
CA + CB ->
|
|
109
|
-
CA + HA ->
|
|
110
|
-
CB + CG ->
|
|
111
|
-
CB + HB2 ->
|
|
112
|
-
CB + HB3 ->
|
|
113
|
-
CD1 + CE1 ->
|
|
114
|
-
CD1 + HD1 ->
|
|
115
|
-
CD2 + CE2 ->
|
|
116
|
-
CD2 + HD2 ->
|
|
117
|
-
CE1 + CZ ->
|
|
118
|
-
CE1 + HE1 ->
|
|
119
|
-
CE2 + CZ ->
|
|
120
|
-
CE2 + HE2 ->
|
|
121
|
-
CD1 + CG ->
|
|
122
|
-
CD2 + CG ->
|
|
123
|
-
CZ + HZ ->
|
|
124
|
-
CA + N ->
|
|
125
|
-
H + N ->
|
|
126
|
-
H2 + N ->
|
|
127
|
-
HXT + OXT ->
|
|
102
|
+
... print(f"{atom1:3} + {atom2:3} -> {BondType(bond_type_int).name}")
|
|
103
|
+
C + O -> DOUBLE
|
|
104
|
+
C + OXT -> SINGLE
|
|
105
|
+
C + CA -> SINGLE
|
|
106
|
+
CA + CB -> SINGLE
|
|
107
|
+
CA + HA -> SINGLE
|
|
108
|
+
CB + CG -> SINGLE
|
|
109
|
+
CB + HB2 -> SINGLE
|
|
110
|
+
CB + HB3 -> SINGLE
|
|
111
|
+
CD1 + CE1 -> AROMATIC_SINGLE
|
|
112
|
+
CD1 + HD1 -> SINGLE
|
|
113
|
+
CD2 + CE2 -> AROMATIC_DOUBLE
|
|
114
|
+
CD2 + HD2 -> SINGLE
|
|
115
|
+
CE1 + CZ -> AROMATIC_DOUBLE
|
|
116
|
+
CE1 + HE1 -> SINGLE
|
|
117
|
+
CE2 + CZ -> AROMATIC_SINGLE
|
|
118
|
+
CE2 + HE2 -> SINGLE
|
|
119
|
+
CD1 + CG -> AROMATIC_DOUBLE
|
|
120
|
+
CD2 + CG -> AROMATIC_SINGLE
|
|
121
|
+
CZ + HZ -> SINGLE
|
|
122
|
+
CA + N -> SINGLE
|
|
123
|
+
H + N -> SINGLE
|
|
124
|
+
H2 + N -> SINGLE
|
|
125
|
+
HXT + OXT -> SINGLE
|
|
128
126
|
"""
|
|
129
127
|
global _intra_bonds
|
|
130
128
|
if res_name not in _intra_bonds:
|
|
@@ -137,7 +135,7 @@ def bonds_in_residue(res_name):
|
|
|
137
135
|
chem_comp_bond_dict["atom_id_1"],
|
|
138
136
|
chem_comp_bond_dict["atom_id_2"],
|
|
139
137
|
chem_comp_bond_dict["value_order"],
|
|
140
|
-
chem_comp_bond_dict["pdbx_aromatic_flag"]
|
|
138
|
+
chem_comp_bond_dict["pdbx_aromatic_flag"],
|
|
141
139
|
):
|
|
142
140
|
bond_type = BOND_TYPES[order, aromatic_flag]
|
|
143
141
|
bonds_for_residue[atom1.item(), atom2.item()] = bond_type
|
biotite/structure/info/ccd.py
CHANGED
|
@@ -9,7 +9,6 @@ __all__ = ["get_ccd", "get_from_ccd"]
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
CCD_DIR = Path(__file__).parent / "ccd"
|
|
14
13
|
INDEX_COLUMN_NAME = {
|
|
15
14
|
"chem_comp": "id",
|
|
@@ -40,7 +39,7 @@ def get_ccd():
|
|
|
40
39
|
|
|
41
40
|
"""
|
|
42
41
|
# Avoid circular import
|
|
43
|
-
from
|
|
42
|
+
from biotite.structure.io.pdbx.bcif import BinaryCIFFile
|
|
44
43
|
|
|
45
44
|
global _ccd_block
|
|
46
45
|
if _ccd_block is None:
|
|
@@ -104,7 +103,7 @@ def _index_residues(id_column):
|
|
|
104
103
|
# The final start is the exclusive stop of last residue
|
|
105
104
|
residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
|
|
106
105
|
index = {}
|
|
107
|
-
for i in range(len(residue_starts)-1):
|
|
106
|
+
for i in range(len(residue_starts) - 1):
|
|
108
107
|
comp_id = id_column[residue_starts[i]].item()
|
|
109
|
-
index[comp_id] = (residue_starts[i], residue_starts[i+1])
|
|
110
|
-
return index
|
|
108
|
+
index[comp_id] = (residue_starts[i], residue_starts[i + 1])
|
|
109
|
+
return index
|
biotite/structure/info/groups.py
CHANGED
|
@@ -7,8 +7,6 @@ __author__ = "Tom David Müller, Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
|
|
8
8
|
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
import copy
|
|
11
|
-
|
|
12
10
|
|
|
13
11
|
CCD_DIR = Path(__file__).parent / "ccd"
|
|
14
12
|
|
|
@@ -84,4 +82,4 @@ def _get_group_members(group_name):
|
|
|
84
82
|
if group_name not in group_lists:
|
|
85
83
|
with open(CCD_DIR / f"{group_name}.txt", "r") as file:
|
|
86
84
|
group_lists[group_name] = tuple(file.read().split())
|
|
87
|
-
return group_lists[group_name]
|
|
85
|
+
return group_lists[group_name]
|
biotite/structure/info/masses.py
CHANGED
|
@@ -8,9 +8,8 @@ __all__ = ["mass"]
|
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from
|
|
12
|
-
from .ccd import get_from_ccd
|
|
13
|
-
|
|
11
|
+
from biotite.structure.atoms import Atom, AtomArray, AtomArrayStack
|
|
12
|
+
from biotite.structure.info.ccd import get_from_ccd
|
|
14
13
|
|
|
15
14
|
# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
|
|
16
15
|
ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
|
|
@@ -109,15 +108,11 @@ def mass(item, is_residue=None):
|
|
|
109
108
|
elif isinstance(item, Atom):
|
|
110
109
|
result_mass = mass(item.element, is_residue=False)
|
|
111
110
|
elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
|
|
112
|
-
result_mass = sum(
|
|
113
|
-
(mass(element, is_residue=False) for element in item.element)
|
|
114
|
-
)
|
|
111
|
+
result_mass = sum((mass(element, is_residue=False) for element in item.element))
|
|
115
112
|
|
|
116
113
|
else:
|
|
117
|
-
raise TypeError(
|
|
118
|
-
f"Cannot calculate mass for {type(item).__name__} objects"
|
|
119
|
-
)
|
|
114
|
+
raise TypeError(f"Cannot calculate mass for {type(item).__name__} objects")
|
|
120
115
|
|
|
121
116
|
if result_mass is None:
|
|
122
117
|
raise KeyError(f"{item} is not known")
|
|
123
|
-
return result_mass
|
|
118
|
+
return result_mass
|
biotite/structure/info/misc.py
CHANGED
|
@@ -6,7 +6,7 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
|
|
8
8
|
|
|
9
|
-
from .ccd import get_ccd, get_from_ccd
|
|
9
|
+
from biotite.structure.info.ccd import get_ccd, get_from_ccd
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def all_residues():
|