firecode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecode/TEST_NOTEBOOK.ipynb +3940 -0
- firecode/__init__.py +0 -0
- firecode/__main__.py +118 -0
- firecode/_gaussian.py +97 -0
- firecode/algebra.py +405 -0
- firecode/ase_manipulations.py +879 -0
- firecode/atropisomer_module.py +516 -0
- firecode/automep.py +130 -0
- firecode/calculators/__init__.py +29 -0
- firecode/calculators/_gaussian.py +98 -0
- firecode/calculators/_mopac.py +242 -0
- firecode/calculators/_openbabel.py +154 -0
- firecode/calculators/_orca.py +129 -0
- firecode/calculators/_xtb.py +786 -0
- firecode/concurrent_test.py +119 -0
- firecode/embedder.py +2590 -0
- firecode/embedder_options.py +577 -0
- firecode/embeds.py +881 -0
- firecode/errors.py +65 -0
- firecode/graph_manipulations.py +333 -0
- firecode/hypermolecule_class.py +364 -0
- firecode/mep_relaxer.py +199 -0
- firecode/modify_settings.py +186 -0
- firecode/mprof.py +65 -0
- firecode/multiembed.py +148 -0
- firecode/nci.py +186 -0
- firecode/numba_functions.py +260 -0
- firecode/operators.py +776 -0
- firecode/optimization_methods.py +609 -0
- firecode/parameters.py +84 -0
- firecode/pka.py +275 -0
- firecode/profiler.py +17 -0
- firecode/pruning.py +421 -0
- firecode/pt.py +32 -0
- firecode/quotes.json +6651 -0
- firecode/quotes.py +9 -0
- firecode/reactive_atoms_classes.py +666 -0
- firecode/references.py +11 -0
- firecode/rmsd.py +74 -0
- firecode/settings.py +75 -0
- firecode/solvents.py +126 -0
- firecode/tests/C2F2H4.xyz +10 -0
- firecode/tests/C2H4.xyz +8 -0
- firecode/tests/CH3Cl.xyz +7 -0
- firecode/tests/HCOOH.xyz +7 -0
- firecode/tests/HCOOOH.xyz +8 -0
- firecode/tests/chelotropic.txt +3 -0
- firecode/tests/cyclical.txt +3 -0
- firecode/tests/dihedral.txt +2 -0
- firecode/tests/string.txt +3 -0
- firecode/tests/trimolecular.txt +9 -0
- firecode/tests.py +151 -0
- firecode/torsion_module.py +1035 -0
- firecode/utils.py +541 -0
- firecode-1.0.0.dist-info/LICENSE +165 -0
- firecode-1.0.0.dist-info/METADATA +321 -0
- firecode-1.0.0.dist-info/RECORD +59 -0
- firecode-1.0.0.dist-info/WHEEL +5 -0
- firecode-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1035 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
'''
|
|
3
|
+
FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
|
|
4
|
+
Copyright (C) 2021-2024 Nicolò Tampellini
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: LGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU Lesser General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
(at your option) any later version.
|
|
12
|
+
|
|
13
|
+
This program is distributed in the hope that it will be useful,
|
|
14
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
GNU Lesser General Public License for more details.
|
|
17
|
+
|
|
18
|
+
You should have received a copy of the GNU Lesser General Public License
|
|
19
|
+
along with this program. If not, see
|
|
20
|
+
https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
|
|
21
|
+
|
|
22
|
+
'''
|
|
23
|
+
import os
|
|
24
|
+
import time
|
|
25
|
+
from copy import deepcopy
|
|
26
|
+
|
|
27
|
+
import numpy as np
|
|
28
|
+
from networkx import (connected_components, has_path, is_isomorphic,
|
|
29
|
+
shortest_path, subgraph)
|
|
30
|
+
from sklearn.cluster import KMeans, dbscan
|
|
31
|
+
|
|
32
|
+
from firecode.algebra import norm, norm_of, vec_angle
|
|
33
|
+
from firecode.errors import SegmentedGraphError
|
|
34
|
+
from firecode.graph_manipulations import (_get_phenyl_ids, findPaths, get_sp_n,
|
|
35
|
+
is_amide_n, is_ester_o, is_sp_n,
|
|
36
|
+
neighbors)
|
|
37
|
+
from firecode.hypermolecule_class import graphize
|
|
38
|
+
from firecode.numba_functions import prune_conformers_tfd, torsion_comp_check
|
|
39
|
+
from firecode.pt import pt
|
|
40
|
+
from firecode.rmsd import rmsd_and_max_numba
|
|
41
|
+
from firecode.settings import DEFAULT_FF_LEVELS, FF_CALC
|
|
42
|
+
from firecode.utils import (align_structures, cartesian_product, flatten,
|
|
43
|
+
get_double_bonds_indices, rotate_dihedral,
|
|
44
|
+
time_to_string, write_xyz)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Torsion:
    '''
    Dihedral angle defined by four atomic indices (i1, i2, i3, i4),
    where i2-i3 is the central bond.

    self.torsion stores the quadruplet as a tuple and is the only
    attribute modified by sort_torsion. self.mode ('csearch' or
    'symmetry') is read by get_n_fold. self.n_fold is only set
    once is_rotable has returned True.
    '''

    def __repr__(self):
        if hasattr(self, 'n_fold'):
            return f'Torsion({self.i1}, {self.i2}, {self.i3}, {self.i4}; {self.n_fold}-fold)'
        return f'Torsion({self.i1}, {self.i2}, {self.i3}, {self.i4})'

    def __init__(self, i1, i2, i3, i4, mode='csearch'):
        self.i1 = i1
        self.i2 = i2
        self.i3 = i3
        self.i4 = i4
        self.torsion = (i1, i2, i3, i4)

        # get_n_fold reads self.mode: give it a default so that a freshly
        # built Torsion does not raise AttributeError. Callers may still
        # overwrite the attribute after construction.
        self.mode = mode

    def in_cycle(self, graph):
        '''
        Returns True if the torsion is part of a cycle, that is if
        i1 and i4 are still connected after removing the i2-i3 edge.

        The graph is modified temporarily: the edge is added back
        before returning, even if the path search raises.
        '''
        graph.remove_edge(self.i2, self.i3)
        try:
            return has_path(graph, self.i1, self.i4)
        finally:
            graph.add_edge(self.i2, self.i3)

    def is_rotable(self, graph, hydrogen_bonds, keepdummy=False) -> bool:
        '''
        Returns True, and sets self.n_fold, if the torsion should be rotated.

        hydrogen bonds: iterable with pairs of sorted atomic indices
        (lists and tuples are both accepted)
        keepdummy: if True, skip the _is_nondummy checks

        '''
        central_bond = tuple(sorted((self.i2, self.i3)))

        # compare as tuples, so that pairs match whether they
        # were provided as lists, tuples or array rows
        if any(tuple(hb) == central_bond for hb in hydrogen_bonds):
            # central bond is a hydrogen bond: never rotate it
            return False

        if _is_free(self.i2, graph) or (
            _is_free(self.i3, graph)):

            if keepdummy or (
                _is_nondummy(self.i2, self.i3, graph) and (
                _is_nondummy(self.i3, self.i2, graph))):

                self.n_fold = self.get_n_fold(graph)
                return True

        return False

    def get_n_fold(self, graph) -> int:
        '''
        Returns the number of rotational steps (2, 3, 4 or 6, or the
        sp_n of i3 in 'symmetry' mode) to be used for this torsion,
        based on the atomic numbers and get_sp_n of the central atoms.
        '''

        nums = (graph.nodes[self.i2]['atomnos'],
                graph.nodes[self.i3]['atomnos'])

        if 1 in nums:
            return 6 # H-N, H-O hydrogen bonds

        if is_amide_n(self.i2, graph, mode=2) or (
           is_amide_n(self.i3, graph, mode=2)):
            # tertiary amides rotations are 2-fold
            return 2

        if (6 in nums) or (7 in nums) or (16 in nums): # if C, N or S atoms

            sp_n_i2 = get_sp_n(self.i2, graph)
            sp_n_i3 = get_sp_n(self.i3, graph)

            if 3 == sp_n_i2 == sp_n_i3:
                return 3

            if 3 in (sp_n_i2, sp_n_i3): # Csp3-X, Nsp3-X, Ssulfone-X

                if self.mode == 'csearch':
                    return 3

                elif self.mode == 'symmetry':
                    # NOTE(review): this returns the sp_n of i3 whichever
                    # end is the sp3 one - confirm this is intended
                    return sp_n_i3

                # any other mode falls through to the checks below

            if 2 in (sp_n_i2, sp_n_i3):
                return 2

        return 4 #O-O, S-S, Ar-Ar, Ar-CO, and everything else

    def get_angles(self):
        '''
        Returns the tuple of rotation angles (degrees) for self.n_fold.
        Raises KeyError if n_fold is not 2, 3, 4 or 6.
        '''
        return {
                2:(0, 180),
                3:(0, 120, 240),
                4:(0, 90, 180, 270),
                6:(0, 60, 120, 180, 240, 300),
                }[self.n_fold]

    def sort_torsion(self, graph, constrained_indices) -> None:
        '''
        Acts on the self.torsion tuple leaving it as it is or
        reversing it, so that the first index of it (from which
        rotation will act) is external to the molecule constrained
        indices. That is we make sure to rotate external groups
        and not the whole structure.

        constrained_indices: array of indices, flattened before use.
        The graph is modified temporarily and restored before returning,
        even if the path search raises.
        '''
        graph.remove_edge(self.i2, self.i3)
        try:
            for d in constrained_indices.flatten():
                if has_path(graph, self.i2, d):
                    # NOTE(review): the tuple is reversed once for every
                    # constrained index reachable from i2, so an even number
                    # of them leaves it unchanged - confirm this is intended
                    self.torsion = tuple(reversed(self.torsion))
        finally:
            graph.add_edge(self.i2, self.i3)
|
|
147
|
+
|
|
148
|
+
def _is_free(index, graph):
    '''
    Return False if the atom at the given index is any of:
    - a carbon atom (atomnos 6) flagged by is_sp_n(index, graph, 2)
      and bonded to at least one oxygen (sp2 carbonyl carbon)
    - an amide nitrogen, according to is_amide_n with mode=1
    - an ester oxygen, according to is_ester_o

    Return True in every other case.

    '''
    # the three carbonyl criteria are all evaluated upfront
    is_carbon = graph.nodes[index]['atomnos'] == 6
    is_sp2 = is_sp_n(index, graph, 2)
    has_oxygen_neighbor = any(graph.nodes[n]['atomnos'] == 8
                              for n in neighbors(graph, index))

    if is_carbon and is_sp2 and has_oxygen_neighbor:
        return False

    hindered = is_amide_n(index, graph, mode=1) or is_ester_o(index, graph)
    return not hindered
|
|
171
|
+
|
|
172
|
+
def _is_nondummy(i, root, graph) -> bool:
    '''
    Checks that a molecular rotation along the dihedral
    angle (*, root, i, *) is non-dummy, that is the atom
    at index i, in the direction opposite to the one leading
    to root, has different substituents. i.e. methyl, CF3 and tBu
    rotations should return False.

    i: index of the atom whose substituents are compared
    root: index of the neighbor of i on the other end of the bond
          (must be bonded to i, or list.remove raises ValueError)
    graph: molecular graph with an 'atomnos' attribute on each node.
           It is deep-copied, so the caller's graph is never modified.

    Returns True if the rotation is non-dummy, False otherwise.
    '''

    if graph.nodes[i]['atomnos'] not in (6,7):
        return True
    # for now, we only discard rotations around carbon
    # and nitrogen atoms, like methyl/tert-butyl/triphenyl
    # and flat symmetrical rings like phenyl, N-pyrrolyl...

    # work on a copy, since edges are removed below
    G = deepcopy(graph)
    nb = neighbors(G, i)
    nb.remove(root)
    # nb now holds the substituents of i other than root

    if len(nb) == 1:
        if len(neighbors(G, nb[0])) == 2:
            return False
    # if node i has two bonds only (one with root and one with a)
    # and the other atom (a) has two bonds only (one with i)
    # the rotation is considered dummy: some other rotation
    # will account for its freedom (i.e. alkynes, hydrogen bonds)

    # check if it is a phenyl-like rotation
    if len(nb) == 2:

        # get the 6 indices of the aromatic atoms (i1-i6)
        phenyl_indices = _get_phenyl_ids(i, G)

        # compare the two halves of the 6-membered ring (indices i2-i3 region with i5-i6 region)
        if phenyl_indices is not None:
            i1, i2, i3, i4, i5, i6 = phenyl_indices

            # detach i1 and i4 from the ring, so that the
            # i2-i3 and i5-i6 halves end up in separate components
            G.remove_edge(i3, i4)
            G.remove_edge(i4, i5)
            G.remove_edge(i1, i2)
            G.remove_edge(i1, i6)

            subgraphs = [subgraph(G, _set) for _set in connected_components(G)
                         if i2 in _set or i6 in _set]

            if len(subgraphs) == 2:
                # non-dummy only if the two halves differ (element-wise graph match)
                return not is_isomorphic(subgraphs[0], subgraphs[1],
                                         node_match=lambda n1, n2: n1['atomnos'] == n2['atomnos'])

            # We should not end up here, but if we do, rotation should not be dummy
            return True

    # if not, compare immediate neighbors of i
    for n in nb:
        G.remove_edge(i, n)

    # make a set of each fragment around the chopped n-i bonds,
    # but only for fragments that are not root nor contain other random,
    # disconnected parts of the graph
    subgraphs_nodes = [_set for _set in connected_components(G)
                       if root not in _set and any(
                           n in _set for n in nb
                       )]

    if len(subgraphs_nodes) == 1:
        return True
        # if not, the torsion is likely to be rotable
        # (tetramethylguanidyl alanine C(β)-N bond)

    # the rotation is non-dummy as soon as one fragment
    # is not isomorphic (element-wise) to the first one
    subgraphs = [subgraph(G, s) for s in subgraphs_nodes]
    for sub in subgraphs[1:]:
        if not is_isomorphic(subgraphs[0], sub,
                             node_match=lambda n1, n2: n1['atomnos'] == n2['atomnos']):
            return True
    # Care should be taken because chiral centers are not taken into account: a rotation
    # involving an index where substituents only differ by stereochemistry, and where a
    # rotation is not an element of symmetry of the subsystem, the rotation is considered
    # dummy even if it would be more correct not to. For rotationally corrected RMSD this
    # should only cause small inefficiencies and not lead to discarding any good conformer.

    return False
|
|
252
|
+
|
|
253
|
+
def _get_hydrogen_bonds(coords, atomnos, graph, d_min=2.5, d_max=3.3, max_angle=45, fragments=None):
    '''
    Returns a list of sorted two-element lists with the
    indices of hydrogen bonding partners (H atom and the
    heteroatom it is hydrogen-bonded to).

    An HB is a pair of atoms:
    - with one H and one X (N or O) atom
    - with an Y-X distance between d_min and d_max (i.e. N-O, Angstroms)
    - with the angle between the Het-H and Het-Het directions
      below max_angle (degrees)

    At most one HB is recorded for each pair of heteroatoms.

    If fragments is specified (iterable of iterable of indices for each fragment)
    the function only returns inter-fragment hydrogen bonds.
    '''

    hbs = []
    # initializing output list

    het_idx = np.array([i for i, a in enumerate(atomnos) if a in (7,8)], dtype=int)
    # indices where N or O atoms are present. Let's ignore F for now.

    # loop over every unordered pair of heteroatoms
    for i, i1 in enumerate(het_idx):
        for i2 in het_idx[i+1:]:

            if fragments is not None:
                if any(((i1 in f and i2 in f) for f in fragments)):
                    continue
                # if inter-fragment HBs are requested, skip intra-HBs

            if d_min < norm_of(coords[i1]-coords[i2]) < d_max:
            # getting all pairs of O/N atoms between these distances

                Hs = [i for i in (neighbors(graph, i1) +
                                  neighbors(graph, i2)) if graph.nodes[i]['atomnos'] == 1]
                # getting the indices of all H atoms attached to them

                versor = norm(coords[i2]-coords[i1])
                # versor connecting the two heteroatoms

                for iH in Hs:

                    v1 = coords[iH]-coords[i1]
                    v2 = coords[iH]-coords[i2]
                    # vectors connecting heteroatoms to H

                    d1 = norm_of(v1)
                    d2 = norm_of(v2)
                    # lengths of these vectors

                    l1 = v1 @ versor
                    l2 = v2 @ -versor
                    # scalar projection in the heteroatom direction

                    alfa = vec_angle(v1, versor) if l1 < l2 else vec_angle(v2, -versor)
                    # largest planar angle between Het-H and Het-Het, in degrees (0 to 90°)

                    if alfa < max_angle:
                    # if the three atoms are not too far from being in line

                        # pair the H with the heteroatom that is farther from it
                        if d1 < d2:
                            hbs.append(sorted((iH,i2)))
                        else:
                            hbs.append(sorted((iH,i1)))
                        # adding the correct pair of atoms to results

                        # one HB per heteroatom pair: stop looking at other Hs
                        break

    return hbs
|
|
320
|
+
|
|
321
|
+
def _get_rotation_mask(graph, torsion):
    '''
    Get mask for the atoms that will rotate in a torsion:
    all the ones in the graph reachable from the last index
    of the torsion but not going through the central two
    atoms in the torsion quadruplet.

    graph: molecular graph. The i2-i3 edge is removed temporarily
           and always added back, even if the path search raises.
    torsion: quadruplet of indices (i1, i2, i3, i4)

    Returns a boolean array with one entry per graph node.
    If more than half of the nodes would rotate, the selection
    is inverted. The entry at position i3 is always False.

    '''
    _, i2, i3, i4 = torsion

    graph.remove_edge(i2, i3)
    try:
        reachable_indices = shortest_path(graph, i4).keys()
        # get all indices reachable from i4 not going through i2-i3

    finally:
        graph.add_edge(i2, i3)
        # restore modified graph, whatever happened above

    mask = np.array([i in reachable_indices for i in graph.nodes], dtype=bool)
    # generate boolean mask

    if np.count_nonzero(mask) > len(mask) // 2:
        mask = ~mask
        # if we want to rotate more than half of the indices,
        # invert the selection so that we do less math

    mask[i3] = False
    # do not rotate i3: would not move,
    # since it lies on rotation axis

    return mask
|
|
351
|
+
|
|
352
|
+
def _get_quadruplets(graph):
    '''
    Returns an array of quadruplets that indicate potential torsions:
    one four-index path, as given by findPaths(graph, node, 3), for
    each distinct central bond (i2, i3). The first path found for a
    given central bond is the one that is kept.
    '''

    quadruplets = []
    seen_bonds = set()
    # central bonds already represented: a set makes the
    # duplicate check constant-time instead of a list scan

    for node in graph:
        for path in findPaths(graph, node, 3):
            # get all possible continuous indices quadruplets

            _, i2, i3, _ = path
            bond = tuple(sorted((i2, i3)))

            if bond not in seen_bonds:
                seen_bonds.add(bond)
                quadruplets.append(path)

    # Yields non-redundant quadruplets
    # Rejects (4,3,2,1) if (1,2,3,4) is present

    return np.array(quadruplets)
|
|
376
|
+
|
|
377
|
+
def _get_torsions(graph, hydrogen_bonds, double_bonds, keepdummy=False, mode="csearch"):
    '''
    Build the list of Torsion objects worth rotating.

    A quadruplet from _get_quadruplets is kept only if its
    central bond is not in double_bonds, it is not part of
    a cycle and Torsion.is_rotable accepts it.

    mode is stored on each Torsion as its .mode attribute.
    '''

    rotable = []
    for quadruplet in _get_quadruplets(graph):
        _, left, right, _ = quadruplet
        central_bond = tuple(sorted((left, right)))

        # never rotate around double bonds
        if central_bond in double_bonds:
            continue

        candidate = Torsion(*quadruplet)
        candidate.mode = mode

        # ring bonds are skipped before any rotability check is run
        if candidate.in_cycle(graph):
            continue

        if candidate.is_rotable(graph, hydrogen_bonds, keepdummy=keepdummy):
            rotable.append(candidate)

    return rotable
|
|
398
|
+
|
|
399
|
+
def _group_torsions_dbscan(coords, torsions, max_size=5):
    '''
    Group torsions by spatial proximity of their central bonds.

    The midpoint of each i2-i3 bond is clustered with dbscan
    (min_samples=1), lowering eps from 10 to 2 in steps of 0.5
    until the biggest cluster has at most max_size members. If
    that never happens, the clustering at the last eps is used.

    Returns a list of lists of Torsion objects, sorted by
    length so that the largest groups come last.
    '''
    # midpoint of the central bond of each torsion
    centers = np.array([
        np.mean((coords[i2], coords[i3]), axis=0)
        for _, i2, i3, _ in (t.torsion for t in torsions)
    ])

    for eps in np.arange(10, 1.5, -0.5):
        labels = dbscan(centers, eps=eps, min_samples=1)[1]
        largest = max(np.count_nonzero(labels == label) for label in set(labels))

        if largest <= max_size:
            break

    # one bucket per cluster label of the last clustering performed
    n_clusters = max(labels) + 1
    groups = [[] for _ in range(n_clusters)]
    for torsion, label in zip(torsions, labels):
        groups[label].append(torsion)

    groups.sort(key=len)
    # largest groups last

    return groups
|
|
424
|
+
|
|
425
|
+
def random_csearch(
        coords,
        atomnos,
        torsions,
        graph,
        constrained_indices=None,
        n_out=100,
        max_tries=10000,
        rotations=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False
    ):
    '''
    Random dihedral rotations - quickly generate n_out conformers

    coords: starting coordinates, copied for each new conformer
    atomnos: atomic numbers, used to log element symbols
    torsions: Torsion objects with n_fold already set
    graph: molecular graph, used to build the rotation masks
    constrained_indices: only forwarded to the torsion writer
    n_out: number of output structures
    max_tries: maximum number of angle sets that are tried. The search
               stops after this many tries even if fewer than n_out
               conformers were generated.
    rotations: number of dihedrals to rotate per conformer. If none, all will be rotated
    write_torsions: if True, write the torsions and their centers to file

    Returns an array with the generated structures.
    '''

    t_start_run = time.perf_counter()

    ############################################## LOG TORSIONS

    logfunction('\n> Torsion list: (indices: n-fold)')
    for i, t in enumerate(torsions):
        logfunction(' {:2s} - {:21s} : {}{}{}{} : {}-fold'.format(
                                                                str(i),
                                                                str(t.torsion),
                                                                pt[atomnos[t.torsion[0]]].symbol,
                                                                pt[atomnos[t.torsion[1]]].symbol,
                                                                pt[atomnos[t.torsion[2]]].symbol,
                                                                pt[atomnos[t.torsion[3]]].symbol,
                                                                t.n_fold))

    central_ids = set(flatten([t.torsion[1:3] for t in torsions], int))
    logfunction(f'\n> Rotable bonds ids: {" ".join([str(i) for i in sorted(central_ids)])}')

    if write_torsions:
        _write_torsion_vmd(coords, atomnos, constrained_indices, [torsions], title=title)
        # logging torsions to file

        torsions_indices = [t.torsion for t in torsions]
        torsions_centers = np.array([np.mean((coords[i2], coords[i3]), axis=0) for _, i2, i3, _ in torsions_indices])

        # the midpoints of the central bonds are written out with atomic number 3
        with open(f'{title}_torsion_centers.xyz', 'w') as f:
            write_xyz(torsions_centers, np.array([3 for _ in torsions_centers]), f)

    ############################################## END LOG TORSIONS

    logfunction(f'\n--> Random dihedral CSearch on {title}\n mode 2 (random) - {len(torsions)} torsions')

    # NOTE(review): this builds every combination of angles upfront, so
    # its size grows as the product of the n_fold values - confirm that
    # callers only use this with a manageable number of torsions
    angles = cartesian_product(*[t.get_angles() for t in torsions])
    # calculating the angles for rotation based on step values

    if rotations is not None:
        # only keep the sets that rotate exactly the requested number of bonds
        rotations_mask = (np.count_nonzero(angles, axis=1) == rotations)
        angles = angles[rotations_mask]

    np.random.shuffle(angles)
    # shuffle them so we don't bias conformational sampling

    new_structures = []

    for a, angle_set in enumerate(angles):

        # stop after max_tries attempts, whether or not they were successful
        if a >= max_tries:
            break

        if interactive_print:
            print(f'Generating conformers... ({round(len(new_structures)/n_out*100)} %) {" "*10}', end='\r')

        # get a copy of the molecule position as a starting point
        new_coords = np.copy(coords)

        # initialize the number of bonds that actually rotate
        rotated_bonds = 0

        for t, torsion in enumerate(torsions):
            angle = angle_set[t]

            # for every angle we have to rotate, calculate the new coordinates
            if angle != 0:
                mask = _get_rotation_mask(graph, torsion.torsion)
                temp_coords = rotate_dihedral(new_coords, torsion.torsion, angle, mask=mask)

                # if these coordinates are bad and compenetration is present
                if not torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):

                    # back off five degrees
                    for _ in range(angle//5):
                        temp_coords = rotate_dihedral(temp_coords, torsion.torsion, -5, mask=mask)

                        # and reiterate until we have no more compenetrations,
                        # or until we have undone the previous rotation
                        if torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                            # print(f'------> DEBUG - backed off {_*5}/{angle} degrees')
                            rotated_bonds += 1
                            break

                else:
                    rotated_bonds += 1

                # update the active coordinates with the temp ones
                new_coords = temp_coords

        # add the rotated molecule to the output list
        if rotated_bonds != 0:
            new_structures.append(new_coords)

            # after adding a molecule to the output, check if we
            # have reached the number of desired output structures
            if len(new_structures) == n_out:
                break

    # make an array out of them
    new_structures = np.array(new_structures)

    # Get a descriptor for how exhaustive the sampling has been
    exhaustiveness = len(new_structures) / np.prod([t.n_fold for t in torsions])

    logfunction(f' Generated {len(new_structures)} conformers, (est. {round(100*exhaustiveness, 2)} % of the total conformational space) - CSearch time {time_to_string(time.perf_counter()-t_start_run)}')

    return new_structures
|
|
548
|
+
|
|
549
|
+
def csearch(
        coords,
        atomnos,
        constrained_indices=None,
        keep_hb=False,
        ff_opt=False,
        n=100,
        n_out=100,
        mode=1,
        calc=None,
        method=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False):
    '''
    Entry point of the conformational search: builds the molecular
    graph, finds the rotable torsions and hands them over to
    clustered_csearch (mode 0 or 1) or random_csearch (any other mode).

    n: number of structures to keep from each torsion cluster
    mode: 0 - torsion clustered - keep the n lowest energy conformers
          1 - torsion clustered - keep the n most diverse conformers
          2 - random dihedral rotations - quickly generate n_out conformers

    n_out: maximum number of output structures

    keep_hb: whether to preserve the presence of current hydrogen bonds or not

    Raises SegmentedGraphError if the graph is made of more than one
    fragment and hydrogen bonds cannot link them together.
    If no rotable torsion is found, returns an array holding
    the input coordinates only.
    '''

    # Set default calculator attributes if user did not specify them
    if calc is None:
        calc = FF_CALC
    if method is None:
        method = DEFAULT_FF_LEVELS[calc]

    if constrained_indices is None or len(constrained_indices) == 0:
        logfunction('Free conformational search: no constraints provided.')
        constrained_indices = np.array([])
    else:
        logfunction(f'Constraining {len(constrained_indices)} distance{"s" if len(constrained_indices) > 1 else ""} - {constrained_indices}')

    # build a molecular graph of the TS
    # that includes constrained indices pairs
    graph = graphize(coords, atomnos)
    for i1, i2 in constrained_indices:
        graph.add_edge(i1, i2)

    # ... and hydrogen bonding, if requested: these are the
    # intra/intermolecular hydrogen bonds that we should avoid disrupting
    hydrogen_bonds = []
    if keep_hb:
        hydrogen_bonds = _get_hydrogen_bonds(coords, atomnos, graph)
        for hb in hydrogen_bonds:
            graph.add_edge(*hb)

        if not hydrogen_bonds:
            logfunction('No hydrogen bonds found.')
        else:
            logfunction(f'Preserving {len(hydrogen_bonds)} hydrogen bonds - {hydrogen_bonds}')

    fragments = list(connected_components(graph))
    if len(fragments) > 1:
        # if the molecule graph is not made up of a single connected component

        s = (f'{title} has a segmented connectivity graph: double check the input geometry.\n'
             'if this is supposed to be a complex, FIRECODE was not able to find hydrogen bonds\n'
             'connecting the molecules, and the algorithm is not designed to reliably perform\n'
             'conformational searches on loosely bound multimolecular arrangements.')

        # if we already looked for HBs, raise the error
        if keep_hb:
            raise SegmentedGraphError(s)

        # otherwise, look for INTERFRAGMENT HBs only
        interfragment_hbs = _get_hydrogen_bonds(coords, atomnos, graph, fragments=fragments)
        hydrogen_bonds.extend(interfragment_hbs)

        # if they are not present, raise error
        if not hydrogen_bonds:
            raise SegmentedGraphError(s)

        # otherwise, add the new HBs linking the pieces
        # and make sure that now we only have one connected component
        for hb in hydrogen_bonds:
            graph.add_edge(*hb)

        if len(list(connected_components(graph))) > 1:
            raise SegmentedGraphError(s)

    # get all double bonds - do not rotate these
    double_bonds = get_double_bonds_indices(coords, atomnos)

    # get all torsions that we should explore
    torsions = _get_torsions(graph, hydrogen_bonds, double_bonds)

    if not torsions:
        logfunction(f'No rotable bonds found for {title}.')
        return np.array([coords])

    # sort torsion indices so that first index of each torsion
    # is the half that will move and is external to the structure
    for t in torsions:
        t.sort_torsion(graph, constrained_indices)

    # keyword arguments shared by both search routines
    shared_kwargs = dict(
        constrained_indices=constrained_indices,
        n_out=n_out,
        title=title,
        logfunction=logfunction,
        interactive_print=interactive_print,
        write_torsions=write_torsions,
    )

    if mode not in (0,1):
        return random_csearch(coords, atomnos, torsions, graph, **shared_kwargs)

    return clustered_csearch(
                                coords,
                                atomnos,
                                torsions,
                                graph,
                                ff_opt=ff_opt,
                                n=n,
                                mode=mode,
                                calc=calc,
                                method=method,
                                **shared_kwargs,
                            )
|
|
680
|
+
|
|
681
|
+
def clustered_csearch(
        coords,
        atomnos,
        torsions,
        graph,
        constrained_indices=None,
        ff_opt=False,
        n=100,
        n_out=100,
        mode=1,
        calc=None,
        method=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False):
    '''
    Torsion-clustered conformational search.

    Torsions are split in groups; for each group every combination of
    the torsion angles is applied to every starting point, and the
    resulting structures become the starting points for the next group.

    coords: coordinates of the starting structure
    atomnos: atomic numbers, forwarded to the optimizer and file writers
    torsions: objects exposing .torsion, .n_fold and .get_angles()
    graph: molecular graph, used to get the rotation masks
    constrained_indices: forwarded to the optimizer (and VMD writer)
    ff_opt: if True, optimize each generated structure with calc/method
    n: number of structures to keep from each torsion cluster
       (None disables the intermediate selection)
    n_out: maximum number of output structures
    mode: 0 - torsion clustered - keep the n lowest energy conformers
              (requires ff_opt)
          1 - torsion clustered - keep the n most diverse conformers
    calc, method: forwarded to the optimizer when ff_opt is True
    title: name used in log messages and output file names
    logfunction: callable receiving the log strings
    interactive_print: if True, print progress messages to stdout
    write_torsions: if True, write torsion visualization files

    Returns an array with the selected conformers.

    '''

    assert mode != 0 or ff_opt, 'Either leave mode=1 or turn on force field optimization'
    assert mode in (0,1), 'The mode keyword can only be 0 or 1'

    t_start_run = time.perf_counter()

    # criteria to choose the best structure of each torsional cluster
    tag = ('stable', 'diverse')[mode]

    if len(torsions) < 9:
        grouped_torsions = [torsions]

    else:
        grouped_torsions = _group_torsions_dbscan(coords,
                                                  torsions,
                                                  max_size=3 if ff_opt else 5)

    ############################################## LOG TORSIONS

    logfunction('\n> Torsion list: (indices: n-fold)')
    for i, t in enumerate(torsions):
        logfunction(' {} - {:21s} : {}-fold'.format(i, str(t.torsion), t.n_fold))

    central_ids = set(flatten([t.torsion[1:3] for t in torsions], int))
    logfunction(f'\n> Rotable bonds ids: {" ".join([str(i) for i in sorted(central_ids)])}')

    if write_torsions:
        # logging torsions to file. The VMD writer iterates over the
        # constrained indices, so never hand it None.
        _write_torsion_vmd(coords,
                           atomnos,
                           constrained_indices if constrained_indices is not None else (),
                           grouped_torsions,
                           title=title)

        # NOTE(review): the torsion-centers file is treated as part of the
        # optional torsion logging - confirm against the upstream layout.
        torsions_indices = [t.torsion for t in torsions]
        torsions_centers = np.array([np.mean((coords[i2], coords[i3]), axis=0) for _, i2, i3, _ in torsions_indices])

        with open(f'{title}_torsion_centers.xyz', 'w') as f:
            write_xyz(torsions_centers, np.array([3 for _ in torsions_centers]), f)

    ############################################## END LOG TORSIONS

    logfunction(f'\n--> Clustered CSearch on {title}\n mode {mode} ({"stability" if mode == 0 else "diversity"}) - ' +
                f'{len(torsions)} torsions in {len(grouped_torsions)} group{"s" if len(grouped_torsions) != 1 else ""} - ' +
                f'{[len(t) for t in grouped_torsions]}')

    # the quadruplets do not change between groups: build the array once
    torsion_array = np.array([t.torsion for t in torsions])

    output_structures = []

    # energies of output_structures, kept in the same order. Set to None
    # as soon as the energy of an accumulated structure becomes unknown.
    output_energies = []

    starting_points = [coords]
    for tg, torsions_group in enumerate(grouped_torsions):

        # calculating the angles for rotation based on step values
        angles = cartesian_product(*[t.get_angles() for t in torsions_group])
        candidates = len(angles)*len(starting_points)

        logfunction(f'\n> Group {tg+1}/{len(grouped_torsions)} - {len(torsions_group)} bonds, ' +
                    f'{[t.n_fold for t in torsions_group]} n-folds, {len(starting_points)} ' +
                    f'starting point{"s" if len(starting_points) > 1 else ""} = {candidates} conformers')

        new_structures = []

        for s, sp in enumerate(starting_points):

            if interactive_print:
                print(f'Generating conformers... ({round(s/len(starting_points)*100)} %) {" "*10}', end='\r')

            new_structures.append(sp)

            for angle_set in angles:

                # get a copy of the molecule position as a starting point
                new_coords = np.copy(sp)

                # initialize the number of bonds that actually rotate
                rotated_bonds = 0

                for t, torsion in enumerate(torsions_group):
                    angle = angle_set[t]

                    if angle != 0:
                        # for every angle we have to rotate, calculate the new coordinates
                        mask = _get_rotation_mask(graph, torsion.torsion)
                        temp_coords = rotate_dihedral(new_coords, torsion.torsion, angle, mask=mask)

                        if not torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                            # if these coordinates are bad and compenetration is present,
                            # back off five degrees at a time until we have no more
                            # compenetrations, or until we have undone the previous rotation

                            for _ in range(angle//5):
                                temp_coords = rotate_dihedral(temp_coords, torsion.torsion, -5, mask=mask)

                                if torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                                    rotated_bonds += 1
                                    break

                        else:
                            rotated_bonds += 1

                        # update the active coordinates with the temp ones
                        new_coords = temp_coords

                if rotated_bonds != 0:
                    # add the rotated molecule to the output list
                    new_structures.append(new_coords)

        new_structures = np.array(new_structures)

        energies = None
        if ff_opt:

            # imported at call time, as in the original code, but only once
            from firecode.optimization_methods import optimize

            t_start = time.perf_counter()

            energies = np.zeros(new_structures.shape[0])
            for c, new_coords in enumerate(np.copy(new_structures)):

                opt_coords, energy, success = optimize(new_coords,
                                                       atomnos,
                                                       calc,
                                                       method=method,
                                                       constrained_indices=constrained_indices)

                if success:
                    new_structures[c] = opt_coords
                    energies[c] = energy

                else:
                    # failed optimizations get a huge energy so that
                    # energy-based selections discard them first
                    energies[c] = 1E10

            logfunction(f'Optimized {len(new_structures)} structures at {method} level ({time_to_string(time.perf_counter()-t_start)})')

        # energies of the structures that survive this group's selection
        kept_energies = energies

        if tg+1 != len(grouped_torsions):
            if n is not None and len(new_structures) > n:

                if mode == 0:
                    # stable sort: ties keep their generation order
                    order = np.argsort(energies, kind='stable')[:n]
                    new_structures = new_structures[order]
                    kept_energies = energies[order]

                if mode == 1:
                    new_structures = most_diverse_conformers(n, new_structures, torsion_array,
                                                             energies=energies,
                                                             interactive_print=interactive_print)
                    # the selection returns structures only: their energies
                    # cannot be matched back to the survivors
                    kept_energies = None

            logfunction(f' Kept the most {tag} {len(new_structures)} starting points for next rotation cluster')

        output_structures.extend(new_structures)
        starting_points = new_structures

        if output_energies is not None:
            if kept_energies is None:
                output_energies = None
            else:
                output_energies.extend(kept_energies)

    output_structures = np.array(output_structures)
    output_structures, kept = prune_conformers_tfd(output_structures, torsion_array)

    if output_energies is not None:
        # NOTE(review): assumes the second value returned by
        # prune_conformers_tfd indexes the structures that were kept
        # (mask or index array) - TODO confirm. If the result does not
        # line up with the pruned structures, energies are discarded.
        output_energies = np.asarray(output_energies)[kept]
        if len(output_energies) != len(output_structures):
            output_energies = None

    # n_out limits the accumulated output, not just the last group
    if len(output_structures) > n_out:

        if mode == 0:
            if output_energies is not None:
                order = np.argsort(output_energies, kind='stable')[:n_out]
                output_structures = output_structures[order]
            else:
                # no reliable energies to rank with: keep generation order
                output_structures = output_structures[:n_out]

        if mode == 1:
            output_structures = most_diverse_conformers(n_out, output_structures,
                                                        torsion_array=torsion_array,
                                                        energies=output_energies,
                                                        interactive_print=interactive_print)

    exhaustiveness = len(output_structures) / np.prod([t.n_fold for t in torsions])

    logfunction(f' Selected the {"best" if mode == 0 else "most diverse"} {len(output_structures)} conformers, corresponding\n' +
                f' to about {round(100*exhaustiveness, 2)} % of the total conformational space - CSearch time {time_to_string(time.perf_counter()-t_start_run)}')

    return output_structures
|
|
875
|
+
|
|
876
|
+
def most_diverse_conformers(n, structures, torsion_array, energies=None, interactive_print=False):
    '''
    Return (at most) the n most diverse structures from the set.
    First removes similar structures based on torsional fingerprints,
    then divides them in n subsets via KMeans and:
    - If the energy list is given, chooses the
      one with the lowest energy from each subset.
    - If it is not, picks from each subset the structure that is
      furthest from the centers of the other subsets.

    n: number of structures to return
    structures: array of structures to choose from
    torsion_array: torsion quadruplets, forwarded to prune_conformers_tfd
    energies: optional energies, one for each input structure
    interactive_print: if True, print progress messages to stdout

    '''

    # if we already have few enough structures to meet the requirement, return them
    if len(structures) <= n:
        return structures

    # For now, the algorithm scales badly with number of clusters.
    # If there are too many to compute, just choose randomly
    # (without replacement, so that no structure is returned twice)
    if n > 300:
        indices = np.sort(np.random.choice(len(structures), size=n, replace=False))
        return np.asarray(structures)[indices]

    if interactive_print:
        print(f'Removing similar structures...{" "*10}', end='\r')

    # remove structures with too similar TFPs
    n_before_pruning = len(structures)
    structures, kept = prune_conformers_tfd(structures, torsion_array)

    if energies is not None:
        energies = np.asarray(energies)

        # NOTE(review): assumes the second value returned by
        # prune_conformers_tfd indexes the structures that were kept
        # (mask or index array) - TODO confirm.
        if len(energies) == n_before_pruning:
            energies = energies[kept]

        # energies that do not line up with the structures cannot be
        # trusted: fall back to the purely geometric selection
        if len(energies) != len(structures):
            energies = None

    # if we already pruned enough structures to meet the requirement, return them
    if len(structures) <= n:
        return structures

    if interactive_print:
        print(f'Aligning structures...{" "*10}', end='\r')

    # flatten each aligned structure to a 1D feature vector to cluster them with KMeans
    structures = align_structures(structures)
    features = structures.reshape((structures.shape[0], structures.shape[1]*structures.shape[2]))

    if interactive_print:
        print(f'Performing KMeans clustering...{" "*10}', end='\r')

    # Generate and train the model
    kmeans = KMeans(n_clusters=n)
    kmeans.fit(features)

    # if energies are given, pick the lowest energy structure from each non-empty cluster
    if energies is not None:
        clusters = [[] for _ in range(n)]
        for coords, energy, c in zip(structures, energies, kmeans.labels_):
            clusters[c].append((coords, energy))

        output = [min(group, key=lambda x: x[1])[0] for group in clusters if group]

    # if not, from each non-empty cluster yield the structure that is more distant from the other clusters
    else:
        centers = kmeans.cluster_centers_.reshape((n, *structures.shape[1:3]))

        clusters = [[] for _ in range(n)]
        for coords, c in zip(structures, kmeans.labels_):
            clusters[c].append(coords)

        r = np.arange(len(clusters))
        output = []

        # take one from each non-empty cluster
        for k, cluster in enumerate(clusters):

            if cluster:
                # centers of every cluster but the one being processed
                other_centers = centers[r != k]
                cumdists = [np.sum(np.linalg.norm(other_centers - ref, axis=2)) for ref in cluster]

                furthest = cluster[int(np.argmax(cumdists))]
                output.append(furthest)

    return np.array(output)
|
|
952
|
+
|
|
953
|
+
def _write_torsion_vmd(coords, atomnos, constrained_indices, grouped_torsions, title='test'):
    '''
    Write {title}.xyz and a {title}_torsional_clusters.vmd script in the
    current working directory. The script loads the .xyz file, adds one
    representation for the two central atoms of each torsion (one color
    per torsion group) and a bond label for each constrained pair.

    coords, atomnos: structure written to the .xyz file
    constrained_indices: iterable of index pairs (None is treated as empty)
    grouped_torsions: iterable of groups of objects exposing .torsion
    title: base name of the two output files

    '''
    from itertools import cycle

    # build the path with os.path.join so that the separator is right
    # on every platform
    xyz_path = os.path.join(os.getcwd(), f'{title}.xyz')
    with open(xyz_path, 'w') as f:
        write_xyz(coords, atomnos, f)

    lines = [
        'display resetview\n',
        'mol new {%s}\n' % xyz_path,
        'mol representation Lines 2\n',
        'mol color ColorID 16\n',
    ]

    # cycle through the colors so that groups beyond the sixth are still drawn
    for group, color in zip(grouped_torsions, cycle((7,9,10,11,29,16))):
        for torsion in group:
            central_atoms = ' '.join([str(i) for i in torsion.torsion[1:-1]])
            lines.append('mol selection index %s\n' % central_atoms +
                         'mol representation CPK 0.7 0.5 50 50\n' +
                         f'mol color ColorID {color}\n' +
                         'mol material Transparent\n' +
                         'mol addrep top\n')

    if constrained_indices is not None:
        for a, b in constrained_indices:
            lines.append(f'label add Bonds 0/{a} 0/{b}\n')

    path = os.path.join(os.getcwd(), f'{title}_torsional_clusters.vmd')
    with open(path, 'w') as f:
        f.write(''.join(lines))
|
|
979
|
+
|
|
980
|
+
def rotationally_corrected_rmsd_and_max(ref, coord, atomnos, torsions, graph, angles, debugfunction=None):
    '''
    Return the (rmsd, maxdev) pair given by rmsd_and_max_numba between
    ref and coord on the atoms with atomnos != 1, after each torsion of
    coord has been rotated by the angle, among angles[i], that gives the
    lowest RMSD on the four torsion atoms.

    ref, coord: reference and trial coordinates
    atomnos: atomic numbers, used to leave hydrogens out of the final RMSD
    torsions: sequence of four-index torsions
    graph: molecular graph, used to get the rotation masks
    angles: angles[i] holds the trial angles for torsions[i]
    debugfunction: optional callable receiving one message per torsion

    '''

    heavy_atoms = (atomnos != 1)
    applied_angles = []

    for index, quadruplet in enumerate(torsions):

        lowest_rmsd = 1E10
        chosen_angle = 0

        # try every angle, moving only the last atom of the torsion,
        # and remember the one giving the lowest four-atom RMSD
        for trial in angles[index]:

            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    trial,
                                    indices_to_be_moved=[quadruplet[3]])

            local_rmsd = rmsd_and_max_numba(ref[quadruplet], coord[quadruplet])[0]

            if local_rmsd < lowest_rmsd:
                lowest_rmsd, chosen_angle = local_rmsd, trial

            # undo the trial rotation instead of working on a copy of coord
            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    -trial,
                                    indices_to_be_moved=[quadruplet[3]])

        applied_angles.append(chosen_angle)

        # apply the best angle for real (with the full rotation mask)
        # before moving on to the next torsion
        if chosen_angle != 0:
            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    chosen_angle,
                                    mask=_get_rotation_mask(graph, quadruplet))

        if debugfunction is not None:
            global_rmsd = rmsd_and_max_numba(ref[heavy_atoms], coord[heavy_atoms])[0]
            debugfunction(f"Torsion {index+1} - {quadruplet}: best corr = {chosen_angle}°, 4-atom RMSD: " +
                          f"{lowest_rmsd:.3f} A, global RMSD: {global_rmsd:.3f}")

    # every torsion is in its best orientation now:
    # get RMSD and maximum deviation (only on heavy atoms)
    rmsd, maxdev = rmsd_and_max_numba(ref[heavy_atoms], coord[heavy_atoms])

    # roll back the applied rotations, last one first. The original comment
    # explains this is needed for segmented graphs, where only a subset of
    # the graph may have been rotated.
    for quadruplet, applied in zip(reversed(torsions), reversed(applied_angles)):
        coord = rotate_dihedral(coord,
                                quadruplet,
                                -applied,
                                mask=_get_rotation_mask(graph, quadruplet))

    return rmsd, maxdev
|