firecode 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. firecode/TEST_NOTEBOOK.ipynb +3940 -0
  2. firecode/__init__.py +0 -0
  3. firecode/__main__.py +118 -0
  4. firecode/_gaussian.py +97 -0
  5. firecode/algebra.py +405 -0
  6. firecode/ase_manipulations.py +879 -0
  7. firecode/atropisomer_module.py +516 -0
  8. firecode/automep.py +130 -0
  9. firecode/calculators/__init__.py +29 -0
  10. firecode/calculators/_gaussian.py +98 -0
  11. firecode/calculators/_mopac.py +242 -0
  12. firecode/calculators/_openbabel.py +154 -0
  13. firecode/calculators/_orca.py +129 -0
  14. firecode/calculators/_xtb.py +786 -0
  15. firecode/concurrent_test.py +119 -0
  16. firecode/embedder.py +2590 -0
  17. firecode/embedder_options.py +577 -0
  18. firecode/embeds.py +881 -0
  19. firecode/errors.py +65 -0
  20. firecode/graph_manipulations.py +333 -0
  21. firecode/hypermolecule_class.py +364 -0
  22. firecode/mep_relaxer.py +199 -0
  23. firecode/modify_settings.py +186 -0
  24. firecode/mprof.py +65 -0
  25. firecode/multiembed.py +148 -0
  26. firecode/nci.py +186 -0
  27. firecode/numba_functions.py +260 -0
  28. firecode/operators.py +776 -0
  29. firecode/optimization_methods.py +609 -0
  30. firecode/parameters.py +84 -0
  31. firecode/pka.py +275 -0
  32. firecode/profiler.py +17 -0
  33. firecode/pruning.py +421 -0
  34. firecode/pt.py +32 -0
  35. firecode/quotes.json +6651 -0
  36. firecode/quotes.py +9 -0
  37. firecode/reactive_atoms_classes.py +666 -0
  38. firecode/references.py +11 -0
  39. firecode/rmsd.py +74 -0
  40. firecode/settings.py +75 -0
  41. firecode/solvents.py +126 -0
  42. firecode/tests/C2F2H4.xyz +10 -0
  43. firecode/tests/C2H4.xyz +8 -0
  44. firecode/tests/CH3Cl.xyz +7 -0
  45. firecode/tests/HCOOH.xyz +7 -0
  46. firecode/tests/HCOOOH.xyz +8 -0
  47. firecode/tests/chelotropic.txt +3 -0
  48. firecode/tests/cyclical.txt +3 -0
  49. firecode/tests/dihedral.txt +2 -0
  50. firecode/tests/string.txt +3 -0
  51. firecode/tests/trimolecular.txt +9 -0
  52. firecode/tests.py +151 -0
  53. firecode/torsion_module.py +1035 -0
  54. firecode/utils.py +541 -0
  55. firecode-1.0.0.dist-info/LICENSE +165 -0
  56. firecode-1.0.0.dist-info/METADATA +321 -0
  57. firecode-1.0.0.dist-info/RECORD +59 -0
  58. firecode-1.0.0.dist-info/WHEEL +5 -0
  59. firecode-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1035 @@
1
+ # coding=utf-8
2
+ '''
3
+ FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
4
+ Copyright (C) 2021-2024 Nicolò Tampellini
5
+
6
+ SPDX-License-Identifier: LGPL-3.0-or-later
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU Lesser General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public License
19
+ along with this program. If not, see
20
+ https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
21
+
22
+ '''
23
+ import os
24
+ import time
25
+ from copy import deepcopy
26
+
27
+ import numpy as np
28
+ from networkx import (connected_components, has_path, is_isomorphic,
29
+ shortest_path, subgraph)
30
+ from sklearn.cluster import KMeans, dbscan
31
+
32
+ from firecode.algebra import norm, norm_of, vec_angle
33
+ from firecode.errors import SegmentedGraphError
34
+ from firecode.graph_manipulations import (_get_phenyl_ids, findPaths, get_sp_n,
35
+ is_amide_n, is_ester_o, is_sp_n,
36
+ neighbors)
37
+ from firecode.hypermolecule_class import graphize
38
+ from firecode.numba_functions import prune_conformers_tfd, torsion_comp_check
39
+ from firecode.pt import pt
40
+ from firecode.rmsd import rmsd_and_max_numba
41
+ from firecode.settings import DEFAULT_FF_LEVELS, FF_CALC
42
+ from firecode.utils import (align_structures, cartesian_product, flatten,
43
+ get_double_bonds_indices, rotate_dihedral,
44
+ time_to_string, write_xyz)
45
+
46
+
47
class Torsion:
    '''
    Quadruplet of atomic indices (i1, i2, i3, i4) describing a
    potential rotation around the central i2-i3 bond.

    Attributes set by the class:
    - i1, i2, i3, i4: the indices, as passed to the constructor
    - torsion: the same indices as a tuple, possibly reversed by sort_torsion
    - mode: 'csearch' or 'symmetry', read by get_n_fold
    - n_fold: set by is_rotable when the torsion is found to be rotable
    '''

    def __repr__(self):
        if hasattr(self, 'n_fold'):
            return f'Torsion({self.i1}, {self.i2}, {self.i3}, {self.i4}; {self.n_fold}-fold)'
        return f'Torsion({self.i1}, {self.i2}, {self.i3}, {self.i4})'

    def __init__(self, i1, i2, i3, i4, mode='csearch'):
        '''
        mode: 'csearch' (default) or 'symmetry'. It can still be
        overwritten after construction by assigning to self.mode.
        '''
        self.i1 = i1
        self.i2 = i2
        self.i3 = i3
        self.i4 = i4
        self.torsion = (i1, i2, i3, i4)
        self.mode = mode
        # a default is provided so that get_n_fold never fails
        # with AttributeError on a freshly built object

    def in_cycle(self, graph):
        '''
        Returns True if the torsion is part of a cycle, that is if
        i1 and i4 are still connected once the i2-i3 edge is removed.
        The graph is temporarily modified and restored before returning.
        '''
        graph.remove_edge(self.i2, self.i3)
        try:
            return has_path(graph, self.i1, self.i4)
        finally:
            graph.add_edge(self.i2, self.i3)
            # restore the edge even if has_path raised

    def is_rotable(self, graph, hydrogen_bonds, keepdummy=False) -> bool:
        '''
        hydrogen bonds: iterable with pairs of sorted atomic indices

        Returns True, and sets self.n_fold, if the central bond is not
        a hydrogen bond, at least one of the central atoms is "free"
        (see _is_free) and, unless keepdummy is True, the rotation is
        non-dummy from both sides (see _is_nondummy).
        '''

        central_bond = tuple(sorted((self.i2, self.i3)))
        if any(tuple(hb) == central_bond for hb in hydrogen_bonds):
            # pairs are compared as tuples so that lists, tuples
            # and array rows are all recognized in the same way
            return False

        if _is_free(self.i2, graph) or (
           _is_free(self.i3, graph)):

            if keepdummy or (
                _is_nondummy(self.i2, self.i3, graph) and (
                _is_nondummy(self.i3, self.i2, graph))):

                self.n_fold = self.get_n_fold(graph)
                return True

        return False

    def get_n_fold(self, graph) -> int:
        '''
        Returns the number of rotation steps to explore for this torsion,
        based on the atomic numbers and on the value returned by get_sp_n
        for the two central atoms.
        '''

        nums = (graph.nodes[self.i2]['atomnos'],
                graph.nodes[self.i3]['atomnos'])

        if 1 in nums:
            return 6 # H-N, H-O hydrogen bonds

        if is_amide_n(self.i2, graph, mode=2) or (
           is_amide_n(self.i3, graph, mode=2)):
            # tertiary amides rotations are 2-fold
            return 2

        if (6 in nums) or (7 in nums) or (16 in nums): # if C, N or S atoms

            sp_n_i2 = get_sp_n(self.i2, graph)
            sp_n_i3 = get_sp_n(self.i3, graph)

            if 3 == sp_n_i2 == sp_n_i3:
                return 3

            if 3 in (sp_n_i2, sp_n_i3): # Csp3-X, Nsp3-X, Ssulfone-X

                if self.mode == 'csearch':
                    return 3

                elif self.mode == 'symmetry':
                    return sp_n_i3

                # any other mode falls through to the checks below

            if 2 in (sp_n_i2, sp_n_i3):
                return 2

        return 4 #O-O, S-S, Ar-Ar, Ar-CO, and everything else

    def get_angles(self):
        '''
        Returns the tuple of rotation angles (degrees) for self.n_fold.
        Raises KeyError for n_fold values other than 2, 3, 4 and 6.
        '''
        return {
                2:(0, 180),
                3:(0, 120, 240),
                4:(0, 90, 180, 270),
                6:(0, 60, 120, 180, 240, 300),
                }[self.n_fold]

    def sort_torsion(self, graph, constrained_indices) -> None:
        '''
        Acts on the self.torsion tuple leaving it as it is or
        reversing it, so that the first index of it (from which
        rotation will act) is external to the molecule constrained
        indices. That is we make sure to rotate external groups
        and not the whole structure.

        The tuple is reversed at most once, when at least one of the
        constrained indices is reachable from i2 without crossing the
        i2-i3 bond. constrained_indices can be None, empty, or any
        (nested) sequence of indices.
        '''
        if constrained_indices is None:
            indices = ()
        else:
            indices = np.asarray(constrained_indices).flatten()

        graph.remove_edge(self.i2, self.i3)
        try:
            if any(has_path(graph, self.i2, d) for d in indices):
                # a single reversal: flipping once per reachable index
                # would cancel out for an even number of them
                self.torsion = tuple(reversed(self.torsion))
        finally:
            graph.add_edge(self.i2, self.i3)
            # restore the edge even if has_path raised
147
+
148
def _is_free(index, graph):
    '''
    Tell whether the atom at the given index is "free".

    False is returned when any of these holds:
    - the atom is a carbon for which is_sp_n(index, graph, 2) is
      true and that has at least one oxygen neighbor (carbonyl-like)
    - is_amide_n(index, graph, mode=1) is true (secondary amide N, CONHR)
    - is_ester_o(index, graph) is true (ester oxygen)

    True is returned otherwise.
    '''
    is_carbon = graph.nodes[index]['atomnos'] == 6
    is_sp2 = is_sp_n(index, graph, 2)
    has_oxygen_neighbor = any(graph.nodes[n]['atomnos'] == 8
                              for n in neighbors(graph, index))
    # the three checks are always evaluated, then combined

    carbonyl_like = is_carbon and is_sp2 and has_oxygen_neighbor

    return not (carbonyl_like
                or is_amide_n(index, graph, mode=1)
                or is_ester_o(index, graph))
171
+
172
def _is_nondummy(i, root, graph) -> bool:
    '''
    Checks that a molecular rotation along the dihedral
    angle (*, root, i, *) is non-dummy, that is the atom
    at index i, in the direction opposite to the one leading
    to root, has different substituents. i.e. methyl, CF3 and tBu
    rotations should return False.

    The check works on a deep copy of the graph, so the graph
    passed by the caller is never modified.

    i: index of the atom whose substituents are compared
    root: index of the atom bonded to i, on the other side of the bond.
          It must be a neighbor of i (list.remove raises ValueError otherwise).
    graph: molecular graph with an 'atomnos' attribute on each node
    '''

    if graph.nodes[i]['atomnos'] not in (6,7):
        return True
    # for now, we only discard rotations around carbon
    # and nitrogen atoms, like methyl/tert-butyl/triphenyl
    # and flat symmetrical rings like phenyl, N-pyrrolyl...

    G = deepcopy(graph)
    nb = neighbors(G, i)
    nb.remove(root)
    # nb: neighbors of i, excluding root

    if len(nb) == 1:
        if len(neighbors(G, nb[0])) == 2:
            return False
    # if node i has two bonds only (one with root and one with a)
    # and the other atom (a) has two bonds only (one with i)
    # the rotation is considered dummy: some other rotation
    # will account for its freedom (i.e. alkynes, hydrogen bonds)

    # check if it is a phenyl-like rotation
    if len(nb) == 2:

        # get the 6 indices of the aromatic atoms (i1-i6)
        phenyl_indices = _get_phenyl_ids(i, G)

        # compare the two halves of the 6-membered ring (indices i2-i3 region with i5-i6 region)
        if phenyl_indices is not None:
            i1, i2, i3, i4, i5, i6 = phenyl_indices
            G.remove_edge(i3, i4)
            G.remove_edge(i4, i5)
            G.remove_edge(i1, i2)
            G.remove_edge(i1, i6)
            # cutting these four ring bonds isolates the i2-i3 half from the i5-i6 half

            subgraphs = [subgraph(G, _set) for _set in connected_components(G)
                            if i2 in _set or i6 in _set]

            if len(subgraphs) == 2:
                return not is_isomorphic(subgraphs[0], subgraphs[1],
                                            node_match=lambda n1, n2: n1['atomnos'] == n2['atomnos'])
                # the rotation is dummy only if the two halves have the
                # same connectivity and the same elements

            # We should not end up here, but if we do, rotation should not be dummy
            return True

    # if not, compare immediate neighbors of i
    for n in nb:
        G.remove_edge(i, n)

    # make a set of each fragment around the chopped n-i bonds,
    # but only for fragments that are not root nor contain other random,
    # disconnected parts of the graph
    subgraphs_nodes = [_set for _set in connected_components(G)
                        if root not in _set and any(
                            n in _set for n in nb
                        )]

    if len(subgraphs_nodes) == 1:
        return True
        # if not, the torsion is likely to be rotable
        # (tetramethylguanidyl alanine C(β)-N bond)

    subgraphs = [subgraph(G, s) for s in subgraphs_nodes]
    for sub in subgraphs[1:]:
        if not is_isomorphic(subgraphs[0], sub,
                                node_match=lambda n1, n2: n1['atomnos'] == n2['atomnos']):
            return True
    # every fragment is compared with the first one: a single
    # mismatch is enough to call the rotation non-dummy

    # Care should be taken because chiral centers are not taken into account: a rotation
    # involving an index where substituents only differ by stereochemistry, and where a
    # rotation is not an element of symmetry of the subsystem, the rotation is considered
    # dummy even if it would be more correct not to. For rotationally corrected RMSD this
    # should only cause small inefficiencies and not lead to discarding any good conformer.

    return False
252
+
253
def _get_hydrogen_bonds(coords, atomnos, graph, d_min=2.5, d_max=3.3, max_angle=45, fragments=None):
    '''
    Look for hydrogen bonds and return them as a list of
    sorted [index, index] pairs (hydrogen and acceptor atom).

    Every pair of N/O atoms (atomic numbers 7 and 8) is considered.
    A hydrogen bond is recorded when:
    - the heteroatom-heteroatom distance lies strictly between
      d_min and d_max (Angstroms)
    - one of the hydrogens bonded to either heteroatom forms an
      angle smaller than max_angle (degrees) with the line that
      connects the two heteroatoms

    At most one hydrogen bond is recorded for each heteroatom pair.

    If fragments is given (iterable of iterables of indices), pairs
    where both heteroatoms belong to the same fragment are skipped,
    so that only inter-fragment hydrogen bonds are returned.
    '''

    heteroatoms = np.array([idx for idx, number in enumerate(atomnos) if number in (7,8)], dtype=int)
    # N and O atoms only: F is ignored for now

    found = []

    for position, het_a in enumerate(heteroatoms):
        for het_b in heteroatoms[position+1:]:

            if fragments is not None and any(
                (het_a in frag and het_b in frag) for frag in fragments):
                # same fragment: not an inter-fragment pair
                continue

            if not (d_min < norm_of(coords[het_a]-coords[het_b]) < d_max):
                # heteroatoms too close or too far apart
                continue

            hydrogens = [idx for idx in (neighbors(graph, het_a) +
                                         neighbors(graph, het_b)) if graph.nodes[idx]['atomnos'] == 1]
            # all the H atoms bonded to either heteroatom

            axis = norm(coords[het_b]-coords[het_a])
            # versor connecting the two heteroatoms

            for h in hydrogens:

                to_a = coords[h]-coords[het_a]
                to_b = coords[h]-coords[het_b]
                # vectors going from each heteroatom to the hydrogen

                dist_a = norm_of(to_a)
                dist_b = norm_of(to_b)

                proj_a = to_a @ axis
                proj_b = to_b @ -axis
                # scalar projections along the heteroatom-heteroatom direction

                if proj_a < proj_b:
                    angle = vec_angle(to_a, axis)
                else:
                    angle = vec_angle(to_b, -axis)
                # angle between Het-H and Het-Het, measured from the
                # heteroatom with the smaller projection

                if angle < max_angle:
                    # the three atoms are close enough to being in line:
                    # pair the hydrogen with the heteroatom that is farther from it
                    acceptor = het_b if dist_a < dist_b else het_a
                    found.append(sorted((h, acceptor)))
                    break

    return found
320
+
321
def _get_rotation_mask(graph, torsion):
    '''
    Build the boolean mask of the atoms that move when
    rotating around the central bond of a torsion.

    The starting selection is made of every node reachable from
    the last index of the quadruplet once the central i2-i3 edge
    is removed. If that is more than half of the nodes, the
    selection is inverted. The third index of the quadruplet is
    always excluded. The graph is restored before returning.

    The mask follows the order of graph.nodes and is indexed with
    the third torsion index.
    NOTE(review): this presumes node labels are 0..N-1 in order - confirm.
    '''
    _first, axis_start, axis_end, last = torsion

    # temporarily cut the central bond and collect what last can still reach
    graph.remove_edge(axis_start, axis_end)
    reachable = shortest_path(graph, last).keys()
    graph.add_edge(axis_start, axis_end)

    mask = np.fromiter((node in reachable for node in graph.nodes), dtype=bool)

    # rotating the smaller half means less math
    if mask.sum() > len(mask)//2:
        mask = np.logical_not(mask)

    # axis_end lies on the rotation axis: it would not move anyway
    mask[axis_end] = False

    return mask
351
+
352
def _get_quadruplets(graph):
    '''
    Returns an array of quadruplets that indicate potential torsions.

    Every path of four connected indices found by findPaths(graph, node, 3)
    is considered, for each node of the graph. Only the first quadruplet
    found for each central (i2, i3) bond is kept, regardless of direction:
    (4,3,2,1) is rejected if (1,2,3,4) is already present.
    '''

    quadruplets = []
    seen_bonds = set()
    # a set makes the "already seen" check constant-time

    for node in graph:
        for path in findPaths(graph, node, 3):
            _, i2, i3, _ = path
            q_id = tuple(sorted((i2, i3)))

            if q_id not in seen_bonds:
                quadruplets.append(path)
                seen_bonds.add(q_id)

    return np.array(quadruplets)
376
+
377
def _get_torsions(graph, hydrogen_bonds, double_bonds, keepdummy=False, mode="csearch"):
    '''
    Build the list of Torsion objects that represent rotable bonds.

    Starting from the non-redundant quadruplets of the graph, a
    quadruplet is discarded when its central bond (as a sorted tuple)
    is listed in double_bonds, when the torsion is part of a cycle,
    or when Torsion.is_rotable returns False.

    mode is stored on each Torsion before testing it.
    '''

    rotable = []

    for quadruplet in _get_quadruplets(graph):
        _, center_a, center_b, _ = quadruplet

        if tuple(sorted((center_a, center_b))) in double_bonds:
            # double bonds are never rotated
            continue

        candidate = Torsion(*quadruplet)
        candidate.mode = mode

        if candidate.in_cycle(graph):
            continue

        if candidate.is_rotable(graph, hydrogen_bonds, keepdummy=keepdummy):
            rotable.append(candidate)

    return rotable
398
+
399
def _group_torsions_dbscan(coords, torsions, max_size=5):
    '''
    Split torsions into groups, clustering the midpoints of
    their central bonds with dbscan (min_samples=1).

    The eps value is lowered from 10 to 2 in steps of 0.5, stopping
    at the first clustering where the largest cluster has at most
    max_size members. If no value satisfies the condition, the
    clustering from the last eps value is used.

    Returns a list of lists of Torsion objects, sorted by
    increasing size (largest groups last).
    '''
    midpoints = np.array([np.mean((coords[i2], coords[i3]), axis=0)
                          for _, i2, i3, _ in (t.torsion for t in torsions)])
    # midpoint of the central bond of each torsion

    for eps in np.arange(10, 1.5, -0.5):
        _, labels = dbscan(midpoints, eps=eps, min_samples=1)
        n_clusters = max(labels) + 1
        largest = max(np.count_nonzero(labels == label) for label in np.unique(labels))

        if largest <= max_size:
            break

    groups = [[] for _ in range(n_clusters)]
    for torsion, label in zip(torsions, labels):
        groups[label].append(torsion)

    groups.sort(key=len)
    # largest groups last

    return groups
424
+
425
def random_csearch(
        coords,
        atomnos,
        torsions,
        graph,
        constrained_indices=None,
        n_out=100,
        max_tries=10000,
        rotations=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False
    ):
    '''
    Random dihedral rotations - quickly generate n_out conformers

    coords: coordinates of the starting structure
    atomnos: atomic numbers, used for logging
    torsions: list of Torsion objects with n_fold set
    graph: molecular graph, used to get the rotation masks
    constrained_indices: only passed to the torsion writer
    n_out: number of output structures
    max_tries: if n_out conformers are not generated after these number of tries, stop trying
    rotations: number of dihedrals to rotate per conformer. If none, all will be rotated
    write_torsions: if True, write the torsion files for visualization

    Returns an array with the generated structures.
    '''

    t_start_run = time.perf_counter()

    ############################################## LOG TORSIONS

    logfunction('\n> Torsion list: (indices: n-fold)')
    for i, t in enumerate(torsions):
        logfunction(' {:2s} - {:21s} : {}{}{}{} : {}-fold'.format(
            str(i),
            str(t.torsion),
            pt[atomnos[t.torsion[0]]].symbol,
            pt[atomnos[t.torsion[1]]].symbol,
            pt[atomnos[t.torsion[2]]].symbol,
            pt[atomnos[t.torsion[3]]].symbol,
            t.n_fold))

    central_ids = set(flatten([t.torsion[1:3] for t in torsions], int))
    logfunction(f'\n> Rotable bonds ids: {" ".join([str(i) for i in sorted(central_ids)])}')

    if write_torsions:
        _write_torsion_vmd(coords, atomnos, constrained_indices, [torsions], title=title)
        # logging torsions to file

        torsions_indices = [t.torsion for t in torsions]
        torsions_centers = np.array([np.mean((coords[i2], coords[i3]), axis=0) for _, i2, i3, _ in torsions_indices])

        with open(f'{title}_torsion_centers.xyz', 'w') as f:
            write_xyz(torsions_centers, np.array([3 for _ in torsions_centers]), f)

    ############################################## END LOG TORSIONS

    logfunction(f'\n--> Random dihedral CSearch on {title}\n mode 2 (random) - {len(torsions)} torsions')

    angles = cartesian_product(*[t.get_angles() for t in torsions])
    # calculating the angles for rotation based on step values

    if rotations is not None:
        selected_rows = (np.count_nonzero(angles, axis=1) == rotations)
        angles = angles[selected_rows]
        # only keep angle sets that rotate exactly this many dihedrals

    np.random.shuffle(angles)
    # shuffle them so we don't bias conformational sampling

    masks = {}
    # rotation masks only depend on graph and torsion: compute each once.
    # NOTE(review): assumes rotate_dihedral and torsion_comp_check do
    # not modify the mask they receive - confirm

    new_structures = []

    for a, angle_set in enumerate(angles):

        if a >= max_tries:
            # stop after exactly max_tries attempts
            break

        if interactive_print:
            print(f'Generating conformers... ({round(len(new_structures)/n_out*100)} %) {" "*10}', end='\r')

        # get a copy of the molecule position as a starting point
        new_coords = np.copy(coords)

        # initialize the number of bonds that actually rotate
        rotated_bonds = 0

        for t, torsion in enumerate(torsions):
            angle = angle_set[t]

            # for every angle we have to rotate, calculate the new coordinates
            if angle != 0:

                if t not in masks:
                    masks[t] = _get_rotation_mask(graph, torsion.torsion)
                mask = masks[t]

                temp_coords = rotate_dihedral(new_coords, torsion.torsion, angle, mask=mask)

                # if these coordinates are bad and compenetration is present
                if not torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):

                    # back off five degrees
                    for _ in range(int(angle // 5)):
                        temp_coords = rotate_dihedral(temp_coords, torsion.torsion, -5, mask=mask)

                        # and reiterate until we have no more compenetrations,
                        # or until we have undone the previous rotation
                        if torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                            rotated_bonds += 1
                            break

                else:
                    rotated_bonds += 1

                # update the active coordinates with the temp ones
                new_coords = temp_coords

        # add the rotated molecule to the output list
        if rotated_bonds != 0:
            new_structures.append(new_coords)

        # check if we have reached the number of desired output structures
        if len(new_structures) >= n_out:
            break

    # make an array out of them
    new_structures = np.array(new_structures)

    # Get a descriptor for how exhaustive the sampling has been
    exhaustiveness = len(new_structures) / np.prod([t.n_fold for t in torsions])

    logfunction(f' Generated {len(new_structures)} conformers, (est. {round(100*exhaustiveness, 2)} % of the total conformational space) - CSearch time {time_to_string(time.perf_counter()-t_start_run)}')

    return new_structures
548
+
549
def csearch(
        coords,
        atomnos,
        constrained_indices=None,
        keep_hb=False,
        ff_opt=False,
        n=100,
        n_out=100,
        mode=1,
        calc=None,
        method=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False):
    '''
    Entry point for the conformational search: builds the molecular
    graph, finds the rotable torsions and hands them to the
    clustered or random search routine.

    n: number of structures to keep from each torsion cluster
    mode: 0 - torsion clustered - keep the n lowest energy conformers
          1 - torsion clustered - keep the n most diverse conformers
          any other value (documented: 2) - random dihedral
          rotations - quickly generate n_out conformers

    n_out: maximum number of output structures

    keep_hb: whether to preserve the presence of current hydrogen bonds or not

    calc, method: calculator and level, defaulting to FF_CALC
    and DEFAULT_FF_LEVELS[calc] when left to None

    Raises SegmentedGraphError if the connectivity graph is made of
    more than one piece and hydrogen bonds cannot join the pieces.
    Returns an array of structures (the input one only, if no
    rotable bond is found).
    '''

    # fall back to the default calculator attributes if not specified
    if calc is None:
        calc = FF_CALC
    if method is None:
        method = DEFAULT_FF_LEVELS[calc]

    has_constraints = constrained_indices is not None and len(constrained_indices) > 0
    if has_constraints:
        logfunction(f'Constraining {len(constrained_indices)} distance{"s" if len(constrained_indices) > 1 else ""} - {constrained_indices}')
    else:
        logfunction('Free conformational search: no constraints provided.')
        constrained_indices = np.array([])

    # molecular graph of the TS, with constrained pairs treated as bonds
    graph = graphize(coords, atomnos)
    for index_a, index_b in constrained_indices:
        graph.add_edge(index_a, index_b)

    # hydrogen bonds that should not be disrupted, if requested
    hydrogen_bonds = []
    if keep_hb:
        hydrogen_bonds = _get_hydrogen_bonds(coords, atomnos, graph)
        for pair in hydrogen_bonds:
            graph.add_edge(*pair)

        if hydrogen_bonds:
            logfunction(f'Preserving {len(hydrogen_bonds)} hydrogen bonds - {hydrogen_bonds}')
        else:
            logfunction('No hydrogen bonds found.')

    fragments = list(connected_components(graph))
    if len(fragments) > 1:
        # the molecular graph is made up of more than one connected component

        message = (f'{title} has a segmented connectivity graph: double check the input geometry.\n'
                   'if this is supposed to be a complex, FIRECODE was not able to find hydrogen bonds\n'
                   'connecting the molecules, and the algorithm is not designed to reliably perform\n'
                   'conformational searches on loosely bound multimolecular arrangements.')

        # hydrogen bonds were already looked for: nothing else to try
        if keep_hb:
            raise SegmentedGraphError(message)

        # otherwise, look for inter-fragment hydrogen bonds only
        hydrogen_bonds.extend(_get_hydrogen_bonds(coords, atomnos, graph, fragments=fragments))
        if not hydrogen_bonds:
            raise SegmentedGraphError(message)

        # link the pieces and make sure a single component is left
        for pair in hydrogen_bonds:
            graph.add_edge(*pair)

        if len(list(connected_components(graph))) > 1:
            raise SegmentedGraphError(message)

    # double bonds are not to be rotated
    double_bonds = get_double_bonds_indices(coords, atomnos)

    # all the torsions that should be explored
    torsions = _get_torsions(graph, hydrogen_bonds, double_bonds)

    # orient each torsion with respect to the constrained indices
    for torsion in torsions:
        torsion.sort_torsion(graph, constrained_indices)

    if not torsions:
        logfunction(f'No rotable bonds found for {title}.')
        return np.array([coords])

    # keyword arguments shared by both search routines
    shared = dict(
        constrained_indices=constrained_indices,
        n_out=n_out,
        title=title,
        logfunction=logfunction,
        interactive_print=interactive_print,
        write_torsions=write_torsions,
    )

    if mode in (0,1):
        return clustered_csearch(
            coords,
            atomnos,
            torsions,
            graph,
            ff_opt=ff_opt,
            n=n,
            mode=mode,
            calc=calc,
            method=method,
            **shared
        )

    return random_csearch(
        coords,
        atomnos,
        torsions,
        graph,
        **shared
    )
680
+
681
def _energies_of_selected(selected, pool, pool_energies):
    '''
    Return the energies of the selected structures, looking each of
    them up in pool (exact coordinate match), or None if pool_energies
    is None or if any selected structure is not found in pool.
    '''
    if pool_energies is None:
        return None

    table = {}
    for structure, energy in zip(pool, pool_energies):
        table.setdefault(np.asarray(structure).tobytes(), energy)
        # first occurrence wins for duplicated structures

    try:
        return np.array([table[np.asarray(structure).tobytes()] for structure in selected])
    except KeyError:
        return None

def clustered_csearch(
        coords,
        atomnos,
        torsions,
        graph,
        constrained_indices=None,
        ff_opt=False,
        n=100,
        n_out=100,
        mode=1,
        calc=None,
        method=None,
        title='test',
        logfunction=print,
        interactive_print=True,
        write_torsions=False):
    '''
    Torsion-clustered conformational search: torsions are split in
    groups, and each group is explored exhaustively starting from
    the structures retained from the previous group.

    coords, atomnos: starting structure
    torsions: list of Torsion objects with n_fold set
    graph: molecular graph, used to get the rotation masks
    constrained_indices: passed to the optimizer and the torsion writer
    ff_opt: whether to optimize every generated structure with calc/method
    n: number of structures to keep from each torsion cluster
    mode: 0 - torsion clustered - keep the n lowest energy conformers
          1 - torsion clustered - keep the n most diverse conformers

    n_out: maximum number of output structures

    Raises AssertionError if mode is not 0 or 1, or if mode is 0
    and ff_opt is False. Returns the array of selected structures.
    '''

    assert mode != 0 or ff_opt, 'Either leave mode=1 or turn on force field optimization'
    assert mode in (0,1), 'The mode keyword can only be 0 or 1'

    t_start_run = time.perf_counter()

    tag = ('stable', 'diverse')[mode]
    # criteria to choose the best structure of each torsional cluster

    if len(torsions) < 9:
        grouped_torsions = [torsions]

    else:
        grouped_torsions = _group_torsions_dbscan(coords,
                                                  torsions,
                                                  max_size=3 if ff_opt else 5)

    ############################################## LOG TORSIONS

    logfunction('\n> Torsion list: (indices: n-fold)')
    for i, t in enumerate(torsions):
        logfunction(' {} - {:21s} : {}-fold'.format(i, str(t.torsion), t.n_fold))

    central_ids = set(flatten([t.torsion[1:3] for t in torsions], int))
    logfunction(f'\n> Rotable bonds ids: {" ".join([str(i) for i in sorted(central_ids)])}')

    if write_torsions:
        _write_torsion_vmd(coords, atomnos, constrained_indices, grouped_torsions, title=title)
        # logging torsions to file

        torsions_indices = [t.torsion for t in torsions]
        torsions_centers = np.array([np.mean((coords[i2], coords[i3]), axis=0) for _, i2, i3, _ in torsions_indices])

        with open(f'{title}_torsion_centers.xyz', 'w') as f:
            write_xyz(torsions_centers, np.array([3 for _ in torsions_centers]), f)

    ############################################## END LOG TORSIONS

    logfunction(f'\n--> Clustered CSearch on {title}\n mode {mode} ({"stability" if mode == 0 else "diversity"}) - ' +
                f'{len(torsions)} torsions in {len(grouped_torsions)} group{"s" if len(grouped_torsions) != 1 else ""} - ' +
                f'{[len(t) for t in grouped_torsions]}')

    torsion_array = np.array([t.torsion for t in torsions])
    # same for every group: computed once

    if ff_opt:
        from firecode.optimization_methods import optimize
        # imported here, only when needed, as in the original code

    output_structures = []
    output_energies = []
    energies_tracked = True
    # output_energies is kept parallel to output_structures for as
    # long as the energy of every retained structure is known

    starting_points = [coords]
    for tg, torsions_group in enumerate(grouped_torsions):

        angles = cartesian_product(*[t.get_angles() for t in torsions_group])
        candidates = len(angles)*len(starting_points)
        # calculating the angles for rotation based on step values

        logfunction(f'\n> Group {tg+1}/{len(grouped_torsions)} - {len(torsions_group)} bonds, ' +
                    f'{[t.n_fold for t in torsions_group]} n-folds, {len(starting_points)} ' +
                    f'starting point{"s" if len(starting_points) > 1 else ""} = {candidates} conformers')

        masks = {}
        # rotation masks only depend on graph and torsion: compute each once.
        # NOTE(review): assumes rotate_dihedral and torsion_comp_check do
        # not modify the mask they receive - confirm

        new_structures = []

        for s, sp in enumerate(starting_points):

            if interactive_print:
                print(f'Generating conformers... ({round(s/len(starting_points)*100)} %) {" "*10}', end='\r')

            new_structures.append(sp)

            for angle_set in angles:

                new_coords = np.copy(sp)
                # get a copy of the molecule position as a starting point

                rotated_bonds = 0
                # initialize the number of bonds that actually rotate

                for t, torsion in enumerate(torsions_group):
                    angle = angle_set[t]

                    if angle != 0:

                        if t not in masks:
                            masks[t] = _get_rotation_mask(graph, torsion.torsion)
                        mask = masks[t]

                        temp_coords = rotate_dihedral(new_coords, torsion.torsion, angle, mask=mask)
                        # for every angle we have to rotate, calculate the new coordinates

                        if not torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                            # if these coordinates are bad and compenetration is present

                            for _ in range(int(angle // 5)):
                                temp_coords = rotate_dihedral(temp_coords, torsion.torsion, -5, mask=mask)
                                # back off five degrees

                                if torsion_comp_check(temp_coords, torsion=torsion.torsion, mask=mask, thresh=1.5):
                                    rotated_bonds += 1
                                    break
                                # and reiterate until we have no more compenetrations,
                                # or until we have undone the previous rotation

                        else:
                            rotated_bonds += 1

                        new_coords = temp_coords
                        # update the active coordinates with the temp ones

                if rotated_bonds != 0:
                    new_structures.append(new_coords)
                    # add the rotated molecule to the output list

        new_structures = np.array(new_structures)

        energies = None
        if ff_opt:

            t_start = time.perf_counter()

            energies = np.zeros(new_structures.shape[0])
            for c, new_coords in enumerate(np.copy(new_structures)):

                opt_coords, energy, success = optimize(new_coords,
                                                       atomnos,
                                                       calc,
                                                       method=method,
                                                       constrained_indices=constrained_indices)

                if success:
                    new_structures[c] = opt_coords
                    energies[c] = energy

                else:
                    energies[c] = 1E10
                    # failed optimizations keep their input geometry
                    # and get a very high energy

            logfunction(f'Optimized {len(new_structures)} structures at {method} level ({time_to_string(time.perf_counter()-t_start)})')

        if tg+1 != len(grouped_torsions):
            if n is not None and len(new_structures) > n:

                if mode == 0:
                    order = np.argsort(energies, kind='stable')[:n]
                    new_structures = new_structures[order]
                    energies = energies[order]
                    # structures and energies are trimmed together

                if mode == 1:
                    pool, pool_energies = new_structures, energies
                    new_structures = most_diverse_conformers(n, pool, torsion_array,
                                                             energies=pool_energies,
                                                             interactive_print=interactive_print)
                    energies = _energies_of_selected(new_structures, pool, pool_energies)
                    # recover the energies of the structures that were kept, if possible

                logfunction(f' Kept the most {tag} {len(new_structures)} starting points for next rotation cluster')

        output_structures.extend(new_structures)

        if energies is None:
            energies_tracked = False
        else:
            output_energies.extend(energies)

        starting_points = new_structures

    pool = np.array(output_structures)
    pool_energies = np.array(output_energies) if energies_tracked else None

    output_structures, _ = prune_conformers_tfd(pool, torsion_array)
    energies = _energies_of_selected(output_structures, pool, pool_energies)
    # energies of the structures that survived pruning, or None if unknown

    if len(output_structures) > n_out:

        if mode == 0:
            if energies is not None:
                order = np.argsort(energies, kind='stable')[:n_out]
                output_structures = np.asarray(output_structures)[order]
            else:
                # energies could not be matched to the pruned
                # structures: keep the first n_out of them
                output_structures = np.asarray(output_structures)[0:n_out]

        if mode == 1:
            output_structures = most_diverse_conformers(n_out, output_structures,
                                                        torsion_array=torsion_array,
                                                        energies=energies,
                                                        interactive_print=interactive_print)

    exhaustiveness = len(output_structures) / np.prod([t.n_fold for t in torsions])

    logfunction(f' Selected the {"best" if mode == 0 else "most diverse"} {len(output_structures)} conformers, corresponding\n' +
                f' to about {round(100*exhaustiveness, 2)} % of the total conformational space - CSearch time {time_to_string(time.perf_counter()-t_start_run)}')

    return output_structures
875
+
876
def most_diverse_conformers(n, structures, torsion_array, energies=None, interactive_print=False):
    '''
    Return (at most) the n most diverse structures from the set.

    First removes similar structures based on torsional fingerprints,
    then divides the survivors in n subsets with KMeans and:
    - If the energy list is given, chooses the one with the
      lowest energy from each non-empty cluster.
    - If it is not, picks from each non-empty cluster the structure
      that is the most distant from the centers of the other clusters.

    n: number of structures to select.
    structures: 3D array of structures (first axis runs over structures).
    torsion_array: torsions, forwarded to prune_conformers_tfd.
    energies: optional sequence with one energy per input structure.
    interactive_print: if True, print progress messages to stdout.

    return: the structures themselves if there are n or fewer of them
    (before or after the similarity pruning), otherwise an array with
    one structure for each non-empty cluster.

    '''

    if len(structures) <= n:
        return structures
        # if we already have few enough structures to meet the requirement, return them

    if n > 300:
        # For now, the algorithm scales badly with number of clusters.
        # If there are too many to compute, just choose randomly.
        # replace=False is required: sampling with replacement (the numpy
        # default) would return the same structure more than once.
        indices = np.sort(np.random.choice(len(structures), size=n, replace=False))
        return np.asarray(structures)[indices]

    if interactive_print:
        print(f'Removing similar structures...{" "*10}', end='\r')

    n_before_pruning = len(structures)
    structures, mask = prune_conformers_tfd(structures, torsion_array)
    # remove structures with too similar TFPs

    if energies is not None:
        # Keep energies aligned with the structures that survived the pruning.
        # NOTE(review): this assumes the second return value of prune_conformers_tfd
        # is a boolean keep-mask over the input structures - confirm. If it does not
        # look like one, energies are left untouched (previous behavior).
        mask = np.asarray(mask)
        if (
            mask.dtype == bool
            and mask.shape == (n_before_pruning,)
            and len(energies) == n_before_pruning
            and np.count_nonzero(mask) == len(structures)
        ):
            energies = np.asarray(energies)[mask]

    if len(structures) <= n:
        return structures
        # if we already pruned enough structures to meet the requirement, return them

    if interactive_print:
        print(f'Aligning structures...{" "*10}', end='\r')

    structures = align_structures(structures)
    features = structures.reshape((structures.shape[0], structures.shape[1]*structures.shape[2]))
    # flatten each structure to a 1D feature vector to cluster them with KMeans

    if interactive_print:
        print(f'Performing KMeans clustering...{" "*10}', end='\r')

    kmeans = KMeans(n_clusters=n)
    kmeans.fit(features)
    # Generate and train the model

    labels = kmeans.labels_

    # if energies are given, pick the lowest energy structure from each non-empty cluster
    if energies is not None:
        clusters = [[] for _ in range(n)]
        for coords, energy, label in zip(structures, energies, labels):
            clusters[label].append((coords, energy))

        # min() returns the first minimum, like taking element 0 of a stable sort;
        # empty clusters are skipped instead of raising on unpacking
        output = [min(group, key=lambda pair: pair[1])[0] for group in clusters if group]

    # if not, from each non-empty cluster yield the structure that is more distant from the other clusters
    else:
        centers = kmeans.cluster_centers_.reshape((n, *structures.shape[1:3]))

        clusters = [[] for _ in range(n)]
        for coords, label in zip(structures, labels):
            clusters[label].append(coords)

        cluster_ids = np.arange(n)
        output = []

        # take one from each non-empty cluster
        for label, cluster in enumerate(clusters):

            if not cluster:
                continue

            # centers of every cluster but the current one: the exclusion has to
            # use the cluster index, not the position of the structure inside it
            other_centers = centers[cluster_ids != label]
            cumdists = [np.sum(np.linalg.norm(other_centers - ref, axis=2)) for ref in cluster]

            output.append(cluster[int(np.argmax(cumdists))])

    return np.array(output)
952
+
953
def _write_torsion_vmd(coords, atomnos, constrained_indices, grouped_torsions, title='test'):
    '''
    Write {title}.xyz and a companion {title}_torsional_clusters.vmd
    script in the current working directory.

    The script loads the .xyz file, adds one representation for the inner
    indices (all but the first and last) of each torsion, colored by the
    group the torsion belongs to, and one bond label for each pair in
    constrained_indices.

    coords, atomnos: forwarded to write_xyz to write the structure.
    constrained_indices: iterable of index pairs (a, b) to be labeled.
    grouped_torsions: iterable of groups of objects with a .torsion attribute.
    title: base name of the two files written.

    '''

    cwd = os.getcwd()
    # same location the relative f'{title}.xyz' would resolve to, but spelled out
    # so that the path written in the script and the file on disk always agree
    xyz_path = os.path.join(cwd, f'{title}.xyz')
    vmd_path = os.path.join(cwd, f'{title}_torsional_clusters.vmd')

    with open(xyz_path, 'w') as f:
        write_xyz(coords, atomnos, f)

    # VMD ColorIDs, one for each group of torsions
    palette = (7, 9, 10, 11, 29, 16)

    lines = [
        'display resetview',
        # os.path.join picks the separator of the running platform: the previous
        # hard-coded backslash produced a broken path everywhere but on Windows
        'mol new {%s}' % xyz_path,
        'mol representation Lines 2',
        'mol color ColorID 16',
    ]

    for g, group in enumerate(grouped_torsions):

        # wrap around the palette, so groups past the sixth are not silently dropped
        color = palette[g % len(palette)]

        for torsion in group:
            lines.extend((
                'mol selection index %s' % (' '.join([str(i) for i in torsion.torsion[1:-1]])),
                'mol representation CPK 0.7 0.5 50 50',
                f'mol color ColorID {color}',
                'mol material Transparent',
                'mol addrep top',
            ))

    for a, b in constrained_indices:
        lines.append(f'label add Bonds 0/{a} 0/{b}')

    with open(vmd_path, 'w') as f:
        f.write(''.join(line + '\n' for line in lines))
979
+
980
def rotationally_corrected_rmsd_and_max(ref, coord, atomnos, torsions, graph, angles, debugfunction=None):
    '''
    Return the RMSD and maximum deviation between ref and coord, computed
    after rotating each torsion of coord by the angle (among angles[i], or
    zero if none improves) that minimizes the RMSD on the torsion atoms only.

    ref, coord: coordinate arrays, indexed by atom.
    atomnos: array of atomic numbers; atoms with value 1 are left out
    of the final RMSD.
    torsions: sequence of atom index sequences (at least four indices each).
    graph: forwarded to _get_rotation_mask to get the atoms to rotate.
    angles: angles[i] is the iterable of candidate angles for torsions[i].
    debugfunction: optional callable, receives one message per torsion.

    return: tuple (rmsd, maxdev), as given by rmsd_and_max_numba.

    '''

    heavy_atoms = (atomnos != 1)
    best_angles = [0] * len(torsions)

    # Rotate every torsion by each candidate angle, looking for the one that minimizes the local RMSD
    for t_idx, quadruplet in enumerate(torsions):

        lowest_local_rmsd = 1E10

        for trial_angle in angles[t_idx]:

            # trial rotation: only the last atom of the torsion is moved
            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    trial_angle,
                                    indices_to_be_moved=[quadruplet[3]])

            local_rmsd, _ = rmsd_and_max_numba(ref[quadruplet], coord[quadruplet])

            if local_rmsd < lowest_local_rmsd:
                lowest_local_rmsd = local_rmsd
                best_angles[t_idx] = trial_angle

            # undoing the trial rotation is cheaper than working on a copy of the coordinates
            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    -trial_angle,
                                    indices_to_be_moved=[quadruplet[3]])

        chosen_angle = best_angles[t_idx]

        # apply the best angle found, this time to the whole rotation mask, before the next torsion
        if chosen_angle != 0:
            coord = rotate_dihedral(coord,
                                    quadruplet,
                                    chosen_angle,
                                    mask=_get_rotation_mask(graph, quadruplet))

        if debugfunction is not None:
            global_rmsd = rmsd_and_max_numba(ref[heavy_atoms], coord[heavy_atoms])[0]
            debugfunction(f"Torsion {t_idx+1} - {quadruplet}: best corr = {chosen_angle}°, 4-atom RMSD: " +
                          f"{lowest_local_rmsd:.3f} A, global RMSD: {global_rmsd:.3f}")

    # every torsion should be in its optimal orientation now:
    # get the RMSD, leaving hydrogen atoms out
    rmsd, maxdev = rmsd_and_max_numba(ref[heavy_atoms], coord[heavy_atoms])

    # graphs could be segmented, so that only the subset of the graph holding
    # the last two torsion indices is rotated: for this reason the applied
    # rotations are undone as well, last to first (would not be needed for connected graphs)
    for t_idx in range(len(torsions) - 1, -1, -1):
        quadruplet = torsions[t_idx]
        coord = rotate_dihedral(coord,
                                quadruplet,
                                -best_angles[t_idx],
                                mask=_get_rotation_mask(graph, quadruplet))

    return rmsd, maxdev