firecode 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. firecode/TEST_NOTEBOOK.ipynb +3940 -0
  2. firecode/__init__.py +0 -0
  3. firecode/__main__.py +118 -0
  4. firecode/_gaussian.py +97 -0
  5. firecode/algebra.py +405 -0
  6. firecode/ase_manipulations.py +879 -0
  7. firecode/atropisomer_module.py +516 -0
  8. firecode/automep.py +130 -0
  9. firecode/calculators/__init__.py +29 -0
  10. firecode/calculators/_gaussian.py +98 -0
  11. firecode/calculators/_mopac.py +242 -0
  12. firecode/calculators/_openbabel.py +154 -0
  13. firecode/calculators/_orca.py +129 -0
  14. firecode/calculators/_xtb.py +786 -0
  15. firecode/concurrent_test.py +119 -0
  16. firecode/embedder.py +2590 -0
  17. firecode/embedder_options.py +577 -0
  18. firecode/embeds.py +881 -0
  19. firecode/errors.py +65 -0
  20. firecode/graph_manipulations.py +333 -0
  21. firecode/hypermolecule_class.py +364 -0
  22. firecode/mep_relaxer.py +199 -0
  23. firecode/modify_settings.py +186 -0
  24. firecode/mprof.py +65 -0
  25. firecode/multiembed.py +148 -0
  26. firecode/nci.py +186 -0
  27. firecode/numba_functions.py +260 -0
  28. firecode/operators.py +776 -0
  29. firecode/optimization_methods.py +609 -0
  30. firecode/parameters.py +84 -0
  31. firecode/pka.py +275 -0
  32. firecode/profiler.py +17 -0
  33. firecode/pruning.py +421 -0
  34. firecode/pt.py +32 -0
  35. firecode/quotes.json +6651 -0
  36. firecode/quotes.py +9 -0
  37. firecode/reactive_atoms_classes.py +666 -0
  38. firecode/references.py +11 -0
  39. firecode/rmsd.py +74 -0
  40. firecode/settings.py +75 -0
  41. firecode/solvents.py +126 -0
  42. firecode/tests/C2F2H4.xyz +10 -0
  43. firecode/tests/C2H4.xyz +8 -0
  44. firecode/tests/CH3Cl.xyz +7 -0
  45. firecode/tests/HCOOH.xyz +7 -0
  46. firecode/tests/HCOOOH.xyz +8 -0
  47. firecode/tests/chelotropic.txt +3 -0
  48. firecode/tests/cyclical.txt +3 -0
  49. firecode/tests/dihedral.txt +2 -0
  50. firecode/tests/string.txt +3 -0
  51. firecode/tests/trimolecular.txt +9 -0
  52. firecode/tests.py +151 -0
  53. firecode/torsion_module.py +1035 -0
  54. firecode/utils.py +541 -0
  55. firecode-1.0.0.dist-info/LICENSE +165 -0
  56. firecode-1.0.0.dist-info/METADATA +321 -0
  57. firecode-1.0.0.dist-info/RECORD +59 -0
  58. firecode-1.0.0.dist-info/WHEEL +5 -0
  59. firecode-1.0.0.dist-info/top_level.txt +1 -0
firecode/operators.py ADDED
@@ -0,0 +1,776 @@
1
+ # coding=utf-8
2
+ '''
3
+ FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
4
+ Copyright (C) 2021-2024 Nicolò Tampellini
5
+
6
+ SPDX-License-Identifier: LGPL-3.0-or-later
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU Lesser General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public License
19
+ along with this program. If not, see
20
+ https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
21
+
22
+ '''
23
+
24
+ # import pickle
25
+ import time
26
+ from subprocess import CalledProcessError
27
+
28
+ import numpy as np
29
+
30
+ from firecode.ase_manipulations import ase_saddle
31
+ from firecode.atropisomer_module import dihedral_scan
32
+ from firecode.automep import automep
33
+ from firecode.calculators._xtb import crest_mtd_search
34
+ from firecode.errors import FatalError, InputError
35
+ from firecode.graph_manipulations import graphize
36
+ from firecode.mep_relaxer import ase_mep_relax
37
+ from firecode.numba_functions import prune_conformers_tfd
38
+ from firecode.optimization_methods import _refine_structures, optimize
39
+ from firecode.pka import pka_routine
40
+ from firecode.pruning import prune_by_rmsd, prune_by_rmsd_rot_corr
41
+ from firecode.settings import (CALCULATOR, DEFAULT_FF_LEVELS, DEFAULT_LEVELS,
42
+ FF_CALC, FF_OPT_BOOL, PROCS)
43
+ from firecode.torsion_module import _get_quadruplets, csearch
44
+ from firecode.utils import (align_structures, get_scan_peak_index,
45
+ molecule_check, read_xyz, time_to_string, write_xyz)
46
+
47
+
48
def operate(input_string, embedder):
    '''
    Perform the operation selected by the operator keyword found
    in input_string and return the name of the .xyz file to read
    instead of the input one.

    input_string: string containing the operator keyword
        (i.e. 'opt>') and the file it acts on.
    embedder: object providing _extract_filename, log, options
        and normal_termination.

    return: name of the file written by the operator, or the
        input filename when the operator writes no new ensemble.

    raise: Exception if no known operator keyword is found.
    '''

    filename = embedder._extract_filename(input_string)

    if not hasattr(embedder, "t_start_run"):
        embedder.t_start_run = time.perf_counter()

    if embedder.options.dryrun:
        embedder.log(f'--> Dry run requested: skipping operator \"{input_string}\"')
        return filename

    # Operators that do not write a new ensemble (scan>, refine>, pka>, and
    # the ones that end with normal_termination) hand back the input file.
    # Binding the name here also prevents an UnboundLocalError at the final
    # return if one of those branches does not stop the program.
    outname = filename

    if 'csearch>' in input_string:
        outname = csearch_operator(filename, embedder)

    elif 'opt>' in input_string:
        outname = opt_operator(filename,
                               embedder,
                               logfunction=embedder.log)

    elif 'csearch_hb>' in input_string:
        outname = csearch_operator(filename, embedder, keep_hb=True)

    elif 'rsearch>' in input_string:
        outname = csearch_operator(filename, embedder, mode=2)

    elif any(string in input_string for string in ('mtd_search>', 'mtd>')):
        outname = mtd_search_operator(filename, embedder)

    elif 'saddle>' in input_string:
        saddle_operator(filename, embedder)
        embedder.normal_termination()

    elif 'scan>' in input_string:
        scan_operator(filename, embedder)

    elif 'automep>' in input_string:
        automep(embedder, n_images=embedder.options.images if hasattr(embedder.options, 'images') else 9)

    elif 'neb>' in input_string:
        neb_operator(filename, embedder)
        embedder.normal_termination()

    elif 'refine>' in input_string:
        # this operator is accounted for in the OptionSetter
        # class of Options, set when the Embedder calls _set_options
        pass

    elif 'pka>' in input_string:
        pka_routine(filename, embedder)

    elif 'mep_relax>' in input_string:
        _mep_relax_operator(filename, embedder)
        embedder.normal_termination()

    else:
        op = input_string.split('>')[0]
        raise Exception(f'Operator {op} not recognized.')

    return outname


def _mep_relax_operator(filename, embedder):
    '''
    Relax the path read from filename with ase_mep_relax: first a
    run titled "<stamp>_safe", then, if that run reports a truthy
    exit status, a second run titled "<stamp>" starting from the
    path returned by the first one.
    '''

    data = read_xyz(filename)
    n_images = embedder.options.images if hasattr(embedder.options, 'images') else None

    # can implement a smart safety feature that is
    # disabled when some bonds have to be let free to break
    no_bonds_breaking = True

    if no_bonds_breaking:
        mep, _, exit_status = ase_mep_relax(
            embedder,
            data.atomcoords,
            data.atomnos,
            title=embedder.stamp+"_safe",
            n_images=n_images,
            logfunction=embedder.log,
            write_plot=True,
            verbose_print=True,
            safe=True
        )

    else:
        mep = data.atomcoords
        exit_status = True

    if not exit_status:
        return

    if no_bonds_breaking:
        print("--> Completed safe optimization, relaxing bond distance constraints.")

    # NOTE(review): the message above announces that constraints are being
    # relaxed, yet this call still passes safe=True - confirm against
    # ase_mep_relax whether safe=False was intended here.
    ase_mep_relax(
        embedder,
        mep,
        data.atomnos,
        title=embedder.stamp,
        n_images=n_images,
        logfunction=embedder.log,
        write_plot=True,
        verbose_print=True,
        safe=True
    )
157
+
158
def csearch_operator(filename, embedder, keep_hb=False, mode=1):
    '''
    Run a torsional conformational search (csearch) from every
    structure found in filename and write all generated conformers
    to "<filename without extension>_confs.xyz".

    filename: name of the .xyz file to read the structures from.
    embedder: object providing log, options and the pairing
        constraints read by _get_internal_constraints.
    keep_hb: forwarded to csearch; the log line describes it as
        preserving the current hydrogen bonds.
    mode: forwarded to csearch as is.

    return: name of the file the conformers were written to.
    '''

    s = f'--> Performing conformational search on {filename}'
    if keep_hb:
        s += ' (preserving current hydrogen bonds)'
    embedder.log(s)

    data = read_xyz(filename)
    n_structures = len(data.atomcoords)

    if n_structures > 1:
        embedder.log('Requested conformational search on multimolecular file - will do\n' +
                     'an individual search from each conformer (might be time-consuming).')

    # both values are the same for every starting structure: compute them once
    constrained_indices = _get_internal_constraints(filename, embedder)

    # the integer division yields 0 when the file holds more structures
    # than max_confs: always ask for at least one conformer per structure
    n_out = max(1, embedder.options.max_confs // n_structures)

    conformers = []
    for i, coords in enumerate(data.atomcoords):

        conf_batch = csearch(
            coords,
            data.atomnos,
            constrained_indices=constrained_indices,
            keep_hb=keep_hb,
            mode=mode,
            n_out=n_out,
            title=f'{filename}_conf{i}',
            logfunction=embedder.log,
            write_torsions=embedder.options.debug
        )

        conformers.extend(conf_batch)

    # merging structures from each run in a single (n, atoms, 3) array
    conformers = np.concatenate(conformers)
    conformers = conformers.reshape(-1, data.atomnos.shape[0], 3)

    print(f'Writing conformers to file...{" "*10}', end='\r')

    confname = filename[:-4] + '_confs.xyz'
    with open(confname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer, data.atomnos, f, title=f'Generated conformer {i}')

    # blank the progress line printed above
    print(f'{" "*30}', end='\r')

    embedder.log('\n')

    return confname
221
+
222
def opt_operator(filename, embedder, logfunction=None):
    '''
    Optimize every conformer of the embedder molecule whose filename
    matches, sort the results by energy, drop the ones with a relative
    energy of 20 kcal/mol or more and write the survivors to
    "<filename without extension>_opt.xyz".

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects, options, log and the
        pairing constraints.
    logfunction: optional callable receiving progress strings.
        When None, nothing is logged by this function.

    return: name of the file the optimized structures were written to.
    '''

    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    if logfunction is not None:
        solvent = embedder.options.solvent
        solvent_tag = f'/{solvent}' if solvent is not None else ''
        logfunction(f'--> Performing {embedder.options.calculator} {embedder.options.theory_level}'
                    f'{solvent_tag} optimization on {filename} ({len(mol.atomcoords)} conformers)')

    constrained_indices = _get_internal_constraints(filename, embedder)
    constrained_distances = [embedder.get_pairing_dists_from_constrained_indices(cp) for cp in constrained_indices]

    t_start = time.perf_counter()

    # calculator, method and procs are passed positionally, right
    # after the coordinates and the atomic numbers
    conformers, energies = _refine_structures(
        mol.atomcoords,
        mol.atomnos,
        *_get_lowest_calc(embedder),
        constrained_indices=constrained_indices,
        constrained_distances=constrained_distances,
        loadstring='Optimizing conformer',
        logfunction=lambda s: embedder.log(s, p=False),
    )

    # sorting structures based on energy (stable, like sorted())
    order = np.argsort(energies, kind='stable')
    energies = np.asarray(energies)[order]
    energies = energies - np.min(energies)
    conformers = np.asarray(conformers)[order]

    # getting the structures to keep (Rel Energy < 20 kcal/mol)
    mask = energies < 20
    n_optimized = len(conformers)
    n_rejected = n_optimized - np.count_nonzero(mask)
    elapsed = time.perf_counter() - t_start

    # applying the mask that rejects high energy confs
    conformers, energies = conformers[mask], energies[mask]

    optname = filename[:-4] + '_opt.xyz'
    with open(optname, 'w') as f:
        for i, conformer in enumerate(align_structures(conformers)):
            write_xyz(conformer, mol.atomnos, f, title=f'Optimized conformer {i} - Rel. E. = {round(energies[i], 3)} kcal/mol')

    # the summary is only built and emitted when a log function was given:
    # calling logfunction unconditionally would fail with the default None
    if logfunction is not None:
        plural = 's' if n_optimized > 1 else ''
        s = (f'Completed optimization on {n_optimized} conformer{plural}. '
             f'({time_to_string(elapsed)}, ~{time_to_string(elapsed/n_optimized)} per structure).\n')

        if n_rejected > 0:
            s += f'Discarded {n_rejected}/{n_optimized} unstable conformers (Rel. E. > 20 kcal/mol)\n'

        logfunction(s+'\n')
        logfunction(f'Wrote {len(conformers)} optimized structures to {optname}\n')

    return optname
276
+
277
def neb_operator(filename, embedder, attempts=5):
    '''
    Run a NEB transition state search (ase_neb) between the structures
    read from filename and write the resulting TS to "<title>_TS.xyz",
    where title is the filename without extension plus "_NEB".

    The input file can contain:
        2 structures: start and end points
        3 structures: start point, TS guess and end point
        an odd number > 3: a full MEP guess, whose middle structure
            is also used as TS guess

    The end points are optimized before the NEB run. If a run ends
    with the "MAX ITER" status, it is restarted from the
    "<title>_MEP_start_of_CI.xyz" checkpoint, up to a total of
    `attempts` runs.

    filename: name of the .xyz file to read the structures from.
    embedder: object providing log, options and procs.
    attempts: maximum number of NEB runs.
    '''

    def signed(value):
        # round() already carries the minus sign of negative numbers,
        # only the plus sign has to be added by hand
        return f'{"+" if value >= 0 else ""}{round(value, 3)}'

    embedder.t_start_run = time.perf_counter()
    data = read_xyz(filename)
    n_str = len(data.atomcoords)

    # a single structure is an odd number of structures too, but
    # there is no path to build from it: it has to be rejected here
    assert n_str == 2 or (n_str >= 3 and n_str % 2 == 1), 'NEB calculations need a .xyz input file with two, three or an odd number of geometries.'

    if n_str == 2:
        reagents, products = data.atomcoords
        ts_guess = None
        mep_override = None
        embedder.log('--> Two structures as input: using them as start and end points.')

    elif n_str == 3:
        reagents, ts_guess, products = data.atomcoords
        mep_override = None
        embedder.log('--> Three structures as input: using them as start, TS guess and end points.')

    else:
        reagents, *_, products = data.atomcoords
        ts_guess = data.atomcoords[n_str//2]
        mep_override = data.atomcoords
        embedder.log(f'--> {n_str} structures as input: using these as the NEB MEP guess.')

    from firecode.ase_manipulations import ase_neb

    title = filename[:-4] + '_NEB'

    # end points are always preoptimized
    embedder.log(f'--> Performing NEB TS optimization. Preoptimizing structures from {filename}\n'
                 f'Theory level is {embedder.options.theory_level}/{embedder.options.solvent or "vacuum"} via {embedder.options.calculator}')

    reagents, reag_energy, _ = optimize(
        reagents,
        data.atomnos,
        embedder.options.calculator,
        method=embedder.options.theory_level,
        procs=embedder.procs,
        solvent=embedder.options.solvent,
        title='reagents',
        logfunction=embedder.log,
    )

    products, prod_energy, _ = optimize(
        products,
        data.atomnos,
        embedder.options.calculator,
        method=embedder.options.theory_level,
        procs=embedder.procs,
        solvent=embedder.options.solvent,
        title='products',
        logfunction=embedder.log,
    )

    if mep_override is not None:
        # keep the MEP guess consistent with the optimized end points
        mep_override[0] = reagents
        mep_override[-1] = products

    for attempt in range(attempts):

        ts_coords, ts_energy, energies, exit_status = ase_neb(
            embedder,
            reagents,
            products,
            data.atomnos,
            n_images=7,
            ts_guess=ts_guess,
            mep_override=mep_override,
            title=title,
            logfunction=embedder.log,
            write_plot=True,
            verbose_print=True
        )

        if exit_status == "CONVERGED":
            break

        # reload the checkpoint whenever another run is going to follow
        if exit_status == "MAX ITER" and attempt + 1 < attempts:
            mep_override = read_xyz(f'{title}_MEP_start_of_CI.xyz').atomcoords
            reagents, *_, products = mep_override
            embedder.log(f'--> Restarting NEB from checkpoint. Attempt {attempt+2}/{attempts}.\n')

    e1 = ts_energy - reag_energy
    e2 = ts_energy - prod_energy

    # NOTE(review): the [:3] / [4:] split presumably matches the seven
    # images requested above, with the TS in the middle - confirm that it
    # still holds when a longer MEP guess is provided as input.
    dg1 = ts_energy - min(energies[:3])
    dg2 = ts_energy - min(energies[4:])

    embedder.log(f'NEB completed, relative energy from start/end points (not barrier heights):\n'
                 f' > E(TS)-E(start): {signed(e1)} kcal/mol\n'
                 f' > E(TS)-E(end) : {signed(e2)} kcal/mol\n')

    embedder.log(f'Barrier heights (based on lowest energy point on each side):\n'
                 f' > E(TS)-E(left) : {signed(dg1)} kcal/mol\n'
                 f' > E(TS)-E(right): {signed(dg2)} kcal/mol')

    if not (e1 > 0 and e2 > 0):
        embedder.log('\nNEB failed, TS energy is not higher than both the start and end points.\n')

    with open(f'{title}_TS.xyz', 'w') as f:
        write_xyz(ts_coords, data.atomnos, f, title='NEB TS - see log for relative energies')
392
+
393
def saddle_operator(filename, embedder):
    '''
    Perform a saddle optimization (ase_saddle) on the single structure
    of the embedder molecule whose filename matches, and write the
    resulting geometry to "<rootname>_saddle.xyz". The file is written
    whether or not the optimization is reported as successful.

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects, options and log.

    raise: AssertionError if the molecule has more than one structure.
    '''

    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    assert len(mol.atomcoords) == 1, 'saddle> operator works with a single structure as input.'

    logfunction = embedder.log
    options = embedder.options

    solvent_tag = f'/{options.solvent}' if options.solvent is not None else ''
    logfunction(f'--> Performing {options.calculator} {options.theory_level}'
                f'{solvent_tag} saddle optimization on {filename}')

    new_structure, energy, success = ase_saddle(
        embedder,
        mol.atomcoords[0],
        mol.atomnos,
        constrained_indices=None,
        mols_graphs=None,
        title=mol.rootname,
        logfile=mol.rootname+"_saddle_opt_log.txt",
        traj=None,
        freq=False,
        maxiterations=200
    )

    # calculator and theory level are separated by a space, as in the log line above
    outcome = 'succeded' if success else 'failed'
    xyz_title = (f"ASE Saddle optimization {outcome} ({options.calculator} "
                 f'{options.theory_level}/{options.solvent})')

    with open(mol.rootname+"_saddle.xyz", 'w') as f:
        write_xyz(new_structure, mol.atomnos, f, xyz_title)

    if success:
        # NOTE(review): the header of this message mentions start/end points,
        # which do not exist for a saddle run - it looks inherited from the
        # NEB operator. Left untouched, since the reference of `energy` is
        # not visible from here.
        embedder.log(
            f'Saddle optimization completed, relative energy from start/end points (not barrier heights):\n'
            f' > E(Saddle_point) : {round(energy, 3)} kcal/mol\n')
428
+
429
def mtd_search_operator(filename, embedder):
    '''
    Run a CREST metadynamic conformational search and return the output filename.

    Every structure of the molecule is first optimized (when
    embedder.options.optimization is set) and checked with
    molecule_check, then used as the starting point of an individual
    CREST run. The collected conformers are pruned by TFD and RMSD
    similarity and written to "<rootname>_mtd_confs.xyz".

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects, options, log, debuglog,
        procs and avail_cpus.

    return: name of the file the conformers were written to.

    raise: InputError if more than 20 structures are given and the
        LET option is not set; FatalError if a pre-optimization
        fails or fails the molecule_check test.
    '''

    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    if not hasattr(mol, 'charge'):
        mol.charge = 0

    # threshold matches the message: exactly 20 structures are still accepted
    if not embedder.options.let and len(mol.atomcoords) > 20:
        raise InputError('The mtd_search> operator was given more than 20 input structures. ' +
                         'This would run >20 metadynamic conformational searches. If this was not a mistake, ' +
                         'add the LET keyword an re-run the job.')

    logfunction = embedder.log
    constrained_indices = _get_internal_constraints(filename, embedder)
    constrained_distances = [embedder.get_pairing_dists_from_constrained_indices(cp) for cp in constrained_indices]

    logfunction(f'--> {filename}: Geometry optimization pre-mtd_search ({embedder.options.theory_level} via {embedder.options.calculator})')

    if len(constrained_indices) > 0:
        # print the constraints array on a single line
        constraints_tag = ": " + str(constrained_indices).replace("\n", " ")
    else:
        constraints_tag = ""
    logfunction(f' {len(constrained_indices)} constraints applied{constraints_tag}')

    for c, coords in enumerate(mol.atomcoords.copy()):
        logfunction(f" Optimizing conformer {c+1}/{len(mol.atomcoords)}")

        if embedder.options.optimization:
            opt_coords, _, success = optimize(
                coords,
                mol.atomnos,
                calculator=embedder.options.calculator,
                method=embedder.options.theory_level,
                solvent=embedder.options.solvent,
                charge=embedder.options.charge,
                procs=embedder.procs,
                constrained_indices=constrained_indices,
                constrained_distances=constrained_distances,
                title=f'{filename.split(".")[0]}_conf{c+1}',
            )
        else:
            # no optimization requested: carry the input geometry over.
            # (unpacking the bare coordinates array into three names,
            # as a single conditional expression would do, fails)
            opt_coords, success = coords, True

        exit_status = "" if success else "CRASHED"

        if success:
            success = molecule_check(coords, opt_coords, mol.atomnos)
            exit_status = "" if success else "SCRAMBLED"

        if not success:
            dumpname = filename.split(".")[0] + f"_conf{c+1}_{exit_status}.xyz"
            with open(dumpname, "w") as f:
                write_xyz(opt_coords, mol.atomnos, f, title=f"{filename}, conformer {c+1}/{len(mol.atomcoords)}, {exit_status}")

            logfunction(f"{filename}, conformer {c+1}/{len(mol.atomcoords)} optimization {exit_status}. Inspect geometry at {dumpname}. Aborting run.")

            raise FatalError(filename)

        # update embedder structures after optimization
        mol.atomcoords[c] = opt_coords

    logfunction()

    # update mol and embedder graph after optimization
    mol.graph = graphize(mol.atomcoords[0], mol.atomnos)
    embedder.graphs = [m.graph for m in embedder.objects]

    max_workers = embedder.avail_cpus//2 or 1

    solvent = embedder.options.solvent
    solvent_tag = f'/{solvent.upper()}' if solvent is not None else ''
    logfunction(f'--> Performing {embedder.options.calculator} GFN2//GFN-FF{solvent_tag} '
                f'metadynamic conformational search on {filename} via CREST.\n'
                f' (2 cores/thread, {max_workers} threads, {embedder.options.kcal_thresh} kcal/mol thr.)')

    if embedder.options.crestnci:
        logfunction('--> CRESTNCI: Running crest in NCI mode (wall potential applied)')

    if len(mol.atomcoords) > 1:
        embedder.log('--> Requested conformational search on multimolecular file - will do\n' +
                     'an individual search from each conformer (might be time-consuming).')

    # arguments shared by the first CREST run and by its fallback
    crest_kwargs = dict(
        constrained_indices=constrained_indices,
        constrained_distances=constrained_distances,
        solvent=embedder.options.solvent,
        charge=mol.charge,
        kcal=embedder.options.kcal_thresh,
        ncimode=embedder.options.crestnci,
        title=mol.rootname+"_mtd_csearch",
        procs=2,
        threads=max_workers,
    )

    t_start = time.perf_counter()
    conformers = []
    for i, coords in enumerate(mol.atomcoords):

        t_start_conf = time.perf_counter()
        try:
            conf_batch = crest_mtd_search(coords, mol.atomnos, **crest_kwargs)

        # if the run errors out, we retry with XTB2
        except CalledProcessError:
            logfunction('--> Metadynamics run failed with GFN2-XTB//GFN-FF, retrying with just GFN2-XTB (slower but more stable)')
            conf_batch = crest_mtd_search(coords, mol.atomnos, method='GFN2-XTB', **crest_kwargs)

        conformers.extend(conf_batch)

        elapsed = time.perf_counter() - t_start_conf
        embedder.log(f' Conformer {i+1:2}/{len(mol.atomcoords):2} - generated {len(conf_batch)} structures in {time_to_string(elapsed)}')

    # merging structures from each run in a single array
    conformers = np.concatenate(conformers)
    conformers = conformers.reshape(-1, mol.atomnos.shape[0], 3)

    embedder.log(f' MTD conformational search: Generated {len(conformers)} conformers in {time_to_string(time.perf_counter()-t_start)}')
    before = len(conformers)

    ### SIMILARITY PRUNING: TFD
    quadruplets = _get_quadruplets(mol.graph)
    conformers, _ = prune_conformers_tfd(conformers, quadruplets)

    ### RMSD - each step only runs if the ensemble is small enough
    if len(conformers) < 5E4:
        conformers, _ = prune_by_rmsd(conformers, mol.atomnos, max_rmsd=embedder.options.rmsd, debugfunction=embedder.debuglog)
    if len(conformers) < 1E3:
        conformers, _ = prune_by_rmsd_rot_corr(conformers, mol.atomnos, mol.graph, max_rmsd=embedder.options.rmsd, debugfunction=embedder.debuglog)

    embedder.log(f' Discarded {before-len(conformers)} RMSD-similar structures ({len(conformers)} left)\n')

    ### PRINTOUT
    outname = f'{mol.rootname}_mtd_confs.xyz'
    with open(outname, 'w') as f:
        for i, new_s in enumerate(conformers):
            write_xyz(new_s, mol.atomnos, f, title=f'Conformer {i}/{len(conformers)} from CREST MTD')

    # check the structures again and warn if some look compenetrated
    embedder.check_objects_compenetration()

    return outname
581
+
582
def scan_operator(filename, embedder):
    '''
    Scan operator dispatcher:
        2 indices: distance_scan
        4 indices: dihedral_scan

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects; it is passed on to the scan.

    return: whatever the selected scan function returns.

    raise: AssertionError if the molecule does not have exactly one
        structure, or does not have two or four reactive indices.
    '''
    mol = next((mol for mol in embedder.objects if mol.filename == filename))

    assert len(mol.atomcoords) == 1, 'The scan> operator works on a single .xyz geometry.'

    n_indices = len(mol.reactive_indices)
    assert n_indices in (2, 4), ('The scan> operator needs two or four indices ' +
                                 f'({n_indices} were provided)')

    if n_indices == 2:
        return distance_scan(embedder)

    return dihedral_scan(embedder)
600
+
601
def distance_scan(embedder):
    '''
    Thought to approach or separate two reactive atoms, looking for the energy maximum.
    Scan direction is inferred from the molecular graph: bonded atoms
    are separated, non-bonded atoms are approached.

    Works on the first molecule of the embedder (embedder.objects[0]).
    At every step the structure is optimized with xtb_opt, with the
    distance between the two reactive indices constrained to the
    current target, which is then moved by 0.05 A.

    Files written:
        temp_scan.xyz: scan points so far, rewritten at each step
        <rootname>_distance_scan_plt.svg: energy profile plot
        <filename without extension>_scan.xyz: all scan structures
        <filename without extension>_scan_max.xyz: structure at the
            index returned by get_scan_peak_index

    Distances and relative energies are also stored in mol.scan_data.
    '''

    import matplotlib.pyplot as plt

    from firecode.algebra import norm_of
    from firecode.pt import pt

    embedder.t_start_run = time.perf_counter()
    mol = embedder.objects[0]

    # start of the whole scan: kept apart from the per-step timer so
    # that the final log line reports the total time
    t_start_scan = time.perf_counter()

    # shorthands for clearer code
    i1, i2 = mol.reactive_indices
    coords = mol.atomcoords[0]

    # getting the start distance between scan indices
    d = norm_of(coords[i1]-coords[i2])

    # deciding if moving atoms closer or further apart: an edge can be
    # listed in either order, so both are checked
    bonds = list(mol.graph.edges)
    bonded = (i1, i2) in bonds or (i2, i1) in bonds
    step = 0.05 if bonded else -0.05

    # logging to file and terminal
    embedder.log(f'--> {mol.rootname} - Performing a distance scan {"approaching" if step < 0 else "separating"} indices {i1} ' +
                 f'and {i2} - step size {round(step, 2)} A\n Theory level is {embedder.options.theory_level}/{embedder.options.solvent or "vacuum"} ' +
                 f'via {embedder.options.calculator}')

    # lists that will hold results and output structures
    dists, energies, structures = [], [], []

    # entries of mol.atomnos for the two atoms, used to look up pt
    s1, s2 = mol.atomnos[[i1, i2]]
    sum_of_radii = pt[s1].covalent_radius + pt[s2].covalent_radius

    # defining the maximum number of iterations
    if step < 0:
        # so that atoms are never forced closer than
        # a proportionally small distance between those two atoms.
        smallest_d = 0.9*sum_of_radii
        max_iterations = round((d-smallest_d) / abs(step))

    else:
        # so that atoms are never spaced too far apart
        max_d = 1.8*sum_of_radii
        max_iterations = round((max_d-d) / abs(step))

    from firecode.calculators._xtb import xtb_opt
    for i in range(max_iterations):

        t_start_step = time.perf_counter()

        coords, energy, _ = xtb_opt(
            coords,
            mol.atomnos,
            constrained_indices=np.array([mol.reactive_indices]),
            constrained_distances=(d,),
            method=embedder.options.theory_level,
            solvent=embedder.options.solvent,
            charge=embedder.options.charge,
            title='temp',
            procs=embedder.procs,
        )

        # energies are relative to the first scan point
        if i == 0:
            e_0 = energy

        # saving the structure, distance and relative energy
        energies.append(energy - e_0)
        dists.append(d)
        structures.append(coords)

        embedder.log(f'Step {i+1}/{max_iterations} - d={round(d, 2)} A - {round(energy-e_0, 2):4} kcal/mol - {time_to_string(time.perf_counter()-t_start_step)}')

        # checkpoint of the scan so far. Loop names are distinct from
        # i and d, which drive the outer loop
        lowest = min(energies)
        with open("temp_scan.xyz", "w") as f:
            for n, (structure, dist, rel_e) in enumerate(zip(structures, dists, energies)):
                write_xyz(structure, mol.atomnos, f, title=f'Scan point {n+1}/{len(structures)} ' +
                          f'- d({i1}-{i2}) = {round(dist, 3)} A - Rel. E = {round(rel_e-lowest, 2)} kcal/mol')

        # modify the target distance and reiterate
        d += step

    id_max = get_scan_peak_index(energies)
    e_max = energies[id_max]
    d_opt = dists[id_max]

    ### Start the plotting sequence

    fig = plt.figure()
    plt.plot(
        dists,
        energies,
        color='tab:red',
        label='Scan energy',
        linewidth=3,
    )

    plt.plot(
        d_opt,
        e_max,
        color='gold',
        label='Energy maximum (TS guess)',
        marker='o',
        markersize=3,
    )

    title = mol.rootname + ' distance scan'
    plt.legend()
    plt.title(title)
    plt.xlabel(f'indices {i1}-{i2} distance (A)')

    if step > 0:
        plt.gca().invert_xaxis()

    plt.ylabel('Rel. E. (kcal/mol)')
    plt.savefig(f'{title.replace(" ", "_")}_plt.svg')

    # release the figure, it is not needed after saving
    plt.close(fig)

    ### Start structure writing

    # print all scan structures
    with open(f'{mol.filename[:-4]}_scan.xyz', 'w') as f:
        for n, (structure, dist, rel_e) in enumerate(zip(structures, dists, energies)):
            write_xyz(structure, mol.atomnos, f, title=f'Scan point {n+1}/{len(structures)} ' +
                      f'- d({i1}-{i2}) = {round(dist, 2)} A - Rel. E = {round(rel_e, 2)} kcal/mol')

    # print the maximum on another file for convienience
    with open(f'{mol.filename[:-4]}_scan_max.xyz', 'w') as f:
        write_xyz(structures[id_max], mol.atomnos, f, title=f'Scan point {id_max+1}/{len(structures)} ' +
                  f'- d({i1}-{i2}) = {round(d_opt, 3)} A - Rel. E = {round(e_max, 3)} kcal/mol')

    embedder.log(f'\n--> Written {len(structures)} structures to {mol.filename[:-4]}_scan.xyz ({time_to_string(time.perf_counter() - t_start_scan)})')
    embedder.log(f'\n--> Written energy maximum to {mol.filename[:-4]}_scan_max.xyz\n')

    # Log data to the embedder class
    mol.scan_data = (dists, energies)
749
+
750
+ def _get_lowest_calc(embedder=None):
751
+ '''
752
+ Returns the values for calculator,
753
+ method and processors for the lowest
754
+ theory level available from embedder or settings.
755
+ '''
756
+ if embedder is None:
757
+ if FF_OPT_BOOL:
758
+ return (FF_CALC, DEFAULT_FF_LEVELS[FF_CALC], PROCS)
759
+ return (CALCULATOR, DEFAULT_LEVELS[CALCULATOR], PROCS)
760
+
761
+ if embedder.options.ff_opt:
762
+ return (embedder.options.ff_calc, embedder.options.ff_level, embedder.procs)
763
+ return (embedder.options.calculator, embedder.options.theory_level, embedder.procs)
764
+
765
+ def _get_internal_constraints(filename, embedder):
766
+ '''
767
+ '''
768
+ mol_id = next((i for i, mol in enumerate(embedder.objects) if mol.filename == filename))
769
+ # get embedder,objects index of molecule to get internal constraints of
770
+
771
+ out = []
772
+ for _, tgt in embedder.pairings_dict[mol_id].items():
773
+ if isinstance(tgt, tuple):
774
+ out.append(tgt)
775
+
776
+ return np.array(out)