firecode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecode/TEST_NOTEBOOK.ipynb +3940 -0
- firecode/__init__.py +0 -0
- firecode/__main__.py +118 -0
- firecode/_gaussian.py +97 -0
- firecode/algebra.py +405 -0
- firecode/ase_manipulations.py +879 -0
- firecode/atropisomer_module.py +516 -0
- firecode/automep.py +130 -0
- firecode/calculators/__init__.py +29 -0
- firecode/calculators/_gaussian.py +98 -0
- firecode/calculators/_mopac.py +242 -0
- firecode/calculators/_openbabel.py +154 -0
- firecode/calculators/_orca.py +129 -0
- firecode/calculators/_xtb.py +786 -0
- firecode/concurrent_test.py +119 -0
- firecode/embedder.py +2590 -0
- firecode/embedder_options.py +577 -0
- firecode/embeds.py +881 -0
- firecode/errors.py +65 -0
- firecode/graph_manipulations.py +333 -0
- firecode/hypermolecule_class.py +364 -0
- firecode/mep_relaxer.py +199 -0
- firecode/modify_settings.py +186 -0
- firecode/mprof.py +65 -0
- firecode/multiembed.py +148 -0
- firecode/nci.py +186 -0
- firecode/numba_functions.py +260 -0
- firecode/operators.py +776 -0
- firecode/optimization_methods.py +609 -0
- firecode/parameters.py +84 -0
- firecode/pka.py +275 -0
- firecode/profiler.py +17 -0
- firecode/pruning.py +421 -0
- firecode/pt.py +32 -0
- firecode/quotes.json +6651 -0
- firecode/quotes.py +9 -0
- firecode/reactive_atoms_classes.py +666 -0
- firecode/references.py +11 -0
- firecode/rmsd.py +74 -0
- firecode/settings.py +75 -0
- firecode/solvents.py +126 -0
- firecode/tests/C2F2H4.xyz +10 -0
- firecode/tests/C2H4.xyz +8 -0
- firecode/tests/CH3Cl.xyz +7 -0
- firecode/tests/HCOOH.xyz +7 -0
- firecode/tests/HCOOOH.xyz +8 -0
- firecode/tests/chelotropic.txt +3 -0
- firecode/tests/cyclical.txt +3 -0
- firecode/tests/dihedral.txt +2 -0
- firecode/tests/string.txt +3 -0
- firecode/tests/trimolecular.txt +9 -0
- firecode/tests.py +151 -0
- firecode/torsion_module.py +1035 -0
- firecode/utils.py +541 -0
- firecode-1.0.0.dist-info/LICENSE +165 -0
- firecode-1.0.0.dist-info/METADATA +321 -0
- firecode-1.0.0.dist-info/RECORD +59 -0
- firecode-1.0.0.dist-info/WHEEL +5 -0
- firecode-1.0.0.dist-info/top_level.txt +1 -0
firecode/operators.py
ADDED
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
'''
|
|
3
|
+
FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
|
|
4
|
+
Copyright (C) 2021-2024 Nicolò Tampellini
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: LGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU Lesser General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
(at your option) any later version.
|
|
12
|
+
|
|
13
|
+
This program is distributed in the hope that it will be useful,
|
|
14
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
GNU Lesser General Public License for more details.
|
|
17
|
+
|
|
18
|
+
You should have received a copy of the GNU Lesser General Public License
|
|
19
|
+
along with this program. If not, see
|
|
20
|
+
https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
|
|
21
|
+
|
|
22
|
+
'''
|
|
23
|
+
|
|
24
|
+
# import pickle
|
|
25
|
+
import time
|
|
26
|
+
from subprocess import CalledProcessError
|
|
27
|
+
|
|
28
|
+
import numpy as np
|
|
29
|
+
|
|
30
|
+
from firecode.ase_manipulations import ase_saddle
|
|
31
|
+
from firecode.atropisomer_module import dihedral_scan
|
|
32
|
+
from firecode.automep import automep
|
|
33
|
+
from firecode.calculators._xtb import crest_mtd_search
|
|
34
|
+
from firecode.errors import FatalError, InputError
|
|
35
|
+
from firecode.graph_manipulations import graphize
|
|
36
|
+
from firecode.mep_relaxer import ase_mep_relax
|
|
37
|
+
from firecode.numba_functions import prune_conformers_tfd
|
|
38
|
+
from firecode.optimization_methods import _refine_structures, optimize
|
|
39
|
+
from firecode.pka import pka_routine
|
|
40
|
+
from firecode.pruning import prune_by_rmsd, prune_by_rmsd_rot_corr
|
|
41
|
+
from firecode.settings import (CALCULATOR, DEFAULT_FF_LEVELS, DEFAULT_LEVELS,
|
|
42
|
+
FF_CALC, FF_OPT_BOOL, PROCS)
|
|
43
|
+
from firecode.torsion_module import _get_quadruplets, csearch
|
|
44
|
+
from firecode.utils import (align_structures, get_scan_peak_index,
|
|
45
|
+
molecule_check, read_xyz, time_to_string, write_xyz)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def operate(input_string, embedder):
    '''
    Perform the operations according to the chosen
    operator and return the outname of the (new) .xyz
    file to read instead of the input one.

    input_string: operator string. It is matched by substring
        against the known "<operator>>" tokens, in the order
        of the branches below.
    embedder: object driving the run. This function uses its
        _extract_filename, log, options, stamp and
        normal_termination members, and sets t_start_run on
        it when the attribute is missing.

    Returns the name of the file written by the operator, or the
    input filename for dry runs and for operators that do not
    produce a new structure file.

    Raises Exception if no known operator token is found.
    '''

    filename = embedder._extract_filename(input_string)

    if not hasattr(embedder, "t_start_run"):
        embedder.t_start_run = time.perf_counter()

    if embedder.options.dryrun:
        embedder.log(f'--> Dry run requested: skipping operator "{input_string}"')
        return filename

    # Operators that do not write a new ensemble hand back the input file.
    # This also keeps the final return well-defined for the branches that
    # end with embedder.normal_termination(), should that call ever return.
    outname = filename

    if 'csearch>' in input_string:
        outname = csearch_operator(filename, embedder)

    elif 'opt>' in input_string:
        outname = opt_operator(filename,
                               embedder,
                               logfunction=embedder.log)

    elif 'csearch_hb>' in input_string:
        outname = csearch_operator(filename, embedder, keep_hb=True)

    elif 'rsearch>' in input_string:
        outname = csearch_operator(filename, embedder, mode=2)

    elif any(string in input_string for string in ('mtd_search>', 'mtd>')):
        outname = mtd_search_operator(filename, embedder)

    elif 'saddle>' in input_string:
        saddle_operator(filename, embedder)
        embedder.normal_termination()

    elif 'scan>' in input_string:
        scan_operator(filename, embedder)

    elif 'automep>' in input_string:
        # falls back to 9 images when the option was not set
        automep(embedder, n_images=getattr(embedder.options, 'images', 9))
        # neb_operator(automep_filename, embedder)
        # embedder.normal_termination()

    elif 'neb>' in input_string:
        neb_operator(filename, embedder)
        embedder.normal_termination()

    elif 'refine>' in input_string:
        # this operator is accounted for in the OptionSetter
        # class of Options, set when the Embedder calls _set_options
        pass

    elif 'pka>' in input_string:
        pka_routine(filename, embedder)

    elif 'mep_relax>' in input_string:

        data = read_xyz(filename)
        n_images = getattr(embedder.options, 'images', None)

        # can implement a smart safety feature that is
        # disabled when some bonds have to be let free to break
        no_bonds_breaking = True

        if no_bonds_breaking:

            # first pass, written under the "<stamp>_safe" title
            mep, _, exit_status = ase_mep_relax(
                embedder,
                data.atomcoords,
                data.atomnos,
                title=embedder.stamp+"_safe",
                n_images=n_images,
                logfunction=embedder.log,
                write_plot=True,
                verbose_print=True,
                safe=True
            )

        else:
            mep = data.atomcoords
            exit_status = True

        if exit_status:

            if no_bonds_breaking:
                print("--> Completed safe optimization, relaxing bond distance constraints.")

            # NOTE(review): this second pass still passes safe=True although
            # the message above announces relaxed constraints - confirm intent.
            ase_mep_relax(
                embedder,
                mep,
                data.atomnos,
                title=embedder.stamp,
                n_images=n_images,
                logfunction=embedder.log,
                write_plot=True,
                verbose_print=True,
                safe=True
            )

        embedder.normal_termination()

    else:
        op = input_string.split('>')[0]
        raise Exception(f'Operator {op} not recognized.')

    return outname
|
|
157
|
+
|
|
158
|
+
def csearch_operator(filename, embedder, keep_hb=False, mode=1):
    '''
    Run csearch on every structure found in an .xyz file and write
    all the generated conformers to a single file, named as the
    input with its last four characters replaced by "_confs.xyz".

    filename: name of the .xyz file to read structures from.
    embedder: object providing log and options (max_confs, debug).
    keep_hb: forwarded to csearch; also mentioned in the log line.
    mode: forwarded to csearch.

    Returns the name of the conformer file that was written.
    '''

    s = f'--> Performing conformational search on {filename}'
    if keep_hb:
        s += ' (preserving current hydrogen bonds)'
    embedder.log(s)

    data = read_xyz(filename)

    if len(data.atomcoords) > 1:
        embedder.log('Requested conformational search on multimolecular file - will do\n' +
                     'an individual search from each conformer (might be time-consuming).')

    # the constraints depend only on filename and embedder,
    # so they are looked up once for all starting structures
    # NOTE(review): assumes _get_internal_constraints has no side effects - confirm
    constrained_indices = _get_internal_constraints(filename, embedder)

    conformers = []
    for i, coords in enumerate(data.atomcoords):

        conf_batch = csearch(
            coords,
            data.atomnos,
            constrained_indices=constrained_indices,
            keep_hb=keep_hb,
            mode=mode,
            # the max_confs budget is split evenly among the input structures
            n_out=embedder.options.max_confs//len(data.atomcoords),
            title=f'{filename}_conf{i}',
            logfunction=embedder.log,
            write_torsions=embedder.options.debug
        )

        conformers.extend(conf_batch)

    # merging structures from each run in a single
    # array, one (n_atoms, 3) slab per conformer
    conformers = np.concatenate(conformers)
    conformers = conformers.reshape(-1, data.atomnos.shape[0], 3)

    print(f'Writing conformers to file...{" "*10}', end='\r')

    confname = filename[:-4] + '_confs.xyz'
    with open(confname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer, data.atomnos, f, title=f'Generated conformer {i}')

    # blank out the progress line printed above
    print(f'{" "*30}', end='\r')

    embedder.log('\n')

    return confname
|
|
221
|
+
|
|
222
|
+
def opt_operator(filename, embedder, logfunction=None):
    '''
    Refine all conformers of the molecule loaded from filename,
    sort them by energy, drop the ones 20 kcal/mol or more above
    the most stable and write the rest to a file named as the
    input with its last four characters replaced by "_opt.xyz".

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects, options, log and
        get_pairing_dists_from_constrained_indices.
    logfunction: optional callable receiving progress strings.
        When None, nothing is reported through it.

    Returns the name of the file with the optimized structures.
    '''

    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    if logfunction is not None:
        solvent = embedder.options.solvent
        solvent_string = f'/{solvent}' if solvent is not None else ''
        logfunction(f'--> Performing {embedder.options.calculator} {embedder.options.theory_level}'
                    f'{solvent_string} optimization on {filename} ({len(mol.atomcoords)} conformers)')

    constrained_indices = _get_internal_constraints(filename, embedder)
    constrained_distances = [embedder.get_pairing_dists_from_constrained_indices(cp) for cp in constrained_indices]

    lowest_calc = _get_lowest_calc(embedder)

    t_start = time.perf_counter()

    conformers, energies = _refine_structures(
        mol.atomcoords,
        mol.atomnos,
        *lowest_calc,
        constrained_indices=constrained_indices,
        constrained_distances=constrained_distances,
        loadstring='Optimizing conformer',
        logfunction=lambda s: embedder.log(s, p=False),
    )

    elapsed = time.perf_counter() - t_start

    # sorting structures based on energy (stable, so ties keep their order)
    order = np.argsort(energies, kind='stable')
    energies = np.array(energies)[order]
    energies = energies - np.min(energies)
    conformers = np.array(conformers)[order]

    # getting the structures to keep (Rel. Energy < 20 kcal/mol)
    mask = energies < 20
    n_optimized = len(conformers)
    n_discarded = n_optimized - np.count_nonzero(mask)

    conformers, energies = conformers[mask], energies[mask]
    # applying the mask that rejects high energy confs

    optname = filename[:-4] + '_opt.xyz'
    with open(optname, 'w') as f:
        for i, conformer in enumerate(align_structures(conformers)):
            write_xyz(conformer, mol.atomnos, f, title=f'Optimized conformer {i} - Rel. E. = {round(energies[i], 3)} kcal/mol')

    # reporting is optional: skipped altogether without a logfunction
    if logfunction is not None:
        plural = 's' if n_optimized > 1 else ''
        summary = (f'Completed optimization on {n_optimized} conformer{plural}. '
                   f'({time_to_string(elapsed)}, ~{time_to_string(elapsed/n_optimized)} per structure).\n')

        if n_discarded > 0:
            summary += f'Discarded {n_discarded}/{n_optimized} unstable conformers (Rel. E. > 20 kcal/mol)\n'

        logfunction(summary+'\n')
        logfunction(f'Wrote {len(conformers)} optimized structures to {optname}\n')

    return optname
|
|
276
|
+
|
|
277
|
+
def neb_operator(filename, embedder, attempts=5):
    '''
    Run a NEB transition state search between the first and last
    structure of an .xyz file, after optimizing both end points,
    and write the TS structure to "<title>_TS.xyz", where title is
    the filename with its last four characters replaced by "_NEB".

    filename: .xyz file with two structures (start, end), three
        (start, TS guess, end) or a larger odd number of
        structures, used as the initial guess for the whole path.
    embedder: object providing log, options, procs. Its
        t_start_run attribute is reset here.
    attempts: maximum number of ase_neb runs. After a "MAX ITER"
        exit the next run restarts from the structures read
        from "<title>_MEP_start_of_CI.xyz".

    Raises AssertionError if the number of input structures
    is not one of the supported ones.
    '''
    embedder.t_start_run = time.perf_counter()
    data = read_xyz(filename)
    n_str = len(data.atomcoords)

    # a single structure is an odd number too, but has no distinct end points
    assert n_str == 2 or (n_str > 2 and n_str % 2 == 1), (
        'NEB calculations need a .xyz input file with two, three or an odd number of geometries.')

    if n_str == 2:
        reagents, products = data.atomcoords
        ts_guess = None
        mep_override = None
        embedder.log('--> Two structures as input: using them as start and end points.')

    elif n_str == 3:
        reagents, ts_guess, products = data.atomcoords
        mep_override = None
        embedder.log('--> Three structures as input: using them as start, TS guess and end points.')

    else:
        reagents, *_, products = data.atomcoords
        # the middle structure is used as the TS guess
        ts_guess = data.atomcoords[n_str//2]
        mep_override = data.atomcoords
        embedder.log(f'--> {n_str} structures as input: using these as the NEB MEP guess.')

    from firecode.ase_manipulations import ase_neb

    title = filename[:-4] + '_NEB'

    # end points are always preoptimized at the requested level
    embedder.log(f'--> Performing NEB TS optimization. Preoptimizing structures from {filename}\n'
                 f'Theory level is {embedder.options.theory_level}/{embedder.options.solvent or "vacuum"} via {embedder.options.calculator}')

    opt_kwargs = dict(
        method=embedder.options.theory_level,
        procs=embedder.procs,
        solvent=embedder.options.solvent,
        logfunction=embedder.log,
    )

    reagents, reag_energy, _ = optimize(
        reagents,
        data.atomnos,
        embedder.options.calculator,
        title='reagents',
        **opt_kwargs,
    )

    products, prod_energy, _ = optimize(
        products,
        data.atomnos,
        embedder.options.calculator,
        title='products',
        **opt_kwargs,
    )

    if mep_override is not None:
        # the path guess must start and end on the optimized end points
        mep_override[0] = reagents
        mep_override[-1] = products

    for attempt in range(attempts):

        ts_coords, ts_energy, energies, exit_status = ase_neb(
            embedder,
            reagents,
            products,
            data.atomnos,
            n_images=7,
            ts_guess=ts_guess,
            mep_override=mep_override,
            title=title,
            logfunction=embedder.log,
            write_plot=True,
            verbose_print=True,
        )

        if exit_status == "CONVERGED":
            break

        # reload the checkpoint whenever another attempt is going to
        # be made, so that no attempt is repeated from the same input
        if exit_status == "MAX ITER" and attempt+1 < attempts:
            mep_override = read_xyz(f'{title}_MEP_start_of_CI.xyz').atomcoords
            reagents, *_, products = mep_override
            embedder.log(f'--> Restarting NEB from checkpoint. Attempt {attempt+2}/{attempts}.\n')

    e1 = ts_energy - reag_energy
    e2 = ts_energy - prod_energy

    # NOTE(review): these slices take the first three points as the "left"
    # side and everything from the fifth on as the "right" one, which
    # presumably matches a fixed path length - confirm against ase_neb.
    dg1 = ts_energy - min(energies[:3])
    dg2 = ts_energy - min(energies[4:])

    # the "+" format option writes an explicit sign for both signs
    embedder.log(f'NEB completed, relative energy from start/end points (not barrier heights):\n'
                 f' > E(TS)-E(start): {round(e1, 3):+} kcal/mol\n'
                 f' > E(TS)-E(end) : {round(e2, 3):+} kcal/mol\n')

    embedder.log(f'Barrier heights (based on lowest energy point on each side):\n'
                 f' > E(TS)-E(left) : {round(dg1, 3):+} kcal/mol\n'
                 f' > E(TS)-E(right): {round(dg2, 3):+} kcal/mol')

    if not (e1 > 0 and e2 > 0):
        embedder.log('\nNEB failed, TS energy is not higher than both the start and end points.\n')

    with open(f'{title}_TS.xyz', 'w') as f:
        write_xyz(ts_coords, data.atomnos, f, title='NEB TS - see log for relative energies')
|
|
392
|
+
|
|
393
|
+
def saddle_operator(filename, embedder):
    '''
    Perform a saddle optimization on the specified structure
    and write the result to "<mol.rootname>_saddle.xyz".

    filename: filename of the molecule, as stored in
        embedder.objects. It must hold a single structure.
    embedder: object providing objects, options and log.

    Raises AssertionError if the molecule has more than one structure.
    '''

    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    assert len(mol.atomcoords) == 1, 'saddle> operator works with a single structure as input.'

    logfunction = embedder.log

    solvent = embedder.options.solvent
    solvent_string = f'/{solvent}' if solvent is not None else ''

    logfunction(f'--> Performing {embedder.options.calculator} {embedder.options.theory_level}'
                f'{solvent_string} saddle optimization on {filename}')

    new_structure, energy, success = ase_saddle(
        embedder,
        mol.atomcoords[0],
        mol.atomnos,
        constrained_indices=None,
        mols_graphs=None,
        title=mol.rootname,
        logfile=mol.rootname+"_saddle_opt_log.txt",
        traj=None,
        freq=False,
        maxiterations=200
    )

    # the structure is written in any case, with the outcome in its title
    outcome = 'succeeded' if success else 'failed'
    with open(mol.rootname+"_saddle.xyz", 'w') as f:
        write_xyz(new_structure, mol.atomnos, f,
                  f'ASE Saddle optimization {outcome} '
                  f'({embedder.options.calculator} {embedder.options.theory_level}{solvent_string})')

    if success:
        # NOTE(review): the first line of this message mentions start/end
        # points, which this function does not deal with - confirm wording.
        embedder.log(
            f'Saddle optimization completed, relative energy from start/end points (not barrier heights):\n'
            f' > E(Saddle_point) : {round(energy, 3)} kcal/mol\n')
|
|
428
|
+
|
|
429
|
+
def mtd_search_operator(filename, embedder):
    '''
    Run a CREST metadynamic conformational search and return the output filename.

    Every structure of the molecule is first optimized (if
    embedder.options.optimization is set) and checked with
    molecule_check, then used as the starting point of a CREST
    run. All resulting conformers are merged, pruned by TFD and
    RMSD similarity and written to "<mol.rootname>_mtd_confs.xyz".

    filename: filename of the molecule, as stored in embedder.objects.
    embedder: object providing objects, options, procs, avail_cpus,
        log, debuglog, get_pairing_dists_from_constrained_indices
        and check_objects_compenetration. Its graphs attribute
        and the molecule coordinates/graph are updated in place.

    Raises InputError if more than 20 structures are provided
    without the LET keyword, and FatalError if the optimization
    of a structure fails or does not pass molecule_check.
    '''
    mol = next((mol for mol in embedder.objects if mol.filename == filename))
    # load molecule to be optimized from embedder

    if not hasattr(mol, 'charge'):
        mol.charge = 0

    if not embedder.options.let and len(mol.atomcoords) > 20:
        raise InputError('The mtd_search> operator was given more than 20 input structures. ' +
                         'This would run >20 metadynamic conformational searches. If this was not a mistake, ' +
                         'add the LET keyword and re-run the job.')

    logfunction = embedder.log
    constrained_indices = _get_internal_constraints(filename, embedder)
    constrained_distances = [embedder.get_pairing_dists_from_constrained_indices(cp) for cp in constrained_indices]

    logfunction(f'--> {filename}: Geometry optimization pre-mtd_search ({embedder.options.theory_level} via {embedder.options.calculator})')
    return_char = "\n"
    logfunction(f' {len(constrained_indices)} constraints applied{": "+str(constrained_indices).replace(return_char, " ") if len(constrained_indices) > 0 else ""}')

    # iterating over a copy, since mol.atomcoords is updated in the loop
    for c, coords in enumerate(mol.atomcoords.copy()):

        if embedder.options.optimization:
            logfunction(f" Optimizing conformer {c+1}/{len(mol.atomcoords)}")

            opt_coords, _, success = optimize(
                coords,
                mol.atomnos,
                calculator=embedder.options.calculator,
                method=embedder.options.theory_level,
                solvent=embedder.options.solvent,
                charge=embedder.options.charge,
                procs=embedder.procs,
                constrained_indices=constrained_indices,
                constrained_distances=constrained_distances,
                title=f'{filename.split(".")[0]}_conf{c+1}',
            )

            exit_status = "" if success else "CRASHED"

            if success:
                success = molecule_check(coords, opt_coords, mol.atomnos)
                exit_status = "" if success else "SCRAMBLED"

        else:
            # no optimization requested: the input structure is used as it is
            opt_coords, success, exit_status = coords, True, ""

        if not success:
            dumpname = filename.split(".")[0] + f"_conf{c+1}_{exit_status}.xyz"
            with open(dumpname, "w") as f:
                write_xyz(opt_coords, mol.atomnos, f, title=f"{filename}, conformer {c+1}/{len(mol.atomcoords)}, {exit_status}")

            logfunction(f"{filename}, conformer {c+1}/{len(mol.atomcoords)} optimization {exit_status}. Inspect geometry at {dumpname}. Aborting run.")

            raise FatalError(filename)

        # update embedder structures after optimization
        mol.atomcoords[c] = opt_coords

    logfunction()

    # update mol and embedder graph after optimization
    mol.graph = graphize(mol.atomcoords[0], mol.atomnos)
    embedder.graphs = [m.graph for m in embedder.objects]

    # two cores per CREST thread, at least one thread
    max_workers = embedder.avail_cpus//2 or 1
    logfunction(f'--> Performing {embedder.options.calculator} GFN2//GFN-FF' + (
                f'{f"/{embedder.options.solvent.upper()}" if embedder.options.solvent is not None else ""} ' +
                f'metadynamic conformational search on {filename} via CREST.\n' +
                f' (2 cores/thread, {max_workers} threads, {embedder.options.kcal_thresh} kcal/mol thr.)'))

    if embedder.options.crestnci:
        logfunction('--> CRESTNCI: Running crest in NCI mode (wall potential applied)')

    if len(mol.atomcoords) > 1:
        embedder.log('--> Requested conformational search on multimolecular file - will do\n' +
                     'an individual search from each conformer (might be time-consuming).')

    # arguments shared by the first CREST run and by its fallback
    crest_kwargs = dict(
        constrained_indices=constrained_indices,
        constrained_distances=constrained_distances,
        solvent=embedder.options.solvent,
        charge=mol.charge,
        kcal=embedder.options.kcal_thresh,
        ncimode=embedder.options.crestnci,
        title=mol.rootname+"_mtd_csearch",
        procs=2,
        threads=max_workers,
    )

    t_start = time.perf_counter()
    conformers = []
    for i, coords in enumerate(mol.atomcoords):

        t_start_conf = time.perf_counter()
        try:
            conf_batch = crest_mtd_search(coords, mol.atomnos, **crest_kwargs)

        # if the run errors out, we retry with XTB2
        except CalledProcessError:
            logfunction('--> Metadynamics run failed with GFN2-XTB//GFN-FF, retrying with just GFN2-XTB (slower but more stable)')
            conf_batch = crest_mtd_search(coords, mol.atomnos, method='GFN2-XTB', **crest_kwargs)

        conformers.extend(conf_batch)

        elapsed = time.perf_counter() - t_start_conf
        embedder.log(f' Conformer {i+1:2}/{len(mol.atomcoords):2} - generated {len(conf_batch)} structures in {time_to_string(elapsed)}')

    # merging structures from each run in a single array
    conformers = np.concatenate(conformers)
    conformers = conformers.reshape(-1, mol.atomnos.shape[0], 3)

    embedder.log(f' MTD conformational search: Generated {len(conformers)} conformers in {time_to_string(time.perf_counter()-t_start)}')
    before = len(conformers)

    ### SIMILARITY PRUNING: TFD
    quadruplets = _get_quadruplets(mol.graph)
    conformers, _ = prune_conformers_tfd(conformers, quadruplets)

    ### RMSD - each pruning only runs below a given ensemble size
    if len(conformers) < 5E4:
        conformers, _ = prune_by_rmsd(conformers, mol.atomnos, max_rmsd=embedder.options.rmsd, debugfunction=embedder.debuglog)
    if len(conformers) < 1E3:
        conformers, _ = prune_by_rmsd_rot_corr(conformers, mol.atomnos, mol.graph, max_rmsd=embedder.options.rmsd, debugfunction=embedder.debuglog)

    embedder.log(f' Discarded {before-len(conformers)} RMSD-similar structures ({len(conformers)} left)\n')

    ### PRINTOUT
    outname = f'{mol.rootname}_mtd_confs.xyz'
    with open(outname, 'w') as f:
        for i, new_s in enumerate(conformers):
            write_xyz(new_s, mol.atomnos, f, title=f'Conformer {i}/{len(conformers)} from CREST MTD')

    # check the structures again and warn if some look compenetrated
    embedder.check_objects_compenetration()

    return outname
|
|
581
|
+
|
|
582
|
+
def scan_operator(filename, embedder):
    '''
    Scan operator dispatcher:
    2 indices: distance_scan
    4 indices: dihedral_scan

    filename: filename of the molecule, as stored in
        embedder.objects. It must hold a single structure
        and have two or four reactive indices.
    embedder: object providing objects; it is passed
        unchanged to the selected scan function.

    Returns whatever the selected scan function returns.

    Raises AssertionError if the molecule has more than one
    structure or an unsupported number of reactive indices.
    '''
    mol = next((mol for mol in embedder.objects if mol.filename == filename))

    assert len(mol.atomcoords) == 1, 'The scan> operator works on a single .xyz geometry.'

    n_indices = len(mol.reactive_indices)
    assert n_indices in (2, 4), (
        f'The scan> operator needs two or four indices ({n_indices} were provided)')

    if n_indices == 2:
        return distance_scan(embedder)

    # only four indices are left at this point
    return dihedral_scan(embedder)
|
|
600
|
+
|
|
601
|
+
def distance_scan(embedder):
|
|
602
|
+
'''
|
|
603
|
+
Thought to approach or separate two reactive atoms, looking for the energy maximum.
|
|
604
|
+
Scan direction is inferred by the reactive index distance.
|
|
605
|
+
'''
|
|
606
|
+
|
|
607
|
+
import matplotlib.pyplot as plt
|
|
608
|
+
|
|
609
|
+
from firecode.algebra import norm_of
|
|
610
|
+
from firecode.pt import pt
|
|
611
|
+
|
|
612
|
+
embedder.t_start_run = time.perf_counter()
|
|
613
|
+
mol = embedder.objects[0]
|
|
614
|
+
t_start = time.perf_counter()
|
|
615
|
+
|
|
616
|
+
# shorthands for clearer code
|
|
617
|
+
i1, i2 = mol.reactive_indices
|
|
618
|
+
coords = mol.atomcoords[0]
|
|
619
|
+
|
|
620
|
+
# getting the start distance between scan indices and start energy
|
|
621
|
+
d = norm_of(coords[i1]-coords[i2])
|
|
622
|
+
|
|
623
|
+
# deciding if moving atoms closer or further apart based on distance
|
|
624
|
+
bonds = list(mol.graph.edges)
|
|
625
|
+
step = 0.05 if (i1, i2) in bonds else -0.05
|
|
626
|
+
|
|
627
|
+
# logging to file and terminal
|
|
628
|
+
embedder.log(f'--> {mol.rootname} - Performing a distance scan {"approaching" if step < 0 else "separating"} indices {i1} ' +
|
|
629
|
+
f'and {i2} - step size {round(step, 2)} A\n Theory level is {embedder.options.theory_level}/{embedder.options.solvent or "vacuum"} ' +
|
|
630
|
+
f'via {embedder.options.calculator}')
|
|
631
|
+
|
|
632
|
+
# creating a dictionary that will hold results
|
|
633
|
+
# and the structure output list
|
|
634
|
+
dists, energies, structures = [], [], []
|
|
635
|
+
|
|
636
|
+
# getting atomic symbols
|
|
637
|
+
s1, s2 = mol.atomnos[[i1, i2]]
|
|
638
|
+
|
|
639
|
+
# defining the maximum number of iterations
|
|
640
|
+
if step < 0:
|
|
641
|
+
smallest_d = 0.9*(pt[s1].covalent_radius+
|
|
642
|
+
pt[s2].covalent_radius)
|
|
643
|
+
max_iterations = round((d-smallest_d) / abs(step))
|
|
644
|
+
# so that atoms are never forced closer than
|
|
645
|
+
# a proportionally small distance between those two atoms.
|
|
646
|
+
|
|
647
|
+
else:
|
|
648
|
+
max_d = 1.8*(pt[s1].covalent_radius+
|
|
649
|
+
pt[s2].covalent_radius)
|
|
650
|
+
max_iterations = round((max_d-d) / abs(step))
|
|
651
|
+
# so that atoms are never spaced too far apart
|
|
652
|
+
|
|
653
|
+
from firecode.calculators._xtb import xtb_opt
|
|
654
|
+
for i in range(max_iterations):
|
|
655
|
+
|
|
656
|
+
t_start = time.perf_counter()
|
|
657
|
+
|
|
658
|
+
coords, energy, _ = xtb_opt(
|
|
659
|
+
coords,
|
|
660
|
+
mol.atomnos,
|
|
661
|
+
constrained_indices=np.array([mol.reactive_indices]),
|
|
662
|
+
constrained_distances=(d,),
|
|
663
|
+
method=embedder.options.theory_level,
|
|
664
|
+
solvent=embedder.options.solvent,
|
|
665
|
+
charge=embedder.options.charge,
|
|
666
|
+
title='temp',
|
|
667
|
+
procs=embedder.procs,
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
if i == 0:
|
|
671
|
+
e_0 = energy
|
|
672
|
+
|
|
673
|
+
energies.append(energy - e_0)
|
|
674
|
+
dists.append(d)
|
|
675
|
+
structures.append(coords)
|
|
676
|
+
# print(f"------> target was {round(d, 3)} A, reached {round(norm_of(coords[mol.reactive_indices[0]]-coords[mol.reactive_indices[1]]), 3)} A")
|
|
677
|
+
# saving the structure, distance and relative energy
|
|
678
|
+
|
|
679
|
+
embedder.log(f'Step {i+1}/{max_iterations} - d={round(d, 2)} A - {round(energy-e_0, 2):4} kcal/mol - {time_to_string(time.perf_counter()-t_start)}')
|
|
680
|
+
|
|
681
|
+
with open("temp_scan.xyz", "w") as f:
|
|
682
|
+
for i, (s, d, e) in enumerate(zip(structures, dists, energies)):
|
|
683
|
+
write_xyz(s, mol.atomnos, f, title=f'Scan point {i+1}/{len(structures)} ' +
|
|
684
|
+
f'- d({i1}-{i2}) = {round(d, 3)} A - Rel. E = {round(e-min(energies), 2)} kcal/mol')
|
|
685
|
+
|
|
686
|
+
d += step
|
|
687
|
+
# modify the target distance and reiterate
|
|
688
|
+
|
|
689
|
+
### Start the plotting sequence
|
|
690
|
+
|
|
691
|
+
plt.figure()
|
|
692
|
+
plt.plot(
|
|
693
|
+
dists,
|
|
694
|
+
energies,
|
|
695
|
+
color='tab:red',
|
|
696
|
+
label='Scan energy',
|
|
697
|
+
linewidth=3,
|
|
698
|
+
)
|
|
699
|
+
|
|
700
|
+
# e_max = max(energies)
|
|
701
|
+
id_max = get_scan_peak_index(energies)
|
|
702
|
+
e_max = energies[id_max]
|
|
703
|
+
|
|
704
|
+
# id_max = energies.index(e_max)
|
|
705
|
+
d_opt = dists[id_max]
|
|
706
|
+
|
|
707
|
+
plt.plot(
|
|
708
|
+
d_opt,
|
|
709
|
+
e_max,
|
|
710
|
+
color='gold',
|
|
711
|
+
label='Energy maximum (TS guess)',
|
|
712
|
+
marker='o',
|
|
713
|
+
markersize=3,
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
title = mol.rootname + ' distance scan'
|
|
717
|
+
plt.legend()
|
|
718
|
+
plt.title(title)
|
|
719
|
+
plt.xlabel(f'indices s{i1}-{i2} distance (A)')
|
|
720
|
+
|
|
721
|
+
if step > 0:
|
|
722
|
+
plt.gca().invert_xaxis()
|
|
723
|
+
|
|
724
|
+
plt.ylabel('Rel. E. (kcal/mol)')
|
|
725
|
+
plt.savefig(f'{title.replace(" ", "_")}_plt.svg')
|
|
726
|
+
# with open(f'{title.replace(" ", "_")}_plt.pickle', 'wb') as _f:
|
|
727
|
+
# pickle.dump(fig, _f)
|
|
728
|
+
|
|
729
|
+
### Start structure writing
|
|
730
|
+
|
|
731
|
+
# print all scan structures
|
|
732
|
+
with open(f'{mol.filename[:-4]}_scan.xyz', 'w') as f:
|
|
733
|
+
for i, (s, d, e) in enumerate(zip(structures, dists, energies)):
|
|
734
|
+
write_xyz(s, mol.atomnos, f, title=f'Scan point {i+1}/{len(structures)} ' +
|
|
735
|
+
f'- d({i1}-{i2}) = {round(d, 2)} A - Rel. E = {round(e, 2)} kcal/mol')
|
|
736
|
+
|
|
737
|
+
# print the maximum on another file for convenience
|
|
738
|
+
with open(f'{mol.filename[:-4]}_scan_max.xyz', 'w') as f:
|
|
739
|
+
s = structures[id_max]
|
|
740
|
+
d = dists[id_max]
|
|
741
|
+
write_xyz(s, mol.atomnos, f, title=f'Scan point {id_max+1}/{len(structures)} ' +
|
|
742
|
+
f'- d({i1}-{i2}) = {round(d, 3)} A - Rel. E = {round(e_max, 3)} kcal/mol')
|
|
743
|
+
|
|
744
|
+
embedder.log(f'\n--> Written {len(structures)} structures to {mol.filename[:-4]}_scan.xyz ({time_to_string(time.perf_counter() - t_start)})')
|
|
745
|
+
embedder.log(f'\n--> Written energy maximum to {mol.filename[:-4]}_scan_max.xyz\n')
|
|
746
|
+
|
|
747
|
+
# Log data to the embedder class
|
|
748
|
+
mol.scan_data = (dists, energies)
|
|
749
|
+
|
|
750
|
+
def _get_lowest_calc(embedder=None):
    '''
    Pick the (calculator, method, processors) triple for the
    lowest theory level available.

    Without an embedder the module-level settings are used:
    (FF_CALC, DEFAULT_FF_LEVELS[FF_CALC], PROCS) when FF_OPT_BOOL
    is truthy, otherwise (CALCULATOR, DEFAULT_LEVELS[CALCULATOR], PROCS).

    With an embedder the same choice is driven by embedder.options.ff_opt:
    the force field calculator/level if truthy, otherwise the regular
    calculator/theory level, always paired with embedder.procs.
    '''
    if embedder is not None:
        opts = embedder.options

        # force field optimization requested: that is the cheapest level
        if opts.ff_opt:
            calc, level = opts.ff_calc, opts.ff_level
        else:
            calc, level = opts.calculator, opts.theory_level

        return (calc, level, embedder.procs)

    # no embedder provided: fall back on the defaults from settings
    if FF_OPT_BOOL:
        return (FF_CALC, DEFAULT_FF_LEVELS[FF_CALC], PROCS)
    return (CALCULATOR, DEFAULT_LEVELS[CALCULATOR], PROCS)
|
|
764
|
+
|
|
765
|
+
def _get_internal_constraints(filename, embedder):
    '''
    Return, as a numpy array, every tuple-valued entry found in
    embedder.pairings_dict for the molecule called filename.

    filename: compared against the .filename attribute of each
              element of embedder.objects; the first match is used.
    embedder: object providing .objects and .pairings_dict, the
              latter indexed by position in .objects.

    Non-tuple values in the pairings are ignored. If no element of
    embedder.objects has that filename, next() raises StopIteration.
    '''
    # index in embedder.objects of the molecule to get internal constraints of
    matching_ids = (
        idx for idx, candidate in enumerate(embedder.objects)
        if candidate.filename == filename
    )
    mol_id = next(matching_ids)

    pairings = embedder.pairings_dict[mol_id]

    # only tuple targets are kept
    return np.array([
        target for target in pairings.values()
        if isinstance(target, tuple)
    ])
|