firecode 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. firecode/TEST_NOTEBOOK.ipynb +3940 -0
  2. firecode/__init__.py +0 -0
  3. firecode/__main__.py +118 -0
  4. firecode/_gaussian.py +97 -0
  5. firecode/algebra.py +405 -0
  6. firecode/ase_manipulations.py +879 -0
  7. firecode/atropisomer_module.py +516 -0
  8. firecode/automep.py +130 -0
  9. firecode/calculators/__init__.py +29 -0
  10. firecode/calculators/_gaussian.py +98 -0
  11. firecode/calculators/_mopac.py +242 -0
  12. firecode/calculators/_openbabel.py +154 -0
  13. firecode/calculators/_orca.py +129 -0
  14. firecode/calculators/_xtb.py +786 -0
  15. firecode/concurrent_test.py +119 -0
  16. firecode/embedder.py +2590 -0
  17. firecode/embedder_options.py +577 -0
  18. firecode/embeds.py +881 -0
  19. firecode/errors.py +65 -0
  20. firecode/graph_manipulations.py +333 -0
  21. firecode/hypermolecule_class.py +364 -0
  22. firecode/mep_relaxer.py +199 -0
  23. firecode/modify_settings.py +186 -0
  24. firecode/mprof.py +65 -0
  25. firecode/multiembed.py +148 -0
  26. firecode/nci.py +186 -0
  27. firecode/numba_functions.py +260 -0
  28. firecode/operators.py +776 -0
  29. firecode/optimization_methods.py +609 -0
  30. firecode/parameters.py +84 -0
  31. firecode/pka.py +275 -0
  32. firecode/profiler.py +17 -0
  33. firecode/pruning.py +421 -0
  34. firecode/pt.py +32 -0
  35. firecode/quotes.json +6651 -0
  36. firecode/quotes.py +9 -0
  37. firecode/reactive_atoms_classes.py +666 -0
  38. firecode/references.py +11 -0
  39. firecode/rmsd.py +74 -0
  40. firecode/settings.py +75 -0
  41. firecode/solvents.py +126 -0
  42. firecode/tests/C2F2H4.xyz +10 -0
  43. firecode/tests/C2H4.xyz +8 -0
  44. firecode/tests/CH3Cl.xyz +7 -0
  45. firecode/tests/HCOOH.xyz +7 -0
  46. firecode/tests/HCOOOH.xyz +8 -0
  47. firecode/tests/chelotropic.txt +3 -0
  48. firecode/tests/cyclical.txt +3 -0
  49. firecode/tests/dihedral.txt +2 -0
  50. firecode/tests/string.txt +3 -0
  51. firecode/tests/trimolecular.txt +9 -0
  52. firecode/tests.py +151 -0
  53. firecode/torsion_module.py +1035 -0
  54. firecode/utils.py +541 -0
  55. firecode-1.0.0.dist-info/LICENSE +165 -0
  56. firecode-1.0.0.dist-info/METADATA +321 -0
  57. firecode-1.0.0.dist-info/RECORD +59 -0
  58. firecode-1.0.0.dist-info/WHEEL +5 -0
  59. firecode-1.0.0.dist-info/top_level.txt +1 -0
firecode/embedder.py ADDED
@@ -0,0 +1,2590 @@
1
+ # coding=utf-8
2
+ '''
3
+ FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
4
+ Copyright (C) 2021-2024 Nicolò Tampellini
5
+
6
+ SPDX-License-Identifier: LGPL-3.0-or-later
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU Lesser General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public License
19
+ along with this program. If not, see
20
+ https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
21
+
22
+ '''
23
+ import logging
24
+ import os
25
+ import pickle
26
+ import random
27
+ import re
28
+ import sys
29
+ import time
30
+ from concurrent.futures import ProcessPoolExecutor, as_completed
31
+ from copy import deepcopy
32
+ from getpass import getuser
33
+ from itertools import groupby
34
+
35
+ import numpy as np
36
+ from psutil import virtual_memory
37
+
38
+ from firecode.__main__ import __version__
39
+ from firecode.algebra import norm_of
40
+ from firecode.ase_manipulations import ase_saddle
41
+ from firecode.calculators._xtb import (xtb_metadyn_augmentation, xtb_opt,
42
+ xtb_pre_opt)
43
+ from firecode.embedder_options import Options, OptionSetter, keywords_dict
44
+ from firecode.embeds import (_get_monomolecular_reactive_indices,
45
+ cyclical_embed, monomolecular_embed, string_embed)
46
+ from firecode.errors import (InputError, NoOrbitalError, SegmentedGraphError,
47
+ ZeroCandidatesError)
48
+ from firecode.graph_manipulations import get_sum_graph
49
+ from firecode.hypermolecule_class import Hypermolecule, Pivot, align_by_moi
50
+ from firecode.multiembed import multiembed_dispatcher
51
+ from firecode.nci import get_nci
52
+ from firecode.numba_functions import (compenetration_check, count_clashes,
53
+ prune_conformers_tfd, scramble)
54
+ from firecode.operators import operate
55
+ from firecode.optimization_methods import Opt_func_dispatcher, fitness_check
56
+ from firecode.parameters import orb_dim_dict
57
+ from firecode.pruning import (prune_by_moment_of_inertia, prune_by_rmsd,
58
+ prune_by_rmsd_rot_corr)
59
+ from firecode.pt import pt
60
+ from firecode.references import references
61
+ from firecode.rmsd import rmsd_and_max_numba
62
+ from firecode.settings import DEFAULT_LEVELS, PROCS
63
+ from firecode.torsion_module import _get_quadruplets, csearch
64
+ from firecode.utils import (_saturation_check, align_structures, ase_view,
65
+ auto_newline, cartesian_product, clean_directory,
66
+ graphize, loadbar, scramble_check, time_to_string,
67
+ timing_wrapper, write_xyz)
68
+
69
+
70
+ class Embedder:
71
+ '''
72
+ Embedder class, containing all methods to set attributes,
73
+ options and initialize the calculation
74
+ '''
75
+
76
def __init__(self, filename, stamp=None, procs=None):
    '''
    Initialize the Embedder object by reading the input filename (.txt).
    Sets the Option dataclass properties to default and then updates them
    with the user-requested keywords, if there are any.

    :param filename: path of the input text file. If it has a directory
        component, the working directory is changed to that directory.
    :param stamp: string used in the log file name. If None, a timestamp
        derived from time.ctime() is used instead.
    :param procs: value stored (as int) in self.procs. If None, the PROCS
        setting is used, falling back to 4 when that is falsy.
    '''

    self.t_start_run = time.perf_counter()

    parent_dir = os.path.dirname(filename)
    if parent_dir != '':
        # make the path absolute before moving: a relative path such as
        # 'runs/input.txt' would not point to the input file anymore
        # once the working directory has changed
        filename = os.path.abspath(filename)
        os.chdir(parent_dir)

    if stamp is None:
        self.stamp = time.ctime().replace(' ','_').replace(':','-')[4:-8]
        # replaced ctime yields 'Sun_May_23_18-53-47_2021', only keeping 'May_23_18-53'

    else:
        self.stamp = stamp

    try:
        self.avail_cpus = len(os.sched_getaffinity(0))
    except AttributeError:
        # os.sched_getaffinity only exists on some Unix platforms:
        # fall back to the total CPU count elsewhere
        self.avail_cpus = os.cpu_count() or 1

    self.avail_mem_gb = virtual_memory().available/1E9

    try:
        from torch.cuda import device_count
        self.avail_gpus = device_count()
    except ImportError:
        self.avail_gpus = 'N/A'

    self.procs = int(procs) if procs is not None else PROCS or 4

    log_filename = f'firecode_{self.stamp}.log'

    try:
        os.remove(log_filename)
        # start from a clean log if one with the same stamp exists

    except FileNotFoundError:
        pass

    self.logfile = open(log_filename, 'a', buffering=1, encoding="utf-8")
    logging.basicConfig(filename=log_filename, filemode='a')

    try:

        self.write_banner_and_info()
        # Write banner to log file

        self.options = Options()
        # initialize option subclass

        self.embed = None
        self.warnings = []
        # initialize embed type variable and warnings list

        inp = self._parse_input(filename)
        # collect information about molecule files

        self.objects = [Hypermolecule(name, c_ids) for name, c_ids in inp]
        # load designated molecular files

        # self.objects.sort(key=lambda obj: len(obj.atomcoords[0]), reverse=True)
        # sort them in descending number of atoms (not for now - messes up pairings)

        self.ids = np.array([len(mol.atomnos) for mol in self.objects])
        # Compute length of each molecule coordinates. Used to divide molecules in TSs

        self.graphs = [mol.graph for mol in self.objects]
        # Store molecular graphs

        self._read_pairings()
        # read imposed pairings from input file [i.e. mol1(6)<->mol2(45)]

        self.check_objects_compenetration()
        # make sure the input structures look alright

        self.check_saturation()
        # make sure that structures look nice and correct

        self._set_options(filename)
        # read the keywords line and set the relative options
        # then read the operators and store them

        self._calculator_setup()
        # initialize default or specified calculator

        self._print_references()
        # based on the data collected from setup

        self._apply_operators()
        # execute the operators, replacing the self.objects molecule

        self._setup()
        # setting embed type and getting ready to embed (if needed)

        if self.options.debug:
            for mol in self.objects:
                if hasattr(mol, 'reactive_atoms_classes_dict'):
                    if len(mol.reactive_atoms_classes_dict[0]) > 0:
                        mol.write_hypermolecule()
                        self.debuglog(f'DEBUG: written hypermolecule file for ({mol.filename})')
            self.log()

        if self.options.check_structures:
            self._inspect_structures()

    except Exception as e:
        # record the traceback in the log file, release the log handle
        # (the object is unusable after a failed setup) and propagate
        # the original exception with its traceback untouched
        logging.exception(e)
        self.logfile.close()
        raise
184
+
185
def log(self, string="", p=True):
    '''
    Write a line to the run log file, optionally echoing it on screen.

    :param string: text to log. A newline is appended before writing.
    :param p: if True, the text is also printed to stdout.
    '''
    if p:
        print(string)
    self.logfile.write(string + '\n')
190
+
191
def debuglog(self, string=""):
    '''
    Write a line to the debug log file. Does nothing unless
    self.options.debug is set.

    :param string: text to log. A newline is appended before writing.
    '''
    if not self.options.debug:
        return

    self.debug_logfile.write(string + '\n')
196
+
197
def warn(self, string):
    '''
    Store a warning message in self.warnings and write it to the log.

    :param string: text of the warning.
    '''
    self.warnings.append(string)
    self.log(string)
200
+
201
def write_banner_and_info(self):
    '''
    Write banner to log file, containing program and run info.

    The banner template is filled, in order, with: the program version,
    the current user name, the current time, the number of available CPUs,
    the number of available GPUs and the available memory in GB.
    '''

    # NOTE(review): the art below is layout-sensitive - its spacing has to be
    # checked against the packaged file before this text is reused
    banner = '''
. . * *
▒ ..
* . ▒ .. * ▒ ▒░▒ *
▒░▒ . ▒░░▒ . . ..
* . ▒ ▒░░░▒ ▒ ▒ ▒░░▒
▒ ▒ ▒░░░░▒ . .. ▒ ▒░░▒ * +
. ▒░ ░░▒ ▒ ▒░▒ * ▒░░░▒ ▒
* ▒ ▒░░░░░░▒▒░▒ * ▒▒░░▒ ▒ ▒░░░░▒ ▒ .. .
. ▒ ▒░░ ░░░░ ▒ ▒░░░▒ ▒░▒▒ ▒░░ ░░░▒▒▒▒
. ▒▒ ▒ ▒░░░░░░░░░░░▒ .. ▒░ ░▒ ▒░░░░▒ ▒░░░░░░░░░ ▒▒ * +
▒▒░░▒ ▒░░░░ ░▒░░░░▒ ▒░░░░░▒ ▒░░ ░░░▒░░░░░▒░░░░▒▒▒ *
* ▒▒░░░░▒ ▒░░░░░░░░░░░░░▒ . ▒░░░░░░▒░░░░░░░░░░░ ░░░░░░░░▒▒ +
▒░░░░░░░▒░░░ ░░░░░░░░░░░▒ ▒ ▒░ ░░░░░░░ ░░░░▒░░░ ░░░░░░░ ░▒
*▒░░░░ ░░░░░░░░░░▒░░░░░ ░░░░▒░▒░░░░░░░▒░░░ ░░░░░░░ ░░░▒░░░░░▒ ▒
. ▒░ ░░░░░░░░░░░ ░░░░░░░░░░░░░░░░░░░ ░░░░░░░░░░ ░░░░░░░░░░░░░░░░░▒ .░▒ . +
▒░░███████╗██╗██████╗░███████╗░█████╗░░█████╗░██████╗░███████╗░░░░░▒
▒ ░░██╔════╝██║██╔══██╗██╔════╝██╔══██╗██╔══██╗██╔══██╗██╔════╝░░░░▒ +
▒ ▒░░░█████╗░░██║██████╔╝█████╗░░██║░░╚═╝██║░░██║██║░░██║█████╗░░░░░▒ *
▒░ ░██╔══╝░░██║██╔══██╗██╔══╝░░██║░░██╗██║░░██║██║░░██║██╔══╝░░░░▒
▒░░░██║░░░░░██║██║░░██║███████╗╚█████╔╝╚█████╔╝██████╔╝███████╗░░░▒
. ▒░░░╚═╝░░ ░░╚═╝╚═╝░░╚═╝╚══════╝░╚════╝░░╚════╝░╚═════╝░╚══════╝░░░▒ * ▒ ..
▒ ░░░░░░ ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ ░░░░░░░░░░░░░░▒ .▒░▒ .
▒ ▒░░░░░ ░░▒░░░ Filtering Refiner and Embedder for ░░░ ░░▒░░ ░░░░░░░▒░░▒
▒░░░░▒░░░░░░░░ Conformationally Dense Ensembles ░░ ░░░░░░░ ░ ░░░░░░░░▒
.▒░░░░░░░░ ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ ░▒░░░ ░░░░▒ * ▒
. ▒░░░▒░╔═════════════════════════════════════════════╗░░░ ░░░ ░░░░▒
▒░ ░║ ║░▒░░ ░░░░░▒
+▒ ▒░░░░░║ nicolo.tampellini@yale.edu ║░░░░░░░░▒ ▒
▒░░░░║ ║░░ ░░░▒
.. ▒ ▒░▒░░║ Version 🔥{0:^24}║░▒░░░ ░░░▒ * .
. ▒ ░░░░░║ User 🔥{1:^24}║░░░░░░░░▒ +
▒░░ ▒░║ Current Time 🔥{2:^24}║░░ ░░▒ ▒ + ..
.. ▒ * ▒░ ░░║ Avail CPUs 🔥{3:^24}║░░░░░░░▒ *
. ▒░ ░░║ Avail GPUs 🔥{4:^24}║ ░░░░▒ . ..
.▒░▒▒░░║ Avail Memory 🔥{5:^24}║░░▒
.. ▒░░ ░║ ║░▒ .. .
. ▒ ░░╚═════════════════════════════════════════════╝░▒ +
. * ▒░░░░░░ ░░▒░░░░░▒▒░░░▒▒ ░░░░░░░▒░░ ░░░▒░░▒░░░░░▒░░░░▒ .
. ▒░░▒░▒░░░ ░░░▒░░▒░ ░░░░░░▒░░ ░░░░░░▒▒▒░ ░░▒░░░░░░▒ .
▒░ ░░░░░ ░░░░░▒░░░░▒░░░░░░░ ▒░░░░░░ ░░░░▒
░░░░ ░░░ ░ ░░ ░ ░ ░░

'''.format(__version__,
           getuser(),
           time.ctime()[0:-8],
           self.avail_cpus,
           self.avail_gpus,
           str(round(self.avail_mem_gb, 1))+' GB')

    # ⏣█▓▒░ banner art adapted from https://fsymbols.com/generators/tarty/

    self.log(banner)
290
+
291
def _print_references(self):
    '''
    Log the literature references relevant to the run settings.

    The FIRECODE reference is always logged. The GFN-FF, GFN2-XTB and CREST
    ones are added when, respectively, the force field calculator is XTB,
    the main calculator is XTB, or a mtd>/mtd_search> operator is requested.
    '''

    self.log('\n--> If you use FIRECODE in your publication, please cite this reference in the main text:\n' +
             f' {references["FIRECODE"]}')

    # (line label, key in the references dictionary, is it needed?)
    candidates = (
        (" GFN-FF : ", 'GFN-FF', self.options.ff_calc == "XTB"),
        (" GFN2-XTB : ", 'GFN2-XTB', self.options.calculator == "XTB"),
        (" CREST : ", 'CREST', any(("mtd>" in op or "mtd_search>" in op) for op in self.options.operators)),
    )

    # references are only looked up for the software actually in use
    cited = [f"{label}{references[key]}\n" for label, key, needed in candidates if needed]

    if cited:
        s = ''.join(cited)
        self.log(f'\n--> Your run also makes use of this other software: please cite these references as well.\n{s}')
311
+
312
def _parse_input(self, filename):
    '''
    Reads a textfile and sets the Embedder properties for the run.
    Keywords are read from the first non-comment(#), non-blank line
    if there are any, and molecules are read afterward.

    Sets self.mol_lines (and self.kw_line, if a keyword line is found) and
    records the operators found on molecule lines in self.options.

    :param filename: path of the input text file.
    :return: list of (molecule file name, reactive indices) tuples, one per
        molecule line. Reactive indices is a tuple of integers, or None
        if no index is specified on the line.
    :raises InputError: if the keyword or molecule lines cannot be parsed.
    '''

    with open(filename, 'r') as f:
        lines = f.readlines()

    # write a formatted copy of the input file to the log
    self.log(f'--> Input file: {filename}\n')
    # default=0: an empty file is reported as an InputError below,
    # rather than crashing here with a bare ValueError from max()
    longest = max((len(line.rstrip('\n')) for line in lines), default=0)
    self.log(' '+'-'*(longest+6))
    for _l, line in enumerate(lines):
        self.log(f'{_l+1:2}> | '+line.rstrip('\n').ljust(longest)+' |')
    self.log(' '+'-'*(longest+6)+'\n')

    # start parsing: get rid of comment and blank lines, including
    # lines made of whitespace only and indented comments
    lines = [line.replace(', ',',') for line in lines
             if line.strip() and not line.lstrip().startswith('#')]

    def _remove_internal_constraints(string):
        '''
        From a list of fragments like ['2a', '5a', '7b', '9'], return the
        tuple of indices whose letters are not repeated within the same
        molecule line (here (7, 9)). Fragments with no letter are kept.
        '''
        numbers = [int(re.sub('[^0-9]', '', i)) for i in string]
        letters = [re.sub('[^A-Za-z]', '', i) for i in string]
        count = [letters.count(_l) if (_l != '') else 1 for _l in letters]
        return tuple([n for n, c in zip(numbers, count) if c == 1])

    try:

        keywords = [_l.split('=')[0] if '(' not in _l else _l.split('(')[0] for _l in lines[0].split()]
        if any(k.upper() in keywords_dict.keys() for k in keywords):
            self.kw_line, *self.mol_lines = lines
        else:
            self.mol_lines = lines

        inp = []
        for _l, line in enumerate(self.mol_lines):

            if '>' in line:
                self.options.operators_dict[_l] = [op.rstrip().lstrip() for op in reversed(line.rstrip('\n').split('>')[:-1])]
                self.options.operators.append(line.rstrip('\n'))
                line = line.split('>')[-1].lstrip()
                # record that we will need to perform these operations before the run

            # not named 'filename': that would shadow the input file
            # name used in the error message below
            mol_name, *reactive_atoms = line.split()

            if reactive_atoms:
                # remove attributes from reactive indices
                reactive_atoms = [fragment for fragment in reactive_atoms if '=' not in fragment]

                # remove internal constraints from reactive indices
                reactive_indices = _remove_internal_constraints(reactive_atoms)
            else:
                reactive_indices = None

            inp.append((mol_name, reactive_indices))

        return inp

    except Exception as e:
        print(e)
        raise InputError(f'Error in reading molecule input for {filename}. Please check your syntax.') from e
375
+
376
def check_saturation(self):
    '''
    Run the saturation check on each loaded object, logging the
    outcome and raising a warning for the ones that fail it.
    The charge is read from the 'charge' attribute of the molecule,
    if it was set, and is zero otherwise.

    '''
    self.log()
    for mol in self.objects:
        charge = int(getattr(mol, "charge", 0))

        if not _saturation_check(mol.atomnos, charge):
            self.warn(f"--> WARNING! {mol.filename}: saturation check failed. Odd saturation index (charge={charge}). Radical or bad input geometry?")
            continue

        self.log(f"--> {mol.filename}: saturation check passed (even saturation index)")
390
+
391
def check_objects_compenetration(self):
    '''
    Run the compenetration check on every conformer of every input
    molecule, raising a warning with the number of clashes for the
    ones that do not pass it.

    '''
    for mol in self.objects:
        for conf_number, coords in enumerate(mol.atomcoords, start=1):

            if compenetration_check(coords):
                continue

            clashes = count_clashes(coords)
            plural = 's' if clashes > 1 else ''
            self.warn(f"--> WARNING! {mol.filename}, conformer {conf_number}, looks compenetrated ({clashes} interatomic distance{plural} < 0.5 A)")
401
+
402
def _set_options(self, filename):
    '''
    Set the options dataclass parameters through the OptionSetter class,
    from a list of given keywords. These will be used during the run to
    vary the search depth and/or output.

    :param filename: name of the input file, only used in the error message.
    :raises SyntaxError: propagated untouched from the OptionSetter.
    :raises InputError: for any other exception raised while setting options.
    '''

    try:
        OptionSetter(self).set_options()

    except SyntaxError:
        # syntax errors already carry a message meant for the user
        raise

    except Exception as error:
        print(error)
        raise InputError(f'Error in reading keywords from {filename}. Please check your syntax.')
419
+
420
+ def _set_reactive_atoms_cumnums(self):
421
+
422
+ if self.embed in ('cyclical', 'chelotropic', 'string'):
423
+ for i, mol in enumerate(self.objects):
424
+
425
+ if not hasattr(mol, 'reactive_atoms_classes_dict'):
426
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
427
+
428
+ for c, _ in enumerate(mol.atomcoords):
429
+ for r_atom in mol.reactive_atoms_classes_dict[c].values():
430
+ r_atom.cumnum = r_atom.index
431
+ if i > 0:
432
+ r_atom.cumnum += sum(self.ids[:i])
433
+
434
+ def _read_pairings(self):
435
+ '''
436
+ Reads atomic pairings to be respected from the input file, if any are present.
437
+ '''
438
+
439
+ parsed = []
440
+ unlabeled_list = []
441
+ self.pairings_dict = {i:{} for i, _ in enumerate(self.objects)}
442
+
443
+ for i, line in enumerate(self.mol_lines):
444
+ # now i is also the molecule index in self.objects
445
+
446
+ fragments = line.split('>')[-1].split()[1:]
447
+ # remove operators (if present) and the molecule name, keeping pairs only ['2a','5b']
448
+
449
+ # store custom variables
450
+ for fragment in deepcopy(fragments):
451
+ if '=' in fragment:
452
+ parts = fragment.split('=')
453
+
454
+ if len(parts) != 2:
455
+ raise InputError(f'Error reading attribute \'{fragment}\'. Syntax: \'var=value\'')
456
+
457
+ attr_name, attr_value = parts
458
+ setattr(self.objects[i], attr_name, attr_value)
459
+
460
+ fragments.remove(fragment)
461
+
462
+ self.log(f'--> Set attribute \'{attr_name}\' of {self.objects[i]} to \'{attr_value}\'.')
463
+
464
+ self.log()
465
+
466
+ unlabeled = []
467
+ pairings = []
468
+
469
+ for fragment in fragments:
470
+
471
+ if not fragment.lower().islower(): # if all we have is a number
472
+ unlabeled.append(int(fragment))
473
+
474
+ else:
475
+ index, letters = [''.join(g) for _, g in groupby(fragment, str.isalpha)]
476
+
477
+ for letter in letters:
478
+ pairings.append([int(index), letter])
479
+
480
+ # appending pairing to dict before
481
+ # calculating their cumulative index
482
+ # If a pairing is already present, add the number
483
+ # (refine>/REFINE runs)
484
+ for index, letter in pairings:
485
+
486
+ if self.pairings_dict[i].get(letter) is not None:
487
+ prev = self.pairings_dict[i][letter]
488
+ self.pairings_dict[i][letter] = (prev, index)
489
+
490
+ else:
491
+ self.pairings_dict[i][letter] = index
492
+
493
+ if i > 0:
494
+ for z in pairings:
495
+ z[0] += sum(self.ids[:i])
496
+
497
+ if unlabeled != []:
498
+ for z in unlabeled:
499
+ z += sum(self.ids[:i])
500
+ unlabeled_list.append(z)
501
+ else:
502
+ if unlabeled != []:
503
+ for z in unlabeled:
504
+ unlabeled_list.append(z)
505
+
506
+ # getting the cumulative index rather than the molecule index
507
+
508
+ for cumulative_pair in pairings:
509
+ parsed.append(cumulative_pair)
510
+ # parsed looks like [[1, 'a'], [9, 'a']] where numbers are
511
+ # cumulative indices for TSs
512
+
513
+ links = {j:[] for j in set([i[1] for i in parsed])}
514
+ for index, tag in parsed:
515
+ links[tag].append(index)
516
+ # storing couples into a dictionary
517
+
518
+ pairings = sorted(list(links.items()), key=lambda x: x[0])
519
+ # sorting values so that 'a' is the first pairing
520
+
521
+ self.pairings_table = {i[0]:sorted(i[1]) for i in pairings}
522
+ # cumulative, looks like {'a':[3,45]}
523
+
524
+ letters = tuple(self.pairings_table.keys())
525
+
526
+ for letter, ids in self.pairings_table.items():
527
+
528
+ if len(ids) == 1:
529
+ raise SyntaxError(f'Letter \'{letter}\' is only specified once. Please flag the second reactive atom.')
530
+
531
+ if len(ids) > 2:
532
+ raise SyntaxError(f'Letter \'{letter}\' is specified more than two times. Please remove the unwanted letters.')
533
+
534
+ if len(self.mol_lines) == 3:
535
+ # adding third pairing if we have three molecules and user specified two pairings
536
+ # (used to adjust distances for trimolecular TSs)
537
+ if len(unlabeled_list) == 2:
538
+ third_constraint = list(sorted(unlabeled_list))
539
+ self.pairings_table['?'] = third_constraint
540
+
541
+ elif len(self.mol_lines) == 2:
542
+ # adding second pairing if we have two molecules and user specified one pairing
543
+ # (used to adjust distances for bimolecular TSs)
544
+ if len(unlabeled_list) == 2:
545
+ second_constraint = list(sorted(unlabeled_list))
546
+ self.pairings_table['?'] = second_constraint
547
+
548
+ # Now record the internal constraints, that is the intramolecular
549
+ # distances to freeze and later enforce to the imposed spacings
550
+ self.internal_constraints = []
551
+
552
+ # making sure we set the kw_line attribute
553
+ self.kw_line = self.kw_line if hasattr(self, 'kw_line') else ''
554
+
555
+ for letter, pair in self.pairings_table.items():
556
+ for mol_id in self.pairings_dict:
557
+ if isinstance(self.pairings_dict[mol_id].get(letter), tuple):
558
+
559
+ # They are internal constraints only if we have a distance
560
+ # to impose later on. We are checking this way because the
561
+ # set_options function is still to be called at this stage
562
+ if f'{letter}=' in self.kw_line:
563
+ self.internal_constraints.append([pair])
564
+ self.internal_constraints = np.concatenate(self.internal_constraints) if self.internal_constraints else []
565
+
566
+ def _set_custom_orbs(self, orb_string):
567
+ '''
568
+ Update the reactive_atoms classes with the user-specified orbital distances.
569
+ :param orb_string: string that looks like 'a=2.345,b=3.456,c=2.22'
570
+
571
+ '''
572
+ for mol in self.objects:
573
+ if not hasattr(mol, 'reactive_atoms_classes_dict'):
574
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
575
+
576
+ self.pairing_dists = {piece.split('=')[0] : float(piece.split('=')[1]) for piece in orb_string.split(',')}
577
+
578
+ # Set the new orbital center with imposed distance from the reactive atom. The imposed distance is half the
579
+ # user-specified one, as the final atomic distances will be given by two halves of this length.
580
+ for letter, dist in self.pairing_dists.items():
581
+
582
+ if letter not in self.pairings_table:
583
+ raise SyntaxError(f'Letter \'{letter}\' is specified in DIST but not present in molecules string.')
584
+
585
+ for i, mol in enumerate(self.objects):
586
+ for c, _ in enumerate(mol.atomcoords):
587
+
588
+ r_index = self.pairings_dict[i].get(letter)
589
+ if r_index is None:
590
+ continue
591
+
592
+ if isinstance(r_index, int):
593
+ r_atom = mol.reactive_atoms_classes_dict[c][r_index]
594
+ r_atom.init(mol, r_index, update=True, orb_dim=dist/2, conf=c)
595
+
596
+ else:
597
+ for r_i in r_index:
598
+ r_atom = mol.reactive_atoms_classes_dict[c].get(r_i)
599
+ if r_atom:
600
+ r_atom.init(mol, r_i, update=True, orb_dim=dist/2, conf=c)
601
+
602
+ # saves the last orb_string executed so that operators can
603
+ # keep the imposed orbital spacings when replacing molecules
604
+ self.orb_string = orb_string
605
+ # self.log(f'DEBUG ---> Updated orb string -> {orb_string}')
606
+
607
+ def _set_pivots(self, mol):
608
+ '''
609
+ params mol: Hypermolecule class
610
+ (Cyclical embed) Function that sets the mol.pivots attribute, that is a list
611
+ containing each vector connecting two orbitals on different atoms or on the
612
+ same atom (for single-reactive atom molecules in chelotropic embedding)
613
+ '''
614
+ mol.pivots = self._get_pivots(mol)
615
+
616
+ for c, _ in enumerate(mol.atomcoords):
617
+ if self.options.suprafacial:
618
+ if len(mol.pivots[c]) == 4:
619
+ # reactive atoms have two centers each.
620
+ # Applying suprafacial correction, only keeping
621
+ # the shorter two, as they should be the suprafacial ones
622
+ norms = np.linalg.norm([p.pivot for p in mol.pivots[c]], axis=1)
623
+ for sample in norms:
624
+ to_keep = [i for i in norms if sample >= i]
625
+ if len(to_keep) == 2:
626
+ mask = np.array([i in to_keep for i in norms])
627
+ mol.pivots[c] = mol.pivots[c][mask]
628
+ break
629
+
630
+ # if mol is reacting with a sigmastar orbital (two connected reactive Sp3/Single
631
+ # Bond centers) then remove all pivots that are not the shortest. This ensures
632
+ # the "suprafaciality" to the pivots used, preventing the embed of
633
+ # impossible bonding structures
634
+ if hasattr(mol, 'sp3_sigmastar') and mol.sp3_sigmastar:
635
+ pivots_lengths = [norm_of(pivot.pivot) for pivot in mol.pivots[c]]
636
+ shortest_length = min(pivots_lengths)
637
+ mask = np.array([(i - shortest_length) < 1e-5 for i in pivots_lengths])
638
+ mol.pivots[c] = mol.pivots[c][mask]
639
+
640
+ def _get_pivots(self, mol):
641
+ '''
642
+ params mol: Hypermolecule class
643
+ (Cyclical embed) Function that yields the molecule pivots. Called by _set_pivots
644
+ and in pre-conditioning (deforming, bending) the molecules in ase_bend.
645
+ '''
646
+
647
+ if not hasattr(mol, 'reactive_atoms_classes_dict'):
648
+ return []
649
+
650
+ pivots_list = [[] for _ in mol.atomcoords]
651
+
652
+ for c, _ in enumerate(mol.atomcoords):
653
+
654
+ if len(mol.reactive_atoms_classes_dict[c]) == 2:
655
+ # most molecules: dienes and alkenes for Diels-Alder, conjugated ketones for acid-bridged additions
656
+
657
+ indices = cartesian_product(*[range(len(atom.center)) for atom in mol.reactive_atoms_classes_dict[c].values()])
658
+ # indices of vectors in reactive_atom.center. Reactive atoms are 2 and so for one center on atom 0 and
659
+ # 2 centers on atom 2 we get [[0,0], [0,1], [1,0], [1,1]]
660
+
661
+ for i,j in indices:
662
+ a1, a2 = mol.get_r_atoms(c)
663
+
664
+ c1 = a1.center[i]
665
+ c2 = a2.center[j]
666
+
667
+ pivots_list[c].append(Pivot(c1, c2, a1, a2, i, j))
668
+
669
+ elif len(mol.reactive_atoms_classes_dict[c]) == 1:
670
+ # carbenes, oxygen atom in Prilezhaev reaction, SO2 in classic chelotropic reactions
671
+
672
+ indices = cartesian_product(*[range(len(mol.get_r_atoms(c)[0].center)) for _ in range(2)])
673
+ indices = [i for i in indices if i[0] != i[1] and (sorted(i) == i).all()]
674
+ # indices of vectors in reactive_atom.center. Reactive atoms is just one, that builds pivots with itself.
675
+ # pivots with the same index or inverse order are discarded. 2 centers on one atom 2 yield just [[0,1]]
676
+
677
+ for i,j in indices:
678
+ a1 = mol.get_r_atoms(c)[0]
679
+ # chelotropic embeds have pivots that start/end on the same atom
680
+
681
+ c1 = a1.center[i]
682
+ c2 = a1.center[j]
683
+
684
+ pivots_list[c].append(Pivot(c1, c2, a1, a1, i, j))
685
+
686
+ return [np.array(_l) for _l in pivots_list]
687
+
688
+ def _setup(self, p=True):
689
+ '''
690
+ Setting embed type and calculating the number of conformation combinations based on embed type
691
+ '''
692
+
693
+ if any('pka>' in op for op in self.options.operators) or (
694
+ any('scan>' in op for op in self.options.operators)
695
+ ):
696
+ self.embed = 'data'
697
+ # If a pka or scan operator is requested, the embed is skipped
698
+ # and data is shown instead
699
+ return
700
+
701
+ if any('refine>' in op for op in self.options.operators) or self.options.noembed:
702
+ self.embed = 'refine'
703
+
704
+ # If the run is a refine>/REFINE one, the self.embed
705
+ # attribute is set in advance by the self._set_options
706
+ # function through the OptionSetter class
707
+ return
708
+
709
+ for mol in self.objects:
710
+ if self.options.max_confs < len(mol.atomcoords) and self.embed is not None:
711
+ self.log(f'--> {mol.filename} - kept {self.options.max_confs}/{len(mol.atomcoords)} conformations for the embed (override with CONFS=n)\n')
712
+ mol.atomcoords = mol.atomcoords[0:self.options.max_confs]
713
+ # remove conformers if there are too many
714
+
715
+ if all([len(mol.reactive_indices) == 0 for mol in self.objects]):
716
+ self.embed = None
717
+ # Flag the embed type as None if no reactive indices are
718
+ # provided (and the run is not a refine> one)
719
+ return
720
+
721
+ if len(self.objects) == 1:
722
+ # embed must be either monomolecular
723
+
724
+ mol = self.objects[0]
725
+
726
+ if len(mol.reactive_indices) == 2:
727
+
728
+ self.embed = 'monomolecular'
729
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
730
+ self._set_pivots(mol)
731
+
732
+ self.options.only_refined = True
733
+ self.options.fix_angles_in_deformation = True
734
+ # These are required: otherwise, extreme bending could scramble molecules
735
+
736
+ else:
737
+ self.embed = 'error'
738
+ # if none of the previous, the program had trouble recognizing the embed to carry.
739
+
740
+ return
741
+
742
+ elif len(self.objects) in (2,3):
743
+ # Setting embed type and calculating the number of conformation combinations based on embed type
744
+
745
+ cyclical = all(len(molecule.reactive_indices) == 2 for molecule in self.objects)
746
+
747
+ # chelotropic embed should check that the two atoms on one molecule are bonded
748
+ chelotropic = sorted(len(molecule.reactive_indices) for molecule in self.objects) == [1,2]
749
+
750
+ string = all(len(molecule.reactive_indices) == 1 for molecule in self.objects) and len(self.objects) == 2
751
+
752
+ multiembed = (len(self.objects) == 2 and
753
+ all(len(molecule.reactive_indices) >= 2 for molecule in self.objects) and
754
+ not cyclical)
755
+
756
+ if cyclical or chelotropic or multiembed:
757
+
758
+ if cyclical:
759
+ self.embed = 'cyclical'
760
+ elif multiembed:
761
+ self.embed = 'multiembed'
762
+ else:
763
+ self.embed = 'chelotropic'
764
+ for mol in self.objects:
765
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
766
+ for c, _ in enumerate(mol.atomcoords):
767
+ for index, atom in mol.reactive_atoms_classes_dict[c].items():
768
+ orb_dim = norm_of(atom.center[0]-atom.coord)
769
+ atom.init(mol, index, update=True, orb_dim=orb_dim + 0.2, conf=c)
770
+ # Slightly enlarging orbitals for chelotropic embeds, or they will
771
+ # be generated a tad too close to each other for how the cyclical embed works
772
+
773
+ self.options.rotation_steps = 5
774
+
775
+ if hasattr(self.options, 'custom_rotation_steps'):
776
+ # if user specified a custom value, use it.
777
+ self.options.rotation_steps = self.options.custom_rotation_steps
778
+
779
+ self.systematic_angles = cartesian_product(*[range(self.options.rotation_steps+1) for _ in self.objects]) \
780
+ * 2*self.options.rotation_range/self.options.rotation_steps - self.options.rotation_range
781
+
782
+ if p:
783
+ # avoid calculating pivots if this is an early call
784
+ for molecule in self.objects:
785
+ self._set_pivots(molecule)
786
+
787
+ elif string:
788
+
789
+ self.embed = 'string'
790
+ self.options.rotation_steps = 36
791
+
792
+ for mol in self.objects:
793
+ if not hasattr(mol, 'reactive_atoms_classes_dict'):
794
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
795
+
796
+ if hasattr(self.options, 'custom_rotation_steps'):
797
+ # if user specified a custom value, use it.
798
+ self.options.rotation_steps = self.options.custom_rotation_steps
799
+
800
+ self.systematic_angles = [n * 360 / self.options.rotation_steps for n in range(self.options.rotation_steps)]
801
+
802
+ else:
803
+ self.embed = 'error'
804
+
805
+ if multiembed:
806
+ # Complex, unspecified embed type - will explore many possibilities concurrently
807
+ self.embed = 'multiembed'
808
+ for mol in self.objects:
809
+ mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
810
+
811
+ if self.embed == 'error':
812
+ raise InputError(('Bad input - The only molecular configurations accepted are:\n'
813
+ '1) One molecule with two reactive centers (monomolecular embed)\n'
814
+ '2) One molecule with four indices(dihedral embed)\n'
815
+ '3) Two or three molecules with two reactive centers each (cyclical embed)\n'
816
+ '4) Two molecules with one reactive center each (string embed)\n'
817
+ '5) Two molecules, one with a single reactive center and the other with two (chelotropic embed)\n'
818
+ '6) Two molecules with at least two reactive centers each'))
819
+
820
+ if p:
821
+ # avoid calculating this if this is an early call
822
+
823
+ self._set_reactive_atoms_cumnums()
824
+ # appending to each reactive atom the cumulative
825
+ # number indexing in the TS context
826
+
827
+ else:
828
+ raise InputError('Bad input - could not set up an appropriate embed type (too many structures specified?)')
829
+
830
+ # Only call this part if it is not an early call
831
+ if p:
832
+ if self.options.shrink:
833
+ for molecule in self.objects:
834
+ molecule._scale_orbs(self.options.shrink_multiplier)
835
+ self._set_pivots(molecule)
836
+ self.options.only_refined = True
837
+ # SHRINK - scale orbitals and rebuild pivots
838
+
839
+ # if self.options.rmsd is None:
840
+ # self.options.rmsd = 0.25
841
+
842
+ self.candidates = self._get_number_of_candidates()
843
+ _s = self.candidates or 'Many'
844
+ self.log(f'--> Setup performed correctly. {_s} candidates will be generated.\n')
845
+
846
+ def _get_number_of_candidates(self):
847
+ '''
848
+ Get the number of structures that will be generated in the run.
849
+ '''
850
+ _l = len(self.objects)
851
+ if _l == 1:
852
+ return int(sum([len(self.objects[0].pivots[c])
853
+ for c, _ in enumerate(self.objects[0].atomcoords)]))
854
+
855
+ if self.embed == 'string':
856
+ return int(self.options.rotation_steps*(
857
+ np.prod([sum([len(mol.get_r_atoms(conf)[0].center)
858
+ for conf, _ in enumerate(mol.atomcoords)])
859
+ for mol in self.objects]))
860
+ )
861
+
862
+ if self.embed == 'multiembed':
863
+ return 0
864
+
865
+ candidates = 2*len(self.systematic_angles)*np.prod([len(mol.atomcoords) for mol in self.objects])
866
+
867
+ if _l == 3:
868
+ candidates *= 4
869
+ # Trimolecular there are 8 different triangles originated from three oriented vectors,
870
+ # while only 2 disposition of two vectors (parallel, antiparallel).
871
+
872
+ if self.pairings_table:
873
+ # If there is any pairing to be respected, each one reduces the number of
874
+ # candidates to be computed.
875
+
876
+ if self.embed == 'cyclical':
877
+ if len(self.objects) == 2:
878
+ # Diels-Alder-like, if we have one (two) pairing(s) only half
879
+ # of the total arrangements are to be checked
880
+ candidates /= 2
881
+
882
+ else: # trimolecular
883
+ if len(self.pairings_table) == 1:
884
+ candidates /= 4
885
+ else: # trimolecular, 2 (3) pairings imposed
886
+ candidates /= 8
887
+
888
+ candidates *= np.prod([len(mol.pivots[0]) for mol in self.objects]) # add sum over len(mol.pivots[c])?
889
+ # The more atomic pivots, the more candidates
890
+
891
+ return int(candidates)
892
+
893
def _set_embedder_structures_from_mol(self):
    '''
    Intended for REFINE runs: use the conformers of the first molecule
    in self.objects as the embedder structures, and reset the attributes
    that go with them (atomnos, constrained_indices, ids, energies,
    exit_status, embed_graph).
    '''
    source_mol = self.objects[0]

    self.structures = source_mol.atomcoords
    self.atomnos = source_mol.atomnos
    self.constrained_indices = _get_monomolecular_reactive_indices(self)
    self.ids = None

    # every structure starts with zero energy and a positive exit status
    self.energies = np.array([0] * len(self.structures))
    self.exit_status = np.ones(self.structures.shape[0], dtype=bool)

    # graph of the first conformer plus its first set of constraints
    first_graph = graphize(self.structures[0], self.atomnos)
    self.embed_graph = get_sum_graph([first_graph], self.constrained_indices[0])
905
+
906
+ def _calculator_setup(self):
907
+ '''
908
+ Set up the calculator to be used with default theory levels.
909
+ '''
910
+ # Checking that calculator is specified correctly
911
+ if self.options.calculator not in ('MOPAC', 'ORCA', 'GAUSSIAN','XTB', 'AIMNET2'):
912
+ raise SyntaxError(f'\'{self.options.calculator}\' is not a valid calculator. Change its value from the parameters.py file or with the CALC keyword.')
913
+
914
+ # Setting default theory level if user did not specify it
915
+ if self.options.theory_level is None:
916
+ self.options.theory_level = DEFAULT_LEVELS[self.options.calculator]
917
+
918
+ self.dispatcher = Opt_func_dispatcher()
919
+
920
+ if self.options.calculator == 'AIMNET2':
921
+ self.dispatcher.load_aimnet2_calc(self.options.theory_level, logfunction=self.log)
922
+
923
+
924
+ def _apply_operators(self):
925
+ '''
926
+ Replace molecules in self.objects with
927
+ their post-operator ones.
928
+ '''
929
+
930
+ # early call to get the self.embed attribute
931
+ self._setup(p=False)
932
+
933
+ # for input_string in self.options.operators:
934
+ for index, operators in self.options.operators_dict.items():
935
+
936
+ for operator in operators:
937
+
938
+ input_string = f'{operator}> {self.objects[index].filename}'
939
+ outname = operate(input_string, self)
940
+ # operator = input_string.split('>')[0]
941
+
942
+ if operator == 'refine':
943
+ self._set_embedder_structures_from_mol()
944
+
945
+ # these operators do not need molecule substitution
946
+ elif operator not in ('pka', 'scan'):
947
+
948
+ # names = [mol.filename for mol in self.objects]
949
+ # filename = self._extract_filename(input_string)
950
+ # index = names.index(filename)
951
+ reactive_indices = self.objects[index].reactive_indices
952
+
953
+ # replacing the old molecule with the one post-operators
954
+ self.objects[index] = Hypermolecule(outname, reactive_indices)
955
+
956
+ # calculating where the new orbitals are
957
+ self.objects[index].compute_orbitals(override='Single' if self.options.simpleorbitals else None)
958
+
959
+ # updating orbital size if not default
960
+ if hasattr(self, 'orb_string'):
961
+ self._set_custom_orbs(self.orb_string)
962
+
963
+ # updating global docker if necessary
964
+ if operator in ('rsearch', 'csearch') and self.options.noembed and len(self.objects) == 1:
965
+ self.structures = self.objects[0].atomcoords
966
+ self.atomnos = self.objects[0].atomnos
967
+ self.constrained_indices = _get_monomolecular_reactive_indices(self)
968
+ self.ids = None
969
+ self.energies = np.array([0 for _ in self.structures])
970
+ self.exit_status = np.ones(self.structures.shape[0], dtype=bool)
971
+ self.embed_graph = get_sum_graph([graphize(self.structures[0], self.atomnos)], self.constrained_indices[0])
972
+
973
+ # updating the orbital cumnums for
974
+ # all the molecules in the run
975
+ self._set_reactive_atoms_cumnums()
976
+
977
+ # resetting the attribute
978
+ self.embed = None
979
+
980
+ def _extract_filename(self, input_string):
981
+ '''
982
+ Input: 'refine> firecode_unoptimized_comp_check.xyz 5a 36a 0b 43b 33c 60c'
983
+ Output: 'firecode_unoptimized_comp_check.xyz'
984
+ '''
985
+ input_string = input_string.split('>')[-1].lstrip()
986
+ # remove operator and whitespaces after it
987
+
988
+ input_string = input_string.split()[0]
989
+ # remove pairing numbers/letters and newline chars
990
+
991
+ return input_string
992
+
993
def _inspect_structures(self):
    '''
    Pass each input molecule to ase_view for inspection, then close
    the log streams, delete the run logfile and exit the program.
    '''
    self.log('--> Structures check requested. Shutting down after last window is closed.\n')

    for molecule in self.objects:
        ase_view(molecule)

    self.close_log_streams()

    # the logfile of an inspection-only run is not kept
    logname = f'firecode_{self.stamp}.log'
    os.remove(logname)

    sys.exit()
1006
+
1007
def scramble(self, array, sequence):
    '''
    Return a new numpy array with the elements of array
    picked in the order given by sequence.
    '''
    picked = []
    for position in sequence:
        picked.append(array[position])

    return np.array(picked)
1009
+
1010
def get_pairing_dist_from_letter(self, letter):
    '''
    Get constrained distance between paired reactive
    atoms, accessed via the associated constraint letter.
    The distance returned is the final one (not affected by SHRINK)

    A distance stored for the letter in self.pairing_dists, if any,
    takes precedence. Otherwise the distance is the sum of the orbital
    lengths of the reactive atoms carrying that letter.

    Returns None if a NoOrbitalError is raised while reading the
    orbital lengths.
    '''

    if hasattr(self, 'pairing_dists') and self.pairing_dists.get(letter) is not None:
        return self.pairing_dists[letter]

    d = 0
    try:
        for mol_index, mol_pairing_dict in self.pairings_dict.items():

            r_atom_index = mol_pairing_dict.get(letter)

            # Compare against None explicitly: atom index 0 is a valid
            # reactive atom, but it is falsy and a plain truth test
            # would silently leave its orbital out of the sum.
            if r_atom_index is None:
                continue

            mol = self.objects[mol_index]

            # for refine embeds, one letter corresponds to two indices
            # on the same molecule
            if isinstance(r_atom_index, tuple):
                i1, i2 = r_atom_index
                return (mol.get_orbital_length(i1) +
                        mol.get_orbital_length(i2))

            # for other runs, it is just one atom per molecule per letter
            d += mol.get_orbital_length(r_atom_index)

        # with SHRINK the summed length is divided by the shrink multiplier
        if self.options.shrink:
            d /= self.options.shrink_multiplier

        return d

    # If no orbitals were built, return None
    except NoOrbitalError:
        return None
1043
+
1044
def get_pairing_dists_from_constrained_indices(self, constrained_pair):
    '''
    Returns the constrained distance
    for a specific constrained pair of indices

    The pair is looked up in self.pairings_table, comparing both
    indices in the order given. Returns None if no letter of the
    table is associated with the pair.
    '''
    # Only the table lookup can legitimately come up empty: the default
    # of next() covers that case, so that exceptions raised while
    # computing the distance are not mistaken for a missing pairing.
    letter = next(
        (lett for lett, pair in self.pairings_table.items()
         if pair[0] == constrained_pair[0] and pair[1] == constrained_pair[1]),
        None,
    )

    if letter is None:
        return None

    return self.get_pairing_dist_from_letter(letter)
1056
+
1057
def get_pairing_dists(self, conf):
    '''
    Returns a list with the constrained distances for each embedder
    constraint found in self.constrained_indices[conf], followed by the
    ones of the internal constraints (if any).
    Returns None if that entry of constrained_indices is empty.
    '''
    conf_constraints = self.constrained_indices[conf]

    if conf_constraints.size == 0:
        return None

    if len(self.internal_constraints) > 0:
        conf_constraints = np.concatenate([conf_constraints, self.internal_constraints])

    distances = []
    for pair in conf_constraints:
        distances.append(self.get_pairing_dists_from_constrained_indices(pair))

    return distances
1066
+
1067
def write_structures(
        self,
        tag,
        indices=None,
        energies=True,
        relative=True,
        extra='',
        align='indices',
        p=True,
    ):
    '''
    Writes structures to file.

    tag: label used in the output file name and in each structure title
    indices: passed to the alignment function
    energies: if True, each title reports the energy of the structure
    relative: if True, reported energies are relative to the lowest one
    extra: string appended to each structure title
    align: alignment function to use, 'indices' or 'moi'
    p: if True, log a message once the file is written

    The output file name is stored in self.outname.
    '''

    align_functions = {
        'indices' : align_structures,
        'moi' : align_by_moi,
    }

    if energies:
        # Work on a new array: an in-place subtraction would shift
        # self.energies itself, also altering the 1E10 values that
        # mark structures that were not refined.
        if relative:
            rel_e = self.energies - np.min(self.energies)
        else:
            rel_e = self.energies

    # truncate if there are too many (embed debug first dump)
    if len(self.structures) > 10000 and not self.options.let:
        self.log(f'Truncated {tag} output structures to 10000 (from {len(self.structures)} - keyword LET to override).')
        output_structures = self.structures[0:10000]
    else:
        output_structures = self.structures

    self.outname = f'firecode_{tag}_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:

        aligned = align_functions[align](output_structures, atomnos=self.atomnos, indices=indices)

        for i, structure in enumerate(aligned):
            # NOTE: title text kept verbatim, as it ends up in output files
            title = f'Strucure {i+1} - {tag}'

            if energies:
                title += f' - Rel. E. = {round(rel_e[i], 3)} kcal/mol '

            title += extra

            write_xyz(structure, self.atomnos, f, title=title)

    if p:
        self.log(f'Wrote {len(output_structures)} {tag} structures to {self.outname} file.\n')
1115
+
1116
def write_quote(self):
    '''
    Pick a random entry from the quotes loaded by load_quotes and
    write it to the logfile, followed by the author if there is one.
    '''
    from firecode.quotes import load_quotes

    entry = random.choice(load_quotes())
    quote, author = entry.values()

    self.log('\n' + auto_newline(quote))

    # quotes with an empty author string are written alone
    if author == "":
        return

    self.log(f' - {author}\n')
1127
+
1128
def run(self):
    '''
    Run the embedding through a RunEmbedding instance built from this
    embedder. Exceptions are logged via logging.exception and re-raised.
    '''
    try:
        runner = RunEmbedding(self)
        runner.run()

    except Exception as error:
        logging.exception(error)
        raise error
1138
+
1139
def normal_termination(self):
    '''
    Terminate the run, printing the total time and the
    relative energies of the first 10 structures, if possible.

    Energies are reported relative to the first structure, along with
    the RMSD of each structure from the first one. Ends by writing a
    quote, closing the log streams and exiting the program.
    '''
    clean_directory()

    total_time = time_to_string(time.perf_counter() - self.t_start_run, verbose=True)
    self.log(f'\n--> FIRECODE normal termination: total time {total_time}.')

    show = 10
    if hasattr(self, "structures") and len(self.structures) > 0 and hasattr(self, "energies"):

        # only the first energies are retained for the summary
        if len(self.energies) > show:
            self.energies = self.energies[0:show]

        # Don't write structure info if there is only one, or all are zero
        if np.max(self.energies - np.min(self.energies)) > 0:

            solvent_tag = f"/{self.options.solvent}" if self.options.solvent is not None else ""
            self.log(f'\n--> Energies of output structures (first {show}, {self.options.theory_level}/{self.options.calculator}{solvent_tag})\n')

            self.log(f'> # (total {len(self.structures)}) Rel. E. RMSD')
            self.log('-------------------------------------------')

            for i, energy in enumerate(self.energies - self.energies[0]):

                if i == 0:
                    rmsd_value = '(ref)'
                else:
                    deviation = rmsd_and_max_numba(self.structures[i], self.structures[0], center=True)[0]
                    rmsd_value = str(round(deviation, 2)) + ' Å'

                self.log(f'> Candidate {str(i+1):2} : {energy:.2f} kcal/mol : {rmsd_value}')

            if len(self.structures) > show:
                self.log(f'> ... ({len(self.structures)-show} more)')

    self.write_quote()
    self.close_log_streams()
    sys.exit()
1172
+
1173
def close_log_streams(self):
    '''
    Close the logfile and, if the embedder has one, the debug logfile.
    '''
    self.logfile.close()

    if not hasattr(self, "debug_logfile"):
        return

    self.debug_logfile.close()
1178
+
1179
+ class RunEmbedding(Embedder):
1180
+ '''
1181
+ Class for running embeds, containing all
1182
+ methods to embed and refine structures
1183
+ '''
1184
+
1185
+ def __init__(self, embedder):
1186
+ '''
1187
+ Copying all non-callable attributes
1188
+ of the previous embedder.
1189
+ '''
1190
+ # Copy all the non-callables (variables) into the child class
1191
+ for attr in dir(embedder):
1192
+ if attr[0:2] != '__' and attr != 'run':
1193
+ attr_value = getattr(embedder, attr)
1194
+ if not hasattr(attr_value, '__call__'):
1195
+ setattr(self, attr, attr_value)
1196
+
1197
def rel_energies(self):
    '''
    Return self.energies relative to their minimum value.
    '''
    lowest = np.min(self.energies)
    return self.energies - lowest
1199
+
1200
def apply_mask(self, attributes, mask):
    '''
    Replace each of the named attributes with its masked version
    (attribute[mask]). Names that are not attributes of the
    instance are skipped.
    '''
    for name in attributes:

        if not hasattr(self, name):
            continue

        setattr(self, name, getattr(self, name)[mask])
1208
+
1209
def zero_candidates_check(self):
    '''
    Make sure at least one structure is left: if there is none,
    log the warnings and raise ZeroCandidatesError.
    '''
    if len(self.structures) != 0:
        return

    self.log_warnings()
    raise ZeroCandidatesError()
1216
+
1217
def generate_candidates(self):
    '''
    Generate a series of candidate structures with the embed function
    that matches self.embed, and store them in self.structures.

    Nothing is generated for 'refine' runs. Otherwise self.atomnos and
    self.embed_graph are also set, and the structures are written to
    file (plus a status dump when the debug option is on).
    '''
    if self.embed == 'refine':
        self.log('\n')
        return

    embed_function = {
        'chelotropic' : cyclical_embed,
        'cyclical' : cyclical_embed,
        'monomolecular' : monomolecular_embed,
        'string' : string_embed,
        'multiembed' : multiembed_dispatcher,
    }[self.embed]

    # Embed structures and assign them to self.structures
    self.structures = embed_function(self)

    # cumulative list of atomic numbers associated with coordinates
    atomnos_per_mol = [molecule.atomnos for molecule in self.objects]
    self.atomnos = np.concatenate(atomnos_per_mol)

    # Build the embed graph. This will be used as a future reference.
    # Note that the use of the first constrained_indices pair is irrelevant
    # for the torsion fingerprint outcome, but other future features might
    # rely on the embed_graph to be accurate if conformers have different
    # constrained indices.
    additional_bonds = self.constrained_indices[0]
    if len(self.internal_constraints) > 0:
        additional_bonds = np.concatenate((self.internal_constraints, additional_bonds))

    self.embed_graph = get_sum_graph(self.graphs, additional_bonds)

    elapsed = time_to_string(time.perf_counter()-self.t_start_run)
    self.log(f'Generated {len(self.structures)} transition state candidates ({elapsed})\n')

    self.write_structures('embedded', energies=False)

    if self.options.debug:
        self.dump_status('generate_candidates')
        self.debuglog('DEBUG: Dumped emebedder status after generating candidates ("generate_candidates")')
1260
+
1261
def dump_status(self, outname, only_fixed_constraints=False):
    '''
    Writes the current status of the run to file, to help debugging.

    Files written, all prefixed by outname:
    - _energies.dat: relative energy of each candidate (if energies are set)
    - _structures.xyz: aligned structures, with exit status and energy
    - _constraints.dat: constraints and constrained distances
    - _runembedding.pickle: pickled dictionary of embedder attributes

    only_fixed_constraints: only report the constraints of
    self.pairings_table that have an uppercase letter
    '''
    has_energies = hasattr(self, 'energies')

    if has_energies:
        # relative energies are computed once for both files,
        # rather than once for every candidate
        relative = self.rel_energies()

        with open(f'{outname}_energies.dat', 'w') as _f:
            for i, (energy, rel_energy) in enumerate(zip(self.energies, relative)):
                # 1E10 is the energy assigned to scrambled structures
                print_energy = str(round(rel_energy, 2))+' kcal/mol' if energy != 1E10 else 'SCRAMBLED'
                _f.write('Candidate {:5} : {}\n'.format(i, print_energy))

    with open(f'{outname}_structures.xyz', 'w') as _f:
        exit_status = self.exit_status if hasattr(self, 'exit_status') else [0 for _ in self.structures]
        energies = relative if has_energies else [0 for _ in self.structures]

        for i, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                            exit_status,
                                                            energies)):

            kind = 'REFINED - ' if status else 'NOT REFINED - '
            write_xyz(structure, self.atomnos, _f, title=f'Structure {i+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

    if only_fixed_constraints:
        # fixed constraints are the same for every candidate
        fixed_constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])

    with open(f'{outname}_constraints.dat', 'w') as _f:
        for i, constraints in enumerate(self.constrained_indices):

            if only_fixed_constraints:
                constraints = fixed_constraints

            elif len(self.internal_constraints) > 0:
                constraints = np.concatenate([constraints, self.internal_constraints])

            # NOTE(review): as written here the second replace is a no-op - confirm against the upstream file
            c_str = repr(constraints).replace('\n','').replace(', ',', ')
            d_str = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]
            _f.write('Candidate {:5} : {} -> {}\n'.format(i, c_str, d_str))

    with open(f'{outname}_runembedding.pickle', 'wb') as _f:
        d = {
            'structures' : self.structures,
            'constrained_indices' : self.constrained_indices,
            'graphs' : self.graphs,
            'objects' : self.objects,
            'options' : self.options,
            'atomnos' : self.atomnos,
        }

        if has_energies:
            d['energies'] = self.energies

        pickle.dump(d, _f)
1311
+
1312
def compenetration_refining(self):
    '''
    Performing a sanity check for excessive compenetration
    on generated structures, discarding the ones that look too bad.

    The check is skipped for 'string', 'cyclical' and 'monomolecular'
    embeds. In any case, energies and exit status of the structures
    that are left are initialized (1E10 and False).
    '''
    already_checked = self.embed in ('string', 'cyclical', 'monomolecular')
    # these do not need compenetration refining: the
    # algorithm checks for compenetrations when embedding

    if not already_checked:

        self.log('--> Checking structures for compenetrations')

        t_start = time.perf_counter()

        verdicts = [
            compenetration_check(structure, self.ids, max_clashes=self.options.max_clashes, thresh=self.options.clash_thresh)
            for structure in self.structures
        ]
        mask = np.array(verdicts, dtype=bool)

        self.apply_mask(('structures', 'constrained_indices'), mask)
        t_end = time.perf_counter()

        n_kept = int(np.count_nonzero(mask))
        n_rejected = len(mask) - n_kept

        if n_rejected > 0:
            self.log(f'Discarded {n_rejected} candidates for compenetration ({n_kept} left, {time_to_string(t_end-t_start)})')
        else:
            self.log(f'All {len(mask)} structures passed the compenetration check')
        self.log()

        self.zero_candidates_check()

    # initialize embedder values for the active structures
    # that survived the compenetration check
    n_structures = len(self.structures)
    self.energies = np.full(n_structures, 1E10)
    self.exit_status = np.zeros(n_structures, dtype=bool)
1349
+
1350
def fitness_refining(self, threshold=5, verbose=False):
    '''
    Performing a distance check on generated structures,
    discarding the ones that do not respect the imposed pairings.
    Internal constraints are ignored.

    threshold : rejection happens when the sum of the deviations from the
    intended spacings is greater than threshold.
    verbose : also log the task header and the all-passed message.
    '''
    if verbose:
        self.log(' \n--> Fitness pruning - removing inaccurate structures')

    mask = np.ones(len(self.structures), dtype=bool)

    candidates = zip(self.structures, self.constrained_indices)
    for s, (structure, constraints) in enumerate(candidates):

        target_distances = tuple(self.get_pairing_dists_from_constrained_indices(pair)
                                 for pair in constraints)

        mask[s] = fitness_check(structure,
                                constraints,
                                target_distances,
                                threshold=threshold)

    self.apply_mask(
        ('structures', 'energies', 'constrained_indices', 'exit_status'),
        mask,
    )

    n_kept = int(np.count_nonzero(mask))
    n_rejected = len(mask) - n_kept

    if n_rejected > 0:
        self.log(f'Discarded {n_rejected} candidates for unfitness ({n_kept} left)')
    elif verbose:
        self.log('All candidates meet the imposed criteria.')
    self.log()

    self.zero_candidates_check()
1391
+
1392
def similarity_refining(self, tfd=True, moi=True, rmsd=True, verbose=False):
    '''
    If possible, removes structures with similar torsional profile (TFD-based).
    Removes structures that are too similar to each other (MOI- and RMSD-based).

    tfd : torsion fingerprint pruning. Only carried out with more than
    one molecule and an embed graph flagged as a single molecule.
    moi : moment of inertia pruning (skipped above 1E5 structures).
    rmsd : RMSD pruning (skipped above 1E5 structures), followed by a
    symmetry-corrected pass if an embed graph is set and no more
    than 1E4 structures are left.
    verbose : log the task header and the all-passed message.
    '''

    if verbose:
        self.log('--> Similarity Processing')

    before = len(self.structures)

    # attributes to be masked along with the structures
    attr = ('constrained_indices', 'energies', 'exit_status')

    if (
        tfd and
        len(self.objects) > 1 and
        hasattr(self, 'embed_graph') and
        self.embed_graph.is_single_molecule
    ):

        t_start = time.perf_counter()

        quadruplets = _get_quadruplets(self.embed_graph)
        if len(quadruplets) > 0:
            self.structures, mask = prune_conformers_tfd(self.structures, quadruplets, verbose=verbose)

            self.apply_mask(attr, mask)

            if False in mask:
                n_rejected = len([b for b in mask if not b])
                n_kept = len([b for b in mask if b])
                self.log(f'Discarded {n_rejected} structures for TFD similarity ({n_kept} left, {time_to_string(time.perf_counter()-t_start)})')

    if moi:

        if len(self.structures) <= 1E5:

            ### Now again, based on the moment of inertia

            before3 = len(self.structures)

            t_start = time.perf_counter()
            self.structures, mask = prune_by_moment_of_inertia(self.structures, self.atomnos, debugfunction=self.debuglog)

            self.apply_mask(attr, mask)

            if before3 > len(self.structures):
                n_rejected = len([b for b in mask if not b])
                n_kept = len([b for b in mask if b])
                self.log(f'Discarded {n_rejected} candidates for MOI similarity ({n_kept} left, {time_to_string(time.perf_counter()-t_start)})')

        else:
            self.log('Skipped MOI pruning (>100k) structures')

    if rmsd and len(self.structures) <= 1E5:

        before1 = len(self.structures)

        t_start = time.perf_counter()

        self.structures, mask = prune_by_rmsd(self.structures, self.atomnos, self.options.rmsd, debugfunction=self.debuglog)

        self.apply_mask(attr, mask)

        if before1 > len(self.structures):
            n_rejected = len([b for b in mask if not b])
            n_kept = len([b for b in mask if b])
            self.log(f'Discarded {n_rejected} candidates for RMSD similarity ({n_kept} left, {time_to_string(time.perf_counter()-t_start)})')

        ### Second step: again but symmetry-corrected (unless we have too many structures)

        if len(self.structures) <= 1E4 and hasattr(self, 'embed_graph'):

            before2 = len(self.structures)

            t_start = time.perf_counter()
            self.structures, mask = prune_by_rmsd_rot_corr(
                self.structures,
                self.atomnos,
                self.embed_graph,
                max_rmsd=self.options.rmsd,
                logfunction=(self.log if verbose else None),
                debugfunction=self.debuglog,
            )

            self.apply_mask(attr, mask)

            if before2 > len(self.structures):
                n_rejected = len([b for b in mask if not b])
                n_kept = len([b for b in mask if b])
                self.log(f'Discarded {n_rejected} candidates for symmetry-corrected RMSD similarity ({n_kept} left, {time_to_string(time.perf_counter()-t_start)})')

        elif hasattr(self, 'embed_graph'):
            self.log('Skipped rotationally-corrected RMSD pruning (>10k) structures')

    elif rmsd:
        # only report the skip if RMSD pruning was requested: it
        # would be misleading when it was turned off by the caller
        self.log('Skipped RMSD pruning (>100k) structures')

    if verbose and len(self.structures) == before:
        self.log(f'All structures passed the similarity check.{" "*15}')

    self.log()
1485
+
1486
+ def force_field_refining(self, conv_thr="tight", only_fixed_constraints=False, prevent_scrambling=False):
1487
+ '''
1488
+ Performs structural optimizations with the embedder force field caculator.
1489
+ Only structures that do not scramble during FF optimization are updated,
1490
+ while the rest are kept as they are.
1491
+ conv_thr: convergence threshold, passed to calculator
1492
+ only_fixed_constraints: only uses fixed (UPPERCASE) constraints in optimization
1493
+ prevent_scrambling: preserves molecular identities constraining bonds present in graphs (XTB only)
1494
+ '''
1495
+
1496
+ ################################################# CHECKPOINT BEFORE FF OPTIMIZATION
1497
+
1498
+ if not only_fixed_constraints:
1499
+ self.outname = f'firecode_checkpoint_{self.stamp}.xyz'
1500
+ with open(self.outname, 'w') as f:
1501
+ for i, structure in enumerate(align_structures(self.structures)):
1502
+ write_xyz(structure, self.atomnos, f, title=f'TS candidate {i+1} - Checkpoint before FF optimization')
1503
+ self.log(f'\n--> Checkpoint output - Wrote {len(self.structures)} unoptimized structures to {self.outname} file before FF optimization.\n')
1504
+
1505
+ ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD
1506
+
1507
+ if only_fixed_constraints:
1508
+ task = 'Structure optimization (tight) / relaxing interactions'
1509
+ else:
1510
+ task = f'Structure {"pre-" if prevent_scrambling else ""}optimization (loose)'
1511
+
1512
+ self.log(f'--> {task} ({self.options.ff_level}{f"/{self.options.solvent}" if self.options.solvent is not None else ""} level via {self.options.ff_calc}, {self.avail_cpus} thread{"s" if self.avail_cpus>1 else ""})')
1513
+
1514
+ t_start_ff_opt = time.perf_counter()
1515
+
1516
+ processes = []
1517
+ cum_time = 0
1518
+
1519
+ opt_function = xtb_pre_opt if prevent_scrambling else xtb_opt
1520
+
1521
+ # Running as many threads as we have procs
1522
+ # since FF does not parallelize well with more cores
1523
+ with ProcessPoolExecutor(max_workers=self.avail_cpus) as executor:
1524
+
1525
+ for i, structure in enumerate(deepcopy(self.structures)):
1526
+
1527
+ if only_fixed_constraints:
1528
+ constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])
1529
+
1530
+ else:
1531
+ constraints = np.concatenate([self.constrained_indices[i], self.internal_constraints]) if len(self.internal_constraints) > 0 else self.constrained_indices[i]
1532
+
1533
+ pairing_dists = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]
1534
+
1535
+ process = executor.submit(
1536
+ timing_wrapper,
1537
+ opt_function,
1538
+ structure,
1539
+ self.atomnos,
1540
+ graphs=self.graphs,
1541
+ calculator=self.options.ff_calc,
1542
+ method=self.options.ff_level,
1543
+ solvent=self.options.solvent,
1544
+ charge=self.options.charge,
1545
+ maxiter=None,
1546
+ conv_thr=conv_thr,
1547
+ constrained_indices=constraints,
1548
+ constrained_distances=pairing_dists,
1549
+ procs=2, # FF just needs two per structure
1550
+ title=f'Candidate_{i+1}',
1551
+ spring_constant=0.2 if prevent_scrambling else 1,
1552
+ payload=(
1553
+ self.constrained_indices[i],
1554
+ )
1555
+ )
1556
+ processes.append(process)
1557
+
1558
+ for i, process in enumerate(as_completed(processes)):
1559
+
1560
+ loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')
1561
+
1562
+ ((
1563
+ new_structure,
1564
+ new_energy,
1565
+ self.exit_status[i]
1566
+ ),
1567
+ # from optimization function
1568
+
1569
+ (
1570
+ self.constrained_indices[i],
1571
+ ),
1572
+ # from payload
1573
+
1574
+ t_struct
1575
+ # from timing_wrapper
1576
+
1577
+ ) = process.result()
1578
+
1579
+ # assert that the structure did not scramble during optimization
1580
+ if self.exit_status[i]:
1581
+ constraints = (np.concatenate([self.constrained_indices[i], self.internal_constraints])
1582
+ if len(self.internal_constraints) > 0
1583
+ else self.constrained_indices[i])
1584
+
1585
+ self.exit_status[i] = scramble_check(new_structure,
1586
+ self.atomnos,
1587
+ excluded_atoms=constraints.ravel(),
1588
+ mols_graphs=self.graphs,
1589
+ max_newbonds=self.options.max_newbonds,
1590
+ logfunction=self.log if self.options.debug else None,
1591
+ title=f"Candidate_{i+1}")
1592
+
1593
+ cum_time += t_struct
1594
+
1595
+ if self.options.debug:
1596
+ exit_status = 'REFINED ' if self.exit_status[i] else 'SCRAMBLED'
1597
+ self.debuglog(f'DEBUG: force_field_refining ({conv_thr}) - Candidate_{i+1} - {exit_status} {time_to_string(t_struct, digits=3)}')
1598
+
1599
+ if self.exit_status[i] and new_energy is not None:
1600
+ self.structures[i] = new_structure
1601
+ self.energies[i] = new_energy
1602
+
1603
+ else:
1604
+ self.energies[i] = 1E10
1605
+
1606
+ ### Update checkpoint every (20*max_workers) optimized structures, and give an estimate of the remaining time
1607
+ chk_freq = self.avail_cpus * self.options.checkpoint_frequency
1608
+ if i % chk_freq == chk_freq-1:
1609
+
1610
+ with open(self.outname, 'w') as f:
1611
+ for j, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
1612
+ self.exit_status,
1613
+ self.rel_energies())):
1614
+
1615
+ kind = 'REFINED - ' if status else 'NOT REFINED - '
1616
+ write_xyz(structure, self.atomnos, f, title=f'Structure {j+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')
1617
+
1618
+ elapsed = time.perf_counter() - t_start_ff_opt
1619
+ average = (elapsed)/(i+1)
1620
+ time_left = time_to_string((average) * (len(self.structures)-i-1))
1621
+ speedup = cum_time/elapsed
1622
+ self.log(f' - Optimized {i+1:>4}/{len(self.structures):>4} structures - updated checkpoint file (avg. {time_to_string(average)}/struc, {round(speedup, 1)}x speedup, est. {time_left} left)', p=False)
1623
+
1624
+ loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')
1625
+
1626
+ elapsed = time.perf_counter() - t_start_ff_opt
1627
+ average = (elapsed)/(len(self.structures))
1628
+ speedup = cum_time/elapsed
1629
+
1630
+ self.log(f'{self.options.ff_calc}/{self.options.ff_level} optimization took {time_to_string(elapsed)} (~{time_to_string(average)} per structure, {round(speedup, 1)}x speedup)')
1631
+
1632
+ ################################################# EXIT STATUS
1633
+
1634
+ self.log(f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} candidates at {self.options.ff_level} level.')
1635
+
1636
+ ################################################# PRUNING: ENERGY
1637
+
1638
+ _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
1639
+ self.energies = self.scramble(self.energies, sequence)
1640
+ self.structures = self.scramble(self.structures, sequence)
1641
+ self.constrained_indices = self.scramble(self.constrained_indices, sequence)
1642
+ # sorting structures based on energy
1643
+
1644
+ if self.options.debug:
1645
+ self.dump_status(f'force_field_refining_{conv_thr}', only_fixed_constraints=only_fixed_constraints)
1646
+ self.debuglog(f'DEBUG: Dumped emebedder status after generating candidates (\"force_field_refining_{conv_thr}\")')
1647
+
1648
+ mask = self.rel_energies() < 1E10
1649
+ self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)
1650
+
1651
+ if False in mask:
1652
+ self.log(f'Discarded {len([b for b in mask if not b])} scrambled candidates ({np.count_nonzero(mask)} left)')
1653
+
1654
+ ################################################# PRUNING: FITNESS (POST FORCE FIELD OPT)
1655
+
1656
+ self.fitness_refining(threshold=2)
1657
+
1658
+ ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)
1659
+
1660
+ self.zero_candidates_check()
1661
+ self.similarity_refining()
1662
+
1663
+ ################################################# CHECKPOINT AFTER FF OPTIMIZATION
1664
+
1665
+ s = f'--> Checkpoint output - Updated {len(self.structures)} optimized structures to {self.outname} file'
1666
+
1667
+ if self.options.optimization and (self.options.ff_level != self.options.theory_level) and conv_thr != "tight":
1668
+ s += f' before {self.options.calculator} optimization.'
1669
+
1670
+ else:
1671
+ self.outname = f'firecode_{"ensemble" if self.embed == "refine" else "poses"}_{self.stamp}.xyz'
1672
+ # if the FF optimization was the last one, call the outfile accordingly
1673
+
1674
+
1675
+ self.log(s+'\n')
1676
+
1677
+ with open(self.outname, 'w') as f:
1678
+ for i, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
1679
+ self.exit_status,
1680
+ self.rel_energies())):
1681
+
1682
+ kind = 'REFINED - ' if status else 'NOT REFINED - '
1683
+ write_xyz(structure, self.atomnos, f, title=f'Structure {i+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')
1684
+
1685
+ # do not retain energies for the next optimization step if optimization was not tight
1686
+ if not only_fixed_constraints:
1687
+ self.energies.fill(0)
1688
+
1689
def _set_target_distances(self):
    '''
    Called before TS refinement to compute all
    target bonding distances. These are only stored
    if that pairing is not a non-covalent interaction,
    that is if pairing was not specified with letters
    "x", "y" or "z".

    Fills self.target_distances, a dictionary that maps
    sorted (index1, index2) tuples to a target distance.
    A distance found in self.pairing_dists (if the attribute
    exists) for the letter of that pairing is used as is,
    otherwise the sum of the two orb_dim_dict entries of
    the reactive atoms is used.
    '''
    nci_letters = ("x", "y", "z")
    self.target_distances = {}

    # grab the atoms we want to extract information from
    r_atoms = {}
    for mol in self.objects:
        for letter, r_atom in mol.reactive_atoms_classes_dict[0].items():
            cumnum = r_atom.cumnum if hasattr(r_atom, 'cumnum') else r_atom.index
            if letter not in nci_letters:
                r_atoms[cumnum] = r_atom

    # collapse the constrained indices of every candidate into a set of sorted pairs
    pairings = self.constrained_indices.ravel()
    pairings = pairings.reshape(int(pairings.shape[0]/2), 2)
    pairings = {tuple(sorted((a, b))) for a, b in pairings}

    # letters and indices are kept in two parallel lists, so that a position
    # in active_pairs always points to the letter of that very pairing, even
    # when "x"/"y"/"z" entries come first in self.pairings_table
    active_letters = []
    active_pairs = []
    for letter, indices in self.pairings_table.items():
        if letter not in nci_letters:
            active_letters.append(letter)
            active_pairs.append(indices)

    for index1, index2 in pairings:

        pair = [index1, index2]
        if pair not in active_pairs:
            continue

        # if target distance has been specified by user, read that, otherwise compute it
        if hasattr(self, 'pairing_dists'):
            letter = active_letters[active_pairs.index(pair)]

            if letter in self.pairing_dists:
                self.target_distances[(index1, index2)] = self.pairing_dists[letter]
                continue

        r_atom1 = r_atoms[index1]
        r_atom2 = r_atoms[index2]

        dist1 = orb_dim_dict.get(r_atom1.symbol + ' ' + str(r_atom1), orb_dim_dict['Fallback'])
        dist2 = orb_dim_dict.get(r_atom2.symbol + ' ' + str(r_atom2), orb_dim_dict['Fallback'])

        self.target_distances[(index1, index2)] = dist1 + dist2
1732
+
1733
def optimization_refining(self, maxiter=None, conv_thr='tight', only_fixed_constraints=False):
    '''
    Refines structures by constrained optimizations with the active calculator,
    discarding similar ones and scrambled ones.

    maxiter - int, number of max iterations for the optimization
    conv_thr: convergence threshold, passed to calculator
    only_fixed_constraints: only uses fixed (UPPERCASE) constraints in optimization

    Structures, energies, exit statuses and constrained indices are
    updated in place. The ensemble is then sorted by energy, pruned
    (energy threshold only if only_fixed_constraints, then fitness and
    similarity) and written to self.outname. Energies are zeroed at the
    end unless only_fixed_constraints is set.
    '''

    # pytorch parallellization is carried out differently
    if self.options.calculator == 'AIMNET2':
        from aimnet2_firecode.interface import \
            aimnet2_optimization_refining
        return aimnet2_optimization_refining(self, maxiter=maxiter, conv_thr=conv_thr, only_fixed_constraints=only_fixed_constraints)

    self.outname = f'firecode_{"ensemble" if self.embed == "refine" else "poses"}_{self.stamp}.xyz'

    def write_checkpoint():
        # write the whole aligned ensemble to self.outname. Energies in this
        # routine come from self.options.theory_level, so that is the label used
        with open(self.outname, 'w') as f:
            for j, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                                self.exit_status,
                                                                self.rel_energies())):

                kind = 'REFINED - ' if status else 'NOT REFINED - '
                write_xyz(structure, self.atomnos, f, title=f'Structure {j+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.theory_level})')

    if only_fixed_constraints:
        task = 'Structure optimization (tight) / relaxing interactions'
    else:
        task = 'Structure optimization (loose)'

    max_workers = {
        'XTB' : int(self.avail_cpus//4),
        'ORCA' : int(self.avail_cpus//self.procs),
        'GAUSSIAN' : int(self.avail_cpus//self.procs),
    }[self.options.calculator]

    # ProcessPoolExecutor refuses max_workers=0, which is what the integer
    # divisions above give on machines with few cores
    max_workers = max(1, max_workers)

    self.log(f'--> {task} ({self.options.theory_level}{f"/{self.options.solvent}" if self.options.solvent is not None else ""}' +
             f' level via {self.options.calculator}, {max_workers} thread{"s" if max_workers>1 else ""})')

    self.energies.fill(0)
    # Resetting all energies since we changed theory level

    t_start = time.perf_counter()
    processes = []
    cum_time = 0

    # number of completed optimizations between two checkpoint updates,
    # never allowed to be zero since it is used as a modulus below
    chk_freq = max(1, int(self.avail_cpus//4) * self.options.checkpoint_frequency)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:

        opt_func = self.dispatcher.opt_funcs_dict[self.options.calculator]

        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

            if only_fixed_constraints:
                constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])

            else:
                constraints = np.concatenate([self.constrained_indices[i], self.internal_constraints]) if len(self.internal_constraints) > 0 else self.constrained_indices[i]

            pairing_dists = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]

            process = executor.submit(
                                        timing_wrapper,
                                        opt_func,
                                        structure,
                                        self.atomnos,
                                        method=self.options.theory_level,
                                        solvent=self.options.solvent,
                                        charge=self.options.charge,
                                        maxiter=maxiter,
                                        conv_thr=conv_thr,
                                        constrained_indices=constraints,
                                        constrained_distances=pairing_dists,
                                        procs=self.procs,
                                        title=f'Candidate_{i+1}',
                                        spring_constant=2 if only_fixed_constraints else 1,

                                        payload=(
                                            self.constrained_indices[i],
                                            )
                                    )

            processes.append(process)

        # NOTE: i follows the order of completion, not of submission. Each result
        # carries its own constrained indices in the payload, so that structure,
        # energy, exit status and constraints stored at position i belong together
        for i, process in enumerate(as_completed(processes)):

            loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

            ( (
                new_structure,
                new_energy,
                self.exit_status[i]
              ),
            # from optimization function

              (
                self.constrained_indices[i],
              ),
            # from payload

              t_struct
            # from timing_wrapper

            ) = process.result()

            # assert that the structure did not scramble during optimization
            if self.exit_status[i]:
                constraints = (np.concatenate([self.constrained_indices[i], self.internal_constraints])
                                if len(self.internal_constraints) > 0
                                else self.constrained_indices[i])

                self.exit_status[i] = scramble_check(new_structure,
                                                     self.atomnos,
                                                     excluded_atoms=constraints.ravel(),
                                                     mols_graphs=self.graphs,
                                                     max_newbonds=0)

            cum_time += t_struct

            if self.options.debug:
                exit_status = 'REFINED ' if self.exit_status[i] else 'SCRAMBLED'
                self.debuglog(f'DEBUG: optimzation_refining ({conv_thr}) - Candidate_{i+1} - {exit_status if new_energy is not None else "CRASHED"} {time_to_string(t_struct, digits=3)}')

            if self.exit_status[i] and new_energy is not None:
                self.structures[i] = new_structure
                self.energies[i] = new_energy

            else:
                self.energies[i] = 1E10

            ### Update checkpoint every chk_freq optimized structures, and give an estimate of the remaining time
            if i % chk_freq == chk_freq-1:

                write_checkpoint()

                elapsed = time.perf_counter() - t_start
                average = (elapsed)/(i+1)
                time_left = time_to_string((average) * (len(self.structures)-i-1))
                speedup = cum_time/elapsed
                self.log(f' - Optimized {i+1:>4}/{len(self.structures):>4} structures - updated checkpoint file (avg. {time_to_string(average)}/struc, {round(speedup, 1)}x speedup, est. {time_left} left)', p=False)

    loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    elapsed = time.perf_counter() - t_start
    average = (elapsed)/max(1, len(self.structures))
    speedup = cum_time/elapsed

    self.log((f'{self.options.calculator}/{self.options.theory_level} optimization took '
              f'{time_to_string(elapsed)} (~{time_to_string(average)} per structure, {round(speedup, 1)}x speedup)'))

    ################################################# EXIT STATUS

    self.log(f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will {"not " if not self.options.only_refined else ""}be discarded.')

    if self.options.only_refined:

        mask = self.exit_status
        self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for unsuccessful optimization ({np.count_nonzero(mask)} left)')

    # the energy sorting below cannot handle an empty ensemble:
    # run the zero candidates check before getting there
    self.zero_candidates_check()

    ################################################# PRUNING: ENERGY

    _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
    self.energies = self.scramble(self.energies, sequence)
    self.structures = self.scramble(self.structures, sequence)
    self.constrained_indices = self.scramble(self.constrained_indices, sequence)
    # exit statuses have to follow the same reordering, or they would
    # be matched with the wrong structures from here on
    self.exit_status = np.asarray(self.exit_status, dtype=bool)[list(sequence)]
    # sorting structures based on energy

    if self.options.debug:
        self.dump_status(f'optimization_refining_{conv_thr}', only_fixed_constraints=only_fixed_constraints)
        self.debuglog(f'DEBUG: Dumped emebedder status after generating candidates (\"optimization_refining_{conv_thr}\")')

    if self.options.kcal_thresh is not None and only_fixed_constraints:

        # mask = self.rel_energies() < self.options.kcal_thresh
        energy_thr = self.dynamic_energy_thr()
        mask = self.rel_energies() < energy_thr

        self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for energy ({np.count_nonzero(mask)} left, ' +
                     f'{round(100*np.count_nonzero(mask)/len(mask), 1)}% kept, threshold {energy_thr} kcal/mol)')

    ################################################# PRUNING: FITNESS (POST SEMIEMPIRICAL OPT)

    self.fitness_refining(threshold=2)

    ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# CHECKPOINT AFTER SE OPTIMIZATION

    write_checkpoint()

    self.log(f'--> Wrote {len(self.structures)} optimized structures to {self.outname}')

    # do not retain energies for the next optimization step if optimization was not tight
    if not only_fixed_constraints:
        self.energies.fill(0)
1943
+
1944
def dynamic_energy_thr(self, keep_min=0.1, verbose=True):
    '''
    Returns an energy threshold that is dynamically adjusted
    based on the distribution of energies around the lowest,
    so that more than a keep_min fraction of the structures
    (10% by default) is retained.

    keep_min: float, minimum fraction (0-1) of structures to keep
    verbose: bool, prints comments in self.log

    Returns self.options.kcal_thresh if that already keeps enough
    structures, otherwise the smallest relative energy above it that
    does. If no relative energy qualifies (or there are no structures)
    self.options.kcal_thresh is returned, so the result is never None.
    '''
    standard_thr = self.options.kcal_thresh
    active = len(self.structures)

    # nothing to base an adjustment on
    if active == 0:
        return standard_thr

    # computed once: the original recomputed it at every iteration
    rel_energies = np.asarray(self.rel_energies())

    # if the standard threshold keeps enough structures, use that
    keep = np.count_nonzero(rel_energies < standard_thr)
    if keep/active > keep_min:
        return standard_thr

    # if not, iterate on the relative energy values (in ascending order)
    # as thresholds until we keep enough structures
    for thr in np.unique(rel_energies[rel_energies > standard_thr]):
        keep = np.count_nonzero(rel_energies < thr)

        if keep/active > keep_min:
            if verbose:
                self.log(f"--> Dynamically adjusted energy threshold to {round(thr, 1)} kcal/mol to retain at least {round(100*keep_min)}% of structures.")
            return thr

    # no candidate threshold keeps enough structures (e.g. all the
    # higher energies are identical): fall back to the standard one
    return standard_thr
1970
+
1971
def metadynamics_augmentation(self):
    '''
    Runs a metadynamics simulation (MTD) through
    xtb_metadyn_augmentation for each structure in self.structures.
    The structures returned for each candidate are appended to
    self.structures, with a zero energy and the constrained indices
    of the candidate they were generated from. All exit statuses
    are reset to True at the end.
    '''

    self.log('--> Performing XTB Metadynamic augmentation of TS candidates')

    n_initial = len(self.structures)
    run_clock = time.perf_counter()

    # loop on copies, since the ensemble is extended while iterating
    parents = zip(deepcopy(self.structures), deepcopy(self.constrained_indices))

    for s, (parent, parent_constraints) in enumerate(parents):

        loadbar(s, n_initial, f'Running MTD {s+1}/{n_initial} ')
        step_clock = time.perf_counter()

        offspring = xtb_metadyn_augmentation(parent,
                                             self.atomnos,
                                             constrained_indices=parent_constraints,
                                             new_structures=5,
                                             title=s)

        n_new = len(offspring)
        self.structures = np.concatenate((self.structures, offspring))
        self.energies = np.concatenate((self.energies, [0] * n_new))
        self.constrained_indices = np.concatenate((self.constrained_indices, [parent_constraints] * n_new))

        step_time = time_to_string(time.perf_counter()-step_clock)
        self.log(f' - Structure {s+1} - {n_new} new conformers ({step_time})', p=False)

    loadbar(n_initial, n_initial, f'Running MTD {n_initial}/{n_initial} ')
    self.exit_status = np.array([True for _ in self.structures], dtype=bool)

    self.log(f'Metadynamics augmentation completed - found {len(self.structures)-n_initial} new conformers ({time_to_string(time.perf_counter()-run_clock)})\n')
2005
+
2006
def csearch_augmentation(self, text='', max_structs=1000):
    '''
    Runs a conformational search for each structure in self.structure,
    preserving the current reactive atoms pose and HB interactions.
    New structures are appended to self.structures with a placeholder
    energy of 1E10 and the constrained indices of their parent, then
    self.similarity_refining(rmsd=False) is called.

    text: str, appended to the log line that announces the routine
    max_structs: int, used to size the number of conformers requested
        per candidate (100, or max_structs divided by the number of
        candidates when that would exceed max_structs, at least 1)
    '''
    # local import: only this routine needs it
    from contextlib import ExitStack

    self.warn("--> WARNING! csearch_augmentation is an experimental routine and has not been fully tested yet.")
    self.log(f'--> Performing conformational augmentation of candidates {text}')

    before = len(self.structures)
    t_start_run = time.perf_counter()
    n_out = 100 if len(self.structures)*100 < max_structs else round(max_structs/len(self.structures))
    n_out = max((1, n_out))

    for s, (structure, constrained_indices) in enumerate(zip(self.structures, self.constrained_indices)):

        loadbar(s, before, f'Performing CSearch {s+1}/{before} ', suffix=f'({len(self.structures)-before} generated)')
        t_start = time.perf_counter()

        # the ExitStack closes the debug log (if any) even when csearch
        # raises something else than SegmentedGraphError
        with ExitStack() as stack:

            if self.options.debug:
                dump = stack.enter_context(open(f'Candidate_{s+1}_csearch_log.txt', 'w', buffering=1))

                def logfunction(line, _dump=dump):
                    return _dump.write(line+'\n')

            else:

                def logfunction(line):
                    return None

            try:
                new_structures = csearch(
                                            structure,
                                            self.atomnos,
                                            constrained_indices=constrained_indices,
                                            keep_hb=True,
                                            mode=2,
                                            n_out=n_out,
                                            logfunction=logfunction,
                                            title=f'Candidate_{s+1}',
                                            interactive_print=False,
                                            write_torsions=self.options.debug,
                                        )

            # if CSearch cannot be performed, just go on
            except SegmentedGraphError:
                new_structures = []

        if len(new_structures) != 0: # could be either array or list, so have to check this way
            self.structures = np.concatenate((self.structures, new_structures))
            self.energies = np.concatenate((self.energies, [1E10 for _ in new_structures]))
            self.constrained_indices = np.concatenate((self.constrained_indices, [constrained_indices for _ in new_structures]))

        self.log(f' - Candidate {s+1} - {len(new_structures)} new conformers ({time_to_string(time.perf_counter()-t_start)})', p=False)

    loadbar(before, before, f'Performing CSearch {before}/{before} ', suffix=f'{" "*15}')
    self.exit_status = np.array([True for _ in self.structures], dtype=bool)

    self.similarity_refining(rmsd=False)

    self.log(f'Conformational augmentation completed - generated {len(self.structures)-before} new conformers ({time_to_string(time.perf_counter()-t_start_run)})\n')
2063
+
2064
def csearch_augmentation_routine(self):
    '''
    If self.options.csearch_aug is set, runs up to three rounds of
    self.csearch_augmentation followed by self.force_field_refining.
    After each round the lowest energy is compared with the one found
    before the round: the routine stops early as soon as two rounds
    did not lower it. In debug mode the best structure of every
    improving round is written to best_of_aug_run_{i}.xyz.
    '''

    if not self.options.csearch_aug:
        return

    max_rounds = 3
    fruitless_rounds = 0

    for i in range(max_rounds):

        previous_best = np.min(self.energies)

        self.csearch_augmentation(text=f'(step {i+1}/3)', max_structs=self.options.max_confs)
        self.force_field_refining()

        improved = np.min(self.energies) < previous_best

        if not improved:
            self.log('--> No new minima found.\n')
            fruitless_rounds += 1

        else:
            delta = previous_best - np.min(self.energies)
            self.log(f'--> Lower minima found: {round(delta, 2)} kcal/mol below previous best\n')

            if self.options.debug:
                with open(f'best_of_aug_run_{i}.xyz', 'w') as f:
                    ranked = sorted(zip(self.energies, self.structures), key=lambda pair: pair[0])
                    best_energy, best_structure = ranked[0]
                    write_xyz(best_structure, self.atomnos, f, title=f'Energy = {round(best_energy, 6)}')

        if fruitless_rounds == 2:
            break
2098
+
2099
def saddle_refining(self):
    '''
    Performs a first order saddle optimization for each structure.

    Structures for which ase_saddle fails (exit status False, or a
    ValueError raised) are discarded, then the survivors are pruned by
    RMSD similarity, sorted by energy and written to a dedicated
    firecode_SADDLE_TSs_{stamp}.xyz file (self.outname is updated).
    '''
    self.log(f'--> Saddle optimization ({self.options.theory_level} level)')
    t_start = time.perf_counter()

    for i, structure in enumerate(self.structures):

        loadbar(i, len(self.structures), prefix=f'Performing saddle opt {i+1}/{len(self.structures)} ')

        try:

            self.structures[i], self.energies[i], self.exit_status[i] = ase_saddle(self,
                                                                                    structure,
                                                                                    self.atomnos,
                                                                                    self.constrained_indices[i],
                                                                                    mols_graphs=self.graphs if self.embed != 'monomolecular' else None,
                                                                                    title=f'Saddle opt - Structure {i+1}',
                                                                                    logfile=self.logfile,
                                                                                    traj=f'Saddle_opt_{i+1}.traj',
                                                                                    maxiterations=200)

        except ValueError:
            # Thrown when an ASE read fails (during saddle opt)
            self.exit_status[i] = False

    loadbar(1, 1, prefix=f'Performing saddle opt {len(self.structures)}/{len(self.structures)} ')
    t_end = time.perf_counter()
    # max(1, ...) keeps the per-structure average defined for an empty ensemble
    self.log(f'{self.options.calculator} {self.options.theory_level} saddle optimization took {time_to_string(t_end-t_start)} ({time_to_string((t_end-t_start)/max(1, len(self.structures)))} per structure)')
    self.log(f'Saddle opt completed for {len([i for i in self.exit_status if i])}/{len(self.structures)} structures')

    mask = self.exit_status

    # constrained_indices has to be masked together with the other attributes,
    # since it is reordered along with them further down
    self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

    ################################################# PRUNING: SIMILARITY (POST SADDLE OPT)

    if len(self.structures) != 0:

        t_start = time.perf_counter()
        self.structures, mask = prune_by_rmsd(self.structures, self.atomnos, max_rmsd=self.options.rmsd, debugfunction=self.debuglog)
        self.apply_mask(('constrained_indices', 'energies', 'exit_status'), mask)
        t_end = time.perf_counter()

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for similarity ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})')
        self.log()

        ################################################# SADDLE OPT EXTRA XYZ OUTPUT

        _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
        # self.scramble is what the other refining routines use for this reordering
        self.energies = self.scramble(self.energies, sequence)
        self.structures = self.scramble(self.structures, sequence)
        self.constrained_indices = self.scramble(self.constrained_indices, sequence)
        # sorting structures based on energy

        self.outname = f'firecode_SADDLE_TSs_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            # enumerate so that every title carries its own structure number
            for i, (structure, energy) in enumerate(zip(align_structures(self.structures), self.rel_energies())):
                write_xyz(structure, self.atomnos, f, title=f'Structure {i+1} - TS - Rel. E. = {round(energy, 3)} kcal/mol')

        self.log(f'Wrote {len(self.structures)} saddle-optimized structures to {self.outname} file\n')

    else:
        self.log()
2166
+
2167
def print_nci(self):
    '''
    Prints and logs the non-covalent interactions guesses for final structures.

    The interactions returned by get_nci for each structure are stored
    in self.nci. Interactions that are present in some structures but
    not in all of them are then reported as differential NCIs.
    '''
    self.log('--> Non-covalent interactions finder (EXPERIMENTAL)')
    self.nci = []

    for idx, coords in enumerate(self.structures):

        found, report_lines = get_nci(coords, self.atomnos, self.constrained_indices[idx], self.ids)
        self.nci.append(found)

        if found != []:
            self.log(f'Structure {idx+1}: {len(found)} interactions')

            for report_line in report_lines:
                self.log(' '+report_line)
            self.log()

    # nothing found in any structure: say so and leave
    if not any(per_structure != [] for per_structure in self.nci):
        self.log('No particular NCIs spotted for these structures\n')
        return

    unshared_nci = []
    for nci_list in self.nci:
        for nci in nci_list:
            # for each interaction of each structure

            already_noted = [entry[0] for entry in unshared_nci]
            if nci in already_noted:
                # we have already done it
                continue

            if all(nci in structure_nci for structure_nci in self.nci):
                # shared by all structures, not interesting
                continue

            # the interaction is not shared by all structures, take note
            shared_by = [j for j, structure_nci in enumerate(self.nci) if nci in structure_nci]
            unshared_nci.append((nci, shared_by))

    if unshared_nci != []:
        self.log('--> Differential NCIs found - these might be the source of selectivity:')
        for nci, shared_by in unshared_nci:
            nci_type, i1, i2 = nci
            self.log(f' {nci_type} between indices {i1}/{i2} is present in {len(shared_by)}/{len(self.structures)} structures {tuple([j+1 for j in shared_by])}')
        self.log()
2211
+
2212
def write_mol_info(self):
    '''
    Writes information about the firecode molecules read from the input file.

    For each molecule in self.objects a summary is logged: file name,
    number of atoms and conformers, pivots and sp3_sigmastar/sigmatropic
    flags when present, and one line for each reactive index.
    '''

    def plural(count):
        # "s" for anything but exactly one item
        return "s" if count != 1 else ""

    entries = []
    for i, mol in enumerate(self.objects):

        if hasattr(mol, 'reactive_atoms_classes_dict'):
            descs = []
            for atom in mol.reactive_atoms_classes_dict[0].values():
                orb_length = round(norm_of(atom.center[0]-atom.coord), 3)
                n_centers = len(atom.center)
                descs.append(atom.symbol+f'({str(atom)} type, {orb_length} A, ' +
                             f'{n_centers} center{plural(n_centers)})')

        else:
            descs = [pt[mol.atomnos[index]].symbol for index in mol.reactive_indices]

        # single-digit indices get a trailing space so that the arrows line up
        index_lines = []
        for index, desc in zip(mol.reactive_indices, descs):
            label = str(index)
            if len(label) == 1:
                label += ' '
            index_lines.append(label + ' -> ' + desc)
        t = '\n '.join(index_lines)

        n_conformers = len(mol.atomcoords)
        mol_line = f' -> {len(mol.atomcoords[0])} atoms, {n_conformers} conformer{plural(n_conformers)}'

        if hasattr(mol, 'pivots') and len(mol.pivots) > 0:
            n_pivots = len(mol.pivots[0])
            mol_line += f', {n_pivots} pivot{plural(n_pivots)}'

        if mol.sp3_sigmastar:
            mol_line += ', sp3_sigmastar'

        if any(mol.sigmatropic):
            mol_line += ', sigmatropic'
            mol_line += ' (all conformers)' if all(mol.sigmatropic) else ' (some conformers)'

        entries.append(f'\n {i+1}. {mol.filename}{mol_line}\n {t}\n')

    self.log('--> Input structures & reactive indices data:\n' + ''.join(entries))
2249
+
2250
def write_options(self):
    '''
    Writes information about the firecode parameters used in the calculation, if applicable to the run.

    First the imposed atom pairings are logged (kind, distance and atoms
    involved for each letter of self.pairings_table), then every line of
    str(self.options) whose first word is relevant to the current run.
    '''

    ######################################################################################################## PAIRINGS

    if not self.pairings_table:
        if all([len(mol.reactive_indices) == 2 for mol in self.objects]):
            self.log('--> No atom pairings imposed. Computing all possible dispositions.\n')
            # only print the no pairings statements if there are multiple regioisomers to be computed
    else:
        self.log(f'--> Atom pairings imposed are {len(self.pairings_table)}: {list(self.pairings_table.values())} (Cumulative index numbering)\n')

        for i, letter in enumerate(self.pairings_table):
            kind = 'Constraint' if letter.isupper() else 'Interaction'
            internal = any(isinstance(d.get(letter), tuple) for d in self.pairings_dict.values())
            kind += ' (Internal)' if internal else ''
            dist = self.get_pairing_dist_from_letter(letter)

            # a pairing without a distance (None) cannot be scaled:
            # check for that before doing any arithmetic on it
            shrinking = dist is not None and self.options.shrink and not internal

            if shrinking:
                dist *= self.options.shrink_multiplier

            if dist is None:
                kind += ' - will relax'
            elif kind == 'Interaction':
                kind += f' - embedded at {round(dist, 3)} A - will relax'
            else:
                kind += f' - constrained to {round(dist, 3)} A'

            if shrinking:
                kind += f' (to be shrinked to {round(dist/self.options.shrink_multiplier, 3)} A)'

            s = f' {i+1}. {letter} - {kind}\n'

            for mol_id, d in self.pairings_dict.items():
                atom_id = d.get(letter)

                if atom_id is not None:
                    mol = self.objects[mol_id]

                    if isinstance(atom_id, int):
                        atom_id = [atom_id]

                    for a in atom_id:
                        s += f' Index {a} ({pt[mol.atomnos[a]].name}) on {mol.rootname}\n'

            self.log(s)

    ######################################################################################################## EMBEDDING/CALC OPTIONS

    # the conditions do not depend on the line being printed, so the
    # set of option names to be left out is worked out once up front
    hidden = set()

    if self.embed in ('monomolecular', 'string', 'refine'):
        hidden.update(('rotation_range',
                       'rotation_steps',
                       'rigid',
                       'suprafacial',
                       'fix_angles_in_deformation',
                       'double_bond_protection'))

    if self.embed == 'refine':
        hidden.update(('shrink',
                       'shrink_multiplier',
                       'fix_angles_in_deformation',
                       'double_bond_protection'))

    if not self.options.optimization:
        hidden.update(('calculator',
                       'double_bond_protection',
                       'ff_opt',
                       'ff_calc',
                       'ff_level',
                       'fix_angles_in_deformation',
                       'only_refined',
                       'rigid',
                       'theory_level'))

    if self.options.rigid:
        hidden.update(('double_bond_protection',
                       'fix_angles_in_deformation'))

    if not self.options.shrink:
        hidden.update(('shrink_multiplier',))

    if not self.options.ff_opt:
        hidden.update(('ff_calc', 'ff_level'))

    self.log('--> Calculation options used were:')
    for line in str(self.options).split('\n'):

        # a blank line has no first word: it is printed as is instead of raising IndexError
        words = line.split()
        if words and words[0] in hidden:
            continue

        self.log(f' - {line}')
2340
+
2341
def log_warnings(self):
    '''
    Logs the non-fatal errors (warnings) at the end of a run.

    Nothing is logged if self.warnings is empty. Otherwise a banner
    is printed, followed by each warning (wrapped by auto_newline)
    and a closing line of asterisks.
    '''
    if not self.warnings:
        return

    # centers a title in a 76-character line padded with asterisks
    banner = "{:*^76}".format

    self.log()
    self.log(banner(" W A R N I N G S "))
    self.log(banner(" your run generated these non-fatal warnings "))
    self.log()

    for message in self.warnings:
        self.log(auto_newline(message, max_line_len=65))
        self.log()

    self.log("*"*76)
2357
+
2358
def run(self):
    '''
    Run the firecode program.

    Writes the molecule and option summaries, deals with the runs
    that end before any embedding (no embed, unrecognized embed,
    data-only and dry runs), then generates the candidates, prunes
    and optimizes them and performs the optional post-processing.

    NOTE(review): the nesting of this method was reconstructed from
    a listing without indentation - confirm against the package source.

    '''
    self.write_mol_info()

    if self.embed is None:
        self.log('--> No embed requested, exiting.\n')
        self.normal_termination()

    if self.embed == 'error':
        self.log('--> Embed type not recognized, exiting.\n')
        self.normal_termination()

    if self.embed == 'data':
        self.data_termination()

    # Large cyclical/chelotropic embeds are switched to RIGID unless
    # the LET option is set or RIGID was already requested
    if (
        not self.options.let
        and self.embed in ('cyclical', 'chelotropic')
        and max(len(mol.atomcoords) for mol in self.objects) > 100
        and not self.options.rigid
    ):
        self.options.rigid = True
        self.log('--> Large embed: RIGID keyword added for efficiency (override with LET)')

    self.write_options()

    # keep an already existing start time, if there is one
    if not hasattr(self, "t_start_run"):
        self.t_start_run = time.perf_counter()

    if self.options.dryrun:
        self.log('\n--> Dry run requested: exiting.')
        self.normal_termination()

    try:  # except KeyboardInterrupt
        try:  # except ZeroCandidatesError
            self.generate_candidates()

            if self.options.bypass:
                self.write_structures('unoptimized', energies=False)
                self.normal_termination()

            self.compenetration_refining()
            self.similarity_refining(rmsd=(self.embed == "refine"), verbose=True)

            if not self.options.optimization:
                # accounting for output in "refine" runs with NOOPT
                self.write_structures('unoptimized', energies=False)

            else:
                if self.options.ff_opt:

                    # perform safe optimization only for embeds
                    if len(self.objects) > 1 and self.options.ff_calc == 'XTB':
                        # loose convergence, scrambling prevention active
                        self.force_field_refining(conv_thr="loose", prevent_scrambling=True)

                    # FF optimization, loose convergence (step 1/2)
                    self.force_field_refining(conv_thr="loose")

                    # FF optimization, tight convergence, fixed constraints only (step 2/2)
                    self.force_field_refining(conv_thr="tight", only_fixed_constraints=True)

                same_level_as_ff = (
                    self.options.ff_opt
                    and self.options.theory_level == self.options.ff_level
                )

                # If we just optimized at a (FF) level and the final
                # optimization level is the same, avoid repeating it
                if not same_level_as_ff:

                    if self.options.calculator == "ORCA":
                        # Perform stepwise pruning of the ensemble for more expensive theory levels
                        self.log("--> Performing ORCA optimization (3 iterations, step 1/3)\n")
                        self.optimization_refining(maxiter=3)

                        self.log("--> Performing ORCA optimization (5 iterations, step 2/3)\n")
                        self.optimization_refining(maxiter=5)

                        self.log("--> Performing ORCA optimization (convergence, step 3/3)\n")

                    if len(self.structures) > 500:
                        # extra loose pass for big ensembles
                        self.optimization_refining(conv_thr='loose')

                    # final uncompromised optimization (with only fixed constraints active)
                    self.optimization_refining(conv_thr='tight', only_fixed_constraints=True)

        except ZeroCandidatesError:
            t_end_run = time.perf_counter()
            advice = (
                ' Sorry, the program did not find any reasonable embedded structure. Are you sure the input indices and pairings were correct? If so, try these tips:\n'
                ' - If no structure passes the compenetration check, the SHRINK keyword may help (see documentation).\n'
                ' - Similarly, enlarging the spacing between atom pairs with the DIST keyword facilitates the embed.\n'
                ' - If no structure passes the fitness check, try adding a solvent with the SOLVENT keyword.\n'
                ' - Impose less strict compenetration rejection criteria with the CLASHES keyword.\n'
                ' - Generate more structures with higher STEPS and ROTRANGE values.\n'
            )

            self.log(f'\n--> Program termination: No candidates found - Total time {time_to_string(t_end_run-self.t_start_run)}')
            self.log(advice)
            self.close_log_streams()
            clean_directory()
            sys.exit()

        ##################### AUGMENTATION - METADYNAMICS / CSEARCH

        if self.options.metadynamics:
            self.metadynamics_augmentation()
            self.optimization_refining()
            self.similarity_refining()

        ##################### POST FIRECODE - SADDLE, NCI
        # (VMD output and NEB refining are currently disabled)

        if self.options.saddle:
            self.saddle_refining()

        if self.options.nci and self.options.optimization:
            self.print_nci()

        self.log_warnings()
        self.normal_termination()

        ################################################ END

    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt requested by user. Quitting.')
        sys.exit()
2495
+
2496
def data_termination(self):
    '''
    Termination used by runs that perform no embedding but
    have computed data to report in a formatted way.

    Calls pka_termination if any operator contains 'pka>' and
    scan_termination if more than one operator contains 'scan>',
    then hands over to normal_termination.
    '''
    has_pka = False
    for operator in self.options.operators:
        if 'pka>' in operator:
            has_pka = True
            break

    if has_pka:
        self.pka_termination()

    # the cumulative scan report is only produced for two or more scans
    scan_count = sum(1 for operator in self.options.operators if 'scan>' in operator)
    if scan_count > 1:
        self.scan_termination()

    self.normal_termination()
2509
+
2510
def pka_termination(self):
    '''
    Print data acquired during pKa energetics calculation
    for every molecule in input that carries a pka_data attribute.

    A table with name, reactive atom, process and energy is logged.
    If self.pka_ref (reference filename, reference pKa) is set, a pKa
    column relative to that reference is added. When exactly two
    molecules are present, one tagged 'HA -> A-' and the other
    'B -> BH+', the proton transfer equilibrium data is logged too.
    '''
    from prettytable import PrettyTable

    # constants that were spelled out inline in the formulas
    gas_constant = 1.9872036e-3  # presumably R in kcal/(mol*K), given the kcal/mol energies
    temperature = 298.15         # K, as stated in the logged labels

    self.log('\n--> pKa energetics (from best conformers)')
    solv = 'gas phase' if self.options.solvent is None else self.options.solvent

    table = PrettyTable()
    table.field_names = ['Name', '#(Symb)', 'Process', 'Energy (kcal/mol)']

    # Rows and the optional pKa column must be built from the same molecules,
    # otherwise the column length would not match the number of rows
    pka_mols = [mol for mol in self.objects if hasattr(mol, 'pka_data')]

    for mol in pka_mols:
        index = mol.reactive_indices[0]
        table.add_row([mol.rootname,
                       f'{index}({pt[mol.atomnos[index]].symbol})',
                       mol.pka_data[0],
                       mol.pka_data[1]])

    # Add pKa column if we were given a reference
    if hasattr(self, 'pka_ref'):

        dG_ref = next((mol.pka_data[1] for mol in pka_mols
                       if mol.filename == self.pka_ref[0]), None)

        if dG_ref is None:
            # no reference energy: skip the column instead of failing on an unbound name
            self.log(f'\n Reference molecule {self.pka_ref[0]} has no pKa data: pKa column not added.')

        else:
            pkas = []
            for mol in pka_mols:
                process, free_energy = mol.pka_data

                # The free energy difference has a different sign for acids or bases, since
                # the pKa for a base is the one of its conjugate acid, BH+
                dG = free_energy - dG_ref if process == 'HA -> A-' else dG_ref - free_energy

                pka = dG / (np.log(10) * gas_constant * temperature) + self.pka_ref[1]
                pkas.append(round(pka, 3))

            table.add_column(f'pKa ({solv}, 298.15 K)', pkas)

    self.log(table.get_string())

    # The solvation note is appended only when a solvent is set. It is built in two
    # steps because "a + b if c else d" parses as "(a + b) if c else d", which
    # would discard the whole line for gas-phase runs
    level_line = f'\n Level used is {self.options.theory_level} via {self.options.calculator}'
    if self.options.solvent is not None:
        level_line += f", using the ALPB solvation model for {self.options.solvent}"
    self.log(level_line)

    if len(self.objects) == 2:
        mol0, mol1 = self.objects
        if hasattr(mol0, 'pka_data') and hasattr(mol1, 'pka_data'):
            tags = (mol0.pka_data[0],
                    mol1.pka_data[0])
            if 'HA -> A-' in tags and 'B -> BH+' in tags:
                dG = mol0.pka_data[1] + mol1.pka_data[1]
                self.log('\n Equilibrium data:')
                self.log(f'\n HA + B -> BH+ + A- K({solv}, 298.15 K) = {round(np.exp(-dG/(gas_constant * temperature)), 3)}')
                self.log(f'\n dG({solv}, 298.15 K) = {round(dG, 3)} kcal/mol')
2565
+
2566
def scan_termination(self):
    '''
    Draw the scan data of every input molecule that has a
    scan_data attribute on a single figure, save it as
    '<stamp>_cumulative_plt.svg' and log where it was written.
    '''
    import matplotlib.pyplot as plt

    plot_name = f'{self.stamp}_cumulative_plt.svg'

    plt.figure()

    # one labelled curve per molecule carrying scan data
    scanned = [mol for mol in self.objects if hasattr(mol, 'scan_data')]
    for mol in scanned:
        plt.plot(*mol.scan_data, label=mol.rootname)

    plt.legend()
    plt.title('Unified scan energetics')
    plt.xlabel('Distance (A)')
    plt.ylabel('Rel. E. (kcal/mol)')

    # the distance axis is drawn reversed
    plt.gca().invert_xaxis()

    plt.savefig(plot_name)

    self.log(f'\n--> Written cumulative scan plot at {plot_name}')