firecode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecode/TEST_NOTEBOOK.ipynb +3940 -0
- firecode/__init__.py +0 -0
- firecode/__main__.py +118 -0
- firecode/_gaussian.py +97 -0
- firecode/algebra.py +405 -0
- firecode/ase_manipulations.py +879 -0
- firecode/atropisomer_module.py +516 -0
- firecode/automep.py +130 -0
- firecode/calculators/__init__.py +29 -0
- firecode/calculators/_gaussian.py +98 -0
- firecode/calculators/_mopac.py +242 -0
- firecode/calculators/_openbabel.py +154 -0
- firecode/calculators/_orca.py +129 -0
- firecode/calculators/_xtb.py +786 -0
- firecode/concurrent_test.py +119 -0
- firecode/embedder.py +2590 -0
- firecode/embedder_options.py +577 -0
- firecode/embeds.py +881 -0
- firecode/errors.py +65 -0
- firecode/graph_manipulations.py +333 -0
- firecode/hypermolecule_class.py +364 -0
- firecode/mep_relaxer.py +199 -0
- firecode/modify_settings.py +186 -0
- firecode/mprof.py +65 -0
- firecode/multiembed.py +148 -0
- firecode/nci.py +186 -0
- firecode/numba_functions.py +260 -0
- firecode/operators.py +776 -0
- firecode/optimization_methods.py +609 -0
- firecode/parameters.py +84 -0
- firecode/pka.py +275 -0
- firecode/profiler.py +17 -0
- firecode/pruning.py +421 -0
- firecode/pt.py +32 -0
- firecode/quotes.json +6651 -0
- firecode/quotes.py +9 -0
- firecode/reactive_atoms_classes.py +666 -0
- firecode/references.py +11 -0
- firecode/rmsd.py +74 -0
- firecode/settings.py +75 -0
- firecode/solvents.py +126 -0
- firecode/tests/C2F2H4.xyz +10 -0
- firecode/tests/C2H4.xyz +8 -0
- firecode/tests/CH3Cl.xyz +7 -0
- firecode/tests/HCOOH.xyz +7 -0
- firecode/tests/HCOOOH.xyz +8 -0
- firecode/tests/chelotropic.txt +3 -0
- firecode/tests/cyclical.txt +3 -0
- firecode/tests/dihedral.txt +2 -0
- firecode/tests/string.txt +3 -0
- firecode/tests/trimolecular.txt +9 -0
- firecode/tests.py +151 -0
- firecode/torsion_module.py +1035 -0
- firecode/utils.py +541 -0
- firecode-1.0.0.dist-info/LICENSE +165 -0
- firecode-1.0.0.dist-info/METADATA +321 -0
- firecode-1.0.0.dist-info/RECORD +59 -0
- firecode-1.0.0.dist-info/WHEEL +5 -0
- firecode-1.0.0.dist-info/top_level.txt +1 -0
firecode/embedder.py
ADDED
|
@@ -0,0 +1,2590 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
'''
|
|
3
|
+
FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
|
|
4
|
+
Copyright (C) 2021-2024 Nicolò Tampellini
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: LGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU Lesser General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
(at your option) any later version.
|
|
12
|
+
|
|
13
|
+
This program is distributed in the hope that it will be useful,
|
|
14
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
GNU Lesser General Public License for more details.
|
|
17
|
+
|
|
18
|
+
You should have received a copy of the GNU Lesser General Public License
|
|
19
|
+
along with this program. If not, see
|
|
20
|
+
https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
|
|
21
|
+
|
|
22
|
+
'''
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
import pickle
|
|
26
|
+
import random
|
|
27
|
+
import re
|
|
28
|
+
import sys
|
|
29
|
+
import time
|
|
30
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
31
|
+
from copy import deepcopy
|
|
32
|
+
from getpass import getuser
|
|
33
|
+
from itertools import groupby
|
|
34
|
+
|
|
35
|
+
import numpy as np
|
|
36
|
+
from psutil import virtual_memory
|
|
37
|
+
|
|
38
|
+
from firecode.__main__ import __version__
|
|
39
|
+
from firecode.algebra import norm_of
|
|
40
|
+
from firecode.ase_manipulations import ase_saddle
|
|
41
|
+
from firecode.calculators._xtb import (xtb_metadyn_augmentation, xtb_opt,
|
|
42
|
+
xtb_pre_opt)
|
|
43
|
+
from firecode.embedder_options import Options, OptionSetter, keywords_dict
|
|
44
|
+
from firecode.embeds import (_get_monomolecular_reactive_indices,
|
|
45
|
+
cyclical_embed, monomolecular_embed, string_embed)
|
|
46
|
+
from firecode.errors import (InputError, NoOrbitalError, SegmentedGraphError,
|
|
47
|
+
ZeroCandidatesError)
|
|
48
|
+
from firecode.graph_manipulations import get_sum_graph
|
|
49
|
+
from firecode.hypermolecule_class import Hypermolecule, Pivot, align_by_moi
|
|
50
|
+
from firecode.multiembed import multiembed_dispatcher
|
|
51
|
+
from firecode.nci import get_nci
|
|
52
|
+
from firecode.numba_functions import (compenetration_check, count_clashes,
|
|
53
|
+
prune_conformers_tfd, scramble)
|
|
54
|
+
from firecode.operators import operate
|
|
55
|
+
from firecode.optimization_methods import Opt_func_dispatcher, fitness_check
|
|
56
|
+
from firecode.parameters import orb_dim_dict
|
|
57
|
+
from firecode.pruning import (prune_by_moment_of_inertia, prune_by_rmsd,
|
|
58
|
+
prune_by_rmsd_rot_corr)
|
|
59
|
+
from firecode.pt import pt
|
|
60
|
+
from firecode.references import references
|
|
61
|
+
from firecode.rmsd import rmsd_and_max_numba
|
|
62
|
+
from firecode.settings import DEFAULT_LEVELS, PROCS
|
|
63
|
+
from firecode.torsion_module import _get_quadruplets, csearch
|
|
64
|
+
from firecode.utils import (_saturation_check, align_structures, ase_view,
|
|
65
|
+
auto_newline, cartesian_product, clean_directory,
|
|
66
|
+
graphize, loadbar, scramble_check, time_to_string,
|
|
67
|
+
timing_wrapper, write_xyz)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Embedder:
|
|
71
|
+
'''
|
|
72
|
+
Embedder class, containing all methods to set attributes,
|
|
73
|
+
options and initialize the calculation
|
|
74
|
+
'''
|
|
75
|
+
|
|
76
|
+
def __init__(self, filename, stamp=None, procs=None):
    '''
    Initialize the Embedder object by reading the input filename (.txt).
    Sets the Option dataclass properties to default and then updates them
    with the user-requested keywords, if there are any.

    :param filename: path to the run input file; the process chdirs into
                     its parent directory, so relative paths resolve there
    :param stamp: optional run identifier used in the logfile name;
                  defaults to a 'May_23_18-53'-style timestamp
    :param procs: optional number of processors; falls back to the PROCS
                  setting, then to 4
    '''

    # wall-clock reference for the whole run
    self.t_start_run = time.perf_counter()

    # NOTE: side effect — the working directory of the whole process is
    # changed to the input file's folder so molecule files resolve relative to it
    parent_dir = os.path.dirname(filename)
    if parent_dir != '':
        os.chdir(parent_dir)

    if stamp is None:
        self.stamp = time.ctime().replace(' ','_').replace(':','-')[4:-8]
        # replaced ctime yields 'Sun_May_23_18-53-47_2021', only keeping 'May_23_18-53'

    else:
        self.stamp = stamp

    # hardware census, reported in the banner
    # NOTE(review): os.sched_getaffinity is Linux-only — confirm the
    # intended platforms, or a fallback to os.cpu_count() may be needed
    self.avail_cpus = len(os.sched_getaffinity(0))
    self.avail_mem_gb = virtual_memory().available/1E9

    try:
        from torch.cuda import device_count
        self.avail_gpus = device_count()
    except ImportError:
        # torch is optional: report GPUs as unavailable instead of failing
        self.avail_gpus = 'N/A'

    self.procs = int(procs) if procs is not None else PROCS or 4

    # start from a clean logfile for this stamp, if a stale one exists
    try:
        os.remove(f'firecode_{self.stamp}.log')

    except FileNotFoundError:
        pass

    # line-buffered (buffering=1) so the log is readable while running
    log_filename = f'firecode_{self.stamp}.log'
    self.logfile = open(log_filename, 'a', buffering=1, encoding="utf-8")
    logging.basicConfig(filename=log_filename, filemode='a')

    try:

        self.write_banner_and_info()
        # Write banner to log file

        self.options = Options()
        # initialize option subclass

        self.embed = None
        self.warnings = []
        # initialize embed type variable and warnings list

        inp = self._parse_input(filename)
        # collect information about molecule files

        self.objects = [Hypermolecule(name, c_ids) for name, c_ids in inp]
        # load designated molecular files

        # self.objects.sort(key=lambda obj: len(obj.atomcoords[0]), reverse=True)
        # sort them in descending number of atoms (not for now - messes up pairings)

        self.ids = np.array([len(mol.atomnos) for mol in self.objects])
        # Compute length of each molecule coordinates. Used to divide molecules in TSs

        self.graphs = [mol.graph for mol in self.objects]
        # Store molecular graphs

        self._read_pairings()
        # read imposed pairings from input file [i.e. mol1(6)<->mol2(45)]

        self.check_objects_compenetration()
        # make sure the input structures look alright

        self.check_saturation()
        # make sure that structures look nice and correct

        self._set_options(filename)
        # read the keywords line and set the relative options
        # then read the operators and store them

        self._calculator_setup()
        # initialize default or specified calculator

        self._print_references()
        # based on the data collected from setup

        self._apply_operators()
        # execute the operators, replacing the self.objects molecule

        self._setup()
        # setting embed type and getting ready to embed (if needed)

        if self.options.debug:
            for mol in self.objects:
                if hasattr(mol, 'reactive_atoms_classes_dict'):
                    if len(mol.reactive_atoms_classes_dict[0]) > 0:
                        mol.write_hypermolecule()
                        self.debuglog(f'DEBUG: written hypermolecule file for ({mol.filename})')
            self.log()

        if self.options.check_structures:
            self._inspect_structures()

    except Exception as e:
        # any setup failure is recorded in the logfile before propagating
        logging.exception(e)
        raise e
|
|
184
|
+
|
|
185
|
+
def log(self, string="", p=True):
    '''
    Append *string* to the run logfile, with a trailing newline.

    :param string: the message to record
    :param p: when True (default), also echo the message to stdout
    '''
    if p:
        print(string)
    self.logfile.write(f'{string}\n')
|
|
190
|
+
|
|
191
|
+
def debuglog(self, string=""):
    '''
    Write *string* (plus a newline) to the debug logfile, but only when
    the run was started in debug mode; otherwise do nothing.
    '''
    if not self.options.debug:
        return
    self.debug_logfile.write(string + '\n')
|
|
196
|
+
|
|
197
|
+
def warn(self, string):
    '''
    Record a warning: keep it in self.warnings for the end-of-run
    summary, and write it to the log right away.
    '''
    self.warnings += [string]
    self.log(string)
|
|
200
|
+
|
|
201
|
+
def write_banner_and_info(self):
    '''
    Write banner to log file, containing program and run info:
    version, user, current time, and the available CPUs, GPUs and memory
    gathered in __init__.
    '''

    # NOTE: the fields below are positional format slots {0}-{5},
    # center-padded to 24 chars inside the ASCII-art box
    banner = '''
. . * *
▒ ..
* . ▒ .. * ▒ ▒░▒ *
▒░▒ . ▒░░▒ . . ..
* . ▒ ▒░░░▒ ▒ ▒ ▒░░▒
+ ▒ ▒ ▒░░░░▒ . .. ▒ ▒░░▒ * +
. ▒░ ░░▒ ▒ ▒░▒ * ▒░░░▒ ▒
* ▒ ▒░░░░░░▒▒░▒ * ▒▒░░▒ ▒ ▒░░░░▒ ▒ .. .
. ▒ ▒░░ ░░░░ ▒ ▒░░░▒ ▒░▒▒ ▒░░ ░░░▒▒▒▒
. ▒▒ ▒ ▒░░░░░░░░░░░▒ .. ▒░ ░▒ ▒░░░░▒ ▒░░░░░░░░░ ▒▒ * +
▒▒░░▒ ▒░░░░ ░▒░░░░▒ ▒░░░░░▒ ▒░░ ░░░▒░░░░░▒░░░░▒▒▒ *
* ▒▒░░░░▒ ▒░░░░░░░░░░░░░▒ . ▒░░░░░░▒░░░░░░░░░░░ ░░░░░░░░▒▒ +
▒░░░░░░░▒░░░ ░░░░░░░░░░░▒ ▒ ▒░ ░░░░░░░ ░░░░▒░░░ ░░░░░░░ ░▒
*▒░░░░ ░░░░░░░░░░▒░░░░░ ░░░░▒░▒░░░░░░░▒░░░ ░░░░░░░ ░░░▒░░░░░▒ ▒
. ▒░ ░░░░░░░░░░░ ░░░░░░░░░░░░░░░░░░░ ░░░░░░░░░░ ░░░░░░░░░░░░░░░░░▒ .░▒ . +
▒░░███████╗██╗██████╗░███████╗░█████╗░░█████╗░██████╗░███████╗░░░░░▒
▒ ░░██╔════╝██║██╔══██╗██╔════╝██╔══██╗██╔══██╗██╔══██╗██╔════╝░░░░▒ +
▒ ▒░░░█████╗░░██║██████╔╝█████╗░░██║░░╚═╝██║░░██║██║░░██║█████╗░░░░░▒ *
▒░ ░██╔══╝░░██║██╔══██╗██╔══╝░░██║░░██╗██║░░██║██║░░██║██╔══╝░░░░▒
▒░░░██║░░░░░██║██║░░██║███████╗╚█████╔╝╚█████╔╝██████╔╝███████╗░░░▒
. ▒░░░╚═╝░░ ░░╚═╝╚═╝░░╚═╝╚══════╝░╚════╝░░╚════╝░╚═════╝░╚══════╝░░░▒ * ▒ ..
▒ ░░░░░░ ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ ░░░░░░░░░░░░░░▒ .▒░▒ .
▒ ▒░░░░░ ░░▒░░░ Filtering Refiner and Embedder for ░░░ ░░▒░░ ░░░░░░░▒░░▒
▒░░░░▒░░░░░░░░ Conformationally Dense Ensembles ░░ ░░░░░░░ ░ ░░░░░░░░▒
.▒░░░░░░░░ ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ ░▒░░░ ░░░░▒ * ▒
. ▒░░░▒░╔═════════════════════════════════════════════╗░░░ ░░░ ░░░░▒
▒░ ░║ ║░▒░░ ░░░░░▒
+▒ ▒░░░░░║ nicolo.tampellini@yale.edu ║░░░░░░░░▒ ▒
▒░░░░║ ║░░ ░░░▒
.. ▒ ▒░▒░░║ Version 🔥{0:^24}║░▒░░░ ░░░▒ * .
. ▒ ░░░░░║ User 🔥{1:^24}║░░░░░░░░▒ +
▒░░ ▒░║ Current Time 🔥{2:^24}║░░ ░░▒ ▒ + ..
.. ▒ * ▒░ ░░║ Avail CPUs 🔥{3:^24}║░░░░░░░▒ *
. ▒░ ░░║ Avail GPUs 🔥{4:^24}║ ░░░░▒ . ..
.▒░▒▒░░║ Avail Memory 🔥{5:^24}║░░▒
+ .. ▒░░ ░║ ║░▒ .. .
. ▒ ░░╚═════════════════════════════════════════════╝░▒ +
. * ▒░░░░░░ ░░▒░░░░░▒▒░░░▒▒ ░░░░░░░▒░░ ░░░▒░░▒░░░░░▒░░░░▒ .
. ▒░░▒░▒░░░ ░░░▒░░▒░ ░░░░░░▒░░ ░░░░░░▒▒▒░ ░░▒░░░░░░▒ .
▒░ ░░░░░ ░░░░░▒░░░░▒░░░░░░░ ▒░░░░░░ ░░░░▒
░░░░ ░░░ ░ ░░ ░ ░ ░░

'''.format(__version__,
           getuser(),
           time.ctime()[0:-8],
           self.avail_cpus,
           self.avail_gpus,
           str(round(self.avail_mem_gb, 1))+' GB')
    # 🔥

    # ⏣█▓▒░ banner art adapted from https://fsymbols.com/generators/tarty/

    self.log(banner)
|
|
290
|
+
|
|
291
|
+
def _print_references(self):
    '''
    Log the literature references relevant to this run: the FIRECODE
    paper always, plus GFN-FF/GFN2-XTB/CREST citations when the run
    settings make use of those methods.
    '''

    self.log('\n--> If you use FIRECODE in your publication, please cite this reference in the main text:\n' +
             f' {references["FIRECODE"]}')

    # which third-party methods does this run rely on?
    needed = {
        'GFN-FF': self.options.ff_calc == "XTB",
        'GFN2-XTB': self.options.calculator == "XTB",
        'CREST': any(("mtd>" in op or "mtd_search>" in op) for op in self.options.operators),
    }

    if any(needed.values()):
        s = ''.join(f" {name} : {references[name]}\n" for name, used in needed.items() if used)
        self.log(f'\n--> Your run also makes use of this other software: please cite these references as well.\n{s}')
|
|
311
|
+
|
|
312
|
+
def _parse_input(self, filename):
    '''
    Reads a textfile and sets the Embedder properties for the run.
    Keywords are read from the first non-comment(#), non-blank line
    if there are any, and molecules are read afterward.

    :param filename: path to the run input file
    :return: list of (molecule_filename, reactive_indices) tuples, where
             reactive_indices is a tuple of ints or None
    :raises InputError: if the molecule lines cannot be parsed
    '''

    with open(filename, 'r') as f:
        lines = f.readlines()

    # write a formatted copy of the input file to the log
    self.log(f'--> Input file: {filename}\n')
    longest = max(len(line.rstrip('\n')) for line in lines)
    self.log(' '+'-'*(longest+6))
    for _l, line in enumerate(lines):
        self.log(f'{_l+1:2}> | '+line.rstrip('\n').ljust(longest)+' |')
    self.log(' '+'-'*(longest+6)+'\n')

    # start parsing: get rid of comment and blank lines
    lines = [line.replace(', ',',') for line in lines if line[0] not in ('#', '\n')]

    def _remove_internal_constraints(string):
        # keep only the indices whose pairing letter is unique (or absent):
        # a letter repeated within the same molecule is an internal constraint,
        # handled later, not a reactive index
        numbers = [int(re.sub('[^0-9]', '', i)) for i in string]
        letters = [re.sub('[^A-Za-z]', '', i) for i in string]
        count = [letters.count(_l) if (_l != '') else 1 for _l in letters]
        return tuple([n for n, c in zip(numbers, count) if c == 1])

    try:

        # the first line is a keyword line only if it contains at least
        # one recognized keyword (names may carry '=value' or '(...)' parts)
        keywords = [_l.split('=')[0] if '(' not in _l else _l.split('(')[0] for _l in lines[0].split()]
        if any(k.upper() in keywords_dict.keys() for k in keywords):
            self.kw_line, *self.mol_lines = lines
        else:
            self.mol_lines = lines

        inp = []
        for _l, line in enumerate(self.mol_lines):

            if '>' in line:
                self.options.operators_dict[_l] = [op.strip() for op in reversed(line.rstrip('\n').split('>')[:-1])]
                self.options.operators.append(line.rstrip('\n'))
                line = line.split('>')[-1].lstrip()
                # record that we will need to perform these operations before the run

            # a distinct name avoids shadowing the input-file `filename`
            # parameter, which is still needed for error messages below
            mol_filename, *reactive_atoms = line.split()

            if reactive_atoms:
                # remove attributes from reactive indices
                reactive_atoms = [fragment for fragment in reactive_atoms if '=' not in fragment]

                # remove internal constraints from reactive indices
                reactive_indices = _remove_internal_constraints(reactive_atoms)
            else:
                reactive_indices = None

            inp.append((mol_filename, reactive_indices))

        return inp

    except Exception as e:
        print(e)
        raise InputError(f'Error in reading molecule input for {filename}. Please check your syntax.')
|
|
375
|
+
|
|
376
|
+
def check_saturation(self):
    '''
    Run a saturation sanity check on every loaded molecule, logging a
    pass message, or a warning when the saturation index is odd
    (possible radical or bad input geometry).
    '''
    self.log()
    for molecule in self.objects:
        net_charge = int(getattr(molecule, "charge", 0))

        if not _saturation_check(molecule.atomnos, net_charge):
            self.warn(f"--> WARNING! {molecule.filename}: saturation check failed. Odd saturation index (charge={net_charge}). Radical or bad input geometry?")

        else:
            self.log(f"--> {molecule.filename}: saturation check passed (even saturation index)")
|
|
390
|
+
|
|
391
|
+
def check_objects_compenetration(self):
    '''
    Warn about any input conformer whose atoms look compenetrated,
    i.e. featuring unphysically short (< 0.5 A) interatomic distances.
    '''
    for molecule in self.objects:
        for conf_id, conf_coords in enumerate(molecule.atomcoords):
            if compenetration_check(conf_coords):
                continue
            n_clashes = count_clashes(conf_coords)
            plural = 's' if n_clashes > 1 else ''
            self.warn(f"--> WARNING! {molecule.filename}, conformer {conf_id+1}, looks compenetrated ({n_clashes} interatomic distance{plural} < 0.5 A)")
|
|
401
|
+
|
|
402
|
+
def _set_options(self, filename):
    '''
    Set the options dataclass parameters through the OptionSetter class,
    from a list of given keywords. These will be used during the run to
    vary the search depth and/or output.

    :param filename: input file name, used only to craft error messages
    :raises SyntaxError: propagated unchanged from the OptionSetter
    :raises InputError: for any other failure while reading keywords
    '''

    try:
        option_setter = OptionSetter(self)
        option_setter.set_options()

    except SyntaxError:
        # keyword syntax errors already carry an informative message:
        # re-raise as-is, preserving the original traceback
        raise

    except Exception as e:
        print(e)
        raise InputError(f'Error in reading keywords from {filename}. Please check your syntax.')
|
|
419
|
+
|
|
420
|
+
def _set_reactive_atoms_cumnums(self):
|
|
421
|
+
|
|
422
|
+
if self.embed in ('cyclical', 'chelotropic', 'string'):
|
|
423
|
+
for i, mol in enumerate(self.objects):
|
|
424
|
+
|
|
425
|
+
if not hasattr(mol, 'reactive_atoms_classes_dict'):
|
|
426
|
+
mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
|
|
427
|
+
|
|
428
|
+
for c, _ in enumerate(mol.atomcoords):
|
|
429
|
+
for r_atom in mol.reactive_atoms_classes_dict[c].values():
|
|
430
|
+
r_atom.cumnum = r_atom.index
|
|
431
|
+
if i > 0:
|
|
432
|
+
r_atom.cumnum += sum(self.ids[:i])
|
|
433
|
+
|
|
434
|
+
def _read_pairings(self):
    '''
    Reads atomic pairings to be respected from the input file, if any are present.

    Populates:
    - self.pairings_dict: {mol_index: {letter: atom_index or (i1, i2)}}
      with per-molecule indices
    - self.pairings_table: {letter: [cum_i1, cum_i2]} with cumulative
      indices over the concatenated structure ('?' for the implicit
      extra constraint in bi/trimolecular runs)
    - self.internal_constraints: array of intramolecular pairs to freeze

    :raises InputError: on a malformed 'var=value' attribute
    :raises SyntaxError: if a pairing letter appears once or more than twice
    '''

    parsed = []
    unlabeled_list = []
    self.pairings_dict = {i:{} for i, _ in enumerate(self.objects)}

    for i, line in enumerate(self.mol_lines):
        # now i is also the molecule index in self.objects

        fragments = line.split('>')[-1].split()[1:]
        # remove operators (if present) and the molecule name, keeping pairs only ['2a','5b']

        # store custom variables
        # (iterating a copy, since fragments is mutated inside the loop)
        for fragment in deepcopy(fragments):
            if '=' in fragment:
                parts = fragment.split('=')

                if len(parts) != 2:
                    raise InputError(f'Error reading attribute \'{fragment}\'. Syntax: \'var=value\'')

                attr_name, attr_value = parts
                setattr(self.objects[i], attr_name, attr_value)

                fragments.remove(fragment)

                self.log(f'--> Set attribute \'{attr_name}\' of {self.objects[i]} to \'{attr_value}\'.')

                self.log()

        unlabeled = []
        pairings = []

        for fragment in fragments:

            if not fragment.lower().islower(): # if all we have is a number
                unlabeled.append(int(fragment))

            else:
                # split e.g. '23ab' into index '23' and letters 'ab'
                index, letters = [''.join(g) for _, g in groupby(fragment, str.isalpha)]

                for letter in letters:
                    pairings.append([int(index), letter])

        # appending pairing to dict before
        # calculating their cumulative index
        # If a pairing is already present, add the number
        # (refine>/REFINE runs)
        for index, letter in pairings:

            if self.pairings_dict[i].get(letter) is not None:
                prev = self.pairings_dict[i][letter]
                self.pairings_dict[i][letter] = (prev, index)

            else:
                self.pairings_dict[i][letter] = index

        # shift this molecule's indices by the atom count of preceding molecules
        if i > 0:
            for z in pairings:
                z[0] += sum(self.ids[:i])

            if unlabeled != []:
                for z in unlabeled:
                    z += sum(self.ids[:i])
                    unlabeled_list.append(z)
        else:
            if unlabeled != []:
                for z in unlabeled:
                    unlabeled_list.append(z)

        # getting the cumulative index rather than the molecule index

        for cumulative_pair in pairings:
            parsed.append(cumulative_pair)
        # parsed looks like [[1, 'a'], [9, 'a']] where numbers are
        # cumulative indices for TSs

    links = {j:[] for j in set([i[1] for i in parsed])}
    for index, tag in parsed:
        links[tag].append(index)
    # storing couples into a dictionary

    pairings = sorted(list(links.items()), key=lambda x: x[0])
    # sorting values so that 'a' is the first pairing

    self.pairings_table = {i[0]:sorted(i[1]) for i in pairings}
    # cumulative, looks like {'a':[3,45]}

    letters = tuple(self.pairings_table.keys())

    # each pairing letter must appear on exactly two atoms overall
    for letter, ids in self.pairings_table.items():

        if len(ids) == 1:
            raise SyntaxError(f'Letter \'{letter}\' is only specified once. Please flag the second reactive atom.')

        if len(ids) > 2:
            raise SyntaxError(f'Letter \'{letter}\' is specified more than two times. Please remove the unwanted letters.')

    if len(self.mol_lines) == 3:
        # adding third pairing if we have three molecules and user specified two pairings
        # (used to adjust distances for trimolecular TSs)
        if len(unlabeled_list) == 2:
            third_constraint = list(sorted(unlabeled_list))
            self.pairings_table['?'] = third_constraint

    elif len(self.mol_lines) == 2:
        # adding second pairing if we have two molecules and user specified one pairing
        # (used to adjust distances for bimolecular TSs)
        if len(unlabeled_list) == 2:
            second_constraint = list(sorted(unlabeled_list))
            self.pairings_table['?'] = second_constraint

    # Now record the internal constraints, that is the intramolecular
    # distances to freeze and later enforce to the imposed spacings
    self.internal_constraints = []

    # making sure we set the kw_line attribute
    self.kw_line = self.kw_line if hasattr(self, 'kw_line') else ''

    for letter, pair in self.pairings_table.items():
        for mol_id in self.pairings_dict:
            # a tuple value means the letter was seen twice on the
            # same molecule: an intramolecular (internal) constraint
            if isinstance(self.pairings_dict[mol_id].get(letter), tuple):

                # They are internal constraints only if we have a distance
                # to impose later on. We are checking this way because the
                # set_options function is still to be called at this stage
                if f'{letter}=' in self.kw_line:
                    self.internal_constraints.append([pair])
    self.internal_constraints = np.concatenate(self.internal_constraints) if self.internal_constraints else []
|
|
565
|
+
|
|
566
|
+
def _set_custom_orbs(self, orb_string):
    '''
    Update the reactive_atoms classes with the user-specified orbital distances.

    :param orb_string: string that looks like 'a=2.345,b=3.456,c=2.22',
        mapping each pairing letter to the desired atom-atom distance (A)
    :raises SyntaxError: if a letter in orb_string has no matching pairing
    '''
    # make sure every molecule has its reactive-atom orbitals computed
    for mol in self.objects:
        if not hasattr(mol, 'reactive_atoms_classes_dict'):
            mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)

    # {'a': 2.345, 'b': 3.456, ...}
    self.pairing_dists = {piece.split('=')[0] : float(piece.split('=')[1]) for piece in orb_string.split(',')}

    # Set the new orbital center with imposed distance from the reactive atom. The imposed distance is half the
    # user-specified one, as the final atomic distances will be given by two halves of this length.
    for letter, dist in self.pairing_dists.items():

        if letter not in self.pairings_table:
            raise SyntaxError(f'Letter \'{letter}\' is specified in DIST but not present in molecules string.')

        for i, mol in enumerate(self.objects):
            for c, _ in enumerate(mol.atomcoords):

                r_index = self.pairings_dict[i].get(letter)
                if r_index is None:
                    # this molecule does not take part in this pairing
                    continue

                if isinstance(r_index, int):
                    # single reactive atom: re-initialize its orbital in place
                    r_atom = mol.reactive_atoms_classes_dict[c][r_index]
                    r_atom.init(mol, r_index, update=True, orb_dim=dist/2, conf=c)

                else:
                    # tuple of indices (internal constraint): update each
                    # member that actually carries a reactive-atom class
                    for r_i in r_index:
                        r_atom = mol.reactive_atoms_classes_dict[c].get(r_i)
                        if r_atom:
                            r_atom.init(mol, r_i, update=True, orb_dim=dist/2, conf=c)

    # saves the last orb_string executed so that operators can
    # keep the imposed orbital spacings when replacing molecules
    self.orb_string = orb_string
    # self.log(f'DEBUG ---> Updated orb string -> {orb_string}')
|
|
606
|
+
|
|
607
|
+
def _set_pivots(self, mol):
    '''
    params mol: Hypermolecule class
    (Cyclical embed) Function that sets the mol.pivots attribute, that is a list
    containing each vector connecting two orbitals on different atoms or on the
    same atom (for single-reactive atom molecules in chelotropic embedding).
    Pivots may then be pruned by the suprafacial and sigmastar filters below.
    '''
    mol.pivots = self._get_pivots(mol)

    for c, _ in enumerate(mol.atomcoords):
        if self.options.suprafacial:
            if len(mol.pivots[c]) == 4:
                # reactive atoms have two centers each.
                # Applying suprafacial correction, only keeping
                # the shorter two, as they should be the suprafacial ones
                norms = np.linalg.norm([p.pivot for p in mol.pivots[c]], axis=1)
                # grow a candidate set from each norm until exactly two
                # pivots (the two shortest) survive, then mask the rest
                for sample in norms:
                    to_keep = [i for i in norms if sample >= i]
                    if len(to_keep) == 2:
                        mask = np.array([i in to_keep for i in norms])
                        mol.pivots[c] = mol.pivots[c][mask]
                        break

        # if mol is reacting with a sigmastar orbital (two connected reactive Sp3/Single
        # Bond centers) then remove all pivots that are not the shortest. This ensures
        # the "suprafaciality" to the pivots used, preventing the embed of
        # impossible bonding structures
        if hasattr(mol, 'sp3_sigmastar') and mol.sp3_sigmastar:
            pivots_lengths = [norm_of(pivot.pivot) for pivot in mol.pivots[c]]
            shortest_length = min(pivots_lengths)
            # tolerance mask: keep every pivot within 1e-5 of the shortest
            mask = np.array([(i - shortest_length) < 1e-5 for i in pivots_lengths])
            mol.pivots[c] = mol.pivots[c][mask]
|
|
639
|
+
|
|
640
|
+
def _get_pivots(self, mol):
    '''
    params mol: Hypermolecule class
    (Cyclical embed) Function that yields the molecule pivots. Called by _set_pivots
    and in pre-conditioning (deforming, bending) the molecules in ase_bend.

    Returns a list with one numpy array of Pivot objects per conformer.
    '''

    # orbitals not computed yet: no pivots can be built
    if not hasattr(mol, 'reactive_atoms_classes_dict'):
        return []

    pivots_list = [[] for _ in mol.atomcoords]

    for c, _ in enumerate(mol.atomcoords):

        if len(mol.reactive_atoms_classes_dict[c]) == 2:
            # most molecules: dienes and alkenes for Diels-Alder, conjugated ketones for acid-bridged additions

            indices = cartesian_product(*[range(len(atom.center)) for atom in mol.reactive_atoms_classes_dict[c].values()])
            # indices of vectors in reactive_atom.center. Reactive atoms are 2 and so for one center on atom 0 and
            # 2 centers on atom 2 we get [[0,0], [0,1], [1,0], [1,1]]

            for i,j in indices:
                a1, a2 = mol.get_r_atoms(c)

                c1 = a1.center[i]
                c2 = a2.center[j]

                pivots_list[c].append(Pivot(c1, c2, a1, a2, i, j))

        elif len(mol.reactive_atoms_classes_dict[c]) == 1:
            # carbenes, oxygen atom in Prilezhaev reaction, SO2 in classic chelotropic reactions

            indices = cartesian_product(*[range(len(mol.get_r_atoms(c)[0].center)) for _ in range(2)])
            indices = [i for i in indices if i[0] != i[1] and (sorted(i) == i).all()]
            # indices of vectors in reactive_atom.center. Reactive atoms is just one, that builds pivots with itself.
            # pivots with the same index or inverse order are discarded. 2 centers on one atom 2 yield just [[0,1]]

            for i,j in indices:
                a1 = mol.get_r_atoms(c)[0]
                # chelotropic embeds have pivots that start/end on the same atom

                c1 = a1.center[i]
                c2 = a1.center[j]

                pivots_list[c].append(Pivot(c1, c2, a1, a1, i, j))

    return [np.array(_l) for _l in pivots_list]
|
|
687
|
+
|
|
688
|
+
def _setup(self, p=True):
    '''
    Setting embed type and calculating the number of conformation combinations based on embed type.

    p: when False, this is an "early call" that only classifies the embed
       type, skipping pivot generation and the candidate count estimate.
    '''

    # If a pka or scan operator is requested, the embed is skipped
    # and data is shown instead
    if any('pka>' in op for op in self.options.operators) or (
        any('scan>' in op for op in self.options.operators)
    ):
        self.embed = 'data'
        return

    if any('refine>' in op for op in self.options.operators) or self.options.noembed:
        self.embed = 'refine'

        # If the run is a refine>/REFINE one, the self.embed
        # attribute is set in advance by the self._set_options
        # function through the OptionSetter class
        return

    # remove conformers if there are too many
    for mol in self.objects:
        if self.options.max_confs < len(mol.atomcoords) and self.embed is not None:
            self.log(f'--> {mol.filename} - kept {self.options.max_confs}/{len(mol.atomcoords)} conformations for the embed (override with CONFS=n)\n')
            mol.atomcoords = mol.atomcoords[0:self.options.max_confs]

    # Flag the embed type as None if no reactive indices are
    # provided (and the run is not a refine> one)
    if all([len(mol.reactive_indices) == 0 for mol in self.objects]):
        self.embed = None
        return

    if len(self.objects) == 1:
        # embed must be either monomolecular

        mol = self.objects[0]

        if len(mol.reactive_indices) == 2:

            self.embed = 'monomolecular'
            mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
            self._set_pivots(mol)

            # These are required: otherwise, extreme bending could scramble molecules
            self.options.only_refined = True
            self.options.fix_angles_in_deformation = True

        else:
            # if none of the previous, the program had trouble recognizing the embed to carry.
            # NOTE(review): this early return bypasses the 'error' -> InputError
            # check below, so a bad single-molecule input exits this function
            # with self.embed == 'error' and no exception - confirm intended.
            self.embed = 'error'

        return

    elif len(self.objects) in (2,3):
        # Setting embed type and calculating the number of conformation combinations based on embed type

        cyclical = all(len(molecule.reactive_indices) == 2 for molecule in self.objects)

        # chelotropic embed should check that the two atoms on one molecule are bonded
        chelotropic = sorted(len(molecule.reactive_indices) for molecule in self.objects) == [1,2]

        string = all(len(molecule.reactive_indices) == 1 for molecule in self.objects) and len(self.objects) == 2

        multiembed = (len(self.objects) == 2 and
                      all(len(molecule.reactive_indices) >= 2 for molecule in self.objects) and
                      not cyclical)

        if cyclical or chelotropic or multiembed:

            if cyclical:
                self.embed = 'cyclical'
            elif multiembed:
                self.embed = 'multiembed'
            else:
                self.embed = 'chelotropic'
                for mol in self.objects:
                    mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)
                    for c, _ in enumerate(mol.atomcoords):
                        for index, atom in mol.reactive_atoms_classes_dict[c].items():
                            orb_dim = norm_of(atom.center[0]-atom.coord)
                            # Slightly enlarging orbitals for chelotropic embeds, or they will
                            # be generated a tad too close to each other for how the cyclical embed works
                            atom.init(mol, index, update=True, orb_dim=orb_dim + 0.2, conf=c)

            self.options.rotation_steps = 5

            # if user specified a custom value, use it.
            if hasattr(self.options, 'custom_rotation_steps'):
                self.options.rotation_steps = self.options.custom_rotation_steps

            # grid of rotation angles spanning [-rotation_range, +rotation_range],
            # one axis per molecule
            self.systematic_angles = cartesian_product(*[range(self.options.rotation_steps+1) for _ in self.objects]) \
                                     * 2*self.options.rotation_range/self.options.rotation_steps - self.options.rotation_range

            # avoid calculating pivots if this is an early call
            if p:
                for molecule in self.objects:
                    self._set_pivots(molecule)

        elif string:

            self.embed = 'string'
            self.options.rotation_steps = 36

            for mol in self.objects:
                if not hasattr(mol, 'reactive_atoms_classes_dict'):
                    mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)

            # if user specified a custom value, use it.
            if hasattr(self.options, 'custom_rotation_steps'):
                self.options.rotation_steps = self.options.custom_rotation_steps

            # evenly spaced angles over the full circle
            self.systematic_angles = [n * 360 / self.options.rotation_steps for n in range(self.options.rotation_steps)]

        else:
            self.embed = 'error'

            # Complex, unspecified embed type - will explore many possibilities concurrently
            # NOTE(review): multiembed is already caught by the first branch of this
            # if/elif chain, so this recovery path looks unreachable - confirm.
            if multiembed:
                self.embed = 'multiembed'
                for mol in self.objects:
                    mol.compute_orbitals(override='Single' if self.options.simpleorbitals else None)

        if self.embed == 'error':
            raise InputError(('Bad input - The only molecular configurations accepted are:\n'
                              '1) One molecule with two reactive centers (monomolecular embed)\n'
                              '2) One molecule with four indices(dihedral embed)\n'
                              '3) Two or three molecules with two reactive centers each (cyclical embed)\n'
                              '4) Two molecules with one reactive center each (string embed)\n'
                              '5) Two molecules, one with a single reactive center and the other with two (chelotropic embed)\n'
                              '6) Two molecules with at least two reactive centers each'))

        if p:
            # avoid calculating this if this is an early call

            self._set_reactive_atoms_cumnums()
            # appending to each reactive atom the cumulative
            # number indexing in the TS context

    else:
        raise InputError('Bad input - could not set up an appropriate embed type (too many structures specified?)')

    # Only call this part if it is not an early call
    if p:
        if self.options.shrink:
            for molecule in self.objects:
                molecule._scale_orbs(self.options.shrink_multiplier)
                self._set_pivots(molecule)
            self.options.only_refined = True
            # SHRINK - scale orbitals and rebuild pivots

        # if self.options.rmsd is None:
        #     self.options.rmsd = 0.25

        self.candidates = self._get_number_of_candidates()
        _s = self.candidates or 'Many'
        self.log(f'--> Setup performed correctly. {_s} candidates will be generated.\n')
|
|
845
|
+
|
|
846
|
+
def _get_number_of_candidates(self):
    '''
    Get the number of structures that will be generated in the run.

    Returns 0 for multiembed runs (count not known in advance).
    '''
    _l = len(self.objects)

    # monomolecular: one candidate per pivot per conformer
    if _l == 1:
        return int(sum([len(self.objects[0].pivots[c])
                        for c, _ in enumerate(self.objects[0].atomcoords)]))

    # string embed: rotations times the product of per-molecule
    # orbital counts (summed over conformers)
    if self.embed == 'string':
        return int(self.options.rotation_steps*(
            np.prod([sum([len(mol.get_r_atoms(conf)[0].center)
                          for conf, _ in enumerate(mol.atomcoords)])
                     for mol in self.objects]))
        )

    if self.embed == 'multiembed':
        return 0

    # cyclical/chelotropic: 2 vector dispositions x angle grid x conformer combinations
    candidates = 2*len(self.systematic_angles)*np.prod([len(mol.atomcoords) for mol in self.objects])

    if _l == 3:
        candidates *= 4
        # Trimolecular there are 8 different triangles originated from three oriented vectors,
        # while only 2 disposition of two vectors (parallel, antiparallel).

    if self.pairings_table:
        # If there is any pairing to be respected, each one reduces the number of
        # candidates to be computed.

        if self.embed == 'cyclical':
            if len(self.objects) == 2:
                # Diels-Alder-like, if we have one (two) pairing(s) only half
                # of the total arrangements are to be checked
                candidates /= 2

            else: # trimolecular
                if len(self.pairings_table) == 1:
                    candidates /= 4
                else: # trimolecular, 2 (3) pairings imposed
                    candidates /= 8

    candidates *= np.prod([len(mol.pivots[0]) for mol in self.objects]) # add sum over len(mol.pivots[c])?
    # The more atomic pivots, the more candidates

    return int(candidates)
|
|
892
|
+
|
|
893
|
+
def _set_embedder_structures_from_mol(self):
    '''
    Intended for REFINE runs: point the embedder-level structure
    attributes (structures, atomnos, constraints, energies, ...)
    at the conformers of the single input molecule.
    '''
    molecule = self.objects[0]

    self.structures = molecule.atomcoords
    self.atomnos = molecule.atomnos
    self.constrained_indices = _get_monomolecular_reactive_indices(self)
    self.ids = None

    # placeholder energies (all zero) and all-True exit status
    self.energies = np.array([0] * len(self.structures))
    self.exit_status = np.ones(self.structures.shape[0], dtype=bool)

    # reference graph built from the first conformer plus its constraints
    self.embed_graph = get_sum_graph([graphize(self.structures[0], self.atomnos)], self.constrained_indices[0])
|
|
905
|
+
|
|
906
|
+
def _calculator_setup(self):
|
|
907
|
+
'''
|
|
908
|
+
Set up the calculator to be used with default theory levels.
|
|
909
|
+
'''
|
|
910
|
+
# Checking that calculator is specified correctly
|
|
911
|
+
if self.options.calculator not in ('MOPAC', 'ORCA', 'GAUSSIAN','XTB', 'AIMNET2'):
|
|
912
|
+
raise SyntaxError(f'\'{self.options.calculator}\' is not a valid calculator. Change its value from the parameters.py file or with the CALC keyword.')
|
|
913
|
+
|
|
914
|
+
# Setting default theory level if user did not specify it
|
|
915
|
+
if self.options.theory_level is None:
|
|
916
|
+
self.options.theory_level = DEFAULT_LEVELS[self.options.calculator]
|
|
917
|
+
|
|
918
|
+
self.dispatcher = Opt_func_dispatcher()
|
|
919
|
+
|
|
920
|
+
if self.options.calculator == 'AIMNET2':
|
|
921
|
+
self.dispatcher.load_aimnet2_calc(self.options.theory_level, logfunction=self.log)
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def _apply_operators(self):
    '''
    Replace molecules in self.objects with
    their post-operator ones.
    '''

    # early call to get the self.embed attribute
    self._setup(p=False)

    # for input_string in self.options.operators:
    for index, operators in self.options.operators_dict.items():

        for operator in operators:

            # rebuild the "operator> filename" input string and run the operator
            input_string = f'{operator}> {self.objects[index].filename}'
            outname = operate(input_string, self)
            # operator = input_string.split('>')[0]

            if operator == 'refine':
                self._set_embedder_structures_from_mol()

            # these operators do not need molecule substitution
            elif operator not in ('pka', 'scan'):

                # names = [mol.filename for mol in self.objects]
                # filename = self._extract_filename(input_string)
                # index = names.index(filename)
                reactive_indices = self.objects[index].reactive_indices

                # replacing the old molecule with the one post-operators
                self.objects[index] = Hypermolecule(outname, reactive_indices)

                # calculating where the new orbitals are
                self.objects[index].compute_orbitals(override='Single' if self.options.simpleorbitals else None)

                # updating orbital size if not default
                if hasattr(self, 'orb_string'):
                    self._set_custom_orbs(self.orb_string)

            # updating global docker if necessary
            if operator in ('rsearch', 'csearch') and self.options.noembed and len(self.objects) == 1:
                self.structures = self.objects[0].atomcoords
                self.atomnos = self.objects[0].atomnos
                self.constrained_indices = _get_monomolecular_reactive_indices(self)
                self.ids = None
                self.energies = np.array([0 for _ in self.structures])
                self.exit_status = np.ones(self.structures.shape[0], dtype=bool)
                self.embed_graph = get_sum_graph([graphize(self.structures[0], self.atomnos)], self.constrained_indices[0])

    # updating the orbital cumnums for
    # all the molecules in the run
    self._set_reactive_atoms_cumnums()

    # resetting the attribute
    self.embed = None
|
|
979
|
+
|
|
980
|
+
def _extract_filename(self, input_string):
|
|
981
|
+
'''
|
|
982
|
+
Input: 'refine> firecode_unoptimized_comp_check.xyz 5a 36a 0b 43b 33c 60c'
|
|
983
|
+
Output: 'firecode_unoptimized_comp_check.xyz'
|
|
984
|
+
'''
|
|
985
|
+
input_string = input_string.split('>')[-1].lstrip()
|
|
986
|
+
# remove operator and whitespaces after it
|
|
987
|
+
|
|
988
|
+
input_string = input_string.split()[0]
|
|
989
|
+
# remove pairing numbers/letters and newline chars
|
|
990
|
+
|
|
991
|
+
return input_string
|
|
992
|
+
|
|
993
|
+
def _inspect_structures(self):
    '''
    Open an ASE viewer window for each input molecule, then shut the
    run down, removing the (now useless) logfile.
    '''

    self.log('--> Structures check requested. Shutting down after last window is closed.\n')

    for molecule in self.objects:
        ase_view(molecule)

    # close log handles before deleting the log file
    self.close_log_streams()
    os.remove(f'firecode_{self.stamp}.log')

    sys.exit()
|
|
1006
|
+
|
|
1007
|
+
def scramble(self, array, sequence):
    '''
    Return a new numpy array with the elements of array
    rearranged according to the positions in sequence.
    '''
    reordered = [array[position] for position in sequence]
    return np.array(reordered)
|
|
1009
|
+
|
|
1010
|
+
def get_pairing_dist_from_letter(self, letter):
    '''
    Get constrained distance between paired reactive
    atoms, accessed via the associated constraint letter.
    The distance returned is the final one (not affected by SHRINK)

    Returns None if no orbitals were built for the atoms involved.
    '''

    # an explicitly imposed distance for this letter takes precedence
    if hasattr(self, 'pairing_dists') and self.pairing_dists.get(letter) is not None:
        return self.pairing_dists[letter]

    d = 0
    try:
        for mol_index, mol_pairing_dict in self.pairings_dict.items():

            # BUGFIX: compare against None explicitly - a reactive atom
            # index of 0 is a valid pairing and must not be skipped as falsy
            if (r_atom_index := mol_pairing_dict.get(letter)) is not None:

                # for refine embeds, one letter corresponds to two indices
                # on the same molecule
                if isinstance(r_atom_index, tuple):
                    i1, i2 = r_atom_index
                    return (self.objects[mol_index].get_orbital_length(i1) +
                            self.objects[mol_index].get_orbital_length(i2))

                # for other runs, it is just one atom per molecule per letter
                d += self.objects[mol_index].get_orbital_length(r_atom_index)

        # undo the SHRINK scaling so the returned distance is the final one
        if self.options.shrink:
            d /= self.options.shrink_multiplier

        return d

    # If no orbitals were built, return None
    except NoOrbitalError:
        return None
|
|
1043
|
+
|
|
1044
|
+
def get_pairing_dists_from_constrained_indices(self, constrained_pair):
    '''
    Returns the constrained distance
    for a specific constrained pair of indices,
    or None if no pairing letter matches the pair.
    '''
    # find the letter whose pair matches this exact (ordered) index pair
    for letter, pair in self.pairings_table.items():
        if pair[0] == constrained_pair[0] and pair[1] == constrained_pair[1]:
            return self.get_pairing_dist_from_letter(letter)

    return None
|
|
1056
|
+
|
|
1057
|
+
def get_pairing_dists(self, conf):
    '''
    Returns a list with the constrained distances for each embedder
    constraint active on conformer conf, or None when the conformer
    has no constraints.
    '''
    active = self.constrained_indices[conf]
    if active.size == 0:
        return None

    # fixed internal constraints, if any, are appended after
    # the conformer-specific ones
    if len(self.internal_constraints) > 0:
        active = np.concatenate([active, self.internal_constraints])

    return [self.get_pairing_dists_from_constrained_indices(pair) for pair in active]
|
|
1066
|
+
|
|
1067
|
+
def write_structures(
    self,
    tag,
    indices=None,
    energies=True,
    relative=True,
    extra='',
    align='indices',
    p=True,
):
    '''
    Writes structures to file.

    tag: label used in the output filename and structure titles
    indices: atom indices forwarded to the alignment function
    energies: if True, each title carries the structure energy
    relative: if True, energies are written relative to the minimum
    extra: extra text appended to each title
    align: 'indices' (align on given indices) or 'moi' (moments of inertia)
    p: if True, log a summary line after writing
    '''

    align_functions = {
        'indices' : align_structures,
        'moi' : align_by_moi,
    }

    if energies:
        # BUGFIX: build a new array instead of subtracting in place -
        # "rel_e = self.energies; rel_e -= min" aliased the embedder's
        # energy array and silently offset it for the rest of the run
        if relative:
            rel_e = self.energies - np.min(self.energies)
        else:
            rel_e = self.energies

    # truncate if there are too many (embed debug first dump)
    if len(self.structures) > 10000 and not self.options.let:
        self.log(f'Truncated {tag} output structures to 10000 (from {len(self.structures)} - keyword LET to override).')
        output_structures = self.structures[0:10000]
    else:
        output_structures = self.structures

    self.outname = f'firecode_{tag}_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:

        for i, structure in enumerate(align_functions[align](output_structures, atomnos=self.atomnos, indices=indices)):
            # typo fix: "Strucure" -> "Structure"
            title = f'Structure {i+1} - {tag}'

            if energies:
                title += f' - Rel. E. = {round(rel_e[i], 3)} kcal/mol '

            title += extra

            write_xyz(structure, self.atomnos, f, title=title)

    if p:
        self.log(f'Wrote {len(output_structures)} {tag} structures to {self.outname} file.\n')
|
|
1115
|
+
|
|
1116
|
+
def write_quote(self):
    '''
    Reads the quote file and writes one in the logfile
    '''
    from firecode.quotes import load_quotes

    # pick a random quote record ({'quote': ..., 'author': ...})
    picked = random.choice(load_quotes())
    quote, author = picked.values()

    self.log('\n' + auto_newline(quote))

    # anonymous quotes carry an empty author string
    if author != "":
        self.log(f' - {author}\n')
|
|
1127
|
+
|
|
1128
|
+
def run(self):
    '''
    Run the embedding.
    '''
    try:
        RunEmbedding(self).run()

    except Exception as exc:
        # record the full traceback in the log before propagating
        logging.exception(exc)
        raise exc
|
|
1138
|
+
|
|
1139
|
+
def normal_termination(self):
    '''
    Terminate the run, printing the total time and the
    relative energies of the first 10 structures, if possible.
    Always exits the process via sys.exit().
    '''
    clean_directory()
    self.log(f'\n--> FIRECODE normal termination: total time {time_to_string(time.perf_counter() - self.t_start_run, verbose=True)}.')

    if hasattr(self, "structures"):
        # how many structures to report in the summary
        show = 10
        if len(self.structures) > 0 and hasattr(self, "energies"):
            # NOTE(review): this truncates self.energies in place to the first
            # `show` entries - fine at termination, but a side effect to be
            # aware of if this method is ever called earlier
            self.energies = self.energies if len(self.energies) <= show else self.energies[0:show]

            # Don't write structure info if there is only one, or all are zero
            if np.max(self.energies - np.min(self.energies)) > 0:

                self.log(f'\n--> Energies of output structures (first {show}, {self.options.theory_level}/{self.options.calculator}{f"/{self.options.solvent}" if self.options.solvent is not None else ""})\n')

                self.log(f'> # (total {len(self.structures)}) Rel. E. RMSD')
                self.log('-------------------------------------------')
                # energies reported relative to the first (best) structure,
                # RMSD computed against it as reference
                for i, energy in enumerate(self.energies-self.energies[0]):

                    rmsd_value = '(ref)' if i == 0 else str(round(rmsd_and_max_numba(self.structures[i], self.structures[0], center=True)[0], 2))+' Å'

                    self.log(f'> Candidate {str(i+1):2} : {energy:.2f} kcal/mol : {rmsd_value}')

                if len(self.structures) > show:
                    self.log(f'> ... ({len(self.structures)-show} more)')

    self.write_quote()
    self.close_log_streams()
    sys.exit()
|
|
1172
|
+
|
|
1173
|
+
def close_log_streams(self):
    '''
    Close the main logfile handle and, when present, the debug one.
    '''
    self.logfile.close()

    # the debug logfile only exists on DEBUG runs
    if hasattr(self, "debug_logfile"):
        self.debug_logfile.close()
|
|
1178
|
+
|
|
1179
|
+
class RunEmbedding(Embedder):
|
|
1180
|
+
'''
|
|
1181
|
+
Class for running embeds, containing all
|
|
1182
|
+
methods to embed and refine structures
|
|
1183
|
+
'''
|
|
1184
|
+
|
|
1185
|
+
def __init__(self, embedder):
    '''
    Copying all non-callable attributes
    of the previous embedder.
    '''
    # Transfer every plain (non-callable) attribute from the parent
    # embedder, skipping dunder names and the run method itself
    for name in dir(embedder):
        if name[0:2] == '__' or name == 'run':
            continue
        value = getattr(embedder, name)
        if not callable(value):
            setattr(self, name, value)
|
|
1196
|
+
|
|
1197
|
+
def rel_energies(self):
    '''
    Energies of all candidates, relative to the lowest one.
    '''
    lowest = np.min(self.energies)
    return self.energies - lowest
|
|
1199
|
+
|
|
1200
|
+
def apply_mask(self, attributes, mask):
    '''
    Applies in-place masking of Embedder attributes.
    Attributes that are not present on self are silently skipped.
    '''
    for attribute_name in attributes:
        if not hasattr(self, attribute_name):
            continue
        masked = getattr(self, attribute_name)[mask]
        setattr(self, attribute_name, masked)
|
|
1208
|
+
|
|
1209
|
+
def zero_candidates_check(self):
    '''
    Asserts that not all structures are being rejected.
    '''
    # nothing to do while at least one candidate survives
    if len(self.structures) != 0:
        return

    self.log_warnings()
    raise ZeroCandidatesError()
|
|
1216
|
+
|
|
1217
|
+
def generate_candidates(self):
    '''
    Generate a series of candidate structures by the proper embed algorithm.
    Sets self.structures, self.atomnos and self.embed_graph, and writes
    the raw embedded candidates to file.
    '''

    # dispatch table: embed type -> embedding function
    embed_functions = {
        'chelotropic' : cyclical_embed,
        'cyclical' : cyclical_embed,
        'monomolecular' : monomolecular_embed,
        'string' : string_embed,
        'multiembed' : multiembed_dispatcher,
    }

    # refine runs already have their structures set: nothing to embed
    if self.embed == 'refine':
        self.log('\n')
        return

    # Embed structures and assign them to self.structures
    self.structures = embed_functions[self.embed](self)

    # cumulative list of atomic numbers associated with coordinates
    self.atomnos = np.concatenate([molecule.atomnos for molecule in self.objects])

    # Build the embed graph. This will be used as a future reference.
    # Note that the use of the first constrained_indices pair is irrelevant
    # for the torsion fingerprint outcome, but other future features might
    # rely on the embed_graph to be accurate if conformers have different
    # constrained indices.

    additional_bonds = self.constrained_indices[0]
    if len(self.internal_constraints) > 0:
        additional_bonds = np.concatenate((self.internal_constraints, additional_bonds))

    self.embed_graph = get_sum_graph(self.graphs, additional_bonds)

    self.log(f'Generated {len(self.structures)} transition state candidates ({time_to_string(time.perf_counter()-self.t_start_run)})\n')

    # if self.options.debug:
    self.write_structures('embedded', energies=False)

    if self.options.debug:
        self.dump_status('generate_candidates')
        self.debuglog('DEBUG: Dumped emebedder status after generating candidates (\"generate_candidates\")')
|
|
1260
|
+
|
|
1261
|
+
def dump_status(self, outname, only_fixed_constraints=False):
    '''
    Writes structures and energies to [outname].xyz
    and [outname].dat to help debug the current run.
    Also pickles the embedder state to [outname]_runembedding.pickle.

    only_fixed_constraints: when True, the constraints file only lists
    the pairings whose letter is uppercase (fixed-distance pairings).
    '''

    # energies file: relative energy per candidate, 1E10 marks a scrambled one
    if hasattr(self, 'energies'):
        with open(f'{outname}_energies.dat', 'w') as _f:
            for i, energy in enumerate(self.energies):
                print_energy = str(round(energy-np.min(self.energies), 2))+' kcal/mol' if energy != 1E10 else 'SCRAMBLED'
                _f.write('Candidate {:5} : {}\n'.format(i, print_energy))

    # structures file: aligned candidates, tagged with refinement status
    with open(f'{outname}_structures.xyz', 'w') as _f:
        exit_status = self.exit_status if hasattr(self, 'exit_status') else [0 for _ in self.structures]
        energies = self.rel_energies() if hasattr(self, 'energies') else [0 for _ in self.structures]
        for i, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                            exit_status,
                                                            energies)):

            kind = 'REFINED - ' if status else 'NOT REFINED - '
            write_xyz(structure, self.atomnos, _f, title=f'Structure {i+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

    # constraints file: per-candidate constrained index pairs and their target distances
    with open(f'{outname}_constraints.dat', 'w') as _f:
        for i, constraints in enumerate(self.constrained_indices):

            if only_fixed_constraints:
                constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])

            else:
                constraints = np.concatenate([constraints, self.internal_constraints]) if len(self.internal_constraints) > 0 else constraints

            c_str = repr(constraints).replace('\n','').replace(', ',', ')
            d_str = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]
            _f.write('Candidate {:5} : {} -> {}\n'.format(i, c_str, d_str))

    # pickle file: full embedder state snapshot for offline inspection
    with open(f'{outname}_runembedding.pickle', 'wb') as _f:
        d = {
            'structures' : self.structures,
            'constrained_indices' : self.constrained_indices,
            'graphs' : self.graphs,
            'objects' : self.objects,
            'options' : self.options,
            'atomnos' : self.atomnos,
        }

        if hasattr(self, 'energies'):
            d['energies'] = self.energies

        pickle.dump(d, _f)
|
|
1311
|
+
|
|
1312
|
+
def compenetration_refining(self):
    '''
    Performing a sanity check for excessive compenetration
    on generated structures, discarding the ones that look too bad.
    Also initializes self.energies and self.exit_status for the survivors.
    '''

    if self.embed not in ('string', 'cyclical', 'monomolecular'):
        # these do not need compenetration refining: the
        # algorithm checks for compenetrations when embedding

        self.log('--> Checking structures for compenetrations')

        t_start = time.perf_counter()
        # mask[s] is True when structure s passes the clash check
        mask = np.zeros(len(self.structures), dtype=bool)
        # num = len(self.structures)
        for s, structure in enumerate(self.structures):
            # if num > 100 and num % 100 != 0 and s % (num % 100) == 99:
            #     loadbar(s, num, prefix=f'Checking structure {s+1}/{num} ')
            mask[s] = compenetration_check(structure, self.ids, max_clashes=self.options.max_clashes, thresh=self.options.clash_thresh)

        # loadbar(1, 1, prefix=f'Checking structure {len(self.structures)}/{len(self.structures)} ')

        # drop rejected candidates from all parallel arrays
        self.apply_mask(('structures', 'constrained_indices'), mask)
        t_end = time.perf_counter()

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for compenetration ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})')
        else:
            self.log(f'All {len(mask)} structures passed the compenetration check')
        self.log()

    self.zero_candidates_check()

    # initialize embedder values for the active structures
    # that survived the compenetration check
    # (1E10 is the sentinel energy for not-yet-evaluated/scrambled candidates)
    self.energies = np.full(len(self.structures), 1E10)
    self.exit_status = np.zeros(len(self.structures), dtype=bool)
|
|
1349
|
+
|
|
1350
|
+
def fitness_refining(self, threshold=5, verbose=False):
    '''
    Prune structures whose constrained pairing distances deviate
    too much from the intended spacings. Internal constraints
    are ignored.

    threshold : rejection happens when the sum of the deviations from the
    intended spacings is greater than threshold.
    verbose : if True, log progress messages.
    '''
    if verbose:
        self.log(' \n--> Fitness pruning - removing inaccurate structures')

    # evaluate every candidate against its own pairing constraints
    keep_flags = []
    for candidate, pairings in zip(self.structures, self.constrained_indices):
        target_dists = tuple(self.get_pairing_dists_from_constrained_indices(pair) for pair in pairings)
        keep_flags.append(fitness_check(candidate,
                                        pairings,
                                        target_dists,
                                        threshold=threshold))
    mask = np.array(keep_flags, dtype=bool)

    # drop rejected candidates from all paired attributes at once
    self.apply_mask(
        (
            'structures',
            'energies',
            'constrained_indices',
            'exit_status',
        ),
        mask,
    )

    if False in mask:
        self.log(f'Discarded {len([b for b in mask if not b])} candidates for unfitness ({len([b for b in mask if b])} left)')
    elif verbose:
        self.log('All candidates meet the imposed criteria.')
    self.log()

    self.zero_candidates_check()
def similarity_refining(self, tfd=True, moi=True, rmsd=True, verbose=False):
    '''
    If possible, removes structures with similar torsional profile (TFD-based).
    Removes structures that are too similar to each other (RMSD-based).

    tfd/moi/rmsd : toggles for the individual pruning passes
    verbose : if True, log progress messages.
    '''

    # NOTE(review): indentation reconstructed from a whitespace-mangled
    # source - the if/elif/else pairings below should be verified upstream.
    if verbose:
        self.log('--> Similarity Processing')

    before = len(self.structures)
    # attributes that must stay index-aligned with self.structures
    attr = ('constrained_indices', 'energies', 'exit_status')

    # First pass: torsional fingerprint deviation (TFD) pruning
    if (
        tfd and
        len(self.objects) > 1 and
        hasattr(self, 'embed_graph') and
        self.embed_graph.is_single_molecule
    ):

        t_start = time.perf_counter()

        quadruplets = _get_quadruplets(self.embed_graph)
        if len(quadruplets) > 0:
            self.structures, mask = prune_conformers_tfd(self.structures, quadruplets, verbose=verbose)

            self.apply_mask(attr, mask)

            if False in mask:
                self.log(f'Discarded {len([b for b in mask if not b])} structures for TFD similarity ({len([b for b in mask if b])} left, {time_to_string(time.perf_counter()-t_start)})')

    # Second pass: moment-of-inertia pruning (cheap, coarse filter)
    if moi:

        if len(self.structures) <= 1E5:

            ### Now again, based on the moment of inertia

            before3 = len(self.structures)

            t_start = time.perf_counter()
            self.structures, mask = prune_by_moment_of_inertia(self.structures, self.atomnos, debugfunction=self.debuglog)

            self.apply_mask(attr, mask)

            if before3 > len(self.structures):
                self.log(f'Discarded {int(len([b for b in mask if not b]))} candidates for MOI similarity ({len([b for b in mask if b])} left, {time_to_string(time.perf_counter()-t_start)})')

        else:
            self.log('Skipped MOI pruning (>100k) structures')

    # Third pass: RMSD pruning, skipped for very large ensembles
    if rmsd and len(self.structures) <= 1E5:

        before1 = len(self.structures)

        t_start = time.perf_counter()

        self.structures, mask = prune_by_rmsd(self.structures, self.atomnos, self.options.rmsd, debugfunction=self.debuglog)

        self.apply_mask(attr, mask)

        if before1 > len(self.structures):
            self.log(f'Discarded {int(len([b for b in mask if not b]))} candidates for RMSD similarity ({len([b for b in mask if b])} left, {time_to_string(time.perf_counter()-t_start)})')

        ### Second step: again but symmetry-corrected (unless we have too many structures)

        if len(self.structures) <= 1E4 and hasattr(self, 'embed_graph'):

            before2 = len(self.structures)

            t_start = time.perf_counter()
            self.structures, mask = prune_by_rmsd_rot_corr(
                self.structures,
                self.atomnos,
                self.embed_graph,
                max_rmsd=self.options.rmsd,
                logfunction=(self.log if verbose else None),
                debugfunction=self.debuglog,
            )

            self.apply_mask(attr, mask)

            if before2 > len(self.structures):
                self.log(f'Discarded {int(len([b for b in mask if not b]))} candidates for symmetry-corrected RMSD similarity ({len([b for b in mask if b])} left, {time_to_string(time.perf_counter()-t_start)})')

        elif hasattr(self, 'embed_graph'):
            self.log('Skipped rotationally-corrected RMSD pruning (>10k) structures')

    else:
        # NOTE(review): this branch also fires when rmsd=False - confirm intended
        self.log('Skipped RMSD pruning (>100k) structures')

    if verbose and len(self.structures) == before:
        self.log(f'All structures passed the similarity check.{" "*15}')

    self.log()
def force_field_refining(self, conv_thr="tight", only_fixed_constraints=False, prevent_scrambling=False):
    '''
    Performs structural optimizations with the embedder force field caculator.
    Only structures that do not scramble during FF optimization are updated,
    while the rest are kept as they are.
    conv_thr: convergence threshold, passed to calculator
    only_fixed_constraints: only uses fixed (UPPERCASE) constraints in optimization
    prevent_scrambling: preserves molecular identities constraining bonds present in graphs (XTB only)
    '''

    ################################################# CHECKPOINT BEFORE FF OPTIMIZATION

    # write an unoptimized checkpoint file, unless this is the tight/relax pass
    if not only_fixed_constraints:
        self.outname = f'firecode_checkpoint_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            for i, structure in enumerate(align_structures(self.structures)):
                write_xyz(structure, self.atomnos, f, title=f'TS candidate {i+1} - Checkpoint before FF optimization')
        self.log(f'\n--> Checkpoint output - Wrote {len(self.structures)} unoptimized structures to {self.outname} file before FF optimization.\n')

    ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD

    if only_fixed_constraints:
        task = 'Structure optimization (tight) / relaxing interactions'
    else:
        task = f'Structure {"pre-" if prevent_scrambling else ""}optimization (loose)'

    self.log(f'--> {task} ({self.options.ff_level}{f"/{self.options.solvent}" if self.options.solvent is not None else ""} level via {self.options.ff_calc}, {self.avail_cpus} thread{"s" if self.avail_cpus>1 else ""})')

    t_start_ff_opt = time.perf_counter()

    processes = []
    cum_time = 0  # summed per-structure wall time, used for the speedup figure

    opt_function = xtb_pre_opt if prevent_scrambling else xtb_opt

    # Running as many threads as we have procs
    # since FF does not parallelize well with more cores
    with ProcessPoolExecutor(max_workers=self.avail_cpus) as executor:

        for i, structure in enumerate(deepcopy(self.structures)):

            # fixed constraints only (UPPERCASE pairings) or all constraints
            if only_fixed_constraints:
                constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])

            else:
                constraints = np.concatenate([self.constrained_indices[i], self.internal_constraints]) if len(self.internal_constraints) > 0 else self.constrained_indices[i]

            pairing_dists = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]

            process = executor.submit(
                timing_wrapper,
                opt_function,
                structure,
                self.atomnos,
                graphs=self.graphs,
                calculator=self.options.ff_calc,
                method=self.options.ff_level,
                solvent=self.options.solvent,
                charge=self.options.charge,
                maxiter=None,
                conv_thr=conv_thr,
                constrained_indices=constraints,
                constrained_distances=pairing_dists,
                procs=2, # FF just needs two per structure
                title=f'Candidate_{i+1}',
                spring_constant=0.2 if prevent_scrambling else 1,
                payload=(
                    self.constrained_indices[i],
                )
            )
            processes.append(process)

        # collect results as they complete (order is completion order;
        # the payload echoes back the matching constrained indices)
        for i, process in enumerate(as_completed(processes)):

            loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

            ((
                new_structure,
                new_energy,
                self.exit_status[i]
            ),
            # from optimization function

            (
                self.constrained_indices[i],
            ),
            # from payload

            t_struct
            # from timing_wrapper

            ) = process.result()

            # assert that the structure did not scramble during optimization
            if self.exit_status[i]:
                constraints = (np.concatenate([self.constrained_indices[i], self.internal_constraints])
                               if len(self.internal_constraints) > 0
                               else self.constrained_indices[i])

                self.exit_status[i] = scramble_check(new_structure,
                                                     self.atomnos,
                                                     excluded_atoms=constraints.ravel(),
                                                     mols_graphs=self.graphs,
                                                     max_newbonds=self.options.max_newbonds,
                                                     logfunction=self.log if self.options.debug else None,
                                                     title=f"Candidate_{i+1}")

            cum_time += t_struct

            if self.options.debug:
                exit_status = 'REFINED ' if self.exit_status[i] else 'SCRAMBLED'
                self.debuglog(f'DEBUG: force_field_refining ({conv_thr}) - Candidate_{i+1} - {exit_status} {time_to_string(t_struct, digits=3)}')

            # only accept the new geometry if it did not scramble and converged;
            # 1E10 marks failed candidates for the energy-pruning step below
            if self.exit_status[i] and new_energy is not None:
                self.structures[i] = new_structure
                self.energies[i] = new_energy

            else:
                self.energies[i] = 1E10

            ### Update checkpoint every (20*max_workers) optimized structures, and give an estimate of the remaining time
            chk_freq = self.avail_cpus * self.options.checkpoint_frequency
            if i % chk_freq == chk_freq-1:

                with open(self.outname, 'w') as f:
                    for j, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                                        self.exit_status,
                                                                        self.rel_energies())):

                        kind = 'REFINED - ' if status else 'NOT REFINED - '
                        write_xyz(structure, self.atomnos, f, title=f'Structure {j+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

                elapsed = time.perf_counter() - t_start_ff_opt
                average = (elapsed)/(i+1)
                time_left = time_to_string((average) * (len(self.structures)-i-1))
                speedup = cum_time/elapsed
                self.log(f' - Optimized {i+1:>4}/{len(self.structures):>4} structures - updated checkpoint file (avg. {time_to_string(average)}/struc, {round(speedup, 1)}x speedup, est. {time_left} left)', p=False)

    loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    elapsed = time.perf_counter() - t_start_ff_opt
    average = (elapsed)/(len(self.structures))
    speedup = cum_time/elapsed

    self.log(f'{self.options.ff_calc}/{self.options.ff_level} optimization took {time_to_string(elapsed)} (~{time_to_string(average)} per structure, {round(speedup, 1)}x speedup)')

    ################################################# EXIT STATUS

    self.log(f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} candidates at {self.options.ff_level} level.')

    ################################################# PRUNING: ENERGY

    _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
    self.energies = self.scramble(self.energies, sequence)
    self.structures = self.scramble(self.structures, sequence)
    self.constrained_indices = self.scramble(self.constrained_indices, sequence)
    # sorting structures based on energy

    if self.options.debug:
        self.dump_status(f'force_field_refining_{conv_thr}', only_fixed_constraints=only_fixed_constraints)
        self.debuglog(f'DEBUG: Dumped emebedder status after generating candidates (\"force_field_refining_{conv_thr}\")')

    # discard candidates flagged with the 1E10 sentinel energy above
    mask = self.rel_energies() < 1E10
    self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

    if False in mask:
        self.log(f'Discarded {len([b for b in mask if not b])} scrambled candidates ({np.count_nonzero(mask)} left)')

    ################################################# PRUNING: FITNESS (POST FORCE FIELD OPT)

    self.fitness_refining(threshold=2)

    ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# CHECKPOINT AFTER FF OPTIMIZATION

    s = f'--> Checkpoint output - Updated {len(self.structures)} optimized structures to {self.outname} file'

    if self.options.optimization and (self.options.ff_level != self.options.theory_level) and conv_thr != "tight":
        s += f' before {self.options.calculator} optimization.'

    else:
        self.outname = f'firecode_{"ensemble" if self.embed == "refine" else "poses"}_{self.stamp}.xyz'
        # if the FF optimization was the last one, call the outfile accordingly

    self.log(s+'\n')

    with open(self.outname, 'w') as f:
        for i, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                            self.exit_status,
                                                            self.rel_energies())):

            kind = 'REFINED - ' if status else 'NOT REFINED - '
            write_xyz(structure, self.atomnos, f, title=f'Structure {i+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

    # do not retain energies for the next optimization step if optimization was not tight
    if not only_fixed_constraints:
        self.energies.fill(0)
def _set_target_distances(self):
    '''
    Called before TS refinement to compute all
    target bonding distances. These are only set
    if that pairing is not a non-covalent interaction,
    that is if pairing was not specified with letters
    "x", "y" or "z".

    Populates self.target_distances, a dict mapping a sorted
    (index1, index2) pair to its target distance: either the
    user-specified pairing distance or the sum of the two
    orbital dimensions.
    '''
    self.target_distances = {}

    # grab the atoms we want to extract information from:
    # non-NCI reactive atoms, keyed by their (cumulative) index
    r_atoms = {}
    for mol in self.objects:
        for letter, r_atom in mol.reactive_atoms_classes_dict[0].items():
            # conformer-cumulative index when present, plain index otherwise
            cumnum = r_atom.cumnum if hasattr(r_atom, 'cumnum') else r_atom.index
            if letter not in ("x", "y", "z"):
                r_atoms[cumnum] = r_atom

    # all constrained index pairs, deduplicated and order-normalized
    pairings = self.constrained_indices.ravel()
    pairings = pairings.reshape(int(pairings.shape[0]/2), 2)
    pairings = {tuple(sorted((a,b))) for a, b in pairings}

    # letters and index pairs of non-NCI ("active") pairings, kept in two
    # ALIGNED lists so that active_pairs.index(...) maps to the right letter.
    # (fix: previously the full pairings_table key list was indexed with a
    # position from the x/y/z-filtered pair list, returning the wrong letter
    # whenever an "x"/"y"/"z" pairing preceded a covalent one)
    active_letters = [letter for letter in self.pairings_table if letter not in ("x", "y", "z")]
    active_pairs = [self.pairings_table[letter] for letter in active_letters]

    for index1, index2 in pairings:

        if [index1, index2] in active_pairs:

            # if target distance has been specified by user, read that, otherwise compute it
            if hasattr(self, 'pairing_dists'):
                letter = active_letters[active_pairs.index([index1, index2])]

                if letter in self.pairing_dists:
                    self.target_distances[(index1, index2)] = self.pairing_dists[letter]
                    continue

            # fall back to the sum of the two orbital dimensions
            r_atom1 = r_atoms[index1]
            r_atom2 = r_atoms[index2]

            dist1 = orb_dim_dict.get(r_atom1.symbol + ' ' + str(r_atom1), orb_dim_dict['Fallback'])
            dist2 = orb_dim_dict.get(r_atom2.symbol + ' ' + str(r_atom2), orb_dim_dict['Fallback'])

            self.target_distances[(index1, index2)] = dist1 + dist2
def optimization_refining(self, maxiter=None, conv_thr='tight', only_fixed_constraints=False):
    '''
    Refines structures by constrained optimizations with the active calculator,
    discarding similar ones and scrambled ones.
    maxiter - int, number of max iterations for the optimization
    conv_thr: convergence threshold, passed to calculator
    only_fixed_constraints: only uses fixed (UPPERCASE) constraints in optimization

    '''

    # pytorch parallellization is carried out differently
    if self.options.calculator == 'AIMNET2':
        from aimnet2_firecode.interface import \
            aimnet2_optimization_refining
        return aimnet2_optimization_refining(self, maxiter=maxiter, conv_thr=conv_thr, only_fixed_constraints=only_fixed_constraints)

    self.outname = f'firecode_{"ensemble" if self.embed == "refine" else "poses"}_{self.stamp}.xyz'

    if only_fixed_constraints:
        task = 'Structure optimization (tight) / relaxing interactions'
    else:
        task = 'Structure optimization (loose)'

    # per-calculator parallelization: XTB runs many 4-core jobs,
    # ORCA/GAUSSIAN use self.procs cores per job
    max_workers = {
        'XTB' : int(self.avail_cpus//4),
        'ORCA' : int(self.avail_cpus//self.procs),
        'GAUSSIAN' : int(self.avail_cpus//self.procs),
    }[self.options.calculator]

    self.log(f'--> {task} ({self.options.theory_level}{f"/{self.options.solvent}" if self.options.solvent is not None else ""}' +
             f' level via {self.options.calculator}, {max_workers} thread{"s" if max_workers>1 else ""})')

    self.energies.fill(0)
    # Resetting all energies since we changed theory level

    t_start = time.perf_counter()
    processes = []
    cum_time = 0  # summed per-structure wall time, used for the speedup figure

    with ProcessPoolExecutor(max_workers=max_workers) as executor:

        opt_func = self.dispatcher.opt_funcs_dict[self.options.calculator]

        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

            # fixed constraints only (UPPERCASE pairings) or all constraints
            if only_fixed_constraints:
                constraints = np.array([value for key, value in self.pairings_table.items() if key.isupper()])

            else:
                constraints = np.concatenate([self.constrained_indices[i], self.internal_constraints]) if len(self.internal_constraints) > 0 else self.constrained_indices[i]

            pairing_dists = [self.get_pairing_dists_from_constrained_indices(_c) for _c in constraints]

            process = executor.submit(
                timing_wrapper,
                opt_func,
                structure,
                self.atomnos,
                method=self.options.theory_level,
                solvent=self.options.solvent,
                charge=self.options.charge,
                maxiter=maxiter,
                conv_thr=conv_thr,
                constrained_indices=constraints,
                constrained_distances=pairing_dists,
                procs=self.procs,
                title=f'Candidate_{i+1}',
                spring_constant=2 if only_fixed_constraints else 1,

                payload=(
                    self.constrained_indices[i],
                )
            )

            processes.append(process)

        # collect results as they complete (order is completion order;
        # the payload echoes back the matching constrained indices)
        for i, process in enumerate(as_completed(processes)):

            loadbar(i, len(self.structures), prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

            ( (
                new_structure,
                new_energy,
                self.exit_status[i]
            ),
            # from optimization function

            (
                self.constrained_indices[i],
            ),
            # from payload

            t_struct
            # from timing_wrapper

            ) = process.result()

            # assert that the structure did not scramble during optimization
            if self.exit_status[i]:
                constraints = (np.concatenate([self.constrained_indices[i], self.internal_constraints])
                               if len(self.internal_constraints) > 0
                               else self.constrained_indices[i])

                self.exit_status[i] = scramble_check(new_structure,
                                                     self.atomnos,
                                                     excluded_atoms=constraints.ravel(),
                                                     mols_graphs=self.graphs,
                                                     max_newbonds=0)

            cum_time += t_struct

            if self.options.debug:
                exit_status = 'REFINED ' if self.exit_status[i] else 'SCRAMBLED'
                self.debuglog(f'DEBUG: optimzation_refining ({conv_thr}) - Candidate_{i+1} - {exit_status if new_energy is not None else "CRASHED"} {time_to_string(t_struct, digits=3)}')

            # only accept the new geometry if it did not scramble and converged;
            # 1E10 marks failed candidates for the energy-pruning step below
            if self.exit_status[i] and new_energy is not None:
                self.structures[i] = new_structure
                self.energies[i] = new_energy

            else:
                self.energies[i] = 1E10

            ### Update checkpoint every (20*max_workers) optimized structures, and give an estimate of the remaining time
            chk_freq = int(self.avail_cpus//4) * self.options.checkpoint_frequency
            if i % chk_freq == chk_freq-1:

                with open(self.outname, 'w') as f:
                    for j, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                                        self.exit_status,
                                                                        self.rel_energies())):

                        kind = 'REFINED - ' if status else 'NOT REFINED - '
                        # NOTE(review): title prints ff_level though this step runs
                        # at theory_level - confirm intended
                        write_xyz(structure, self.atomnos, f, title=f'Structure {j+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

                elapsed = time.perf_counter() - t_start
                average = (elapsed)/(i+1)
                time_left = time_to_string((average) * (len(self.structures)-i-1))
                speedup = cum_time/elapsed
                self.log(f' - Optimized {i+1:>4}/{len(self.structures):>4} structures - updated checkpoint file (avg. {time_to_string(average)}/struc, {round(speedup, 1)}x speedup, est. {time_left} left)', p=False)

    loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    elapsed = time.perf_counter() - t_start
    average = (elapsed)/(len(self.structures))
    speedup = cum_time/elapsed

    self.log((f'{self.options.calculator}/{self.options.theory_level} optimization took '
              f'{time_to_string(elapsed)} (~{time_to_string(average)} per structure, {round(speedup, 1)}x speedup)'))

    ################################################# EXIT STATUS

    self.log(f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will {"not " if not self.options.only_refined else ""}be discarded.')

    if self.options.only_refined:

        mask = self.exit_status
        self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for unsuccessful optimization ({np.count_nonzero(mask)} left')

    ################################################# PRUNING: ENERGY

    _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
    self.energies = self.scramble(self.energies, sequence)
    self.structures = self.scramble(self.structures, sequence)
    self.constrained_indices = self.scramble(self.constrained_indices, sequence)
    # sorting structures based on energy

    if self.options.debug:
        self.dump_status(f'optimization_refining_{conv_thr}', only_fixed_constraints=only_fixed_constraints)
        self.debuglog(f'DEBUG: Dumped emebedder status after generating candidates (\"optimization_refining_{conv_thr}\")')

    if self.options.kcal_thresh is not None and only_fixed_constraints:

        # mask = self.rel_energies() < self.options.kcal_thresh
        energy_thr = self.dynamic_energy_thr()
        mask = self.rel_energies() < energy_thr

        self.apply_mask(('structures', 'constrained_indices', 'energies', 'exit_status'), mask)

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for energy ({np.count_nonzero(mask)} left, ' +
                     f'{round(100*np.count_nonzero(mask)/len(mask), 1)}% kept, threshold {energy_thr} kcal/mol)')

    ################################################# PRUNING: FITNESS (POST SEMIEMPIRICAL OPT)

    self.fitness_refining(threshold=2)

    ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# CHECKPOINT AFTER SE OPTIMIZATION

    with open(self.outname, 'w') as f:
        for i, (structure, status, energy) in enumerate(zip(align_structures(self.structures),
                                                            self.exit_status,
                                                            self.rel_energies())):

            kind = 'REFINED - ' if status else 'NOT REFINED - '
            write_xyz(structure, self.atomnos, f, title=f'Structure {i+1} - {kind}Rel. E. = {round(energy, 3)} kcal/mol ({self.options.ff_level})')

    self.log(f'--> Wrote {len(self.structures)} optimized structures to {self.outname}')

    # do not retain energies for the next optimization step if optimization was not tight
    if not only_fixed_constraints:
        self.energies.fill(0)
def dynamic_energy_thr(self, keep_min=0.1, verbose=True):
    '''
    Returns an energy threshold (kcal/mol) that is dynamically adjusted
    based on the distribution of energies around the lowest,
    so that at least a fraction keep_min of the structures is retained.

    Starts from self.options.kcal_thresh; if that would discard too many
    candidates, the threshold is raised to the smallest relative energy
    value that keeps more than keep_min of them.

    keep_min: float, minimum fraction of structures to keep
    verbose: bool, prints comments in self.log
    '''
    active = len(self.structures)
    keep = np.count_nonzero(self.rel_energies() < self.options.kcal_thresh)

    # if the standard threshold keeps enough structures, use that
    if keep/active > keep_min:
        return self.options.kcal_thresh

    # if not, iterate on the relative energy values as
    # thresholds until we keep enough structures
    # (self.energies is sorted ascending at this point, so the first
    # qualifying value is the smallest adequate threshold)
    for thr in (energy for energy in self.rel_energies() if energy > self.options.kcal_thresh):
        keep = np.count_nonzero(self.rel_energies() < thr)

        if keep/active > keep_min:
            if verbose:
                # fixed: message used to print the threshold value (thr)
                # where the retained percentage was intended
                self.log(f"--> Dynamically adjusted energy threshold to {round(thr, 1)} kcal/mol to retain at least {round(100*keep/active)}% of structures.")
            return thr

    # fallback: no candidate threshold retained enough structures
    # (e.g. heavily degenerate energies). Keep everything rather than
    # returning None, which would break the caller's comparison.
    return float(np.max(self.rel_energies())) + 1
def metadynamics_augmentation(self):
    '''
    Augment the candidate pool via XTB metadynamics (MTD): every
    structure in self.structures seeds a short MTD simulation, and the
    new energy-minimized geometries found are appended to
    self.structures (with matching energies/constrained_indices entries).
    '''

    self.log('--> Performing XTB Metadynamic augmentation of TS candidates')

    n_before = len(self.structures)
    t0_run = time.perf_counter()

    # iterate over copies so in-loop concatenations don't affect the pool
    seed_pool = zip(deepcopy(self.structures), deepcopy(self.constrained_indices))
    for idx, (geom, pairings) in enumerate(seed_pool):

        loadbar(idx, n_before, f'Running MTD {idx+1}/{n_before} ')
        t0 = time.perf_counter()

        found = xtb_metadyn_augmentation(geom,
                                         self.atomnos,
                                         constrained_indices=pairings,
                                         new_structures=5,
                                         title=idx)

        # grow the embedder arrays in lockstep with the new conformers
        self.structures = np.concatenate((self.structures, found))
        self.energies = np.concatenate((self.energies, [0 for _ in found]))
        self.constrained_indices = np.concatenate((self.constrained_indices, [pairings for _ in found]))

        self.log(f' - Structure {idx+1} - {len(found)} new conformers ({time_to_string(time.perf_counter()-t0)})', p=False)

    loadbar(n_before, n_before, f'Running MTD {n_before}/{n_before} ')
    self.exit_status = np.array([True for _ in self.structures], dtype=bool)

    self.log(f'Metadynamics augmentation completed - found {len(self.structures)-n_before} new conformers ({time_to_string(time.perf_counter()-t0_run)})\n')
def csearch_augmentation(self, text='', max_structs=1000):
    '''
    Runs a conformational search for each structure in self.structures,
    preserving the current reactive atoms pose and HB interactions.
    New structure geometries are optimized and added to self.structures.

    text: extra string appended to the header log line.
    max_structs: soft cap on the total number of generated structures,
                 used to size the per-candidate conformer request.
    '''
    self.warn("--> WARNING! csearch_augmentation is an experimental routine and has not been fully tested yet.")
    self.log(f'--> Performing conformational augmentation of candidates {text}')

    before = len(self.structures)
    t_start_run = time.perf_counter()

    # ask for up to 100 conformers per candidate, scaled down so the
    # total stays within max_structs (but at least 1 per candidate)
    n_out = 100 if len(self.structures)*100 < max_structs else round(max_structs/len(self.structures))
    n_out = max((1, n_out))

    for s, (structure, constrained_indices) in enumerate(zip(self.structures, self.constrained_indices)):

        loadbar(s, before, f'Performing CSearch {s+1}/{before} ', suffix=f'({len(self.structures)-before} generated)')
        t_start = time.perf_counter()

        # per-candidate debug log, line-buffered so it is readable live
        dump = open(f'Candidate_{s+1}_csearch_log.txt', 'w', buffering=1) if self.options.debug else None

        try:
            new_structures = csearch(
                structure,
                self.atomnos,
                constrained_indices=constrained_indices,
                keep_hb=True,
                mode=2,
                n_out=n_out,
                # lambda argument renamed so it does not shadow the loop index s
                logfunction=lambda line: dump.write(line+'\n') if self.options.debug else None,
                title=f'Candidate_{s+1}',
                interactive_print=False,
                write_torsions=self.options.debug,
            )

        # if CSearch cannot be performed, just go on
        except SegmentedGraphError:
            new_structures = []

        finally:
            # BUGFIX: close the debug log even if csearch raises an
            # unexpected exception (previously it leaked in that case)
            if dump is not None:
                dump.close()

        if len(new_structures) != 0: # could be either array or list, so have to check this way
            self.structures = np.concatenate((self.structures, new_structures))
            # placeholder (very high) energies: real values come from later refinement
            self.energies = np.concatenate((self.energies, [1E10 for _ in new_structures]))
            self.constrained_indices = np.concatenate((self.constrained_indices, [constrained_indices for _ in new_structures]))

        self.log(f' - Candidate {s+1} - {len(new_structures)} new conformers ({time_to_string(time.perf_counter()-t_start)})', p=False)

    loadbar(before, before, f'Performing CSearch {before}/{before} ', suffix=f'{" "*15}')
    self.exit_status = np.array([True for _ in self.structures], dtype=bool)

    self.similarity_refining(rmsd=False)

    self.log(f'Conformational augmentation completed - generated {len(self.structures)-before} new conformers ({time_to_string(time.perf_counter()-t_start_run)})\n')
def csearch_augmentation_routine(self):
    '''
    Iterative conformational-augmentation driver.

    When the csearch_aug option is active, runs up to three
    csearch_augmentation + force-field refinement cycles, stopping
    early after two cycles that fail to lower the best energy.
    '''
    if not self.options.csearch_aug:
        return

    augment = self.csearch_augmentation
    stagnant_cycles = 0

    for step in range(3):

        best_before = np.min(self.energies)

        augment(text=f'(step {step+1}/3)', max_structs=self.options.max_confs)
        self.force_field_refining()

        best_after = np.min(self.energies)

        if best_after < best_before:
            self.log(f'--> Lower minima found: {round(best_before - best_after, 2)} kcal/mol below previous best\n')

            if self.options.debug:
                # dump the current best structure for inspection
                with open(f'best_of_aug_run_{step}.xyz', 'w') as f:
                    e, s = zip(*sorted(zip(self.energies, self.structures), key=lambda x: x[0]))
                    write_xyz(s[0], self.atomnos, f, title=f'Energy = {round(e[0], 6)}')

        else:
            self.log('--> No new minima found.\n')
            stagnant_cycles += 1

        if stagnant_cycles == 2:
            break
def saddle_refining(self):
    '''
    Performs a first order saddle optimization for each structure.

    Failed optimizations are discarded; surviving structures are pruned
    for similarity, sorted by energy and written to an xyz output file.
    '''
    self.log(f'--> Saddle optimization ({self.options.theory_level} level)')
    t_start = time.perf_counter()

    for i, structure in enumerate(self.structures):

        loadbar(i, len(self.structures), prefix=f'Performing saddle opt {i+1}/{len(self.structures)} ')

        try:
            self.structures[i], self.energies[i], self.exit_status[i] = ase_saddle(self,
                                                                                   structure,
                                                                                   self.atomnos,
                                                                                   self.constrained_indices[i],
                                                                                   mols_graphs=self.graphs if self.embed != 'monomolecular' else None,
                                                                                   title=f'Saddle opt - Structure {i+1}',
                                                                                   logfile=self.logfile,
                                                                                   traj=f'Saddle_opt_{i+1}.traj',
                                                                                   maxiterations=200)

        except ValueError:
            # Thrown when an ASE read fails (during saddle opt)
            self.exit_status[i] = False

    loadbar(1, 1, prefix=f'Performing saddle opt {len(self.structures)}/{len(self.structures)} ')
    t_end = time.perf_counter()

    # BUGFIX: guard the per-structure timing against an empty ensemble
    # (previously divided by zero when no structures were present)
    if len(self.structures) > 0:
        self.log(f'{self.options.calculator} {self.options.theory_level} saddle optimization took {time_to_string(t_end-t_start)} ({time_to_string((t_end-t_start)/len(self.structures))} per structure)')
    self.log(f'Saddle opt completed for {len([i for i in self.exit_status if i])}/{len(self.structures)} structures')

    # drop structures whose saddle optimization failed
    mask = self.exit_status
    self.apply_mask(('structures', 'energies', 'exit_status'), mask)

    ################################################# PRUNING: SIMILARITY (POST SADDLE OPT)

    if len(self.structures) != 0:

        t_start = time.perf_counter()
        self.structures, mask = prune_by_rmsd(self.structures, self.atomnos, max_rmsd=self.options.rmsd, debugfunction=self.debuglog)
        self.apply_mask(('energies', 'exit_status'), mask)
        t_end = time.perf_counter()

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for similarity ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})')
        self.log()

        ################################################# SADDLE OPT EXTRA XYZ OUTPUT

        # sorting structures based on energy
        _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
        self.energies = scramble(self.energies, sequence)
        self.structures = scramble(self.structures, sequence)
        self.constrained_indices = scramble(self.constrained_indices, sequence)

        self.outname = f'firecode_SADDLE_TSs_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            # BUGFIX: enumerate the output loop - the title previously reused
            # the stale index i from the optimization loop above, numbering
            # every written structure identically
            for i, (structure, energy) in enumerate(zip(align_structures(self.structures), self.rel_energies())):
                write_xyz(structure, self.atomnos, f, title=f'Structure {i+1} - TS - Rel. E. = {round(energy, 3)} kcal/mol')

        self.log(f'Wrote {len(self.structures)} saddle-optimized structures to {self.outname} file\n')

    else:
        self.log()
def print_nci(self):
    '''
    Prints and logs the non-covalent interaction (NCI) guesses for the
    final structures, highlighting interactions that are not shared by
    every structure (potential sources of selectivity).
    '''
    self.log('--> Non-covalent interactions finder (EXPERIMENTAL)')
    self.nci = []

    for i, structure in enumerate(self.structures):

        nci, print_list = get_nci(structure, self.atomnos, self.constrained_indices[i], self.ids)
        self.nci.append(nci)

        if nci != []:
            self.log(f'Structure {i+1}: {len(nci)} interactions')

            for p in print_list:
                self.log(' '+p)
            self.log()

    if not any(lst != [] for lst in self.nci):
        self.log('No particular NCIs spotted for these structures\n')
        return

    # collect interactions present in some structures but not in all of them
    unshared_nci = []
    for nci_list in self.nci:
        for nci in nci_list:

            if any(nci == seen for seen, _ in unshared_nci):
                continue  # already recorded

            if all(nci in structure_nci for structure_nci in self.nci):
                continue  # shared by every structure: not differential

            shared_by = [i for i, structure_nci in enumerate(self.nci) if nci in structure_nci]
            unshared_nci.append((nci, shared_by))

    if unshared_nci != []:
        self.log('--> Differential NCIs found - these might be the source of selectivity:')
        for nci, shared_by in unshared_nci:
            nci_type, i1, i2 = nci
            self.log(f' {nci_type} between indices {i1}/{i2} is present in {len(shared_by)}/{len(self.structures)} structures {tuple([i+1 for i in shared_by])}')
        self.log()
def write_mol_info(self):
    '''
    Writes information about the firecode molecules read from the input file.

    For each input molecule, builds and logs a summary with atom/conformer
    counts, pivot counts and flags (sp3_sigmastar, sigmatropic) plus a
    per-reactive-atom description line.
    '''

    head = ''
    for i, mol in enumerate(self.objects):

        if hasattr(mol, 'reactive_atoms_classes_dict'):

            # detailed description: reactive atom type, distance between the
            # orbital center and the atom, and number of centers - taken from
            # the first conformer's reactive atom classes
            descs = [atom.symbol+f'({str(atom)} type, {round(norm_of(atom.center[0]-atom.coord), 3)} A, ' +
                     f'{len(atom.center)} center{"s" if len(atom.center) != 1 else ""})' for atom in mol.reactive_atoms_classes_dict[0].values()]

        else:

            # fallback: element symbols only, from the reactive indices
            descs = [pt[mol.atomnos[i]].symbol for i in mol.reactive_indices]

        # one "index -> description" line per reactive atom
        # (single-digit indices are padded with a trailing space)
        t = '\n '.join([(str(index) + ' ' if len(str(index)) == 1 else str(index)) + ' -> ' + desc for index, desc in zip(mol.reactive_indices, descs)])

        mol_line = f' -> {len(mol.atomcoords[0])} atoms, {len(mol.atomcoords)} conformer{"s" if len(mol.atomcoords) != 1 else ""}'
        if hasattr(mol, 'pivots') and len(mol.pivots) > 0:
            mol_line += f', {len(mol.pivots[0])} pivot{"s" if len(mol.pivots[0]) != 1 else ""}'

        if mol.sp3_sigmastar:
            mol_line += ', sp3_sigmastar'

        if any(mol.sigmatropic):
            mol_line += ', sigmatropic'
            if all(mol.sigmatropic):
                mol_line += ' (all conformers)'
            else:
                mol_line += ' (some conformers)'

        head += f'\n {i+1}. {mol.filename}{mol_line}\n {t}\n'

    self.log('--> Input structures & reactive indices data:\n' + head)
def write_options(self):
    '''
    Writes information about the firecode parameters used in the calculation, if applicable to the run.

    First logs the imposed atom pairings (or the absence thereof), then the
    subset of calculation options that are relevant to this run type.
    '''

    ######################################################################################################## PAIRINGS

    if not self.pairings_table:
        # only print the no pairings statements if there are multiple regioisomers to be computed
        if all([len(mol.reactive_indices) == 2 for mol in self.objects]):
            self.log('--> No atom pairings imposed. Computing all possible dispositions.\n')

    else:
        self.log(f'--> Atom pairings imposed are {len(self.pairings_table)}: {list(self.pairings_table.values())} (Cumulative index numbering)\n')

        for i, letter in enumerate(self.pairings_table):
            # uppercase letters are fixed constraints, lowercase are interactions
            kind = 'Constraint' if letter.isupper() else 'Interaction'
            internal = any(isinstance(d.get(letter), tuple) for d in self.pairings_dict.values())
            kind += ' (Internal)' if internal else ''
            dist = self.get_pairing_dist_from_letter(letter)

            # BUGFIX: only apply the shrink multiplier when a target distance
            # exists - previously dist *= ... raised TypeError when the pairing
            # had no distance (dist is None) and SHRINK was active
            if self.options.shrink and not internal and dist is not None:
                dist *= self.options.shrink_multiplier

            if dist is None:
                kind += ' - will relax'
            elif kind == 'Interaction':
                kind += f' - embedded at {round(dist, 3)} A - will relax'
            else:
                kind += f' - constrained to {round(dist, 3)} A'

                if self.options.shrink and not internal:
                    kind += f' (to be shrinked to {round(dist/self.options.shrink_multiplier, 3)} A)'

            s = f' {i+1}. {letter} - {kind}\n'

            # list every atom participating in this pairing, per molecule
            for mol_id, d in self.pairings_dict.items():
                atom_id = d.get(letter)

                if atom_id is not None:
                    mol = self.objects[mol_id]

                    if isinstance(atom_id, int):
                        atom_id = [atom_id]

                    for a in atom_id:
                        s += f' Index {a} ({pt[mol.atomnos[a]].name}) on {mol.rootname}\n'

            self.log(s)

    ######################################################################################################## EMBEDDING/CALC OPTIONS

    self.log('--> Calculation options used were:')
    for line in str(self.options).split('\n'):

        # skip options that do not apply to the current embed/run type
        if self.embed in ('monomolecular', 'string', 'refine') and line.split()[0] in ('rotation_range',
                                                                                      'rotation_steps',
                                                                                      'rigid',
                                                                                      'suprafacial',
                                                                                      'fix_angles_in_deformation',
                                                                                      'double_bond_protection'):
            continue

        if self.embed == 'refine' and line.split()[0] in ('shrink',
                                                          'shrink_multiplier',
                                                          'fix_angles_in_deformation',
                                                          'double_bond_protection'):
            continue

        if not self.options.optimization and line.split()[0] in ('calculator',
                                                                 'double_bond_protection',
                                                                 'ff_opt',
                                                                 'ff_calc',
                                                                 'ff_level',
                                                                 'fix_angles_in_deformation',
                                                                 'only_refined',
                                                                 'rigid',
                                                                 'theory_level'):
            continue

        if self.options.rigid and line.split()[0] in ('double_bond_protection',
                                                      'fix_angles_in_deformation'):
            continue

        if not self.options.shrink and line.split()[0] in ('shrink_multiplier',):
            continue

        if not self.options.ff_opt and line.split()[0] in ('ff_calc', 'ff_level'):
            continue

        self.log(f' - {line}')
def log_warnings(self):
    '''
    Logs the non-fatal errors (warnings) accumulated during a run.

    Does nothing when no warnings were collected.
    '''
    if not self.warnings:
        return

    banner = "{:*^76}"
    self.log()
    self.log(banner.format(" W A R N I N G S "))
    self.log(banner.format(" your run generated these non-fatal warnings "))
    self.log()

    for message in self.warnings:
        # wrap long warning messages for readability
        self.log(auto_newline(message, max_line_len=65))
        self.log()

    self.log("*"*76)
def run(self):
    '''
    Run the firecode program.

    Top-level driver: logs input info, dispatches special terminations
    (no embed / unknown embed / data-only runs), generates candidates,
    refines them (force field and/or higher-level optimization), then
    runs the optional post-processing steps (saddle optimization, NCI
    analysis) and terminates normally.
    '''
    self.write_mol_info()

    if self.embed is None:
        self.log('--> No embed requested, exiting.\n')
        self.normal_termination()

    if self.embed == 'error':
        self.log('--> Embed type not recognized, exiting.\n')
        self.normal_termination()

    if self.embed == 'data':
        # data-only runs (pKa, scans) print their results and exit
        self.data_termination()

    # large cyclical/chelotropic embeds (more than 100 conformers in some
    # molecule) default to RIGID for efficiency, unless overridden with LET
    if not self.options.let and (
        self.embed in ('cyclical', 'chelotropic')) and (
        max([len(mol.atomcoords) for mol in self.objects]) > 100) and (
        not self.options.rigid):

        self.options.rigid = True

        self.log('--> Large embed: RIGID keyword added for efficiency (override with LET)')

    self.write_options()

    if not hasattr(self, "t_start_run"):
        self.t_start_run = time.perf_counter()

    if self.options.dryrun:
        self.log('\n--> Dry run requested: exiting.')
        self.normal_termination()

    try: # except KeyboardInterrupt
        try: # except ZeroCandidatesError()
            self.generate_candidates()

            if self.options.bypass:
                # skip all refinement: write raw candidates and exit
                self.write_structures('unoptimized', energies=False)
                self.normal_termination()

            self.compenetration_refining()
            self.similarity_refining(rmsd=True if self.embed == "refine" else False, verbose=True)

            if self.options.optimization:

                if self.options.ff_opt:

                    # perform safe optimization only for embeds
                    if len(self.objects) > 1 and self.options.ff_calc == 'XTB':
                        # self.log(f"--> Performing {self.options.calculator} FF pre-optimization (loose convergence, molecular and pairing constraints)\n")
                        self.force_field_refining(conv_thr="loose", prevent_scrambling=True)

                    # self.log(f"--> Performing {self.options.calculator} FF optimization (loose convergence, pairing constraints, step 1/2)\n")
                    self.force_field_refining(conv_thr="loose")

                    # self.log(f"--> Performing {self.options.calculator} FF optimization (tight convergence, fixed constraints only, step 2/2)\n")
                    self.force_field_refining(conv_thr="tight", only_fixed_constraints=True)

                    # self.csearch_augmentation_routine()

                if not (self.options.ff_opt and self.options.theory_level == self.options.ff_level):
                    # If we just optimized at a (FF) level and the final
                    # optimization level is the same, avoid repeating it

                    if self.options.calculator == "ORCA":
                        # Perform stepwise pruning of the ensemble for more expensive theory levels

                        self.log("--> Performing ORCA optimization (3 iterations, step 1/3)\n")
                        self.optimization_refining(maxiter=3)

                        self.log("--> Performing ORCA optimization (5 iterations, step 2/3)\n")
                        self.optimization_refining(maxiter=5)

                        self.log("--> Performing ORCA optimization (convergence, step 3/3)\n")

                    # NOTE(review): indentation reconstructed - the loose pass for
                    # large ensembles and the final tight pass appear to apply to
                    # all calculators, not only ORCA; confirm against upstream
                    if len(self.structures) > 500:
                        self.optimization_refining(conv_thr='loose')
                        # final uncompromised optimization (with fixed constraints and interactions active)

                    self.optimization_refining(conv_thr='tight', only_fixed_constraints=True)
                    # final uncompromised optimization (with only fixed constraints active)

            else:
                self.write_structures('unoptimized', energies=False)
                # accounting for output in "refine" runs with NOOPT

        except ZeroCandidatesError:
            t_end_run = time.perf_counter()
            s = (' Sorry, the program did not find any reasonable embedded structure. Are you sure the input indices and pairings were correct? If so, try these tips:\n'
                 ' - If no structure passes the compenetration check, the SHRINK keyword may help (see documentation).\n'
                 ' - Similarly, enlarging the spacing between atom pairs with the DIST keyword facilitates the embed.\n'
                 ' - If no structure passes the fitness check, try adding a solvent with the SOLVENT keyword.\n'
                 ' - Impose less strict compenetration rejection criteria with the CLASHES keyword.\n'
                 ' - Generate more structures with higher STEPS and ROTRANGE values.\n'
                 )

            self.log(f'\n--> Program termination: No candidates found - Total time {time_to_string(t_end_run-self.t_start_run)}')
            self.log(s)
            self.close_log_streams()
            clean_directory()
            sys.exit()

        ##################### AUGMENTATION - METADYNAMICS / CSEARCH

        if self.options.metadynamics:

            self.metadynamics_augmentation()
            self.optimization_refining()
            self.similarity_refining()

        ##################### POST FIRECODE - SADDLE, NEB, NCI, VMD

        # if (self.options.optimization or self.options.ff_opt) and not self.options.bypass:
        #     self.write_vmd()

        # if self.options.neb:
        #     self.hyperneb_refining()

        if self.options.saddle:
            self.saddle_refining()

        if self.options.nci and self.options.optimization:
            self.print_nci()

        self.log_warnings()
        self.normal_termination()

        ################################################ END

    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt requested by user. Quitting.')
        sys.exit()
def data_termination(self):
    '''
    Termination path for runs with no embedding, where computed
    data (pKa energetics, scans) are to be shown in a formatted way.
    '''
    ops = self.options.operators

    # pKa table, if any pka> operator was requested
    if sum('pka>' in op for op in ops):
        self.pka_termination()

    # cumulative scan plot, only meaningful for more than one scan
    scan_count = sum('scan>' in op for op in ops)
    if scan_count > 1:
        self.scan_termination()

    self.normal_termination()
def pka_termination(self):
    '''
    Print data acquired during pKa energetics calculation
    for every molecule in input.

    Builds a table with the process and free energy for each molecule
    carrying pka_data; if a reference (self.pka_ref) is available, adds
    a computed pKa column. For a two-molecule acid/base pair, also logs
    the HA + B equilibrium data.
    '''

    self.log('\n--> pKa energetics (from best conformers)')
    solv = 'gas phase' if self.options.solvent is None else self.options.solvent

    from prettytable import PrettyTable
    table = PrettyTable()
    table.field_names = ['Name', '#(Symb)', 'Process', 'Energy (kcal/mol)']

    for mol in self.objects:
        if hasattr(mol, 'pka_data'):
            table.add_row([mol.rootname,
                           f'{mol.reactive_indices[0]}({pt[mol.atomnos[mol.reactive_indices[0]]].symbol})',
                           mol.pka_data[0],
                           mol.pka_data[1]])

    # Add pKa column if we were given a reference
    if hasattr(self, 'pka_ref'):

        pkas = []
        for mol in self.objects:
            if mol.filename == self.pka_ref[0]:
                dG_ref = mol.pka_data[1]
                break

        for mol in self.objects:
            # BUGFIX: skip molecules without pKa data - previously this loop
            # crashed on them and could misalign the pKa column with the
            # table rows (rows are only added for molecules with pka_data)
            if not hasattr(mol, 'pka_data'):
                continue

            process, free_energy = mol.pka_data

            # The free energy difference has a different sign for acids or bases, since
            # the pKa for a base is the one of its conjugate acid, BH+
            dG = free_energy - dG_ref if process == 'HA -> A-' else dG_ref - free_energy

            # dG = RT ln(10) * (pKa - pKa_ref), with R in kcal/(mol K) at 298.15 K
            pka = dG / (np.log(10) * 1.9872036e-3 * 298.15) + self.pka_ref[1]
            pkas.append(round(pka, 3))

        table.add_column(f'pKa ({solv}, 298.15 K)', pkas)

    self.log(table.get_string())

    # BUGFIX: parenthesize the conditional expression - the ternary previously
    # bound to the whole concatenation, so with no solvent the entire
    # "Level used" line was logged as an empty string
    self.log(f'\n Level used is {self.options.theory_level} via {self.options.calculator}' +
             (f", using the ALPB solvation model for {self.options.solvent}" if self.options.solvent is not None else ""))

    if len(self.objects) == 2:
        mol0, mol1 = self.objects
        if hasattr(mol0, 'pka_data') and hasattr(mol1, 'pka_data'):
            tags = (mol0.pka_data[0],
                    mol1.pka_data[0])
            if 'HA -> A-' in tags and 'B -> BH+' in tags:
                dG = mol0.pka_data[1] + mol1.pka_data[1]
                self.log('\n Equilibrium data:')
                self.log(f'\n HA + B -> BH+ + A- K({solv}, 298.15 K) = {round(np.exp(-dG/(1.9872036e-3 * 298.15)), 3)}')
                self.log(f'\n dG({solv}, 298.15 K) = {round(dG, 3)} kcal/mol')
def scan_termination(self):
    '''
    Print the unified data and write the cumulative plot
    for the approach of all the molecules in input.
    '''
    import matplotlib.pyplot as plt

    plt.figure()

    # overlay every available scan profile on a single set of axes
    scanned = [mol for mol in self.objects if hasattr(mol, 'scan_data')]
    for mol in scanned:
        plt.plot(*mol.scan_data, label=mol.rootname)

    plt.legend()
    plt.title('Unified scan energetics')
    plt.xlabel('Distance (A)')
    plt.gca().invert_xaxis()
    plt.ylabel('Rel. E. (kcal/mol)')
    plt.savefig(f'{self.stamp}_cumulative_plt.svg')

    self.log(f'\n--> Written cumulative scan plot at {self.stamp}_cumulative_plt.svg')