firecode 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. firecode/TEST_NOTEBOOK.ipynb +3940 -0
  2. firecode/__init__.py +0 -0
  3. firecode/__main__.py +118 -0
  4. firecode/_gaussian.py +97 -0
  5. firecode/algebra.py +405 -0
  6. firecode/ase_manipulations.py +879 -0
  7. firecode/atropisomer_module.py +516 -0
  8. firecode/automep.py +130 -0
  9. firecode/calculators/__init__.py +29 -0
  10. firecode/calculators/_gaussian.py +98 -0
  11. firecode/calculators/_mopac.py +242 -0
  12. firecode/calculators/_openbabel.py +154 -0
  13. firecode/calculators/_orca.py +129 -0
  14. firecode/calculators/_xtb.py +786 -0
  15. firecode/concurrent_test.py +119 -0
  16. firecode/embedder.py +2590 -0
  17. firecode/embedder_options.py +577 -0
  18. firecode/embeds.py +881 -0
  19. firecode/errors.py +65 -0
  20. firecode/graph_manipulations.py +333 -0
  21. firecode/hypermolecule_class.py +364 -0
  22. firecode/mep_relaxer.py +199 -0
  23. firecode/modify_settings.py +186 -0
  24. firecode/mprof.py +65 -0
  25. firecode/multiembed.py +148 -0
  26. firecode/nci.py +186 -0
  27. firecode/numba_functions.py +260 -0
  28. firecode/operators.py +776 -0
  29. firecode/optimization_methods.py +609 -0
  30. firecode/parameters.py +84 -0
  31. firecode/pka.py +275 -0
  32. firecode/profiler.py +17 -0
  33. firecode/pruning.py +421 -0
  34. firecode/pt.py +32 -0
  35. firecode/quotes.json +6651 -0
  36. firecode/quotes.py +9 -0
  37. firecode/reactive_atoms_classes.py +666 -0
  38. firecode/references.py +11 -0
  39. firecode/rmsd.py +74 -0
  40. firecode/settings.py +75 -0
  41. firecode/solvents.py +126 -0
  42. firecode/tests/C2F2H4.xyz +10 -0
  43. firecode/tests/C2H4.xyz +8 -0
  44. firecode/tests/CH3Cl.xyz +7 -0
  45. firecode/tests/HCOOH.xyz +7 -0
  46. firecode/tests/HCOOOH.xyz +8 -0
  47. firecode/tests/chelotropic.txt +3 -0
  48. firecode/tests/cyclical.txt +3 -0
  49. firecode/tests/dihedral.txt +2 -0
  50. firecode/tests/string.txt +3 -0
  51. firecode/tests/trimolecular.txt +9 -0
  52. firecode/tests.py +151 -0
  53. firecode/torsion_module.py +1035 -0
  54. firecode/utils.py +541 -0
  55. firecode-1.0.0.dist-info/LICENSE +165 -0
  56. firecode-1.0.0.dist-info/METADATA +321 -0
  57. firecode-1.0.0.dist-info/RECORD +59 -0
  58. firecode-1.0.0.dist-info/WHEEL +5 -0
  59. firecode-1.0.0.dist-info/top_level.txt +1 -0
firecode/pruning.py ADDED
@@ -0,0 +1,421 @@
1
+ # coding=utf-8
2
+ '''
3
+ FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
4
+ Copyright (C) 2021-2024 Nicolò Tampellini
5
+
6
+ SPDX-License-Identifier: LGPL-3.0-or-later
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU Lesser General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public License
19
+ along with this program. If not, see
20
+ https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
21
+
22
+ '''
23
+ import numpy as np
24
+ from networkx import connected_components
25
+ from numba import njit
26
+
27
+ from firecode.algebra import all_dists, get_inertia_moments
28
+ from firecode.pt import pt
29
+ from firecode.rmsd import rmsd_and_max_numba
30
+ from firecode.torsion_module import (_get_hydrogen_bonds, _get_torsions,
31
+ _is_nondummy,
32
+ rotationally_corrected_rmsd_and_max)
33
+ from firecode.utils import get_double_bonds_indices
34
+
35
+
36
class Pruner:
    '''
    Removes near-duplicate structures from an ensemble.

    Usage: instantiate, set the attributes required by the chosen mode
    (the "args" and "thresholds" names listed in self.defaults_dict),
    call set_mode(mode) and then prune(). Results are left in
    self.mask (boolean mask over the input structures) and
    self.structures (the retained structures).

    Attributes set by __init__:
    structures - array of structures, indexed along the first axis
    atomnos - atomic numbers, one per atom
    calls - number of pair look-ups performed so far
    cache_calls - number of those look-ups answered by the cache
    debugfunction - optional callable receiving debug strings

    '''

    def __init__(self, structures, atomnos, debugfunction=None):
        self.structures = structures
        self.atomnos = atomnos
        self.calls = 0
        self.cache_calls = 0
        self.debugfunction = debugfunction

        # mode name -> (evaluation function,
        #               names of attributes passed as extra positional args,
        #               {kwarg name: attribute name} passed as keyword args,
        #               names of attributes used as thresholds, one per
        #               value returned by the evaluation function)
        self.defaults_dict = {

            "rmsd_rot_corr" : (
                rotationally_corrected_rmsd_and_max,

                [
                    "atomnos",
                    "torsions",
                    "graph",
                    "angles",
                ], # args

                dict(), # kwargs

                ["max_rmsd", "max_dev"], # thresholds
            ),

            "rmsd" : (
                rmsd_and_max_numba,

                [], # args

                dict(), # kwargs

                ["max_rmsd", "max_dev"], # thresholds
            ),

            "moi" : (
                get_moi_deviation_vec,

                ["masses"], # args

                dict(), # kwargs

                ["max_dev", "max_dev", "max_dev"], # thresholds
            ),

        }

    # set the operating mode
    def set_mode(self, mode):
        '''
        Select the similarity criterion and bind its arguments.

        Looks up mode in self.defaults_dict and resolves the listed
        attribute names on this instance into self.eval_func, self.args,
        self.kwargs and self.thresholds.

        Raises NameError if mode is not a key of self.defaults_dict,
        AttributeError (from getattr) if a required attribute was never
        set, and UnboundLocalError if a threshold attribute is None.

        '''
        if mode not in self.defaults_dict:
            raise NameError(f"pruning mode \"{mode}\" not recognized.")
        self.mode = mode

        self.eval_func, args_names, kwargs_names, thresholds_names = self.defaults_dict[self.mode]
        self.args = [getattr(self, name) for name in args_names]
        self.kwargs = {name:getattr(self, value) for name, value in kwargs_names.items()}
        self.thresholds = [getattr(self, name) for name in thresholds_names]

        for name, value in zip(thresholds_names, self.thresholds):
            if value is None:
                raise UnboundLocalError(f'Class Pruner({self.mode}) does not have a \"{name}\" attriubute.')

    def _main_eval_similarity(self, coords1, coords2):
        '''
        Returns 1 if every value returned by self.eval_func is less than
        or equal to its threshold, 0 as soon as one value exceeds it.

        '''
        results = self.eval_func(coords1, coords2, *self.args, **self.kwargs)
        for r, t in zip(results, self.thresholds):
            if r > t:
                return 0
        return 1

    def _main_compute_subrow(self, ref, structures, in_mask, first_abs_index):
        '''
        Returns True (as the int 1) if ref is similar to any
        structure in structures, returning at the first instance of a match.
        Ignores structures that are False (0) in in_mask and records pairs
        that evaluate to False (0) in self.cache.

        first_abs_index is the index of ref in the full structures array;
        structures is expected to start right after ref, so that entry i
        has absolute index first_abs_index+1+i.

        '''

        # iterate over target structures
        for i, structure in enumerate(structures):

            # only compare active structures
            if not in_mask[i]:
                continue

            # check if we have performed this computation already,
            # and in that case we know the structures were not similar,
            # since the in_mask attribute is not False for ref nor i
            hash_value = (first_abs_index, first_abs_index+1+i)
            self.calls += 1
            if hash_value in self.cache:
                self.cache_calls += 1

            # if we have not computed the value before, do it
            # function will return True (1) if the structures are similar
            elif self._main_eval_similarity(ref, structure):
                return 1

            # if structures are not similar, add the result to the
            # cache, because they will potentially return here,
            # while similar structures are discarded and won't come back
            else:
                self.cache.add(hash_value)

        return 0

    def _main_compute_row(self, structures, in_mask, first_abs_index):
        '''
        For a given set of structures, check if each is similar
        to any other after itself. Returns a boolean mask to slice
        the array, only retaining the structures that are dissimilar.
        The inner subrow function caches computed non-similar pairs.

        first_abs_index is the absolute index of structures[0] in the
        full array, needed to build cache keys valid across chunks.

        '''
        # initialize the result container
        out_mask = np.ones(shape=in_mask.shape, dtype=np.bool_)

        # loop over the structures
        for i, ref in enumerate(structures):

            # only check for similarity if the structure is active
            if in_mask[i]:

                # reject structure i if it is similar to any other after itself
                similar = self._main_compute_subrow(
                    ref,
                    structures[i+1:],
                    in_mask[i+1:],
                    first_abs_index=first_abs_index+i,
                )
                out_mask[i] = not similar

            else:
                out_mask[i] = 0

        return out_mask

    def _main_compute_group(self, structures, in_mask, k):
        '''
        Splits the structures array into k contiguous chunks (the last
        one absorbing the remainder), prunes each chunk internally and
        returns the updated mask for the whole array.

        k may be a float (see the values used in prune): it is cast
        to int where an integer is required.

        '''
        # initialize final result container
        out_mask = np.ones(shape=structures.shape[0], dtype=np.bool_)

        # calculate the size of each chunk
        chunksize = int(len(structures) // k)

        # iterate over chunks (multithreading here?)
        for chunk in range(int(k)):
            first = chunk*chunksize
            if chunk == k-1:
                last = len(structures)
            else:
                last = chunksize*(chunk+1)

            # get the structure chunk
            structures_chunk = structures[first:last]

            # compare structures within that chunk and save results to the out_mask
            out_mask[first:last] = self._main_compute_row(
                structures_chunk,
                in_mask[first:last],
                first_abs_index=first,
            )
        return out_mask

    def prune(self):
        '''
        Removes similar structures by repeatedly grouping them into k
        subgroups and removing similar ones, with k decreasing down to 1
        (all structures compared in a single group). A cache of pairs
        already found dissimilar avoids repeating evaluations.

        Two structures are similar when every value returned by the
        evaluation function of the current mode is within the
        corresponding threshold. Of two similar structures, the one
        with the lower index is discarded.

        Requires set_mode to have been called. Returns nothing: the
        boolean mask is stored in self.mask and self.structures is
        replaced by the retained structures (all atoms included).

        '''

        # NOTE: this must be an equality test. The previous form,
        # self.mode in ("rmsd_rot_corr"), was a substring test on a plain
        # string (no trailing comma, hence no tuple), so that mode "rmsd"
        # also matched and hydrogens were never filtered out for it.
        if self.mode == "rmsd_rot_corr":
            # all atoms passed, but still only the
            # heavy ones are used for the RMSD calc
            structures = self.structures

        else:
            # only feed non-hydrogen atoms to eval funcs
            # NOTE(review): in "moi" mode self.args holds "masses" as set by
            # the caller - confirm its length matches the heavy-atom coordinates
            heavy_atoms = (np.asarray(self.atomnos) != 1)
            structures = self.structures[:, heavy_atoms]

        # initialize the output mask
        out_mask = np.ones(shape=self.structures.shape[0], dtype=np.bool_)
        self.cache = set()

        # split the structure array in subgroups and prune them internally
        for k in (5e5, 2e5, 1e5, 5e4, 2e4, 1e4,
                  5000, 2000, 1000, 500, 200, 100,
                  50, 20, 10, 5, 2, 1):

            # choose only k values such that every subgroup
            # has on average at least twenty active structures in it
            if k == 1 or 20*k < np.count_nonzero(out_mask):

                before = np.count_nonzero(out_mask)

                # compute similarities and get back the out_mask
                # (dissimilar pairs are added to self.cache along the way)
                out_mask = self._main_compute_group(
                    structures,
                    out_mask,
                    k=k,
                )

                after = np.count_nonzero(out_mask)
                newly_discarded = before - after

                if self.debugfunction is not None:
                    self.debugfunction(f'DEBUG: Pruner({self.mode}) - k={k}, rejected {newly_discarded} (keeping {after}/{len(out_mask)})')

        del self.cache
        self.mask = out_mask
        self.structures = self.structures[self.mask]
259
+
260
def prune_by_rmsd(structures, atomnos, max_rmsd=0.25, max_dev=None, debugfunction=None):
    '''
    Remove near-duplicate structures based on their pairwise RMSD,
    using a Pruner in 'rmsd' mode.

    Two structures are considered duplicates when both the RMSD is
    within max_rmsd and the maximum deviation is within max_dev;
    only one structure of each duplicate pair is kept.

    structures - array of structures, indexed along the first axis
    atomnos - atomic numbers, one per atom
    max_rmsd - RMSD threshold
    max_dev - maximum deviation threshold, defaults to 2*max_rmsd
    debugfunction - optional callable receiving debug strings

    Returns the retained structures and the boolean mask that
    selects them from the input array.
    '''

    # set default max_dev if not provided (an explicit 0 is respected)
    if max_dev is None:
        max_dev = 2*max_rmsd

    pruner = Pruner(structures, atomnos, debugfunction=debugfunction)
    pruner.max_rmsd = max_rmsd
    pruner.max_dev = max_dev
    pruner.set_mode('rmsd')
    pruner.prune()
    final_mask = pruner.mask

    if debugfunction is not None:
        fraction = 0 if pruner.calls == 0 else pruner.cache_calls/pruner.calls
        debugfunction(f"DEBUG: prune_by_rmsd - Used cached data {pruner.cache_calls}/{pruner.calls} times, {100*fraction:.2f}% of total calls")

    return structures[final_mask], final_mask
284
+
285
def prune_by_rmsd_rot_corr(structures, atomnos, graph, max_rmsd=0.25, max_dev=None, logfunction=None, debugfunction=None):
    '''
    Removes similar structures using a Pruner in 'rmsd_rot_corr' mode,
    that is an RMSD evaluation corrected for the rotation of the
    torsions selected below ("dummy rotations").

    Similarity occurs for structures with both RMSD within max_rmsd and
    maximum deviation within max_dev (default 2*max_rmsd).

    structures - array of structures, indexed along the first axis
    atomnos - atomic numbers, one per atom
    graph - molecular graph (networkx). If it has two connected
            components, an edge between their closest atoms is
            temporarily added and removed again before returning.
            If it has more than two, no pruning is attempted.
    logfunction - optional callable receiving the torsion summary
    debugfunction - optional callable receiving debug strings

    Returns the retained structures and the corresponding boolean mask.
    Note that the returned structures are centered on their centroid.

    NOTE(review): hydrogen bonds found on the first structure are added
    to the provided graph and are not removed afterwards - confirm
    whether callers rely on this.
    '''

    # center structures
    structures = np.array([s - s.mean(axis=0) for s in structures])
    ref = structures[0]

    # get the molecular fragments, as lists of atom indices
    subgraphs = [list(component) for component in connected_components(graph)]

    # if they are more than two, give up on pruning by rot corr rmsd
    if len(subgraphs) > 2:
        return structures, np.ones(structures.shape[0], dtype=bool)

    # if they are two, we can add a fictitious bond between the closest
    # atoms on the two molecular fragment in the provided graph, and
    # then removing it before returning
    temporary_edge = None
    if len(subgraphs) == 2:
        all_dists_array = all_dists(ref[subgraphs[0]], ref[subgraphs[1]])
        min_d = np.min(all_dists_array)
        s1, s2 = np.where(all_dists_array == min_d)
        i1, i2 = subgraphs[0][s1[0]], subgraphs[1][s2[0]]
        graph.add_edge(i1, i2)
        temporary_edge = (i1, i2)

        if debugfunction is not None:
            debugfunction(f"DEBUG: prune_by_rmsd_rot_corr - temporarily added edge {i1}-{i2} to the graph (will be removed before returning)")

    # the finally clause guarantees the fictitious bond is removed on every
    # exit path, including the early return below and any exception
    try:

        # set default max_dev if not provided (an explicit 0 is respected)
        if max_dev is None:
            max_dev = 2*max_rmsd

        # add hydrogen bonds to molecular graph
        hydrogen_bonds = _get_hydrogen_bonds(ref, atomnos, graph)
        for hb in hydrogen_bonds:
            graph.add_edge(*hb)

        # get all rotable bonds in the molecule, including dummy rotations
        torsions = _get_torsions(graph,
                                 hydrogen_bonds=_get_hydrogen_bonds(ref, atomnos, graph),
                                 double_bonds=get_double_bonds_indices(ref, atomnos),
                                 keepdummy=True,
                                 mode='symmetry')

        # only keep dummy rotations (checking both directions)
        torsions = [t for t in torsions if not (
                        _is_nondummy(t.i2, t.i3, graph) and (
                        _is_nondummy(t.i3, t.i2, graph)))]

        # since we only compute RMSD based on heavy atoms, discard quadruplets that involve hydrogen atoms
        torsions = [t for t in torsions if 1 not in [atomnos[i] for i in t.torsion]]

        # get torsions angles
        angles = [t.get_angles() for t in torsions]

        # Used specific directionality of torsions so that we always rotate the dummy portion (the one attached to the last index)
        torsions = [list(t.torsion) if _is_nondummy(t.i2, t.i3, graph) else list(reversed(t.torsion)) for t in torsions]

        # Set up final mask
        final_mask = np.ones(structures.shape[0], dtype=bool)

        # Halt the run if there are no subsymmetrical bonds
        if len(torsions) == 0:
            if debugfunction is not None:
                debugfunction('DEBUG: prune_by_rmsd_rot_corr - No subsymmetrical torsions found: skipping symmetry-corrected RMSD pruning')

            return structures[final_mask], final_mask

        # Print out torsion information
        if logfunction is not None:
            logfunction('\n >> Dihedrals considered for subsymmetry corrections:')
            for i, (torsion, angle) in enumerate(zip(torsions, angles)):
                logfunction(' {:2s} - {:21s} : {}{}{}{} : {}-fold'.format(
                    str(i+1),
                    str(torsion),
                    pt[atomnos[torsion[0]]].symbol,
                    pt[atomnos[torsion[1]]].symbol,
                    pt[atomnos[torsion[2]]].symbol,
                    pt[atomnos[torsion[3]]].symbol,
                    len(angle)))
            logfunction("\n")

        pruner = Pruner(structures, atomnos, debugfunction=debugfunction)
        pruner.graph = graph
        pruner.torsions = torsions
        pruner.angles = angles
        pruner.max_rmsd = max_rmsd
        pruner.max_dev = max_dev
        pruner.set_mode('rmsd_rot_corr')
        pruner.prune()
        final_mask = pruner.mask

    finally:
        # remove the extra bond in the molecular graph
        if temporary_edge is not None:
            graph.remove_edge(*temporary_edge)

    if debugfunction is not None:
        fraction = 0 if pruner.calls == 0 else pruner.cache_calls/pruner.calls
        debugfunction(f"DEBUG: prune_by_rmsd_rot_corr - Used cached data {pruner.cache_calls}/{pruner.calls} times, {100*fraction:.2f}% of total calls")

    return structures[final_mask], final_mask
392
+
393
def prune_by_moment_of_inertia(structures, atomnos, max_deviation=1e-2, debugfunction=None):
    '''
    Remove duplicate structures by comparing their moments of inertia
    along the principal axes, using a Pruner in 'moi' mode.

    When all three relative MOI deviations between two structures are
    within max_deviation (a fraction, so 1e-2 means 1%), the two are
    treated as duplicates (rotamers or enantiomers, in the intent of
    the original author) and only one of them is kept.

    structures - array of structures, indexed along the first axis
    atomnos - atomic numbers, one per atom
    max_deviation - relative deviation threshold applied to each MOI
    debugfunction - optional callable receiving debug strings

    Returns the retained structures and the boolean mask that
    selects them from the input array.
    '''

    pruner = Pruner(structures, atomnos, debugfunction=debugfunction)

    # atomic masses, looked up in the periodic table one atom at a time
    pruner.masses = np.array([pt[a].mass for a in atomnos])

    # the same threshold is used for each of the three moments
    pruner.max_dev = max_deviation

    pruner.set_mode('moi')
    pruner.prune()

    if debugfunction is not None:
        # guard against a division by zero when no pair was ever looked up
        if pruner.calls == 0:
            fraction = 0
        else:
            fraction = pruner.cache_calls/pruner.calls
        debugfunction(f"DEBUG: prune_by_moment_of_inertia - Used cached data {pruner.cache_calls}/{pruner.calls} times, {100*fraction:.2f}% of total calls")

    return structures[pruner.mask], pruner.mask
414
+
415
@njit
def get_moi_deviation_vec(coords1, coords2, masses):
    '''
    Returns the absolute difference between the inertia moments of
    coords1 and coords2 (as computed by get_inertia_moments with the
    same masses), element-wise divided by the inertia moments of coords1.

    '''
    # the first structure acts as the reference for the relative deviation
    reference_moments = get_inertia_moments(coords1, masses)
    other_moments = get_inertia_moments(coords2, masses)

    difference = reference_moments - other_moments
    return np.abs(difference) / reference_moments
firecode/pt.py ADDED
@@ -0,0 +1,32 @@
1
+ # coding=utf-8
2
+ '''
3
+ FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
4
+ Copyright (C) 2021-2024 Nicolò Tampellini
5
+
6
+ SPDX-License-Identifier: LGPL-3.0-or-later
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU Lesser General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public License
19
+ along with this program. If not, see
20
+ https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
21
+
22
+ '''
23
+ from periodictable import core, covalent_radius, mass
24
+
25
# Build a private periodic table, exposed as the module-level name "pt",
# with covalent radii and masses initialized on it. Up to five table
# names ("H=1" ... "H=5") are tried, stopping at the first that works.
# NOTE(review): presumably ValueError is raised when a table with that
# name has already been created - confirm against the periodictable docs.
for pt_n in range(5):
    try:
        pt = core.PeriodicTable(table=f"H={pt_n+1}")
        covalent_radius.init(pt)
        mass.init(pt)
    except ValueError:
        # this attempt failed, move on to the next table name
        pass
    else:
        # table created and initialized, no further attempts needed
        break