firecode 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecode/TEST_NOTEBOOK.ipynb +3940 -0
- firecode/__init__.py +0 -0
- firecode/__main__.py +118 -0
- firecode/_gaussian.py +97 -0
- firecode/algebra.py +405 -0
- firecode/ase_manipulations.py +879 -0
- firecode/atropisomer_module.py +516 -0
- firecode/automep.py +130 -0
- firecode/calculators/__init__.py +29 -0
- firecode/calculators/_gaussian.py +98 -0
- firecode/calculators/_mopac.py +242 -0
- firecode/calculators/_openbabel.py +154 -0
- firecode/calculators/_orca.py +129 -0
- firecode/calculators/_xtb.py +786 -0
- firecode/concurrent_test.py +119 -0
- firecode/embedder.py +2590 -0
- firecode/embedder_options.py +577 -0
- firecode/embeds.py +881 -0
- firecode/errors.py +65 -0
- firecode/graph_manipulations.py +333 -0
- firecode/hypermolecule_class.py +364 -0
- firecode/mep_relaxer.py +199 -0
- firecode/modify_settings.py +186 -0
- firecode/mprof.py +65 -0
- firecode/multiembed.py +148 -0
- firecode/nci.py +186 -0
- firecode/numba_functions.py +260 -0
- firecode/operators.py +776 -0
- firecode/optimization_methods.py +609 -0
- firecode/parameters.py +84 -0
- firecode/pka.py +275 -0
- firecode/profiler.py +17 -0
- firecode/pruning.py +421 -0
- firecode/pt.py +32 -0
- firecode/quotes.json +6651 -0
- firecode/quotes.py +9 -0
- firecode/reactive_atoms_classes.py +666 -0
- firecode/references.py +11 -0
- firecode/rmsd.py +74 -0
- firecode/settings.py +75 -0
- firecode/solvents.py +126 -0
- firecode/tests/C2F2H4.xyz +10 -0
- firecode/tests/C2H4.xyz +8 -0
- firecode/tests/CH3Cl.xyz +7 -0
- firecode/tests/HCOOH.xyz +7 -0
- firecode/tests/HCOOOH.xyz +8 -0
- firecode/tests/chelotropic.txt +3 -0
- firecode/tests/cyclical.txt +3 -0
- firecode/tests/dihedral.txt +2 -0
- firecode/tests/string.txt +3 -0
- firecode/tests/trimolecular.txt +9 -0
- firecode/tests.py +151 -0
- firecode/torsion_module.py +1035 -0
- firecode/utils.py +541 -0
- firecode-1.0.0.dist-info/LICENSE +165 -0
- firecode-1.0.0.dist-info/METADATA +321 -0
- firecode-1.0.0.dist-info/RECORD +59 -0
- firecode-1.0.0.dist-info/WHEEL +5 -0
- firecode-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
'''
|
|
3
|
+
FIRECODE: Filtering Refiner and Embedder for Conformationally Dense Ensembles
|
|
4
|
+
Copyright (C) 2021-2024 Nicolò Tampellini
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: LGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU Lesser General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
(at your option) any later version.
|
|
12
|
+
|
|
13
|
+
This program is distributed in the hope that it will be useful,
|
|
14
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
GNU Lesser General Public License for more details.
|
|
17
|
+
|
|
18
|
+
You should have received a copy of the GNU Lesser General Public License
|
|
19
|
+
along with this program. If not, see
|
|
20
|
+
https://www.gnu.org/licenses/lgpl-3.0.en.html#license-text.
|
|
21
|
+
|
|
22
|
+
'''
|
|
23
|
+
|
|
24
|
+
from networkx import Graph, connected_components
|
|
25
|
+
import numpy as np
|
|
26
|
+
from numba import njit, float32, prange
|
|
27
|
+
|
|
28
|
+
from firecode.algebra import all_dists, dihedral, norm_of
|
|
29
|
+
|
|
30
|
+
@njit
def torsion_comp_check(coords, torsion, mask, thresh=1.5, max_clashes=0) -> bool:
    '''
    Clash test between the two sides of a torsion.

    coords: 3D molecule coordinates
    torsion: four atom indices, of which only the two central ones are used
    mask: 1D boolean array selecting the atoms on one side of the torsion
    thresh: threshold value for when two atoms are considered clashing
    max_clashes: highest number of clashes that still passes a structure

    Returns 1 if the number of all_dists entries below thresh does not
    exceed max_clashes, 0 otherwise.
    '''
    _, pivot_a, pivot_b, _ = torsion

    # complementary selection, with the two central torsion atoms removed
    # so that the pivot bond itself is never counted among the clashes
    other_side = ~mask
    other_side[pivot_a] = False
    other_side[pivot_b] = False

    # fragment identification by boolean masking
    masked_atoms = coords[mask]
    remaining_atoms = coords[other_side]

    n_clashes = np.count_nonzero(all_dists(remaining_atoms, masked_atoms) < thresh)

    if n_clashes > max_clashes:
        return 0
    return 1
|
|
52
|
+
|
|
53
|
+
@njit
def count_clashes(coords):
    '''
    Count the entries of the coords-vs-coords all_dists result that are
    strictly between 0 and 0.5.

    coords: 3D molecule coordinates

    Zero distances are excluded, so an atom compared with itself is
    never counted.
    NOTE(review): if all_dists returns the full square matrix, every
    clashing pair contributes two entries (i,j and j,i) - confirm this
    is what callers comparing the result to max_clashes expect.
    '''
    # computed once and reused for both comparisons
    dists = all_dists(coords, coords)

    return np.count_nonzero((dists < 0.5) & (dists > 0))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@njit
def compenetration_check(coords, ids=None, thresh=1.5, max_clashes=0) -> bool:
    '''
    Clash test within one structure or between its fragments.

    coords: 3D molecule coordinates
    ids: 1D array with the number of atoms for each
         molecule (contiguous fragments in array)
    thresh: threshold value for when two atoms are considered clashing
    max_clashes: highest number of clashes that still passes a structure

    Returns 1 if the number of clashes does not exceed max_clashes,
    0 otherwise. Without ids the count comes from count_clashes (which
    uses its own fixed cutoff instead of thresh); with two ids the two
    fragments are compared; otherwise three fragments are assumed and
    compared pairwise.
    '''

    if ids is None:
        self_clashes = count_clashes(coords)
        return 0 if self_clashes > max_clashes else 1

    if len(ids) == 2:
        # Bimolecular: fragment identification by length (contiguous)
        split = ids[0]
        frag_a = coords[0:split]
        frag_b = coords[split:]

        pair_clashes = np.count_nonzero(all_dists(frag_b, frag_a) < thresh)
        if pair_clashes > max_clashes:
            return 0
        return 1

    # Any other length is treated as three fragments, the last of
    # which runs to the end of the array
    first_end = ids[0]
    second_end = ids[0] + ids[1]

    frag_a = coords[0:first_end]
    frag_b = coords[first_end:second_end]
    frag_c = coords[second_end:]

    # Running total over the three fragment pairs, bailing out as soon
    # as it exceeds max_clashes (max_clashes itself is still accepted)
    total = np.count_nonzero(all_dists(frag_b, frag_a) < thresh)
    if total > max_clashes:
        return 0

    total = total + np.count_nonzero(all_dists(frag_c, frag_b) < thresh)
    if total > max_clashes:
        return 0

    total = total + np.count_nonzero(all_dists(frag_a, frag_c) < thresh)
    if total > max_clashes:
        return 0

    return 1
|
|
110
|
+
|
|
111
|
+
def scramble(array, sequence):
    '''
    Return a new numpy array made of the elements of array
    taken in the order given by the indices in sequence.
    '''
    picked = []
    for position in sequence:
        picked.append(array[position])

    return np.array(picked)
|
|
113
|
+
|
|
114
|
+
def prune_conformers_tfd(structures, quadruplets, thresh=10, verbose=False):
    '''
    Removes similar structures by repeatedly grouping them into k
    subgroups and removing similar ones. A cache is present to avoid
    repeating TFD computations.

    Two structures are considered similar when tfd_similarity returns
    True for their torsion fingerprints, that is when their total
    angle difference is smaller than thresh.

    structures: array of structures, indexed along its first axis
    quadruplets: torsion index quadruplets, passed to _get_tf_mat
    thresh: similarity threshold, passed to tfd_similarity
    verbose: if True, prints a progress line for each round

    Returns a tuple: the retained structures and the boolean mask
    (over the input structures) that selects them.
    '''

    n_structures = structures.shape[0]

    # Get torsion fingerprints for structures
    tf_mat = _get_tf_mat(structures, quadruplets)

    # pairs (i, j) of absolute indices already compared and found
    # NOT similar: these never need to be compared again
    cache_set = set()
    final_mask = np.ones(n_structures, dtype=bool)

    for k in (5e5, 2e5, 1e5, 5e4, 2e4, 1e4,
              5000, 2000, 1000, 500, 200, 100,
              50, 20, 10, 5, 2, 1):

        num_active_str = np.count_nonzero(final_mask)

        # proceed only if there are more than five structures per
        # group; the final all-against-all round (k=1) always runs
        if k != 1 and 5*k >= num_active_str:
            continue

        if verbose:
            print(f'Working on subgroups with k={k} ({num_active_str} candidates left) {" "*10}', end='\r')

        n_groups = int(k)
        d = int(n_structures // k)

        for step in range(n_groups):
            # operating on each of the k subdivisions of the array.
            # All indices are absolute positions in the input array, so
            # the last subgroup must run up to the end of that array
            # (not up to the number of surviving structures) to take in
            # the remainder of the division.
            start = d * step
            stop = n_structures if step == n_groups - 1 else start + d

            matches = set()

            for i_abs in range(start, stop):
                for j_abs in range(i_abs + 1, stop):

                    pair = (i_abs, j_abs)
                    if pair in cache_set:
                        # already compared and found not similar
                        continue

                    if tfd_similarity(tf_mat[i_abs],
                                      tf_mat[j_abs],
                                      thresh=thresh):
                        # one match is enough to link i_abs to a cluster
                        matches.add(pair)
                        break

                    cache_set.add(pair)

            if not matches:
                continue

            # similar structures are clustered via graph connectivity
            g = Graph(matches)

            for cluster in connected_components(g):
                # of each cluster, keep the first (lowest index) structure
                keeper = min(cluster)
                for abs_index in cluster:
                    if abs_index != keeper:
                        final_mask[abs_index] = 0

    return structures[final_mask], final_mask
|
|
205
|
+
|
|
206
|
+
@njit(parallel=True)
def _get_tf_mat(structures, quadruplets):
    '''
    Build the torsion fingerprint matrix: a float32 array with one row
    per structure and one column per quadruplet, where each row is the
    output of get_torsion_fingerprint for that structure.
    '''
    n_structures = len(structures)
    n_torsions = len(quadruplets)

    fingerprints = np.empty(shape=(n_structures, n_torsions), dtype=float32)

    # each row is written independently, inside a numba prange loop
    for row in prange(n_structures):
        fingerprints[row] = get_torsion_fingerprint(structures[row], quadruplets)

    return fingerprints
|
|
216
|
+
|
|
217
|
+
@njit
def tfd_similarity(tfp1, tfp2, thresh=10) -> bool:
    '''
    Return True if the two structures are similar under the torsion
    fingerprint criterion, that is if the sum of their wrapped
    element-wise differences is smaller than thresh.
    '''

    # Element-wise absolute difference of the two fingerprints
    raw = np.abs(tfp1 - tfp2)

    # Differences over 180 are replaced by their distance from 360
    over_half_turn = raw > 180
    wrapped = np.abs(raw - over_half_turn * 360)

    total = np.sum(wrapped)
    if total < thresh:
        return True

    return False
|
|
233
|
+
|
|
234
|
+
@njit
def get_torsion_fingerprint(coords, quadruplets):
    '''
    Return a float32 array holding, for each quadruplet of indices,
    the value of dihedral() on the four corresponding rows of coords.
    '''
    n_torsions = quadruplets.shape[0]
    fingerprint = np.zeros(n_torsions, dtype=float32)

    for t in range(n_torsions):
        a, b, c, d = quadruplets[t]
        fingerprint[t] = dihedral([coords[a],
                                   coords[b],
                                   coords[c],
                                   coords[d]])

    return fingerprint
|
|
244
|
+
|
|
245
|
+
@njit(parallel=True)
def _score_embed_poses(structures, constrained_indices, constrained_distances):
    '''
    Returns a float32 array of scores, one per embedded structure.
    Each score is the sum, over the constrained index pairs of that
    structure, of the absolute difference between the current distance
    of the pair and the corresponding desired embed distance.
    '''
    n_poses = len(structures)
    scores = np.zeros(shape=n_poses, dtype=float32)

    for pose in prange(n_poses):
        pairs = constrained_indices[pose]
        targets = constrained_distances[pose]

        for c in range(len(pairs)):
            a, b = pairs[c]
            separation = norm_of(structures[pose][a] - structures[pose][b])
            # accumulated in place, directly in the float32 output
            scores[pose] += np.abs(separation - targets[c])

    return scores
|