mimicpy 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimicpy/__init__.py +1 -1
- mimicpy/__main__.py +726 -2
- mimicpy/_authors.py +2 -2
- mimicpy/_version.py +2 -2
- mimicpy/coords/__init__.py +1 -1
- mimicpy/coords/base.py +1 -1
- mimicpy/coords/cpmdgeo.py +1 -1
- mimicpy/coords/gro.py +1 -1
- mimicpy/coords/pdb.py +1 -1
- mimicpy/core/__init__.py +1 -1
- mimicpy/core/prepare.py +3 -3
- mimicpy/core/selector.py +1 -1
- mimicpy/force_matching/__init__.py +34 -0
- mimicpy/force_matching/bonded_forces.py +628 -0
- mimicpy/force_matching/compare_top.py +809 -0
- mimicpy/force_matching/dresp.py +435 -0
- mimicpy/force_matching/nonbonded_forces.py +32 -0
- mimicpy/force_matching/opt_ff.py +2114 -0
- mimicpy/force_matching/qm_region.py +1960 -0
- mimicpy/plugins/__main_installer__.py +76 -0
- mimicpy/{__main_vmd__.py → plugins/__main_vmd__.py} +2 -2
- mimicpy/plugins/pymol.py +56 -0
- mimicpy/plugins/vmd.tcl +78 -0
- mimicpy/scripts/__init__.py +1 -1
- mimicpy/scripts/cpmd.py +1 -1
- mimicpy/scripts/fm_input.py +265 -0
- mimicpy/scripts/fmdata.py +120 -0
- mimicpy/scripts/mdp.py +1 -1
- mimicpy/scripts/ndx.py +1 -1
- mimicpy/scripts/script.py +1 -1
- mimicpy/topology/__init__.py +1 -1
- mimicpy/topology/itp.py +603 -35
- mimicpy/topology/mpt.py +1 -1
- mimicpy/topology/top.py +254 -15
- mimicpy/topology/topol_dict.py +233 -4
- mimicpy/utils/__init__.py +1 -1
- mimicpy/utils/atomic_numbers.py +1 -1
- mimicpy/utils/constants.py +17 -3
- mimicpy/utils/elements.py +1 -1
- mimicpy/utils/errors.py +1 -1
- mimicpy/utils/file_handler.py +1 -1
- mimicpy/utils/strings.py +1 -1
- mimicpy-0.3.0.dist-info/METADATA +156 -0
- mimicpy-0.3.0.dist-info/RECORD +50 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
- mimicpy-0.3.0.dist-info/entry_points.txt +4 -0
- mimicpy-0.2.0.dist-info/METADATA +0 -86
- mimicpy-0.2.0.dist-info/RECORD +0 -38
- mimicpy-0.2.0.dist-info/entry_points.txt +0 -3
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,2114 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import logging
|
|
3
|
+
from ..utils.errors import MiMiCPyError
|
|
4
|
+
from ..utils.constants import au_to_nm, kb_au2gmx, au_kjm, nm_to_au, kb_gmx2au, kjm_au, kb_au2g96, kb_g962au
|
|
5
|
+
from .bonded_forces import *
|
|
6
|
+
from .nonbonded_forces import *
|
|
7
|
+
from ..scripts.fm_input import FMInput
|
|
8
|
+
from scipy.optimize import least_squares
|
|
9
|
+
import MDAnalysis as mda
|
|
10
|
+
import multiprocessing as mp
|
|
11
|
+
import random
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Atom count associated with each topology section name
# (presumably the number of atom indices per entry of that section -- confirm against callers).
section_atom_counts = dict(
    bonds=2,
    angles=3,
    dihedrals=4,
    pairs=2,
)
|
|
20
|
+
|
|
21
|
+
class L2Regularizer:
    """
    L2 regularization for force field parameters.

    Implements the harmonic penalty function:
        Θ(p) = α * Σ_i (p_i - p_i^0)^2 / (2 * γ_i^2)

    where:
    - p_i are the current parameters
    - p_i^0 are the initial/reference parameters
    - γ_i are the prior widths (controls how much parameters can deviate)
    - α is the regularization strength

    The prior widths are used as divisors, so every γ_i must be non-zero.
    """

    def __init__(self, initial_params, prior_widths=None, alpha=0.1, param_types=None):
        """
        Initialize L2 regularizer.

        Args:
            initial_params (array-like): Initial parameter values (p^0)
            prior_widths (array-like, optional): Prior widths (γ). If None, auto-generated
                from ``initial_params``
            alpha (float): Regularization strength (α)
            param_types (list, optional): List of parameter type strings for type-specific widths

        Raises:
            ValueError: If ``initial_params`` and ``prior_widths`` differ in length.
        """
        # Store everything as float: an integer array would silently truncate
        # fractional widths that are assigned later in set_prior_widths_by_type.
        self.initial_params = np.array(initial_params, dtype=float)
        self.alpha = alpha
        self.param_types = param_types

        if prior_widths is None:
            # Auto-generate prior widths from the magnitude of each parameter
            self.prior_widths = self._auto_generate_prior_widths(self.initial_params)
        else:
            self.prior_widths = np.array(prior_widths, dtype=float)

        # Explicit check instead of `assert`, which is removed when Python runs with -O
        if len(self.initial_params) != len(self.prior_widths):
            raise ValueError("Initial parameters and prior widths must have same length")

    def _auto_generate_prior_widths(self, params):
        """
        Auto-generate prior widths from the parameter values.

        Each width is 5% of the absolute parameter value; parameters whose
        width would be exactly zero get a default width of 0.05.
        This is a simplified version - in practice you'd want to group by parameter type.

        Args:
            params (array-like): Parameter values

        Returns:
            np.ndarray: Newly allocated array of prior widths
        """
        widths = np.abs(np.asarray(params, dtype=float)) * 0.05
        widths[widths == 0] = 0.05  # Default width for zero parameters
        return widths

    def set_prior_widths_by_type(self, type_widths):
        """
        Set prior widths based on parameter types.

        Entries of ``self.param_types`` that are not keys of ``type_widths``
        keep their current width.

        Args:
            type_widths (dict): Dictionary mapping parameter types to widths
                Example: {'bond_length': 0.1, 'bond_force': 50.0, 'angle_value': 0.1, ...}

        Raises:
            ValueError: If no parameter types were given to the constructor.
            MiMiCPyError: If the number of parameter types does not match the
                number of prior widths.
        """
        if self.param_types is None:
            raise ValueError("Parameter types must be set to use type-specific prior widths")

        # Safety check so that the positional assignment below cannot go out of bounds
        if len(self.param_types) != len(self.prior_widths):
            raise MiMiCPyError(f"Parameter types length ({len(self.param_types)}) does not match prior widths length ({len(self.prior_widths)})")

        for i, param_type in enumerate(self.param_types):
            if param_type in type_widths:
                self.prior_widths[i] = type_widths[param_type]

    def set_prior_widths_from_fm_input(self, fm_input):
        """
        Set prior widths based on FMInput parameters.

        Only the ``regularization_*_width`` attributes that are not None are
        applied; if all of them are None the prior widths are left untouched.

        Args:
            fm_input: FMInput object containing regularization parameters
        """
        # Parameter type name -> attribute of fm_input holding its width
        width_attributes = (
            ('bond_length', 'regularization_bond_length_width'),
            ('bond_force', 'regularization_bond_force_width'),
            ('angle_value', 'regularization_angle_value_width'),
            ('angle_force', 'regularization_angle_force_width'),
            ('dihedral_force', 'regularization_dihedral_force_width'),
        )

        type_widths = {}
        for param_type, attribute in width_attributes:
            width = getattr(fm_input, attribute)
            if width is not None:
                type_widths[param_type] = width

        if type_widths:
            self.set_prior_widths_by_type(type_widths)

    def compute_regularization_term(self, current_params):
        """
        Compute the L2 regularization term.

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            float: Regularization penalty α * Σ (p_i - p_i^0)² / (2 * γ_i²)
        """
        diff = current_params - self.initial_params
        penalty = np.sum(diff**2 / (2 * self.prior_widths**2))
        return self.alpha * penalty

    def compute_regularization_gradient(self, current_params):
        """
        Compute the gradient of the L2 regularization term.

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            np.ndarray: Gradient of regularization term, α * (p_i - p_i^0) / γ_i²
        """
        diff = current_params - self.initial_params
        gradient = diff / self.prior_widths**2
        return self.alpha * gradient

    def compute_regularization_residuals(self, current_params):
        """
        Compute regularization residuals for augmented residual approach.

        This method returns residuals that can be appended to the force matching
        residuals for use with scipy.least_squares Levenberg-Marquardt method.

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            np.ndarray: Regularization residuals √(α/2) * (p_i - p_i^0) / γ_i
        """
        diff = current_params - self.initial_params
        # The regularization term is: α * Σ(p_i - p_i^0)² / (2 * γ_i²)
        # so the sum of squares of these residuals reproduces that term.
        residuals = np.sqrt(self.alpha / 2) * diff / self.prior_widths
        return residuals
|
|
159
|
+
|
|
160
|
+
class AdaptiveL2Regularizer:
    """
    Adaptive L2 regularization that applies different regularization strengths
    based on the energy hierarchy of interactions.

    Bonds (highest energy) get stronger regularization to prevent overfitting,
    while dihedrals (lowest energy) get weaker regularization to allow more flexibility.

    Per parameter i the penalty is α_i * (p_i - p_i^0)² / (2 * γ_i²), where both
    the strength α_i and the prior width γ_i depend on which interaction
    type(s) the parameter belongs to.
    """

    def __init__(self, initial_params, bond2params, bonds, angles, dihedrals,
                 base_alpha=0.1, energy_hierarchy_scale=10.0, param_types=None):
        """
        Initialize adaptive regularizer.

        Args:
            initial_params (array-like): Initial parameter values
            bond2params (dict): Mapping of interaction indices to parameter indices
            bonds, angles, dihedrals (list): Lists of interactions (dict-like, with an
                'index' entry and an optional 'optimize' flag)
            base_alpha (float): Base regularization strength
            energy_hierarchy_scale (float): Scale factor for energy hierarchy
            param_types (list, optional): List of parameter type strings
        """
        self.initial_params = np.array(initial_params, dtype=float)
        self.base_alpha = base_alpha
        self.energy_hierarchy_scale = energy_hierarchy_scale
        self.param_types = param_types

        # Boolean masks marking which parameters belong to which interaction type
        self.bond_params_mask = self._build_param_mask(bonds, bond2params)
        self.angle_params_mask = self._build_param_mask(angles, bond2params)
        self.dihedral_params_mask = self._build_param_mask(dihedrals, bond2params)

        # Generate adaptive prior widths based on energy hierarchy
        self.prior_widths = self._generate_adaptive_prior_widths(initial_params)

        # Generate adaptive alpha values for each parameter
        self.adaptive_alphas = self._generate_adaptive_alphas()

    def _build_param_mask(self, interactions, bond2params):
        """Return a boolean mask of the parameters used by optimized ``interactions``.

        Interactions without a truthy 'optimize' entry are ignored, as are
        ``None`` placeholders in the parameter index lists.
        """
        mask = np.zeros(len(self.initial_params), dtype=bool)
        for interaction in interactions:
            if not interaction.get('optimize', False):
                continue
            for idx in bond2params.get(interaction['index'], []):
                if idx is not None:
                    mask[idx] = True
        return mask

    def _generate_adaptive_prior_widths(self, params):
        """Generate prior widths that respect energy hierarchy.

        The base width is 10% of the absolute parameter value (0.1 where that
        would be zero). The per-type factors are applied one after another, so
        a parameter flagged in several masks receives the product of them.
        """
        widths = np.abs(np.asarray(params, dtype=float)) * 0.1
        widths[widths == 0] = 0.1  # Default width for zero parameters

        # Bonds: tighter regularization (smaller widths)
        widths[self.bond_params_mask] *= 0.1

        # Angles: medium regularization
        widths[self.angle_params_mask] *= 0.5

        # Dihedrals: looser regularization (larger widths)
        widths[self.dihedral_params_mask] *= 2.0

        return widths

    def _generate_adaptive_alphas(self):
        """Generate adaptive alpha values based on energy hierarchy."""
        # Force a float array: with an integer base_alpha np.full would create an
        # integer array and the in-place float scaling below would fail to cast.
        alphas = np.full(len(self.initial_params), self.base_alpha, dtype=float)

        # Bonds: stronger regularization (higher alpha)
        alphas[self.bond_params_mask] *= self.energy_hierarchy_scale

        # Angles: medium regularization
        alphas[self.angle_params_mask] *= np.sqrt(self.energy_hierarchy_scale)

        # Dihedrals: weaker regularization (lower alpha)
        alphas[self.dihedral_params_mask] *= 1.0 / self.energy_hierarchy_scale

        return alphas

    def compute_regularization_term(self, current_params):
        """Compute adaptive regularization term Σ α_i (p_i - p_i^0)² / (2 γ_i²)."""
        diff = current_params - self.initial_params
        penalty = np.sum(self.adaptive_alphas * diff**2 / (2 * self.prior_widths**2))
        return penalty

    def compute_regularization_gradient(self, current_params):
        """Compute gradient of adaptive regularization term, α_i (p_i - p_i^0) / γ_i²."""
        diff = current_params - self.initial_params
        gradient = self.adaptive_alphas * diff / self.prior_widths**2
        return gradient

    def compute_regularization_residuals(self, current_params):
        """Compute adaptive regularization residuals √(α_i/2) (p_i - p_i^0) / γ_i.

        The sum of squares of the residuals equals the regularization term.
        """
        diff = current_params - self.initial_params
        residuals = np.sqrt(self.adaptive_alphas / 2) * diff / self.prior_widths
        return residuals
|
|
270
|
+
|
|
271
|
+
class ParameterOptimizer:
    """
    Collects the bonded force field parameters that are to be optimized.

    Interactions (dicts with 'index', 'atoms', 'function' and 'parameters'
    entries) are registered through ``optimize_bond``, ``optimize_angle`` and
    ``optimize_dihedral``. For each of them ``bond2params`` maps the
    interaction index to a list with one slot per parameter; a slot holds the
    position of that parameter in ``ff_optimize`` or ``None`` if the parameter
    is not optimized. Interactions found equivalent to an already registered
    one share that interaction's slot list.
    """

    # Number of parameter slots kept per dihedral function type
    _DIHEDRAL_SLOT_COUNTS = {1: 3, 4: 3, 9: 3, 2: 2, 3: 6}

    def __init__(self, eq_mapping):
        """
        Args:
            eq_mapping: Equivalence mapping handed to the ``check_*_equivalence`` helpers.
        """
        self.eq_mapping = eq_mapping
        self.ff_optimize = []
        self.bond2params = {}
        self.param_types = []  # Track parameter types as they're added
        self.counter = 0
        self._selected_interactions = {
            'bonds': set(),
            'angles': set(),
            'dihedrals': set()
        }
        # Store interactions for equivalence checking
        self._bonds = []
        self._angles = []
        self._dihedrals = []

    def select_interactions(self, interaction_type, indices=None, all=False):
        """
        Select specific interactions for optimization.

        An empty selection means "optimize everything" (see ``is_selected``),
        so ``all=True``, ``indices=None`` and an empty ``indices`` all select
        every interaction of the given type.

        Args:
            interaction_type (str): Type of interaction ('bonds', 'angles', or 'dihedrals')
            indices (list, optional): List of interaction indices to select. If None, selects all.
            all (bool, optional): If True, selects all interactions of the given type.

        Raises:
            ValueError: If ``interaction_type`` is not a known type.
        """
        if interaction_type not in self._selected_interactions:
            raise ValueError(f"Invalid interaction type: {interaction_type}")

        if all or indices is None:
            selection = set()
        else:
            selection = set(indices)
        self._selected_interactions[interaction_type] = selection

    def is_selected(self, interaction_type, index):
        """Check if an interaction is selected for optimization."""
        selection = self._selected_interactions[interaction_type]
        if not selection:
            return True  # If no specific selections, optimize all
        return index in selection

    def optimize_bond(self, bond, optimize_length=True, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register a bond; its parameter slots are ordered [length, force_constant].

        The bond is skipped (``bond['optimize'] = False`` and slots
        ``[None, None]``) if it is excluded because of hydrogens, is not
        selected, or has a force constant of exactly 0.0.

        Args:
            bond (dict): Bond interaction; its 'optimize' entry is set by this call.
            optimize_length (bool): Optimize ``parameters[0]``.
            optimize_force_constant (bool): Optimize ``parameters[1]``.
            hydrogen_indices (container, optional): Atom ids treated as hydrogens.
            exclude_hydrogen (bool): Skip bonds containing any of ``hydrogen_indices``.
        """
        self._optimize_two_term(
            bond, 'bonds', self._bonds,
            (optimize_length, optimize_force_constant),
            ('bond_length', 'bond_force'),
            hydrogen_indices, exclude_hydrogen)

    def optimize_angle(self, angle, optimize_angle=True, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register an angle; its parameter slots are ordered [angle_value, force_constant].

        The angle is skipped (``angle['optimize'] = False`` and slots
        ``[None, None]``) if it is excluded because of hydrogens, is not
        selected, or has a force constant of exactly 0.0.

        Args:
            angle (dict): Angle interaction; its 'optimize' entry is set by this call.
            optimize_angle (bool): Optimize ``parameters[0]``.
            optimize_force_constant (bool): Optimize ``parameters[1]``.
            hydrogen_indices (container, optional): Atom ids treated as hydrogens.
            exclude_hydrogen (bool): Skip angles containing any of ``hydrogen_indices``.
        """
        self._optimize_two_term(
            angle, 'angles', self._angles,
            (optimize_angle, optimize_force_constant),
            ('angle_value', 'angle_force'),
            hydrogen_indices, exclude_hydrogen)

    def optimize_dihedral(self, dihedral, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register a dihedral.

        Function types 1, 4 and 9 use three slots ([phi0, cp, mult]) and type 2
        uses two slots; in both cases only ``parameters[1]`` can be optimized.
        Type 3 (Ryckaert-Bellemans) uses six slots of which every non-zero
        coefficient is optimized. A dihedral whose ``parameters[1]`` (or, for
        type 3, every parameter) is 0.0 is marked as not optimized.

        Args:
            dihedral (dict): Dihedral interaction; its 'optimize' entry is set by this call.
            optimize_force_constant (bool): Whether force-constant-like parameters are optimized.
            hydrogen_indices (container, optional): Atom ids treated as hydrogens.
            exclude_hydrogen (bool): Skip dihedrals containing any of ``hydrogen_indices``.
        """
        if (self._involves_hydrogen(dihedral, hydrogen_indices, exclude_hydrogen)
                or not self.is_selected('dihedrals', dihedral['index'])):
            dihedral['optimize'] = False
            # For an unknown function type no bond2params entry is written here
            slot_count = self._DIHEDRAL_SLOT_COUNTS.get(dihedral['function'])
            if slot_count is not None:
                self.bond2params[dihedral['index']] = [None] * slot_count
            return

        dihedral['optimize'] = True
        params_to_optimize = []
        param_types = []
        function = dihedral['function']
        slot_count = self._DIHEDRAL_SLOT_COUNTS.get(function)
        if slot_count is not None:
            parameters = dihedral['parameters']
            is_rb = function == 3
            if is_rb:
                nothing_to_fit = all(p == 0.0 for p in parameters)
            else:
                nothing_to_fit = parameters[1] == 0.0  # Check force constant
            if nothing_to_fit:
                self.bond2params[dihedral['index']] = [None] * slot_count
                dihedral['optimize'] = False
                return

            params_to_optimize = [None] * slot_count
            param_types = [None] * slot_count
            if optimize_force_constant:
                if is_rb:
                    # [C0, C1, C2, C3, C4, C5]: only non-zero coefficients are fitted
                    for i, param in enumerate(parameters):
                        if param != 0.0:
                            params_to_optimize[i] = param
                            param_types[i] = 'dihedral_force'
                else:
                    params_to_optimize[1] = parameters[1]
                    param_types[1] = 'dihedral_force'
        self._add_parameters(dihedral, params_to_optimize, param_types)
        self._dihedrals.append(dihedral)

    @staticmethod
    def _involves_hydrogen(interaction, hydrogen_indices, exclude_hydrogen):
        """Return True if hydrogen exclusion is active and any atom is in ``hydrogen_indices``."""
        if not exclude_hydrogen or hydrogen_indices is None:
            return False
        return any(atom in hydrogen_indices for atom in interaction['atoms'])

    def _optimize_two_term(self, interaction, selection_key, store, wanted, type_names,
                           hydrogen_indices, exclude_hydrogen):
        """Shared implementation of ``optimize_bond`` / ``optimize_angle`` (two-slot interactions)."""
        skip = (
            self._involves_hydrogen(interaction, hydrogen_indices, exclude_hydrogen)
            or not self.is_selected(selection_key, interaction['index'])
            or interaction['parameters'][1] == 0.0
        )
        if skip:
            interaction['optimize'] = False
            self.bond2params[interaction['index']] = [None, None]
            return

        interaction['optimize'] = True
        # Slot order is fixed; parameters that are not optimized keep a None placeholder
        values = [interaction['parameters'][slot] if use else None
                  for slot, use in enumerate(wanted)]
        types = [name if use else None for name, use in zip(type_names, wanted)]
        self._add_parameters(interaction, values, types)
        store.append(interaction)

    def _stored_interactions_by_index(self):
        """Map interaction index -> stored interaction (bonds first, then angles, then dihedrals)."""
        by_index = {}
        for group in (self._bonds, self._angles, self._dihedrals):
            for stored in group:
                # setdefault keeps the first hit, matching a bonds/angles/dihedrals scan order
                by_index.setdefault(stored['index'], stored)
        return by_index

    def _add_parameters(self, interaction, params_to_optimize, param_types_to_add=None):
        """
        Register the parameters of ``interaction`` in ``ff_optimize``/``bond2params``.

        If an equivalent interaction has been stored before, its slot list is
        reused (the same list object) and nothing is appended.

        Args:
            interaction (dict): Interaction being registered.
            params_to_optimize (list): Parameter value per slot, None for slots not optimized.
            param_types_to_add (list, optional): Type name per slot. A type is only
                recorded for slots that have both a value and a non-None type, so
                callers should supply a type for every optimized slot to keep
                ``param_types`` aligned with ``ff_optimize``.
        """
        # Build the index lookup once instead of rescanning all stored
        # interactions for every entry of bond2params.
        stored_by_index = self._stored_interactions_by_index()
        shared_slots = None
        for existing_idx, existing_params in self.bond2params.items():
            stored = stored_by_index.get(existing_idx)
            if stored and self._interactions_equivalent(interaction, stored):
                shared_slots = existing_params
                break
        if shared_slots is not None:
            self.bond2params[interaction['index']] = shared_slots
            return

        # Number of slots: dihedrals depend on the function type, everything else has two
        if len(interaction['atoms']) == 4:
            expected_length = self._DIHEDRAL_SLOT_COUNTS.get(interaction['function'], 3)
        else:
            expected_length = 2

        param_indices = [None] * expected_length

        for i, param in enumerate(params_to_optimize):
            if param is None:
                continue  # slot stays None
            self.ff_optimize.append(param)
            param_indices[i] = len(self.ff_optimize) - 1
            if param_types_to_add and i < len(param_types_to_add) and param_types_to_add[i] is not None:
                self.param_types.append(param_types_to_add[i])

        self.bond2params[interaction['index']] = param_indices

    def _interactions_equivalent(self, interaction1, interaction2):
        """Return True if both interactions share function type, atom count and equivalent atoms."""
        if interaction1['function'] != interaction2['function']:
            return False

        atoms1 = interaction1['atoms']
        atoms2 = interaction2['atoms']
        n_atoms = len(atoms1)
        if n_atoms != len(atoms2):
            return False

        if n_atoms == 2:  # bonds
            return check_bond_equivalence(atoms1, atoms2, self.eq_mapping)
        if n_atoms == 3:  # angles
            return check_angle_equivalence(atoms1, atoms2, self.eq_mapping)
        if n_atoms == 4:  # dihedrals
            return check_dihedral_equivalence(atoms1, atoms2, self.eq_mapping)
        return False

    def _is_equivalent(self, interaction1, interaction2_idx):
        """Check if two interactions are equivalent based on their atoms and function type.

        Args:
            interaction1 (dict): First interaction to compare
            interaction2_idx (int): Index of second interaction in bond2params

        Returns:
            bool: True if interactions are equivalent, False otherwise
                (also False if no stored interaction has that index)
        """
        interaction2 = self._stored_interactions_by_index().get(interaction2_idx)
        if not interaction2:
            return False
        return self._interactions_equivalent(interaction1, interaction2)

    def get_optimized_parameters(self):
        """Return ``(parameter array, bond2params, param_types)`` collected so far."""
        return np.array(self.ff_optimize), self.bond2params, self.param_types
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def get_configurations_optff(fmdata, tpr_file, trr_file, begin, end, step,
                             qm_region):
    """
    Get configurations for force field optimization.

    Args:
        fmdata (FMDataset): FMDataset object containing QM data (assumed to use CPMD IDs).
        tpr_file (str): GROMACS TPR file.
        trr_file (str): GROMACS TRR file.
        begin (int): Starting frame.
        end (int): Ending frame (exclusive, as in ``range``).
        step (int): Step size.
        qm_region (QMRegion): QMRegion object to get GROMACS to CPMD mapping.

    Returns:
        list: One dictionary per frame with the entries
            - 'qm_coordinates': QM coordinates reordered to the order of
              ``qm_region.qm_atoms``; if the QM region has boundary atoms, the
              coordinates of the rows of ``qm_region.extended_qm_atoms`` with
              ``is_qm == 0`` are stacked below them.
            - 'qm_forces': QM forces in the same reordered QM order.
            - 'qm_gmx_forces': forces returned by ``get_qm_gmx_forces``.
            - 'qm_gmx_coordinates': positions returned by ``get_qm_gmx_forces``
              (only when the QM region has no boundary atoms).

    Raises:
        ValueError: If a QM atom has no CPMD ID in ``qm_region.gmx_to_cpmd_map`` or
            its CPMD ID is missing from the IDs stored in ``fmdata``.
    """
    u = mda.Universe(tpr_file, trr_file)
    configurations = []

    # Get GROMACS to CPMD mapping (CPMD IDs are assumed to be 1-based)
    gmx_to_cpmd_map = qm_region.gmx_to_cpmd_map
    qm_atoms_gmx_indices = qm_region.qm_atoms.index

    # CPMD IDs in FMdata order, read from the first requested frame
    # (the order is assumed to be the same for all frames).
    cpmd_ids_fmdata_order = fmdata.get_configuration_properties(begin, 'id', 'qm')
    # Compare against an array so the element-wise `==` also works for plain sequences
    cpmd_id_array = np.asarray(cpmd_ids_fmdata_order)

    # fm_to_top_order[i] is the position in FMdata's QM arrays of the i-th atom
    # of qm_atoms_gmx_indices.
    fm_to_top_order = np.zeros(len(qm_atoms_gmx_indices), dtype=int)
    for i, gmx_idx in enumerate(qm_atoms_gmx_indices):
        target_cpmd_id = gmx_to_cpmd_map.get(gmx_idx)
        if target_cpmd_id is None:
            raise ValueError(f"GROMACS atom {gmx_idx} not found in gmx_to_cpmd_map.")

        fm_idx_arr = np.where(cpmd_id_array == target_cpmd_id)[0]
        if len(fm_idx_arr) == 0:
            raise ValueError(
                f"CPMD ID {target_cpmd_id} (for GMX atom {gmx_idx}) "
                f"not found in FMdata's list of CPMD IDs: {cpmd_ids_fmdata_order}"
            )
        fm_to_top_order[i] = fm_idx_arr[0]

    has_boundary = not qm_region.boundary_atoms.empty
    n_mm_atoms = 0
    mm_row_sources = []  # (row in mm_coords, row in FMdata's MM coordinates)
    if has_boundary:
        gmx_ids_mm_order = fmdata.get_configuration_properties(begin, 'id', 'mm')
        # Create mapping from GROMACS IDs to indices in MM coordinates
        gmx_to_mm_idx = {gmx_id: pos for pos, gmx_id in enumerate(gmx_ids_mm_order)}

        # The MM part of the extended region does not depend on the frame, so it is
        # resolved once here instead of once per frame.
        extended_df = qm_region.extended_qm_atoms
        mm_atoms = extended_df[extended_df['is_qm'] == 0]  # MM atoms
        n_mm_atoms = len(mm_atoms)
        for mm_idx, gmx_idx in enumerate(mm_atoms.index):
            if gmx_idx in gmx_to_mm_idx:
                mm_row_sources.append((mm_idx, gmx_to_mm_idx[gmx_idx]))
            else:
                # The coordinates of such an atom stay at zero in every frame
                logging.warning(f'Could not find MM atom {gmx_idx} in FMData MM coordinates')

    for idx in range(begin, end, step):
        config = dict()

        # Get QM data from FMdata
        fm_coords = fmdata.get_configuration_properties(idx, 'coordinate', 'qm')
        fm_forces = fmdata.get_configuration_properties(idx, 'force', 'qm')

        # Reorder coordinates and forces to match the order of qm_atoms_gmx_indices
        config['qm_coordinates'] = fm_coords[fm_to_top_order]
        config['qm_forces'] = fm_forces[fm_to_top_order]

        if has_boundary:
            # Get MM coordinates from FMData and pick the rows of the extended region
            fm_mm_coords = fmdata.get_configuration_properties(idx, 'coordinate', 'mm')
            mm_coords = np.zeros((n_mm_atoms, 3))
            for mm_idx, fm_mm_idx in mm_row_sources:
                mm_coords[mm_idx] = fm_mm_coords[fm_mm_idx]

            # Extended coordinates: QM atoms first, then the MM atoms
            extended_coords = np.vstack([config['qm_coordinates'], mm_coords])

            # GROMACS forces are requested for the QM atoms only; the returned
            # positions are not used in this case.
            forces, _ = get_qm_gmx_forces(u, idx, qm_atoms_gmx_indices)
            config['qm_gmx_forces'] = forces
            config['qm_coordinates'] = extended_coords
        else:
            # Get GROMACS forces and positions for qm_atoms_gmx_indices
            forces, positions = get_qm_gmx_forces(u, idx, qm_atoms_gmx_indices)
            config['qm_gmx_forces'] = forces
            config['qm_gmx_coordinates'] = positions

        configurations.append(config)
    return configurations
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def compute_residue(ff_optimize,
                    qmmm_forces,
                    gmx_force, qm_coordinates,
                    bonds,
                    angles,
                    dihedrals,
                    bond2params):
    """
    Compute the bonded-force residual for a single configuration.

    Args:
        ff_optimize: Force field parameters, forwarded to compute_bonded_forces
        qmmm_forces: Reference forces; their length is forwarded as qm_atoms_count
        gmx_force: Forces subtracted from qmmm_forces to obtain the reference bonded force
        qm_coordinates: Coordinates forwarded to compute_bonded_forces
        bonds, angles, dihedrals: Interaction lists forwarded to compute_bonded_forces
        bond2params: Parameter mapping forwarded to compute_bonded_forces

    Returns:
        tuple: (residue, reference_bonded_force), where reference_bonded_force is
            qmmm_forces - gmx_force and residue is the modelled bonded force minus
            that reference
    """
    # The coordinates may describe an extended atom set; qm_atoms_count asks
    # compute_bonded_forces to return forces for the QM atoms only.
    n_qm_atoms = len(qmmm_forces)
    modelled_bonded_force = compute_bonded_forces(ff_optimize, qm_coordinates,
                                                  bonds, angles, dihedrals, bond2params,
                                                  qm_atoms_count=n_qm_atoms)

    # What the bonded terms have to reproduce: the reference force with the
    # GROMACS force removed.
    target_bonded_force = qmmm_forces - gmx_force

    return modelled_bonded_force - target_bonded_force, target_bonded_force
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _process_single_configuration(args):
    """
    Worker entry point: evaluate compute_residue for one configuration.

    The inputs are packed in a single tuple so the function can be handed to
    ``Pool.map``.

    Args:
        args: Tuple containing (ff_optimize, config, bonds, angles, dihedrals, bond2params)

    Returns:
        tuple: (residual, ref_bonded_force)
    """
    (parameters, configuration,
     bond_list, angle_list, dihedral_list, param_map) = args

    residual, reference = compute_residue(
        parameters,
        configuration['qm_forces'],
        configuration["qm_gmx_forces"],
        configuration["qm_coordinates"],
        bond_list,
        angle_list,
        dihedral_list,
        param_map,
    )
    return residual, reference
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def compute_ff_obj(ff_optimize, configurations,
                   bonds, angles, dihedrals, bond2params,
                   n_processes=None):
    """
    Compute force field objective function with optional parallelization.

    Args:
        ff_optimize (numpy.ndarray): Force field parameters to optimize
        configurations (list): List of configuration dictionaries; a single
            non-list object is treated as one configuration
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        n_processes (int, optional): Number of processes to use. If None, uses serial processing

    Returns:
        tuple: (total_residual, ref_bonded_forces)
    """
    # isinstance (rather than an exact type comparison) keeps list subclasses
    # from being wrapped a second time and then mistaken for one configuration.
    if not isinstance(configurations, list):
        configurations = [configurations]

    # A pool only pays off with more than one worker and more than one configuration.
    use_pool = (n_processes is not None
                and n_processes > 1
                and len(configurations) > 1)
    if use_pool:
        return _compute_ff_obj_parallel(ff_optimize, configurations,
                                        bonds, angles, dihedrals, bond2params, n_processes)
    return _compute_ff_obj_serial(ff_optimize, configurations,
                                  bonds, angles, dihedrals, bond2params)
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _compute_ff_obj_serial(ff_optimize, configurations,
                           bonds, angles, dihedrals, bond2params):
    """
    Serial evaluation of compute_residue over all configurations.

    Args:
        ff_optimize: Force field parameters
        configurations (list): Configuration dictionaries providing 'qm_forces',
            'qm_gmx_forces' and 'qm_coordinates'
        bonds, angles, dihedrals: Interaction lists
        bond2params: Parameter mapping

    Returns:
        tuple: (residuals, reference bonded forces), each stacked row-wise with
            np.vstack in configuration order
    """
    per_configuration = [
        compute_residue(ff_optimize,
                        cfg['qm_forces'],
                        cfg["qm_gmx_forces"],
                        cfg["qm_coordinates"],
                        bonds,
                        angles,
                        dihedrals,
                        bond2params)
        for cfg in configurations
    ]

    stacked_residuals = np.vstack([pair[0] for pair in per_configuration])
    stacked_references = np.vstack([pair[1] for pair in per_configuration])
    return stacked_residuals, stacked_references
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def _compute_ff_obj_parallel(ff_optimize, configurations,
                             bonds, angles, dihedrals, bond2params, n_processes):
    """
    Evaluate compute_residue for all configurations in a multiprocessing pool.

    Args:
        ff_optimize: Force field parameters
        configurations (list): Configuration dictionaries
        bonds, angles, dihedrals: Interaction lists
        bond2params: Parameter mapping
        n_processes (int): Number of worker processes in the pool

    Returns:
        tuple: (residuals, reference bonded forces), each stacked row-wise with
            np.vstack in configuration order
    """
    # One argument tuple per configuration, in the layout that
    # _process_single_configuration unpacks.
    jobs = [(ff_optimize, cfg, bonds, angles, dihedrals, bond2params)
            for cfg in configurations]

    with mp.Pool(processes=n_processes) as pool:
        outcomes = pool.map(_process_single_configuration, jobs)

    residual_blocks = []
    reference_blocks = []
    for outcome in outcomes:
        residual_blocks.append(outcome[0])
        reference_blocks.append(outcome[1])

    return np.vstack(residual_blocks), np.vstack(reference_blocks)
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def check_bond_equivalence(bond1, bond2, eq_mapping):
    """
    Tell whether two bonds connect equivalent atoms.

    Each atom is translated through eq_mapping; the bonds are equivalent when
    the translated pairs match in either direction. A bond with an atom that
    is missing from eq_mapping (or mapped to None) is never equivalent.

    Args:
        bond1, bond2: Indexable objects whose items 0 and 1 are looked up in eq_mapping
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the bonds are equivalent, False otherwise
    """
    mapped = [eq_mapping.get(bond1[0]), eq_mapping.get(bond1[1]),
              eq_mapping.get(bond2[0]), eq_mapping.get(bond2[1])]
    if any(label is None for label in mapped):
        return False

    first_a, first_b, second_a, second_b = mapped
    if first_a == second_a and first_b == second_b:
        return True
    if first_a == second_b and first_b == second_a:
        return True
    return False
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def check_angle_equivalence(bond1, bond2, eq_mapping):
    """
    Tell whether two angles are formed by equivalent atoms.

    Each atom is translated through eq_mapping. The angles are equivalent when
    the central atoms match and the outer atoms match in either order. As in
    check_bond_equivalence, an angle with an atom that is missing from
    eq_mapping (or mapped to None) is never equivalent; without this guard two
    unmapped atoms would compare equal (None == None) and unrelated angles
    would be reported as equivalent.

    Args:
        bond1, bond2: Indexable objects whose items 0, 1 and 2 are looked up in
            eq_mapping (item 1 is the central atom)
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the angles are equivalent, False otherwise
    """
    a1, b1, c1 = (eq_mapping.get(bond1[i]) for i in range(3))
    a2, b2, c2 = (eq_mapping.get(bond2[i]) for i in range(3))

    # Unmapped atoms carry no equivalence information.
    if any(label is None for label in (a1, b1, c1, a2, b2, c2)):
        return False

    if b1 != b2:
        return False
    return bool((a1 == a2 and c1 == c2) or (a1 == c2 and c1 == a2))
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
def check_dihedral_equivalence(bond1, bond2, eq_mapping):
    """
    Tell whether two dihedrals are formed by equivalent atoms.

    Each atom is translated through eq_mapping. The dihedrals are equivalent
    when the translated quadruples match either in the same order or fully
    reversed. As in check_bond_equivalence, a dihedral with an atom that is
    missing from eq_mapping (or mapped to None) is never equivalent; without
    this guard two unmapped atoms would compare equal (None == None) and
    unrelated dihedrals would be reported as equivalent.

    Args:
        bond1, bond2: Indexable objects whose items 0 to 3 are looked up in eq_mapping
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the dihedrals are equivalent, False otherwise
    """
    a1, b1, c1, d1 = (eq_mapping.get(bond1[i]) for i in range(4))
    a2, b2, c2, d2 = (eq_mapping.get(bond2[i]) for i in range(4))

    # Unmapped atoms carry no equivalence information.
    if any(label is None for label in (a1, b1, c1, d1, a2, b2, c2, d2)):
        return False

    same_direction = a1 == a2 and b1 == b2 and c1 == c2 and d1 == d2
    reversed_direction = a1 == d2 and b1 == c2 and c1 == b2 and d1 == a2
    return bool(same_direction or reversed_direction)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def get_optimize_ff_parameters(qm_region, eq_mapping, fm_input=None):
    """
    Select the force field parameters to optimize and build their bookkeeping.

    Every bond, angle and dihedral of the QM region is either handed to a
    ParameterOptimizer or frozen. An interaction is frozen when solvent
    optimization is skipped and ``qm_region.is_solvent_interaction`` is truthy
    for it, or when it has a truthy ``'involves_boundary'`` entry. A frozen
    interaction gets ``'optimize'`` set to False and a list of ``None``
    placeholders stored in ``optimizer.bond2params`` under its ``'index'``.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions; they are
            extracted with ``extract_qm_interactions()`` when ``qm_interactions`` is None
        eq_mapping (dict): Mapping of equivalent atoms, passed to ParameterOptimizer
        fm_input (FMInput, optional): FMInput object containing all optimization and
            regularization parameters.
            NOTE(review): despite the None default, the optimize_*, exclude_hydrogen_*
            and regularization attributes are read without a None check, so callers
            effectively have to pass an FMInput - confirm the intended contract.

    Returns:
        tuple: (ff_optimize, bond2params, regularizer) where ff_optimize is array of
            parameters to optimize, bond2params maps interaction indices to parameter
            indices, and regularizer is the L2Regularizer object (None when
            ``fm_input.regularization`` is falsy)

    Raises:
        MiMiCPyError: If no parameter was selected for optimization
    """
    qm_interactions = qm_region.qm_interactions
    if qm_interactions is None:
        qm_interactions = qm_region.extract_qm_interactions()

    # The existing code treats atom indices as 0-based in the QMRegion, hence
    # the shift of the qm_atoms index by one.
    # NOTE(review): startswith('H') also matches symbols such as 'He' or 'Hg' -
    # confirm the 'element' column cannot contain them.
    qm_atoms = qm_region.qm_atoms
    hydrogen_rows = qm_atoms[qm_atoms['element'].str.startswith('H')]
    hydrogen_indices = set(hydrogen_rows.index - 1)

    optimizer = ParameterOptimizer(eq_mapping)

    # Optional: skip optimization for solvent molecules (on by default, also
    # when fm_input lacks the attribute).
    skip_solvent_optimization = getattr(fm_input, 'skip_solvent_optimization', True)

    def is_frozen(interaction):
        """True for solvent interactions (when skipped) and boundary interactions."""
        if skip_solvent_optimization and qm_region.is_solvent_interaction(interaction):
            return True
        # Boundary involvement was already marked in extract_qm_interactions.
        return interaction.get('involves_boundary', False)

    def freeze_two_parameter_term(interaction):
        """Mark a bond or angle as fixed, with two None parameter slots."""
        interaction['optimize'] = False
        optimizer.bond2params[interaction['index']] = [None, None]

    # Placeholder count per dihedral function type; other function types get
    # no bond2params entry at all.
    dihedral_placeholders = (((1, 4, 9), 3), ((2,), 2), ((3,), 6))

    # Bonds
    for bond in qm_interactions['bonds']:
        if is_frozen(bond):
            freeze_two_parameter_term(bond)
        else:
            optimizer.optimize_bond(
                bond,
                fm_input.optimize_bond_length,
                fm_input.optimize_bond_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_bonds
            )

    # Angles
    for angle in qm_interactions['angles']:
        if is_frozen(angle):
            freeze_two_parameter_term(angle)
        else:
            optimizer.optimize_angle(
                angle,
                fm_input.optimize_angle_value,
                fm_input.optimize_angle_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_angles
            )

    # Dihedrals
    for dihedral in qm_interactions['dihedrals']:
        if not is_frozen(dihedral):
            optimizer.optimize_dihedral(
                dihedral,
                fm_input.optimize_dihedral_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_dihedrals
            )
            continue

        dihedral['optimize'] = False
        function_type = dihedral['function']
        for function_codes, n_slots in dihedral_placeholders:
            if function_type in function_codes:
                optimizer.bond2params[dihedral['index']] = [None] * n_slots
                break

    ff_optimize, bond2params, param_types = optimizer.get_optimized_parameters()

    if len(ff_optimize) == 0:
        # No parameters to optimize
        raise MiMiCPyError("No parameters to optimize")

    # Parameter types come from the ParameterOptimizer together with the
    # parameters themselves, which keeps the two consistent.
    logging.debug(f"Generated {len(param_types)} parameter types for {len(ff_optimize)} parameters")
    if len(param_types) > 20:
        # Too many to list in full
        logging.debug(f"Parameter types (first 10): {param_types[:10]}...")
    else:
        logging.debug(f"Parameter types: {param_types}")

    # Log parameter type distribution
    type_counts = {}
    for kind in param_types:
        type_counts[kind] = 1 + type_counts.get(kind, 0)
    logging.debug(f"Parameter type distribution: {type_counts}")

    regularizer = None
    if fm_input.regularization:
        # Regularizer built with the parameter types; the type-specific prior
        # widths are then taken from FMInput.
        regularizer = L2Regularizer(ff_optimize, None, fm_input.regularization_alpha, param_types)
        regularizer.set_prior_widths_from_fm_input(fm_input)

    return ff_optimize, bond2params, regularizer
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
def compute_obj_lm(ff_optimize,
                   configurations,
                   bonds, angles, dihedrals, bond2params,
                   *args):
    """
    Residual function handed to ``least_squares`` (see optimization_ff).

    Computes the force residuals for all configurations, appends the
    regularization residuals when a regularizer is supplied, and appends one
    line with the objective value, two normalised error measures and the
    current parameters to the log file.

    Args:
        ff_optimize (numpy.ndarray): Current force field parameters
        configurations (list): List of configuration dictionaries
        bonds, angles, dihedrals: Interaction lists
        bond2params (dict): Parameter mapping
        *args: Extra positional arguments, recognised as follows:
            - args[0] is the log file name ("output.txt" when args is empty)
            - the first dict containing 'n_processes' supplies the process count
            - the first object with a ``compute_regularization_residuals``
              attribute is used as the regularizer

    Returns:
        numpy.ndarray: Flattened force residuals, followed by the regularization
            residuals when a regularizer was found
    """
    # Parallelization settings travel inside args as a dict.
    parallel_settings = next(
        (arg for arg in args if isinstance(arg, dict) and 'n_processes' in arg),
        None,
    )
    n_processes = None
    if parallel_settings is not None:
        n_processes = parallel_settings.get('n_processes')

    residue, ref_bonded = compute_ff_obj(ff_optimize, configurations,
                                         bonds, angles, dihedrals, bond2params, n_processes)

    filename = "output.txt" if len(args) == 0 else args[0]

    # The regularizer is recognised by duck typing.
    regularization = next(
        (arg for arg in args if hasattr(arg, 'compute_regularization_residuals')),
        None,
    )

    flat_residuals = residue.ravel()
    sum_residual = np.sum(residue**2)

    if regularization is None:
        augmented_residuals = flat_residuals
    else:
        # Levenberg-Marquardt works on a residual vector, so the penalty enters
        # as extra residual entries; its scalar value is added to the logged objective.
        reg_residuals = regularization.compute_regularization_residuals(ff_optimize)
        augmented_residuals = np.concatenate([flat_residuals, reg_residuals])
        sum_residual += regularization.compute_regularization_term(ff_optimize)

    # Objective relative to the reference bonded forces, and per residual entry.
    sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
    abs_val = np.sqrt(sum_residual / np.size(residue))

    with open(filename, '+a') as file:
        file.write(f"{sum_residual} {sdf} {abs_val} ")
        file.write(' '.join(map(str, ff_optimize)))
        file.write('\n')

    return augmented_residuals
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
def jacobian_ff_obj(ff_optimize,
                    configurations,
                    bonds, angles, dihedrals,
                    bond2params, *args):
    """
    Jacobian function handed to ``least_squares`` (see optimization_ff).

    Stacks the per-configuration Jacobians and, when a regularizer is
    supplied, appends the diagonal Jacobian of the regularization residuals.

    Args:
        ff_optimize (numpy.ndarray): Current force field parameters
        configurations (list): List of configuration dictionaries; a single
            non-list object is treated as one configuration
        bonds, angles, dihedrals: Interaction lists
        bond2params (dict): Parameter mapping
        *args: Extra positional arguments, recognised as follows:
            - the first dict containing 'n_processes' supplies the process count
            - the first object with a ``compute_regularization_residuals``
              attribute is used as the regularizer

    Returns:
        numpy.ndarray: Stacked Jacobian; with a regularizer, len(ff_optimize)
            additional rows are appended
    """
    # isinstance (rather than an exact type comparison) keeps list subclasses
    # from being wrapped a second time and then mistaken for one configuration.
    if not isinstance(configurations, list):
        configurations = [configurations]

    # Parallelization settings travel inside args as a dict.
    n_processes = None
    for arg in args:
        if isinstance(arg, dict) and 'n_processes' in arg:
            n_processes = arg.get('n_processes')
            break

    # Use parallel processing if requested and beneficial
    if n_processes is not None and n_processes > 1 and len(configurations) > 1:
        total_jacobian = _compute_jacobian_parallel(ff_optimize, configurations,
                                                    bonds, angles, dihedrals, bond2params, n_processes)
    else:
        total_jacobian = _compute_jacobian_serial(ff_optimize, configurations,
                                                  bonds, angles, dihedrals, bond2params)

    jac = np.vstack(total_jacobian)

    # The regularizer is recognised by duck typing; iterating an empty args
    # tuple simply finds nothing, so no separate length check is needed.
    regularization = next(
        (arg for arg in args if hasattr(arg, 'compute_regularization_residuals')),
        None,
    )
    if regularization is None:
        return jac

    # For L2 regularization the Jacobian is a diagonal matrix with entries
    # sqrt(alpha / 2) / prior_width.
    n_params = len(ff_optimize)
    reg_jacobian = np.zeros((n_params, n_params))
    np.fill_diagonal(reg_jacobian, np.sqrt(regularization.alpha / 2) / regularization.prior_widths)

    # Augment the Jacobian matrix
    return np.vstack([jac, reg_jacobian])
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
def _compute_jacobian_serial(ff_optimize, configurations,
                             bonds, angles, dihedrals, bond2params):
    """
    Serial evaluation of jacobian_ff over all configurations.

    Args:
        ff_optimize: Force field parameters
        configurations (list): Configuration dictionaries providing 'qm_forces'
            and 'qm_coordinates'
        bonds, angles, dihedrals: Interaction lists
        bond2params: Parameter mapping

    Returns:
        list: One jacobian_ff result per configuration, in input order
    """
    def jacobian_of(cfg):
        # The QM atom count is taken from the length of the forces array.
        n_qm_atoms = len(cfg['qm_forces'])
        return jacobian_ff(ff_optimize,
                           cfg["qm_coordinates"],
                           bond2params,
                           bonds, angles, dihedrals,
                           qm_atoms_count=n_qm_atoms)

    return [jacobian_of(cfg) for cfg in configurations]
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
def _compute_jacobian_parallel(ff_optimize, configurations,
                               bonds, angles, dihedrals, bond2params, n_processes):
    """
    Evaluate the per-configuration Jacobians in a multiprocessing pool.

    Args:
        ff_optimize: Force field parameters
        configurations (list): Configuration dictionaries
        bonds, angles, dihedrals: Interaction lists
        bond2params: Parameter mapping
        n_processes (int): Number of worker processes in the pool

    Returns:
        list: One _process_single_jacobian result per configuration, in input order
    """
    # One argument tuple per configuration for the worker function.
    jobs = [
        (ff_optimize, cfg, bonds, angles, dihedrals, bond2params)
        for cfg in configurations
    ]

    with mp.Pool(processes=n_processes) as pool:
        per_configuration = pool.map(_process_single_jacobian, jobs)

    return per_configuration
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
def optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename, regularizer=None,
                    n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using least squares minimization with optional L2 regularization.

    The configurations are split into a training and a validation set. The
    parameters are fitted on the training set with the Levenberg-Marquardt
    method; if the validation set is non-empty, the fitted parameters are then
    evaluated on it, and the validation figures are appended to the output
    file and attached to the result object.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
        n_processes (int, optional): Number of processes to use for parallelization
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        scipy.optimize.OptimizeResult: Result of the optimization. When validation
            configurations exist it additionally carries the attributes
            validation_sdf, validation_cost, validation_residuals and
            validation_ref_bonded.
    """
    # Get interactions from QMRegion
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # Split configurations into training and validation sets
    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # compute_obj_lm and jacobian_ff_obj pick the process count out of this dict.
    parallel_settings = {'n_processes': n_processes}

    # No parameter bounds are passed: scipy's 'lm' method does not support
    # them, so the bound arrays that used to be built here were never used.
    res = least_squares(fun=compute_obj_lm,
                        x0=ff_optimize,
                        jac=jacobian_ff_obj,
                        method='lm',
                        args=(train_configurations,
                              bonds, angles, dihedrals, bond2params, filename, regularizer, parallel_settings),
                        verbose=1)

    # Compute validation SDF
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            res.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )

        # Log validation results
        with open(filename, 'a') as f:
            f.write("\n# VALIDATION RESULTS\n")
            f.write(f"# Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n\n")

        # Store validation results in the optimization result object
        res.validation_sdf = val_sdf
        res.validation_cost = val_cost
        res.validation_residuals = val_residuals
        res.validation_ref_bonded = val_ref_bonded

    return res
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def hierarchical_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                                 regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using hierarchical approach:
    1. Optimize bonds first (highest energy contribution)
    2. Fix bond parameters, optimize angles
    3. Fix bond and angle parameters, optimize dihedrals

    This approach respects the energy hierarchy and should produce more physically realistic parameters.

    A stage is skipped when no parameter belongs to it. Each stage runs
    _optimize_parameter_subset on the training configurations and writes its
    result back into the working parameter array before the next stage starts.
    If the validation set is non-empty, the final parameters are evaluated on
    it and the figures are logged and appended to the output file.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object; a per-stage
            regularizer is built from its entries selected by the stage mask
        fm_input (FMInput, optional): FMInput object containing optimization controls
            (accepted for interface compatibility; not read by this function)
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        tuple: (optimized_parameters, optimization_results) where optimized_parameters is the final
               parameter array and optimization_results is a list of (stage_name, result) tuples,
               one for each stage that ran. With an empty ff_optimize the input is returned
               unchanged together with an empty list.
    """
    # Get interactions from QMRegion
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # Split configurations into training and validation sets
    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # Validate that we have parameters to optimize
    n_params = len(ff_optimize)
    if n_params == 0:
        logging.warning("Warning: No parameters to optimize")
        return ff_optimize, []

    def params_mask_for(interactions):
        """Boolean mask over ff_optimize of the parameters used by optimized interactions."""
        mask = np.zeros(n_params, dtype=bool)
        for interaction in interactions:
            if not interaction.get('optimize', False):
                continue
            for idx in bond2params.get(interaction['index'], []):
                # None marks a parameter slot that is kept fixed.
                if idx is not None:
                    mask[idx] = True
        return mask

    # Identify which parameters belong to which interaction types
    bond_params_mask = params_mask_for(bonds)
    angle_params_mask = params_mask_for(angles)
    dihedral_params_mask = params_mask_for(dihedrals)

    # Debug information
    logging.info(f"Total parameters: {len(ff_optimize)}")
    logging.info(f"Bond parameters: {np.sum(bond_params_mask)}")
    logging.info(f"Angle parameters: {np.sum(angle_params_mask)}")
    logging.info(f"Dihedral parameters: {np.sum(dihedral_params_mask)}")

    # Validate regularizer dimensions (mismatches are reported, not fatal)
    if regularizer is not None:
        if len(regularizer.initial_params) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer parameter count ({len(regularizer.initial_params)}) doesn't match optimization parameters ({len(ff_optimize)})")
        if len(regularizer.prior_widths) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer prior widths count ({len(regularizer.prior_widths)}) doesn't match optimization parameters ({len(ff_optimize)})")
        if regularizer.param_types and len(regularizer.param_types) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer parameter types count ({len(regularizer.param_types)}) doesn't match optimization parameters ({len(ff_optimize)})")
        else:
            logging.info(f"Parameter types: {len(regularizer.param_types) if regularizer.param_types else 0}")
    else:
        logging.info("No regularizer defined")

    current_params = ff_optimize.copy()
    optimization_results = []

    # (stage name, label used in the completion message, parameter mask, start message)
    stages = (
        ("bonds", "Bond", bond_params_mask,
         "Stage 1: Optimizing bond parameters..."),
        ("angles", "Angle", angle_params_mask,
         "Stage 2: Optimizing angle parameters (bonds fixed)..."),
        ("dihedrals", "Dihedral", dihedral_params_mask,
         "Stage 3: Optimizing dihedral parameters (bonds and angles fixed)..."),
    )

    for stage_name, label, mask, start_message in stages:
        if not np.any(mask):
            continue
        logging.info(start_message)

        # Restrict the regularizer to the parameters of this stage
        stage_regularizer = None
        if regularizer is not None:
            stage_param_types = _create_param_types_for_mask(regularizer.param_types, mask)
            stage_regularizer = L2Regularizer(
                regularizer.initial_params[mask],
                regularizer.prior_widths[mask],
                regularizer.alpha,
                stage_param_types
            )

        # Optimize only the masked parameters; all others keep their current values
        stage_result = _optimize_parameter_subset(
            current_params, mask, train_configurations,
            bonds, angles, dihedrals, bond2params, filename,
            stage_regularizer, stage_name, n_processes
        )

        # Later stages start from the parameters optimized so far
        current_params[mask] = stage_result.x
        optimization_results.append((stage_name, stage_result))

        logging.info(f"{label} optimization completed. Final objective: {stage_result.cost}")

    # Compute validation SDF on final optimized parameters
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            current_params, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )

        # Log validation results
        with open(filename, 'a') as f:
            f.write("\n# HIERARCHICAL OPTIMIZATION VALIDATION RESULTS\n")
            f.write(f"# Final Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Final Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n")
            f.write(f"# Training configurations: {len(train_configurations)}\n\n")

        logging.info(f"Final validation SDF: {val_sdf:.6f}")
        logging.info(f"Final validation cost: {val_cost:.6f}")

    return current_params, optimization_results
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
def _optimize_parameter_subset(current_params, param_mask, configurations,
                               bonds, angles, dihedrals, bond2params, filename,
                               regularizer, stage_name, n_processes=None):
    """
    Optimize a subset of parameters while keeping the others fixed.

    The entries of ``current_params`` selected by ``param_mask`` are fitted
    with ``scipy.optimize.least_squares`` using ``method='lm'``. Every
    residual evaluation appends one line to ``filename`` (objective, SDF,
    RMS value, then the full parameter vector), and a short summary is
    appended once the fit has finished.

    Note:
        ``method='lm'`` is an unconstrained solver and no bounds are passed
        to it, so the fitted parameters are not forced to stay non-negative.

    Args:
        current_params (np.ndarray): Current values of all parameters
        param_mask (np.ndarray): Boolean mask indicating which parameters to optimize
        configurations (list): List of configuration dictionaries
        bonds, angles, dihedrals: Interaction lists
        bond2params (dict): Parameter mapping
        filename (str): Output filename (opened in append mode)
        regularizer (L2Regularizer or None): Regularizer for this subset; when
            None, no regularization term, residuals or Jacobian rows are added
        stage_name (str): Name of optimization stage for logging
        n_processes (int, optional): Forwarded to ``compute_ff_obj`` and
            ``jacobian_ff_obj``

    Returns:
        scipy.optimize.OptimizeResult: Optimization result; ``x`` holds only
        the optimized subset, in the order selected by ``param_mask``
    """
    def _expand(subset_params):
        # Rebuild the full parameter vector: fixed entries come from
        # current_params, optimized entries from the solver's trial vector.
        full_params = current_params.copy()
        full_params[param_mask] = subset_params
        return full_params

    def subset_residuals(subset_params):
        full_params = _expand(subset_params)

        residue, ref_bonded = compute_ff_obj(full_params, configurations,
                                             bonds, angles, dihedrals, bond2params, n_processes)

        # Objective value for logging: squared residuals plus the
        # regularization term when a regularizer is supplied.
        sum_residual = np.sum(residue**2)
        if regularizer is not None:
            sum_residual += regularizer.compute_regularization_term(subset_params)

        sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
        abs_val = np.sqrt(sum_residual / np.size(residue))

        with open(filename, '+a') as file:
            file.write(f"{sum_residual} {sdf} {abs_val} ")
            file.write(' '.join([str(x) for x in full_params]))
            file.write('\n')

        # The solver minimizes the augmented residual vector, so the
        # regularization residuals are appended after the force residuals.
        if regularizer is not None:
            reg_residuals = regularizer.compute_regularization_residuals(subset_params)
            residue = np.concatenate([residue.ravel(), reg_residuals])

        return residue

    def subset_jacobian(subset_params):
        full_jac = jacobian_ff_obj(_expand(subset_params), configurations,
                                   bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})

        # Keep only the columns of the parameters being optimized.
        subset_jac = full_jac[:, param_mask]

        # Rows for the regularization residuals: a diagonal block, one row
        # per optimized parameter.
        if regularizer is not None:
            n_params = np.sum(param_mask)
            reg_jacobian = np.zeros((n_params, n_params))
            np.fill_diagonal(reg_jacobian, np.sqrt(regularizer.alpha / 2) / regularizer.prior_widths)
            subset_jac = np.vstack([subset_jac, reg_jacobian])

        return subset_jac

    # Starting point: the current values of the selected parameters.
    x0 = current_params[param_mask]

    result = least_squares(
        fun=subset_residuals,
        x0=x0,
        jac=subset_jacobian,
        method='lm',
        verbose=1
    )

    # Append a summary of this stage to the output file.
    with open(filename, 'a') as f:
        f.write(f"\n# {stage_name.upper()} OPTIMIZATION RESULTS\n")
        f.write(f"# Final cost: {result.cost}\n")
        f.write(f"# Number of iterations: {result.nfev}\n")
        f.write(f"# Success: {result.success}\n")
        f.write(f"# Optimized parameters: {result.x}\n\n")

    return result
|
|
1442
|
+
|
|
1443
|
+
|
|
1444
|
+
def energy_weighted_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                                    regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using an energy-weighted objective function.

    Weights for bonds, angles and dihedrals are read from ``fm_input``
    (``bond_energy_weight``, ``angle_energy_weight``,
    ``dihedral_energy_weight``; defaults 1.0, 0.1 and 0.01).

    Note:
        The weighting is approximate. Residuals are not attributed to
        individual interaction types, so the weight of every interaction type
        that has at least one optimizable parameter is multiplied into ALL
        force residuals and Jacobian rows. Regularization residuals are left
        unscaled. No bounds are applied: ``method='lm'`` is unconstrained.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results (appended to)
        regularizer (L2Regularizer, optional): L2 regularization object
        fm_input (FMInput, optional): FMInput object containing optimization controls
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        scipy.optimize.OptimizeResult: Result of the optimization. When a
        validation set exists, the attributes ``validation_sdf``,
        ``validation_cost``, ``validation_residuals`` and
        ``validation_ref_bonded`` are added to it.
    """
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # getattr falls back to the default both when fm_input is None and when
    # the attribute is missing.
    bond_weight = getattr(fm_input, 'bond_energy_weight', 1.0)
    angle_weight = getattr(fm_input, 'angle_energy_weight', 0.1)
    dihedral_weight = getattr(fm_input, 'dihedral_energy_weight', 0.01)

    def _optimized_param_mask(interactions):
        # Mark every parameter index referenced by an interaction that is
        # flagged for optimization.
        mask = np.zeros(len(ff_optimize), dtype=bool)
        for interaction in interactions:
            if not interaction.get('optimize', False):
                continue
            for idx in bond2params.get(interaction['index'], []):
                if idx is not None:
                    mask[idx] = True
        return mask

    bond_mask = _optimized_param_mask(bonds)
    angle_mask = _optimized_param_mask(angles)
    dihedral_mask = _optimized_param_mask(dihedrals)

    # The masks never change during the fit, so decide once which weights are
    # active. They are applied one after another, in bond/angle/dihedral order.
    active_weights = [
        weight
        for weight, mask in ((bond_weight, bond_mask),
                             (angle_weight, angle_mask),
                             (dihedral_weight, dihedral_mask))
        if np.any(mask)
    ]

    def energy_weighted_residuals(params):
        """Compute residuals with energy-based weighting."""
        residue, ref_bonded = compute_ff_obj(params, train_configurations,
                                             bonds, angles, dihedrals, bond2params, n_processes)

        weighted_residue = residue.copy()
        for weight in active_weights:
            weighted_residue *= weight

        # The logged objective uses the unweighted residuals so that values
        # stay comparable with the other optimization methods.
        sum_residual = np.sum(residue**2)
        if regularizer is not None:
            sum_residual += regularizer.compute_regularization_term(params)

        sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
        abs_val = np.sqrt(sum_residual / np.size(residue))

        with open(filename, '+a') as file:
            file.write(f"{sum_residual} {sdf} {abs_val} ")
            file.write(' '.join([str(x) for x in params]))
            file.write('\n')

        # Regularization residuals are appended unscaled.
        if regularizer is not None:
            reg_residuals = regularizer.compute_regularization_residuals(params)
            weighted_residue = np.concatenate([weighted_residue.ravel(), reg_residuals])

        return weighted_residue

    def energy_weighted_jacobian(params):
        """Compute Jacobian with energy-based weighting."""
        jac = jacobian_ff_obj(params, train_configurations,
                              bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})

        # Same scaling as the residuals so both stay consistent.
        weighted_jac = jac.copy()
        for weight in active_weights:
            weighted_jac *= weight

        # Diagonal block for the regularization residuals.
        if regularizer is not None:
            n_params = len(params)
            reg_jacobian = np.zeros((n_params, n_params))
            np.fill_diagonal(reg_jacobian, np.sqrt(regularizer.alpha / 2) / regularizer.prior_widths)
            weighted_jac = np.vstack([weighted_jac, reg_jacobian])

        return weighted_jac

    result = least_squares(
        fun=energy_weighted_residuals,
        x0=ff_optimize,
        jac=energy_weighted_jacobian,
        method='lm',
        verbose=1
    )

    # Compute validation SDF and attach it to the result object.
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            result.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )
        result.validation_sdf = val_sdf
        result.validation_cost = val_cost
        result.validation_residuals = val_residuals
        result.validation_ref_bonded = val_ref_bonded

    # Append a summary of the run to the output file.
    with open(filename, 'a') as f:
        f.write("\n# ENERGY-WEIGHTED OPTIMIZATION RESULTS\n")
        f.write(f"# Bond weight: {bond_weight}\n")
        f.write(f"# Angle weight: {angle_weight}\n")
        f.write(f"# Dihedral weight: {dihedral_weight}\n")
        f.write(f"# Final cost: {result.cost}\n")
        f.write(f"# Number of iterations: {result.nfev}\n")
        f.write(f"# Success: {result.success}\n")
        f.write(f"# Optimized parameters: {result.x}\n")
        if val_configurations:
            f.write(f"# Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n")
            f.write(f"# Training configurations: {len(train_configurations)}\n")
        f.write("\n")

    return result
|
|
1632
|
+
|
|
1633
|
+
def adaptive_regularization_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                                            fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using adaptive regularization based on energy hierarchy.

    An ``AdaptiveL2Regularizer`` (defined elsewhere) is built from
    ``adaptive_base_alpha`` and ``energy_hierarchy_scale`` (read from
    ``fm_input``; defaults 0.1 and 10.0). It supplies per-parameter
    regularization strengths, intended to be:
    - Bonds: Strong regularization (prevent overfitting of high-energy terms)
    - Angles: Medium regularization
    - Dihedrals: Weak regularization (allow flexibility for low-energy terms)

    Note:
        The fit uses ``method='lm'``, which is unconstrained; no bounds are
        applied to the parameters.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results (appended to)
        fm_input (FMInput, optional): FMInput object containing optimization controls
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        scipy.optimize.OptimizeResult: Result of the optimization. When a
        validation set exists, the attributes ``validation_sdf``,
        ``validation_cost``, ``validation_residuals`` and
        ``validation_ref_bonded`` are added to it.
    """
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # getattr falls back to the default both when fm_input is None and when
    # the attribute is missing.
    base_alpha = getattr(fm_input, 'adaptive_base_alpha', 0.1)
    energy_hierarchy_scale = getattr(fm_input, 'energy_hierarchy_scale', 10.0)

    adaptive_regularizer = AdaptiveL2Regularizer(
        ff_optimize, bond2params, bonds, angles, dihedrals,
        base_alpha=base_alpha,
        energy_hierarchy_scale=energy_hierarchy_scale
    )

    def adaptive_residuals(params):
        """Compute residuals with adaptive regularization."""
        residue, ref_bonded = compute_ff_obj(params, train_configurations,
                                             bonds, angles, dihedrals, bond2params, n_processes)

        # Objective value for logging: squared residuals plus the
        # regularization term.
        sum_residual = np.sum(residue**2)
        sum_residual += adaptive_regularizer.compute_regularization_term(params)

        sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
        abs_val = np.sqrt(sum_residual / np.size(residue))

        with open(filename, '+a') as file:
            file.write(f"{sum_residual} {sdf} {abs_val} ")
            file.write(' '.join([str(x) for x in params]))
            file.write('\n')

        # The solver minimizes the force residuals followed by the
        # regularization residuals.
        reg_residuals = adaptive_regularizer.compute_regularization_residuals(params)
        return np.concatenate([residue.ravel(), reg_residuals])

    def adaptive_jacobian(params):
        """Compute Jacobian with adaptive regularization."""
        jac = jacobian_ff_obj(params, train_configurations,
                              bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})

        # Diagonal block for the regularization residuals, built from the
        # per-parameter strengths.
        n_params = len(params)
        reg_jacobian = np.zeros((n_params, n_params))
        np.fill_diagonal(reg_jacobian, np.sqrt(adaptive_regularizer.adaptive_alphas / 2) / adaptive_regularizer.prior_widths)

        return np.vstack([jac, reg_jacobian])

    result = least_squares(
        fun=adaptive_residuals,
        x0=ff_optimize,
        jac=adaptive_jacobian,
        method='lm',
        verbose=1
    )

    # Compute validation SDF and attach it to the result object.
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            result.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )
        result.validation_sdf = val_sdf
        result.validation_cost = val_cost
        result.validation_residuals = val_residuals
        result.validation_ref_bonded = val_ref_bonded

    # Append a summary of the run to the output file.
    with open(filename, 'a') as f:
        f.write("\n# ADAPTIVE REGULARIZATION OPTIMIZATION RESULTS\n")
        f.write(f"# Base alpha: {base_alpha}\n")
        f.write(f"# Energy hierarchy scale: {energy_hierarchy_scale}\n")
        f.write(f"# Bond parameters: {np.sum(adaptive_regularizer.bond_params_mask)}\n")
        f.write(f"# Angle parameters: {np.sum(adaptive_regularizer.angle_params_mask)}\n")
        f.write(f"# Dihedral parameters: {np.sum(adaptive_regularizer.dihedral_params_mask)}\n")
        f.write(f"# Final cost: {result.cost}\n")
        f.write(f"# Number of iterations: {result.nfev}\n")
        f.write(f"# Success: {result.success}\n")
        f.write(f"# Optimized parameters: {result.x}\n")
        if val_configurations:
            f.write(f"# Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n")
            f.write(f"# Training configurations: {len(train_configurations)}\n")
        f.write("\n")

    return result
|
|
1766
|
+
|
|
1767
|
+
def unified_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                            regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Single entry point that dispatches to one of the force field optimizers.

    The optimizer is chosen by ``fm_input.optimization_method``. If that
    attribute is missing or None it is set to ``'hierarchical'`` on the
    ``fm_input`` object itself. The chosen method name is printed to stdout.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
            (not forwarded to the 'adaptive' method)
        fm_input (FMInput, optional): FMInput object containing optimization
            controls; a default FMInput is created when None
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        tuple: (optimized_parameters, optimization_result). For the
        'hierarchical' method the result is that of the last stage, or None
        when no stage was run.

    Raises:
        ValueError: If the optimization method is not one of 'hierarchical',
            'energy_weighted', 'adaptive' or 'simultaneous'.
    """
    if fm_input is None:
        fm_input = FMInput()

    # Default the method on the input object when it is absent or None.
    if getattr(fm_input, 'optimization_method', None) is None:
        fm_input.optimization_method = 'hierarchical'

    chosen = fm_input.optimization_method

    print(f"Using optimization method: {chosen}")

    if chosen == 'hierarchical':
        # Sequential, stage-by-stage optimization.
        final_params, stage_results = hierarchical_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, fm_input, n_processes, validation_fraction, random_seed
        )
        # Only the last stage's result is handed back, for compatibility
        # with the single-result methods.
        last_result = stage_results[-1][1] if stage_results else None
        return final_params, last_result

    if chosen == 'energy_weighted':
        # All parameters at once, with energy-based weights.
        outcome = energy_weighted_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, fm_input, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    if chosen == 'adaptive':
        # All parameters at once, with adaptive regularization.
        outcome = adaptive_regularization_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            fm_input, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    if chosen == 'simultaneous':
        # Traditional optimization of all parameters at once.
        outcome = optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    raise ValueError(f"Unknown optimization method: {chosen}. "
                     f"Supported methods: 'hierarchical', 'energy_weighted', 'adaptive', 'simultaneous'")
|
|
1841
|
+
|
|
1842
|
+
|
|
1843
|
+
def _create_param_types_for_mask(param_types, param_mask):
|
|
1844
|
+
"""
|
|
1845
|
+
Create a parameter types list for a specific parameter mask.
|
|
1846
|
+
|
|
1847
|
+
Args:
|
|
1848
|
+
param_types (list): Original parameter types list
|
|
1849
|
+
param_mask (np.ndarray): Boolean mask indicating which parameters to include
|
|
1850
|
+
|
|
1851
|
+
Returns:
|
|
1852
|
+
list: Parameter types for the masked parameters
|
|
1853
|
+
"""
|
|
1854
|
+
if param_types is None:
|
|
1855
|
+
return None
|
|
1856
|
+
|
|
1857
|
+
# Create a list of parameter types for the masked parameters only
|
|
1858
|
+
masked_param_types = []
|
|
1859
|
+
for i, is_included in enumerate(param_mask):
|
|
1860
|
+
if is_included and i < len(param_types):
|
|
1861
|
+
masked_param_types.append(param_types[i])
|
|
1862
|
+
|
|
1863
|
+
return masked_param_types
|
|
1864
|
+
|
|
1865
|
+
|
|
1866
|
+
def _copy_fm_input(fm_input, new_method=None):
|
|
1867
|
+
"""
|
|
1868
|
+
Create a copy of FMInput object with optional method override.
|
|
1869
|
+
|
|
1870
|
+
Args:
|
|
1871
|
+
fm_input: FMInput object to copy
|
|
1872
|
+
new_method: Optional new optimization method to set
|
|
1873
|
+
|
|
1874
|
+
Returns:
|
|
1875
|
+
FMInput: Copy of the input object
|
|
1876
|
+
"""
|
|
1877
|
+
if fm_input is None:
|
|
1878
|
+
return FMInput()
|
|
1879
|
+
|
|
1880
|
+
# Try to use the copy method if available
|
|
1881
|
+
if hasattr(fm_input, 'copy'):
|
|
1882
|
+
try:
|
|
1883
|
+
copy_input = fm_input.copy()
|
|
1884
|
+
except:
|
|
1885
|
+
# Fallback: create new object with same parameters
|
|
1886
|
+
copy_input = FMInput()
|
|
1887
|
+
# Copy all attributes manually
|
|
1888
|
+
for attr in dir(fm_input):
|
|
1889
|
+
if not attr.startswith('_') and not callable(getattr(fm_input, attr)):
|
|
1890
|
+
try:
|
|
1891
|
+
setattr(copy_input, attr, getattr(fm_input, attr))
|
|
1892
|
+
except:
|
|
1893
|
+
pass
|
|
1894
|
+
else:
|
|
1895
|
+
# Fallback: create new object with same parameters
|
|
1896
|
+
copy_input = FMInput()
|
|
1897
|
+
# Copy all attributes manually
|
|
1898
|
+
for attr in dir(fm_input):
|
|
1899
|
+
if not attr.startswith('_') and not callable(getattr(fm_input, attr)):
|
|
1900
|
+
try:
|
|
1901
|
+
setattr(copy_input, attr, getattr(fm_input, attr))
|
|
1902
|
+
except:
|
|
1903
|
+
pass
|
|
1904
|
+
|
|
1905
|
+
# Set new method if specified
|
|
1906
|
+
if new_method is not None:
|
|
1907
|
+
copy_input.optimization_method = new_method
|
|
1908
|
+
|
|
1909
|
+
return copy_input
|
|
1910
|
+
|
|
1911
|
+
|
|
1912
|
+
def compare_optimization_methods(qm_region, ff_optimize, configurations, bond2params, filename,
                                 regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Compare different optimization methods and report their performance.

    Each of 'hierarchical', 'energy_weighted', 'adaptive' and 'simultaneous'
    is run through ``unified_optimization_ff`` on a copy of ``ff_optimize``,
    writing its log to ``f"{filename}_{method}"``. Training and validation
    cost/SDF are collected per method and a tab-separated summary is written
    to ``f"{filename}_comparison"``. A method that raises is recorded as
    ``{'error': message}`` and does not stop the comparison.

    Note:
        When ``random_seed`` is None a seed is drawn once and used for every
        method and for the metric computation. Without it, each call to
        ``split_configurations_train_val`` would shuffle differently, and the
        reported training/validation figures would be computed on a split
        other than the one the parameters were fitted on. As a consequence
        the split helper always seeds the global ``random`` and NumPy
        generators during a comparison run.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Prefix of the output files
        regularizer (L2Regularizer, optional): L2 regularization object
        fm_input (FMInput, optional): FMInput object containing optimization controls
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        dict: Maps each method name either to a dict with the keys
        'optimized_params', 'train_cost', 'train_sdf', 'val_cost', 'val_sdf',
        'success' and 'nfev', or to ``{'error': str}``.
    """
    if fm_input is None:
        fm_input = FMInput()

    # Pin one split for the whole comparison (see Note in the docstring).
    # The range is limited to 32 bits because the seed is also handed to
    # np.random.seed by the split helper.
    if random_seed is None:
        random_seed = random.randrange(2**32)

    methods = ['hierarchical', 'energy_weighted', 'adaptive', 'simultaneous']
    results = {}

    logging.info("Comparing optimization methods...")

    for method in methods:
        logging.info(f"\nTesting method: {method}")

        # Work on a copy so the caller's fm_input keeps its own method.
        test_fm_input = _copy_fm_input(fm_input, method)

        try:
            optimized_params, opt_result = unified_optimization_ff(
                qm_region, ff_optimize.copy(), configurations, bond2params,
                f"{filename}_{method}", regularizer, test_fm_input, n_processes, validation_fraction, random_seed
            )

            # Same seed as the optimizer, hence the same split.
            train_configs, val_configs = split_configurations_train_val(
                configurations, validation_fraction, random_seed
            )

            qm_interactions = qm_region.qm_interactions
            bonds = qm_interactions['bonds']
            angles = qm_interactions['angles']
            dihedrals = qm_interactions['dihedrals']

            # Reuse the validation metrics stored on the result when the
            # optimizer provided them; otherwise compute them here.
            if getattr(opt_result, 'validation_sdf', None) is not None:
                val_sdf = opt_result.validation_sdf
                val_cost = opt_result.validation_cost
            else:
                val_sdf, val_cost, _, _ = compute_validation_sdf(
                    optimized_params, val_configs,
                    bonds, angles, dihedrals,
                    bond2params, n_processes
                )

            # Training metrics, without regularization, for comparison.
            train_residue, train_ref_bonded = compute_ff_obj(
                optimized_params, train_configs,
                bonds, angles, dihedrals,
                bond2params, n_processes
            )

            train_cost = np.sum(train_residue**2)
            train_sdf = np.sqrt(train_cost / np.sum(train_ref_bonded**2))

            results[method] = {
                'optimized_params': optimized_params,
                'train_cost': train_cost,
                'train_sdf': train_sdf,
                'val_cost': val_cost,
                'val_sdf': val_sdf,
                'success': opt_result.success if opt_result else True,
                'nfev': opt_result.nfev if opt_result else 0
            }

            logging.info(f"  Training cost: {train_cost:.6f}")
            logging.info(f"  Training SDF: {train_sdf:.6f}")
            logging.info(f"  Validation cost: {val_cost:.6f}")
            logging.info(f"  Validation SDF: {val_sdf:.6f}")
            logging.info(f"  Success: {results[method]['success']}")

        except Exception as e:
            # One failing method must not abort the whole comparison.
            logging.error(f"  Error in method {method}: {str(e)}")
            results[method] = {'error': str(e)}

    # Write comparison summary
    with open(f"{filename}_comparison", 'w') as f:
        f.write("# OPTIMIZATION METHOD COMPARISON\n")
        f.write("# Method\tTrain Cost\tTrain SDF\tVal Cost\tVal SDF\tSuccess\tFunction Evaluations\n")

        for method in methods:
            entry = results.get(method)
            if entry is not None and 'error' not in entry:
                f.write(f"{method}\t{entry['train_cost']:.6f}\t"
                        f"{entry['train_sdf']:.6f}\t"
                        f"{entry['val_cost']:.6f}\t"
                        f"{entry['val_sdf']:.6f}\t"
                        f"{entry['success']}\t"
                        f"{entry['nfev']}\n")
            else:
                f.write(f"{method}\tERROR\tERROR\tERROR\tERROR\tFalse\t0\n")

    return results
|
|
2023
|
+
|
|
2024
|
+
|
|
2025
|
+
def _process_single_jacobian(args):
    """
    Evaluate the Jacobian for one configuration.

    All inputs arrive packed in a single tuple so the function can be used
    as a one-argument worker (the original docstring states it is meant for
    parallel Jacobian computation).

    Args:
        args: Tuple of (ff_optimize, config, bonds, angles, dihedrals,
            bond2params). ``config`` must provide the keys ``'qm_forces'``
            and ``'qm_coordinates'``.

    Returns:
        Whatever ``jacobian_ff`` returns for this configuration
        (documented upstream as a numpy.ndarray Jacobian matrix).
    """
    (parameters, configuration,
     bond_list, angle_list, dihedral_list,
     param_map) = args

    # The number of QM atoms is taken from the length of the forces entry.
    n_qm_atoms = len(configuration['qm_forces'])

    return jacobian_ff(
        parameters,
        configuration["qm_coordinates"],
        param_map,
        bond_list,
        angle_list,
        dihedral_list,
        qm_atoms_count=n_qm_atoms,
    )
|
|
2046
|
+
|
|
2047
|
+
|
|
2048
|
+
def split_configurations_train_val(configurations, validation_fraction=0.1, random_seed=None):
    """
    Randomly split configurations into training and validation sets.

    The validation set receives ``int(len(configurations) * validation_fraction)``
    entries, but at least one (when any configuration exists) and never more
    than the number of configurations available. The remaining entries form
    the training set.

    Args:
        configurations (list): List of configuration dictionaries
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible splits.
            When given, the global ``random`` and ``numpy.random`` generators
            are both re-seeded with it.

    Returns:
        tuple: (train_configurations, val_configurations)
    """
    if random_seed is not None:
        # Re-seeds the module-level generators; kept as-is because code
        # running after this call may rely on that side effect.
        random.seed(random_seed)
        np.random.seed(random_seed)

    n_configs = len(configurations)

    # Cap the validation size at n_configs. Without the cap, an empty input
    # or a fraction above 1 made n_train negative, and the negative slice
    # bound below silently produced a split that disagreed with the log.
    n_val = min(n_configs, max(1, int(n_configs * validation_fraction)))
    n_train = n_configs - n_val

    # Shuffle positions rather than the caller's list, which stays untouched.
    indices = list(range(n_configs))
    random.shuffle(indices)

    train_configurations = [configurations[i] for i in indices[:n_train]]
    val_configurations = [configurations[i] for i in indices[n_train:]]

    if n_configs and not n_train:
        logging.warning("All configurations were assigned to validation; training set is empty")

    logging.info(f"Split {n_configs} configurations into {n_train} training and {n_val} validation")

    return train_configurations, val_configurations
|
|
2083
|
+
|
|
2084
|
+
|
|
2085
|
+
def compute_validation_sdf(ff_optimize, val_configurations, bonds, angles, dihedrals, bond2params, n_processes=None):
    """
    Evaluate the given parameters on a validation set.

    The residuals and reference values come from ``compute_ff_obj``. The
    cost is the sum of squared residuals, and the SDF is the square root of
    that cost divided by the sum of squared reference values.

    Args:
        ff_optimize (numpy.ndarray): Optimized force field parameters
        val_configurations (list): Validation configuration list
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        n_processes (int, optional): Forwarded unchanged to ``compute_ff_obj``

    Returns:
        tuple: (val_sdf, val_cost, val_residuals, val_ref_bonded), or four
        ``None`` values when ``val_configurations`` is empty or None.
    """
    if val_configurations:
        # Residuals and reference values for the validation configurations.
        residuals, reference = compute_ff_obj(
            ff_optimize,
            val_configurations,
            bonds,
            angles,
            dihedrals,
            bond2params,
            n_processes,
        )

        val_cost = np.sum(residuals**2)
        reference_norm_sq = np.sum(reference**2)
        val_sdf = np.sqrt(val_cost / reference_norm_sq)

        logging.info(f"Validation SDF: {val_sdf:.6f}")
        logging.info(f"Validation cost: {val_cost:.6f}")

        return val_sdf, val_cost, residuals, reference

    # Nothing to validate against: report it and return placeholders.
    logging.warning("No validation configurations provided")
    return None, None, None, None
|