mimicpy 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. mimicpy/__init__.py +1 -1
  2. mimicpy/__main__.py +726 -2
  3. mimicpy/_authors.py +2 -2
  4. mimicpy/_version.py +2 -2
  5. mimicpy/coords/__init__.py +1 -1
  6. mimicpy/coords/base.py +1 -1
  7. mimicpy/coords/cpmdgeo.py +1 -1
  8. mimicpy/coords/gro.py +1 -1
  9. mimicpy/coords/pdb.py +1 -1
  10. mimicpy/core/__init__.py +1 -1
  11. mimicpy/core/prepare.py +3 -3
  12. mimicpy/core/selector.py +1 -1
  13. mimicpy/force_matching/__init__.py +34 -0
  14. mimicpy/force_matching/bonded_forces.py +628 -0
  15. mimicpy/force_matching/compare_top.py +809 -0
  16. mimicpy/force_matching/dresp.py +435 -0
  17. mimicpy/force_matching/nonbonded_forces.py +32 -0
  18. mimicpy/force_matching/opt_ff.py +2114 -0
  19. mimicpy/force_matching/qm_region.py +1960 -0
  20. mimicpy/plugins/__main_installer__.py +76 -0
  21. mimicpy/{__main_vmd__.py → plugins/__main_vmd__.py} +2 -2
  22. mimicpy/plugins/pymol.py +56 -0
  23. mimicpy/plugins/vmd.tcl +78 -0
  24. mimicpy/scripts/__init__.py +1 -1
  25. mimicpy/scripts/cpmd.py +1 -1
  26. mimicpy/scripts/fm_input.py +265 -0
  27. mimicpy/scripts/fmdata.py +120 -0
  28. mimicpy/scripts/mdp.py +1 -1
  29. mimicpy/scripts/ndx.py +1 -1
  30. mimicpy/scripts/script.py +1 -1
  31. mimicpy/topology/__init__.py +1 -1
  32. mimicpy/topology/itp.py +603 -35
  33. mimicpy/topology/mpt.py +1 -1
  34. mimicpy/topology/top.py +254 -15
  35. mimicpy/topology/topol_dict.py +233 -4
  36. mimicpy/utils/__init__.py +1 -1
  37. mimicpy/utils/atomic_numbers.py +1 -1
  38. mimicpy/utils/constants.py +17 -3
  39. mimicpy/utils/elements.py +1 -1
  40. mimicpy/utils/errors.py +1 -1
  41. mimicpy/utils/file_handler.py +1 -1
  42. mimicpy/utils/strings.py +1 -1
  43. mimicpy-0.3.0.dist-info/METADATA +156 -0
  44. mimicpy-0.3.0.dist-info/RECORD +50 -0
  45. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
  46. mimicpy-0.3.0.dist-info/entry_points.txt +4 -0
  47. mimicpy-0.2.0.dist-info/METADATA +0 -86
  48. mimicpy-0.2.0.dist-info/RECORD +0 -38
  49. mimicpy-0.2.0.dist-info/entry_points.txt +0 -3
  50. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
  51. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
  52. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
  53. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,2114 @@
1
+ import numpy as np
2
+ import logging
3
+ from ..utils.errors import MiMiCPyError
4
+ from ..utils.constants import au_to_nm, kb_au2gmx, au_kjm, nm_to_au, kb_gmx2au, kjm_au, kb_au2g96, kb_g962au
5
+ from .bonded_forces import *
6
+ from .nonbonded_forces import *
7
+ from ..scripts.fm_input import FMInput
8
+ from scipy.optimize import least_squares
9
+ import MDAnalysis as mda
10
+ import multiprocessing as mp
11
+ import random
12
+
13
+
14
# Number of atoms listed per entry in each topology section named by the key.
section_atom_counts = dict(bonds=2, angles=3, dihedrals=4, pairs=2)
20
+
21
class L2Regularizer:
    """
    L2 regularization for force field parameters.

    Implements the harmonic penalty function:
        Θ(p) = α * Σ_i (p_i - p_i^0)^2 / (2 * γ_i^2)

    where:
    - p_i are the current parameters
    - p_i^0 are the initial/reference parameters
    - γ_i are the prior widths (controls how much parameters can deviate)
    - α is the regularization strength
    """

    def __init__(self, initial_params, prior_widths=None, alpha=0.1, param_types=None):
        """
        Initialize L2 regularizer.

        Args:
            initial_params (np.ndarray): Initial parameter values (p^0)
            prior_widths (np.ndarray, optional): Prior widths (γ). If None, auto-generated
            alpha (float): Regularization strength (α)
            param_types (list, optional): List of parameter type strings for type-specific widths

        Raises:
            ValueError: If initial_params and prior_widths differ in length.
        """
        # Force a float dtype: with integer input the arrays would be integer
        # typed and a later per-type width such as 0.5 would be truncated to 0.
        self.initial_params = np.array(initial_params, dtype=float)
        self.alpha = alpha
        self.param_types = param_types

        if prior_widths is None:
            # Auto-generate prior widths from the magnitude of each parameter
            self.prior_widths = self._auto_generate_prior_widths(self.initial_params)
        else:
            # np.array copies, so the caller's array is never modified in place
            self.prior_widths = np.array(prior_widths, dtype=float)

        # Explicit raise instead of assert so the check survives `python -O`
        if len(self.initial_params) != len(self.prior_widths):
            raise ValueError("Initial parameters and prior widths must have same length")

    def _auto_generate_prior_widths(self, params):
        """
        Auto-generate prior widths from the parameter magnitudes.

        Each width is 5% of the absolute parameter value; parameters that are
        exactly zero get a default width of 0.05. No grouping by parameter
        type is done here.

        Args:
            params (array-like): Parameter values

        Returns:
            np.ndarray: One prior width per parameter
        """
        widths = np.abs(np.asarray(params, dtype=float)) * 0.05
        widths[widths == 0] = 0.05  # Default width for zero parameters
        return widths

    def set_prior_widths_by_type(self, type_widths):
        """
        Set prior widths based on parameter types.

        Parameters whose type does not appear in type_widths keep their
        current width.

        Args:
            type_widths (dict): Dictionary mapping parameter types to widths
                Example: {'bond_length': 0.1, 'bond_force': 50.0, 'angle_value': 0.1, ...}

        Raises:
            ValueError: If no parameter types were given to the constructor.
            MiMiCPyError: If the number of parameter types does not match the
                number of prior widths.
        """
        if self.param_types is None:
            raise ValueError("Parameter types must be set to use type-specific prior widths")

        # Safety check so the index loop below cannot run out of bounds
        if len(self.param_types) != len(self.prior_widths):
            raise MiMiCPyError(f"Parameter types length ({len(self.param_types)}) does not match prior widths length ({len(self.prior_widths)})")

        for i, param_type in enumerate(self.param_types):
            if param_type in type_widths:
                self.prior_widths[i] = type_widths[param_type]

    def set_prior_widths_from_fm_input(self, fm_input):
        """
        Set prior widths based on FMInput parameters.

        Only the regularization_*_width attributes of fm_input that are not
        None are applied.

        Args:
            fm_input: FMInput object containing regularization parameters
        """
        # (parameter type, FMInput attribute holding its width)
        attribute_map = (
            ('bond_length', 'regularization_bond_length_width'),
            ('bond_force', 'regularization_bond_force_width'),
            ('angle_value', 'regularization_angle_value_width'),
            ('angle_force', 'regularization_angle_force_width'),
            ('dihedral_force', 'regularization_dihedral_force_width'),
        )

        type_widths = {}
        for param_type, attribute in attribute_map:
            width = getattr(fm_input, attribute)
            if width is not None:
                type_widths[param_type] = width

        if type_widths:
            self.set_prior_widths_by_type(type_widths)

    def compute_regularization_term(self, current_params):
        """
        Compute the L2 regularization term.

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            float: Regularization penalty
        """
        diff = current_params - self.initial_params
        penalty = np.sum(diff**2 / (2 * self.prior_widths**2))
        return self.alpha * penalty

    def compute_regularization_gradient(self, current_params):
        """
        Compute the gradient of the L2 regularization term.

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            np.ndarray: Gradient of regularization term
        """
        diff = current_params - self.initial_params
        gradient = diff / self.prior_widths**2
        return self.alpha * gradient

    def compute_regularization_residuals(self, current_params):
        """
        Compute regularization residuals for augmented residual approach.

        The returned residuals can be appended to the force matching
        residuals for a least-squares solver; their sum of squares equals
        compute_regularization_term(current_params).

        Args:
            current_params (np.ndarray): Current parameter values

        Returns:
            np.ndarray: Regularization residuals
        """
        diff = current_params - self.initial_params
        # The regularization term is: α * Σ(p_i - p_i^0)² / (2 * γ_i²)
        # So the residuals are:       √(α/2) * (p_i - p_i^0) / γ_i
        residuals = np.sqrt(self.alpha / 2) * diff / self.prior_widths
        return residuals
159
+
160
class AdaptiveL2Regularizer:
    """
    Adaptive L2 regularization that applies different regularization strengths
    based on the energy hierarchy of interactions.

    Bond parameters get a larger alpha and a narrower prior width, dihedral
    parameters a smaller alpha and a wider prior width, angle parameters sit
    in between.
    """

    def __init__(self, initial_params, bond2params, bonds, angles, dihedrals,
                 base_alpha=0.1, energy_hierarchy_scale=10.0, param_types=None):
        """
        Initialize adaptive regularizer.

        Args:
            initial_params (np.ndarray): Initial parameter values
            bond2params (dict): Mapping of interaction indices to parameter indices
            bonds, angles, dihedrals (list): Lists of interactions (dicts with an
                'index' key and an optional 'optimize' flag)
            base_alpha (float): Base regularization strength
            energy_hierarchy_scale (float): Scale factor for energy hierarchy
            param_types (list, optional): List of parameter type strings
        """
        self.initial_params = np.array(initial_params, dtype=float)
        self.base_alpha = base_alpha
        self.energy_hierarchy_scale = energy_hierarchy_scale
        self.param_types = param_types

        # One boolean mask per interaction type, marking the parameters it owns
        n_params = len(self.initial_params)
        self.bond_params_mask = self._build_mask(bonds, bond2params, n_params)
        self.angle_params_mask = self._build_mask(angles, bond2params, n_params)
        self.dihedral_params_mask = self._build_mask(dihedrals, bond2params, n_params)

        # Generate adaptive prior widths based on energy hierarchy
        self.prior_widths = self._generate_adaptive_prior_widths(self.initial_params)

        # Generate adaptive alpha values for each parameter
        self.adaptive_alphas = self._generate_adaptive_alphas()

    @staticmethod
    def _build_mask(interactions, bond2params, n_params):
        """Return a boolean mask flagging parameters of the optimized interactions."""
        mask = np.zeros(n_params, dtype=bool)
        for interaction in interactions:
            if not interaction.get('optimize', False):
                continue
            # Interactions without an entry contribute nothing; None marks a
            # parameter slot that is not optimized.
            for idx in bond2params.get(interaction['index'], []):
                if idx is not None:
                    mask[idx] = True
        return mask

    def _generate_adaptive_prior_widths(self, params):
        """Generate prior widths that respect energy hierarchy."""
        # Base width: 10% of the magnitude, or 0.1 for zero-valued parameters
        widths = np.abs(params) * 0.1
        widths[widths == 0] = 0.1

        # Bonds: tighter regularization (smaller widths)
        widths[self.bond_params_mask] *= 0.1

        # Angles: medium regularization
        widths[self.angle_params_mask] *= 0.5

        # Dihedrals: looser regularization (larger widths)
        widths[self.dihedral_params_mask] *= 2.0

        return widths

    def _generate_adaptive_alphas(self):
        """Generate adaptive alpha values based on energy hierarchy."""
        alphas = np.full(len(self.initial_params), self.base_alpha)

        # Bonds: stronger regularization (higher alpha)
        alphas[self.bond_params_mask] *= self.energy_hierarchy_scale

        # Angles: medium regularization
        alphas[self.angle_params_mask] *= np.sqrt(self.energy_hierarchy_scale)

        # Dihedrals: weaker regularization (lower alpha)
        alphas[self.dihedral_params_mask] *= 1.0 / self.energy_hierarchy_scale

        return alphas

    def compute_regularization_term(self, current_params):
        """Compute adaptive regularization term (scalar penalty)."""
        diff = current_params - self.initial_params
        penalty = np.sum(self.adaptive_alphas * diff**2 / (2 * self.prior_widths**2))
        return penalty

    def compute_regularization_gradient(self, current_params):
        """Compute gradient of adaptive regularization term."""
        diff = current_params - self.initial_params
        gradient = self.adaptive_alphas * diff / self.prior_widths**2
        return gradient

    def compute_regularization_residuals(self, current_params):
        """Compute adaptive regularization residuals.

        The sum of squares of the returned residuals equals
        compute_regularization_term(current_params).
        """
        diff = current_params - self.initial_params
        # Scale residuals by sqrt(adaptive_alpha/2) and prior widths
        residuals = np.sqrt(self.adaptive_alphas / 2) * diff / self.prior_widths
        return residuals
270
+
271
class ParameterOptimizer:
    """
    Collect the force field parameters that take part in the optimization.

    Interactions are dictionaries with the keys 'index', 'atoms', 'function'
    and 'parameters'. Every interaction handed to one of the optimize_*
    methods gets an 'optimize' flag and an entry in ``bond2params`` that maps
    its 'index' to a list of positions in ``ff_optimize`` (None for parameter
    slots that are not optimized). Interactions that are equivalent according
    to ``eq_mapping`` share the same list of positions.
    """

    def __init__(self, eq_mapping):
        """
        Args:
            eq_mapping (dict): Mapping of equivalent atoms, queried with the
                atom ids found in the 'atoms' list of each interaction.
        """
        self.eq_mapping = eq_mapping
        self.ff_optimize = []
        self.bond2params = {}
        self.param_types = []  # Track parameter types as they're added
        self.counter = 0
        # An empty set means "no restriction": every interaction is selected
        self._selected_interactions = {
            'bonds': set(),
            'angles': set(),
            'dihedrals': set(),
        }
        # Optimized interactions, kept for equivalence checking
        self._bonds = []
        self._angles = []
        self._dihedrals = []

    def select_interactions(self, interaction_type, indices=None, all=False):
        """
        Select specific interactions for optimization.

        Args:
            interaction_type (str): Type of interaction ('bonds', 'angles', or 'dihedrals')
            indices (list, optional): List of interaction indices to select. If None, selects all.
            all (bool, optional): If True, selects all interactions of the given type.

        Raises:
            ValueError: If interaction_type is not one of the supported types.
        """
        if interaction_type not in self._selected_interactions:
            raise ValueError(f"Invalid interaction type: {interaction_type}")

        # NOTE: the parameter `all` shadows the builtin inside this method only.
        if all or indices is None:
            self._selected_interactions[interaction_type] = set()
        else:
            self._selected_interactions[interaction_type] = set(indices)

    def is_selected(self, interaction_type, index):
        """Check if an interaction is selected for optimization."""
        selection = self._selected_interactions[interaction_type]
        if not selection:
            return True  # If no specific selections, optimize all
        return index in selection

    @staticmethod
    def _dihedral_param_count(function):
        """Return the number of parameter slots of a dihedral function type."""
        if function == 2:
            return 2
        if function == 3:
            return 6  # Ryckaert-Bellemans: C0..C5
        return 3  # function types 1, 4, 9 and the default for anything else

    def _is_excluded(self, interaction_type, interaction, hydrogen_indices, exclude_hydrogen):
        """Return True if the interaction is hydrogen-excluded or not selected."""
        if exclude_hydrogen and hydrogen_indices is not None and any(
                atom in hydrogen_indices for atom in interaction['atoms']):
            return True
        return not self.is_selected(interaction_type, interaction['index'])

    def _mark_not_optimized(self, interaction, n_slots):
        """Flag the interaction as fixed and register an all-None parameter mapping."""
        interaction['optimize'] = False
        self.bond2params[interaction['index']] = [None] * n_slots

    def optimize_bond(self, bond, optimize_length=True, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register a bond for optimization.

        The bond is left fixed when it is hydrogen-excluded, not selected, or
        has a force constant (parameters[1]) of exactly 0.0.

        Args:
            bond (dict): Bond interaction; its 'optimize' flag is set in place
            optimize_length (bool): Optimize parameters[0]
            optimize_force_constant (bool): Optimize parameters[1]
            hydrogen_indices (collection, optional): Atom ids considered hydrogens
            exclude_hydrogen (bool): Skip bonds containing one of hydrogen_indices
        """
        if (self._is_excluded('bonds', bond, hydrogen_indices, exclude_hydrogen)
                or bond['parameters'][1] == 0.0):
            self._mark_not_optimized(bond, 2)
            return

        bond['optimize'] = True
        # Always maintain the order: [length, force_constant]
        params_to_optimize = [
            bond['parameters'][0] if optimize_length else None,
            bond['parameters'][1] if optimize_force_constant else None,
        ]
        param_types = [
            'bond_length' if optimize_length else None,
            'bond_force' if optimize_force_constant else None,
        ]
        self._add_parameters(bond, params_to_optimize, param_types)
        self._bonds.append(bond)

    def optimize_angle(self, angle, optimize_angle=True, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register an angle for optimization.

        The angle is left fixed when it is hydrogen-excluded, not selected, or
        has a force constant (parameters[1]) of exactly 0.0.

        Args:
            angle (dict): Angle interaction; its 'optimize' flag is set in place
            optimize_angle (bool): Optimize parameters[0]
            optimize_force_constant (bool): Optimize parameters[1]
            hydrogen_indices (collection, optional): Atom ids considered hydrogens
            exclude_hydrogen (bool): Skip angles containing one of hydrogen_indices
        """
        if (self._is_excluded('angles', angle, hydrogen_indices, exclude_hydrogen)
                or angle['parameters'][1] == 0.0):
            self._mark_not_optimized(angle, 2)
            return

        angle['optimize'] = True
        # Always maintain the order: [angle_value, force_constant]
        params_to_optimize = [
            angle['parameters'][0] if optimize_angle else None,
            angle['parameters'][1] if optimize_force_constant else None,
        ]
        param_types = [
            'angle_value' if optimize_angle else None,
            'angle_force' if optimize_force_constant else None,
        ]
        self._add_parameters(angle, params_to_optimize, param_types)
        self._angles.append(angle)

    def optimize_dihedral(self, dihedral, optimize_force_constant=True, hydrogen_indices=None, exclude_hydrogen=False):
        """
        Register a dihedral for optimization (force constants only).

        Function types 1, 4, 9 and 2 optimize parameters[1]; function type 3
        optimizes every non-zero coefficient. A dihedral is left fixed when it
        is hydrogen-excluded, not selected, or its force constant(s) are 0.0.

        Args:
            dihedral (dict): Dihedral interaction; its 'optimize' flag is set in place
            optimize_force_constant (bool): Optimize the force constant(s)
            hydrogen_indices (collection, optional): Atom ids considered hydrogens
            exclude_hydrogen (bool): Skip dihedrals containing one of hydrogen_indices
        """
        function = dihedral['function']
        n_slots = self._dihedral_param_count(function)

        if self._is_excluded('dihedrals', dihedral, hydrogen_indices, exclude_hydrogen):
            # Also covers unrecognised function types, which previously were
            # left without any bond2params entry on this path.
            self._mark_not_optimized(dihedral, n_slots)
            return

        # Unrecognised function types fall through with nothing to optimize
        params_to_optimize = []
        param_types = []

        if function in [1, 4, 9] or function == 2:
            if dihedral['parameters'][1] == 0.0:  # Check force constant
                self._mark_not_optimized(dihedral, n_slots)
                return
            params_to_optimize = [None] * n_slots
            param_types = [None] * n_slots
            if optimize_force_constant:
                params_to_optimize[1] = dihedral['parameters'][1]
                param_types[1] = 'dihedral_force'
        elif function == 3:  # Ryckaert-Bellemans: [C0, C1, C2, C3, C4, C5]
            # `all` here is the builtin, not the select_interactions argument
            if all(p == 0.0 for p in dihedral['parameters']):
                self._mark_not_optimized(dihedral, n_slots)
                return
            params_to_optimize = [None] * n_slots
            param_types = [None] * n_slots
            if optimize_force_constant:
                for i, param in enumerate(dihedral['parameters']):
                    if param != 0.0:
                        params_to_optimize[i] = param
                        param_types[i] = 'dihedral_force'

        dihedral['optimize'] = True
        self._add_parameters(dihedral, params_to_optimize, param_types)
        self._dihedrals.append(dihedral)

    def _add_parameters(self, interaction, params_to_optimize, param_types_to_add=None):
        """
        Append the parameters of an interaction to the optimization vector.

        If an equivalent interaction is already registered, its parameter
        positions are reused and nothing is appended.

        Args:
            interaction (dict): Interaction being registered
            params_to_optimize (list): Parameter values, None for fixed slots
            param_types_to_add (list, optional): Type string per slot
        """
        # Reuse the parameters of an equivalent, already registered interaction
        for existing_idx, existing_params in self.bond2params.items():
            if self._is_equivalent(interaction, existing_idx):
                self.bond2params[interaction['index']] = existing_params
                return

        # Bonds and angles have two slots; dihedrals depend on the function type
        if len(interaction['atoms']) == 4:
            expected_length = self._dihedral_param_count(interaction['function'])
        else:
            expected_length = 2

        # Slots that are not optimized stay None
        param_indices = [None] * expected_length

        for i, param in enumerate(params_to_optimize):
            if param is None:
                continue
            self.ff_optimize.append(param)
            param_indices[i] = len(self.ff_optimize) - 1
            # Add parameter type if provided
            if param_types_to_add and i < len(param_types_to_add) and param_types_to_add[i] is not None:
                self.param_types.append(param_types_to_add[i])

        self.bond2params[interaction['index']] = param_indices

    def _is_equivalent(self, interaction1, interaction2_idx):
        """Check if two interactions are equivalent based on their atoms and function type.

        Args:
            interaction1 (dict): First interaction to compare
            interaction2_idx (int): Index of second interaction in bond2params

        Returns:
            bool: True if interactions are equivalent, False otherwise
        """
        # Only interactions stored as optimized can serve as the second partner;
        # bonds are searched first, then angles, then dihedrals.
        interaction2 = next(
            (stored
             for group in (self._bonds, self._angles, self._dihedrals)
             for stored in group
             if stored['index'] == interaction2_idx),
            None,
        )
        if not interaction2:
            return False

        # Check if function types match
        if interaction1['function'] != interaction2['function']:
            return False

        # The number of atoms identifies the interaction type
        n_atoms = len(interaction1['atoms'])
        if n_atoms != len(interaction2['atoms']):
            return False

        if n_atoms == 2:  # bonds
            return check_bond_equivalence(interaction1['atoms'], interaction2['atoms'], self.eq_mapping)
        if n_atoms == 3:  # angles
            return check_angle_equivalence(interaction1['atoms'], interaction2['atoms'], self.eq_mapping)
        if n_atoms == 4:  # dihedrals
            return check_dihedral_equivalence(interaction1['atoms'], interaction2['atoms'], self.eq_mapping)
        return False

    def get_optimized_parameters(self):
        """Return (parameter array, interaction-to-parameter mapping, parameter types)."""
        return np.array(self.ff_optimize), self.bond2params, self.param_types
522
+
523
+
524
+
525
def get_configurations_optff(fmdata, tpr_file, trr_file, begin, end, step,
                             qm_region):
    """
    Get configurations for force field optimization.

    Args:
        fmdata (FMDataset): FMDataset object containing QM data (assumed to use CPMD IDs).
        tpr_file (str): GROMACS TPR file.
        trr_file (str): GROMACS TRR file.
        begin (int): Starting frame.
        end (int): Ending frame (exclusive, as in range()).
        step (int): Step size.
        qm_region (QMRegion): QMRegion object to get GROMACS to CPMD mapping.

    Returns:
        list: One dictionary per frame. 'qm_forces' holds the QM forces
            reordered to the order of qm_region.qm_atoms, 'qm_gmx_forces' the
            forces returned by get_qm_gmx_forces. If the QM region has
            boundary atoms, 'qm_coordinates' holds the reordered QM
            coordinates followed by the coordinates of the MM atoms of
            qm_region.extended_qm_atoms; otherwise it holds the reordered QM
            coordinates only and 'qm_gmx_coordinates' holds the positions
            returned by get_qm_gmx_forces.

    Raises:
        ValueError: If a QM atom has no CPMD ID in the mapping, or its CPMD
            ID is missing from the FMDataset.
    """
    u = mda.Universe(tpr_file, trr_file)

    # Get GROMACS to CPMD mapping (CPMD IDs are 1-based)
    gmx_to_cpmd_map = qm_region.gmx_to_cpmd_map
    qm_atoms_gmx_indices = qm_region.qm_atoms.index

    # CPMD IDs in FMdata order, taken from the first frame (assumed constant).
    # np.asarray keeps the element-wise comparison below valid for plain lists.
    cpmd_ids_fmdata_order = np.asarray(
        fmdata.get_configuration_properties(begin, 'id', 'qm'))

    # fm_to_top_order[i] is the position in the FMdata arrays of the i-th
    # GROMACS atom of qm_atoms_gmx_indices.
    fm_to_top_order = np.zeros(len(qm_atoms_gmx_indices), dtype=int)
    for i, gmx_idx in enumerate(qm_atoms_gmx_indices):
        target_cpmd_id = gmx_to_cpmd_map.get(gmx_idx)
        if target_cpmd_id is None:
            raise ValueError(f"GROMACS atom {gmx_idx} not found in gmx_to_cpmd_map.")

        fm_idx_arr = np.where(cpmd_ids_fmdata_order == target_cpmd_id)[0]
        if len(fm_idx_arr) == 0:
            raise ValueError(
                f"CPMD ID {target_cpmd_id} (for GMX atom {gmx_idx}) "
                f"not found in FMdata's list of CPMD IDs: {cpmd_ids_fmdata_order}"
            )
        fm_to_top_order[i] = fm_idx_arr[0]

    has_boundary = not qm_region.boundary_atoms.empty
    if has_boundary:
        # Everything here is frame independent, so resolve it once up front
        gmx_ids_mm_order = fmdata.get_configuration_properties(begin, 'id', 'mm')
        gmx_to_mm_idx = {gmx_id: idx for idx, gmx_id in enumerate(gmx_ids_mm_order)}
        extended_df = qm_region.extended_qm_atoms
        mm_atom_ids = list(extended_df[extended_df['is_qm'] == 0].index)
        # Position of each MM atom in the FMdata MM arrays, None if absent
        mm_sources = [gmx_to_mm_idx.get(gmx_idx) for gmx_idx in mm_atom_ids]

    configurations = []
    for idx in range(begin, end, step):
        config = dict()

        # QM data from FMdata, reordered to match GROMACS topology order
        fm_coords = fmdata.get_configuration_properties(idx, 'coordinate', 'qm')
        fm_forces = fmdata.get_configuration_properties(idx, 'force', 'qm')
        config['qm_coordinates'] = fm_coords[fm_to_top_order]
        config['qm_forces'] = fm_forces[fm_to_top_order]

        if has_boundary:
            fm_mm_coords = fmdata.get_configuration_properties(idx, 'coordinate', 'mm')

            # MM atoms missing from FMdata keep zero coordinates
            mm_coords = np.zeros((len(mm_atom_ids), 3))
            for row, (gmx_idx, source) in enumerate(zip(mm_atom_ids, mm_sources)):
                if source is None:
                    logging.warning(f'Could not find MM atom {gmx_idx} in FMData MM coordinates')
                else:
                    mm_coords[row] = fm_mm_coords[source]

            # Extended coordinates: QM atoms first, then the MM atoms
            extended_coords = np.vstack([config['qm_coordinates'], mm_coords])

            forces, _ = get_qm_gmx_forces(u, idx, qm_atoms_gmx_indices)
            config['qm_gmx_forces'] = forces
            config['qm_coordinates'] = extended_coords
        else:
            # GROMACS forces and positions for the atoms in qm_atoms_gmx_indices
            forces, positions = get_qm_gmx_forces(u, idx, qm_atoms_gmx_indices)
            config['qm_gmx_forces'] = forces
            config['qm_gmx_coordinates'] = positions

        configurations.append(config)
    return configurations
636
+
637
+
638
def compute_residue(ff_optimize,
                    qmmm_forces,
                    gmx_force, qm_coordinates,
                    bonds,
                    angles,
                    dihedrals,
                    bond2params):
    """
    Compute the force residual of one configuration.

    The reference is ``qmmm_forces - gmx_force``; the residual is the bonded
    force obtained from compute_bonded_forces minus that reference.

    Args:
        ff_optimize: Current force field parameters
        qmmm_forces: Reference forces; their length is passed on as qm_atoms_count
        gmx_force: Forces subtracted from qmmm_forces to form the reference
        qm_coordinates: Coordinates handed to compute_bonded_forces
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping

    Returns:
        tuple: (residue, reference_bonded_force)
    """
    # Only the first len(qmmm_forces) atoms are requested from the bonded term
    n_qm_atoms = len(qmmm_forces)
    model_forces = compute_bonded_forces(ff_optimize, qm_coordinates,
                                         bonds, angles, dihedrals, bond2params,
                                         qm_atoms_count=n_qm_atoms)

    target_forces = qmmm_forces - gmx_force
    return model_forces - target_forces, target_forces
657
+
658
+
659
def _process_single_configuration(args):
    """
    Process a single configuration for parallel computation.

    Takes one packed argument so that it can be used with ``Pool.map``.

    Args:
        args: Tuple containing (ff_optimize, config, bonds, angles, dihedrals, bond2params)

    Returns:
        tuple: (residual, ref_bonded_force)
    """
    parameters, frame, bond_list, angle_list, dihedral_list, mapping = args
    return compute_residue(parameters,
                           frame['qm_forces'],
                           frame["qm_gmx_forces"],
                           frame["qm_coordinates"],
                           bond_list,
                           angle_list,
                           dihedral_list,
                           mapping)
682
+
683
+
684
def compute_ff_obj(ff_optimize, configurations,
                   bonds, angles, dihedrals, bond2params,
                   n_processes=None):
    """
    Compute force field objective function with optional parallelization.

    Args:
        ff_optimize (numpy.ndarray): Force field parameters to optimize
        configurations (list): List of configuration dictionaries; a single
            configuration that is not a list is wrapped into one
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        n_processes (int, optional): Number of processes to use. If None, uses serial processing

    Returns:
        tuple: (total_residual, ref_bonded_forces)
    """
    # isinstance (rather than an exact type comparison) so that list
    # subclasses are treated as a list of configurations, not as one.
    if not isinstance(configurations, list):
        configurations = [configurations]

    # A pool only pays off with several workers and several configurations
    run_parallel = (n_processes is not None
                    and n_processes > 1
                    and len(configurations) > 1)
    if run_parallel:
        return _compute_ff_obj_parallel(ff_optimize, configurations,
                                        bonds, angles, dihedrals, bond2params, n_processes)
    return _compute_ff_obj_serial(ff_optimize, configurations,
                                  bonds, angles, dihedrals, bond2params)
710
+
711
+
712
def _compute_ff_obj_serial(ff_optimize, configurations,
                           bonds, angles, dihedrals, bond2params):
    """
    Evaluate the residuals of all configurations one after another.

    Returns:
        tuple: (total_residual, ref_bonded_forces), each the per-configuration
            results stacked with np.vstack.
    """
    per_frame = [
        compute_residue(ff_optimize,
                        frame['qm_forces'],
                        frame["qm_gmx_forces"],
                        frame["qm_coordinates"],
                        bonds,
                        angles,
                        dihedrals,
                        bond2params)
        for frame in configurations
    ]
    residual_blocks = [residual for residual, _ in per_frame]
    reference_blocks = [reference for _, reference in per_frame]
    return np.vstack(residual_blocks), np.vstack(reference_blocks)
732
+
733
+
734
def _compute_ff_obj_parallel(ff_optimize, configurations,
                             bonds, angles, dihedrals, bond2params, n_processes):
    """
    Evaluate the residuals of all configurations in a multiprocessing pool.

    A pool with n_processes workers is created for this call and closed
    again when the results are collected.

    Returns:
        tuple: (total_residual, ref_bonded_forces), each the per-configuration
            results stacked with np.vstack.
    """
    # One packed argument tuple per configuration, as Pool.map passes a single item
    jobs = []
    for frame in configurations:
        jobs.append((ff_optimize, frame, bonds, angles, dihedrals, bond2params))

    with mp.Pool(processes=n_processes) as pool:
        outcomes = pool.map(_process_single_configuration, jobs)

    residual_blocks = []
    reference_blocks = []
    for outcome in outcomes:
        residual_blocks.append(outcome[0])
        reference_blocks.append(outcome[1])

    return np.vstack(residual_blocks), np.vstack(reference_blocks)
750
+
751
+
752
def check_bond_equivalence(bond1, bond2, eq_mapping):
    """Tell whether two bonds join equivalent atoms.

    The first two entries of each bond are translated through ``eq_mapping``.
    The bonds are equivalent when all four atoms are mapped and the mapped
    labels agree either in the same order or in reversed order.

    Args:
        bond1, bond2: Indexable objects whose items 0 and 1 are atom identifiers
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the bonds are equivalent, False otherwise
    """
    first_a, first_b = eq_mapping.get(bond1[0]), eq_mapping.get(bond1[1])
    second_a, second_b = eq_mapping.get(bond2[0]), eq_mapping.get(bond2[1])

    # An atom absent from the mapping can never take part in an equivalence
    if first_a is None or first_b is None or second_a is None or second_b is None:
        return False

    if first_a == second_a and first_b == second_b:
        return True
    if first_a == second_b and first_b == second_a:
        return True
    return False
763
+
764
+
765
def check_angle_equivalence(bond1, bond2, eq_mapping):
    """Tell whether two angles connect equivalent atoms.

    The first three entries of each angle are translated through
    ``eq_mapping``. The angles are equivalent when every atom is mapped, the
    central atoms carry the same label, and the two outer atoms agree either
    in the same order or swapped.

    Atoms missing from ``eq_mapping`` make the angles non-equivalent, exactly
    as in ``check_bond_equivalence``. Without this guard all missing atoms
    would map to ``None`` and compare equal, so two unrelated unmapped angles
    would be reported as equivalent.

    Args:
        bond1, bond2: Indexable objects whose items 0, 1 and 2 are the atom
            identifiers of the angle (item 1 is the central atom)
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the angles are equivalent, False otherwise
    """
    a1 = eq_mapping.get(bond1[0])
    b1 = eq_mapping.get(bond1[1])
    c1 = eq_mapping.get(bond1[2])
    a2 = eq_mapping.get(bond2[0])
    b2 = eq_mapping.get(bond2[1])
    c2 = eq_mapping.get(bond2[2])

    # Unmapped atoms must not match each other through None == None
    if any(label is None for label in (a1, b1, c1, a2, b2, c2)):
        return False

    # The central atom has to match before the outer atoms are compared
    if not (b1 == b2):
        return False

    return bool((a1 == a2 and c1 == c2) or (a1 == c2 and c1 == a2))
778
+
779
+
780
def check_dihedral_equivalence(bond1, bond2, eq_mapping):
    """Tell whether two dihedrals connect equivalent atoms.

    The first four entries of each dihedral are translated through
    ``eq_mapping``. The dihedrals are equivalent when every atom is mapped
    and the four mapped labels agree either in the same order or in fully
    reversed order.

    Atoms missing from ``eq_mapping`` make the dihedrals non-equivalent,
    exactly as in ``check_bond_equivalence``. Without this guard all missing
    atoms would map to ``None`` and compare equal, so two unrelated unmapped
    dihedrals would be reported as equivalent.

    Args:
        bond1, bond2: Indexable objects whose items 0-3 are the atom
            identifiers of the dihedral
        eq_mapping (dict): Mapping of equivalent atoms

    Returns:
        bool: True if the dihedrals are equivalent, False otherwise
    """
    a1 = eq_mapping.get(bond1[0])
    b1 = eq_mapping.get(bond1[1])
    c1 = eq_mapping.get(bond1[2])
    d1 = eq_mapping.get(bond1[3])
    a2 = eq_mapping.get(bond2[0])
    b2 = eq_mapping.get(bond2[1])
    c2 = eq_mapping.get(bond2[2])
    d2 = eq_mapping.get(bond2[3])

    # Unmapped atoms must not match each other through None == None
    if any(label is None for label in (a1, b1, c1, d1, a2, b2, c2, d2)):
        return False

    same_order = a1 == a2 and b1 == b2 and c1 == c2 and d1 == d2
    reversed_order = a1 == d2 and b1 == c2 and c1 == b2 and d1 == a2
    return bool(same_order or reversed_order)
796
+
797
+
798
def get_optimize_ff_parameters(qm_region, eq_mapping, fm_input=None):
    """
    Select the bonded parameters to optimize and build the matching regularizer.

    Every bond, angle and dihedral of the QM region is either frozen (its
    'optimize' flag is set to False and placeholder ``None`` entries are stored
    in the optimizer's ``bond2params``) or handed to the ``ParameterOptimizer``.
    An interaction is frozen when solvent skipping is enabled and
    ``qm_region.is_solvent_interaction`` reports it as solvent, or when it
    carries a truthy 'involves_boundary' entry.

    Note: although ``fm_input`` defaults to None, its attributes (optimize_*,
    exclude_hydrogen_*, regularization, ...) are read directly whenever an
    interaction is optimized and when the regularizer is set up. Only
    ``skip_solvent_optimization`` falls back to a default (True).

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        eq_mapping (dict): Mapping of equivalent atoms
        fm_input (FMInput, optional): FMInput object containing all optimization and regularization parameters

    Returns:
        tuple: (ff_optimize, bond2params, regularizer) where ff_optimize is array of parameters to optimize,
        bond2params maps interaction indices to parameter indices, and regularizer is the L2Regularizer object
        (None when ``fm_input.regularization`` is falsy)

    Raises:
        MiMiCPyError: If no parameter was selected for optimization
    """
    # Reuse the interactions stored on the QMRegion; extract them only if absent
    qm_interactions = qm_region.qm_interactions
    if qm_interactions is None:
        qm_interactions = qm_region.extract_qm_interactions()
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # atoms indices are 0-based in the QMRegion
    qm_atoms = qm_region.qm_atoms
    hydrogen_rows = qm_atoms['element'].str.startswith('H')
    hydrogen_indices = set(qm_atoms[hydrogen_rows].index - 1)
    optimizer = ParameterOptimizer(eq_mapping)

    # Optional: skip optimization for solvent molecules
    skip_solvent_optimization = getattr(fm_input, 'skip_solvent_optimization', True)

    def is_frozen(interaction):
        # Solvent test comes first; boundary atoms were already marked in extract_qm_interactions
        if skip_solvent_optimization and qm_region.is_solvent_interaction(interaction):
            return True
        return interaction.get('involves_boundary', False)

    def dihedral_placeholder(function):
        # Number of None slots depends on the dihedral function type;
        # unknown types get no placeholder at all
        if function in [1, 4, 9]:
            return [None, None, None]
        if function == 2:
            return [None, None]
        if function == 3:
            return [None] * 6
        return None

    # Process bonds
    for bond in bonds:
        if is_frozen(bond):
            bond['optimize'] = False
            optimizer.bond2params[bond['index']] = [None, None]
        else:
            optimizer.optimize_bond(
                bond,
                fm_input.optimize_bond_length,
                fm_input.optimize_bond_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_bonds
            )

    # Process angles
    for angle in angles:
        if is_frozen(angle):
            angle['optimize'] = False
            optimizer.bond2params[angle['index']] = [None, None]
        else:
            optimizer.optimize_angle(
                angle,
                fm_input.optimize_angle_value,
                fm_input.optimize_angle_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_angles
            )

    # Process dihedrals
    for dihedral in dihedrals:
        if is_frozen(dihedral):
            dihedral['optimize'] = False
            placeholder = dihedral_placeholder(dihedral['function'])
            if placeholder is not None:
                optimizer.bond2params[dihedral['index']] = placeholder
        else:
            optimizer.optimize_dihedral(
                dihedral,
                fm_input.optimize_dihedral_force,
                hydrogen_indices=hydrogen_indices,
                exclude_hydrogen=fm_input.exclude_hydrogen_dihedrals
            )

    ff_optimize, bond2params, param_types = optimizer.get_optimized_parameters()

    # Nothing was selected: there is nothing to regularize or return
    if len(ff_optimize) == 0:
        raise MiMiCPyError("No parameters to optimize")

    # Parameter types come from the ParameterOptimizer, keeping them
    # consistent with the parameters that were actually selected
    logging.debug(f"Generated {len(param_types)} parameter types for {len(ff_optimize)} parameters")
    if len(param_types) > 20:
        # Too many to print in full
        logging.debug(f"Parameter types (first 10): {param_types[:10]}...")
    else:
        logging.debug(f"Parameter types: {param_types}")

    # Log parameter type distribution
    type_counts = {}
    for param_type in param_types:
        type_counts[param_type] = 1 + type_counts.get(param_type, 0)
    logging.debug(f"Parameter type distribution: {type_counts}")

    regularizer = None
    if fm_input.regularization:
        # Create regularizer with parameter types
        regularizer = L2Regularizer(ff_optimize, None, fm_input.regularization_alpha, param_types)

        # Set type-specific prior widths from FMInput
        regularizer.set_prior_widths_from_fm_input(fm_input)

    return ff_optimize, bond2params, regularizer
928
+
929
+
930
def compute_obj_lm(ff_optimize,
                   configurations,
                   bonds, angles, dihedrals, bond2params,
                   *args):
    """Residual callback for Levenberg-Marquardt fitting that also logs progress.

    The residuals come from ``compute_ff_obj``. Extra positional arguments are
    interpreted as follows:

    * ``args[0]`` is the log filename ("output.txt" when no extra argument is given);
    * the first dict containing an 'n_processes' key supplies the process count;
    * the first object exposing ``compute_regularization_residuals`` is used as
      the regularizer.

    Each call appends one line to the log file: the objective value (squared
    residuals plus the regularization term), the value relative to the
    reference bonded forces, the per-element RMS value, and the parameters.

    Args:
        ff_optimize: Current parameter values
        configurations (list): List of configuration dictionaries
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        *args: Filename, regularizer and parallel settings as described above

    Returns:
        numpy.ndarray: Flattened residuals, followed by the regularization
        residuals when a regularizer is present
    """
    # Parallel settings travel inside a dict among the extra arguments
    n_processes = next(
        (extra.get('n_processes') for extra in args
         if isinstance(extra, dict) and 'n_processes' in extra),
        None,
    )

    residue, ref_bonded = compute_ff_obj(ff_optimize, configurations,
                                         bonds, angles, dihedrals, bond2params, n_processes)

    filename = args[0] if len(args) > 0 else "output.txt"

    # The regularizer is recognised by its interface rather than by position
    regularization = next(
        (extra for extra in args if hasattr(extra, 'compute_regularization_residuals')),
        None,
    )

    # Levenberg-Marquardt minimizes a residual vector, so the regularization
    # enters as additional residual entries
    if regularization is None:
        augmented_residuals = residue.ravel()
    else:
        reg_residuals = regularization.compute_regularization_residuals(ff_optimize)
        augmented_residuals = np.concatenate([residue.ravel(), reg_residuals])

    # Objective value, used for logging only
    sum_residual = np.sum(residue**2)
    if regularization is not None:
        sum_residual += regularization.compute_regularization_term(ff_optimize)

    sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
    abs_val = np.sqrt(sum_residual / np.size(residue))

    parameter_text = ' '.join([str(value) for value in ff_optimize])
    with open(filename, '+a') as file:
        file.write(f"{sum_residual} {sdf} {abs_val} ")
        file.write(parameter_text)
        file.write('\n')

    return augmented_residuals
979
+
980
+
981
def jacobian_ff_obj(ff_optimize,
                    configurations,
                    bonds, angles, dihedrals,
                    bond2params, *args):
    """Jacobian callback matching ``compute_obj_lm``.

    Per-configuration Jacobians are computed serially or in a process pool and
    stacked row-wise. When a regularizer is found among the extra arguments, a
    square block whose diagonal is ``sqrt(alpha / 2) / prior_widths`` is
    appended below the stacked Jacobian.

    Args:
        ff_optimize: Current parameter values
        configurations: A list of configuration dictionaries; any non-list
            value is wrapped into a single-element list
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        *args: The first dict containing 'n_processes' supplies the process
            count; the first object exposing ``compute_regularization_residuals``
            is used as the regularizer

    Returns:
        numpy.ndarray: Stacked Jacobian, augmented with the regularization
        rows when a regularizer is present
    """
    if type(configurations) is not list:
        configurations = [configurations]

    # Parallel settings travel inside a dict among the extra arguments
    n_processes = next(
        (extra.get('n_processes') for extra in args
         if isinstance(extra, dict) and 'n_processes' in extra),
        None,
    )

    # A pool is only used when more than one process and more than one configuration are involved
    use_pool = n_processes is not None and n_processes > 1 and len(configurations) > 1
    if use_pool:
        jacobian_blocks = _compute_jacobian_parallel(ff_optimize, configurations,
                                                     bonds, angles, dihedrals, bond2params, n_processes)
    else:
        jacobian_blocks = _compute_jacobian_serial(ff_optimize, configurations,
                                                   bonds, angles, dihedrals, bond2params)

    jac = np.vstack(jacobian_blocks)

    # The regularizer is recognised by its interface rather than by position
    regularization = next(
        (extra for extra in args if hasattr(extra, 'compute_regularization_residuals')),
        None,
    )
    if regularization is None:
        return jac

    # For L2 regularization, the Jacobian is a diagonal matrix
    n_params = len(ff_optimize)
    reg_jacobian = np.zeros((n_params, n_params))
    diagonal = np.sqrt(regularization.alpha / 2) / regularization.prior_widths
    np.fill_diagonal(reg_jacobian, diagonal)

    return np.vstack([jac, reg_jacobian])
1028
+
1029
+
1030
def _compute_jacobian_serial(ff_optimize, configurations,
                             bonds, angles, dihedrals, bond2params):
    """Compute the Jacobian of each configuration one after another.

    Args:
        ff_optimize: Current parameter values
        configurations (list): Configuration dictionaries providing
            'qm_forces' and 'qm_coordinates'
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping

    Returns:
        list: One ``jacobian_ff`` result per configuration, in input order
    """
    def single_jacobian(config):
        # The number of QM atoms is taken from the length of the forces array
        n_qm_atoms = len(config['qm_forces'])
        return jacobian_ff(ff_optimize,
                           config["qm_coordinates"],
                           bond2params,
                           bonds, angles, dihedrals,
                           qm_atoms_count=n_qm_atoms)

    return [single_jacobian(config) for config in configurations]
1044
+
1045
+
1046
def _compute_jacobian_parallel(ff_optimize, configurations,
                               bonds, angles, dihedrals, bond2params, n_processes):
    """Compute the Jacobian of each configuration in a multiprocessing pool.

    One task tuple ``(ff_optimize, config, bonds, angles, dihedrals,
    bond2params)`` is built per configuration and handed to
    ``_process_single_jacobian`` through ``Pool.map``.

    Args:
        ff_optimize: Current parameter values
        configurations (list): Configuration dictionaries, one task each
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        n_processes (int): Number of worker processes for the pool

    Returns:
        list: The ``Pool.map`` results, in the order of ``configurations``
    """
    # Everything except the configuration itself is identical across tasks
    shared_tail = (bonds, angles, dihedrals, bond2params)
    tasks = []
    for config in configurations:
        tasks.append((ff_optimize, config) + shared_tail)

    with mp.Pool(processes=n_processes) as pool:
        return pool.map(_process_single_jacobian, tasks)
1057
+
1058
+
1059
def optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename, regularizer=None,
                    n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using least squares minimization with optional L2 regularization.

    The configurations are split into a training and a validation set. The fit
    runs on the training set with SciPy's Levenberg-Marquardt solver, using
    ``compute_obj_lm`` for residuals and ``jacobian_ff_obj`` for the Jacobian.
    If the validation set is non-empty, the fitted parameters are evaluated on
    it, the results are appended to ``filename`` and attached to the returned
    result object.

    Note: the fit is unbounded. The 'lm' method of ``least_squares`` does not
    support bounds, so parameters are not constrained to stay positive.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
        n_processes (int, optional): Number of processes to use for parallelization
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        scipy.optimize.OptimizeResult: Result of the optimization. When validation
        configurations exist it additionally carries ``validation_sdf``,
        ``validation_cost``, ``validation_residuals`` and ``validation_ref_bonded``.
    """
    # Get interactions from QMRegion
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # Split configurations into training and validation sets
    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # The callbacks pick the process count out of this dict among their extra arguments
    parallel_settings = {'n_processes': n_processes}

    # The callbacks locate filename, regularizer and parallel settings in the
    # trailing arguments; a None regularizer is simply not detected
    res = least_squares(fun=compute_obj_lm,
                        x0=ff_optimize,
                        jac=jacobian_ff_obj,
                        method='lm',
                        args=(train_configurations,
                              bonds, angles, dihedrals, bond2params,
                              filename, regularizer, parallel_settings),
                        verbose=1)

    # Compute validation SDF
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            res.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )

        # Log validation results
        with open(filename, 'a') as f:
            f.write("\n# VALIDATION RESULTS\n")
            f.write(f"# Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n\n")

        # Store validation results in the optimization result object
        res.validation_sdf = val_sdf
        res.validation_cost = val_cost
        res.validation_residuals = val_residuals
        res.validation_ref_bonded = val_ref_bonded

    return res
1126
+
1127
+
1128
def hierarchical_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                                 regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters using hierarchical approach:
    1. Optimize bonds first (highest energy contribution)
    2. Fix bond parameters, optimize angles
    3. Fix bond and angle parameters, optimize dihedrals

    This approach respects the energy hierarchy and should produce more physically realistic parameters.

    A stage is skipped when no parameter belongs to it. Each stage runs on the
    training split only and writes its optimized values back into the working
    parameter array before the next stage starts. If the validation split is
    non-empty, the final parameters are evaluated on it and the results are
    appended to ``filename`` and logged; they are not returned.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
        fm_input (FMInput, optional): FMInput object containing optimization controls
            (not read by this function)
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        tuple: (optimized_parameters, optimization_results) where optimized_parameters is the final
        parameter array and optimization_results is a list of (stage_name, result) pairs,
        one per stage that was run
    """
    def build_param_mask(interactions):
        # Flag every parameter index referenced by an interaction marked for optimization
        mask = np.zeros(len(ff_optimize), dtype=bool)
        for interaction in interactions:
            if not interaction.get('optimize', False):
                continue
            for idx in bond2params.get(interaction['index'], []):
                if idx is not None:
                    mask[idx] = True
        return mask

    def build_stage_regularizer(mask):
        # Restrict the global regularizer to the parameters of one stage
        if regularizer is None:
            return None
        stage_param_types = _create_param_types_for_mask(regularizer.param_types, mask)
        return L2Regularizer(
            regularizer.initial_params[mask],
            regularizer.prior_widths[mask],
            regularizer.alpha,
            stage_param_types
        )

    # Get interactions from QMRegion
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # Split configurations into training and validation sets
    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # Validate that we have parameters to optimize
    if len(ff_optimize) == 0:
        logging.warning("Warning: No parameters to optimize")
        return ff_optimize, []

    # Identify which parameters belong to which interaction types
    bond_params_mask = build_param_mask(bonds)
    angle_params_mask = build_param_mask(angles)
    dihedral_params_mask = build_param_mask(dihedrals)

    # Debug information
    logging.info(f"Total parameters: {len(ff_optimize)}")
    logging.info(f"Bond parameters: {np.sum(bond_params_mask)}")
    logging.info(f"Angle parameters: {np.sum(angle_params_mask)}")
    logging.info(f"Dihedral parameters: {np.sum(dihedral_params_mask)}")

    # Validate regularizer dimensions (mismatches are only reported, not fatal)
    if regularizer is not None:
        if len(regularizer.initial_params) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer parameter count ({len(regularizer.initial_params)}) doesn't match optimization parameters ({len(ff_optimize)})")
        if len(regularizer.prior_widths) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer prior widths count ({len(regularizer.prior_widths)}) doesn't match optimization parameters ({len(ff_optimize)})")
        if regularizer.param_types and len(regularizer.param_types) != len(ff_optimize):
            logging.warning(f"Warning: Regularizer parameter types count ({len(regularizer.param_types)}) doesn't match optimization parameters ({len(ff_optimize)})")
        else:
            logging.info(f"Parameter types: {len(regularizer.param_types) if regularizer.param_types else 0}")
    else:
        logging.info("No regularizer defined")

    # Work on a copy so the caller's initial parameters stay untouched
    current_params = ff_optimize.copy()
    optimization_results = []

    # (stage name, label used in the completion message, parameter mask, start message)
    stages = (
        ("bonds", "Bond", bond_params_mask,
         "Stage 1: Optimizing bond parameters..."),
        ("angles", "Angle", angle_params_mask,
         "Stage 2: Optimizing angle parameters (bonds fixed)..."),
        ("dihedrals", "Dihedral", dihedral_params_mask,
         "Stage 3: Optimizing dihedral parameters (bonds and angles fixed)..."),
    )

    for stage_name, stage_label, stage_mask, start_message in stages:
        if not np.any(stage_mask):
            continue
        logging.info(start_message)

        # Optimize only this stage's parameters; all others keep their current values
        stage_result = _optimize_parameter_subset(
            current_params, stage_mask, train_configurations,
            bonds, angles, dihedrals, bond2params, filename,
            build_stage_regularizer(stage_mask), stage_name, n_processes
        )

        # Later stages start from the values found here
        current_params[stage_mask] = stage_result.x
        optimization_results.append((stage_name, stage_result))

        logging.info(f"{stage_label} optimization completed. Final objective: {stage_result.cost}")

    # Compute validation SDF on final optimized parameters
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            current_params, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )

        # Log validation results
        with open(filename, 'a') as f:
            f.write("\n# HIERARCHICAL OPTIMIZATION VALIDATION RESULTS\n")
            f.write(f"# Final Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Final Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n")
            f.write(f"# Training configurations: {len(train_configurations)}\n\n")

        logging.info(f"Final validation SDF: {val_sdf:.6f}")
        logging.info(f"Final validation cost: {val_cost:.6f}")

    return current_params, optimization_results
1325
+
1326
+
1327
def _optimize_parameter_subset(current_params, param_mask, configurations,
                               bonds, angles, dihedrals, bond2params, filename,
                               regularizer, stage_name, n_processes=None):
    """
    Optimize a subset of parameters while keeping others fixed.

    The masked parameters are fitted with SciPy's Levenberg-Marquardt solver.
    On every residual evaluation one line is appended to ``filename`` with the
    objective value, the value relative to the reference bonded forces, the
    per-element RMS value and the full parameter array. A summary of the stage
    is appended once the fit has finished.

    Note: the fit is unbounded. The 'lm' method of ``least_squares`` does not
    support bounds, so parameters are not constrained to stay positive.

    Args:
        current_params (np.ndarray): Current parameter values
        param_mask (np.ndarray): Boolean mask indicating which parameters to optimize
        configurations (list): List of configuration dictionaries
        bonds, angles, dihedrals: Interaction lists
        bond2params (dict): Parameter mapping
        filename (str): Output filename
        regularizer (L2Regularizer): Regularizer for this subset (may be None)
        stage_name (str): Name of optimization stage for logging
        n_processes (int, optional): Number of processes for parallel computation

    Returns:
        scipy.optimize.OptimizeResult: Optimization result
    """
    def expand(subset_params):
        # Full parameter array: fixed values everywhere, trial values under the mask
        full_params = current_params.copy()
        full_params[param_mask] = subset_params
        return full_params

    def subset_residuals(subset_params):
        full_params = expand(subset_params)

        # Compute residuals with full parameter array
        residue, ref_bonded = compute_ff_obj(full_params, configurations,
                                             bonds, angles, dihedrals, bond2params, n_processes)

        # Compute objective value for logging
        sum_residual = np.sum(residue**2)
        if regularizer is not None:
            sum_residual += regularizer.compute_regularization_term(subset_params)

        sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
        abs_val = np.sqrt(sum_residual / np.size(residue))

        with open(filename, 'a') as file:
            file.write(f"{sum_residual} {sdf} {abs_val} ")
            file.write(' '.join([str(x) for x in full_params]))
            file.write('\n')

        # Augment with regularization residuals if provided
        if regularizer is not None:
            reg_residuals = regularizer.compute_regularization_residuals(subset_params)
            residue = np.concatenate([residue.ravel(), reg_residuals])

        return residue

    def subset_jacobian(subset_params):
        full_params = expand(subset_params)

        # No regularizer is forwarded here, so the full Jacobian carries no
        # regularization rows; they are added below for the subset only
        full_jac = jacobian_ff_obj(full_params, configurations,
                                   bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})

        # Extract only the columns corresponding to parameters being optimized
        subset_jac = full_jac[:, param_mask]

        # Add regularization Jacobian if provided
        if regularizer is not None:
            n_params = np.sum(param_mask)
            reg_jacobian = np.zeros((n_params, n_params))
            np.fill_diagonal(reg_jacobian, np.sqrt(regularizer.alpha / 2) / regularizer.prior_widths)
            subset_jac = np.vstack([subset_jac, reg_jacobian])

        return subset_jac

    # Initial parameters for this subset
    x0 = current_params[param_mask]

    # Optimize using least squares
    result = least_squares(
        fun=subset_residuals,
        x0=x0,
        jac=subset_jacobian,
        method='lm',
        verbose=1
    )

    # Log results (the "iterations" line reports result.nfev)
    with open(filename, 'a') as f:
        f.write(f"\n# {stage_name.upper()} OPTIMIZATION RESULTS\n")
        f.write(f"# Final cost: {result.cost}\n")
        f.write(f"# Number of iterations: {result.nfev}\n")
        f.write(f"# Success: {result.success}\n")
        f.write(f"# Optimized parameters: {result.x}\n\n")

    return result
1442
+
1443
+
1444
+ def energy_weighted_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
1445
+ regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
1446
+ """
1447
+ Optimize force field parameters using energy-weighted objective function.
1448
+
1449
+ This approach weights the contribution of different interaction types based on their
1450
+ typical energy scales: bonds (highest), angles (medium), dihedrals (lowest).
1451
+
1452
+ Args:
1453
+ qm_region (QMRegion): QMRegion object containing QM interactions
1454
+ ff_optimize (numpy.ndarray): Initial force field parameters to optimize
1455
+ configurations (list): List of configuration dictionaries
1456
+ bond2params (dict): Mapping of interaction indices to parameter indices
1457
+ filename (str): Output file to write optimization results
1458
+ regularizer (L2Regularizer, optional): L2 regularization object
1459
+ fm_input (FMInput, optional): FMInput object containing optimization controls
1460
+ n_processes (int, optional): Number of processes for parallel computation
1461
+ validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
1462
+ random_seed (int, optional): Random seed for reproducible train/val split
1463
+
1464
+ Returns:
1465
+ scipy.optimize.OptimizeResult: Result of the optimization
1466
+ """
1467
+ # Get interactions from QMRegion
1468
+ qm_interactions = qm_region.qm_interactions
1469
+ bonds = qm_interactions['bonds']
1470
+ angles = qm_interactions['angles']
1471
+ dihedrals = qm_interactions['dihedrals']
1472
+
1473
+ # Split configurations into training and validation sets
1474
+ train_configurations, val_configurations = split_configurations_train_val(
1475
+ configurations, validation_fraction, random_seed
1476
+ )
1477
+
1478
+ # Get energy weights from fm_input or use defaults
1479
+ if fm_input is not None:
1480
+ bond_weight = getattr(fm_input, 'bond_energy_weight', 1.0)
1481
+ angle_weight = getattr(fm_input, 'angle_energy_weight', 0.1)
1482
+ dihedral_weight = getattr(fm_input, 'dihedral_energy_weight', 0.01)
1483
+ else:
1484
+ bond_weight = 1.0
1485
+ angle_weight = 0.1
1486
+ dihedral_weight = 0.01
1487
+
1488
+ # Create interaction type masks
1489
+ bond_mask = np.zeros(len(ff_optimize), dtype=bool)
1490
+ angle_mask = np.zeros(len(ff_optimize), dtype=bool)
1491
+ dihedral_mask = np.zeros(len(ff_optimize), dtype=bool)
1492
+
1493
+ # Identify which parameters belong to which interaction types
1494
+ for bond in bonds:
1495
+ if bond.get('optimize', False):
1496
+ param_indices = bond2params.get(bond['index'], [])
1497
+ for idx in param_indices:
1498
+ if idx is not None:
1499
+ bond_mask[idx] = True
1500
+
1501
+ for angle in angles:
1502
+ if angle.get('optimize', False):
1503
+ param_indices = bond2params.get(angle['index'], [])
1504
+ for idx in param_indices:
1505
+ if idx is not None:
1506
+ angle_mask[idx] = True
1507
+
1508
+ for dihedral in dihedrals:
1509
+ if dihedral.get('optimize', False):
1510
+ param_indices = bond2params.get(dihedral['index'], [])
1511
+ for idx in param_indices:
1512
+ if idx is not None:
1513
+ dihedral_mask[idx] = True
1514
+
1515
+ def energy_weighted_residuals(params):
1516
+ """Compute residuals with energy-based weighting."""
1517
+ residue, ref_bonded = compute_ff_obj(params, train_configurations,
1518
+ bonds, angles, dihedrals, bond2params, n_processes)
1519
+
1520
+ # Apply energy weights to different interaction types
1521
+ weighted_residue = residue.copy()
1522
+
1523
+ # Weight bond contributions
1524
+ if np.any(bond_mask):
1525
+ weighted_residue *= bond_weight
1526
+
1527
+ # Weight angle contributions (this is approximate - we weight all residues)
1528
+ # In practice, you might want to identify which residues come from which interactions
1529
+ if np.any(angle_mask):
1530
+ # For simplicity, we apply angle weight to all residues
1531
+ # A more sophisticated approach would track which residues come from which interactions
1532
+ weighted_residue *= angle_weight
1533
+
1534
+ # Weight dihedral contributions
1535
+ if np.any(dihedral_mask):
1536
+ weighted_residue *= dihedral_weight
1537
+
1538
+ # Compute objective value for logging (using unweighted residuals for consistency)
1539
+ sum_residual = np.sum(residue**2)
1540
+ if regularizer is not None:
1541
+ reg_term = regularizer.compute_regularization_term(params)
1542
+ sum_residual += reg_term
1543
+
1544
+ sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
1545
+ abs_val = np.sqrt(sum_residual / np.size(residue))
1546
+
1547
+ with open(filename, '+a') as file:
1548
+ file.write(f"{sum_residual} {sdf} {abs_val} ")
1549
+ file.write(' '.join([str(x) for x in params]))
1550
+ file.write('\n')
1551
+
1552
+ # Augment with regularization residuals if provided
1553
+ if regularizer is not None:
1554
+ reg_residuals = regularizer.compute_regularization_residuals(params)
1555
+ weighted_residue = np.concatenate([weighted_residue.ravel(), reg_residuals])
1556
+
1557
+ return weighted_residue
1558
+
1559
+ def energy_weighted_jacobian(params):
1560
+ """Compute Jacobian with energy-based weighting."""
1561
+ jac = jacobian_ff_obj(params, train_configurations,
1562
+ bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})
1563
+
1564
+ # Apply energy weights to Jacobian rows
1565
+ weighted_jac = jac.copy()
1566
+
1567
+ # Weight bond contributions
1568
+ if np.any(bond_mask):
1569
+ weighted_jac *= bond_weight
1570
+
1571
+ # Weight angle contributions
1572
+ if np.any(angle_mask):
1573
+ weighted_jac *= angle_weight
1574
+
1575
+ # Weight dihedral contributions
1576
+ if np.any(dihedral_mask):
1577
+ weighted_jac *= dihedral_weight
1578
+
1579
+ # Add regularization Jacobian if provided
1580
+ if regularizer is not None:
1581
+ n_params = len(params)
1582
+ reg_jacobian = np.zeros((n_params, n_params))
1583
+ np.fill_diagonal(reg_jacobian, np.sqrt(regularizer.alpha / 2) / regularizer.prior_widths)
1584
+ weighted_jac = np.vstack([weighted_jac, reg_jacobian])
1585
+
1586
+ return weighted_jac
1587
+
1588
+ # Bounds for parameters (only positive values)
1589
+ lower_bound = np.zeros(ff_optimize.shape)
1590
+ upper_bound = np.inf * np.ones(ff_optimize.shape)
1591
+ bounds = (lower_bound, upper_bound)
1592
+
1593
+ # Optimize using least squares with energy weighting
1594
+ result = least_squares(
1595
+ fun=energy_weighted_residuals,
1596
+ x0=ff_optimize,
1597
+ jac=energy_weighted_jacobian,
1598
+ method='lm',
1599
+ verbose=1
1600
+ )
1601
+
1602
+ # Compute validation SDB
1603
+ if val_configurations:
1604
+ val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
1605
+ result.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
1606
+ )
1607
+
1608
+ # Store validation results in the optimization result object
1609
+ result.validation_sdf = val_sdf
1610
+ result.validation_cost = val_cost
1611
+ result.validation_residuals = val_residuals
1612
+ result.validation_ref_bonded = val_ref_bonded
1613
+
1614
+ # Log results
1615
+ with open(filename, 'a') as f:
1616
+ f.write(f"\n# ENERGY-WEIGHTED OPTIMIZATION RESULTS\n")
1617
+ f.write(f"# Bond weight: {bond_weight}\n")
1618
+ f.write(f"# Angle weight: {angle_weight}\n")
1619
+ f.write(f"# Dihedral weight: {dihedral_weight}\n")
1620
+ f.write(f"# Final cost: {result.cost}\n")
1621
+ f.write(f"# Number of iterations: {result.nfev}\n")
1622
+ f.write(f"# Success: {result.success}\n")
1623
+ f.write(f"# Optimized parameters: {result.x}\n")
1624
+ if val_configurations:
1625
+ f.write(f"# Validation SDF: {val_sdf:.6f}\n")
1626
+ f.write(f"# Validation cost: {val_cost:.6f}\n")
1627
+ f.write(f"# Validation configurations: {len(val_configurations)}\n")
1628
+ f.write(f"# Training configurations: {len(train_configurations)}\n")
1629
+ f.write("\n")
1630
+
1631
+ return result
1632
+
1633
def adaptive_regularization_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                                            fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Optimize force field parameters with per-parameter (adaptive) L2 regularization.

    The data residuals from ``compute_ff_obj`` are augmented with the residuals
    of an ``AdaptiveL2Regularizer`` and minimised with Levenberg-Marquardt.
    The regularizer is intended to apply different strengths per interaction
    type (bonds strongest, dihedrals weakest); the actual strengths are decided
    by ``AdaptiveL2Regularizer`` from ``base_alpha`` and
    ``energy_hierarchy_scale``.

    Every residual evaluation appends one line to ``filename`` containing the
    regularized sum of squares, the SDF, the RMS value and the current
    parameters. A summary block is appended after the fit.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        fm_input (FMInput, optional): Object providing ``adaptive_base_alpha``
            (default 0.1) and ``energy_hierarchy_scale`` (default 10.0)
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        scipy.optimize.OptimizeResult: Result of the optimization. When a
        validation set exists, the attributes ``validation_sdf``,
        ``validation_cost``, ``validation_residuals`` and
        ``validation_ref_bonded`` are attached to it.
    """
    # Get interactions from QMRegion
    qm_interactions = qm_region.qm_interactions
    bonds = qm_interactions['bonds']
    angles = qm_interactions['angles']
    dihedrals = qm_interactions['dihedrals']

    # Split configurations into training and validation sets
    train_configurations, val_configurations = split_configurations_train_val(
        configurations, validation_fraction, random_seed
    )

    # getattr falls back to the default both for a missing attribute and for fm_input=None
    base_alpha = getattr(fm_input, 'adaptive_base_alpha', 0.1)
    energy_hierarchy_scale = getattr(fm_input, 'energy_hierarchy_scale', 10.0)

    # Create adaptive regularizer
    adaptive_regularizer = AdaptiveL2Regularizer(
        ff_optimize, bond2params, bonds, angles, dihedrals,
        base_alpha=base_alpha,
        energy_hierarchy_scale=energy_hierarchy_scale
    )

    def adaptive_residuals(params):
        """Return data residuals followed by the regularization residuals."""
        residue, ref_bonded = compute_ff_obj(params, train_configurations,
                                             bonds, angles, dihedrals, bond2params, n_processes)

        # Objective value for logging; note it includes the regularization term
        sum_residual = np.sum(residue**2)
        sum_residual += adaptive_regularizer.compute_regularization_term(params)

        sdf = np.sqrt(sum_residual / np.sum(ref_bonded**2))
        abs_val = np.sqrt(sum_residual / np.size(residue))

        with open(filename, 'a') as file:
            file.write(f"{sum_residual} {sdf} {abs_val} ")
            file.write(' '.join([str(x) for x in params]))
            file.write('\n')

        # Augment with adaptive regularization residuals
        reg_residuals = adaptive_regularizer.compute_regularization_residuals(params)
        return np.concatenate([residue.ravel(), reg_residuals])

    def adaptive_jacobian(params):
        """Return the data Jacobian stacked on the diagonal regularization Jacobian."""
        jac = jacobian_ff_obj(params, train_configurations,
                              bonds, angles, dihedrals, bond2params, {'n_processes': n_processes})

        # One regularization residual per parameter -> square diagonal block
        n_params = len(params)
        reg_jacobian = np.zeros((n_params, n_params))
        np.fill_diagonal(
            reg_jacobian,
            np.sqrt(adaptive_regularizer.adaptive_alphas / 2) / adaptive_regularizer.prior_widths
        )
        return np.vstack([jac, reg_jacobian])

    # NOTE: method='lm' does not support bounds, so the parameters are NOT
    # constrained to be positive here. The previously constructed (and never
    # used) bounds arrays were removed to avoid suggesting otherwise.
    result = least_squares(
        fun=adaptive_residuals,
        x0=ff_optimize,
        jac=adaptive_jacobian,
        method='lm',
        verbose=1
    )

    # Compute validation SDF
    if val_configurations:
        val_sdf, val_cost, val_residuals, val_ref_bonded = compute_validation_sdf(
            result.x, val_configurations, bonds, angles, dihedrals, bond2params, n_processes
        )

        # Store validation results in the optimization result object
        result.validation_sdf = val_sdf
        result.validation_cost = val_cost
        result.validation_residuals = val_residuals
        result.validation_ref_bonded = val_ref_bonded

    # Log results (result.nfev is the number of residual evaluations)
    with open(filename, 'a') as f:
        f.write("\n# ADAPTIVE REGULARIZATION OPTIMIZATION RESULTS\n")
        f.write(f"# Base alpha: {base_alpha}\n")
        f.write(f"# Energy hierarchy scale: {energy_hierarchy_scale}\n")
        f.write(f"# Bond parameters: {np.sum(adaptive_regularizer.bond_params_mask)}\n")
        f.write(f"# Angle parameters: {np.sum(adaptive_regularizer.angle_params_mask)}\n")
        f.write(f"# Dihedral parameters: {np.sum(adaptive_regularizer.dihedral_params_mask)}\n")
        f.write(f"# Final cost: {result.cost}\n")
        f.write(f"# Number of iterations: {result.nfev}\n")
        f.write(f"# Success: {result.success}\n")
        f.write(f"# Optimized parameters: {result.x}\n")
        if val_configurations:
            f.write(f"# Validation SDF: {val_sdf:.6f}\n")
            f.write(f"# Validation cost: {val_cost:.6f}\n")
            f.write(f"# Validation configurations: {len(val_configurations)}\n")
        f.write(f"# Training configurations: {len(train_configurations)}\n")
        f.write("\n")

    return result
1766
+
1767
def unified_optimization_ff(qm_region, ff_optimize, configurations, bond2params, filename,
                            regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Dispatch to one of the force field optimization strategies.

    The strategy is read from ``fm_input.optimization_method``. When
    ``fm_input`` is None a default ``FMInput()`` is created, and when the
    attribute is missing or None it is set to ``'hierarchical'`` on the
    object itself.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file to write optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
            (not forwarded to the 'adaptive' strategy)
        fm_input (FMInput, optional): FMInput object containing optimization controls
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        tuple: (optimized_parameters, optimization_result). For the
        'hierarchical' strategy the result is the second element of the last
        stage entry, or None when no stage results were produced.

    Raises:
        ValueError: If the optimization method is not one of the supported names.
    """
    if fm_input is None:
        fm_input = FMInput()

    # Default the strategy when it is absent or explicitly unset
    if getattr(fm_input, 'optimization_method', None) is None:
        fm_input.optimization_method = 'hierarchical'

    optimization_method = fm_input.optimization_method

    print(f"Using optimization method: {optimization_method}")

    # Sequential hierarchical optimization
    if optimization_method == 'hierarchical':
        optimized_params, optimization_results = hierarchical_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, fm_input, n_processes, validation_fraction, random_seed
        )
        # Only the final stage result is handed back, for compatibility
        last_result = optimization_results[-1][1] if optimization_results else None
        return optimized_params, last_result

    # Energy-weighted simultaneous optimization
    if optimization_method == 'energy_weighted':
        outcome = energy_weighted_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, fm_input, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    # Adaptive regularization optimization
    if optimization_method == 'adaptive':
        outcome = adaptive_regularization_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            fm_input, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    # Traditional simultaneous optimization
    if optimization_method == 'simultaneous':
        outcome = optimization_ff(
            qm_region, ff_optimize, configurations, bond2params, filename,
            regularizer, n_processes, validation_fraction, random_seed
        )
        return outcome.x, outcome

    raise ValueError(f"Unknown optimization method: {optimization_method}. "
                     f"Supported methods: 'hierarchical', 'energy_weighted', 'adaptive', 'simultaneous'")
1841
+
1842
+
1843
+ def _create_param_types_for_mask(param_types, param_mask):
1844
+ """
1845
+ Create a parameter types list for a specific parameter mask.
1846
+
1847
+ Args:
1848
+ param_types (list): Original parameter types list
1849
+ param_mask (np.ndarray): Boolean mask indicating which parameters to include
1850
+
1851
+ Returns:
1852
+ list: Parameter types for the masked parameters
1853
+ """
1854
+ if param_types is None:
1855
+ return None
1856
+
1857
+ # Create a list of parameter types for the masked parameters only
1858
+ masked_param_types = []
1859
+ for i, is_included in enumerate(param_mask):
1860
+ if is_included and i < len(param_types):
1861
+ masked_param_types.append(param_types[i])
1862
+
1863
+ return masked_param_types
1864
+
1865
+
1866
+ def _copy_fm_input(fm_input, new_method=None):
1867
+ """
1868
+ Create a copy of FMInput object with optional method override.
1869
+
1870
+ Args:
1871
+ fm_input: FMInput object to copy
1872
+ new_method: Optional new optimization method to set
1873
+
1874
+ Returns:
1875
+ FMInput: Copy of the input object
1876
+ """
1877
+ if fm_input is None:
1878
+ return FMInput()
1879
+
1880
+ # Try to use the copy method if available
1881
+ if hasattr(fm_input, 'copy'):
1882
+ try:
1883
+ copy_input = fm_input.copy()
1884
+ except:
1885
+ # Fallback: create new object with same parameters
1886
+ copy_input = FMInput()
1887
+ # Copy all attributes manually
1888
+ for attr in dir(fm_input):
1889
+ if not attr.startswith('_') and not callable(getattr(fm_input, attr)):
1890
+ try:
1891
+ setattr(copy_input, attr, getattr(fm_input, attr))
1892
+ except:
1893
+ pass
1894
+ else:
1895
+ # Fallback: create new object with same parameters
1896
+ copy_input = FMInput()
1897
+ # Copy all attributes manually
1898
+ for attr in dir(fm_input):
1899
+ if not attr.startswith('_') and not callable(getattr(fm_input, attr)):
1900
+ try:
1901
+ setattr(copy_input, attr, getattr(fm_input, attr))
1902
+ except:
1903
+ pass
1904
+
1905
+ # Set new method if specified
1906
+ if new_method is not None:
1907
+ copy_input.optimization_method = new_method
1908
+
1909
+ return copy_input
1910
+
1911
+
1912
def compare_optimization_methods(qm_region, ff_optimize, configurations, bond2params, filename,
                                 regularizer=None, fm_input=None, n_processes=None, validation_fraction=0.1, random_seed=None):
    """
    Compare different optimization methods and report their performance.

    Each method is run through ``unified_optimization_ff`` with its own log
    file ``{filename}_{method}``; a tab-separated summary is written to
    ``{filename}_comparison``. A failure in one method is logged and recorded
    as ``{'error': message}`` without stopping the others.

    When ``random_seed`` is None a single seed is drawn up front and reused
    for every method. Without it each call to
    ``split_configurations_train_val`` would shuffle differently, so the
    metrics computed here would refer to other train/validation sets than
    the ones used during optimization, and the methods would not be compared
    on the same data.

    Args:
        qm_region (QMRegion): QMRegion object containing QM interactions
        ff_optimize (numpy.ndarray): Initial force field parameters to optimize
        configurations (list): List of configuration dictionaries
        bond2params (dict): Mapping of interaction indices to parameter indices
        filename (str): Output file prefix for optimization results
        regularizer (L2Regularizer, optional): L2 regularization object
        fm_input (FMInput, optional): FMInput object containing optimization controls
        n_processes (int, optional): Number of processes for parallel computation
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible train/val split

    Returns:
        dict: Maps each method name either to a dictionary with the keys
        'optimized_params', 'train_cost', 'train_sdf', 'val_cost', 'val_sdf',
        'success' and 'nfev', or to ``{'error': message}``
    """
    if fm_input is None:
        fm_input = FMInput()

    if random_seed is None:
        # Seed range accepted by both random.seed and np.random.seed
        random_seed = random.randrange(2**32)

    methods = ['hierarchical', 'energy_weighted', 'adaptive', 'simultaneous']
    results = {}

    logging.info("Comparing optimization methods...")

    for method in methods:
        logging.info(f"\nTesting method: {method}")

        # Create a copy of fm_input with the current method
        test_fm_input = _copy_fm_input(fm_input, method)

        try:
            # Run optimization with current method
            optimized_params, opt_result = unified_optimization_ff(
                qm_region, ff_optimize.copy(), configurations, bond2params,
                f"{filename}_{method}", regularizer, test_fm_input, n_processes, validation_fraction, random_seed
            )

            interactions = qm_region.qm_interactions
            bonds = interactions['bonds']
            angles = interactions['angles']
            dihedrals = interactions['dihedrals']

            # Same seed as the optimizer received -> same split as it used
            train_configs, val_configs = split_configurations_train_val(
                configurations, validation_fraction, random_seed
            )

            # Prefer the validation metrics stored by the optimizer, if any
            if getattr(opt_result, 'validation_sdf', None) is not None:
                val_sdf = opt_result.validation_sdf
                val_cost = opt_result.validation_cost
            else:
                val_sdf, val_cost, _, _ = compute_validation_sdf(
                    optimized_params, val_configs,
                    bonds, angles, dihedrals,
                    bond2params, n_processes
                )

            # Compute training SDF for comparison
            train_residue, train_ref_bonded = compute_ff_obj(optimized_params, train_configs,
                                                             bonds, angles, dihedrals,
                                                             bond2params, n_processes)

            train_cost = np.sum(train_residue**2)
            train_sdf = np.sqrt(train_cost / np.sum(train_ref_bonded**2))

            has_result = opt_result is not None
            results[method] = {
                'optimized_params': optimized_params,
                'train_cost': train_cost,
                'train_sdf': train_sdf,
                'val_cost': val_cost,
                'val_sdf': val_sdf,
                'success': opt_result.success if has_result else True,
                'nfev': opt_result.nfev if has_result else 0
            }

            logging.info(f" Training cost: {train_cost:.6f}")
            logging.info(f" Training SDF: {train_sdf:.6f}")
            logging.info(f" Validation cost: {val_cost:.6f}")
            logging.info(f" Validation SDF: {val_sdf:.6f}")
            logging.info(f" Success: {results[method]['success']}")

        except Exception as e:
            # Deliberate catch-all: one failing method must not abort the comparison
            logging.error(f" Error in method {method}: {str(e)}")
            results[method] = {'error': str(e)}

    # Write comparison summary
    with open(f"{filename}_comparison", 'w') as f:
        f.write("# OPTIMIZATION METHOD COMPARISON\n")
        f.write("# Method\tTrain Cost\tTrain SDF\tVal Cost\tVal SDF\tSuccess\tFunction Evaluations\n")

        for method in methods:
            entry = results.get(method)
            if entry is not None and 'error' not in entry:
                f.write(f"{method}\t{entry['train_cost']:.6f}\t"
                        f"{entry['train_sdf']:.6f}\t"
                        f"{entry['val_cost']:.6f}\t"
                        f"{entry['val_sdf']:.6f}\t"
                        f"{entry['success']}\t"
                        f"{entry['nfev']}\n")
            else:
                f.write(f"{method}\tERROR\tERROR\tERROR\tERROR\tFalse\t0\n")

    return results
2023
+
2024
+
2025
def _process_single_jacobian(args):
    """
    Compute the Jacobian for one configuration (worker for parallel mapping).

    Args:
        args: Tuple containing (ff_optimize, config, bonds, angles, dihedrals, bond2params)

    Returns:
        numpy.ndarray: Jacobian matrix for this configuration, as returned by ``jacobian_ff``
    """
    params, configuration, bond_list, angle_list, dihedral_list, param_map = args

    # The number of QM atoms is taken from the length of the forces array
    n_qm_atoms = len(configuration['qm_forces'])

    return jacobian_ff(
        params,
        configuration["qm_coordinates"],
        param_map,
        bond_list, angle_list, dihedral_list,
        qm_atoms_count=n_qm_atoms,
    )
2046
+
2047
+
2048
def split_configurations_train_val(configurations, validation_fraction=0.1, random_seed=None):
    """
    Split configurations into training and validation sets.

    The configurations are shuffled and the last part of the shuffled order
    becomes the validation set. At least one configuration is used for
    validation whenever two or more are available, and at least one
    configuration always stays in the training set. With zero or one
    configuration the validation set is therefore empty.

    Args:
        configurations (list): List of configuration dictionaries
        validation_fraction (float): Fraction of configurations to use for validation (default: 0.1)
        random_seed (int, optional): Random seed for reproducible splits. When
            given, the global ``random`` and ``numpy.random`` generators are
            re-seeded with it.

    Returns:
        tuple: (train_configurations, val_configurations)
    """
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    n_configs = len(configurations)

    # Requested size (minimum 1), clamped so the training set never becomes
    # empty and the counts never go negative for tiny inputs or fractions >= 1
    requested_val = max(1, int(n_configs * validation_fraction))
    n_val = min(requested_val, max(n_configs - 1, 0))
    n_train = n_configs - n_val

    # Create indices and shuffle them
    indices = list(range(n_configs))
    random.shuffle(indices)

    # First n_train shuffled indices train, the remainder validates
    train_configurations = [configurations[i] for i in indices[:n_train]]
    val_configurations = [configurations[i] for i in indices[n_train:]]

    logging.info(f"Split {n_configs} configurations into {n_train} training and {n_val} validation")

    return train_configurations, val_configurations
2083
+
2084
+
2085
def compute_validation_sdf(ff_optimize, val_configurations, bonds, angles, dihedrals, bond2params, n_processes=None):
    """
    Evaluate the fitted parameters on the validation set.

    The cost is the sum of squared residuals from ``compute_ff_obj``; the SDF
    is the square root of that cost divided by the sum of squared reference
    values returned alongside the residuals.

    Args:
        ff_optimize (numpy.ndarray): Optimized force field parameters
        val_configurations (list): Validation configuration list
        bonds, angles, dihedrals (list): Interaction lists
        bond2params (dict): Parameter mapping
        n_processes (int, optional): Number of processes for parallel computation

    Returns:
        tuple: (val_sdf, val_cost, val_residuals, val_ref_bonded); four None
        values when no validation configurations are given
    """
    if val_configurations:
        # Residuals and reference values on the validation set
        residuals, reference = compute_ff_obj(
            ff_optimize, val_configurations,
            bonds, angles, dihedrals, bond2params, n_processes
        )

        # Validation metrics
        cost = np.sum(residuals**2)
        sdf = np.sqrt(cost / np.sum(reference**2))

        logging.info(f"Validation SDF: {sdf:.6f}")
        logging.info(f"Validation cost: {cost:.6f}")

        return sdf, cost, residuals, reference

    logging.warning("No validation configurations provided")
    return None, None, None, None