MultiOptPy 1.20.5__py3-none-any.whl → 1.20.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,53 +8,132 @@ from multioptpy.Utils.calc_tools import Calculationtools
8
8
 
9
9
 
10
10
  class EnhancedRSPRFO:
11
+ """
12
+ Enhanced Restricted-Step P-RFO (Partitioned Rational Function Optimization) for transition state searches
13
+ with dynamic trust radius adjustment based on trust region methodology.
14
+
15
+ Key Improvements:
16
+ - Improved Levenberg-Marquardt-style alpha solver with backtracking
17
+ - Enhanced trust region adjustment with asymmetric expansion/contraction
18
+ - Robust Hessian update with curvature condition checks
19
+ - Improved mode following with overlap matrix tracking
20
+ - Step rejection mechanism for poor quality steps
21
+ - Hessian eigenvalue shifting for proper TS curvature
22
+ - Comprehensive step quality metrics and diagnostics
23
+ - Gradient-based step scaling for near-convergence behavior
24
+
25
+ References:
26
+ [1] Banerjee et al., J. Phys. Chem., 89, 52-57 (1985)
27
+ [2] Heyden et al., J. Chem. Phys., 123, 224101 (2005)
28
+ [3] Baker, J. Comput. Chem., 7, 385-395 (1986)
29
+ [4] Besalú and Bofill, Theor. Chem. Acc., 100, 265-274 (1998)
30
+ [5] Jensen and Jørgensen, J. Chem. Phys., 80, 1204 (1984) [Eigenvector following]
31
+ [6] Yuan, SIAM J. Optim. 11, 325-357 (2000) [Trust region methods]
32
+ [7] Nocedal and Wright, Numerical Optimization, 2nd ed. (2006) [Trust region]
33
+
34
+ This code is based on:
35
+ 1. https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/RSPRFOptimizer.py
36
+ """
37
+
11
38
  def __init__(self, **config):
12
39
  """
13
- Enhanced Rational Step P-RFO (Rational Function Optimization) for transition state searches
14
- with dynamic trust radius adjustment based on trust region methodology
15
-
16
- References:
17
- [1] Banerjee et al., Phys. Chem., 89, 52-57 (1985)
18
- [2] Heyden et al., J. Chem. Phys., 123, 224101 (2005)
19
- [3] Baker, J. Comput. Chem., 7, 385-395 (1986)
20
- [4] Besalú and Bofill, Theor. Chem. Acc., 100, 265-274 (1998)
21
- [5] Jensen and Jørgensen, J. Chem. Phys., 80, 1204 (1984) [Eigenvector following]
22
- [6] Yuan, SIAM J. Optim. 11, 325-357 (2000) [Trust region methods]
23
-
24
- This code is made based on the below codes.
25
- 1, https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/RSPRFOptimizer.py
26
-
27
-
40
+ Initialize the Enhanced RS-PRFO optimizer.
41
+
42
+ Parameters (via config dict):
43
+ -----------------------------
44
+ alpha0 : float
45
+ Initial alpha parameter for RS-PRFO (default: 1.0)
46
+ max_micro_cycles : int
47
+ Maximum number of micro-iterations for alpha adjustment (default: 50)
48
+ saddle_order : int
49
+ Number of negative eigenvalues at the saddle point (default: 1)
50
+ method : str
51
+ Hessian update method (default: "auto")
52
+ display_flag : bool
53
+ Enable/disable logging output (default: True)
54
+ debug : bool
55
+ Enable detailed debug output (default: False)
56
+ trust_radius : float
57
+ Initial trust radius (default: 0.1 for TS, 0.5 for min)
58
+ trust_radius_max : float
59
+ Maximum allowed trust radius (default: 0.3 for TS, 0.5 for min)
60
+ trust_radius_min : float
61
+ Minimum allowed trust radius (default: 0.01)
62
+ adapt_trust_radius : bool
63
+ Enable dynamic trust radius adjustment (default: True)
64
+ mode_following : bool
65
+ Enable mode following for consistent TS mode tracking (default: True)
66
+ eigvec_following : bool
67
+ Enable eigenvector following for mode mixing (default: True)
68
+ overlap_threshold : float
69
+ Minimum overlap for mode identification (default: 0.5)
70
+ step_rejection : bool
71
+ Enable step rejection for very poor steps (default: True)
72
+ rejection_threshold : float
73
+ Reduction ratio threshold below which steps are rejected (default: -0.5)
74
+ hessian_shift_enabled : bool
75
+ Enable Hessian eigenvalue shifting (default: True)
76
+ min_positive_eigval : float
77
+ Minimum positive eigenvalue after shifting (default: 0.001)
78
+ gradient_scaling_enabled : bool
79
+ Enable gradient-based step scaling near convergence (default: True)
80
+ gradient_scaling_threshold : float
81
+ Gradient norm threshold below which scaling is applied (default: 0.001)
28
82
  """
29
83
  # Standard RSPRFO parameters
30
84
  self.alpha0 = config.get("alpha0", 1.0)
31
- self.max_micro_cycles = config.get("max_micro_cycles", 20) # Increased from 1 to 20
85
+ self.max_micro_cycles = config.get("max_micro_cycles", 50)
32
86
  self.saddle_order = config.get("saddle_order", 1)
33
87
  self.hessian_update_method = config.get("method", "auto")
34
88
  self.display_flag = config.get("display_flag", True)
35
89
  self.debug = config.get("debug", False)
36
90
 
37
91
  # Alpha constraints to prevent numerical instability
38
- self.alpha_max = config.get("alpha_max", 1e6)
39
- self.alpha_step_max = config.get("alpha_step_max", 10.0)
92
+ self.alpha_max = config.get("alpha_max", 1e8)
93
+ self.alpha_min = config.get("alpha_min", 1e-8)
94
+ self.alpha_step_max = config.get("alpha_step_max", 100.0)
95
+
96
+ # Micro-cycle convergence criteria
97
+ self.micro_cycle_rtol = config.get("micro_cycle_rtol", 1e-3)
98
+ self.micro_cycle_atol = config.get("micro_cycle_atol", 1e-6)
40
99
 
41
100
  # Trust region parameters
42
101
  if self.saddle_order == 0:
43
102
  self.trust_radius_initial = config.get("trust_radius", 0.5)
44
- self.trust_radius_max = config.get("trust_radius_max", 0.5) # Upper bound (delta_hat)
103
+ self.trust_radius_max = config.get("trust_radius_max", 0.5)
45
104
  else:
46
105
  self.trust_radius_initial = config.get("trust_radius", 0.1)
47
- self.trust_radius_max = config.get("trust_radius_max", 0.1) # Upper bound for TS search
106
+ self.trust_radius_max = config.get("trust_radius_max", 0.3)
48
107
 
49
- self.trust_radius = self.trust_radius_initial # Current trust radius (delta_tr)
50
- self.trust_radius_min = config.get("trust_radius_min", 0.01) # Lower bound (delta_min)
51
-
52
- # Trust region acceptance thresholds
53
- self.accept_poor_threshold = config.get("accept_poor_threshold", 0.25) # Threshold for poor steps
54
- self.accept_good_threshold = config.get("accept_good_threshold", 0.75) # Threshold for very good steps
55
- self.shrink_factor = config.get("shrink_factor", 0.50) # Factor to shrink trust radius
56
- self.expand_factor = config.get("expand_factor", 2.00) # Factor to expand trust radius
57
- self.rtol_boundary = config.get("rtol_boundary", 0.10) # Relative tolerance for boundary detection
108
+ self.trust_radius = self.trust_radius_initial
109
+ self.trust_radius_min = config.get("trust_radius_min", 0.01)
110
+
111
+ # Trust region acceptance thresholds (based on Nocedal & Wright)
112
+ self.eta_1 = config.get("eta_1", 0.1)
113
+ self.eta_2 = config.get("eta_2", 0.25)
114
+ self.eta_3 = config.get("eta_3", 0.75)
115
+ self.gamma_1 = config.get("gamma_1", 0.25)
116
+ self.gamma_2 = config.get("gamma_2", 2.0)
117
+
118
+ # Step rejection settings
119
+ self.step_rejection_enabled = config.get("step_rejection", True)
120
+ self.rejection_threshold = config.get("rejection_threshold", -0.5)
121
+ self.max_consecutive_rejections = config.get("max_consecutive_rejections", 3)
122
+ self.consecutive_rejections = 0
123
+
124
+ # Hessian eigenvalue shifting - IMPROVED: smaller minimum to avoid over-shifting
125
+ self.hessian_shift_enabled = config.get("hessian_shift_enabled", True)
126
+ self.min_positive_eigval = config.get("min_positive_eigval", 0.001)
127
+ self.min_negative_eigval = config.get("min_negative_eigval", -0.001)
128
+
129
+ # NEW: Gradient-based step scaling for near-convergence
130
+ self.gradient_scaling_enabled = config.get("gradient_scaling_enabled", True)
131
+ self.gradient_scaling_threshold = config.get("gradient_scaling_threshold", 0.001)
132
+ self.min_step_scale = config.get("min_step_scale", 0.1) # Minimum scaling factor
133
+
134
+ # NEW: Adaptive trust radius based on gradient magnitude
135
+ self.adaptive_trust_enabled = config.get("adaptive_trust_enabled", True)
136
+ self.gradient_trust_coupling = config.get("gradient_trust_coupling", 0.5)
58
137
 
59
138
  # Whether to use trust radius adaptation
60
139
  self.adapt_trust_radius = config.get("adapt_trust_radius", True)
@@ -67,6 +146,7 @@ class EnhancedRSPRFO:
67
146
  # Hessian-related variables
68
147
  self.hessian = None
69
148
  self.bias_hessian = None
149
+ self.shifted_hessian = None
70
150
 
71
151
  # Optimization tracking variables
72
152
  self.prev_eigvec_max = None
@@ -75,15 +155,22 @@ class EnhancedRSPRFO:
75
155
  self.actual_energy_changes = []
76
156
  self.reduction_ratios = []
77
157
  self.trust_radius_history = []
158
+ self.step_quality_history = []
78
159
  self.prev_geometry = None
79
160
  self.prev_gradient = None
80
161
  self.prev_energy = None
81
162
  self.prev_move_vector = None
82
163
 
164
+ # Step rejection tracking
165
+ self.rejected_step_geometry = None
166
+ self.rejected_step_gradient = None
167
+
83
168
  # Mode Following specific parameters
84
169
  self.mode_following_enabled = config.get("mode_following", True)
85
- self.eigvec_history = [] # History of eigenvectors for consistent tracking
86
- self.ts_mode_idx = None # Current index of transition state direction
170
+ self.eigvec_history = []
171
+ self.eigval_history = []
172
+ self.ts_mode_idx = None
173
+ self.ts_mode_eigvec = None
87
174
 
88
175
  # Eigenvector Following settings
89
176
  self.eigvec_following = config.get("eigvec_following", True)
@@ -104,7 +191,7 @@ class EnhancedRSPRFO:
104
191
  f"bounds=[{self.trust_radius_min:.6f}, {self.trust_radius_max:.6f}]")
105
192
 
106
193
  def _build_hessian_updater_list(self):
107
- """Builds the prioritized dispatch list for Hessian updaters (from RSIRFO)."""
194
+ """Builds the prioritized dispatch list for Hessian updaters."""
108
195
  self.default_update_method = (
109
196
  "auto (default)",
110
197
  lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")
@@ -136,714 +223,900 @@ class EnhancedRSPRFO:
136
223
  ("psb", "psb", self.hessian_updater.PSB_hessian_update),
137
224
  ("msp", "msp", self.hessian_updater.MSP_hessian_update),
138
225
  ]
139
-
140
- def compute_reduction_ratio(self, gradient, hessian, step, actual_reduction):
226
+
227
+ def _project_grad_tr_rot(self, gradient, geometry):
141
228
  """
142
- Compute ratio between actual and predicted reduction in energy
229
+ Project out translation and rotation components from the gradient.
230
+ Uses QR decomposition for orthonormalization.
143
231
 
144
232
  Parameters:
145
- gradient: numpy.ndarray - Current gradient
146
- hessian: numpy.ndarray - Current approximate Hessian
147
- step: numpy.ndarray - Step vector
148
- actual_reduction: float - Actual energy reduction (previous_energy - current_energy)
233
+ gradient : numpy.ndarray
234
+ Gradient vector to project
235
+ geometry : numpy.ndarray
236
+ Current geometry coordinates
237
+
238
+ Returns:
239
+ numpy.ndarray
240
+ Projected gradient with TR/ROT components removed
241
+ """
242
+ coords = geometry.reshape(-1, 3)
243
+ n_atoms = coords.shape[0]
244
+
245
+ if n_atoms < 3:
246
+ return gradient
247
+
248
+ center = np.mean(coords, axis=0)
249
+ coords_centered = coords - center
250
+
251
+ basis = []
252
+
253
+ # Translation (x, y, z)
254
+ basis.append(np.tile([1, 0, 0], n_atoms))
255
+ basis.append(np.tile([0, 1, 0], n_atoms))
256
+ basis.append(np.tile([0, 0, 1], n_atoms))
257
+
258
+ # Rotation (Rx, Ry, Rz via cross product)
259
+ rx = np.zeros_like(coords)
260
+ rx[:, 1] = -coords_centered[:, 2]
261
+ rx[:, 2] = coords_centered[:, 1]
262
+ basis.append(rx.flatten())
263
+
264
+ ry = np.zeros_like(coords)
265
+ ry[:, 0] = coords_centered[:, 2]
266
+ ry[:, 2] = -coords_centered[:, 0]
267
+ basis.append(ry.flatten())
268
+
269
+ rz = np.zeros_like(coords)
270
+ rz[:, 0] = -coords_centered[:, 1]
271
+ rz[:, 1] = coords_centered[:, 0]
272
+ basis.append(rz.flatten())
149
273
 
274
+ A = np.array(basis).T
275
+ Q, R = np.linalg.qr(A, mode='reduced')
276
+
277
+ diag_R = np.abs(np.diag(R))
278
+ valid_cols = diag_R > 1e-10
279
+ Q = Q[:, valid_cols]
280
+
281
+ overlaps = np.dot(Q.T, gradient)
282
+ tr_rot_part = np.dot(Q, overlaps)
283
+ projected_gradient = gradient - tr_rot_part
284
+
285
+ return projected_gradient
286
+
287
+ def _shift_hessian_eigenvalues(self, hessian, eigvals, eigvecs):
288
+ """
289
+ Shift Hessian eigenvalues to ensure proper curvature for TS search.
290
+
291
+ IMPROVED: More conservative shifting to avoid over-constraining
292
+ small eigenvalues that correspond to soft modes.
293
+
294
+ For saddle_order > 0:
295
+ - First `saddle_order` eigenvalues should be negative
296
+ - Remaining eigenvalues should be positive (but allow small values)
297
+
298
+ For saddle_order == 0 (minimization):
299
+ - All eigenvalues should be positive
300
+
301
+ Parameters:
302
+ hessian : numpy.ndarray
303
+ Original Hessian matrix
304
+ eigvals : numpy.ndarray
305
+ Eigenvalues of the Hessian
306
+ eigvecs : numpy.ndarray
307
+ Eigenvectors of the Hessian
308
+
150
309
  Returns:
151
- float: Ratio of actual to predicted reduction
310
+ tuple
311
+ (shifted_hessian, shifted_eigvals, shift_applied)
312
+ """
313
+ if not self.hessian_shift_enabled:
314
+ return hessian, eigvals, False
315
+
316
+ n = len(eigvals)
317
+ shifted_eigvals = eigvals.copy()
318
+ shift_applied = False
319
+
320
+ if self.saddle_order == 0:
321
+ # Minimization: all eigenvalues should be positive
322
+ min_eigval = np.min(eigvals)
323
+ if min_eigval < self.min_positive_eigval:
324
+ shift = self.min_positive_eigval - min_eigval
325
+ shifted_eigvals = eigvals + shift
326
+ shift_applied = True
327
+ self.log(f"Applied eigenvalue shift of {shift:.6f} for minimization")
328
+ else:
329
+ # TS search: need exactly saddle_order negative eigenvalues
330
+ sorted_indices = np.argsort(eigvals)
331
+
332
+ # Ensure first saddle_order eigenvalues are sufficiently negative
333
+ for i in range(self.saddle_order):
334
+ idx = sorted_indices[i]
335
+ if eigvals[idx] > self.min_negative_eigval:
336
+ shifted_eigvals[idx] = self.min_negative_eigval
337
+ shift_applied = True
338
+
339
+ # IMPROVED: Only shift eigenvalues that are very close to zero or negative
340
+ # when they should be positive. Don't shift already positive eigenvalues
341
+ # to a higher minimum unless they are problematically small.
342
+ for i in range(self.saddle_order, n):
343
+ idx = sorted_indices[i]
344
+ # Only shift if eigenvalue is negative or very close to zero
345
+ if eigvals[idx] < 1e-6: # Much smaller threshold
346
+ shifted_eigvals[idx] = self.min_positive_eigval
347
+ shift_applied = True
348
+
349
+ if shift_applied:
350
+ shifted_hessian = eigvecs @ np.diag(shifted_eigvals) @ eigvecs.T
351
+ shifted_hessian = 0.5 * (shifted_hessian + shifted_hessian.T)
352
+ self.log(f"Hessian eigenvalues shifted for proper curvature")
353
+ return shifted_hessian, shifted_eigvals, True
354
+
355
+ return hessian, eigvals, False
356
+
357
+ def _compute_gradient_based_scale(self, gradient_norm, step_norm):
358
+ """
359
+ Compute a scaling factor based on gradient magnitude to prevent
360
+ overshooting near convergence.
361
+
362
+ When the gradient is small but the step is large, this indicates
363
+ the Hessian may have small eigenvalues causing large steps.
364
+
365
+ Parameters:
366
+ gradient_norm : float
367
+ Norm of the current gradient
368
+ step_norm : float
369
+ Norm of the proposed step
370
+
371
+ Returns:
372
+ float
373
+ Scaling factor (0 < scale <= 1)
374
+ """
375
+ if not self.gradient_scaling_enabled:
376
+ return 1.0
377
+
378
+ if gradient_norm < 1e-10 or step_norm < 1e-10:
379
+ return 1.0
380
+
381
+ # Expected step norm based on gradient and typical curvature
382
+ # For a Newton step: s = -H^{-1}g, so |s| ~ |g| / |lambda_min|
383
+ # If |s| >> |g| / typical_curvature, we should scale down
384
+
385
+ # Use a simple heuristic: if step_norm / gradient_norm > threshold,
386
+ # scale the step proportionally
387
+ ratio = step_norm / gradient_norm
388
+
389
+ # Typical ratio for well-conditioned systems is O(1) to O(10)
390
+ # If ratio is very large (above max_ratio below), the Hessian likely has very small eigenvalues
391
+ max_ratio = 50.0 # Maximum allowed ratio
392
+
393
+ if ratio > max_ratio:
394
+ scale = max_ratio / ratio
395
+ scale = max(scale, self.min_step_scale) # Don't scale below minimum
396
+ self.log(f"Gradient-based scaling: ratio={ratio:.2f}, scale={scale:.4f}")
397
+ return scale
398
+
399
+ return 1.0
400
+
401
+ def _compute_adaptive_trust_radius(self, gradient_norm):
402
+ """
403
+ Compute an adaptive trust radius based on gradient magnitude.
404
+
405
+ Near convergence (small gradient), the trust radius should be
406
+ proportional to the gradient to prevent overshooting.
407
+
408
+ Parameters:
409
+ gradient_norm : float
410
+ Norm of the current gradient
411
+
412
+ Returns:
413
+ float
414
+ Suggested trust radius
415
+ """
416
+ if not self.adaptive_trust_enabled:
417
+ return self.trust_radius
418
+
419
+ if gradient_norm < self.gradient_scaling_threshold:
420
+ # Near convergence: scale trust radius with gradient
421
+ # Use a linear relationship with a minimum floor
422
+ adaptive_radius = self.gradient_trust_coupling * gradient_norm / self.gradient_scaling_threshold * self.trust_radius_max
423
+ adaptive_radius = max(adaptive_radius, self.trust_radius_min)
424
+ adaptive_radius = min(adaptive_radius, self.trust_radius)
425
+
426
+ if adaptive_radius < self.trust_radius * 0.9: # Only log if significant change
427
+ self.log(f"Adaptive trust radius: {self.trust_radius:.6f} -> {adaptive_radius:.6f} "
428
+ f"(gradient_norm={gradient_norm:.6e})")
429
+
430
+ return adaptive_radius
431
+
432
+ return self.trust_radius
433
+
434
+ def compute_reduction_ratio(self, gradient, hessian, step, actual_reduction):
435
+ """
436
+ Compute ratio between actual and predicted reduction in energy.
437
+
438
+ Parameters:
439
+ gradient : numpy.ndarray
440
+ Current gradient
441
+ hessian : numpy.ndarray
442
+ Current approximate Hessian
443
+ step : numpy.ndarray
444
+ Step vector
445
+ actual_reduction : float
446
+ Actual energy reduction (previous_energy - current_energy)
447
+
448
+ Returns:
449
+ float
450
+ Ratio of actual to predicted reduction
152
451
  """
153
- # Calculate predicted reduction from quadratic model
154
452
  g_flat = gradient.flatten()
155
453
  step_flat = step.flatten()
156
454
 
157
- # Linear term of the model: g^T * p
158
455
  linear_term = np.dot(g_flat, step_flat)
159
-
160
- # Quadratic term of the model: 0.5 * p^T * H * p
161
456
  quadratic_term = 0.5 * np.dot(step_flat, np.dot(hessian, step_flat))
162
-
163
- # Predicted reduction: -g^T * p - 0.5 * p^T * H * p
164
- # Negative sign because we're predicting the reduction (energy decrease)
165
457
  predicted_reduction = -(linear_term + quadratic_term)
166
458
 
167
- # Avoid division by zero or very small numbers
168
- if abs(predicted_reduction) < 1e-10:
459
+ if abs(predicted_reduction) < 1e-14:
169
460
  self.log("Warning: Predicted reduction is near zero")
170
- return 0.0
461
+ return 1.0 if abs(actual_reduction) < 1e-14 else 0.0
171
462
 
172
- # Calculate ratio
173
463
  ratio = actual_reduction / predicted_reduction
174
464
 
175
- # Safeguard against numerical issues
176
465
  if not np.isfinite(ratio):
177
466
  self.log("Warning: Non-finite reduction ratio, using 0.0")
178
467
  return 0.0
179
468
 
180
- self.log(f"Actual reduction: {actual_reduction:.6e}, "
181
- f"Predicted reduction: {predicted_reduction:.6e}, "
182
- f"Ratio: {ratio:.4f}")
469
+ self.log(f"Reduction ratio: actual={actual_reduction:.6e}, "
470
+ f"predicted={predicted_reduction:.6e}, ratio={ratio:.4f}")
183
471
 
184
472
  return ratio
185
473
 
186
- def adjust_trust_radius(self, actual_energy_change, predicted_energy_change, step_norm):
474
+ def adjust_trust_radius(self, ratio, step_norm, at_boundary):
187
475
  """
188
- Dynamically adjust the trust radius based on ratio between actual and predicted reductions
189
- using the trust region methodology
476
+ Dynamically adjust the trust radius based on reduction ratio.
477
+ Uses Nocedal & Wright's trust region update strategy.
478
+
479
+ Parameters:
480
+ ratio : float
481
+ Reduction ratio (actual/predicted)
482
+ step_norm : float
483
+ Norm of the current step
484
+ at_boundary : bool
485
+ Whether the step is at the trust region boundary
190
486
  """
191
- if not self.adapt_trust_radius or actual_energy_change is None or predicted_energy_change is None:
192
- return
193
-
194
- # Avoid division by zero or very small numbers
195
- if abs(predicted_energy_change) < 1e-10:
196
- self.log("Skipping trust radius update due to negligible predicted energy change")
487
+ if not self.adapt_trust_radius:
197
488
  return
198
489
 
199
- # Calculate the ratio between actual and predicted energy changes
200
- # Use absolute values to focus on magnitude of agreement
201
- ratio = abs(actual_energy_change / predicted_energy_change)
202
- self.log(f"Raw reduction ratio: {actual_energy_change / predicted_energy_change:.4f}")
203
- self.log(f"Absolute reduction ratio: {ratio:.4f}")
204
- self.reduction_ratios.append(ratio)
205
-
206
490
  old_trust_radius = self.trust_radius
207
-
208
- # Improved boundary detection - check if step is close to current trust radius
209
- at_boundary = step_norm >= old_trust_radius * 0.95 # Within 5% of trust radius
210
- self.log(f"Step norm: {step_norm:.6f}, Trust radius: {old_trust_radius:.6f}, At boundary: {at_boundary}")
211
-
212
- # Better logic for trust radius adjustment
213
- if ratio < 0.25 or ratio > 4.0: # Predicted energy change is very different from actual
214
- # Poor prediction - decrease the trust radius
215
- self.trust_radius = max(self.shrink_factor * self.trust_radius, self.trust_radius_min)
216
- if self.trust_radius != old_trust_radius:
217
- self.log(f"Poor step quality (ratio={ratio:.3f}), shrinking trust radius to {self.trust_radius:.6f}")
218
- elif (0.8 <= ratio <= 1.25) and at_boundary:
219
- # Very good prediction and step at trust radius boundary - increase the trust radius
220
- self.trust_radius = min(self.expand_factor * self.trust_radius, self.trust_radius_max)
221
- if self.trust_radius != old_trust_radius:
222
- self.log(f"Good step quality (ratio={ratio:.3f}) at boundary, expanding trust radius to {self.trust_radius:.6f}")
491
+ self.trust_radius_history.append(old_trust_radius)
492
+
493
+ quality_metric = {
494
+ 'iteration': self.iter,
495
+ 'ratio': ratio,
496
+ 'step_norm': step_norm,
497
+ 'at_boundary': at_boundary,
498
+ 'trust_radius': old_trust_radius
499
+ }
500
+ self.step_quality_history.append(quality_metric)
501
+
502
+ if ratio < self.eta_2:
503
+ self.trust_radius = max(self.gamma_1 * step_norm, self.trust_radius_min)
504
+ self.log(f"Poor step quality (ratio={ratio:.3f} < {self.eta_2}), "
505
+ f"shrinking trust radius: {old_trust_radius:.6f} -> {self.trust_radius:.6f}")
506
+ elif ratio > self.eta_3 and at_boundary:
507
+ self.trust_radius = min(self.gamma_2 * self.trust_radius, self.trust_radius_max)
508
+ self.log(f"Good step quality (ratio={ratio:.3f} > {self.eta_3}) at boundary, "
509
+ f"expanding trust radius: {old_trust_radius:.6f} -> {self.trust_radius:.6f}")
223
510
  else:
224
- # Acceptable prediction or step not at boundary - keep the same trust radius
225
- self.log(f"Acceptable step quality (ratio={ratio:.3f}), keeping trust radius at {self.trust_radius:.6f}")
226
-
227
- def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0, pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
511
+ self.log(f"Acceptable step quality (ratio={ratio:.3f}), "
512
+ f"keeping trust radius at {self.trust_radius:.6f}")
513
+
514
+ def _solve_alpha_micro_cycles(self, eigvals, gradient_trans, max_indices, min_indices, gradient_norm):
228
515
  """
229
- Execute one step of enhanced RSPRFO optimization with trust radius adjustment
516
+ Solve for alpha using improved micro-cycle iteration with
517
+ Levenberg-Marquardt style damping and backtracking.
230
518
 
231
519
  Parameters:
232
- geom_num_list: numpy.ndarray - Current geometry coordinates
233
- B_g: numpy.ndarray - Current gradient
234
- pre_B_g: numpy.ndarray - Previous gradient
235
- pre_geom: numpy.ndarray - Previous geometry
236
- B_e: float - Current energy
237
- pre_B_e: float - Previous energy
238
- pre_move_vector: numpy.ndarray - Previous step vector
239
- initial_geom_num_list: numpy.ndarray - Initial geometry
240
- g: numpy.ndarray - Alternative gradient representation
241
- pre_g: numpy.ndarray - Previous alternative gradient representation
242
-
520
+ eigvals : numpy.ndarray
521
+ Eigenvalues of the Hessian
522
+ gradient_trans : numpy.ndarray
523
+ Gradient transformed to eigenvector basis
524
+ max_indices : list
525
+ Indices for maximization subspace
526
+ min_indices : list
527
+ Indices for minimization subspace
528
+ gradient_norm : float
529
+ Norm of the original gradient (for adaptive scaling)
530
+
243
531
  Returns:
244
- numpy.ndarray - Optimization step vector
532
+ tuple
533
+ (step, step_norm, converged)
245
534
  """
246
- self.log(f"\n{'='*50}\nIteration {self.iter}\n{'='*50}")
247
-
248
- if self.Initialization:
249
- self.prev_eigvec_max = None
250
- self.prev_eigvec_min = None
251
- self.predicted_energy_changes = []
252
- self.actual_energy_changes = []
253
- self.reduction_ratios = []
254
- self.trust_radius_history = []
255
- self.prev_geometry = None
256
- self.prev_gradient = None
257
- self.prev_energy = None
258
- self.prev_move_vector = None
259
- self.eigvec_history = []
260
- self.ts_mode_idx = None
261
- self.Initialization = False
262
- self.log(f"First iteration - using initial trust radius {self.trust_radius:.6f}")
263
- else:
264
- # Adjust trust radius based on the previous step if we have energy data
265
- if self.prev_energy is not None and len(self.predicted_energy_changes) > 0:
266
- actual_energy_change = B_e - self.prev_energy
267
- predicted_energy_change = self.predicted_energy_changes[-1]
268
- self.actual_energy_changes.append(actual_energy_change)
269
-
270
- # Get the previous step length
271
- if len(pre_move_vector) > 0:
272
- prev_step_norm = norm(pre_move_vector.flatten())
273
- elif self.prev_move_vector is not None:
274
- prev_step_norm = norm(self.prev_move_vector.flatten())
275
- else:
276
- prev_step_norm = 0.0
277
-
278
- # Log energy comparison
279
- self.log(f"Previous energy: {self.prev_energy:.6f}, Current energy: {B_e:.6f}")
280
- self.log(f"Actual energy change: {actual_energy_change:.6f}")
281
- self.log(f"Predicted energy change: {predicted_energy_change:.6f}")
282
- self.log(f"Previous step norm: {prev_step_norm:.6f}")
283
-
284
- # Complete Hessian for the reduction ratio calculation
285
- H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
286
- H = Calculationtools().project_out_hess_tr_and_rot_for_coord(H, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), display_eigval=False)
287
- # Compute reduction ratio
288
- reduction_ratio = self.compute_reduction_ratio(
289
- self.prev_gradient, H, self.prev_move_vector, actual_energy_change)
290
-
291
- # Adjust trust radius based on step quality and length
292
- self.adjust_trust_radius(actual_energy_change, predicted_energy_change, prev_step_norm)
293
-
294
- # Check Hessian
295
- if self.hessian is None:
296
- raise ValueError("Hessian matrix must be set before running optimization")
297
-
298
- # Update Hessian if we have previous geometry and gradient information
299
- if self.prev_geometry is not None and self.prev_gradient is not None and len(pre_B_g) > 0 and len(pre_geom) > 0:
300
- self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)
301
-
302
- # Ensure gradient is properly shaped as a 1D array
303
- gradient = np.asarray(B_g).flatten()
304
- H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
305
-
306
- # Compute eigenvalues and eigenvectors of the hessian
307
- eigvals, eigvecs = np.linalg.eigh(H)
308
-
309
- # Count negative eigenvalues for diagnostic purposes
310
- neg_eigval_count = np.sum(eigvals < -1e-6)
311
- self.log(f"Found {neg_eigval_count} negative eigenvalues, target for this saddle order: {self.saddle_order}")
312
-
313
- # Store previous eigenvector information
314
- prev_eigvecs = None
315
- if len(self.eigvec_history) > 0:
316
- prev_eigvecs = self.eigvec_history[-1]
317
-
318
- # Standard mode selection (with mode following if enabled)
319
- if self.mode_following_enabled and self.saddle_order > 0:
320
- if self.ts_mode_idx is None:
321
- # For first run, select mode with most negative eigenvalue
322
- self.ts_mode_idx = np.argmin(eigvals)
323
- self.log(f"Initial TS mode selected: {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")
324
-
325
- # Find corresponding modes between steps
326
- mode_indices = self.find_corresponding_mode(eigvals, eigvecs, prev_eigvecs, self.ts_mode_idx)
327
-
328
- # Apply Eigenvector Following for cases with mode mixing
329
- if self.eigvec_following and len(mode_indices) > 1:
330
- mode_indices = self.apply_eigenvector_following(eigvals, eigvecs, gradient.dot(eigvecs), mode_indices)
331
-
332
- # Update tracked mode
333
- if mode_indices:
334
- self.ts_mode_idx = mode_indices[0]
335
- self.log(f"Mode following: tracking mode {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")
336
-
337
- # Update max_indices (saddle point direction)
338
- max_indices = mode_indices
339
- else:
340
- # If no corresponding mode found, use standard approach
341
- self.log("No corresponding mode found, using default mode selection")
342
- max_indices = self.roots
343
- else:
344
- # Standard mode selection when mode following is disabled
345
- if self.saddle_order == 0:
346
- min_indices = list(range(len(gradient)))
347
- max_indices = []
348
- else:
349
- min_indices = [i for i in range(gradient.size) if i not in self.roots]
350
- max_indices = self.roots
351
-
352
- # Store eigenvectors in history
353
- self.eigvec_history.append(eigvecs)
354
- if len(self.eigvec_history) > 5: # Keep only last 5 steps
355
- self.eigvec_history.pop(0)
356
-
357
- # Transform gradient to eigenvector space
358
- gradient_trans = eigvecs.T.dot(gradient).flatten()
359
-
360
- # Set minimization directions (all directions not in max_indices)
361
- min_indices = [i for i in range(gradient.size) if i not in max_indices]
362
-
363
- # Initialize alpha parameter
364
535
  alpha = self.alpha0
365
-
366
- # Tracking variables
367
536
  best_step = None
368
537
  best_step_norm_diff = float('inf')
369
538
  step_norm_history = []
370
539
 
371
- # NEW IMPLEMENTATION: Micro-cycle loop with improved alpha calculation
540
+ # Compute adaptive trust radius based on gradient
541
+ effective_trust_radius = self._compute_adaptive_trust_radius(gradient_norm)
542
+
372
543
  for mu in range(self.max_micro_cycles):
373
- self.log(f"RS-PRFO micro cycle {mu:02d}, alpha={alpha:.6f}, trust radius={self.trust_radius:.6f}")
544
+ self.log(f" Micro cycle {mu:02d}: alpha={alpha:.6e}, trust_radius={effective_trust_radius:.6f}")
374
545
 
375
546
  try:
376
- # Make a fresh step vector for this cycle - essential to ensure proper recalculation
377
547
  step = np.zeros_like(gradient_trans)
378
548
 
379
- # Maximization subspace calculation
549
+ # Maximization subspace
380
550
  step_max = np.array([])
381
- eigval_max = 0
551
+ eigval_max = 0.0
382
552
  if len(max_indices) > 0:
383
- # Calculate augmented Hessian
384
553
  H_aug_max = self.get_augmented_hessian(
385
554
  eigvals[max_indices], gradient_trans[max_indices], alpha
386
555
  )
387
-
388
- # Solve RFO equations
389
556
  step_max, eigval_max, nu_max, eigvec_max = self.solve_rfo(
390
557
  H_aug_max, "max", prev_eigvec=self.prev_eigvec_max
391
558
  )
392
-
393
- # Store eigenvector for next iteration
394
559
  self.prev_eigvec_max = eigvec_max
395
-
396
- # Copy step to the main step vector
397
560
  step[max_indices] = step_max
398
561
 
399
- # Minimization subspace calculation
562
+ # Minimization subspace
400
563
  step_min = np.array([])
401
- eigval_min = 0
564
+ eigval_min = 0.0
402
565
  if len(min_indices) > 0:
403
- # Calculate augmented Hessian
404
566
  H_aug_min = self.get_augmented_hessian(
405
567
  eigvals[min_indices], gradient_trans[min_indices], alpha
406
568
  )
407
-
408
- # Solve RFO equations
409
569
  step_min, eigval_min, nu_min, eigvec_min = self.solve_rfo(
410
570
  H_aug_min, "min", prev_eigvec=self.prev_eigvec_min
411
571
  )
412
-
413
- # Store eigenvector for next iteration
414
572
  self.prev_eigvec_min = eigvec_min
415
-
416
- # Copy step to the main step vector
417
573
  step[min_indices] = step_min
418
574
 
419
- # Calculate norms of the current step
420
- step_max_norm = np.linalg.norm(step_max) if len(max_indices) > 0 else 0.0
421
- step_min_norm = np.linalg.norm(step_min) if len(min_indices) > 0 else 0.0
422
575
  step_norm = np.linalg.norm(step)
576
+ step_norm_history.append(step_norm)
423
577
 
424
- # Log the current norms
425
- if len(max_indices) > 0:
426
- self.log(f"norm(step_max)={step_max_norm:.6f}")
427
- if len(min_indices) > 0:
428
- self.log(f"norm(step_min)={step_min_norm:.6f}")
429
-
430
- self.log(f"norm(step)={step_norm:.6f}")
578
+ step_max_norm = np.linalg.norm(step_max) if len(max_indices) > 0 else 0.0
579
+ step_min_norm = np.linalg.norm(step_min) if len(min_indices) > 0 else 0.0
431
580
 
432
- # Keep track of step norm history for convergence detection
433
- step_norm_history.append(step_norm)
581
+ if self.debug:
582
+ self.log(f" |step_max|={step_max_norm:.6f}, |step_min|={step_min_norm:.6f}, |step|={step_norm:.6f}")
434
583
 
435
- # Save this step if it's closest to trust radius (for later use)
436
- norm_diff = abs(step_norm - self.trust_radius)
584
+ # Track best step
585
+ norm_diff = abs(step_norm - effective_trust_radius)
437
586
  if norm_diff < best_step_norm_diff:
438
587
  best_step = step.copy()
439
588
  best_step_norm_diff = norm_diff
440
589
 
441
- # Check if step is already within trust radius
442
- if step_norm <= self.trust_radius:
443
- self.log(f"Step satisfies trust radius {self.trust_radius:.6f}")
590
+ # Check convergence
591
+ if step_norm <= effective_trust_radius:
592
+ self.log(f" Step satisfies trust radius (|step|={step_norm:.6f} <= {effective_trust_radius:.6f})")
444
593
  break
445
594
 
446
- # Calculate alpha update for each subspace
447
- # Max subspace
448
- alpha_step_max = 0.0
449
- if len(max_indices) > 0:
450
- alpha_step_max = self.get_alpha_step(
451
- alpha, eigval_max, step_max_norm, eigvals[max_indices],
452
- gradient_trans[max_indices], "max"
453
- )
595
+ # Check relative convergence
596
+ if step_norm > 0 and norm_diff / step_norm < self.micro_cycle_rtol:
597
+ self.log(f" Micro-cycle converged (relative diff={norm_diff/step_norm:.6e})")
598
+ step = step * (effective_trust_radius / step_norm)
599
+ return step, effective_trust_radius, True
454
600
 
455
- # Min subspace
456
- alpha_step_min = 0.0
457
- if len(min_indices) > 0:
458
- alpha_step_min = self.get_alpha_step(
459
- alpha, eigval_min, step_min_norm, eigvals[min_indices],
460
- gradient_trans[min_indices], "min"
461
- )
601
+ # Check for stagnation
602
+ if len(step_norm_history) >= 3:
603
+ recent_changes = [abs(step_norm_history[-i] - step_norm_history[-(i+1)])
604
+ for i in range(1, 3)]
605
+ if all(c < self.micro_cycle_atol for c in recent_changes):
606
+ self.log(f" Micro-cycle stagnated, using best step")
607
+ if best_step is not None:
608
+ best_norm = np.linalg.norm(best_step)
609
+ if best_norm > effective_trust_radius:
610
+ best_step = best_step * (effective_trust_radius / best_norm)
611
+ return best_step, min(best_norm, effective_trust_radius), True
462
612
 
463
- # Combine alpha steps with appropriate weighting
464
- alpha_step = 0.0
465
- if alpha_step_max != 0.0 and alpha_step_min != 0.0:
466
- # Weight by squared norms
467
- w_max = step_max_norm**2 if step_max_norm > 0.0 else 0.0
468
- w_min = step_min_norm**2 if step_min_norm > 0.0 else 0.0
469
- if w_max + w_min > 0.0:
470
- alpha_step = (w_max * alpha_step_max + w_min * alpha_step_min) / (w_max + w_min)
471
- else:
472
- alpha_step = alpha_step_max if abs(alpha_step_max) > abs(alpha_step_min) else alpha_step_min
473
- else:
474
- alpha_step = alpha_step_max if alpha_step_max != 0.0 else alpha_step_min
613
+ # Calculate alpha update
614
+ alpha_step = self._compute_alpha_step(
615
+ alpha, eigval_max, eigval_min,
616
+ step_max_norm, step_min_norm, step_norm,
617
+ eigvals, gradient_trans, max_indices, min_indices,
618
+ effective_trust_radius
619
+ )
475
620
 
476
- # If alpha_step is still 0, use a direct calculation with the total step
477
- if abs(alpha_step) < 1e-10 and step_norm > 0.0:
478
- try:
479
- # Calculate derivative directly using analytic formula
480
- dstep2_dalpha = self.calculate_step_derivative(
481
- alpha, eigval_max, eigval_min, eigvals,
482
- max_indices, min_indices, gradient_trans, step_norm
483
- )
484
-
485
- if abs(dstep2_dalpha) > 1e-10:
486
- alpha_step = 2.0 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
487
- self.log(f"Direct alpha_step calculation: {alpha_step:.6f}")
488
- except Exception as e:
489
- self.log(f"Error in direct derivative calculation: {str(e)}")
490
- alpha_step = 0.0
621
+ # Apply damping for stability
622
+ damping = 1.0
623
+ if abs(alpha_step) > self.alpha_step_max:
624
+ damping = self.alpha_step_max / abs(alpha_step)
625
+ alpha_step *= damping
626
+ self.log(f" Damped alpha_step by factor {damping:.4f}")
491
627
 
492
- # Update alpha with proper bounds
628
+ # Update alpha with bounds
493
629
  old_alpha = alpha
630
+ alpha = np.clip(alpha + alpha_step, self.alpha_min, self.alpha_max)
494
631
 
495
- # If derivative-based approach fails, use heuristic
496
- if abs(alpha_step) < 1e-10:
497
- # Apply a more aggressive heuristic - double alpha
498
- alpha = min(alpha * 2.0, self.alpha_max)
499
- self.log(f"Using heuristic alpha update: {old_alpha:.6f} -> {alpha:.6f}")
500
- else:
501
- # Apply safety bounds to alpha_step
502
- alpha_step_limited = np.clip(alpha_step, -self.alpha_step_max, self.alpha_step_max)
503
-
504
- if abs(alpha_step_limited) != abs(alpha_step):
505
- self.log(f"Limited alpha_step from {alpha_step:.6f} to {alpha_step_limited:.6f}")
506
-
507
- # Ensure alpha remains positive and within bounds
508
- alpha = min(max(old_alpha + alpha_step_limited, 1e-6), self.alpha_max)
509
- self.log(f"Updated alpha: {old_alpha:.6f} -> {alpha:.6f}")
632
+ if self.debug:
633
+ self.log(f" alpha: {old_alpha:.6e} -> {alpha:.6e} (step={alpha_step:.6e})")
510
634
 
511
- # Check if alpha reached its maximum value
512
- if alpha == self.alpha_max:
513
- self.log(f"Alpha reached maximum value ({self.alpha_max}), using best step found")
635
+ # Check if alpha hit bounds
636
+ if alpha == self.alpha_max or alpha == self.alpha_min:
637
+ self.log(f" Alpha reached bounds, using best step")
514
638
  if best_step is not None:
515
- step = best_step.copy()
639
+ best_norm = np.linalg.norm(best_step)
640
+ if best_norm > effective_trust_radius:
641
+ best_step = best_step * (effective_trust_radius / best_norm)
642
+ return best_step, min(best_norm, effective_trust_radius), True
516
643
  break
517
-
518
- # Check for progress in step norm adjustments
519
- if len(step_norm_history) >= 3:
520
- # Calculate consecutive changes in step norm
521
- recent_changes = [abs(step_norm_history[-i] - step_norm_history[-(i+1)])
522
- for i in range(1, min(3, len(step_norm_history)))]
523
644
 
524
- # If step norms are not changing significantly, break the loop
525
- if all(change < 1e-6 for change in recent_changes):
526
- self.log(f"Step norms not changing significantly: {step_norm_history[-3:]}")
527
- self.log("Breaking micro-cycle loop")
528
-
529
- # Use the best step found so far
530
- if best_step is not None and best_step_norm_diff < norm_diff:
531
- step = best_step.copy()
532
- self.log("Using best step found so far")
533
-
534
- break
535
-
536
645
  except Exception as e:
537
- self.log(f"Error in micro-cycle: {str(e)}")
538
- # Use best step if available, otherwise scale current step
646
+ self.log(f" Error in micro-cycle {mu}: {str(e)}")
539
647
  if best_step is not None:
540
- self.log("Using best step due to error")
541
- step = best_step.copy()
542
- else:
543
- # Simple scaling fallback
544
- if step_norm > 0 and step_norm > self.trust_radius:
545
- scale_factor = self.trust_radius / step_norm
546
- step = step * scale_factor
547
- self.log(f"Scaled step to trust radius due to error")
648
+ best_norm = np.linalg.norm(best_step)
649
+ if best_norm > effective_trust_radius:
650
+ best_step = best_step * (effective_trust_radius / best_norm)
651
+ return best_step, min(best_norm, effective_trust_radius), False
548
652
  break
549
653
 
550
- else:
551
- # If micro-cycles did not converge
552
- self.log(f"Micro-cycles did not converge in {self.max_micro_cycles} iterations")
553
- # Use the best step if available
554
- if best_step is not None and best_step_norm_diff < abs(step_norm - self.trust_radius):
555
- self.log("Using best step found during micro-cycles")
556
- step = best_step.copy()
654
+ # Micro-cycles did not converge - use best step with scaling
655
+ self.log(f" Micro-cycles did not converge in {self.max_micro_cycles} iterations")
656
+ if best_step is not None:
657
+ best_norm = np.linalg.norm(best_step)
658
+ if best_norm > effective_trust_radius:
659
+ best_step = best_step * (effective_trust_radius / best_norm)
660
+ return best_step, min(best_norm, effective_trust_radius), False
557
661
 
558
- # Transform step back to original coordinates
559
- move_vector = eigvecs.dot(step)
560
- step_norm = norm(move_vector)
561
-
562
- # Only scale down steps that exceed the trust radius
563
- if step_norm > self.trust_radius:
564
- self.log(f"Step norm {step_norm:.6f} exceeds trust radius {self.trust_radius:.6f}, scaling down")
565
- move_vector = move_vector * (self.trust_radius / step_norm)
566
- step_norm = self.trust_radius
662
+ return np.zeros_like(gradient_trans), 0.0, False
663
+
664
def _compute_alpha_step(self, alpha, eigval_max, eigval_min,
                        step_max_norm, step_min_norm, step_norm,
                        eigvals, gradient_trans, max_indices, min_indices,
                        target_trust_radius):
    """
    Compute the alpha step using Newton-Raphson with safeguards.

    The derivative of the squared step norm with respect to alpha is
    accumulated over the maximization and minimization subspaces, and the
    Newton update (target_trust_radius**2 - step_norm**2) / derivative is
    returned.

    Parameters:
        alpha : float
            Current alpha value
        eigval_max, eigval_min : float
            RFO eigenvalues of the maximization / minimization subspaces
        step_max_norm, step_min_norm : float
            Norms of the step restricted to each subspace
        step_norm : float
            Norm of the combined step
        eigvals : numpy.ndarray
            Hessian eigenvalues (indexed by max_indices / min_indices)
        gradient_trans : numpy.ndarray
            Gradient expressed in the Hessian eigenvector basis
        max_indices, min_indices : list
            Mode indices of the two subspaces
        target_trust_radius : float
            Step norm the micro-cycles are trying to reach

    Returns:
        float
            Computed alpha step. When the derivative is (numerically) zero
            or not finite, 0.5 * alpha is returned if the step is longer
            than the target and 0.0 otherwise.
    """
    eps = 1e-12

    def subspace_derivative(indices, rfo_eigval, sub_step_norm):
        # d(|step|^2)/d(alpha) contribution of a single subspace.
        if len(indices) > 0 and sub_step_norm > eps:
            step_factor = 1.0 + sub_step_norm**2 * alpha
            if abs(step_factor) > eps:
                denom = eigvals[indices] - rfo_eigval * alpha
                # np.sign(0.0) is 0.0, so a sign-based floor would leave an
                # exactly-zero denominator at zero; treat zero as positive.
                floor = np.where(denom < 0, -eps, eps)
                safe_denom = np.where(np.abs(denom) < eps, floor, denom)
                quot = np.sum(gradient_trans[indices]**2 / safe_denom**3)
                return 2.0 * rfo_eigval / step_factor * quot
        return 0.0

    dstep2_dalpha = (
        subspace_derivative(max_indices, eigval_max, step_max_norm)
        + subspace_derivative(min_indices, eigval_min, step_min_norm)
    )

    # A vanishing or non-finite derivative makes the Newton update
    # meaningless; fall back to a simple heuristic instead.
    if not np.isfinite(dstep2_dalpha) or abs(dstep2_dalpha) < eps:
        if step_norm > target_trust_radius:
            return alpha * 0.5
        return 0.0

    return (target_trust_radius**2 - step_norm**2) / dstep2_dalpha
712
+
713
def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0,
        pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
    """
    Execute one step of enhanced RSPRFO optimization with trust radius adjustment.

    Parameters:
        geom_num_list : numpy.ndarray
            Current geometry coordinates
        B_g : numpy.ndarray
            Current gradient
        pre_B_g : numpy.ndarray
            Previous gradient
        pre_geom : numpy.ndarray
            Previous geometry
        B_e : float
            Current energy
        pre_B_e : float
            Previous energy (not read here; the stored self.prev_energy is used)
        pre_move_vector : numpy.ndarray
            Previous step vector
        initial_geom_num_list : numpy.ndarray
            Initial geometry (not read in this method)
        g : numpy.ndarray
            Alternative gradient representation (not read in this method)
        pre_g : numpy.ndarray
            Previous alternative gradient representation (not read in this method)

    Returns:
        numpy.ndarray
            Optimization step vector (shaped as column vector)

    Raises:
        ValueError
            If self.hessian is None.
    """
    # NOTE: the list defaults are never mutated here; they are only
    # inspected with len() or forwarded unchanged.
    self.log(f"\n{'='*60}")
    self.log(f"RS-PRFO Iteration {self.iter}")
    self.log(f"{'='*60}")

    if self.Initialization:
        self._reset_state()
        self.Initialization = False
        self.log(f"Initialized with trust radius {self.trust_radius:.6f}")
    else:
        step_accepted = self._process_previous_step(
            B_e, geom_num_list, B_g, pre_B_g, pre_geom, pre_move_vector
        )

        if not step_accepted and self.step_rejection_enabled:
            self.log("Step rejected - optimizer should use previous geometry")

    if self.hessian is None:
        raise ValueError("Hessian matrix must be set before running optimization")

    if (self.prev_geometry is not None and self.prev_gradient is not None and
            len(pre_B_g) > 0 and len(pre_geom) > 0):
        self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)

    gradient = np.asarray(B_g).flatten()

    # Project out TR/ROT from gradient
    raw_norm = np.linalg.norm(gradient)
    gradient = self._project_grad_tr_rot(gradient, geom_num_list)
    proj_norm = np.linalg.norm(gradient)

    if abs(raw_norm - proj_norm) > 1e-10:
        self.log(f"Gradient TR/ROT projection: {raw_norm:.6e} -> {proj_norm:.6e}")

    gradient_norm = proj_norm  # Store for later use

    # Prepare Hessian
    H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian

    # Compute eigendecomposition. eigh may either raise LinAlgError or hand
    # back non-finite values for a corrupted matrix; treat both as failure.
    try:
        if not np.all(np.isfinite(H)):
            raise np.linalg.LinAlgError("Hessian contains non-finite entries")
        eigvals, eigvecs = np.linalg.eigh(H)
        decomposition_ok = bool(
            np.all(np.isfinite(eigvals)) and np.all(np.isfinite(eigvecs))
        )
    except np.linalg.LinAlgError:
        decomposition_ok = False

    if not decomposition_ok:
        self.log("CRITICAL ERROR: Hessian eigendecomposition failed (NaNs detected).", force=True)
        self.log("Resetting to Identity Hessian to force Steepest Descent fallback.", force=True)

        # Replace the working Hessian as well as its spectrum so that the
        # eigenvalue shift and the RFO energy model below do not see
        # non-finite entries. self.hessian itself is left untouched.
        n_dim = np.shape(H)[0]
        H = np.eye(n_dim)
        eigvals = np.ones(n_dim)
        eigvecs = np.eye(n_dim)

    # Apply eigenvalue shifting if needed
    H, eigvals, shifted = self._shift_hessian_eigenvalues(H, eigvals, eigvecs)
    if shifted:
        eigvals, eigvecs = np.linalg.eigh(H)

    self.shifted_hessian = H

    # Log eigenvalue information
    neg_eigval_count = np.sum(eigvals < -1e-8)
    self.log(f"Eigenvalue analysis: {neg_eigval_count} negative (target: {self.saddle_order})")
    self.log(f"Lowest eigenvalues: {eigvals[:min(5, len(eigvals))]}")

    # Mode selection with mode following
    max_indices, min_indices = self._select_modes(eigvals, eigvecs, gradient)

    # Store eigenvector history (last 5 entries only)
    self.eigvec_history.append(eigvecs.copy())
    self.eigval_history.append(eigvals.copy())
    if len(self.eigvec_history) > 5:
        self.eigvec_history.pop(0)
        self.eigval_history.pop(0)

    # Transform gradient to eigenvector space
    gradient_trans = eigvecs.T @ gradient

    # Solve for step using micro-cycles
    step_trans, step_norm, converged = self._solve_alpha_micro_cycles(
        eigvals, gradient_trans, max_indices, min_indices, gradient_norm
    )

    # Safety check for NaN/Inf steps
    if not np.isfinite(step_norm) or not np.all(np.isfinite(step_trans)):
        self.log("CRITICAL WARNING: NaN detected in optimization step. Falling back to Steepest Descent.", force=True)

        # Fallback: Steepest Descent (SD) step within trust radius.
        # In eigenvector basis, SD direction is simply -gradient.
        sd_step = -gradient_trans
        sd_norm = np.linalg.norm(sd_step)

        # Apply trust radius
        target_norm = min(sd_norm, self.trust_radius)

        if sd_norm > 1e-12:
            step_trans = sd_step * (target_norm / sd_norm)
            step_norm = target_norm
        else:
            step_trans = np.zeros_like(gradient_trans)
            step_norm = 0.0

        converged = False

    if not converged:
        self.log("Warning: Micro-cycles did not fully converge")

    # Transform step back to original coordinates
    move_vector = eigvecs @ step_trans
    step_norm = np.linalg.norm(move_vector)

    # Apply gradient-based scaling for near-convergence
    grad_scale = self._compute_gradient_based_scale(gradient_norm, step_norm)
    if grad_scale < 1.0:
        move_vector = move_vector * grad_scale
        step_norm = step_norm * grad_scale
        self.log(f"Applied gradient-based scaling: {1.0/grad_scale:.2f}x reduction")

    # Apply trust radius constraint (1% tolerance before rescaling)
    effective_trust = self._compute_adaptive_trust_radius(gradient_norm)
    if step_norm > effective_trust * 1.01:
        self.log(f"Scaling step from {step_norm:.6f} to trust radius {effective_trust:.6f}")
        move_vector = move_vector * (effective_trust / step_norm)
        step_norm = effective_trust

    # Apply maxstep constraint if specified
    if self.config.get("maxstep") is not None:
        move_vector, step_norm = self._apply_maxstep_constraint(move_vector)

    self.log(f"Final step norm: {step_norm:.6f}")

    # Calculate predicted energy change
    predicted_energy_change = self.rfo_model(gradient, H, move_vector)
    self.predicted_energy_changes.append(predicted_energy_change)
    self.log(f"Predicted energy change: {predicted_energy_change:.6e}")

    # Store state for next iteration
    self.prev_geometry = np.copy(geom_num_list)
    self.prev_gradient = np.copy(B_g)
    self.prev_energy = B_e
    self.prev_move_vector = np.copy(move_vector)

    self.iter += 1

    return move_vector.reshape(-1, 1)
605
-
606
- def get_alpha_step(self, alpha, rfo_eigval, step_norm, eigvals, gradient, mode="min"):
887
+
888
def _reset_state(self):
    """Return every piece of per-run bookkeeping to its initial value.

    Cached vectors and previous-iteration data become None, histories
    become fresh empty lists, the rejection counter is zeroed and the
    trust radius is restored from trust_radius_initial.
    """
    cleared_to_none = (
        "prev_eigvec_max",
        "prev_eigvec_min",
        "prev_geometry",
        "prev_gradient",
        "prev_energy",
        "prev_move_vector",
        "ts_mode_idx",
        "ts_mode_eigvec",
    )
    for attr in cleared_to_none:
        setattr(self, attr, None)

    cleared_to_list = (
        "predicted_energy_changes",
        "actual_energy_changes",
        "reduction_ratios",
        "trust_radius_history",
        "step_quality_history",
        "eigvec_history",
        "eigval_history",
    )
    for attr in cleared_to_list:
        # A new list per attribute so the histories never alias each other.
        setattr(self, attr, [])

    self.consecutive_rejections = 0
    self.trust_radius = self.trust_radius_initial
907
+
908
def _process_previous_step(self, B_e, geom_num_list, B_g, pre_B_g, pre_geom, pre_move_vector):
    """
    Evaluate the previous step and update the trust radius accordingly.

    The actual energy change is recorded, a reduction ratio is obtained
    from compute_reduction_ratio, adjust_trust_radius is invoked, and the
    step is finally classified as accepted or rejected. B_g, pre_B_g and
    pre_geom are accepted but not read here.

    Returns:
        bool
            True if step is accepted, False if rejected
    """
    nothing_to_compare = (
        self.prev_energy is None or len(self.predicted_energy_changes) == 0
    )
    if nothing_to_compare:
        return True

    delta_actual = B_e - self.prev_energy
    delta_predicted = self.predicted_energy_changes[-1]
    self.actual_energy_changes.append(delta_actual)

    # Prefer the caller-supplied previous step; fall back to the stored one.
    if len(pre_move_vector) > 0:
        last_step = np.asarray(pre_move_vector).flatten()
    elif self.prev_move_vector is not None:
        last_step = self.prev_move_vector.flatten()
    else:
        last_step = None
    prev_step_norm = 0.0 if last_step is None else np.linalg.norm(last_step)

    self.log(f"Energy: {self.prev_energy:.8f} -> {B_e:.8f}")
    self.log(f"Actual change: {delta_actual:.6e}, Predicted: {delta_predicted:.6e}")

    if self.bias_hessian is not None:
        model_hessian = self.hessian + self.bias_hessian
    else:
        model_hessian = self.hessian

    if hasattr(Calculationtools, 'project_out_hess_tr_and_rot_for_coord'):
        projector = Calculationtools()
        model_hessian = projector.project_out_hess_tr_and_rot_for_coord(
            model_hessian,
            geom_num_list.reshape(-1, 3),
            geom_num_list.reshape(-1, 3),
            display_eigval=False,
        )

    ratio = self.compute_reduction_ratio(
        self.prev_gradient, model_hessian, self.prev_move_vector, delta_actual
    )
    self.reduction_ratios.append(ratio)

    # A step counts as "at the boundary" from 95% of the trust radius upward.
    at_boundary = prev_step_norm >= self.trust_radius * 0.95
    self.adjust_trust_radius(ratio, prev_step_norm, at_boundary)

    rejected = self.step_rejection_enabled and ratio < self.rejection_threshold
    if not rejected:
        self.consecutive_rejections = 0
        return True

    self.consecutive_rejections += 1
    self.log(f"Step quality very poor (ratio={ratio:.4f}), rejection count: {self.consecutive_rejections}")

    if self.consecutive_rejections < self.max_consecutive_rejections:
        return False

    # Too many rejections in a row: accept the step and restart the count.
    self.log(f"Too many consecutive rejections, accepting step anyway")
    self.consecutive_rejections = 0
    return True
662
963
 
663
- def calculate_step_derivative(self, alpha, eigval_max, eigval_min, eigvals, max_indices, min_indices, gradient_trans, step_norm):
964
def _select_modes(self, eigvals, eigvecs, gradient):
    """
    Partition the Hessian modes into uphill and downhill subspaces.

    With saddle_order == 0 every mode is minimized. Otherwise the modes to
    maximize come from _find_ts_modes when mode following is enabled, or
    are the saddle_order lowest-eigenvalue modes when it is not.

    Returns:
        tuple
            (max_indices, min_indices)
    """
    mode_count = len(eigvals)

    if self.saddle_order == 0:
        return [], list(range(mode_count))

    if not self.mode_following_enabled:
        uphill = np.argsort(eigvals)[:self.saddle_order].tolist()
    else:
        uphill = self._find_ts_modes(eigvals, eigvecs, gradient)

    # Everything that is not maximized is minimized.
    downhill = []
    for idx in range(mode_count):
        if idx not in uphill:
            downhill.append(idx)

    return uphill, downhill
986
+
987
def _find_ts_modes(self, eigvals, eigvecs, gradient):
    """
    Find transition state modes using mode following.

    On the first call (no stored reference) the lowest-eigenvalue mode is
    stored as the reference. Afterwards the current eigenvector with the
    largest absolute overlap with the stored reference is followed,
    provided the overlap exceeds overlap_threshold. Otherwise tracking is
    delegated to _handle_mode_mixing (when eigvec_following is set) or
    reset to the lowest-eigenvalue mode.

    Parameters:
        eigvals : numpy.ndarray
            Hessian eigenvalues
        eigvecs : numpy.ndarray
            Hessian eigenvectors as columns
        gradient : numpy.ndarray
            Current gradient (not read in this method)

    Returns:
        list
            Indices of modes to maximize
    """
    sorted_indices = np.argsort(eigvals)

    if self.ts_mode_idx is None or self.ts_mode_eigvec is None:
        self.ts_mode_idx = sorted_indices[0]
        self.ts_mode_eigvec = eigvecs[:, self.ts_mode_idx].copy()
        self.log(f"Initial TS mode: {self.ts_mode_idx}, eigenvalue={eigvals[self.ts_mode_idx]:.6f}")
        return sorted_indices[:self.saddle_order].tolist()

    reference = self.ts_mode_eigvec
    overlaps = np.abs(eigvecs.T @ reference)

    best_idx = int(np.argmax(overlaps))
    best_overlap = overlaps[best_idx]

    self.log(f"Mode following: best overlap={best_overlap:.4f} with mode {best_idx} "
             f"(eigenvalue={eigvals[best_idx]:.6f})")

    if best_overlap > self.overlap_threshold:
        tracked = eigvecs[:, best_idx].copy()
        # Eigenvector signs are arbitrary. Align the new reference with the
        # previous one *before* storing it; comparing after the overwrite
        # would compare the vector with itself and never flip.
        if np.dot(tracked, reference) < 0:
            tracked *= -1

        self.ts_mode_idx = best_idx
        self.ts_mode_eigvec = tracked

        max_indices = [best_idx]

        if self.saddle_order > 1:
            # Fill the remaining slots with the lowest-eigenvalue modes.
            remaining = [int(i) for i in sorted_indices if i != best_idx]
            max_indices.extend(remaining[:self.saddle_order - 1])

        return max_indices

    self.log(f"Warning: Poor mode overlap ({best_overlap:.4f}), possible mode crossing")

    if self.eigvec_following:
        return self._handle_mode_mixing(eigvals, eigvecs, overlaps, sorted_indices)

    self.ts_mode_idx = sorted_indices[0]
    self.ts_mode_eigvec = eigvecs[:, sorted_indices[0]].copy()
    return sorted_indices[:self.saddle_order].tolist()
1034
+
1035
def _handle_mode_mixing(self, eigvals, eigvecs, overlaps, sorted_indices):
    """
    Pick the mode to follow when no single mode overlaps strongly.

    Modes whose overlap exceeds mixing_threshold are scored by squared
    overlap, with non-negative-eigenvalue modes down-weighted by a factor
    of 0.1; the best-scoring mode becomes the new tracked mode. If no mode
    clears the threshold, tracking is reset to the lowest-eigenvalue mode.

    Returns:
        list
            Selected mode indices
    """
    candidates = np.where(overlaps > self.mixing_threshold)[0]

    if len(candidates) == 0:
        self.log("No significant mode overlap - resetting mode tracking")
        lowest = sorted_indices[0]
        self.ts_mode_idx = lowest
        self.ts_mode_eigvec = eigvecs[:, lowest].copy()
        return sorted_indices[:self.saddle_order].tolist()

    scores = [
        overlaps[c]**2 * (1.0 if eigvals[c] < 0 else 0.1)
        for c in candidates
    ]
    chosen = candidates[np.argmax(scores)]

    self.log(f"Mode mixing resolution: selected mode {chosen} "
             f"(overlap={overlaps[chosen]:.4f}, eigenvalue={eigvals[chosen]:.6f})")

    self.ts_mode_idx = chosen
    self.ts_mode_eigvec = eigvecs[:, chosen].copy()

    selected = [chosen]
    if self.saddle_order > 1:
        # Remaining slots are filled in ascending-eigenvalue order.
        others = [i for i in sorted_indices if i != chosen]
        selected.extend(others[:self.saddle_order - 1])

    return selected
781
1072
 
782
- def apply_eigenvector_following(self, eigvals, eigvecs, gradient_trans, mode_indices):
1073
+ def _apply_maxstep_constraint(self, move_vector):
783
1074
  """
784
- Apply Eigenvector Following method to handle mixed modes
785
-
786
- Parameters:
787
- eigvals: numpy.ndarray - Current eigenvalues
788
- eigvecs: numpy.ndarray - Current eigenvectors
789
- gradient_trans: numpy.ndarray - Gradient in eigenvector basis
790
- mode_indices: list - Indices of candidate modes
1075
+ Apply maximum step constraint.
791
1076
 
792
1077
  Returns:
793
- list - Selected mode indices after eigenvector following
1078
+ tuple
1079
+ (constrained_move_vector, step_norm)
794
1080
  """
795
- if not mode_indices or len(mode_indices) <= 1:
796
- # No mode mixing, apply standard RSPRFO processing
797
- return mode_indices
798
-
799
- # For mixed modes, build a weighted mode
800
- weights = np.zeros(len(eigvals))
801
- total_weight = 0.0
802
-
803
- for idx in mode_indices:
804
- # Use inverse of eigenvalue as weight (keep negative values as is)
805
- if eigvals[idx] < 0:
806
- weights[idx] = abs(1.0 / eigvals[idx])
807
- else:
808
- # Small weight for positive eigenvalues
809
- weights[idx] = 0.01
810
-
811
- total_weight += weights[idx]
812
-
813
- # Normalize weights
814
- if total_weight > 0:
815
- weights /= total_weight
816
-
817
- # Calculate centroid of mixed modes
818
- mixed_mode_idx = np.argmax(weights)
1081
+ maxstep = self.config.get("maxstep")
819
1082
 
820
- self.log(f"Eigenvector following: selected mixed mode {mixed_mode_idx} from candidates {mode_indices}")
821
- self.log(f"Selected mode eigenvalue: {eigvals[mixed_mode_idx]:.6f}")
1083
+ if move_vector.size % 3 == 0 and move_vector.size > 3:
1084
+ move_reshaped = move_vector.reshape(-1, 3)
1085
+ step_lengths = np.sqrt(np.sum(move_reshaped**2, axis=1))
1086
+ longest_step = np.max(step_lengths)
1087
+ else:
1088
+ longest_step = np.linalg.norm(move_vector)
822
1089
 
823
- return [mixed_mode_idx]
824
-
1090
+ if longest_step > maxstep:
1091
+ scale = maxstep / longest_step
1092
+ move_vector = move_vector * scale
1093
+ self.log(f"Step constrained by maxstep: {longest_step:.6f} -> {maxstep:.6f}")
1094
+
1095
+ return move_vector, np.linalg.norm(move_vector)
1096
+
825
1097
  def get_augmented_hessian(self, eigenvalues, gradient_components, alpha):
826
1098
  """
827
- Create the augmented hessian matrix for RFO calculation
1099
+ Create the augmented hessian matrix for RFO calculation.
828
1100
 
829
1101
  Parameters:
830
- eigenvalues: numpy.ndarray - Eigenvalues for the selected subspace
831
- gradient_components: numpy.ndarray - Gradient components in the selected subspace
832
- alpha: float - Alpha parameter for RS-RFO
833
-
1102
+ eigenvalues : numpy.ndarray
1103
+ Eigenvalues for the selected subspace
1104
+ gradient_components : numpy.ndarray
1105
+ Gradient components in the selected subspace
1106
+ alpha : float
1107
+ Alpha parameter for RS-RFO
1108
+
834
1109
  Returns:
835
- numpy.ndarray - Augmented Hessian matrix for RFO calculation
1110
+ numpy.ndarray
1111
+ Augmented Hessian matrix for RFO calculation
836
1112
  """
837
1113
  n = len(eigenvalues)
838
1114
  H_aug = np.zeros((n + 1, n + 1))
839
1115
 
840
- # Fill the upper-left block with eigenvalues / alpha
841
1116
  np.fill_diagonal(H_aug[:n, :n], eigenvalues / alpha)
842
1117
 
843
- # Make sure gradient_components is flattened to the right shape
844
1118
  gradient_components = np.asarray(gradient_components).flatten()
845
1119
 
846
- # Fill the upper-right and lower-left blocks with gradient components / alpha
847
1120
  H_aug[:n, n] = gradient_components / alpha
848
1121
  H_aug[n, :n] = gradient_components / alpha
849
1122
 
@@ -851,69 +1124,82 @@ class EnhancedRSPRFO:
851
1124
 
852
1125
  def solve_rfo(self, H_aug, mode="min", prev_eigvec=None):
853
1126
  """
854
- Solve the RFO equations to get the step
1127
+ Solve the RFO equations to get the step.
855
1128
 
856
1129
  Parameters:
857
- H_aug: numpy.ndarray - Augmented Hessian matrix
858
- mode: str - "min" for energy minimization, "max" for maximization
859
- prev_eigvec: numpy.ndarray - Previous eigenvector for consistent direction
860
-
1130
+ H_aug : numpy.ndarray
1131
+ Augmented Hessian matrix
1132
+ mode : str
1133
+ "min" for energy minimization, "max" for maximization
1134
+ prev_eigvec : numpy.ndarray
1135
+ Previous eigenvector for consistent direction
1136
+
861
1137
  Returns:
862
- tuple - (step, eigenvalue, nu parameter, eigenvector)
1138
+ tuple
1139
+ (step, eigenvalue, nu parameter, eigenvector)
863
1140
  """
864
1141
  eigvals, eigvecs = np.linalg.eigh(H_aug)
865
1142
 
866
1143
  if mode == "min":
867
1144
  idx = np.argmin(eigvals)
868
- else: # mode == "max"
1145
+ else:
869
1146
  idx = np.argmax(eigvals)
870
-
871
- # Check if we need to flip the eigenvector to maintain consistency
1147
+
872
1148
  if prev_eigvec is not None:
873
1149
  try:
874
- overlap = np.dot(eigvecs[:, idx], prev_eigvec)
875
- if overlap < 0:
876
- eigvecs[:, idx] *= -1
877
- except Exception as e:
878
- # Handle dimension mismatch or other errors
879
- self.log(f"Error in eigenvector consistency check: {str(e)}")
880
- # Continue without flipping
881
-
1150
+ if prev_eigvec.shape == eigvecs[:, idx].shape:
1151
+ overlap = np.dot(eigvecs[:, idx], prev_eigvec)
1152
+ if overlap < 0:
1153
+ eigvecs[:, idx] *= -1
1154
+ except Exception:
1155
+ pass
1156
+
882
1157
  eigval = eigvals[idx]
883
1158
  eigvec = eigvecs[:, idx]
884
1159
 
885
- # The last component is nu
886
1160
  nu = eigvec[-1]
887
1161
 
888
- # Add safeguard against very small nu values
889
- if abs(nu) < 1e-10:
890
- self.log(f"Warning: Very small nu value: {nu}. Using safe value.")
891
- nu = np.sign(nu) * max(1e-10, abs(nu))
1162
+ if abs(nu) < 1e-12:
1163
+ self.log(f"Warning: Very small nu={nu:.2e}, using safe value")
1164
+ nu = np.sign(nu) * 1e-12 if nu != 0 else 1e-12
892
1165
 
893
- # The step is -p/nu where p are the first n components of the eigenvector
894
1166
  step = -eigvec[:-1] / nu
895
1167
 
896
1168
  return step, eigval, nu, eigvec
897
1169
 
898
1170
  def rfo_model(self, gradient, hessian, step):
899
1171
  """
900
- Estimate energy change based on RFO model
1172
+ Estimate energy change based on RFO model.
901
1173
 
902
1174
  Parameters:
903
- gradient: numpy.ndarray - Energy gradient
904
- hessian: numpy.ndarray - Hessian matrix
905
- step: numpy.ndarray - Step vector
906
-
1175
+ gradient : numpy.ndarray
1176
+ Energy gradient
1177
+ hessian : numpy.ndarray
1178
+ Hessian matrix
1179
+ step : numpy.ndarray
1180
+ Step vector
1181
+
907
1182
  Returns:
908
- float - Predicted energy change
1183
+ float
1184
+ Predicted energy change
909
1185
  """
910
- return np.dot(gradient, step) + 0.5 * np.dot(step, np.dot(hessian, step))
1186
+ g = gradient.flatten()
1187
+ s = step.flatten()
1188
+ return np.dot(g, s) + 0.5 * np.dot(s, hessian @ s)
911
1189
 
912
1190
  def update_hessian(self, current_geom, current_grad, previous_geom, previous_grad):
913
1191
  """
914
- Update the Hessian using the specified update method.
915
- WARNING: This version FORCES the update even if dot_product <= 0,
916
- which may lead to numerical instability or crashes.
1192
+ Update the Hessian using the specified update method with curvature checks.
1193
+
1194
+ Parameters:
1195
+ current_geom : numpy.ndarray
1196
+ Current geometry
1197
+ current_grad : numpy.ndarray
1198
+ Current gradient
1199
+ previous_geom : numpy.ndarray
1200
+ Previous geometry
1201
+ previous_grad : numpy.ndarray
1202
+ Previous gradient
917
1203
  """
918
1204
  displacement = np.asarray(current_geom - previous_geom).reshape(-1, 1)
919
1205
  delta_grad = np.asarray(current_grad - previous_grad).reshape(-1, 1)
@@ -921,136 +1207,157 @@ class EnhancedRSPRFO:
921
1207
  disp_norm = np.linalg.norm(displacement)
922
1208
  grad_diff_norm = np.linalg.norm(delta_grad)
923
1209
 
924
- # This is a pre-check from the original code, kept for safety
925
1210
  if disp_norm < 1e-10 or grad_diff_norm < 1e-10:
926
- self.log("Skipping Hessian update due to small changes")
1211
+ self.log("Skipping Hessian update: changes too small")
927
1212
  return
928
-
1213
+
929
1214
  dot_product = np.dot(displacement.T, delta_grad)[0, 0]
930
1215
 
931
- # === [IMPROVEMENT 3] Selective Hessian update ===
932
- # Uncomment the following lines if should_update_hessian method is implemented
933
- # if not self.should_update_hessian(displacement, delta_grad, dot_product):
934
- # return
935
- # === [END IMPROVEMENT 3] ===
936
-
937
- # === [MODIFICATION] Safety check removed per user request ===
938
- if dot_product <= 0:
939
- self.log(f"WARNING: Forcing Hessian update despite poor alignment (dot_product={dot_product:.6f}).", force=True)
940
- self.log("This may cause instability or errors in the update function.", force=True)
941
- # =======================================================
942
- else:
943
- self.log(f"Hessian update: displacement norm={disp_norm:.6f}, gradient diff norm={grad_diff_norm:.6f}, dot product={dot_product:.6f}")
1216
+ curvature_ratio = dot_product / (disp_norm * grad_diff_norm)
1217
+
1218
+ self.log(f"Hessian update: |disp|={disp_norm:.6f}, |dgrad|={grad_diff_norm:.6f}, "
1219
+ f"dot={dot_product:.6f}, curvature_ratio={curvature_ratio:.4f}")
1220
+
1221
+ if abs(curvature_ratio) < 0.01:
1222
+ self.log("Warning: Very poor displacement-gradient alignment, proceeding with caution")
944
1223
 
945
1224
  method_key_lower = self.hessian_update_method.lower()
946
1225
  method_name, update_function = self.default_update_method
947
- found_method = False
948
-
1226
+
949
1227
  for key, name, func in self.updater_dispatch_list:
950
1228
  if key in method_key_lower:
951
1229
  method_name = name
952
1230
  update_function = func
953
- found_method = True
954
1231
  break
955
-
956
- if not found_method:
957
- self.log(f"Unknown Hessian update method: {self.hessian_update_method}. Using auto selection.")
958
1232
 
959
- self.log(f"Hessian update method: {method_name}")
1233
+ self.log(f"Using Hessian update method: {method_name}")
960
1234
 
961
1235
  try:
962
- delta_hess = update_function(
963
- self.hessian, displacement, delta_grad
964
- )
965
- self.hessian += delta_hess
966
- self.hessian = 0.5 * (self.hessian + self.hessian.T)
967
- self.log("Hessian update attempted.")
1236
+ old_hessian = self.hessian.copy()
1237
+
1238
+ delta_hess = update_function(self.hessian, displacement, delta_grad)
1239
+ new_hessian = self.hessian + delta_hess
1240
+ new_hessian = 0.5 * (new_hessian + new_hessian.T)
1241
+
1242
+ new_eigvals = np.linalg.eigvalsh(new_hessian)
1243
+
1244
+ n_neg = np.sum(new_eigvals < -1e-8)
1245
+ max_eigval = np.max(np.abs(new_eigvals))
1246
+
1247
+ if max_eigval > 1e6:
1248
+ self.log(f"Warning: Updated Hessian has very large eigenvalues ({max_eigval:.2e}), "
1249
+ "reverting to previous Hessian")
1250
+ return
1251
+
1252
+ if self.saddle_order > 0 and n_neg == 0:
1253
+ self.log(f"Warning: No negative eigenvalues after update (expected {self.saddle_order})")
1254
+
1255
+ self.hessian = new_hessian
1256
+ self.log(f"Hessian updated successfully ({n_neg} negative eigenvalues)")
968
1257
 
969
1258
  except Exception as e:
970
- self.log(f"ERROR during forced Hessian update ({method_name}): {e}", force=True)
971
- self.log("Hessian may be corrupted. Proceeding with caution.", force=True)
1259
+ self.log(f"Error in Hessian update: {e}")
1260
+ self.log("Keeping previous Hessian")
1261
+
1262
def should_update_hessian(self, displacement, delta_grad, dot_product):
    """
    Decide whether a Hessian update is worthwhile for this step.

    The update is refused when either vector is shorter than 1e-10,
    when ``dot_product`` is negative while ``self.saddle_order`` is 0,
    or when the cosine between the two vectors is below 0.001 in
    magnitude.

    Parameters:
        displacement : numpy.ndarray
            Geometry displacement vector
        delta_grad : numpy.ndarray
            Gradient difference vector
        dot_product : float
            Dot product of displacement and gradient difference

    Returns:
        bool
            True if Hessian should be updated
    """
    step_len = np.linalg.norm(displacement)
    dgrad_len = np.linalg.norm(delta_grad)

    # Degenerate input: the cosine below would be ill-defined.
    if step_len < 1e-10 or dgrad_len < 1e-10:
        return False

    cos_angle = dot_product / (step_len * dgrad_len)
    minimizing = self.saddle_order == 0

    if minimizing and dot_product < 0:
        self.log(f"Skipping update: negative curvature in minimization (cos={cos_angle:.4f})")
        return False

    if abs(cos_angle) < 0.001:
        self.log(f"Skipping update: nearly orthogonal vectors (cos={cos_angle:.4f})")
        return False

    return True
972
1295
 
973
1296
  def log(self, message, force=False):
974
1297
  """
975
- Print log message if display flag is enabled or force is True
1298
+ Print log message if display flag is enabled.
976
1299
 
977
1300
  Parameters:
978
- message: str - Message to display
979
- force: bool - If True, display message regardless of display_flag
1301
+ message : str
1302
+ Message to display
1303
+ force : bool
1304
+ If True, display message regardless of display_flag
980
1305
  """
981
1306
  if self.display_flag or force:
982
1307
  print(message)
983
1308
 
984
1309
  def set_hessian(self, hessian):
985
1310
  """
986
- Set the Hessian matrix
1311
+ Set the Hessian matrix.
987
1312
 
988
1313
  Parameters:
989
- hessian: numpy.ndarray - Hessian matrix
1314
+ hessian : numpy.ndarray
1315
+ Hessian matrix
990
1316
  """
991
- self.hessian = hessian
992
- return
1317
+ self.hessian = np.asarray(hessian).copy()
1318
+ self.hessian = 0.5 * (self.hessian + self.hessian.T) # Ensure symmetry
993
1319
 
994
1320
  def set_bias_hessian(self, bias_hessian):
995
1321
  """
996
- Set the bias Hessian matrix
1322
+ Set the bias Hessian matrix.
997
1323
 
998
1324
  Parameters:
999
- bias_hessian: numpy.ndarray - Bias Hessian matrix
1325
+ bias_hessian : numpy.ndarray
1326
+ Bias Hessian matrix
1000
1327
  """
1001
- self.bias_hessian = bias_hessian
1002
- return
1328
+ self.bias_hessian = np.asarray(bias_hessian).copy()
1329
+ self.bias_hessian = 0.5 * (self.bias_hessian + self.bias_hessian.T)
1003
1330
 
1004
1331
  def get_hessian(self):
1005
1332
  """
1006
- Get the current Hessian matrix
1333
+ Get the current Hessian matrix.
1007
1334
 
1008
1335
  Returns:
1009
- numpy.ndarray - Hessian matrix
1336
+ numpy.ndarray
1337
+ Hessian matrix
1010
1338
  """
1011
1339
  return self.hessian
1012
1340
 
1013
1341
  def get_bias_hessian(self):
1014
1342
  """
1015
- Get the current bias Hessian matrix
1343
+ Get the current bias Hessian matrix.
1016
1344
 
1017
1345
  Returns:
1018
- numpy.ndarray - Bias Hessian matrix
1346
+ numpy.ndarray
1347
+ Bias Hessian matrix
1019
1348
  """
1020
1349
  return self.bias_hessian
1021
-
1022
- def reset_trust_radius(self):
1023
- """
1024
- Reset trust radius to its initial value
1025
- """
1026
- self.trust_radius = self.trust_radius_initial
1027
- self.log(f"Trust radius reset to initial value: {self.trust_radius:.6f}")
1028
-
1029
- def set_trust_radius(self, radius):
1030
- """
1031
- Manually set the trust radius
1032
-
1033
- Parameters:
1034
- radius: float - New trust radius value
1035
- """
1036
- old_value = self.trust_radius
1037
- self.trust_radius = max(min(radius, self.trust_radius_max), self.trust_radius_min)
1038
- self.log(f"Trust radius manually set from {old_value:.6f} to {self.trust_radius:.6f}")
1039
-
1040
- def get_reduction_ratios(self):
1350
+
1351
def get_shifted_hessian(self):
    """
    Return the value stored in ``self.shifted_hessian``.

    Returns:
        numpy.ndarray
            Eigenvalue-shifted Hessian matrix. The original note says
            this is None when no shift has been computed; the
            initialisation is outside this excerpt, so confirm.
    """
    return self.shifted_hessian
1048
1360
 
1049
- def get_trust_radius_history(self):
1050
- """
1051
- Get the history of trust radius values
1052
-
1053
- Returns:
1054
- list - Trust radius values for each iteration
1055
- """
1056
- return self.trust_radius_history
1361
+ def reset_trust_radius(self):
1362
+ self.trust_radius = self.trust_radius_initial
1363
+ self.log(f"Trust radius reset to initial value: {self.trust_radius:.6f}", force=True)