MultiOptPy 1.20.5__py3-none-any.whl → 1.20.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/MD/thermostat.py +236 -123
- multioptpy/ModelHessian/fischerd3.py +240 -295
- multioptpy/Optimizer/rsirfo.py +112 -4
- multioptpy/Optimizer/rsprfo.py +1005 -698
- multioptpy/entrypoints.py +406 -16
- multioptpy/moleculardynamics.py +21 -13
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/METADATA +9 -9
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/RECORD +12 -12
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/WHEEL +1 -1
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/entry_points.txt +0 -0
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/licenses/LICENSE +0 -0
- {multioptpy-1.20.5.dist-info → multioptpy-1.20.6.dist-info}/top_level.txt +0 -0
multioptpy/Optimizer/rsprfo.py
CHANGED
@@ -8,53 +8,132 @@ from multioptpy.Utils.calc_tools import Calculationtools
 
 
 class EnhancedRSPRFO:
+    """
+    Enhanced Rational Step P-RFO (Rational Function Optimization) for transition state searches
+    with dynamic trust radius adjustment based on trust region methodology.
+
+    Key Improvements:
+    - Improved Levenberg-Marquardt-style alpha solver with backtracking
+    - Enhanced trust region adjustment with asymmetric expansion/contraction
+    - Robust Hessian update with curvature condition checks
+    - Improved mode following with overlap matrix tracking
+    - Step rejection mechanism for poor quality steps
+    - Hessian eigenvalue shifting for proper TS curvature
+    - Comprehensive step quality metrics and diagnostics
+    - Gradient-based step scaling for near-convergence behavior
+
+    References:
+    [1] Banerjee et al., J. Phys. Chem., 89, 52-57 (1985)
+    [2] Heyden et al., J. Chem. Phys., 123, 224101 (2005)
+    [3] Baker, J. Comput. Chem., 7, 385-395 (1986)
+    [4] Besalú and Bofill, Theor. Chem. Acc., 100, 265-274 (1998)
+    [5] Jensen and Jørgensen, J. Chem. Phys., 80, 1204 (1984) [Eigenvector following]
+    [6] Yuan, SIAM J. Optim., 11, 325-357 (2000) [Trust region methods]
+    [7] Nocedal and Wright, Numerical Optimization, 2nd ed. (2006) [Trust region]
+
+    This code is based on:
+    1. https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/RSPRFOptimizer.py
+    """
+
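Note: the methods get_augmented_hessian and solve_rfo that the new code calls below are not shown in this diff. As a reading aid, here is a minimal standalone sketch of the restricted-step RFO subproblem they solve for one subspace, written as the generalized symmetric eigenproblem of Besalú and Bofill [4]; the function name and the use of scipy are illustrative assumptions, not the package's implementation:

    import numpy as np
    from scipy.linalg import eigh

    def rs_rfo_substep(eigvals, g_trans, alpha, mode="min"):
        """Solve [[H, g], [g^T, 0]] v = lam * diag(alpha*I, 1) v in one subspace."""
        n = eigvals.size
        A = np.zeros((n + 1, n + 1))
        A[:n, :n] = np.diag(eigvals)      # subspace Hessian (diagonal in eigenbasis)
        A[:n, -1] = g_trans               # gradient border row/column
        A[-1, :n] = g_trans
        S = np.diag(np.append(np.full(n, alpha), 1.0))
        lam, V = eigh(A, S)               # ascending generalized eigenvalues
        idx = 0 if mode == "min" else -1  # lowest root minimizes, highest maximizes
        v = V[:, idx]
        return v[:-1] / v[-1], lam[idx]   # intermediate normalization gives the step

Raising alpha uniformly shrinks the returned step, which is the handle the alpha micro-cycles later in this diff use to match the trust radius.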
     def __init__(self, **config):
         """
-        … (old docstring truncated in diff view)
+        Initialize the Enhanced RS-PRFO optimizer.
+
+        Parameters (via config dict):
+        -----------------------------
+        alpha0 : float
+            Initial alpha parameter for RS-PRFO (default: 1.0)
+        max_micro_cycles : int
+            Maximum number of micro-iterations for alpha adjustment (default: 50)
+        saddle_order : int
+            Number of negative eigenvalues at the saddle point (default: 1)
+        method : str
+            Hessian update method (default: "auto")
+        display_flag : bool
+            Enable/disable logging output (default: True)
+        debug : bool
+            Enable detailed debug output (default: False)
+        trust_radius : float
+            Initial trust radius (default: 0.1 for TS, 0.5 for min)
+        trust_radius_max : float
+            Maximum allowed trust radius (default: same as initial)
+        trust_radius_min : float
+            Minimum allowed trust radius (default: 0.01)
+        adapt_trust_radius : bool
+            Enable dynamic trust radius adjustment (default: True)
+        mode_following : bool
+            Enable mode following for consistent TS mode tracking (default: True)
+        eigvec_following : bool
+            Enable eigenvector following for mode mixing (default: True)
+        overlap_threshold : float
+            Minimum overlap for mode identification (default: 0.5)
+        step_rejection : bool
+            Enable step rejection for very poor steps (default: True)
+        rejection_threshold : float
+            Reduction ratio threshold below which steps are rejected (default: -0.5)
+        hessian_shift_enabled : bool
+            Enable Hessian eigenvalue shifting (default: True)
+        min_positive_eigval : float
+            Minimum positive eigenvalue after shifting (default: 0.005)
+        gradient_scaling_enabled : bool
+            Enable gradient-based step scaling near convergence (default: True)
+        gradient_scaling_threshold : float
+            Gradient norm threshold below which scaling is applied (default: 0.001)
         """
         # Standard RSPRFO parameters
         self.alpha0 = config.get("alpha0", 1.0)
-        self.max_micro_cycles = config.get("max_micro_cycles",…
+        self.max_micro_cycles = config.get("max_micro_cycles", 50)
         self.saddle_order = config.get("saddle_order", 1)
         self.hessian_update_method = config.get("method", "auto")
         self.display_flag = config.get("display_flag", True)
         self.debug = config.get("debug", False)
 
         # Alpha constraints to prevent numerical instability
-        self.alpha_max = config.get("alpha_max",…
-        self.…
+        self.alpha_max = config.get("alpha_max", 1e8)
+        self.alpha_min = config.get("alpha_min", 1e-8)
+        self.alpha_step_max = config.get("alpha_step_max", 100.0)
+
+        # Micro-cycle convergence criteria
+        self.micro_cycle_rtol = config.get("micro_cycle_rtol", 1e-3)
+        self.micro_cycle_atol = config.get("micro_cycle_atol", 1e-6)
 
         # Trust region parameters
         if self.saddle_order == 0:
             self.trust_radius_initial = config.get("trust_radius", 0.5)
-            self.trust_radius_max = config.get("trust_radius_max", 0.5)
+            self.trust_radius_max = config.get("trust_radius_max", 0.5)
         else:
             self.trust_radius_initial = config.get("trust_radius", 0.1)
-            self.trust_radius_max = config.get("trust_radius_max", 0.…
+            self.trust_radius_max = config.get("trust_radius_max", 0.3)
 
-        self.trust_radius = self.trust_radius_initial
-        self.trust_radius_min = config.get("trust_radius_min", 0.01)
-
-        # Trust region acceptance thresholds
-        self.…
-        self.…
-        self.…
-        self.…
-        self.…
+        self.trust_radius = self.trust_radius_initial
+        self.trust_radius_min = config.get("trust_radius_min", 0.01)
+
+        # Trust region acceptance thresholds (based on Nocedal & Wright)
+        self.eta_1 = config.get("eta_1", 0.1)
+        self.eta_2 = config.get("eta_2", 0.25)
+        self.eta_3 = config.get("eta_3", 0.75)
+        self.gamma_1 = config.get("gamma_1", 0.25)
+        self.gamma_2 = config.get("gamma_2", 2.0)
+
+        # Step rejection settings
+        self.step_rejection_enabled = config.get("step_rejection", True)
+        self.rejection_threshold = config.get("rejection_threshold", -0.5)
+        self.max_consecutive_rejections = config.get("max_consecutive_rejections", 3)
+        self.consecutive_rejections = 0
+
+        # Hessian eigenvalue shifting - IMPROVED: smaller minimum to avoid over-shifting
+        self.hessian_shift_enabled = config.get("hessian_shift_enabled", True)
+        self.min_positive_eigval = config.get("min_positive_eigval", 0.001)
+        self.min_negative_eigval = config.get("min_negative_eigval", -0.001)
+
+        # NEW: Gradient-based step scaling for near-convergence
+        self.gradient_scaling_enabled = config.get("gradient_scaling_enabled", True)
+        self.gradient_scaling_threshold = config.get("gradient_scaling_threshold", 0.001)
+        self.min_step_scale = config.get("min_step_scale", 0.1)  # Minimum scaling factor
+
+        # NEW: Adaptive trust radius based on gradient magnitude
+        self.adaptive_trust_enabled = config.get("adaptive_trust_enabled", True)
+        self.gradient_trust_coupling = config.get("gradient_trust_coupling", 0.5)
 
         # Whether to use trust radius adaptation
         self.adapt_trust_radius = config.get("adapt_trust_radius", True)
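Note: for orientation, a hypothetical construction call using only the config keys documented in this hunk. The import path follows the file location; initial_hessian is a stand-in for whatever multioptpy supplies elsewhere:

    from multioptpy.Optimizer.rsprfo import EnhancedRSPRFO

    opt = EnhancedRSPRFO(
        saddle_order=1,        # first-order saddle point search
        trust_radius=0.1,      # TS default, see docstring above
        trust_radius_max=0.3,
        max_micro_cycles=50,
        mode_following=True,
        step_rejection=True,
    )
    opt.hessian = initial_hessian  # run() raises ValueError if this is left unset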
@@ -67,6 +146,7 @@ class EnhancedRSPRFO:
         # Hessian-related variables
         self.hessian = None
         self.bias_hessian = None
+        self.shifted_hessian = None
 
         # Optimization tracking variables
         self.prev_eigvec_max = None
@@ -75,15 +155,22 @@
         self.actual_energy_changes = []
         self.reduction_ratios = []
         self.trust_radius_history = []
+        self.step_quality_history = []
         self.prev_geometry = None
         self.prev_gradient = None
         self.prev_energy = None
         self.prev_move_vector = None
 
+        # Step rejection tracking
+        self.rejected_step_geometry = None
+        self.rejected_step_gradient = None
+
         # Mode Following specific parameters
         self.mode_following_enabled = config.get("mode_following", True)
-        self.eigvec_history = []
-        self.…
+        self.eigvec_history = []
+        self.eigval_history = []
+        self.ts_mode_idx = None
+        self.ts_mode_eigvec = None
 
         # Eigenvector Following settings
         self.eigvec_following = config.get("eigvec_following", True)
@@ -104,7 +191,7 @@ class EnhancedRSPRFO:
                  f"bounds=[{self.trust_radius_min:.6f}, {self.trust_radius_max:.6f}]")
 
     def _build_hessian_updater_list(self):
-        """Builds the prioritized dispatch list for Hessian updaters…
+        """Builds the prioritized dispatch list for Hessian updaters."""
         self.default_update_method = (
             "auto (default)",
             lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")
@@ -136,714 +223,900 @@
             ("psb", "psb", self.hessian_updater.PSB_hessian_update),
             ("msp", "msp", self.hessian_updater.MSP_hessian_update),
         ]
-
-    def…
+
+    def _project_grad_tr_rot(self, gradient, geometry):
         """
-        … (truncated in diff view)
+        Project out translation and rotation components from the gradient.
+        Uses QR decomposition for orthonormalization.
 
         Parameters:
-        gradient: numpy.ndarray
-        … (truncated in diff view)
+        gradient : numpy.ndarray
+            Gradient vector to project
+        geometry : numpy.ndarray
+            Current geometry coordinates
+
+        Returns:
+        numpy.ndarray
+            Projected gradient with TR/ROT components removed
+        """
+        coords = geometry.reshape(-1, 3)
+        n_atoms = coords.shape[0]
+
+        if n_atoms < 3:
+            return gradient
+
+        center = np.mean(coords, axis=0)
+        coords_centered = coords - center
+
+        basis = []
+
+        # Translation (x, y, z)
+        basis.append(np.tile([1, 0, 0], n_atoms))
+        basis.append(np.tile([0, 1, 0], n_atoms))
+        basis.append(np.tile([0, 0, 1], n_atoms))
+
+        # Rotation (Rx, Ry, Rz via cross product)
+        rx = np.zeros_like(coords)
+        rx[:, 1] = -coords_centered[:, 2]
+        rx[:, 2] = coords_centered[:, 1]
+        basis.append(rx.flatten())
+
+        ry = np.zeros_like(coords)
+        ry[:, 0] = coords_centered[:, 2]
+        ry[:, 2] = -coords_centered[:, 0]
+        basis.append(ry.flatten())
+
+        rz = np.zeros_like(coords)
+        rz[:, 0] = -coords_centered[:, 1]
+        rz[:, 1] = coords_centered[:, 0]
+        basis.append(rz.flatten())
 
+        A = np.array(basis).T
+        Q, R = np.linalg.qr(A, mode='reduced')
+
+        diag_R = np.abs(np.diag(R))
+        valid_cols = diag_R > 1e-10
+        Q = Q[:, valid_cols]
+
+        overlaps = np.dot(Q.T, gradient)
+        tr_rot_part = np.dot(Q, overlaps)
+        projected_gradient = gradient - tr_rot_part
+
+        return projected_gradient
+
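Note: _project_grad_tr_rot spells out the six rigid-body vectors by hand; the same rotations can be written as cross products. The standalone check below (all names local to this example) confirms that the QR-based projection leaves no overlap with any translation or rotation direction:

    import numpy as np

    def tr_rot_basis(coords):                 # coords: (n_atoms, 3)
        c = coords - coords.mean(axis=0)
        basis = [np.tile(e, len(coords)) for e in np.eye(3)]  # Tx, Ty, Tz
        for e in np.eye(3):                                   # Rx, Ry, Rz as e x r
            basis.append(np.cross(e, c).ravel())
        return np.array(basis).T                              # shape (3N, 6)

    rng = np.random.default_rng(0)
    coords = rng.normal(size=(5, 3))
    grad = rng.normal(size=15)

    Q, _ = np.linalg.qr(tr_rot_basis(coords))     # orthonormal TR/ROT basis
    proj = grad - Q @ (Q.T @ grad)                # same projection as the method
    print(np.abs(tr_rot_basis(coords).T @ proj).max())  # ~1e-15: no TR/ROT left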
+    def _shift_hessian_eigenvalues(self, hessian, eigvals, eigvecs):
+        """
+        Shift Hessian eigenvalues to ensure proper curvature for TS search.
+
+        IMPROVED: More conservative shifting to avoid over-constraining
+        small eigenvalues that correspond to soft modes.
+
+        For saddle_order > 0:
+        - First `saddle_order` eigenvalues should be negative
+        - Remaining eigenvalues should be positive (but allow small values)
+
+        For saddle_order == 0 (minimization):
+        - All eigenvalues should be positive
+
+        Parameters:
+        hessian : numpy.ndarray
+            Original Hessian matrix
+        eigvals : numpy.ndarray
+            Eigenvalues of the Hessian
+        eigvecs : numpy.ndarray
+            Eigenvectors of the Hessian
+
         Returns:
-        … (truncated in diff view)
+        tuple
+            (shifted_hessian, shifted_eigvals, shift_applied)
+        """
+        if not self.hessian_shift_enabled:
+            return hessian, eigvals, False
+
+        n = len(eigvals)
+        shifted_eigvals = eigvals.copy()
+        shift_applied = False
+
+        if self.saddle_order == 0:
+            # Minimization: all eigenvalues should be positive
+            min_eigval = np.min(eigvals)
+            if min_eigval < self.min_positive_eigval:
+                shift = self.min_positive_eigval - min_eigval
+                shifted_eigvals = eigvals + shift
+                shift_applied = True
+                self.log(f"Applied eigenvalue shift of {shift:.6f} for minimization")
+        else:
+            # TS search: need exactly saddle_order negative eigenvalues
+            sorted_indices = np.argsort(eigvals)
+
+            # Ensure first saddle_order eigenvalues are sufficiently negative
+            for i in range(self.saddle_order):
+                idx = sorted_indices[i]
+                if eigvals[idx] > self.min_negative_eigval:
+                    shifted_eigvals[idx] = self.min_negative_eigval
+                    shift_applied = True
+
+            # IMPROVED: Only shift eigenvalues that are very close to zero or negative
+            # when they should be positive. Don't shift already positive eigenvalues
+            # to a higher minimum unless they are problematically small.
+            for i in range(self.saddle_order, n):
+                idx = sorted_indices[i]
+                # Only shift if eigenvalue is negative or very close to zero
+                if eigvals[idx] < 1e-6:  # Much smaller threshold
+                    shifted_eigvals[idx] = self.min_positive_eigval
+                    shift_applied = True
+
+        if shift_applied:
+            shifted_hessian = eigvecs @ np.diag(shifted_eigvals) @ eigvecs.T
+            shifted_hessian = 0.5 * (shifted_hessian + shifted_hessian.T)
+            self.log(f"Hessian eigenvalues shifted for proper curvature")
+            return shifted_hessian, shifted_eigvals, True
+
+        return hessian, eigvals, False
+
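Note: the reconstruction above is a plain spectral rebuild. A toy example with made-up numbers shows the pattern: shift one eigenvalue, rebuild H' = V diag(lambda') V^T, then re-symmetrize against round-off:

    import numpy as np

    H = np.array([[0.002, 0.010],
                  [0.010, 0.300]])
    eigvals, eigvecs = np.linalg.eigh(H)       # lowest eigenvalue ~0.0017

    shifted = eigvals.copy()
    shifted[0] = -0.001                        # force TS curvature on mode 0

    H_shifted = eigvecs @ np.diag(shifted) @ eigvecs.T
    H_shifted = 0.5 * (H_shifted + H_shifted.T)
    print(np.linalg.eigvalsh(H_shifted))       # [-0.001, ~0.3003]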
+    def _compute_gradient_based_scale(self, gradient_norm, step_norm):
+        """
+        Compute a scaling factor based on gradient magnitude to prevent
+        overshooting near convergence.
+
+        When the gradient is small but the step is large, this indicates
+        the Hessian may have small eigenvalues causing large steps.
+
+        Parameters:
+        gradient_norm : float
+            Norm of the current gradient
+        step_norm : float
+            Norm of the proposed step
+
+        Returns:
+        float
+            Scaling factor (0 < scale <= 1)
+        """
+        if not self.gradient_scaling_enabled:
+            return 1.0
+
+        if gradient_norm < 1e-10 or step_norm < 1e-10:
+            return 1.0
+
+        # Expected step norm based on gradient and typical curvature
+        # For a Newton step: s = -H^{-1}g, so |s| ~ |g| / |lambda_min|
+        # If |s| >> |g| / typical_curvature, we should scale down
+
+        # Use a simple heuristic: if step_norm / gradient_norm > threshold,
+        # scale the step proportionally
+        ratio = step_norm / gradient_norm
+
+        # Typical ratio for well-conditioned systems is O(1) to O(10)
+        # If ratio is very large (> 100), the Hessian likely has very small eigenvalues
+        max_ratio = 50.0  # Maximum allowed ratio
+
+        if ratio > max_ratio:
+            scale = max_ratio / ratio
+            scale = max(scale, self.min_step_scale)  # Don't scale below minimum
+            self.log(f"Gradient-based scaling: ratio={ratio:.2f}, scale={scale:.4f}")
+            return scale
+
+        return 1.0
+
+    def _compute_adaptive_trust_radius(self, gradient_norm):
+        """
+        Compute an adaptive trust radius based on gradient magnitude.
+
+        Near convergence (small gradient), the trust radius should be
+        proportional to the gradient to prevent overshooting.
+
+        Parameters:
+        gradient_norm : float
+            Norm of the current gradient
+
+        Returns:
+        float
+            Suggested trust radius
+        """
+        if not self.adaptive_trust_enabled:
+            return self.trust_radius
+
+        if gradient_norm < self.gradient_scaling_threshold:
+            # Near convergence: scale trust radius with gradient
+            # Use a linear relationship with a minimum floor
+            adaptive_radius = self.gradient_trust_coupling * gradient_norm / self.gradient_scaling_threshold * self.trust_radius_max
+            adaptive_radius = max(adaptive_radius, self.trust_radius_min)
+            adaptive_radius = min(adaptive_radius, self.trust_radius)
+
+            if adaptive_radius < self.trust_radius * 0.9:  # Only log if significant change
+                self.log(f"Adaptive trust radius: {self.trust_radius:.6f} -> {adaptive_radius:.6f} "
+                         f"(gradient_norm={gradient_norm:.6e})")
+
+            return adaptive_radius
+
+        return self.trust_radius
+
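Note: plugging the __init__ defaults into the coupling formula above makes it concrete (worked example, not package output):

    coupling, threshold, tr_max = 0.5, 1e-3, 0.3
    tr_min, tr_current = 0.01, 0.1
    g_norm = 4e-4                                      # below the threshold
    adaptive = coupling * g_norm / threshold * tr_max  # 0.5 * 0.4 * 0.3 = 0.06
    adaptive = min(max(adaptive, tr_min), tr_current)  # clipped: stays 0.06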
+    def compute_reduction_ratio(self, gradient, hessian, step, actual_reduction):
+        """
+        Compute ratio between actual and predicted reduction in energy.
+
+        Parameters:
+        gradient : numpy.ndarray
+            Current gradient
+        hessian : numpy.ndarray
+            Current approximate Hessian
+        step : numpy.ndarray
+            Step vector
+        actual_reduction : float
+            Actual energy reduction (previous_energy - current_energy)
+
+        Returns:
+        float
+            Ratio of actual to predicted reduction
         """
-        # Calculate predicted reduction from quadratic model
         g_flat = gradient.flatten()
         step_flat = step.flatten()
 
-        # Linear term of the model: g^T * p
         linear_term = np.dot(g_flat, step_flat)
-
-        # Quadratic term of the model: 0.5 * p^T * H * p
         quadratic_term = 0.5 * np.dot(step_flat, np.dot(hessian, step_flat))
-
-        # Predicted reduction: -g^T * p - 0.5 * p^T * H * p
-        # Negative sign because we're predicting the reduction (energy decrease)
         predicted_reduction = -(linear_term + quadratic_term)
 
-
-        if abs(predicted_reduction) < 1e-10:
+        if abs(predicted_reduction) < 1e-14:
             self.log("Warning: Predicted reduction is near zero")
-            return 0.0
+            return 1.0 if abs(actual_reduction) < 1e-14 else 0.0
 
-        # Calculate ratio
         ratio = actual_reduction / predicted_reduction
 
-        # Safeguard against numerical issues
         if not np.isfinite(ratio):
             self.log("Warning: Non-finite reduction ratio, using 0.0")
             return 0.0
 
-        self.log(f"…
-                 f"…
-                 f"Ratio: {ratio:.4f}")
+        self.log(f"Reduction ratio: actual={actual_reduction:.6e}, "
+                 f"predicted={predicted_reduction:.6e}, ratio={ratio:.4f}")
 
         return ratio
 
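Note: the predicted reduction implements the local quadratic model m(p) = E + g^T p + (1/2) p^T H p, so predicted_reduction = -(g^T p + (1/2) p^T H p) and rho = actual/predicted. A tiny self-contained check with invented numbers:

    import numpy as np

    g = np.array([0.02, -0.01])
    H = np.array([[1.0, 0.0],
                  [0.0, 4.0]])
    p = -np.linalg.solve(H, g)               # full Newton step
    predicted = -(g @ p + 0.5 * p @ H @ p)   # = (1/2) g^T H^-1 g > 0
    actual = 0.9 * predicted                 # pretend mild anharmonicity
    print(predicted, actual / predicted)     # rho ~ 0.9 -> step accepted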
-    def adjust_trust_radius(self,…
+    def adjust_trust_radius(self, ratio, step_norm, at_boundary):
         """
-        Dynamically adjust the trust radius based on ratio…
-
+        Dynamically adjust the trust radius based on reduction ratio.
+        Uses Nocedal & Wright's trust region update strategy.
+
+        Parameters:
+        ratio : float
+            Reduction ratio (actual/predicted)
+        step_norm : float
+            Norm of the current step
+        at_boundary : bool
+            Whether the step is at the trust region boundary
         """
-        if not self.adapt_trust_radius…
-            return
-
-        # Avoid division by zero or very small numbers
-        if abs(predicted_energy_change) < 1e-10:
-            self.log("Skipping trust radius update due to negligible predicted energy change")
+        if not self.adapt_trust_radius:
             return
 
-        # Calculate the ratio between actual and predicted energy changes
-        # Use absolute values to focus on magnitude of agreement
-        ratio = abs(actual_energy_change / predicted_energy_change)
-        self.log(f"Raw reduction ratio: {actual_energy_change / predicted_energy_change:.4f}")
-        self.log(f"Absolute reduction ratio: {ratio:.4f}")
-        self.reduction_ratios.append(ratio)
-
         old_trust_radius = self.trust_radius
-        … (truncated in diff view)
-        self.…
-        … (truncated in diff view)
+        self.trust_radius_history.append(old_trust_radius)
+
+        quality_metric = {
+            'iteration': self.iter,
+            'ratio': ratio,
+            'step_norm': step_norm,
+            'at_boundary': at_boundary,
+            'trust_radius': old_trust_radius
+        }
+        self.step_quality_history.append(quality_metric)
+
+        if ratio < self.eta_2:
+            self.trust_radius = max(self.gamma_1 * step_norm, self.trust_radius_min)
+            self.log(f"Poor step quality (ratio={ratio:.3f} < {self.eta_2}), "
+                     f"shrinking trust radius: {old_trust_radius:.6f} -> {self.trust_radius:.6f}")
+        elif ratio > self.eta_3 and at_boundary:
+            self.trust_radius = min(self.gamma_2 * self.trust_radius, self.trust_radius_max)
+            self.log(f"Good step quality (ratio={ratio:.3f} > {self.eta_3}) at boundary, "
+                     f"expanding trust radius: {old_trust_radius:.6f} -> {self.trust_radius:.6f}")
         else:
-            … (truncated in diff view)
-    def…
+            self.log(f"Acceptable step quality (ratio={ratio:.3f}), "
+                     f"keeping trust radius at {self.trust_radius:.6f}")
+
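Note: the branch structure above is the standard trust-region schedule (cf. Nocedal and Wright, Algorithm 4.1, cited in the class docstring). Restated compactly as a pure function with this class's defaults:

    def update_trust_radius(tr, ratio, step_norm, at_boundary,
                            eta2=0.25, eta3=0.75, gamma1=0.25, gamma2=2.0,
                            tr_min=0.01, tr_max=0.3):
        if ratio < eta2:                   # model over-promised: shrink
            return max(gamma1 * step_norm, tr_min)
        if ratio > eta3 and at_boundary:   # model trustworthy and constrained: expand
            return min(gamma2 * tr, tr_max)
        return tr                          # acceptable step: leave unchanged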
+    def _solve_alpha_micro_cycles(self, eigvals, gradient_trans, max_indices, min_indices, gradient_norm):
         """
-        … (truncated in diff view)
+        Solve for alpha using improved micro-cycle iteration with
+        Levenberg-Marquardt style damping and backtracking.
 
         Parameters:
-        … (truncated in diff view)
+        eigvals : numpy.ndarray
+            Eigenvalues of the Hessian
+        gradient_trans : numpy.ndarray
+            Gradient transformed to eigenvector basis
+        max_indices : list
+            Indices for maximization subspace
+        min_indices : list
+            Indices for minimization subspace
+        gradient_norm : float
+            Norm of the original gradient (for adaptive scaling)
+
         Returns:
-        … (truncated in diff view)
+        tuple
+            (step, step_norm, converged)
         """
-        self.log(f"\n{'='*50}\nIteration {self.iter}\n{'='*50}")
-
-        if self.Initialization:
-            self.prev_eigvec_max = None
-            self.prev_eigvec_min = None
-            self.predicted_energy_changes = []
-            self.actual_energy_changes = []
-            self.reduction_ratios = []
-            self.trust_radius_history = []
-            self.prev_geometry = None
-            self.prev_gradient = None
-            self.prev_energy = None
-            self.prev_move_vector = None
-            self.eigvec_history = []
-            self.ts_mode_idx = None
-            self.Initialization = False
-            self.log(f"First iteration - using initial trust radius {self.trust_radius:.6f}")
-        else:
-            # Adjust trust radius based on the previous step if we have energy data
-            if self.prev_energy is not None and len(self.predicted_energy_changes) > 0:
-                actual_energy_change = B_e - self.prev_energy
-                predicted_energy_change = self.predicted_energy_changes[-1]
-                self.actual_energy_changes.append(actual_energy_change)
-
-                # Get the previous step length
-                if len(pre_move_vector) > 0:
-                    prev_step_norm = norm(pre_move_vector.flatten())
-                elif self.prev_move_vector is not None:
-                    prev_step_norm = norm(self.prev_move_vector.flatten())
-                else:
-                    prev_step_norm = 0.0
-
-                # Log energy comparison
-                self.log(f"Previous energy: {self.prev_energy:.6f}, Current energy: {B_e:.6f}")
-                self.log(f"Actual energy change: {actual_energy_change:.6f}")
-                self.log(f"Predicted energy change: {predicted_energy_change:.6f}")
-                self.log(f"Previous step norm: {prev_step_norm:.6f}")
-
-                # Complete Hessian for the reduction ratio calculation
-                H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
-                H = Calculationtools().project_out_hess_tr_and_rot_for_coord(H, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), display_eigval=False)
-                # Compute reduction ratio
-                reduction_ratio = self.compute_reduction_ratio(
-                    self.prev_gradient, H, self.prev_move_vector, actual_energy_change)
-
-                # Adjust trust radius based on step quality and length
-                self.adjust_trust_radius(actual_energy_change, predicted_energy_change, prev_step_norm)
-
-        # Check Hessian
-        if self.hessian is None:
-            raise ValueError("Hessian matrix must be set before running optimization")
-
-        # Update Hessian if we have previous geometry and gradient information
-        if self.prev_geometry is not None and self.prev_gradient is not None and len(pre_B_g) > 0 and len(pre_geom) > 0:
-            self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)
-
-        # Ensure gradient is properly shaped as a 1D array
-        gradient = np.asarray(B_g).flatten()
-        H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
-
-        # Compute eigenvalues and eigenvectors of the hessian
-        eigvals, eigvecs = np.linalg.eigh(H)
-
-        # Count negative eigenvalues for diagnostic purposes
-        neg_eigval_count = np.sum(eigvals < -1e-6)
-        self.log(f"Found {neg_eigval_count} negative eigenvalues, target for this saddle order: {self.saddle_order}")
-
-        # Store previous eigenvector information
-        prev_eigvecs = None
-        if len(self.eigvec_history) > 0:
-            prev_eigvecs = self.eigvec_history[-1]
-
-        # Standard mode selection (with mode following if enabled)
-        if self.mode_following_enabled and self.saddle_order > 0:
-            if self.ts_mode_idx is None:
-                # For first run, select mode with most negative eigenvalue
-                self.ts_mode_idx = np.argmin(eigvals)
-                self.log(f"Initial TS mode selected: {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")
-
-            # Find corresponding modes between steps
-            mode_indices = self.find_corresponding_mode(eigvals, eigvecs, prev_eigvecs, self.ts_mode_idx)
-
-            # Apply Eigenvector Following for cases with mode mixing
-            if self.eigvec_following and len(mode_indices) > 1:
-                mode_indices = self.apply_eigenvector_following(eigvals, eigvecs, gradient.dot(eigvecs), mode_indices)
-
-            # Update tracked mode
-            if mode_indices:
-                self.ts_mode_idx = mode_indices[0]
-                self.log(f"Mode following: tracking mode {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")
-
-                # Update max_indices (saddle point direction)
-                max_indices = mode_indices
-            else:
-                # If no corresponding mode found, use standard approach
-                self.log("No corresponding mode found, using default mode selection")
-                max_indices = self.roots
-        else:
-            # Standard mode selection when mode following is disabled
-            if self.saddle_order == 0:
-                min_indices = list(range(len(gradient)))
-                max_indices = []
-            else:
-                min_indices = [i for i in range(gradient.size) if i not in self.roots]
-                max_indices = self.roots
-
-        # Store eigenvectors in history
-        self.eigvec_history.append(eigvecs)
-        if len(self.eigvec_history) > 5:  # Keep only last 5 steps
-            self.eigvec_history.pop(0)
-
-        # Transform gradient to eigenvector space
-        gradient_trans = eigvecs.T.dot(gradient).flatten()
-
-        # Set minimization directions (all directions not in max_indices)
-        min_indices = [i for i in range(gradient.size) if i not in max_indices]
-
-        # Initialize alpha parameter
         alpha = self.alpha0
-
-        # Tracking variables
         best_step = None
         best_step_norm_diff = float('inf')
         step_norm_history = []
 
-        # …
+        # Compute adaptive trust radius based on gradient
+        effective_trust_radius = self._compute_adaptive_trust_radius(gradient_norm)
+
         for mu in range(self.max_micro_cycles):
-            self.log(f"…
+            self.log(f"  Micro cycle {mu:02d}: alpha={alpha:.6e}, trust_radius={effective_trust_radius:.6f}")
 
             try:
-                # Make a fresh step vector for this cycle - essential to ensure proper recalculation
                 step = np.zeros_like(gradient_trans)
 
-                # Maximization subspace
+                # Maximization subspace
                 step_max = np.array([])
-                eigval_max = 0
+                eigval_max = 0.0
                 if len(max_indices) > 0:
-                    # Calculate augmented Hessian
                     H_aug_max = self.get_augmented_hessian(
                         eigvals[max_indices], gradient_trans[max_indices], alpha
                     )
-
-                    # Solve RFO equations
                     step_max, eigval_max, nu_max, eigvec_max = self.solve_rfo(
                         H_aug_max, "max", prev_eigvec=self.prev_eigvec_max
                     )
-
-                    # Store eigenvector for next iteration
                     self.prev_eigvec_max = eigvec_max
-
-                    # Copy step to the main step vector
                     step[max_indices] = step_max
 
-                # Minimization subspace
+                # Minimization subspace
                 step_min = np.array([])
-                eigval_min = 0
+                eigval_min = 0.0
                 if len(min_indices) > 0:
-                    # Calculate augmented Hessian
                     H_aug_min = self.get_augmented_hessian(
                         eigvals[min_indices], gradient_trans[min_indices], alpha
                     )
-
-                    # Solve RFO equations
                     step_min, eigval_min, nu_min, eigvec_min = self.solve_rfo(
                         H_aug_min, "min", prev_eigvec=self.prev_eigvec_min
                     )
-
-                    # Store eigenvector for next iteration
                     self.prev_eigvec_min = eigvec_min
-
-                    # Copy step to the main step vector
                     step[min_indices] = step_min
 
-                # Calculate norms of the current step
-                step_max_norm = np.linalg.norm(step_max) if len(max_indices) > 0 else 0.0
-                step_min_norm = np.linalg.norm(step_min) if len(min_indices) > 0 else 0.0
                 step_norm = np.linalg.norm(step)
+                step_norm_history.append(step_norm)
 
-
-                if len(…
-                    self.log(f"norm(step_max)={step_max_norm:.6f}")
-                if len(min_indices) > 0:
-                    self.log(f"norm(step_min)={step_min_norm:.6f}")
-
-                self.log(f"norm(step)={step_norm:.6f}")
+                step_max_norm = np.linalg.norm(step_max) if len(max_indices) > 0 else 0.0
+                step_min_norm = np.linalg.norm(step_min) if len(min_indices) > 0 else 0.0
 
-
-
+                if self.debug:
+                    self.log(f"  |step_max|={step_max_norm:.6f}, |step_min|={step_min_norm:.6f}, |step|={step_norm:.6f}")
 
-                # …
-                norm_diff = abs(step_norm -…
+                # Track best step
+                norm_diff = abs(step_norm - effective_trust_radius)
                 if norm_diff < best_step_norm_diff:
                     best_step = step.copy()
                     best_step_norm_diff = norm_diff
 
-                # Check…
-                if step_norm <=…
-                    self.log(f"Step satisfies trust radius {…
+                # Check convergence
+                if step_norm <= effective_trust_radius:
+                    self.log(f"  Step satisfies trust radius (|step|={step_norm:.6f} <= {effective_trust_radius:.6f})")
                     break
 
-                # …
-                … (truncated in diff view)
-                    alpha, eigval_max, step_max_norm, eigvals[max_indices],
-                    gradient_trans[max_indices], "max"
-                )
+                # Check relative convergence
+                if step_norm > 0 and norm_diff / step_norm < self.micro_cycle_rtol:
+                    self.log(f"  Micro-cycle converged (relative diff={norm_diff/step_norm:.6e})")
+                    step = step * (effective_trust_radius / step_norm)
+                    return step, effective_trust_radius, True
 
-                # …
-                … (truncated in diff view)
+                # Check for stagnation
+                if len(step_norm_history) >= 3:
+                    recent_changes = [abs(step_norm_history[-i] - step_norm_history[-(i+1)])
+                                      for i in range(1, 3)]
+                    if all(c < self.micro_cycle_atol for c in recent_changes):
+                        self.log(f"  Micro-cycle stagnated, using best step")
+                        if best_step is not None:
+                            best_norm = np.linalg.norm(best_step)
+                            if best_norm > effective_trust_radius:
+                                best_step = best_step * (effective_trust_radius / best_norm)
+                            return best_step, min(best_norm, effective_trust_radius), True
 
-                # …
-                alpha_step =…
-                … (truncated in diff view)
-                        alpha_step = (w_max * alpha_step_max + w_min * alpha_step_min) / (w_max + w_min)
-                    else:
-                        alpha_step = alpha_step_max if abs(alpha_step_max) > abs(alpha_step_min) else alpha_step_min
-                else:
-                    alpha_step = alpha_step_max if alpha_step_max != 0.0 else alpha_step_min
+                # Calculate alpha update
+                alpha_step = self._compute_alpha_step(
+                    alpha, eigval_max, eigval_min,
+                    step_max_norm, step_min_norm, step_norm,
+                    eigvals, gradient_trans, max_indices, min_indices,
+                    effective_trust_radius
+                )
 
-                # …
-                … (truncated in diff view)
-                    max_indices, min_indices, gradient_trans, step_norm
-                )
-
-                if abs(dstep2_dalpha) > 1e-10:
-                    alpha_step = 2.0 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
-                    self.log(f"Direct alpha_step calculation: {alpha_step:.6f}")
-            except Exception as e:
-                self.log(f"Error in direct derivative calculation: {str(e)}")
-                alpha_step = 0.0
+                # Apply damping for stability
+                damping = 1.0
+                if abs(alpha_step) > self.alpha_step_max:
+                    damping = self.alpha_step_max / abs(alpha_step)
+                    alpha_step *= damping
+                    self.log(f"  Damped alpha_step by factor {damping:.4f}")
 
-            # Update alpha with…
+                # Update alpha with bounds
                 old_alpha = alpha
+                alpha = np.clip(alpha + alpha_step, self.alpha_min, self.alpha_max)
 
-                … (truncated in diff view)
-                # Apply a more aggressive heuristic - double alpha
-                alpha = min(alpha * 2.0, self.alpha_max)
-                self.log(f"Using heuristic alpha update: {old_alpha:.6f} -> {alpha:.6f}")
-            else:
-                # Apply safety bounds to alpha_step
-                alpha_step_limited = np.clip(alpha_step, -self.alpha_step_max, self.alpha_step_max)
-
-                if abs(alpha_step_limited) != abs(alpha_step):
-                    self.log(f"Limited alpha_step from {alpha_step:.6f} to {alpha_step_limited:.6f}")
-
-                # Ensure alpha remains positive and within bounds
-                alpha = min(max(old_alpha + alpha_step_limited, 1e-6), self.alpha_max)
-                self.log(f"Updated alpha: {old_alpha:.6f} -> {alpha:.6f}")
+                if self.debug:
+                    self.log(f"  alpha: {old_alpha:.6e} -> {alpha:.6e} (step={alpha_step:.6e})")
 
-            # Check if alpha…
-            if alpha == self.alpha_max:
-                self.log(f"Alpha reached…
+                # Check if alpha hit bounds
+                if alpha == self.alpha_max or alpha == self.alpha_min:
+                    self.log(f"  Alpha reached bounds, using best step")
                     if best_step is not None:
-                        … (truncated in diff view)
+                        best_norm = np.linalg.norm(best_step)
+                        if best_norm > effective_trust_radius:
+                            best_step = best_step * (effective_trust_radius / best_norm)
+                        return best_step, min(best_norm, effective_trust_radius), True
                     break
-
-            # Check for progress in step norm adjustments
-            if len(step_norm_history) >= 3:
-                # Calculate consecutive changes in step norm
-                recent_changes = [abs(step_norm_history[-i] - step_norm_history[-(i+1)])
-                                  for i in range(1, min(3, len(step_norm_history)))]
 
-                # If step norms are not changing significantly, break the loop
-                if all(change < 1e-6 for change in recent_changes):
-                    self.log(f"Step norms not changing significantly: {step_norm_history[-3:]}")
-                    self.log("Breaking micro-cycle loop")
-
-                    # Use the best step found so far
-                    if best_step is not None and best_step_norm_diff < norm_diff:
-                        step = best_step.copy()
-                        self.log("Using best step found so far")
-
-                    break
-
             except Exception as e:
-                self.log(f"Error in micro-cycle: {str(e)}")
-                # Use best step if available, otherwise scale current step
+                self.log(f"  Error in micro-cycle {mu}: {str(e)}")
                 if best_step is not None:
-                    … (truncated in diff view)
-                if step_norm > 0 and step_norm > self.trust_radius:
-                    scale_factor = self.trust_radius / step_norm
-                    step = step * scale_factor
-                    self.log(f"Scaled step to trust radius due to error")
+                    best_norm = np.linalg.norm(best_step)
+                    if best_norm > effective_trust_radius:
+                        best_step = best_step * (effective_trust_radius / best_norm)
+                    return best_step, min(best_norm, effective_trust_radius), False
                 break
 
-        … (truncated in diff view)
-        if…
-
+        # Micro-cycles did not converge - use best step with scaling
+        self.log(f"  Micro-cycles did not converge in {self.max_micro_cycles} iterations")
+        if best_step is not None:
+            best_norm = np.linalg.norm(best_step)
+            if best_norm > effective_trust_radius:
+                best_step = best_step * (effective_trust_radius / best_norm)
+            return best_step, min(best_norm, effective_trust_radius), False
 
-        … (truncated in diff view)
+        return np.zeros_like(gradient_trans), 0.0, False
+
+    def _compute_alpha_step(self, alpha, eigval_max, eigval_min,
+                            step_max_norm, step_min_norm, step_norm,
+                            eigvals, gradient_trans, max_indices, min_indices,
+                            target_trust_radius):
+        """
+        Compute the alpha step using Newton-Raphson with safeguards.
+
+        Returns:
+        float
+            Computed alpha step
+        """
+        eps = 1e-12
+
+        dstep2_dalpha_max = 0.0
+        if len(max_indices) > 0 and step_max_norm > eps:
+            denom_max = eigvals[max_indices] - eigval_max * alpha
+            safe_denom = np.where(np.abs(denom_max) < eps,
+                                  np.sign(denom_max) * eps, denom_max)
+            g_max = gradient_trans[max_indices]
+
+            step_factor = 1.0 + step_max_norm**2 * alpha
+            if abs(step_factor) > eps:
+                quot = np.sum(g_max**2 / safe_denom**3)
+                dstep2_dalpha_max = 2.0 * eigval_max / step_factor * quot
+
+        dstep2_dalpha_min = 0.0
+        if len(min_indices) > 0 and step_min_norm > eps:
+            denom_min = eigvals[min_indices] - eigval_min * alpha
+            safe_denom = np.where(np.abs(denom_min) < eps,
+                                  np.sign(denom_min) * eps, denom_min)
+            g_min = gradient_trans[min_indices]
+
+            step_factor = 1.0 + step_min_norm**2 * alpha
+            if abs(step_factor) > eps:
+                quot = np.sum(g_min**2 / safe_denom**3)
+                dstep2_dalpha_min = 2.0 * eigval_min / step_factor * quot
+
+        dstep2_dalpha = dstep2_dalpha_max + dstep2_dalpha_min
+
+        if abs(dstep2_dalpha) < eps:
+            if step_norm > target_trust_radius:
+                return alpha * 0.5
+            else:
+                return 0.0
+
+        alpha_step = (target_trust_radius**2 - step_norm**2) / dstep2_dalpha
+
+        return alpha_step
+
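Note: _compute_alpha_step is a Newton step on alpha for the implicit equation |s(alpha)| = trust radius. The sketch below solves the same equation for a two-mode toy system, with scipy's bracketing root finder standing in for the micro-cycles; the rfo_step_norm helper mirrors the generalized-eigenproblem form sketched earlier and is an assumption, not the package's code:

    import numpy as np
    from scipy.linalg import eigh
    from scipy.optimize import brentq

    def rfo_step_norm(alpha, eigvals, g):
        """|s(alpha)| for the 'min' subspace of a diagonal model Hessian."""
        n = len(eigvals)
        A = np.zeros((n + 1, n + 1))
        A[:n, :n] = np.diag(eigvals)
        A[:n, -1] = g
        A[-1, :n] = g
        S = np.diag(np.append(np.full(n, alpha), 1.0))
        lam, V = eigh(A, S)
        v = V[:, 0]                        # lowest root -> minimizing step
        return np.linalg.norm(v[:-1] / v[-1])

    eigvals = np.array([0.05, 0.80])       # toy curvatures
    g = np.array([0.20, 0.10])             # toy gradient components
    target = 0.10                          # trust radius

    # |s| shrinks monotonically as alpha grows, so a sign change brackets the root
    alpha = brentq(lambda a: rfo_step_norm(a, eigvals, g) - target, 1e-6, 1e6)
    print(alpha, rfo_step_norm(alpha, eigvals, g))   # second value ~= 0.10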
+    def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0,
+            pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
+        """
+        Execute one step of enhanced RSPRFO optimization with trust radius adjustment.
+
+        Parameters:
+        geom_num_list : numpy.ndarray
+            Current geometry coordinates
+        B_g : numpy.ndarray
+            Current gradient
+        pre_B_g : numpy.ndarray
+            Previous gradient
+        pre_geom : numpy.ndarray
+            Previous geometry
+        B_e : float
+            Current energy
+        pre_B_e : float
+            Previous energy
+        pre_move_vector : numpy.ndarray
+            Previous step vector
+        initial_geom_num_list : numpy.ndarray
+            Initial geometry
+        g : numpy.ndarray
+            Alternative gradient representation
+        pre_g : numpy.ndarray
+            Previous alternative gradient representation
+
+        Returns:
+        numpy.ndarray
+            Optimization step vector (shaped as column vector)
+        """
+        self.log(f"\n{'='*60}")
+        self.log(f"RS-PRFO Iteration {self.iter}")
+        self.log(f"{'='*60}")
+
+        if self.Initialization:
+            self._reset_state()
+            self.Initialization = False
+            self.log(f"Initialized with trust radius {self.trust_radius:.6f}")
         else:
-            … (truncated in diff view)
+            step_accepted = self._process_previous_step(
+                B_e, geom_num_list, B_g, pre_B_g, pre_geom, pre_move_vector
+            )
+
+            if not step_accepted and self.step_rejection_enabled:
+                self.log("Step rejected - optimizer should use previous geometry")
+
+        if self.hessian is None:
+            raise ValueError("Hessian matrix must be set before running optimization")
 
-        self.…
+        if (self.prev_geometry is not None and self.prev_gradient is not None and
+                len(pre_B_g) > 0 and len(pre_geom) > 0):
+            self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)
 
-        … (truncated in diff view)
+        gradient = np.asarray(B_g).flatten()
+
+        # Project out TR/ROT from gradient
+        raw_norm = np.linalg.norm(gradient)
+        gradient = self._project_grad_tr_rot(gradient, geom_num_list)
+        proj_norm = np.linalg.norm(gradient)
+
+        if abs(raw_norm - proj_norm) > 1e-10:
+            self.log(f"Gradient TR/ROT projection: {raw_norm:.6e} -> {proj_norm:.6e}")
+
+        gradient_norm = proj_norm  # Store for later use
+
+        # Prepare Hessian
+        H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
+
+        # Compute eigendecomposition
+        eigvals, eigvecs = np.linalg.eigh(H)
+
+        # === [CRITICAL FIX] Handle NaN/Inf in Hessian ===
+
+        if not np.all(np.isfinite(eigvals)) or not np.all(np.isfinite(eigvecs)):
+            self.log("CRITICAL ERROR: Hessian eigendecomposition failed (NaNs detected).", force=True)
+            self.log("Resetting to Identity Hessian to force Steepest Descent fallback.", force=True)
+
+            eigvals = np.ones_like(eigvals)
+            eigvecs = np.eye(len(eigvals))
+        # =================================================
+
+        # Apply eigenvalue shifting if needed
+        H, eigvals, shifted = self._shift_hessian_eigenvalues(H, eigvals, eigvecs)
+        if shifted:
+            eigvals, eigvecs = np.linalg.eigh(H)
+
+        self.shifted_hessian = H
+
+        # Log eigenvalue information
+        neg_eigval_count = np.sum(eigvals < -1e-8)
+        self.log(f"Eigenvalue analysis: {neg_eigval_count} negative (target: {self.saddle_order})")
+        self.log(f"Lowest eigenvalues: {eigvals[:min(5, len(eigvals))]}")
+
+        # Mode selection with mode following
+        max_indices, min_indices = self._select_modes(eigvals, eigvecs, gradient)
+
+        # Store eigenvector history
+        self.eigvec_history.append(eigvecs.copy())
+        self.eigval_history.append(eigvals.copy())
+        if len(self.eigvec_history) > 5:
+            self.eigvec_history.pop(0)
+            self.eigval_history.pop(0)
+
+        # Transform gradient to eigenvector space
+        gradient_trans = eigvecs.T @ gradient
+
+        # Solve for step using micro-cycles (now with gradient_norm)
+        step_trans, step_norm, converged = self._solve_alpha_micro_cycles(
+            eigvals, gradient_trans, max_indices, min_indices, gradient_norm
+        )
+        # === [ADDED START] Safety check for NaN/Inf steps ===
+        if not np.isfinite(step_norm) or not np.all(np.isfinite(step_trans)):
+            self.log("CRITICAL WARNING: NaN detected in optimization step. Falling back to Steepest Descent.", force=True)
 
-        # …
-        … (truncated in diff view)
-                longest_step = np.max(steplengths)
-            else:
-                # Generic vector - just compute total norm
-                longest_step = norm(move_vector)
+            # Fallback: Steepest Descent (SD) step within trust radius
+            # In eigenvector basis, SD direction is simply -gradient
+            sd_step = -gradient_trans
+            sd_norm = np.linalg.norm(sd_step)
 
-        # …
-        … (truncated in diff view)
+            # Apply trust radius
+            target_norm = min(sd_norm, self.trust_radius)
+
+            if sd_norm > 1e-12:
+                step_trans = sd_step * (target_norm / sd_norm)
+                step_norm = target_norm
+            else:
+                step_trans = np.zeros_like(gradient_trans)
+                step_norm = 0.0
+
+            converged = False
+        # === [ADDED END] ===
+
+        if not converged:
+            self.log("Warning: Micro-cycles did not fully converge")
+
+        # Transform step back to original coordinates
+        move_vector = eigvecs @ step_trans
+        step_norm = np.linalg.norm(move_vector)
+
+        # Apply gradient-based scaling for near-convergence
+        grad_scale = self._compute_gradient_based_scale(gradient_norm, step_norm)
+        if grad_scale < 1.0:
+            move_vector = move_vector * grad_scale
+            step_norm = step_norm * grad_scale
+            self.log(f"Applied gradient-based scaling: {1.0/grad_scale:.2f}x reduction")
+
+        # Apply trust radius constraint
+        effective_trust = self._compute_adaptive_trust_radius(gradient_norm)
+        if step_norm > effective_trust * 1.01:
+            self.log(f"Scaling step from {step_norm:.6f} to trust radius {effective_trust:.6f}")
+            move_vector = move_vector * (effective_trust / step_norm)
+            step_norm = effective_trust
+
+        # Apply maxstep constraint if specified
+        if self.config.get("maxstep") is not None:
+            move_vector, step_norm = self._apply_maxstep_constraint(move_vector)
+
+        self.log(f"Final step norm: {step_norm:.6f}")
 
         # Calculate predicted energy change
         predicted_energy_change = self.rfo_model(gradient, H, move_vector)
         self.predicted_energy_changes.append(predicted_energy_change)
-        self.log(f"Predicted energy change: {predicted_energy_change:.…
+        self.log(f"Predicted energy change: {predicted_energy_change:.6e}")
 
-        # Store…
-        self.prev_geometry = copy…
-        self.prev_gradient = copy…
+        # Store state for next iteration
+        self.prev_geometry = np.copy(geom_num_list)
+        self.prev_gradient = np.copy(B_g)
         self.prev_energy = B_e
-        self.prev_move_vector = copy…
+        self.prev_move_vector = np.copy(move_vector)
 
-        # Increment iteration counter
         self.iter += 1
 
         return move_vector.reshape(-1, 1)
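Note: a hypothetical outer loop around run(). get_energy_and_gradient, start_geometry and initial_hessian are placeholders for the calculator interface multioptpy provides elsewhere; only the run() signature comes from this diff:

    import numpy as np

    geom = start_geometry.copy()
    prev_g, prev_geom, prev_step = [], [], []
    opt = EnhancedRSPRFO(saddle_order=1)
    opt.hessian = initial_hessian

    for it in range(200):
        energy, grad = get_energy_and_gradient(geom)      # placeholder calculator
        step = opt.run(geom, grad, pre_B_g=prev_g, pre_geom=prev_geom,
                       B_e=energy, pre_move_vector=prev_step)
        if np.linalg.norm(grad) < 3e-4:                   # simple convergence test
            break
        prev_g, prev_geom, prev_step = grad, np.copy(geom), step
        geom = geom + step.reshape(geom.shape)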
605
|
-
|
|
606
|
-
def
|
|
887
|
+
|
|
888
|
+
def _reset_state(self):
|
|
889
|
+
"""Reset optimizer state for a new optimization run."""
|
|
890
|
+
self.prev_eigvec_max = None
|
|
891
|
+
self.prev_eigvec_min = None
|
|
892
|
+
self.predicted_energy_changes = []
|
|
893
|
+
self.actual_energy_changes = []
|
|
894
|
+
self.reduction_ratios = []
|
|
895
|
+
self.trust_radius_history = []
|
|
896
|
+
self.step_quality_history = []
|
|
897
|
+
self.prev_geometry = None
|
|
898
|
+
self.prev_gradient = None
|
|
899
|
+
self.prev_energy = None
|
|
900
|
+
self.prev_move_vector = None
|
|
901
|
+
self.eigvec_history = []
|
|
902
|
+
self.eigval_history = []
|
|
903
|
+
self.ts_mode_idx = None
|
|
904
|
+
self.ts_mode_eigvec = None
|
|
905
|
+
self.consecutive_rejections = 0
|
|
906
|
+
self.trust_radius = self.trust_radius_initial
|
|
907
|
+
|
|
908
|
+

    def _process_previous_step(self, B_e, geom_num_list, B_g, pre_B_g, pre_geom, pre_move_vector):
        """
        Process results from the previous step and adjust trust radius.

        Returns:
            bool
                True if step is accepted, False if rejected
        """
        if self.prev_energy is None or len(self.predicted_energy_changes) == 0:
            return True

        actual_energy_change = B_e - self.prev_energy
        predicted_energy_change = self.predicted_energy_changes[-1]
        self.actual_energy_changes.append(actual_energy_change)

        if len(pre_move_vector) > 0:
            prev_step_norm = np.linalg.norm(np.asarray(pre_move_vector).flatten())
        elif self.prev_move_vector is not None:
            prev_step_norm = np.linalg.norm(self.prev_move_vector.flatten())
        else:
            prev_step_norm = 0.0

        self.log(f"Energy: {self.prev_energy:.8f} -> {B_e:.8f}")
        self.log(f"Actual change: {actual_energy_change:.6e}, Predicted: {predicted_energy_change:.6e}")

        H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian

        if hasattr(Calculationtools, 'project_out_hess_tr_and_rot_for_coord'):
            H = Calculationtools().project_out_hess_tr_and_rot_for_coord(
                H, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3),
                display_eigval=False
            )

        ratio = self.compute_reduction_ratio(
            self.prev_gradient, H, self.prev_move_vector, actual_energy_change
        )
        self.reduction_ratios.append(ratio)

        at_boundary = prev_step_norm >= self.trust_radius * 0.95

        self.adjust_trust_radius(ratio, prev_step_norm, at_boundary)

        if self.step_rejection_enabled and ratio < self.rejection_threshold:
            self.consecutive_rejections += 1
            self.log(f"Step quality very poor (ratio={ratio:.4f}), rejection count: {self.consecutive_rejections}")

            if self.consecutive_rejections >= self.max_consecutive_rejections:
                self.log(f"Too many consecutive rejections, accepting step anyway")
                self.consecutive_rejections = 0
                return True

            return False

        self.consecutive_rejections = 0
        return True
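
Editor's note: `compute_reduction_ratio` and `adjust_trust_radius` are defined earlier in the file and do not appear in this hunk. As a hedged point of reference only, a minimal sketch of the kind of trust-region update this method drives, using the textbook accept/contract/expand thresholds (Nocedal and Wright) rather than the package's actual values:

def adjust_trust_radius_sketch(trust_radius, ratio, at_boundary,
                               tr_min=0.01, tr_max=0.5):
    # ratio = actual / predicted energy change from the previous step
    if ratio < 0.25:                      # poor model agreement -> contract
        trust_radius *= 0.5
    elif ratio > 0.75 and at_boundary:    # good agreement at the boundary -> expand
        trust_radius *= 2.0
    return min(max(trust_radius, tr_min), tr_max)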

    def _select_modes(self, eigvals, eigvecs, gradient):
        """
        Select modes for maximization and minimization subspaces.

        Returns:
            tuple
                (max_indices, min_indices)
        """
        n = len(eigvals)

        if self.saddle_order == 0:
            return [], list(range(n))

        if self.mode_following_enabled:
            max_indices = self._find_ts_modes(eigvals, eigvecs, gradient)
        else:
            sorted_indices = np.argsort(eigvals)
            max_indices = sorted_indices[:self.saddle_order].tolist()

        min_indices = [i for i in range(n) if i not in max_indices]

        return max_indices, min_indices
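
Editor's note: without mode following, the partition above reduces to maximizing along the `saddle_order` most negative modes and minimizing along the rest. A self-contained illustration for a first-order saddle search:

import numpy as np

eigvals = np.array([-0.5, 0.2, 1.1, 3.0])
saddle_order = 1
order = np.argsort(eigvals)
max_indices = order[:saddle_order].tolist()                              # [0], the most negative mode
min_indices = [i for i in range(len(eigvals)) if i not in max_indices]   # all remaining modes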

    def _find_ts_modes(self, eigvals, eigvecs, gradient):
        """
        Find transition state modes using mode following.

        Returns:
            list
                Indices of modes to maximize
        """
        sorted_indices = np.argsort(eigvals)

        if self.ts_mode_idx is None or self.ts_mode_eigvec is None:
            self.ts_mode_idx = sorted_indices[0]
            self.ts_mode_eigvec = eigvecs[:, self.ts_mode_idx].copy()
            self.log(f"Initial TS mode: {self.ts_mode_idx}, eigenvalue={eigvals[self.ts_mode_idx]:.6f}")
            return sorted_indices[:self.saddle_order].tolist()

        overlaps = np.abs(eigvecs.T @ self.ts_mode_eigvec)

        best_idx = np.argmax(overlaps)
        best_overlap = overlaps[best_idx]

        self.log(f"Mode following: best overlap={best_overlap:.4f} with mode {best_idx} "
                 f"(eigenvalue={eigvals[best_idx]:.6f})")

        if best_overlap > self.overlap_threshold:
            self.ts_mode_idx = best_idx
            self.ts_mode_eigvec = eigvecs[:, best_idx].copy()

            if np.dot(eigvecs[:, best_idx], self.ts_mode_eigvec) < 0:
                self.ts_mode_eigvec *= -1

            max_indices = [best_idx]

            if self.saddle_order > 1:
                remaining = [i for i in sorted_indices if i != best_idx]
                max_indices.extend(remaining[:self.saddle_order - 1])

            return max_indices
        else:
            self.log(f"Warning: Poor mode overlap ({best_overlap:.4f}), possible mode crossing")

            if self.eigvec_following:
                return self._handle_mode_mixing(eigvals, eigvecs, overlaps, sorted_indices)

            self.ts_mode_idx = sorted_indices[0]
            self.ts_mode_eigvec = eigvecs[:, sorted_indices[0]].copy()
            return sorted_indices[:self.saddle_order].tolist()
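
Editor's note: because `eigvecs` has orthonormal columns and the stored TS mode is a unit eigenvector, the overlap vector computed above is simply the per-mode cosine with the previous TS direction. A toy check:

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((6, 6))
H = 0.5 * (A + A.T)                      # toy symmetric "Hessian"
eigvals, eigvecs = np.linalg.eigh(H)

ts_mode_prev = eigvecs[:, 0]             # tracked mode from the "previous" step
overlaps = np.abs(eigvecs.T @ ts_mode_prev)
best_idx = int(np.argmax(overlaps))      # 0 here, with overlap exactly 1.0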

    def _handle_mode_mixing(self, eigvals, eigvecs, overlaps, sorted_indices):
        """
        Handle mode mixing when mode overlap is poor.

        Returns:
            list
                Selected mode indices
        """
        significant_overlaps = np.where(overlaps > self.mixing_threshold)[0]

        if len(significant_overlaps) == 0:
            self.log("No significant mode overlap - resetting mode tracking")
            self.ts_mode_idx = sorted_indices[0]
            self.ts_mode_eigvec = eigvecs[:, sorted_indices[0]].copy()
            return sorted_indices[:self.saddle_order].tolist()

        weights = []
        for idx in significant_overlaps:
            overlap_weight = overlaps[idx]**2
            eigval_weight = 1.0 if eigvals[idx] < 0 else 0.1
            weights.append(overlap_weight * eigval_weight)

        best_local_idx = np.argmax(weights)
        best_idx = significant_overlaps[best_local_idx]

        self.log(f"Mode mixing resolution: selected mode {best_idx} "
                 f"(overlap={overlaps[best_idx]:.4f}, eigenvalue={eigvals[best_idx]:.6f})")

        self.ts_mode_idx = best_idx
        self.ts_mode_eigvec = eigvecs[:, best_idx].copy()

        max_indices = [best_idx]
        if self.saddle_order > 1:
            remaining = [i for i in sorted_indices if i != best_idx]
            max_indices.extend(remaining[:self.saddle_order - 1])

        return max_indices
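
Editor's note: a small numerical illustration of the weighting above; with two candidates over the threshold, the negative-curvature mode wins even at slightly lower overlap. The 0.3 threshold below is an assumed stand-in for `self.mixing_threshold`, which is set elsewhere in the file:

import numpy as np

overlaps = np.array([0.55, 0.60, 0.10])
eigvals = np.array([-0.30, 0.40, -1.20])
mixing_threshold = 0.3                                   # assumed value

candidates = np.where(overlaps > mixing_threshold)[0]    # modes 0 and 1
weights = [overlaps[i]**2 * (1.0 if eigvals[i] < 0 else 0.1) for i in candidates]
best_idx = candidates[int(np.argmax(weights))]           # mode 0: 0.3025 beats 0.036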

    def _apply_maxstep_constraint(self, move_vector):
        """
        Apply maximum step constraint.

        Returns:
            tuple
                (constrained_move_vector, step_norm)
        """
        maxstep = self.config.get("maxstep")

        if move_vector.size % 3 == 0 and move_vector.size > 3:
            move_reshaped = move_vector.reshape(-1, 3)
            step_lengths = np.sqrt(np.sum(move_reshaped**2, axis=1))
            longest_step = np.max(step_lengths)
        else:
            longest_step = np.linalg.norm(move_vector)

        if longest_step > maxstep:
            scale = maxstep / longest_step
            move_vector = move_vector * scale
            self.log(f"Step constrained by maxstep: {longest_step:.6f} -> {maxstep:.6f}")

        return move_vector, np.linalg.norm(move_vector)
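
Editor's note: for Cartesian geometries the constraint is applied to the longest per-atom displacement, not the total step norm, so no single atom moves further than `maxstep`:

import numpy as np

step = np.array([0.30, 0.00, 0.00,      # atom 1 moves 0.30
                 0.05, 0.05, 0.00])     # atom 2 moves ~0.07
maxstep = 0.2

per_atom = np.sqrt((step.reshape(-1, 3) ** 2).sum(axis=1))
if per_atom.max() > maxstep:
    step = step * (maxstep / per_atom.max())   # atom 1 now moves exactly 0.2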

    def get_augmented_hessian(self, eigenvalues, gradient_components, alpha):
        """
        Create the augmented hessian matrix for RFO calculation.

        Parameters:
            eigenvalues : numpy.ndarray
                Eigenvalues for the selected subspace
            gradient_components : numpy.ndarray
                Gradient components in the selected subspace
            alpha : float
                Alpha parameter for RS-RFO

        Returns:
            numpy.ndarray
                Augmented Hessian matrix for RFO calculation
        """
        n = len(eigenvalues)
        H_aug = np.zeros((n + 1, n + 1))

        np.fill_diagonal(H_aug[:n, :n], eigenvalues / alpha)

        gradient_components = np.asarray(gradient_components).flatten()

        H_aug[:n, n] = gradient_components / alpha
        H_aug[n, :n] = gradient_components / alpha
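
Editor's note: in the Hessian eigenbasis the matrix assembled here has the standard RS-RFO bordered form. For two modes with eigenvalues (-0.8, 1.5), gradient components (0.1, -0.2), and alpha = 2:

import numpy as np

eigenvalues = np.array([-0.8, 1.5])
g = np.array([0.1, -0.2])
alpha = 2.0

H_aug = np.zeros((3, 3))
np.fill_diagonal(H_aug[:2, :2], eigenvalues / alpha)
H_aug[:2, 2] = H_aug[2, :2] = g / alpha
# [[-0.4   0.    0.05]
#  [ 0.    0.75 -0.1 ]
#  [ 0.05 -0.1   0.  ]]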

@@ -851,69 +1124,82 @@ class EnhancedRSPRFO:

    def solve_rfo(self, H_aug, mode="min", prev_eigvec=None):
        """
        Solve the RFO equations to get the step.

        Parameters:
            H_aug : numpy.ndarray
                Augmented Hessian matrix
            mode : str
                "min" for energy minimization, "max" for maximization
            prev_eigvec : numpy.ndarray
                Previous eigenvector for consistent direction

        Returns:
            tuple
                (step, eigenvalue, nu parameter, eigenvector)
        """
        eigvals, eigvecs = np.linalg.eigh(H_aug)

        if mode == "min":
            idx = np.argmin(eigvals)
        else:
            idx = np.argmax(eigvals)

        if prev_eigvec is not None:
            try:
                if prev_eigvec.shape == eigvecs[:, idx].shape:
                    overlap = np.dot(eigvecs[:, idx], prev_eigvec)
                    if overlap < 0:
                        eigvecs[:, idx] *= -1
            except Exception:
                pass

        eigval = eigvals[idx]
        eigvec = eigvecs[:, idx]

        nu = eigvec[-1]

        if abs(nu) < 1e-12:
            self.log(f"Warning: Very small nu={nu:.2e}, using safe value")
            nu = np.sign(nu) * 1e-12 if nu != 0 else 1e-12

        step = -eigvec[:-1] / nu

        return step, eigval, nu, eigvec
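
Editor's note: the step extraction can be exercised directly on a small augmented Hessian. With alpha = 1 and all-positive curvature, the lowest eigenvector of the bordered matrix yields the minimization step s = -p/nu, slightly damped relative to the Newton step -g/lambda:

import numpy as np

eigenvalues = np.array([0.5, 2.0])           # positive modes -> pure minimization
g = np.array([0.1, -0.2])

H_aug = np.zeros((3, 3))
np.fill_diagonal(H_aug[:2, :2], eigenvalues)
H_aug[:2, 2] = H_aug[2, :2] = g

w, V = np.linalg.eigh(H_aug)
v = V[:, 0]                                  # mode="min": lowest eigenpair
step = -v[:-1] / v[-1]                       # step in the Hessian eigenbasis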

    def rfo_model(self, gradient, hessian, step):
        """
        Estimate energy change based on RFO model.

        Parameters:
            gradient : numpy.ndarray
                Energy gradient
            hessian : numpy.ndarray
                Hessian matrix
            step : numpy.ndarray
                Step vector

        Returns:
            float
                Predicted energy change
        """
        g = gradient.flatten()
        s = step.flatten()
        return np.dot(g, s) + 0.5 * np.dot(s, hessian @ s)
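
Editor's note: this is the plain second-order Taylor estimate dE = g.s + (1/2) s^T H s that `_process_previous_step` compares against the measured energy change. For a concrete step:

import numpy as np

g = np.array([0.02, -0.01])
H = np.array([[1.0, 0.2],
              [0.2, 0.5]])
s = np.array([-0.02, 0.03])

dE_pred = g @ s + 0.5 * s @ H @ s   # = -3.95e-04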

    def update_hessian(self, current_geom, current_grad, previous_geom, previous_grad):
        """
        Update the Hessian using the specified update method with curvature checks.

        Parameters:
            current_geom : numpy.ndarray
                Current geometry
            current_grad : numpy.ndarray
                Current gradient
            previous_geom : numpy.ndarray
                Previous geometry
            previous_grad : numpy.ndarray
                Previous gradient
        """
        displacement = np.asarray(current_geom - previous_geom).reshape(-1, 1)
        delta_grad = np.asarray(current_grad - previous_grad).reshape(-1, 1)

@@ -921,136 +1207,157 @@ class EnhancedRSPRFO:

        disp_norm = np.linalg.norm(displacement)
        grad_diff_norm = np.linalg.norm(delta_grad)

        if disp_norm < 1e-10 or grad_diff_norm < 1e-10:
            self.log("Skipping Hessian update: changes too small")
            return

        dot_product = np.dot(displacement.T, delta_grad)[0, 0]

        curvature_ratio = dot_product / (disp_norm * grad_diff_norm)

        self.log(f"Hessian update: |disp|={disp_norm:.6f}, |dgrad|={grad_diff_norm:.6f}, "
                 f"dot={dot_product:.6f}, curvature_ratio={curvature_ratio:.4f}")

        if abs(curvature_ratio) < 0.01:
            self.log("Warning: Very poor displacement-gradient alignment, proceeding with caution")

        method_key_lower = self.hessian_update_method.lower()
        method_name, update_function = self.default_update_method

        for key, name, func in self.updater_dispatch_list:
            if key in method_key_lower:
                method_name = name
                update_function = func
                break

        self.log(f"Using Hessian update method: {method_name}")

        try:
            old_hessian = self.hessian.copy()

            delta_hess = update_function(self.hessian, displacement, delta_grad)
            new_hessian = self.hessian + delta_hess
            new_hessian = 0.5 * (new_hessian + new_hessian.T)

            new_eigvals = np.linalg.eigvalsh(new_hessian)

            n_neg = np.sum(new_eigvals < -1e-8)
            max_eigval = np.max(np.abs(new_eigvals))

            if max_eigval > 1e6:
                self.log(f"Warning: Updated Hessian has very large eigenvalues ({max_eigval:.2e}), "
                         "reverting to previous Hessian")
                return

            if self.saddle_order > 0 and n_neg == 0:
                self.log(f"Warning: No negative eigenvalues after update (expected {self.saddle_order})")

            self.hessian = new_hessian
            self.log(f"Hessian updated successfully ({n_neg} negative eigenvalues)")

        except Exception as e:
            self.log(f"Error in Hessian update: {e}")
            self.log("Keeping previous Hessian")
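
Editor's note: an `update_function` compatible with the dispatch above takes `(hessian, displacement, delta_grad)` and returns the correction dH. As a hedged illustration only (the package's actual updaters are selected via `updater_dispatch_list` and may differ), the symmetric rank-one formula commonly used for saddle-point searches looks like:

import numpy as np

def sr1_update(hessian, displacement, delta_grad, eps=1e-8):
    s = displacement.reshape(-1, 1)
    y = delta_grad.reshape(-1, 1)
    r = y - hessian @ s                  # residual of the secant condition H s = y
    denom = float(r.T @ s)
    if abs(denom) < eps * np.linalg.norm(r) * np.linalg.norm(s):
        return np.zeros_like(hessian)    # skip a numerically unsafe update
    return (r @ r.T) / denom             # rank-one correction dH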

    def should_update_hessian(self, displacement, delta_grad, dot_product):
        """
        Determine whether to update the Hessian based on quality metrics.

        Parameters:
            displacement : numpy.ndarray
                Geometry displacement vector
            delta_grad : numpy.ndarray
                Gradient difference vector
            dot_product : float
                Dot product of displacement and gradient difference

        Returns:
            bool
                True if Hessian should be updated
        """
        disp_norm = np.linalg.norm(displacement)
        grad_norm = np.linalg.norm(delta_grad)

        if disp_norm < 1e-10 or grad_norm < 1e-10:
            return False

        cos_angle = dot_product / (disp_norm * grad_norm)

        if self.saddle_order == 0 and dot_product < 0:
            self.log(f"Skipping update: negative curvature in minimization (cos={cos_angle:.4f})")
            return False

        if abs(cos_angle) < 0.001:
            self.log(f"Skipping update: nearly orthogonal vectors (cos={cos_angle:.4f})")
            return False

        return True
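
Editor's note: the gate reduces to a cosine test on the secant pair, for example:

import numpy as np

s = np.array([0.010, -0.020, 0.005])   # displacement
y = np.array([0.030, -0.010, 0.002])   # gradient difference

cos_angle = (s @ y) / (np.linalg.norm(s) * np.linalg.norm(y))
# For a minimization (saddle_order == 0), s @ y < 0 would veto the update;
# here s @ y = 5.1e-04 > 0 and cos_angle is about 0.70, so the update proceeds.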

    def log(self, message, force=False):
        """
        Print log message if display flag is enabled.

        Parameters:
            message : str
                Message to display
            force : bool
                If True, display message regardless of display_flag
        """
        if self.display_flag or force:
            print(message)

    def set_hessian(self, hessian):
        """
        Set the Hessian matrix.

        Parameters:
            hessian : numpy.ndarray
                Hessian matrix
        """
        self.hessian = np.asarray(hessian).copy()
        self.hessian = 0.5 * (self.hessian + self.hessian.T)  # Ensure symmetry

    def set_bias_hessian(self, bias_hessian):
        """
        Set the bias Hessian matrix.

        Parameters:
            bias_hessian : numpy.ndarray
                Bias Hessian matrix
        """
        self.bias_hessian = np.asarray(bias_hessian).copy()
        self.bias_hessian = 0.5 * (self.bias_hessian + self.bias_hessian.T)

    def get_hessian(self):
        """
        Get the current Hessian matrix.

        Returns:
            numpy.ndarray
                Hessian matrix
        """
        return self.hessian

    def get_bias_hessian(self):
        """
        Get the current bias Hessian matrix.

        Returns:
            numpy.ndarray
                Bias Hessian matrix
        """
        return self.bias_hessian

    def get_shifted_hessian(self):
        """
        Get the eigenvalue-shifted Hessian matrix.

        Returns:
            numpy.ndarray
                Shifted Hessian matrix (or None if not computed)
        """
        return self.shifted_hessian

    def reset_trust_radius(self):
        self.trust_radius = self.trust_radius_initial
        self.log(f"Trust radius reset to initial value: {self.trust_radius:.6f}", force=True)
|