MultiOptPy 1.20.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/Calculator/__init__.py +0 -0
- multioptpy/Calculator/ase_calculation_tools.py +424 -0
- multioptpy/Calculator/ase_tools/__init__.py +0 -0
- multioptpy/Calculator/ase_tools/fairchem.py +28 -0
- multioptpy/Calculator/ase_tools/gamess.py +19 -0
- multioptpy/Calculator/ase_tools/gaussian.py +165 -0
- multioptpy/Calculator/ase_tools/mace.py +28 -0
- multioptpy/Calculator/ase_tools/mopac.py +19 -0
- multioptpy/Calculator/ase_tools/nwchem.py +31 -0
- multioptpy/Calculator/ase_tools/orca.py +22 -0
- multioptpy/Calculator/ase_tools/pygfn0.py +37 -0
- multioptpy/Calculator/dxtb_calculation_tools.py +344 -0
- multioptpy/Calculator/emt_calculation_tools.py +458 -0
- multioptpy/Calculator/gpaw_calculation_tools.py +183 -0
- multioptpy/Calculator/lj_calculation_tools.py +314 -0
- multioptpy/Calculator/psi4_calculation_tools.py +334 -0
- multioptpy/Calculator/pwscf_calculation_tools.py +189 -0
- multioptpy/Calculator/pyscf_calculation_tools.py +327 -0
- multioptpy/Calculator/sqm1_calculation_tools.py +611 -0
- multioptpy/Calculator/sqm2_calculation_tools.py +376 -0
- multioptpy/Calculator/tblite_calculation_tools.py +352 -0
- multioptpy/Calculator/tersoff_calculation_tools.py +818 -0
- multioptpy/Constraint/__init__.py +0 -0
- multioptpy/Constraint/constraint_condition.py +834 -0
- multioptpy/Coordinate/__init__.py +0 -0
- multioptpy/Coordinate/polar_coordinate.py +199 -0
- multioptpy/Coordinate/redundant_coordinate.py +638 -0
- multioptpy/IRC/__init__.py +0 -0
- multioptpy/IRC/converge_criteria.py +28 -0
- multioptpy/IRC/dvv.py +544 -0
- multioptpy/IRC/euler.py +439 -0
- multioptpy/IRC/hpc.py +564 -0
- multioptpy/IRC/lqa.py +540 -0
- multioptpy/IRC/modekill.py +662 -0
- multioptpy/IRC/rk4.py +579 -0
- multioptpy/Interpolation/__init__.py +0 -0
- multioptpy/Interpolation/adaptive_interpolation.py +283 -0
- multioptpy/Interpolation/binomial_interpolation.py +179 -0
- multioptpy/Interpolation/geodesic_interpolation.py +785 -0
- multioptpy/Interpolation/interpolation.py +156 -0
- multioptpy/Interpolation/linear_interpolation.py +473 -0
- multioptpy/Interpolation/savitzky_golay_interpolation.py +252 -0
- multioptpy/Interpolation/spline_interpolation.py +353 -0
- multioptpy/MD/__init__.py +0 -0
- multioptpy/MD/thermostat.py +185 -0
- multioptpy/MEP/__init__.py +0 -0
- multioptpy/MEP/pathopt_bneb_force.py +443 -0
- multioptpy/MEP/pathopt_dmf_force.py +448 -0
- multioptpy/MEP/pathopt_dneb_force.py +130 -0
- multioptpy/MEP/pathopt_ewbneb_force.py +207 -0
- multioptpy/MEP/pathopt_gpneb_force.py +512 -0
- multioptpy/MEP/pathopt_lup_force.py +113 -0
- multioptpy/MEP/pathopt_neb_force.py +225 -0
- multioptpy/MEP/pathopt_nesb_force.py +205 -0
- multioptpy/MEP/pathopt_om_force.py +153 -0
- multioptpy/MEP/pathopt_qsm_force.py +174 -0
- multioptpy/MEP/pathopt_qsmv2_force.py +304 -0
- multioptpy/ModelFunction/__init__.py +7 -0
- multioptpy/ModelFunction/avoiding_model_function.py +29 -0
- multioptpy/ModelFunction/binary_image_ts_search_model_function.py +47 -0
- multioptpy/ModelFunction/conical_model_function.py +26 -0
- multioptpy/ModelFunction/opt_meci.py +50 -0
- multioptpy/ModelFunction/opt_mesx.py +47 -0
- multioptpy/ModelFunction/opt_mesx_2.py +49 -0
- multioptpy/ModelFunction/seam_model_function.py +27 -0
- multioptpy/ModelHessian/__init__.py +0 -0
- multioptpy/ModelHessian/approx_hessian.py +147 -0
- multioptpy/ModelHessian/calc_params.py +227 -0
- multioptpy/ModelHessian/fischer.py +236 -0
- multioptpy/ModelHessian/fischerd3.py +360 -0
- multioptpy/ModelHessian/fischerd4.py +398 -0
- multioptpy/ModelHessian/gfn0xtb.py +633 -0
- multioptpy/ModelHessian/gfnff.py +709 -0
- multioptpy/ModelHessian/lindh.py +165 -0
- multioptpy/ModelHessian/lindh2007d2.py +707 -0
- multioptpy/ModelHessian/lindh2007d3.py +822 -0
- multioptpy/ModelHessian/lindh2007d4.py +1030 -0
- multioptpy/ModelHessian/morse.py +106 -0
- multioptpy/ModelHessian/schlegel.py +144 -0
- multioptpy/ModelHessian/schlegeld3.py +322 -0
- multioptpy/ModelHessian/schlegeld4.py +559 -0
- multioptpy/ModelHessian/shortrange.py +346 -0
- multioptpy/ModelHessian/swartd2.py +496 -0
- multioptpy/ModelHessian/swartd3.py +706 -0
- multioptpy/ModelHessian/swartd4.py +918 -0
- multioptpy/ModelHessian/tshess.py +40 -0
- multioptpy/Optimizer/QHAdam.py +61 -0
- multioptpy/Optimizer/__init__.py +0 -0
- multioptpy/Optimizer/abc_fire.py +83 -0
- multioptpy/Optimizer/adabelief.py +58 -0
- multioptpy/Optimizer/adabound.py +68 -0
- multioptpy/Optimizer/adadelta.py +65 -0
- multioptpy/Optimizer/adaderivative.py +56 -0
- multioptpy/Optimizer/adadiff.py +68 -0
- multioptpy/Optimizer/adafactor.py +70 -0
- multioptpy/Optimizer/adam.py +65 -0
- multioptpy/Optimizer/adamax.py +62 -0
- multioptpy/Optimizer/adamod.py +83 -0
- multioptpy/Optimizer/adamw.py +65 -0
- multioptpy/Optimizer/adiis.py +523 -0
- multioptpy/Optimizer/afire_neb.py +282 -0
- multioptpy/Optimizer/block_hessian_update.py +709 -0
- multioptpy/Optimizer/c2diis.py +491 -0
- multioptpy/Optimizer/component_wise_scaling.py +405 -0
- multioptpy/Optimizer/conjugate_gradient.py +82 -0
- multioptpy/Optimizer/conjugate_gradient_neb.py +345 -0
- multioptpy/Optimizer/coordinate_locking.py +405 -0
- multioptpy/Optimizer/dic_rsirfo.py +1015 -0
- multioptpy/Optimizer/ediis.py +417 -0
- multioptpy/Optimizer/eve.py +76 -0
- multioptpy/Optimizer/fastadabelief.py +61 -0
- multioptpy/Optimizer/fire.py +77 -0
- multioptpy/Optimizer/fire2.py +249 -0
- multioptpy/Optimizer/fire_neb.py +92 -0
- multioptpy/Optimizer/gan_step.py +486 -0
- multioptpy/Optimizer/gdiis.py +609 -0
- multioptpy/Optimizer/gediis.py +203 -0
- multioptpy/Optimizer/geodesic_step.py +433 -0
- multioptpy/Optimizer/gpmin.py +633 -0
- multioptpy/Optimizer/gpr_step.py +364 -0
- multioptpy/Optimizer/gradientdescent.py +78 -0
- multioptpy/Optimizer/gradientdescent_neb.py +52 -0
- multioptpy/Optimizer/hessian_update.py +433 -0
- multioptpy/Optimizer/hybrid_rfo.py +998 -0
- multioptpy/Optimizer/kdiis.py +625 -0
- multioptpy/Optimizer/lars.py +21 -0
- multioptpy/Optimizer/lbfgs.py +253 -0
- multioptpy/Optimizer/lbfgs_neb.py +355 -0
- multioptpy/Optimizer/linesearch.py +236 -0
- multioptpy/Optimizer/lookahead.py +40 -0
- multioptpy/Optimizer/nadam.py +64 -0
- multioptpy/Optimizer/newton.py +200 -0
- multioptpy/Optimizer/prodigy.py +70 -0
- multioptpy/Optimizer/purtubation.py +16 -0
- multioptpy/Optimizer/quickmin_neb.py +245 -0
- multioptpy/Optimizer/radam.py +75 -0
- multioptpy/Optimizer/rfo_neb.py +302 -0
- multioptpy/Optimizer/ric_rfo.py +842 -0
- multioptpy/Optimizer/rl_step.py +627 -0
- multioptpy/Optimizer/rmspropgrave.py +65 -0
- multioptpy/Optimizer/rsirfo.py +1647 -0
- multioptpy/Optimizer/rsprfo.py +1056 -0
- multioptpy/Optimizer/sadam.py +60 -0
- multioptpy/Optimizer/samsgrad.py +63 -0
- multioptpy/Optimizer/tr_lbfgs.py +678 -0
- multioptpy/Optimizer/trim.py +273 -0
- multioptpy/Optimizer/trust_radius.py +207 -0
- multioptpy/Optimizer/trust_radius_neb.py +121 -0
- multioptpy/Optimizer/yogi.py +60 -0
- multioptpy/OtherMethod/__init__.py +0 -0
- multioptpy/OtherMethod/addf.py +1150 -0
- multioptpy/OtherMethod/dimer.py +895 -0
- multioptpy/OtherMethod/elastic_image_pair.py +629 -0
- multioptpy/OtherMethod/modelfunction.py +456 -0
- multioptpy/OtherMethod/newton_traj.py +454 -0
- multioptpy/OtherMethod/twopshs.py +1095 -0
- multioptpy/PESAnalyzer/__init__.py +0 -0
- multioptpy/PESAnalyzer/calc_irc_curvature.py +125 -0
- multioptpy/PESAnalyzer/cmds_analysis.py +152 -0
- multioptpy/PESAnalyzer/koopman_analysis.py +268 -0
- multioptpy/PESAnalyzer/pca_analysis.py +314 -0
- multioptpy/Parameters/__init__.py +0 -0
- multioptpy/Parameters/atomic_mass.py +20 -0
- multioptpy/Parameters/atomic_number.py +22 -0
- multioptpy/Parameters/covalent_radii.py +44 -0
- multioptpy/Parameters/d2.py +61 -0
- multioptpy/Parameters/d3.py +63 -0
- multioptpy/Parameters/d4.py +103 -0
- multioptpy/Parameters/dreiding.py +34 -0
- multioptpy/Parameters/gfn0xtb_param.py +137 -0
- multioptpy/Parameters/gfnff_param.py +315 -0
- multioptpy/Parameters/gnb.py +104 -0
- multioptpy/Parameters/parameter.py +22 -0
- multioptpy/Parameters/uff.py +72 -0
- multioptpy/Parameters/unit_values.py +20 -0
- multioptpy/Potential/AFIR_potential.py +55 -0
- multioptpy/Potential/LJ_repulsive_potential.py +345 -0
- multioptpy/Potential/__init__.py +0 -0
- multioptpy/Potential/anharmonic_keep_potential.py +28 -0
- multioptpy/Potential/asym_elllipsoidal_potential.py +718 -0
- multioptpy/Potential/electrostatic_potential.py +69 -0
- multioptpy/Potential/flux_potential.py +30 -0
- multioptpy/Potential/gaussian_potential.py +101 -0
- multioptpy/Potential/idpp.py +516 -0
- multioptpy/Potential/keep_angle_potential.py +146 -0
- multioptpy/Potential/keep_dihedral_angle_potential.py +105 -0
- multioptpy/Potential/keep_outofplain_angle_potential.py +70 -0
- multioptpy/Potential/keep_potential.py +99 -0
- multioptpy/Potential/mechano_force_potential.py +74 -0
- multioptpy/Potential/nanoreactor_potential.py +52 -0
- multioptpy/Potential/potential.py +896 -0
- multioptpy/Potential/spacer_model_potential.py +221 -0
- multioptpy/Potential/switching_potential.py +258 -0
- multioptpy/Potential/universal_potential.py +34 -0
- multioptpy/Potential/value_range_potential.py +36 -0
- multioptpy/Potential/void_point_potential.py +25 -0
- multioptpy/SQM/__init__.py +0 -0
- multioptpy/SQM/sqm1/__init__.py +0 -0
- multioptpy/SQM/sqm1/sqm1_core.py +1792 -0
- multioptpy/SQM/sqm2/__init__.py +0 -0
- multioptpy/SQM/sqm2/calc_tools.py +95 -0
- multioptpy/SQM/sqm2/sqm2_basis.py +850 -0
- multioptpy/SQM/sqm2/sqm2_bond.py +119 -0
- multioptpy/SQM/sqm2/sqm2_core.py +303 -0
- multioptpy/SQM/sqm2/sqm2_data.py +1229 -0
- multioptpy/SQM/sqm2/sqm2_disp.py +65 -0
- multioptpy/SQM/sqm2/sqm2_eeq.py +243 -0
- multioptpy/SQM/sqm2/sqm2_overlapint.py +704 -0
- multioptpy/SQM/sqm2/sqm2_qm.py +578 -0
- multioptpy/SQM/sqm2/sqm2_rep.py +66 -0
- multioptpy/SQM/sqm2/sqm2_srb.py +70 -0
- multioptpy/Thermo/__init__.py +0 -0
- multioptpy/Thermo/normal_mode_analyzer.py +865 -0
- multioptpy/Utils/__init__.py +0 -0
- multioptpy/Utils/bond_connectivity.py +264 -0
- multioptpy/Utils/calc_tools.py +884 -0
- multioptpy/Utils/oniom.py +96 -0
- multioptpy/Utils/pbc.py +48 -0
- multioptpy/Utils/riemann_curvature.py +208 -0
- multioptpy/Utils/symmetry_analyzer.py +482 -0
- multioptpy/Visualization/__init__.py +0 -0
- multioptpy/Visualization/visualization.py +156 -0
- multioptpy/WFAnalyzer/MO_analysis.py +104 -0
- multioptpy/WFAnalyzer/__init__.py +0 -0
- multioptpy/Wrapper/__init__.py +0 -0
- multioptpy/Wrapper/autots.py +1239 -0
- multioptpy/Wrapper/ieip_wrapper.py +93 -0
- multioptpy/Wrapper/md_wrapper.py +92 -0
- multioptpy/Wrapper/neb_wrapper.py +94 -0
- multioptpy/Wrapper/optimize_wrapper.py +76 -0
- multioptpy/__init__.py +5 -0
- multioptpy/entrypoints.py +916 -0
- multioptpy/fileio.py +660 -0
- multioptpy/ieip.py +340 -0
- multioptpy/interface.py +1086 -0
- multioptpy/irc.py +529 -0
- multioptpy/moleculardynamics.py +432 -0
- multioptpy/neb.py +1267 -0
- multioptpy/optimization.py +1553 -0
- multioptpy/optimizer.py +709 -0
- multioptpy-1.20.2.dist-info/METADATA +438 -0
- multioptpy-1.20.2.dist-info/RECORD +246 -0
- multioptpy-1.20.2.dist-info/WHEEL +5 -0
- multioptpy-1.20.2.dist-info/entry_points.txt +9 -0
- multioptpy-1.20.2.dist-info/licenses/LICENSE +674 -0
- multioptpy-1.20.2.dist-info/top_level.txt +1 -0

multioptpy/Optimizer/rsirfo.py
@@ -0,0 +1,1647 @@
import numpy as np

from multioptpy.Optimizer.hessian_update import ModelHessianUpdate
from multioptpy.Optimizer.block_hessian_update import BlockHessianUpdate

from scipy.optimize import brentq
from multioptpy.Utils.calc_tools import Calculationtools

class RSIRFO:
    def __init__(self, **config):
        """
        Rational Step Image-RFO (Rational Function Optimization) for transition state searches

        References:
        [1] Banerjee et al., J. Phys. Chem., 89, 52-57 (1985)
        [2] Heyden et al., J. Chem. Phys., 123, 224101 (2005)
        [3] Baker, J. Comput. Chem., 7, 385-395 (1986)
        [4] Besalú and Bofill, Theor. Chem. Acc., 100, 265-274 (1998)

        This code is based on the following implementations:
        1. https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/TSHessianOptimizer.py
        2. https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/RSIRFOptimizer.py
        """
        # Configuration parameters
        self.alpha0 = config.get("alpha0", 1.0)
        self.max_micro_cycles = config.get("max_micro_cycles", 40)
        self.saddle_order = config.get("saddle_order", 1)
        self.hessian_update_method = config.get("method", "auto")
        self.small_eigval_thresh = config.get("small_eigval_thresh", 1e-6)

        self.alpha_max = config.get("alpha_max", 1000.0)
        self.alpha_step_max = config.get("alpha_step_max", 10.0)

        # Trust radius parameters
        if self.saddle_order == 0:
            self.trust_radius_initial = config.get("trust_radius", 0.5)
            self.trust_radius_max = config.get("trust_radius_max", 0.5)
        else:
            self.trust_radius_initial = config.get("trust_radius", 0.1)
            self.trust_radius_max = config.get("trust_radius_max", 0.1)

        self.trust_radius = self.trust_radius_initial
        self.trust_radius_min = config.get("trust_radius_min", 0.01)

        # Trust radius adjustment parameters
        self.good_step_threshold = config.get("good_step_threshold", 0.75)
        self.poor_step_threshold = config.get("poor_step_threshold", 0.25)
        self.trust_radius_increase_factor = config.get("trust_radius_increase_factor", 1.2)
        self.trust_radius_decrease_factor = config.get("trust_radius_decrease_factor", 0.5)

        # Convergence criteria
        self.energy_change_threshold = config.get("energy_change_threshold", 1e-6)
        self.gradient_norm_threshold = config.get("gradient_norm_threshold", 1e-4)
        self.step_norm_tolerance = config.get("step_norm_tolerance", 1e-3)

        # Debug and display settings
        self.debug_mode = config.get("debug_mode", False)
        self.display_flag = config.get("display_flag", True)

        # Adaptive trust radius management settings
        self.use_adaptive_trust_radius = config.get("use_adaptive_trust_radius", True)

        # === [NEW] Threshold to activate adaptive radius ===
        # Only use adaptive trust radius if grad_norm is *below* this value
        self.adaptive_trust_gradient_norm_threshold = config.get(
            "adaptive_trust_gradient_norm_threshold",
            1e-2  # Default: activate when norm is 0.01 (adjust as needed)
        )
        # === [END NEW] ===

        self.max_curvature_factor = config.get("max_curvature_factor", 2.5)
        self.negative_curvature_safety = config.get("negative_curvature_safety", 0.8)
        self.min_eigenvalue_history = []

        # Enable/disable level-shifting manually
        # Default is False for a conservative approach
        self.use_level_shift = config.get("use_level_shift", False)

        # Magnitude of the level shift
        # Should be much smaller than typical eigenvalue magnitudes
        self.level_shift_value = config.get("level_shift_value", 1e-5)

        # Automatic level-shifting based on condition number
        # Enabled by default for adaptive behavior
        self.auto_level_shift = config.get("auto_level_shift", True)

        # Threshold condition number for automatic level-shifting
        # If condition number exceeds this, automatically apply shift
        self.condition_number_threshold = config.get("condition_number_threshold", 1e8)

        # Track whether shift was applied in current iteration
        self.level_shift_applied = False
        # === [END MODIFICATION] ===

        # Initialize state variables
        self.Initialization = True
        self.hessian = None
        self.bias_hessian = None

        # For tracking optimization (using more compact storage)
        self.prev_eigvec_min = None
        self.prev_eigvec_size = None
        # Only store last few changes instead of full history for memory efficiency
        self.predicted_energy_changes = []
        self.actual_energy_changes = []
        self.prev_geometry = None  # Will be set with numpy array reference (no deepcopy)
        self.prev_gradient = None  # Will be set with numpy array reference (no deepcopy)
        self.prev_energy = None
        self.converged = False
        self.iteration = 0

        # Define modes to maximize based on saddle order
        self.roots = list(range(self.saddle_order))

        # Initialize the hessian update modules
        self.hessian_updater = ModelHessianUpdate()
        self.block_hessian_updater = BlockHessianUpdate()

        # Build the prioritized list of Hessian updaters
        self._build_hessian_updater_list()

        # Initial alpha values to try - more memory efficient than np.linspace
        self.alpha_init_values = [0.001 + (10.0 - 0.001) * i / 14 for i in range(15)]
        self.NEB_mode = False
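
        # Illustrative usage (annotation; keyword names as read above, the
        # call protocol inferred from run() further below):
        #     opt = RSIRFO(saddle_order=1, trust_radius=0.1)
        #     opt.hessian = H0   # must be set before run() (see the check in run())
        #     vec = opt.run(geom, grad, B_e=energy, g=grad,
        #                   pre_geom=prev_geom, pre_g=prev_grad)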

    def _build_hessian_updater_list(self):
        """
        Builds the prioritized dispatch list for Hessian updaters.
        The order of this list is CRITICAL as it mimics the original
        if/elif chain (most specific matches must come first).
        """

        # Define the default (fallback) method
        # We store this tuple (name, function)
        self.default_update_method = (
            "auto (default)",
            lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")
        )

        # List of (substring_key, display_name, function) tuples
        # The order MUST match the original if/elif logic exactly.
        self.updater_dispatch_list = [
            # (key to check with 'in', name for logging, function to call)

            ("flowchart", "flowchart", lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")),

            # --- Block methods (most specific first) ---
            ("block_cfd_fsb_dd", "block_cfd_fsb_dd", self.block_hessian_updater.block_CFD_FSB_hessian_update_dd),
            ("block_cfd_fsb_weighted", "block_cfd_fsb_weighted", self.block_hessian_updater.block_CFD_FSB_hessian_update_weighted),
            ("block_cfd_fsb", "block_cfd_fsb", self.block_hessian_updater.block_CFD_FSB_hessian_update),

            ("block_cfd_bofill_weighted", "block_cfd_bofill_weighted", self.block_hessian_updater.block_CFD_Bofill_hessian_update_weighted),
            ("block_cfd_bofill", "block_cfd_bofill", self.block_hessian_updater.block_CFD_Bofill_hessian_update),

            ("block_bfgs_dd", "block_bfgs_dd", self.block_hessian_updater.block_BFGS_hessian_update_dd),
            ("block_bfgs", "block_bfgs", self.block_hessian_updater.block_BFGS_hessian_update),

            ("block_fsb_dd", "block_fsb_dd", self.block_hessian_updater.block_FSB_hessian_update_dd),
            ("block_fsb_weighted", "block_fsb_weighted", self.block_hessian_updater.block_FSB_hessian_update_weighted),
            ("block_fsb", "block_fsb", self.block_hessian_updater.block_FSB_hessian_update),

            ("block_bofill_weighted", "block_bofill_weighted", self.block_hessian_updater.block_Bofill_hessian_update_weighted),
            ("block_bofill", "block_bofill", self.block_hessian_updater.block_Bofill_hessian_update),

            # --- Standard methods (specific first) ---
            ("bfgs_dd", "bfgs_dd", self.hessian_updater.BFGS_hessian_update_dd),
            ("bfgs", "bfgs", self.hessian_updater.BFGS_hessian_update),

            ("sr1", "sr1", self.hessian_updater.SR1_hessian_update),

            ("pcfd_bofill", "pcfd_bofill", self.hessian_updater.pCFD_Bofill_hessian_update),

            ("cfd_fsb_dd", "cfd_fsb_dd", self.hessian_updater.CFD_FSB_hessian_update_dd),
            ("cfd_fsb", "cfd_fsb", self.hessian_updater.CFD_FSB_hessian_update),

            ("cfd_bofill", "cfd_bofill", self.hessian_updater.CFD_Bofill_hessian_update),

            ("fsb_dd", "fsb_dd", self.hessian_updater.FSB_hessian_update_dd),
            ("fsb", "fsb", self.hessian_updater.FSB_hessian_update),

            ("bofill", "bofill", self.hessian_updater.Bofill_hessian_update),

            ("psb", "psb", self.hessian_updater.PSB_hessian_update),
            ("msp", "msp", self.hessian_updater.MSP_hessian_update),
        ]
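
        # Annotation: a plausible consumer of this list (the actual dispatch
        # code presumably appears later in this file, beyond this excerpt)
        # matches by substring, roughly:
        #     for key, name, fn in self.updater_dispatch_list:
        #         if key in self.hessian_update_method:
        #             return name, fn
        #     return self.default_update_method
        # which is why longer keys must precede their prefixes
        # ("block_cfd_fsb_dd" before "block_cfd_fsb"), mirroring the original
        # if/elif chain mentioned in the docstring.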

    def switch_NEB_mode(self):
        if self.NEB_mode:
            self.NEB_mode = False
        else:
            self.NEB_mode = True

    def log(self, message, force=False):
        """Print message if display flag is enabled and either force is True or in debug mode"""
        if self.display_flag and (force or self.debug_mode):
            print(message)

    def filter_small_eigvals(self, eigvals, eigvecs, mask=False):
        """Remove small eigenvalues and corresponding eigenvectors from the Hessian"""
        small_inds = np.abs(eigvals) < self.small_eigval_thresh
        small_num = np.sum(small_inds)

        if small_num > 0:
            self.log(f"Found {small_num} small eigenvalues in Hessian. Removed corresponding eigenvalues and eigenvectors.")

        filtered_eigvals = eigvals[~small_inds]
        filtered_eigvecs = eigvecs[:, ~small_inds]

        if small_num > 6:
            self.log(f"Warning: Found {small_num} small eigenvalues, which is more than expected. "
                     "This may indicate numerical issues. Proceeding with caution.", force=True)

        if mask:
            return filtered_eigvals, filtered_eigvecs, small_inds
        else:
            return filtered_eigvals, filtered_eigvecs

    def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0, pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
        """Execute one step of RS-I-RFO optimization"""
        # Print iteration header
        self.log(f"\n{'='*50}\nRS-I-RFO Iteration {self.iteration}\n{'='*50}", force=True)

        # Initialize on first call
        if self.Initialization:
            self.prev_eigvec_min = None
            self.prev_eigvec_size = None
            self.predicted_energy_changes = []
            self.actual_energy_changes = []
            self.prev_geometry = None
            self.prev_gradient = None
            self.prev_energy = None
            self.converged = False
            self.iteration = 0
            self.Initialization = False

        # Check if hessian is set
        if self.hessian is None:
            raise ValueError("Hessian matrix must be set before running optimization")

        # Update Hessian if we have previous geometry and gradient information
        if self.prev_geometry is not None and self.prev_gradient is not None and len(pre_g) > 0 and len(pre_geom) > 0:
            self.update_hessian(geom_num_list, g, pre_geom, pre_g)

        # Check for convergence based on gradient
        gradient_norm = np.linalg.norm(B_g)
        self.log(f"Gradient norm: {gradient_norm:.6f}", force=True)

        if gradient_norm < self.gradient_norm_threshold:
            self.log(f"Converged: Gradient norm {gradient_norm:.6f} below threshold {self.gradient_norm_threshold:.6f}", force=True)
            self.converged = True

        # Check for convergence based on energy change
        if self.actual_energy_changes:
            last_energy_change = abs(self.actual_energy_changes[-1])
            if last_energy_change < self.energy_change_threshold:
                self.log(f"Converged: Energy change {last_energy_change:.6f} below threshold {self.energy_change_threshold:.6f}", force=True)
                self.converged = True

        # Store current energy
        current_energy = B_e

        # Ensure gradient is properly shaped as a 1D array (reuse existing array without copy)
        gradient = np.asarray(B_g).ravel()

        # Use effective Hessian
        tmp_hess = self.hessian
        if self.bias_hessian is not None:
            # Add bias_hessian directly to H - avoid creating intermediate matrix
            # print("Adding bias_hessian to hessian")
            H = Calculationtools().project_out_hess_tr_and_rot_for_coord(tmp_hess + self.bias_hessian, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), False)
        else:
            H = Calculationtools().project_out_hess_tr_and_rot_for_coord(tmp_hess, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), False)

        # === [MODIFIED] First eigendecomposition: full Hessian H ===
        H = 0.5 * (H + H.T)  # Ensure symmetry
        # Use new method that applies/removes shift for numerical stability
        eigvals, eigvecs = self.compute_eigendecomposition_with_shift(H)

        # Always check conditioning (provides useful diagnostic information)
        condition_number, is_ill_conditioned = self.check_hessian_conditioning(eigvals)
        print(f"Condition number of Hessian: {condition_number:.2f}, Ill-conditioned: {is_ill_conditioned}")

        # Trust radius adjustment (moved here to use the eigenvalues)
        if not self.Initialization:
            if self.prev_energy is not None:
                actual_energy_change = B_e - self.prev_energy

                # Keep limited history
                if len(self.actual_energy_changes) >= 3:
                    self.actual_energy_changes.pop(0)
                self.actual_energy_changes.append(actual_energy_change)

                if self.predicted_energy_changes:
                    # Pass the minimum eigenvalue (which is the first one after eigh)
                    min_eigval = eigvals[0] if len(eigvals) > 0 else None
                    self.adjust_trust_radius(
                        actual_energy_change,
                        self.predicted_energy_changes[-1],
                        min_eigval,     # Pass minimum eigenvalue
                        gradient_norm   # === [MODIFIED] Pass gradient norm ===
                    )

        # Count negative eigenvalues for diagnostic purposes
        neg_eigvals = np.sum(eigvals < -1e-10)
        self.log(f"Found {neg_eigvals} negative eigenvalues (target for saddle order: {self.saddle_order})", force=True)

        # Create the projection matrix for RS-I-RFO
        self.log(f"Using projection to construct image potential gradient and hessian for root(s) {self.roots}.")

        # More efficient projection matrix construction for multiple roots
        P = np.eye(gradient.size)
        root_num = 0
        i = 0
        while root_num < len(self.roots):
            if np.abs(eigvals[i]) > 1e-10:
                # Extract the eigenvector once
                trans_vec = eigvecs[:, i]
                # Use inplace operation to update P (avoid new allocation)
                if self.NEB_mode:
                    P -= np.outer(trans_vec, trans_vec)
                else:
                    P -= 2 * np.outer(trans_vec, trans_vec)
                root_num += 1
            i += 1
        # Create the image Hessian H_star and image gradient grad_star
        H_star = np.dot(P, H)
        H_star = 0.5 * (H_star + H_star.T)  # Symmetrize the Hessian
        grad_star = np.dot(P, gradient)

        eigvals_star, eigvecs_star = self.compute_eigendecomposition_with_shift(H_star)

        # === Apply existing small eigenvalue filter ===
        # This is INDEPENDENT of level-shifting.
        # Level-shifting affects numerical stability during computation.
        # This filtering affects which eigenvalues are used in optimization.
        eigvals_star, eigvecs_star = self.filter_small_eigvals(eigvals_star, eigvecs_star)

        # Remember the size of the eigenvalue/vector arrays after filtering
        current_eigvec_size = eigvecs_star.shape[1]
        self.log(f"Using {current_eigvec_size} eigenvalues/vectors after filtering")

        # Reset previous eigenvector if dimensions don't match
        if self.prev_eigvec_size is not None and self.prev_eigvec_size != current_eigvec_size:
            self.log(f"Resetting previous eigenvector due to dimension change: "
                     f"{self.prev_eigvec_size} → {current_eigvec_size}")
            self.prev_eigvec_min = None

        # Get the RS step using the image Hessian and gradient
        move_vector = self.get_rs_step(eigvals_star, eigvecs_star, grad_star)

        # Update prev_eigvec_size for next iteration
        self.prev_eigvec_size = current_eigvec_size

        # Calculate predicted energy change
        predicted_energy_change = self.rfo_model(gradient, H, move_vector)

        # Keep limited history - only store the last few values
        if len(self.predicted_energy_changes) >= 3:
            self.predicted_energy_changes.pop(0)
        self.predicted_energy_changes.append(predicted_energy_change)

        self.log(f"Predicted energy change: {predicted_energy_change:.6f}", force=True)

        # Evaluate step quality if we have history
        if self.actual_energy_changes and len(self.predicted_energy_changes) > 1:
            self.evaluate_step_quality()

        # Store current geometry, gradient and energy for next iteration (no deep copy)
        self.prev_geometry = geom_num_list
        self.prev_gradient = B_g
        self.prev_energy = current_energy

        # Increment iteration counter
        self.iteration += 1

        return -1 * move_vector.reshape(-1, 1)

    def check_hessian_conditioning(self, eigvals):
        """
        Check the condition number of the Hessian.

        The condition number κ = |λ_max| / |λ_min| indicates how ill-conditioned
        the matrix is. Large condition numbers suggest numerical instability.

        This method filters out near-zero eigenvalues (likely from projected-out
        modes like translation/rotation) before computing the condition number.

        Parameters:
            eigvals: np.ndarray
                Eigenvalues of the Hessian (sorted in ascending order)

        Returns:
            condition_number: float or None
                Condition number of the Hessian, or None if it cannot be computed
            is_ill_conditioned: bool
                True if the Hessian is considered ill-conditioned
        """
        if len(eigvals) < 2:
            self.log("Warning: Too few eigenvalues to compute condition number", force=True)
            return None, False

        # Filter eigenvalues: exclude those near zero
        # These are typically translation/rotation modes that were projected out
        # Note: This is different from filter_small_eigvals(), which filters after all processing
        nonzero_mask = np.abs(eigvals) > 1e-10
        nonzero_eigvals = eigvals[nonzero_mask]

        if len(nonzero_eigvals) < 2:
            self.log("Warning: Insufficient non-zero eigenvalues for condition number", force=True)
            return None, True  # Likely ill-conditioned

        # Condition number = |λ_max| / |λ_min| among non-zero eigenvalues
        max_abs_eigval = np.max(np.abs(nonzero_eigvals))
        min_abs_eigval = np.min(np.abs(nonzero_eigvals))

        if min_abs_eigval < 1e-15:
            self.log("Warning: Extremely small minimum eigenvalue detected", force=True)
            return None, True

        condition_number = max_abs_eigval / min_abs_eigval

        # Classify conditioning
        is_ill_conditioned = condition_number > self.condition_number_threshold

        # Diagnostic output
        if condition_number > 1e10:
            self.log(f"WARNING: Hessian is severely ill-conditioned (κ={condition_number:.2e})", force=True)
            if not self.use_level_shift and not self.auto_level_shift:
                self.log("  Suggestion: Enable auto_level_shift=True for better stability", force=True)
        elif condition_number > 1e8:
            self.log(f"CAUTION: Hessian is ill-conditioned (κ={condition_number:.2e})", force=True)
        elif condition_number > 1e6:
            self.log(f"Hessian condition number is moderate (κ={condition_number:.2e})")
        else:
            self.log(f"Hessian is well-conditioned (κ={condition_number:.2e})")

        return condition_number, is_ill_conditioned
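
    # Worked example (annotation, not from the released file): for
    # eigvals = [-0.05, 1e-12, 0.02, 5.0] the 1e-12 mode is dropped by the
    # nonzero mask, so
    #     κ = max|λ| / min|λ| = 5.0 / 0.02 = 250,
    # which the thresholds above classify as well-conditioned (κ < 1e6).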

    def compute_eigendecomposition_with_shift(self, H):
        """
        Compute eigenvalue decomposition with optional level-shifting.

        Level-shifting temporarily improves numerical conditioning during the
        eigenvalue computation by adding a uniform shift to all diagonal elements:
            H_shifted = H + shift * I

        The shift is removed from eigenvalues afterward, so the returned eigenvalues
        are identical to those from standard eigendecomposition (in exact arithmetic).

        Key properties:
        - Eigenvalues: λ_shifted = λ + shift, so λ = λ_shifted - shift
        - Eigenvectors: unchanged by a uniform shift
        - Numerical stability: improved during computation
        - Final result: same as non-shifted (after shift removal)

        This is fully compatible with subsequent filter_small_eigvals():
        Workflow: shift → compute → remove shift → filter small eigenvalues

        The method works for ALL saddle orders:
        - saddle_order = 0: shift improves positive eigenvalue conditioning
        - saddle_order > 0: shift improves conditioning without affecting negative
          eigenvalues (negative eigenvalues remain negative after shift removal)

        Parameters:
            H: np.ndarray
                Hessian matrix (symmetric, n×n)

        Returns:
            eigvals: np.ndarray
                Eigenvalues (with shift removed, identical to original)
            eigvecs: np.ndarray
                Eigenvectors (unchanged by shift)
        """
        n = H.shape[0]
        self.level_shift_applied = False

        # === Decide whether to apply level-shifting ===
        apply_shift = False
        shift_reason = ""
        eigvals_check = None  # To store results from auto-check
        eigvecs_check = None

        if self.use_level_shift:
            # User explicitly requested level-shifting
            apply_shift = True
            shift_reason = "user-enabled"

        elif self.auto_level_shift:
            # Automatic level-shifting based on condition number
            try:
                # Quick eigendecomposition to check conditioning
                eigvals_check, eigvecs_check = np.linalg.eigh(H)
                condition_number, is_ill_conditioned = self.check_hessian_conditioning(eigvals_check)

                if is_ill_conditioned:
                    apply_shift = True
                    shift_reason = f"auto (κ={condition_number:.2e})"
                    self.log(f"Auto level-shifting triggered: κ={condition_number:.2e} > threshold={self.condition_number_threshold:.2e}", force=True)
            except Exception as e:
                self.log(f"Could not check condition number for auto level-shift: {e}")
                apply_shift = False

        # === Perform eigendecomposition ===
        if apply_shift:
            shift = self.level_shift_value
            self.log(f"Applying level shift: {shift:.2e} ({shift_reason})", force=True)

            # Add uniform shift to all diagonal elements
            H_shifted = H + shift * np.eye(n)

            # Eigendecomposition of shifted matrix
            eigvals_shifted, eigvecs = np.linalg.eigh(H_shifted)

            # Remove shift to restore original eigenvalues
            eigvals = eigvals_shifted - shift

            self.level_shift_applied = True

            # Diagnostic output
            if self.debug_mode:
                self.log(f"  Eigenvalue range (original): [{eigvals[0]:.6e}, {eigvals[-1]:.6e}]")
                self.log(f"  Eigenvalue range (shifted): [{eigvals_shifted[0]:.6e}, {eigvals_shifted[-1]:.6e}]")
                self.log(f"  Eigenvalue range (after removal): [{eigvals[0]:.6e}, {eigvals[-1]:.6e}]")
                self.log(f"  Note: Small eigenvalue filtering (if any) will be applied separately by filter_small_eigvals()")
            else:
                self.log(f"  Level shift applied during computation and removed from eigenvalues")
                self.log(f"  Final eigenvalues are identical to non-shifted computation")

        else:
            # Standard eigendecomposition without shift
            # Check if we already computed it during the auto-check
            if eigvals_check is not None:
                self.log("No level shift applied (auto-check passed)")
                eigvals, eigvecs = eigvals_check, eigvecs_check
            else:
                # Both use_level_shift and auto_level_shift were False
                self.log("No level shift applied (disabled)")
                eigvals, eigvecs = np.linalg.eigh(H)

            if self.debug_mode and eigvals_check is None:
                self.log(f"No level shift applied")

        return eigvals, eigvecs
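
    # Annotation (illustrative check, not from the released file): the
    # shift-and-remove workflow relies on the identity eigh(H + s*I) = (w + s, V)
    # for symmetric H, e.g.:
    #     H = np.diag([2.0, 1e-9]); s = 1e-5
    #     w, V = np.linalg.eigh(H + s * np.eye(2))
    #     # w - s recovers np.linalg.eigh(H)[0] to machine precision; V is unchanged.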

    def adjust_trust_radius_adaptive(self, actual_change, predicted_change, min_eigenvalue):
        """
        Adaptive trust radius update.
        Adjusts the trust radius considering Hessian curvature information (the minimum eigenvalue).

        Parameters:
            actual_change: float
                Actual energy change (current energy - previous energy)
            predicted_change: float
                Predicted energy change from the RFO model
            min_eigenvalue: float
                Minimum eigenvalue of the Hessian (curvature information)
                - Positive value: curvature in the minimization direction
                - Negative value: curvature in the maximization/saddle-point direction
        """
        # Skip if predicted change is too small
        if abs(predicted_change) < 1e-10:
            self.log("Skipping trust radius update: predicted change too small")
            return

        # === Step 1: Evaluate prediction accuracy ===
        # ratio = actual change / predicted change
        # Ideally ratio ≈ 1.0 (prediction is accurate)
        ratio = actual_change / predicted_change

        self.log(f"Step quality: actual={actual_change:.6e}, predicted={predicted_change:.6e}, ratio={ratio:.3f}")

        # === Step 2: Calculate adjustment factor based on curvature ===
        # Large curvature (steep) → small step is appropriate → smaller factor
        # Small curvature (flat) → large step is safe → larger factor

        abs_eigenvalue = abs(min_eigenvalue)

        if abs_eigenvalue > 1e-6:
            # When curvature is clearly present, use the relationship
            # curvature_factor = 1 / |λ_min|, with |λ_min| floored at 0.1
            # and the factor capped from above
            curvature_factor = min(
                self.max_curvature_factor,
                1.0 / max(abs_eigenvalue, 0.1)
            )
        else:
            # When curvature is nearly zero (very flat)
            # Allow larger steps
            curvature_factor = 1.5

        # === Step 3: Additional adjustment for transition state searches ===
        if self.saddle_order > 0:
            if min_eigenvalue < -1e-6:
                # Negative curvature direction (reaction coordinate of transition state)
                # Adjust step size more carefully
                curvature_factor *= self.negative_curvature_safety
                self.log(f"Negative curvature detected (λ_min={min_eigenvalue:.6e}), "
                         f"applying safety factor {self.negative_curvature_safety}")
            elif min_eigenvalue > 1e-6:
                # If the minimum curvature is positive, we may have crossed the transition state
                self.log(f"Warning: Positive minimum eigenvalue (λ_min={min_eigenvalue:.6e}) "
                         f"in transition state search", force=True)

        # === Step 4: Trust radius adjustment based on ratio ===
        old_trust_radius = self.trust_radius

        if ratio > 0.75:
            # === Excellent prediction accuracy (ratio > 0.75) ===
            # Model is very accurate → aggressively increase
            increase_factor = 1.5 * curvature_factor
            # Set upper limit (don't change too drastically at once)
            increase_factor = min(increase_factor, self.max_curvature_factor)

            self.trust_radius = min(
                self.trust_radius * increase_factor,
                self.trust_radius_max
            )
            status = "excellent"

        elif ratio > 0.5:
            # === Good prediction accuracy (0.5 < ratio ≤ 0.75) ===
            # Model is generally accurate → gradually increase
            increase_factor = 1.1 * curvature_factor
            increase_factor = min(increase_factor, 1.5)

            self.trust_radius = min(
                self.trust_radius * increase_factor,
                self.trust_radius_max
            )
            status = "good"

        elif ratio > 0.25:
            # === Acceptable prediction accuracy (0.25 < ratio ≤ 0.5) ===
            # Model accuracy is moderate

            if curvature_factor > 1.2:
                # Flat region (small curvature) → try increasing slightly
                self.trust_radius = min(
                    self.trust_radius * 1.05,
                    self.trust_radius_max
                )
                status = "acceptable (expanding slowly)"
            else:
                # Steep region or normal curvature → maintain
                status = "acceptable (maintaining)"

        elif ratio > 0.1:
            # === Poor prediction (0.1 < ratio ≤ 0.25) ===
            # Model accuracy is low → decrease
            self.trust_radius = max(
                self.trust_radius * 0.5,
                self.trust_radius_min
            )
            status = "poor"

        else:
            # === Very poor prediction (ratio ≤ 0.1, including ratio < 0) ===
            # Model is completely inaccurate, or the energy moved the wrong way → drastically decrease
            self.trust_radius = max(
                self.trust_radius * 0.25,
                self.trust_radius_min
            )
            status = "very poor"

        # === Step 5: Boundary check ===
        self.trust_radius = np.clip(
            self.trust_radius,
            self.trust_radius_min,
            self.trust_radius_max
        )

        # === Step 6: Log output ===
        if self.trust_radius != old_trust_radius:
            self.log(
                f"Trust radius adjusted: {old_trust_radius:.6f} → {self.trust_radius:.6f}",
                force=True
            )
            self.log(
                f"  Reason: ratio={ratio:.3f}, curvature_factor={curvature_factor:.3f}, "
                f"λ_min={min_eigenvalue:.6e}, status={status}"
            )
        else:
            self.log(f"Trust radius maintained: {self.trust_radius:.6f} (status={status})")

        # Optional: Save minimum eigenvalue history (for trend analysis)
        if len(self.min_eigenvalue_history) >= 10:
            self.min_eigenvalue_history.pop(0)
        self.min_eigenvalue_history.append(min_eigenvalue)
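
    # Worked example (annotation, not from the released file): with the defaults
    # above and saddle_order=1, suppose ratio = 0.9 and λ_min = -0.4. Then
    #     curvature_factor = min(2.5, 1.0 / max(0.4, 0.1)) = 2.5
    #     negative curvature → curvature_factor = 2.5 * 0.8 = 2.0
    #     ratio > 0.75 → increase_factor = min(1.5 * 2.0, 2.5) = 2.5
    # so a trust radius of 0.05 becomes min(0.05 * 2.5, trust_radius_max=0.1) = 0.1.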

    def adjust_trust_radius(self, actual_change, predicted_change, min_eigenvalue=None, gradient_norm=None):
        """
        Trust radius adjustment.

        If the adaptive method is enabled, min_eigenvalue is provided,
        AND the gradient_norm is below the threshold, performs the adjustment
        considering curvature information. Otherwise, uses the conventional
        simple method.

        Parameters:
            actual_change: float
                Actual energy change
            predicted_change: float
                Predicted energy change
            min_eigenvalue: float, optional
                Minimum eigenvalue of the Hessian (default: None)
            gradient_norm: float, optional
                Current L2 norm of the gradient. Used to conditionally
                activate the adaptive method.
        """

        # === [MODIFIED] Check conditions for using the ADAPTIVE method ===

        # 1. Must be globally enabled
        # 2. Must have the minimum eigenvalue
        can_use_adaptive = self.use_adaptive_trust_radius and min_eigenvalue is not None

        if can_use_adaptive:
            # If gradient norm was provided, check if it's below the threshold
            if gradient_norm is not None:
                if gradient_norm < self.adaptive_trust_gradient_norm_threshold:
                    # Gradient is small enough -> use ADAPTIVE
                    self.log(f"Gradient norm ({gradient_norm:.6f}) < threshold "
                             f"({self.adaptive_trust_gradient_norm_threshold:.6f}). "
                             f"Using ADAPTIVE trust radius.", force=True)
                    self.adjust_trust_radius_adaptive(actual_change, predicted_change, min_eigenvalue)
                    return
                else:
                    # Gradient is still large -> fall back to CONVENTIONAL
                    self.log(f"Gradient norm ({gradient_norm:.6f}) >= threshold "
                             f"({self.adaptive_trust_gradient_norm_threshold:.6f}). "
                             f"Using CONVENTIONAL trust radius.", force=True)
            else:
                # Gradient norm was *not* provided, but adaptive is on.
                # Default to using it (legacy behavior for backward compatibility).
                self.log("Gradient norm not provided. Defaulting to ADAPTIVE trust radius.")
                self.adjust_trust_radius_adaptive(actual_change, predicted_change, min_eigenvalue)
                return

        # === Conventional simple method (fallback) ===
        # (This block is reached if can_use_adaptive=False OR if the gradient was too large)

        if abs(predicted_change) < 1e-10:
            self.log("Skipping trust radius update: predicted change too small")
            return

        ratio = actual_change / predicted_change

        self.log(f"Energy change: actual={actual_change:.6f}, predicted={predicted_change:.6f}, ratio={ratio:.3f}", force=True)

        old_trust_radius = self.trust_radius

        if ratio > self.good_step_threshold:
            # Good step
            self.trust_radius = min(
                self.trust_radius * self.trust_radius_increase_factor,
                self.trust_radius_max
            )
            if self.trust_radius != old_trust_radius:
                self.log(f"Good step quality (ratio={ratio:.3f}), increasing trust radius to {self.trust_radius:.6f}", force=True)

        elif ratio < self.poor_step_threshold:
            # Poor step
            self.trust_radius = max(
                self.trust_radius * self.trust_radius_decrease_factor,
                self.trust_radius_min
            )
            if self.trust_radius != old_trust_radius:
                self.log(f"Poor step quality (ratio={ratio:.3f}), decreasing trust radius to {self.trust_radius:.6f}", force=True)

        else:
            # Acceptable step
            self.log(f"Acceptable step quality (ratio={ratio:.3f}), keeping trust radius at {self.trust_radius:.6f}", force=True)

    def evaluate_step_quality(self):
        """Evaluate the quality of recent optimization steps"""
        if len(self.predicted_energy_changes) < 2 or len(self.actual_energy_changes) < 2:
            return "unknown"

        # Calculate ratios correctly, considering the sign
        ratios = []
        for actual, predicted in zip(self.actual_energy_changes[-2:], self.predicted_energy_changes[-2:]):
            if abs(predicted) > 1e-10:
                # Directly use the raw ratio without taking absolute values
                ratios.append(actual / predicted)

        if not ratios:
            return "unknown"

        avg_ratio = sum(ratios) / len(ratios)

        # Check whether the changes go in the expected direction
        # (actual and predicted energy changes share the same sign)
        same_direction = all(
            (actual * predicted > 0) for actual, predicted in zip(
                self.actual_energy_changes[-2:], self.predicted_energy_changes[-2:]
            )
        )

        if 0.8 < avg_ratio < 1.2 and same_direction:
            quality = "good"
        elif 0.5 < avg_ratio < 1.5 and same_direction:
            quality = "acceptable"
        else:
            quality = "poor"

        self.log(f"Step quality assessment: {quality} (avg ratio: {avg_ratio:.3f})", force=True)
        return quality

    def get_rs_step(self, eigvals, eigvecs, gradient):
        """Compute the Rational Step using the RS-I-RFO algorithm"""
        # Transform gradient to basis of eigenvectors - use matrix multiplication for efficiency
        gradient_trans = np.dot(eigvecs.T, gradient)

        try:
            # Calculate step with default alpha (alpha0) using the new O(N) solver
            initial_step, _, _, _ = self.solve_rfo(eigvals, gradient_trans, self.alpha0)
            initial_step_norm = np.linalg.norm(initial_step)

            self.log(f"Initial step with alpha={self.alpha0:.6f} has norm={initial_step_norm:.6f}", force=True)

            # If the step is already within trust radius, use it directly
            if initial_step_norm <= self.trust_radius:
                self.log(f"Initial step is within trust radius ({self.trust_radius:.6f}), using it directly", force=True)
                # Transform step back to original basis
                final_step = np.dot(eigvecs, initial_step)
                return final_step

            self.log(f"Initial step exceeds trust radius, optimizing alpha to match radius...", force=True)

            # --- MODIFICATION START ---
            # If the initial step is outside the trust radius, we must find the
            # alpha that puts the step *on* the trust radius boundary.
            # We call compute_rsprfo_step *once* to solve this.

            step, step_norm, final_alpha = self.compute_rsprfo_step(
                eigvals, gradient_trans, self.alpha0
            )

            self.log(f"Optimized alpha={final_alpha:.6f} to get step_norm={step_norm:.6f}", force=True)

            # Transform step back to original basis (use matrix multiplication for efficiency)
            step_original_basis = np.dot(eigvecs, step)

            step_norm_original = np.linalg.norm(step_original_basis)
            self.log(f"Final norm(step)={step_norm_original:.6f}", force=True)

            return step_original_basis
            # --- MODIFICATION END ---

        except Exception as e:
            self.log(f"Error during RS step calculation: {str(e)}", force=True)
            # If all else fails, use a steepest descent step
            self.log("Using steepest descent step as fallback", force=True)
            sd_step = -gradient_trans
            sd_norm = np.linalg.norm(sd_step)

            if sd_norm > self.trust_radius:
                best_overall_step = sd_step / sd_norm * self.trust_radius
            else:
                best_overall_step = sd_step

            # Transform step back to original basis
            step = np.dot(eigvecs, best_overall_step)

            step_norm = np.linalg.norm(step)
            self.log(f"Final norm(step)={step_norm:.6f}", force=True)

            return step

    def compute_rsprfo_step(self, eigvals, gradient_trans, alpha_init):
        """
        Compute an RS-P-RFO step using a specific initial alpha value.
        Prioritizes Brent's method (brentq) for finding the root 'alpha'
        that matches the trust radius, falling back to Newton iterations
        only if brentq fails or its result is not sufficiently precise.
        """

        # Pre-calculate squared gradient components for efficiency
        grad_trans_sq = gradient_trans**2

        # Create proxy functions for step norm calculation
        def calculate_step(alpha):
            """Calculate RFO step for a given alpha value"""
            try:
                # Use the new O(N) solver
                step, eigval_min, _, _ = self.solve_rfo(eigvals, gradient_trans, alpha)
                return step, eigval_min
            except Exception as e:
                self.log(f"Error in step calculation: {str(e)}")
                raise

        def step_norm_squared(alpha):
            """Calculate ||step||^2 for a given alpha value"""
            # This function is only used by brentq, which only needs the step norm
            step, _ = calculate_step(alpha)
            return np.dot(step, step)

        def objective_function(alpha):
            """U(a) = ||step||^2 - R^2"""
            return step_norm_squared(alpha) - self.trust_radius**2
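
        # Annotation (property assumed from the RS-RFO literature, refs. [1]/[4],
        # not stated in the released file): in the eigenbasis the step behaves
        # like step_i = -g_i / (h_i - lam(alpha)*alpha), and ||step|| shrinks
        # monotonically as alpha grows, so a small alpha (large step) and a large
        # alpha (small step) bracket the root of U(a) = ||step||^2 - R^2; the
        # brentq bracket check below exploits exactly this.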
|
|
909
|
+
        # --- MODIFICATION START ---
        # Prioritize Brent's method (brentq) as it does not rely on derivatives.

        alpha_lo = 1e-6            # Very small alpha gives large step
        alpha_hi = self.alpha_max  # Very large alpha gives small step

        try:
            # Check step norms at boundaries to establish bracket
            step_lo, _ = calculate_step(alpha_lo)
            norm_lo = np.linalg.norm(step_lo)
            obj_lo = norm_lo**2 - self.trust_radius**2

            step_hi, _ = calculate_step(alpha_hi)
            norm_hi = np.linalg.norm(step_hi)
            obj_hi = norm_hi**2 - self.trust_radius**2

            self.log(f"Bracket search: alpha_lo={alpha_lo:.6e}, step_norm={norm_lo:.6f}, obj={obj_lo:.6e}")
            self.log(f"Bracket search: alpha_hi={alpha_hi:.6e}, step_norm={norm_hi:.6f}, obj={obj_hi:.6e}")

            # Check if we have a proper bracket (signs differ)
            if obj_lo * obj_hi < 0:
                # We have a bracket, use Brent's method for robust root finding
                self.log("Bracket established, using Brent's method (brentq) for root finding")

                alpha_brent = brentq(objective_function, alpha_lo, alpha_hi,
                                     xtol=1e-6, rtol=1e-6, maxiter=50)

                self.log(f"Brent's method converged to alpha={alpha_brent:.6e}")

                # Calculate the step using the alpha from brentq
                step, _ = calculate_step(alpha_brent)
                step_norm = np.linalg.norm(step)
                norm_diff = abs(step_norm - self.trust_radius)

                # Check if the result from brentq is within the strict tolerance
                if norm_diff < self.step_norm_tolerance:
                    self.log(f"brentq result is within tolerance ({self.step_norm_tolerance:.2e}). Using this step (norm={step_norm:.6f}).")
                    # Return immediately, skipping the Newton loop
                    return step, step_norm, alpha_brent
                else:
                    self.log(f"brentq result norm={step_norm:.6f} (diff={norm_diff:.2e}) still outside tolerance. Proceeding to Newton refinement.")
                    # Use the brentq result as the starting point for Newton
                    alpha = alpha_brent
            else:
                # No bracket, so use initial alpha and proceed with Newton iterations
                self.log("Could not establish bracket with opposite signs, proceeding to Newton iterations")
                alpha = alpha_init

        except Exception as e:
            # Handle any error during bracketing or brentq
            self.log(f"Error during brentq attempt: {str(e)}. Falling back to Newton iterations with initial alpha.")
            alpha = alpha_init

        # --- MODIFICATION END ---

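        # The refinement below is a safeguarded scalar Newton iteration on
        # U(alpha) = ||step(alpha)||^2 - R^2, i.e. alpha' = alpha - U(alpha)/U'(alpha),
        # with U'(alpha) supplied by get_step_derivative. A worked check with
        # hypothetical numbers: if ||step||^2 = 4/alpha and R = 1, then
        # U(alpha) = 4/alpha - 1 and U'(alpha) = -4/alpha**2, so starting from
        # alpha = 2 Newton gives alpha' = 2 - 1/(-1) = 3, then 3.75, converging
        # to the root alpha = 4.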
        # Fallback: use Newton iterations to refine alpha (or if brentq was imprecise).
        # 'alpha' is either alpha_init (if brentq failed) or alpha_brent
        # (if brentq succeeded but was imprecise).

        self.log(f"Starting Newton refinement loop with alpha={alpha:.6f}")

        # Use a fixed-size numpy array instead of a growing list for step_norm_history
        step_norm_history = np.zeros(self.max_micro_cycles)
        history_count = 0
        best_step = None
        best_step_norm_diff = float('inf')

        # Variables to track bracketing
        alpha_left = None
        alpha_right = None
        objval_left = None
        objval_right = None

        for mu in range(self.max_micro_cycles):
            self.log(f"RS-P-RFO (Newton) micro cycle {mu:02d}, alpha={alpha:.6f}")

            try:
                # Calculate the current step and its properties
                # (re-use eigval_min from calculate_step)
                step, eigval_min = calculate_step(alpha)
                step_norm = np.linalg.norm(step)
                self.log(f"norm(step)={step_norm:.6f}")

                # Keep track of the best step seen so far (closest to trust radius)
                norm_diff = abs(step_norm - self.trust_radius)
                if norm_diff < best_step_norm_diff:
                    if best_step is None:
                        best_step = step.copy()
                    else:
                        # In-place update of best_step
                        best_step[:] = step
                    best_step_norm_diff = norm_diff

                # Calculate objective function value U(a) = ||step||^2 - R^2
                objval = step_norm**2 - self.trust_radius**2
                self.log(f"U(a)={objval:.6e}")

                # Update bracketing information
                if objval < 0 and (alpha_left is None or alpha > alpha_left):
                    alpha_left = alpha
                    objval_left = objval
                elif objval > 0 and (alpha_right is None or alpha < alpha_right):
                    alpha_right = alpha
                    objval_right = objval

                # Check if we're already very close to the target radius
                if abs(objval) < 1e-8 or norm_diff < self.step_norm_tolerance:
                    self.log(f"Step norm {step_norm:.6f} is sufficiently close to trust radius. Newton loop converged.")
                    # --- MODIFICATION ---
                    # (Original code had: if mu >= 1: break)
                    # We now break immediately upon convergence.
                    best_step = step  # Ensure the final step is the one that converged
                    break

                # Track step norm history for convergence detection (fixed-size array)
                if history_count < self.max_micro_cycles:
                    step_norm_history[history_count] = step_norm
                    history_count += 1

                # Compute derivative of squared step norm with respect to alpha
                # (pass the computed step and eigval_min to avoid re-calculation)
                dstep2_dalpha = self.get_step_derivative(alpha, eigvals, gradient_trans,
                                                         step=step, eigval_min=eigval_min)
                self.log(f"d(||step||^2)/dα={dstep2_dalpha:.6e}")

                # Update alpha with the Newton formula: a' = a - U(a)/U'(a)
                if abs(dstep2_dalpha) < 1e-10:
                    # Small derivative - use bisection if a bracket is available
                    if alpha_left is not None and alpha_right is not None:
                        alpha_new = (alpha_left + alpha_right) / 2
                        self.log(f"Small derivative, using bisection: alpha {alpha:.6f} -> {alpha_new:.6f}")
                    else:
                        # No bracket yet, use heuristic scaling. Since small alpha
                        # gives a large step (see the bracket comments above),
                        # an oversized step calls for a larger alpha.
                        if objval > 0:  # Step too large, need larger alpha
                            alpha_new = min(alpha * 2, self.alpha_max)
                        else:  # Step too small, need smaller alpha
                            alpha_new = max(alpha / 2, 1e-6)
                        self.log(f"Small derivative, no bracket, using heuristic: alpha {alpha:.6f} -> {alpha_new:.6f}")
                else:
                    # Use the Newton update with the proper U(a)/U'(a)
                    alpha_step_raw = -objval / dstep2_dalpha

                    # Apply safeguards to the Newton step
                    alpha_step = np.clip(alpha_step_raw, -self.alpha_step_max, self.alpha_step_max)
                    if alpha_step != alpha_step_raw:
                        self.log(f"Limited alpha step from {alpha_step_raw:.6f} to {alpha_step:.6f}")

                    alpha_new = alpha + alpha_step

                    # Additional protection: if a bracket is available, ensure we stay within it
                    if alpha_left is not None and alpha_right is not None:
                        # Safeguard to keep alpha within the established bracket
                        alpha_new = max(min(alpha_new, alpha_right * 0.99), alpha_left * 1.01)
                        if alpha_new != alpha + alpha_step:
                            self.log(f"Safeguarded alpha to stay within bracket: {alpha_new:.6f}")

                # Update alpha with bounds checking
                old_alpha = alpha
                alpha = min(max(alpha_new, 1e-6), self.alpha_max)
                self.log(f"Updated alpha: {old_alpha:.6f} -> {alpha:.6f}")

                # Check if alpha is hitting its limits
                if alpha == self.alpha_max or alpha == 1e-6:
                    self.log(f"Alpha hit boundary at {alpha:.6e}, stopping iterations")
                    break

                # Check for convergence in the step norm using the last 3 values
                if history_count >= 3:
                    idx = history_count - 1
                    recent_changes = [
                        abs(step_norm_history[idx] - step_norm_history[idx - 1]),
                        abs(step_norm_history[idx - 1] - step_norm_history[idx - 2]),
                    ]
                    if all(change < 1e-6 for change in recent_changes):
                        self.log("Step norm not changing significantly, stopping iterations")
                        break

            except Exception as e:
                self.log(f"Error in micro-cycle {mu}: {str(e)}")
                # If we have a good step, use it and stop
                if best_step is not None:
                    self.log("Using best step found so far due to error")
                    step = best_step
                    step_norm = np.linalg.norm(step)
                    break
                else:
                    # Last resort: steepest descent
                    self.log("Falling back to steepest descent due to errors")
                    step = -gradient_trans
                    step_norm = np.linalg.norm(step)
                    if step_norm > self.trust_radius:
                        step = step / step_norm * self.trust_radius
                        step_norm = self.trust_radius
                    break
        else:
            # === [MODIFIED] If we exhausted micro-cycles without converging ===
            self.log(f"RS-P-RFO (Newton) did not converge in {self.max_micro_cycles} cycles", force=True)

            # Check whether the 'best_step' found is close enough to the trust radius
            if best_step is not None:
                best_step_norm = np.linalg.norm(best_step)
                # Use a slightly relaxed tolerance
                if abs(best_step_norm - self.trust_radius) < self.step_norm_tolerance * 1.1:
                    self.log(f"Using best step found during iterations (norm={best_step_norm:.6f} was close enough)")
                    step = best_step
                    step_norm = best_step_norm
                else:
                    # If 'best_step' is not close (e.g., norm=506),
                    # discard it as junk and fall back to safe steepest descent.
                    self.log(f"Best step found (norm={best_step_norm:.6f}) was NOT close to trust radius. Forcing steepest descent.", force=True)
                    step = -gradient_trans
                    step_norm = np.linalg.norm(step)
                    if step_norm > 1e-10:
                        step = step / step_norm * self.trust_radius
                        step_norm = self.trust_radius
                    else:
                        step = np.zeros_like(gradient_trans)  # Gradient is zero
                        step_norm = 0.0
            else:
                # If no 'best_step' was ever found, fall back to steepest descent.
                self.log("No usable step found. Forcing steepest descent as a last resort.", force=True)
                step = -gradient_trans
                step_norm = np.linalg.norm(step)
                if step_norm > 1e-10:
                    step = step / step_norm * self.trust_radius
                    step_norm = self.trust_radius
                else:
                    step = np.zeros_like(gradient_trans)  # Gradient is zero
                    step_norm = 0.0
            # === [END MODIFICATION] ===

        return step, step_norm, alpha

    def get_step_derivative(self, alpha, eigvals, gradient_trans, step=None, eigval_min=None):
        """
        Compute the derivative of the squared step norm with respect to alpha directly.
        Assumes eigval_min is (approximately) constant w.r.t. alpha.
        """
        # If step or eigval_min was not provided, compute them
        if step is None or eigval_min is None:
            try:
                # Use the new O(N) solver
                step, eigval_min, _, _ = self.solve_rfo(eigvals, gradient_trans, alpha)
            except Exception as e:
                self.log(f"Error in step calculation for derivative: {str(e)}")
                return 1e-8  # Return a small value as fallback

        try:
            # Calculate the denominators with safety
            denominators = eigvals - eigval_min * alpha

            # Handle small denominators safely (vectorized operations for efficiency)
            small_denoms = np.abs(denominators) < 1e-8
            if np.any(small_denoms):
                # Create safe denominators with minimal new memory allocation
                safe_denoms = denominators.copy()
                safe_denoms[small_denoms] = np.sign(safe_denoms[small_denoms]) * np.maximum(1e-8, np.abs(safe_denoms[small_denoms]))
                # Apply sign correction for exact zeros. Build a combined mask:
                # chained fancy indexing (arr[mask1][mask2] = ...) would write
                # into a temporary copy and be silently discarded.
                zero_idx = small_denoms & (safe_denoms == 0)
                safe_denoms[zero_idx] = 1e-8
                denominators = safe_denoms

            # Calculate the summation term - use vectorized operations
            numerator = gradient_trans**2
            denominator = denominators**3

            # Avoid division by very small values
            valid_indices = np.abs(denominator) > 1e-10

            if not np.any(valid_indices):
                return 1e-8  # Return a small positive value if no valid indices

            # Initialize sum terms as zeros to avoid allocation inside a loop
            sum_terms = np.zeros_like(numerator)
            sum_terms[valid_indices] = numerator[valid_indices] / denominator[valid_indices]

            # Clip extremely large values
            max_magnitude = 1e20
            large_values = np.abs(sum_terms) > max_magnitude
            if np.any(large_values):
                sum_terms[large_values] = np.sign(sum_terms[large_values]) * max_magnitude

            sum_term = np.sum(sum_terms)

            # Calculate the derivative with protection
            dstep2_dalpha = 2.0 * eigval_min * sum_term

            # Additional safety check
            if not np.isfinite(dstep2_dalpha) or abs(dstep2_dalpha) > max_magnitude:
                dstep2_dalpha = np.sign(dstep2_dalpha) * max_magnitude if dstep2_dalpha != 0 else 1e-8

            return dstep2_dalpha

        except Exception as e:
            self.log(f"Error in derivative calculation: {str(e)}")
            return 1e-8  # Return a small positive value as fallback


    def update_hessian(self, current_geom, current_grad, previous_geom, previous_grad):
        """Update the Hessian using the specified update method"""
        # Calculate displacement and gradient difference (avoid unnecessary reshaping)
        displacement = np.asarray(current_geom - previous_geom).reshape(-1, 1)
        delta_grad = np.asarray(current_grad - previous_grad).reshape(-1, 1)

        # Skip the update if the changes are too small
        disp_norm = np.linalg.norm(displacement)
        grad_diff_norm = np.linalg.norm(delta_grad)

        if disp_norm < 1e-10 or grad_diff_norm < 1e-10:
            self.log("Skipping Hessian update due to small changes")
            return

        # Check if displacement and gradient difference are sufficiently aligned
        dot_product = np.dot(displacement.T, delta_grad)
        dot_product = dot_product[0, 0]  # Extract scalar value from 1x1 matrix
        if dot_product <= 0:
            self.log("Skipping Hessian update due to poor alignment")
            return

        self.log(f"Hessian update: displacement norm={disp_norm:.6f}, gradient diff norm={grad_diff_norm:.6f}, dot product={dot_product:.6f}")

        # --- [Refactored Method Dispatch (maintaining 'in' logic)] ---

        method_key_lower = self.hessian_update_method.lower()

        # Default values (fallback)
        method_name, update_function = self.default_update_method
        found_method = False

        # Iterate through the prioritized list
        for key, name, func in self.updater_dispatch_list:
            if key in method_key_lower:
                method_name = name
                update_function = func
                found_method = True
                break  # Found the first (highest priority) match
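        # For example (hypothetical entries; updater_dispatch_list is defined
        # elsewhere in this class): with a list like
        #     [("fsb", "FSB", fsb_update), ("bfgs", "BFGS", bfgs_update)]
        # a setting of hessian_update_method="Auto_BFGS" lowercases to
        # "auto_bfgs" and matches "bfgs" by substring, so the order of the
        # list encodes the matching priority.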

        if not found_method:
            self.log(f"Unknown Hessian update method: {self.hessian_update_method}. Using auto selection.")

        self.log(f"Hessian update method: {method_name}")

        # Call the selected function (either found or default)
        delta_hess = update_function(
            self.hessian, displacement, delta_grad
        )

        # --- [End of Refactored Section] ---

        # Update the Hessian (in-place addition)
        self.hessian += delta_hess

        # Ensure Hessian symmetry (numerical errors might cause slight asymmetry).
        # Note: this symmetrization allocates a temporary array; that is cheap
        # relative to the update itself.
        self.hessian = 0.5 * (self.hessian + self.hessian.T)

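    # For reference, a typical updater consumed by update_hessian is the BFGS
    # formula (a sketch; the actual updaters are defined elsewhere in this
    # package and selected via updater_dispatch_list):
    #     delta_H = (dg @ dg.T) / float(dg.T @ s) - (H @ s @ s.T @ H) / float(s.T @ H @ s)
    # with s the displacement column vector and dg the gradient-difference
    # column vector. The curvature check dg.T @ s > 0 performed above is what
    # keeps a BFGS-updated Hessian positive definite.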
    def _solve_secular_safeguarded(self, eigvals_prime, grad_comps_prime_sq, lambda_min_asymptote, initial_guess):
        """
        [NEW] Safeguarded Newton's method for the RFO secular equation.

        This solver is specifically designed for the secular equation's structure.
        It combines the rapid convergence of Newton's method with the
        guaranteed convergence of bisection.

        It maintains a bracket [a, b] known to contain the root and uses
        Newton's method. If the Newton step would fall outside the bracket,
        it reverts to a bisection step.
        """

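        # Why bisection is always safe here (a sketch, from the definitions
        # below): to the left of the first pole each term g'^2/(λ' - λ) has
        # derivative g'^2/(λ' - λ)^2 >= 0, so f'(λ) >= 1 on the whole interval
        # (-inf, lambda_min_asymptote). f is therefore strictly increasing
        # there, has exactly one root, and any bracket [a, b] with f(a) < 0
        # can be halved repeatedly without losing that root.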
        # Define the secular function and its derivative
        def f_secular(lmd):
            denominators = eigvals_prime - lmd
            # Safety for division
            safe_denoms = np.where(
                np.abs(denominators) < 1e-30,
                np.sign(denominators) * 1e-30,
                denominators
            )
            safe_denoms[safe_denoms == 0] = 1e-30  # Handle exact zeros
            terms_f = grad_comps_prime_sq / safe_denoms
            return lmd + np.sum(terms_f)

        def f_prime_secular(lmd):
            denominators = eigvals_prime - lmd
            safe_denoms = np.where(
                np.abs(denominators) < 1e-30,
                np.sign(denominators) * 1e-30,
                denominators
            )
            safe_denoms[safe_denoms == 0] = 1e-30
            terms_f_prime = grad_comps_prime_sq / (safe_denoms**2)
            return 1.0 + np.sum(terms_f_prime)

        # --- Setup bracket [a, b] ---
        # b is the upper bound (the first pole)
        b = lambda_min_asymptote

        # a is the lower bound. We need f(a) < 0.
        # Start with the initial guess.
        a = initial_guess
        f_a = f_secular(a)

        # If f(a) is not negative, step back until it is.
        g_norm = np.sqrt(np.sum(grad_comps_prime_sq))
        search_limit = 10
        while f_a > 0 and search_limit > 0:
            self.log(f" Safeguard Solver: f(a) > 0 at a={a:.6e}. Stepping back.")
            step_back = max(g_norm, np.abs(a) * 0.1, 1e-8)
            a = a - step_back
            f_a = f_secular(a)
            search_limit -= 1

        if f_a > 0:
            self.log(" Safeguard Solver: Could not establish lower bound 'a'.", force=True)
            return initial_guess  # Fallback

        # We don't evaluate f(b), because it diverges to +infinity at the pole.
        # We know the root is in [a, b).

        # Start the iteration from the best initial guess
        lambda_k = initial_guess
        if lambda_k <= a or lambda_k >= b:
            lambda_k = (a + b) / 2.0  # Fall back to the midpoint if the guess is out of bounds

        self.log(f" Safeguard Solver: Starting search in [{a:.6e}, {b:.6e}]")

        max_iterations = 50
        # Use a tolerance relative to the pole
        tolerance = (1e-10 * abs(lambda_min_asymptote)) + 1e-12

        for iteration in range(max_iterations):
            f_lambda = f_secular(lambda_k)

            # Check convergence
            if abs(f_lambda) < tolerance:
                self.log(f" Safeguard Solver: Converged in {iteration + 1} iterations", force=True)
                self.log(f" Final: lambda_aug={lambda_k:.6e}, f(λ)={f_lambda:.2e}")
                return lambda_k

            f_prime_lambda = f_prime_secular(lambda_k)

            # --- Calculate the Newton step ---
            delta_newton = 0.0
            if abs(f_prime_lambda) > 1e-20:
                delta_newton = -f_lambda / f_prime_lambda
            else:
                self.log(" Warning: f'(λ) too small. Switching to bisection.")

            lambda_newton = lambda_k + delta_newton

            # --- Calculate the bisection step ---
            lambda_bisection = (a + b) / 2.0

            # --- Safeguard check ---
            # Is the Newton step safe (i.e., within the bracket [a, b])?
            if (delta_newton != 0.0) and (lambda_newton > a) and (lambda_newton < b):
                # Yes: use the Newton step
                lambda_k_next = lambda_newton
                if self.debug_mode:
                    self.log(f" Iter {iteration:2d} (Newton): λ={lambda_k_next:.6e}")
            else:
                # No: use the safe bisection step
                lambda_k_next = lambda_bisection
                if self.debug_mode:
                    self.log(f" Iter {iteration:2d} (Bisection): λ={lambda_k_next:.6e}")

            # --- Update the bracket [a, b] for the next iteration ---
            # (This is the key to safety)
            if f_lambda > 0:
                # The root is to the left; the new upper bound is the current lambda
                b = lambda_k
            else:
                # The root is to the right; the new lower bound is the current lambda
                a = lambda_k

            lambda_k = lambda_k_next

            # Check whether the bracket is too small
            if abs(b - a) < tolerance:
                self.log(" Safeguard Solver: Bracket converged", force=True)
                return (a + b) / 2.0

        else:
            # Max iterations reached
            self.log(f"Warning: Safeguard Solver did not converge in {max_iterations} iterations", force=True)
            return (a + b) / 2.0  # Return the center of the last known bracket

    def _solve_secular_more_sorensen(self, eigvals, grad_comps, alpha):
        """
        [MODIFIED] Robust solver for the RFO secular equation with fallbacks.

        Attempts to find the smallest root (lambda_aug) of the secular equation
        with the safeguarded Newton/bisection solver first. If that solver
        raises, a bracketed brentq attempt is made, and if brentq also fails
        (e.g., no bracket can be established), a plain Moré-Sorensen
        (Newton-style) iteration is used as the last resort.

        Secular equation:
            f(λ) = λ + Σ_i [g_i'^2 / (λ_i' - λ)] = 0

        where λ_i' = λ_i/α and g_i' = g_i/α.

        Parameters:
            eigvals: np.ndarray (sorted ascending)
            grad_comps: np.ndarray
            alpha: float

        Returns:
            lambda_aug: float (smallest root)
        """

        # 1. Scale values
        eigvals_prime = eigvals / alpha
        grad_comps_prime = grad_comps / alpha
        grad_comps_prime_sq = grad_comps_prime**2

        # Define the secular function (a closure over the scaled values above)
        def f_secular(lmd):
            denominators = eigvals_prime - lmd
            # Safety for division
            safe_denoms = np.where(
                np.abs(denominators) < 1e-30,
                np.sign(denominators) * 1e-30,
                denominators
            )
            safe_denoms[safe_denoms == 0] = 1e-30  # Handle exact zeros
            terms_f = grad_comps_prime_sq / safe_denoms
            return lmd + np.sum(terms_f)

        # 2. Find the first asymptote (smallest λ_i') where g_i' is non-zero
        lambda_min_asymptote = None
        g_norm_sq = 0.0

        for i in range(len(eigvals_prime)):
            g_sq = grad_comps_prime_sq[i]
            g_norm_sq += g_sq

            if lambda_min_asymptote is None and g_sq > 1e-20:
                lambda_min_asymptote = eigvals_prime[i]

        if lambda_min_asymptote is None:
            # Hard case: all gradient components are zero
            self.log("Hard case detected: All gradient components are zero.", force=True)
            return eigvals_prime[0]

        # Gradient norm (used by the brentq bracket search below)
        g_norm = np.sqrt(g_norm_sq)

        # 3. Initial guess (Baker, JCC 1986, Eq. 15)
        lambda_initial_guess = 0.5 * (lambda_min_asymptote - np.sqrt(max(0.0, lambda_min_asymptote**2 + 4 * g_norm_sq)))

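        # This guess is the exact root of a one-term secular equation: for a
        # single pair (λ₁', g₁'), λ + g₁'^2/(λ₁' - λ) = 0 rearranges to
        # λ^2 - λ₁'λ - g₁'^2 = 0, whose smaller root is
        # λ = (λ₁' - sqrt(λ₁'^2 + 4 g₁'^2)) / 2, i.e. the expression above with
        # the full gradient norm in place of g₁'.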
        # 4. Call the dedicated safeguarded solver first
        try:
            lambda_aug = self._solve_secular_safeguarded(
                eigvals_prime,
                grad_comps_prime_sq,
                lambda_min_asymptote,
                lambda_initial_guess
            )
            return lambda_aug

        except Exception as e:
            self.log(f"CRITICAL ERROR in _solve_secular_safeguarded: {e}", force=True)
            self.log("Falling back to the brentq solver.", force=True)

        # --- Second strategy: brentq ---
        try:
            self.log("Solving the RFO secular equation f(λ)=0 using brentq")
            self.log(f"First asymptote (lambda_min_asymptote) = {lambda_min_asymptote:.6e}")

            # --- Establish a bracket [a, b] ---

            # b (upper bound) is just below the asymptote; f(b) should be large and positive
            b_margin = max(1e-12, np.abs(lambda_min_asymptote) * 1e-10)
            b = lambda_min_asymptote - b_margin
            f_b = f_secular(b)

            if f_b < 0:
                self.log(f" Warning: f(b) < 0 at {b:.6e}. Evaluating at the asymptote limit.")
                b = lambda_min_asymptote
                f_b = f_secular(b)  # This will be large and positive due to safe_denoms

            # a (lower bound); f(a) must be < 0
            a = lambda_initial_guess
            f_a = f_secular(a)

            search_limit = 10
            while f_a > 0 and search_limit > 0:
                self.log(f" brentq bracket search: f(a) > 0 at a={a:.6e}. Stepping back.")
                step_back = max(g_norm, np.abs(a) * 0.1, 1e-8)  # Ensure the step back is non-zero
                a = a - step_back
                f_a = f_secular(a)
                search_limit -= 1

            if f_a * f_b >= 0:
                # Failed to find a bracket
                self.log(f" Error: Could not establish a bracket for brentq. [a,b]=[{a:.2e},{b:.2e}], [f(a),f(b)]=[{f_a:.2e},{f_b:.2e}]", force=True)
                # This raises an exception, triggering the fallback below
                raise ValueError("brentq bracketing failed")

            self.log(f" brentq bracket established: [a, b] = [{a:.6e}, {b:.6e}], [f(a), f(b)] = [{f_a:.2e}, {f_b:.2e}]")

            # Use brentq to find the root
            lambda_aug_brent = brentq(f_secular, a, b, xtol=1e-10, rtol=1e-10, maxiter=100)

            self.log(f" brentq solver converged: lambda_aug = {lambda_aug_brent:.6e}", force=True)
            return lambda_aug_brent  # Return the successful brentq result

        except Exception as e:
            self.log(f"brentq solver failed ({str(e)}). Falling back to the Moré-Sorensen (Newton) solver.", force=True)

        # --- Last-resort strategy: Moré-Sorensen (Newton) ---
        # (This logic is from the original file, lines 1445-1502, with English comments)

        lambda_aug = lambda_initial_guess
        self.log(f"Fallback (Newton): Initial lambda_aug guess = {lambda_aug:.6e}")

        max_iterations = 50
        tolerance = (1e-10 * abs(lambda_min_asymptote)) + 1e-12

        for iteration in range(max_iterations):
            # Denominators (λ_i' - λ)
            denominators = eigvals_prime - lambda_aug

            # Safe denominators
            safe_denoms = np.where(
                np.abs(denominators) < 1e-30,
                np.sign(denominators) * 1e-30,
                denominators
            )
            safe_denoms[safe_denoms == 0] = 1e-30  # Handle exact zeros

            # f(λ) and f'(λ)
            terms_f = grad_comps_prime_sq / safe_denoms
            terms_f_prime = grad_comps_prime_sq / (safe_denoms**2)

            f_lambda = lambda_aug + np.sum(terms_f)
            f_prime_lambda = 1.0 + np.sum(terms_f_prime)

            # Check convergence
            if abs(f_lambda) < tolerance:
                self.log(f"RFO Newton (Fallback) converged in {iteration + 1} iterations", force=True)
                self.log(f"Final: lambda_aug={lambda_aug:.6e}, f(λ)={f_lambda:.2e}")
                break

            if abs(f_prime_lambda) < 1e-20:
                self.log(f"Warning: f'(λ) too small ({f_prime_lambda:.2e}) at iteration {iteration}", force=True)
                break

            # Newton update
            delta_lambda = -f_lambda / f_prime_lambda

            lambda_aug_old = lambda_aug
            lambda_aug += delta_lambda

            # Safeguard: must stay below the asymptote
            if lambda_aug >= lambda_min_asymptote:
                self.log(f"Warning: lambda_aug ({lambda_aug:.6e}) >= asymptote ({lambda_min_asymptote:.6e}), adjusting")
                lambda_aug = 0.5 * (lambda_aug_old + lambda_min_asymptote)

            if self.debug_mode:
                self.log(f" Iter {iteration:2d}: λ={lambda_aug:.6e}, f(λ)={f_lambda:.2e}, "
                         f"f'(λ)={f_prime_lambda:.2e}, Δλ={delta_lambda:.2e}")

        else:
            # Max iterations reached for Newton
            self.log(f"Warning: RFO Newton (Fallback) did not converge in {max_iterations} iterations", force=True)
            self.log(f"Final residual f(λ): {f_lambda:.2e}. Using last value.", force=True)

        # Return the result from the Newton solver (even if it did not converge, it is the best guess)
        return lambda_aug

    def solve_rfo(self, eigvals, gradient_components, alpha, mode="min"):
        """
        Solve the RFO equations to get the step using the O(N) secular equation.
        """
        if mode != "min":
            raise NotImplementedError("Secular equation solver is only implemented for RFO minimization (mode='min')")

        # 1. Find the smallest eigenvalue (lambda_aug) of the augmented Hessian
        eigval_min = self._solve_secular_more_sorensen(eigvals, gradient_components, alpha)

        # 2. Calculate the step components directly. This is O(N).
        denominators = (eigvals / alpha) - eigval_min

        # Safety for division
        safe_denoms = np.where(
            np.abs(denominators) < 1e-20,
            np.sign(denominators) * 1e-20,
            denominators
        )

        # Handle exact zeros that slipped through (e.g., in the 'hard case')
        safe_denoms[safe_denoms == 0] = 1e-20

        # Calculate the step s_i = -(g_i/alpha) / (denominators)
        step = -(gradient_components / alpha) / safe_denoms

        # Return dummy values for nu and eigvec, as they are no longer computed
        return step, eigval_min, 1.0, None

    def rfo_model(self, gradient, hessian, step):
        """Estimate the energy change based on the RFO quadratic model"""
        # Use efficient matrix operations
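        # The prediction is ΔE ≈ g·s + (1/2)·sᵀHs. Worked 1-D check with
        # hypothetical numbers: gradient=[-1.0], hessian=[[2.0]], step=[0.25]
        # gives -0.25 + 0.5 * (0.25 * 2.0 * 0.25) = -0.1875, a predicted
        # energy decrease.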
        return np.dot(gradient, step) + 0.5 * np.dot(np.dot(step, hessian), step)

    def is_converged(self):
        """Check if the optimization has converged"""
        return self.converged

    def get_predicted_energy_changes(self):
        """Get the history of predicted energy changes"""
        return self.predicted_energy_changes

    def get_actual_energy_changes(self):
        """Get the history of actual energy changes"""
        return self.actual_energy_changes

    def set_hessian(self, hessian):
        """Set the Hessian matrix"""
        self.hessian = hessian
        return

    def set_bias_hessian(self, bias_hessian):
        """Set the bias Hessian matrix"""
        self.bias_hessian = bias_hessian
        return

    def get_hessian(self):
        """Get the current Hessian matrix"""
        return self.hessian

    def get_bias_hessian(self):
        """Get the current bias Hessian matrix"""
        return self.bias_hessian

    def reset_trust_radius(self):
        """Reset the trust radius to its initial value"""
        self.trust_radius = self.trust_radius_initial
        self.log(f"Trust radius reset to initial value: {self.trust_radius:.6f}", force=True)