MultiOptPy 1.20.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/Calculator/__init__.py +0 -0
- multioptpy/Calculator/ase_calculation_tools.py +424 -0
- multioptpy/Calculator/ase_tools/__init__.py +0 -0
- multioptpy/Calculator/ase_tools/fairchem.py +28 -0
- multioptpy/Calculator/ase_tools/gamess.py +19 -0
- multioptpy/Calculator/ase_tools/gaussian.py +165 -0
- multioptpy/Calculator/ase_tools/mace.py +28 -0
- multioptpy/Calculator/ase_tools/mopac.py +19 -0
- multioptpy/Calculator/ase_tools/nwchem.py +31 -0
- multioptpy/Calculator/ase_tools/orca.py +22 -0
- multioptpy/Calculator/ase_tools/pygfn0.py +37 -0
- multioptpy/Calculator/dxtb_calculation_tools.py +344 -0
- multioptpy/Calculator/emt_calculation_tools.py +458 -0
- multioptpy/Calculator/gpaw_calculation_tools.py +183 -0
- multioptpy/Calculator/lj_calculation_tools.py +314 -0
- multioptpy/Calculator/psi4_calculation_tools.py +334 -0
- multioptpy/Calculator/pwscf_calculation_tools.py +189 -0
- multioptpy/Calculator/pyscf_calculation_tools.py +327 -0
- multioptpy/Calculator/sqm1_calculation_tools.py +611 -0
- multioptpy/Calculator/sqm2_calculation_tools.py +376 -0
- multioptpy/Calculator/tblite_calculation_tools.py +352 -0
- multioptpy/Calculator/tersoff_calculation_tools.py +818 -0
- multioptpy/Constraint/__init__.py +0 -0
- multioptpy/Constraint/constraint_condition.py +834 -0
- multioptpy/Coordinate/__init__.py +0 -0
- multioptpy/Coordinate/polar_coordinate.py +199 -0
- multioptpy/Coordinate/redundant_coordinate.py +638 -0
- multioptpy/IRC/__init__.py +0 -0
- multioptpy/IRC/converge_criteria.py +28 -0
- multioptpy/IRC/dvv.py +544 -0
- multioptpy/IRC/euler.py +439 -0
- multioptpy/IRC/hpc.py +564 -0
- multioptpy/IRC/lqa.py +540 -0
- multioptpy/IRC/modekill.py +662 -0
- multioptpy/IRC/rk4.py +579 -0
- multioptpy/Interpolation/__init__.py +0 -0
- multioptpy/Interpolation/adaptive_interpolation.py +283 -0
- multioptpy/Interpolation/binomial_interpolation.py +179 -0
- multioptpy/Interpolation/geodesic_interpolation.py +785 -0
- multioptpy/Interpolation/interpolation.py +156 -0
- multioptpy/Interpolation/linear_interpolation.py +473 -0
- multioptpy/Interpolation/savitzky_golay_interpolation.py +252 -0
- multioptpy/Interpolation/spline_interpolation.py +353 -0
- multioptpy/MD/__init__.py +0 -0
- multioptpy/MD/thermostat.py +185 -0
- multioptpy/MEP/__init__.py +0 -0
- multioptpy/MEP/pathopt_bneb_force.py +443 -0
- multioptpy/MEP/pathopt_dmf_force.py +448 -0
- multioptpy/MEP/pathopt_dneb_force.py +130 -0
- multioptpy/MEP/pathopt_ewbneb_force.py +207 -0
- multioptpy/MEP/pathopt_gpneb_force.py +512 -0
- multioptpy/MEP/pathopt_lup_force.py +113 -0
- multioptpy/MEP/pathopt_neb_force.py +225 -0
- multioptpy/MEP/pathopt_nesb_force.py +205 -0
- multioptpy/MEP/pathopt_om_force.py +153 -0
- multioptpy/MEP/pathopt_qsm_force.py +174 -0
- multioptpy/MEP/pathopt_qsmv2_force.py +304 -0
- multioptpy/ModelFunction/__init__.py +7 -0
- multioptpy/ModelFunction/avoiding_model_function.py +29 -0
- multioptpy/ModelFunction/binary_image_ts_search_model_function.py +47 -0
- multioptpy/ModelFunction/conical_model_function.py +26 -0
- multioptpy/ModelFunction/opt_meci.py +50 -0
- multioptpy/ModelFunction/opt_mesx.py +47 -0
- multioptpy/ModelFunction/opt_mesx_2.py +49 -0
- multioptpy/ModelFunction/seam_model_function.py +27 -0
- multioptpy/ModelHessian/__init__.py +0 -0
- multioptpy/ModelHessian/approx_hessian.py +147 -0
- multioptpy/ModelHessian/calc_params.py +227 -0
- multioptpy/ModelHessian/fischer.py +236 -0
- multioptpy/ModelHessian/fischerd3.py +360 -0
- multioptpy/ModelHessian/fischerd4.py +398 -0
- multioptpy/ModelHessian/gfn0xtb.py +633 -0
- multioptpy/ModelHessian/gfnff.py +709 -0
- multioptpy/ModelHessian/lindh.py +165 -0
- multioptpy/ModelHessian/lindh2007d2.py +707 -0
- multioptpy/ModelHessian/lindh2007d3.py +822 -0
- multioptpy/ModelHessian/lindh2007d4.py +1030 -0
- multioptpy/ModelHessian/morse.py +106 -0
- multioptpy/ModelHessian/schlegel.py +144 -0
- multioptpy/ModelHessian/schlegeld3.py +322 -0
- multioptpy/ModelHessian/schlegeld4.py +559 -0
- multioptpy/ModelHessian/shortrange.py +346 -0
- multioptpy/ModelHessian/swartd2.py +496 -0
- multioptpy/ModelHessian/swartd3.py +706 -0
- multioptpy/ModelHessian/swartd4.py +918 -0
- multioptpy/ModelHessian/tshess.py +40 -0
- multioptpy/Optimizer/QHAdam.py +61 -0
- multioptpy/Optimizer/__init__.py +0 -0
- multioptpy/Optimizer/abc_fire.py +83 -0
- multioptpy/Optimizer/adabelief.py +58 -0
- multioptpy/Optimizer/adabound.py +68 -0
- multioptpy/Optimizer/adadelta.py +65 -0
- multioptpy/Optimizer/adaderivative.py +56 -0
- multioptpy/Optimizer/adadiff.py +68 -0
- multioptpy/Optimizer/adafactor.py +70 -0
- multioptpy/Optimizer/adam.py +65 -0
- multioptpy/Optimizer/adamax.py +62 -0
- multioptpy/Optimizer/adamod.py +83 -0
- multioptpy/Optimizer/adamw.py +65 -0
- multioptpy/Optimizer/adiis.py +523 -0
- multioptpy/Optimizer/afire_neb.py +282 -0
- multioptpy/Optimizer/block_hessian_update.py +709 -0
- multioptpy/Optimizer/c2diis.py +491 -0
- multioptpy/Optimizer/component_wise_scaling.py +405 -0
- multioptpy/Optimizer/conjugate_gradient.py +82 -0
- multioptpy/Optimizer/conjugate_gradient_neb.py +345 -0
- multioptpy/Optimizer/coordinate_locking.py +405 -0
- multioptpy/Optimizer/dic_rsirfo.py +1015 -0
- multioptpy/Optimizer/ediis.py +417 -0
- multioptpy/Optimizer/eve.py +76 -0
- multioptpy/Optimizer/fastadabelief.py +61 -0
- multioptpy/Optimizer/fire.py +77 -0
- multioptpy/Optimizer/fire2.py +249 -0
- multioptpy/Optimizer/fire_neb.py +92 -0
- multioptpy/Optimizer/gan_step.py +486 -0
- multioptpy/Optimizer/gdiis.py +609 -0
- multioptpy/Optimizer/gediis.py +203 -0
- multioptpy/Optimizer/geodesic_step.py +433 -0
- multioptpy/Optimizer/gpmin.py +633 -0
- multioptpy/Optimizer/gpr_step.py +364 -0
- multioptpy/Optimizer/gradientdescent.py +78 -0
- multioptpy/Optimizer/gradientdescent_neb.py +52 -0
- multioptpy/Optimizer/hessian_update.py +433 -0
- multioptpy/Optimizer/hybrid_rfo.py +998 -0
- multioptpy/Optimizer/kdiis.py +625 -0
- multioptpy/Optimizer/lars.py +21 -0
- multioptpy/Optimizer/lbfgs.py +253 -0
- multioptpy/Optimizer/lbfgs_neb.py +355 -0
- multioptpy/Optimizer/linesearch.py +236 -0
- multioptpy/Optimizer/lookahead.py +40 -0
- multioptpy/Optimizer/nadam.py +64 -0
- multioptpy/Optimizer/newton.py +200 -0
- multioptpy/Optimizer/prodigy.py +70 -0
- multioptpy/Optimizer/purtubation.py +16 -0
- multioptpy/Optimizer/quickmin_neb.py +245 -0
- multioptpy/Optimizer/radam.py +75 -0
- multioptpy/Optimizer/rfo_neb.py +302 -0
- multioptpy/Optimizer/ric_rfo.py +842 -0
- multioptpy/Optimizer/rl_step.py +627 -0
- multioptpy/Optimizer/rmspropgrave.py +65 -0
- multioptpy/Optimizer/rsirfo.py +1647 -0
- multioptpy/Optimizer/rsprfo.py +1056 -0
- multioptpy/Optimizer/sadam.py +60 -0
- multioptpy/Optimizer/samsgrad.py +63 -0
- multioptpy/Optimizer/tr_lbfgs.py +678 -0
- multioptpy/Optimizer/trim.py +273 -0
- multioptpy/Optimizer/trust_radius.py +207 -0
- multioptpy/Optimizer/trust_radius_neb.py +121 -0
- multioptpy/Optimizer/yogi.py +60 -0
- multioptpy/OtherMethod/__init__.py +0 -0
- multioptpy/OtherMethod/addf.py +1150 -0
- multioptpy/OtherMethod/dimer.py +895 -0
- multioptpy/OtherMethod/elastic_image_pair.py +629 -0
- multioptpy/OtherMethod/modelfunction.py +456 -0
- multioptpy/OtherMethod/newton_traj.py +454 -0
- multioptpy/OtherMethod/twopshs.py +1095 -0
- multioptpy/PESAnalyzer/__init__.py +0 -0
- multioptpy/PESAnalyzer/calc_irc_curvature.py +125 -0
- multioptpy/PESAnalyzer/cmds_analysis.py +152 -0
- multioptpy/PESAnalyzer/koopman_analysis.py +268 -0
- multioptpy/PESAnalyzer/pca_analysis.py +314 -0
- multioptpy/Parameters/__init__.py +0 -0
- multioptpy/Parameters/atomic_mass.py +20 -0
- multioptpy/Parameters/atomic_number.py +22 -0
- multioptpy/Parameters/covalent_radii.py +44 -0
- multioptpy/Parameters/d2.py +61 -0
- multioptpy/Parameters/d3.py +63 -0
- multioptpy/Parameters/d4.py +103 -0
- multioptpy/Parameters/dreiding.py +34 -0
- multioptpy/Parameters/gfn0xtb_param.py +137 -0
- multioptpy/Parameters/gfnff_param.py +315 -0
- multioptpy/Parameters/gnb.py +104 -0
- multioptpy/Parameters/parameter.py +22 -0
- multioptpy/Parameters/uff.py +72 -0
- multioptpy/Parameters/unit_values.py +20 -0
- multioptpy/Potential/AFIR_potential.py +55 -0
- multioptpy/Potential/LJ_repulsive_potential.py +345 -0
- multioptpy/Potential/__init__.py +0 -0
- multioptpy/Potential/anharmonic_keep_potential.py +28 -0
- multioptpy/Potential/asym_elllipsoidal_potential.py +718 -0
- multioptpy/Potential/electrostatic_potential.py +69 -0
- multioptpy/Potential/flux_potential.py +30 -0
- multioptpy/Potential/gaussian_potential.py +101 -0
- multioptpy/Potential/idpp.py +516 -0
- multioptpy/Potential/keep_angle_potential.py +146 -0
- multioptpy/Potential/keep_dihedral_angle_potential.py +105 -0
- multioptpy/Potential/keep_outofplain_angle_potential.py +70 -0
- multioptpy/Potential/keep_potential.py +99 -0
- multioptpy/Potential/mechano_force_potential.py +74 -0
- multioptpy/Potential/nanoreactor_potential.py +52 -0
- multioptpy/Potential/potential.py +896 -0
- multioptpy/Potential/spacer_model_potential.py +221 -0
- multioptpy/Potential/switching_potential.py +258 -0
- multioptpy/Potential/universal_potential.py +34 -0
- multioptpy/Potential/value_range_potential.py +36 -0
- multioptpy/Potential/void_point_potential.py +25 -0
- multioptpy/SQM/__init__.py +0 -0
- multioptpy/SQM/sqm1/__init__.py +0 -0
- multioptpy/SQM/sqm1/sqm1_core.py +1792 -0
- multioptpy/SQM/sqm2/__init__.py +0 -0
- multioptpy/SQM/sqm2/calc_tools.py +95 -0
- multioptpy/SQM/sqm2/sqm2_basis.py +850 -0
- multioptpy/SQM/sqm2/sqm2_bond.py +119 -0
- multioptpy/SQM/sqm2/sqm2_core.py +303 -0
- multioptpy/SQM/sqm2/sqm2_data.py +1229 -0
- multioptpy/SQM/sqm2/sqm2_disp.py +65 -0
- multioptpy/SQM/sqm2/sqm2_eeq.py +243 -0
- multioptpy/SQM/sqm2/sqm2_overlapint.py +704 -0
- multioptpy/SQM/sqm2/sqm2_qm.py +578 -0
- multioptpy/SQM/sqm2/sqm2_rep.py +66 -0
- multioptpy/SQM/sqm2/sqm2_srb.py +70 -0
- multioptpy/Thermo/__init__.py +0 -0
- multioptpy/Thermo/normal_mode_analyzer.py +865 -0
- multioptpy/Utils/__init__.py +0 -0
- multioptpy/Utils/bond_connectivity.py +264 -0
- multioptpy/Utils/calc_tools.py +884 -0
- multioptpy/Utils/oniom.py +96 -0
- multioptpy/Utils/pbc.py +48 -0
- multioptpy/Utils/riemann_curvature.py +208 -0
- multioptpy/Utils/symmetry_analyzer.py +482 -0
- multioptpy/Visualization/__init__.py +0 -0
- multioptpy/Visualization/visualization.py +156 -0
- multioptpy/WFAnalyzer/MO_analysis.py +104 -0
- multioptpy/WFAnalyzer/__init__.py +0 -0
- multioptpy/Wrapper/__init__.py +0 -0
- multioptpy/Wrapper/autots.py +1239 -0
- multioptpy/Wrapper/ieip_wrapper.py +93 -0
- multioptpy/Wrapper/md_wrapper.py +92 -0
- multioptpy/Wrapper/neb_wrapper.py +94 -0
- multioptpy/Wrapper/optimize_wrapper.py +76 -0
- multioptpy/__init__.py +5 -0
- multioptpy/entrypoints.py +916 -0
- multioptpy/fileio.py +660 -0
- multioptpy/ieip.py +340 -0
- multioptpy/interface.py +1086 -0
- multioptpy/irc.py +529 -0
- multioptpy/moleculardynamics.py +432 -0
- multioptpy/neb.py +1267 -0
- multioptpy/optimization.py +1553 -0
- multioptpy/optimizer.py +709 -0
- multioptpy-1.20.2.dist-info/METADATA +438 -0
- multioptpy-1.20.2.dist-info/RECORD +246 -0
- multioptpy-1.20.2.dist-info/WHEEL +5 -0
- multioptpy-1.20.2.dist-info/entry_points.txt +9 -0
- multioptpy-1.20.2.dist-info/licenses/LICENSE +674 -0
- multioptpy-1.20.2.dist-info/top_level.txt +1 -0
multioptpy/Optimizer/rsprfo.py
@@ -0,0 +1,1056 @@
```python
import numpy as np
from numpy.linalg import norm
import copy

from multioptpy.Optimizer.hessian_update import ModelHessianUpdate
from multioptpy.Optimizer.block_hessian_update import BlockHessianUpdate
from multioptpy.Utils.calc_tools import Calculationtools


class EnhancedRSPRFO:
    def __init__(self, **config):
        """
        Enhanced Rational Step P-RFO (Rational Function Optimization) for transition state searches
        with dynamic trust radius adjustment based on trust region methodology

        References:
        [1] Banerjee et al., Phys. Chem., 89, 52-57 (1985)
        [2] Heyden et al., J. Chem. Phys., 123, 224101 (2005)
        [3] Baker, J. Comput. Chem., 7, 385-395 (1986)
        [4] Besalú and Bofill, Theor. Chem. Acc., 100, 265-274 (1998)
        [5] Jensen and Jørgensen, J. Chem. Phys., 80, 1204 (1984) [Eigenvector following]
        [6] Yuan, SIAM J. Optim. 11, 325-357 (2000) [Trust region methods]

        This code is made based on the below codes.
        1, https://github.com/eljost/pysisyphus/blob/master/pysisyphus/tsoptimizers/RSPRFOptimizer.py

        """
        # Standard RSPRFO parameters
        self.alpha0 = config.get("alpha0", 1.0)
        self.max_micro_cycles = config.get("max_micro_cycles", 20)  # Increased from 1 to 20
        self.saddle_order = config.get("saddle_order", 1)
        self.hessian_update_method = config.get("method", "auto")
        self.display_flag = config.get("display_flag", True)
        self.debug = config.get("debug", False)

        # Alpha constraints to prevent numerical instability
        self.alpha_max = config.get("alpha_max", 1e6)
        self.alpha_step_max = config.get("alpha_step_max", 10.0)

        # Trust region parameters
        if self.saddle_order == 0:
            self.trust_radius_initial = config.get("trust_radius", 0.5)
            self.trust_radius_max = config.get("trust_radius_max", 0.5)  # Upper bound (delta_hat)
        else:
            self.trust_radius_initial = config.get("trust_radius", 0.1)
            self.trust_radius_max = config.get("trust_radius_max", 0.1)  # Upper bound for TS search

        self.trust_radius = self.trust_radius_initial  # Current trust radius (delta_tr)
        self.trust_radius_min = config.get("trust_radius_min", 0.01)  # Lower bound (delta_min)

        # Trust region acceptance thresholds
        self.accept_poor_threshold = config.get("accept_poor_threshold", 0.25)  # Threshold for poor steps
        self.accept_good_threshold = config.get("accept_good_threshold", 0.75)  # Threshold for very good steps
        self.shrink_factor = config.get("shrink_factor", 0.50)  # Factor to shrink trust radius
        self.expand_factor = config.get("expand_factor", 2.00)  # Factor to expand trust radius
        self.rtol_boundary = config.get("rtol_boundary", 0.10)  # Relative tolerance for boundary detection

        # Whether to use trust radius adaptation
        self.adapt_trust_radius = config.get("adapt_trust_radius", True)

        # Rest of initialization
        self.config = config
        self.Initialization = True
        self.iter = 0

        # Hessian-related variables
        self.hessian = None
        self.bias_hessian = None

        # Optimization tracking variables
        self.prev_eigvec_max = None
        self.prev_eigvec_min = None
        self.predicted_energy_changes = []
        self.actual_energy_changes = []
        self.reduction_ratios = []
        self.trust_radius_history = []
        self.prev_geometry = None
        self.prev_gradient = None
        self.prev_energy = None
        self.prev_move_vector = None

        # Mode Following specific parameters
        self.mode_following_enabled = config.get("mode_following", True)
        self.eigvec_history = []  # History of eigenvectors for consistent tracking
        self.ts_mode_idx = None  # Current index of transition state direction

        # Eigenvector Following settings
        self.eigvec_following = config.get("eigvec_following", True)
        self.overlap_threshold = config.get("overlap_threshold", 0.5)
        self.mixing_threshold = config.get("mixing_threshold", 0.3)

        # Define modes based on saddle order
        self.roots = list(range(self.saddle_order))

        # Initialize the hessian update module
        self.hessian_updater = ModelHessianUpdate()
        self.block_hessian_updater = BlockHessianUpdate()

        # Build Hessian updater dispatch list
        self._build_hessian_updater_list()

        self.log(f"Initialized EnhancedRSPRFO with trust radius={self.trust_radius:.6f}, "
                 f"bounds=[{self.trust_radius_min:.6f}, {self.trust_radius_max:.6f}]")

    def _build_hessian_updater_list(self):
        """Builds the prioritized dispatch list for Hessian updaters (from RSIRFO)."""
        self.default_update_method = (
            "auto (default)",
            lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")
        )
        self.updater_dispatch_list = [
            ("flowchart", "flowchart", lambda h, d, g: self.hessian_updater.flowchart_hessian_update(h, d, g, "auto")),
            ("block_cfd_fsb_dd", "block_cfd_fsb_dd", self.block_hessian_updater.block_CFD_FSB_hessian_update_dd),
            ("block_cfd_fsb_weighted", "block_cfd_fsb_weighted", self.block_hessian_updater.block_CFD_FSB_hessian_update_weighted),
            ("block_cfd_fsb", "block_cfd_fsb", self.block_hessian_updater.block_CFD_FSB_hessian_update),
            ("block_cfd_bofill_weighted", "block_cfd_bofill_weighted", self.block_hessian_updater.block_CFD_Bofill_hessian_update_weighted),
            ("block_cfd_bofill", "block_cfd_bofill", self.block_hessian_updater.block_CFD_Bofill_hessian_update),
            ("block_bfgs_dd", "block_bfgs_dd", self.block_hessian_updater.block_BFGS_hessian_update_dd),
            ("block_bfgs", "block_bfgs", self.block_hessian_updater.block_BFGS_hessian_update),
            ("block_fsb_dd", "block_fsb_dd", self.block_hessian_updater.block_FSB_hessian_update_dd),
            ("block_fsb_weighted", "block_fsb_weighted", self.block_hessian_updater.block_FSB_hessian_update_weighted),
            ("block_fsb", "block_fsb", self.block_hessian_updater.block_FSB_hessian_update),
            ("block_bofill_weighted", "block_bofill_weighted", self.block_hessian_updater.block_Bofill_hessian_update_weighted),
            ("block_bofill", "block_bofill", self.block_hessian_updater.block_Bofill_hessian_update),
            ("bfgs_dd", "bfgs_dd", self.hessian_updater.BFGS_hessian_update_dd),
            ("bfgs", "bfgs", self.hessian_updater.BFGS_hessian_update),
            ("sr1", "sr1", self.hessian_updater.SR1_hessian_update),
            ("pcfd_bofill", "pcfd_bofill", self.hessian_updater.pCFD_Bofill_hessian_update),
            ("cfd_fsb_dd", "cfd_fsb_dd", self.hessian_updater.CFD_FSB_hessian_update_dd),
            ("cfd_fsb", "cfd_fsb", self.hessian_updater.CFD_FSB_hessian_update),
            ("cfd_bofill", "cfd_bofill", self.hessian_updater.CFD_Bofill_hessian_update),
            ("fsb_dd", "fsb_dd", self.hessian_updater.FSB_hessian_update_dd),
            ("fsb", "fsb", self.hessian_updater.FSB_hessian_update),
            ("bofill", "bofill", self.hessian_updater.Bofill_hessian_update),
            ("psb", "psb", self.hessian_updater.PSB_hessian_update),
            ("msp", "msp", self.hessian_updater.MSP_hessian_update),
        ]

    def compute_reduction_ratio(self, gradient, hessian, step, actual_reduction):
        """
        Compute ratio between actual and predicted reduction in energy

        Parameters:
        gradient: numpy.ndarray - Current gradient
        hessian: numpy.ndarray - Current approximate Hessian
        step: numpy.ndarray - Step vector
        actual_reduction: float - Actual energy reduction (previous_energy - current_energy)

        Returns:
        float: Ratio of actual to predicted reduction
        """
        # Calculate predicted reduction from quadratic model
        g_flat = gradient.flatten()
        step_flat = step.flatten()

        # Linear term of the model: g^T * p
        linear_term = np.dot(g_flat, step_flat)

        # Quadratic term of the model: 0.5 * p^T * H * p
        quadratic_term = 0.5 * np.dot(step_flat, np.dot(hessian, step_flat))

        # Predicted reduction: -g^T * p - 0.5 * p^T * H * p
        # Negative sign because we're predicting the reduction (energy decrease)
        predicted_reduction = -(linear_term + quadratic_term)

        # Avoid division by zero or very small numbers
        if abs(predicted_reduction) < 1e-10:
            self.log("Warning: Predicted reduction is near zero")
            return 0.0

        # Calculate ratio
        ratio = actual_reduction / predicted_reduction

        # Safeguard against numerical issues
        if not np.isfinite(ratio):
            self.log("Warning: Non-finite reduction ratio, using 0.0")
            return 0.0

        self.log(f"Actual reduction: {actual_reduction:.6e}, "
                 f"Predicted reduction: {predicted_reduction:.6e}, "
                 f"Ratio: {ratio:.4f}")

        return ratio

    def adjust_trust_radius(self, actual_energy_change, predicted_energy_change, step_norm):
        """
        Dynamically adjust the trust radius based on ratio between actual and predicted reductions
        using the trust region methodology
        """
        if not self.adapt_trust_radius or actual_energy_change is None or predicted_energy_change is None:
            return

        # Avoid division by zero or very small numbers
        if abs(predicted_energy_change) < 1e-10:
            self.log("Skipping trust radius update due to negligible predicted energy change")
            return

        # Calculate the ratio between actual and predicted energy changes
        # Use absolute values to focus on magnitude of agreement
        ratio = abs(actual_energy_change / predicted_energy_change)
        self.log(f"Raw reduction ratio: {actual_energy_change / predicted_energy_change:.4f}")
        self.log(f"Absolute reduction ratio: {ratio:.4f}")
        self.reduction_ratios.append(ratio)

        old_trust_radius = self.trust_radius

        # Improved boundary detection - check if step is close to current trust radius
        at_boundary = step_norm >= old_trust_radius * 0.95  # Within 5% of trust radius
        self.log(f"Step norm: {step_norm:.6f}, Trust radius: {old_trust_radius:.6f}, At boundary: {at_boundary}")

        # Better logic for trust radius adjustment
        if ratio < 0.25 or ratio > 4.0:  # Predicted energy change is very different from actual
            # Poor prediction - decrease the trust radius
            self.trust_radius = max(self.shrink_factor * self.trust_radius, self.trust_radius_min)
            if self.trust_radius != old_trust_radius:
                self.log(f"Poor step quality (ratio={ratio:.3f}), shrinking trust radius to {self.trust_radius:.6f}")
        elif (0.8 <= ratio <= 1.25) and at_boundary:
            # Very good prediction and step at trust radius boundary - increase the trust radius
            self.trust_radius = min(self.expand_factor * self.trust_radius, self.trust_radius_max)
            if self.trust_radius != old_trust_radius:
                self.log(f"Good step quality (ratio={ratio:.3f}) at boundary, expanding trust radius to {self.trust_radius:.6f}")
        else:
            # Acceptable prediction or step not at boundary - keep the same trust radius
            self.log(f"Acceptable step quality (ratio={ratio:.3f}), keeping trust radius at {self.trust_radius:.6f}")

    def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0, pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
        """
        Execute one step of enhanced RSPRFO optimization with trust radius adjustment

        Parameters:
        geom_num_list: numpy.ndarray - Current geometry coordinates
        B_g: numpy.ndarray - Current gradient
        pre_B_g: numpy.ndarray - Previous gradient
        pre_geom: numpy.ndarray - Previous geometry
        B_e: float - Current energy
        pre_B_e: float - Previous energy
        pre_move_vector: numpy.ndarray - Previous step vector
        initial_geom_num_list: numpy.ndarray - Initial geometry
        g: numpy.ndarray - Alternative gradient representation
        pre_g: numpy.ndarray - Previous alternative gradient representation

        Returns:
        numpy.ndarray - Optimization step vector
        """
        self.log(f"\n{'='*50}\nIteration {self.iter}\n{'='*50}")

        if self.Initialization:
            self.prev_eigvec_max = None
            self.prev_eigvec_min = None
            self.predicted_energy_changes = []
            self.actual_energy_changes = []
            self.reduction_ratios = []
            self.trust_radius_history = []
            self.prev_geometry = None
            self.prev_gradient = None
            self.prev_energy = None
            self.prev_move_vector = None
            self.eigvec_history = []
            self.ts_mode_idx = None
            self.Initialization = False
            self.log(f"First iteration - using initial trust radius {self.trust_radius:.6f}")
        else:
            # Adjust trust radius based on the previous step if we have energy data
            if self.prev_energy is not None and len(self.predicted_energy_changes) > 0:
                actual_energy_change = B_e - self.prev_energy
                predicted_energy_change = self.predicted_energy_changes[-1]
                self.actual_energy_changes.append(actual_energy_change)

                # Get the previous step length
                if len(pre_move_vector) > 0:
                    prev_step_norm = norm(pre_move_vector.flatten())
                elif self.prev_move_vector is not None:
                    prev_step_norm = norm(self.prev_move_vector.flatten())
                else:
                    prev_step_norm = 0.0

                # Log energy comparison
                self.log(f"Previous energy: {self.prev_energy:.6f}, Current energy: {B_e:.6f}")
                self.log(f"Actual energy change: {actual_energy_change:.6f}")
                self.log(f"Predicted energy change: {predicted_energy_change:.6f}")
                self.log(f"Previous step norm: {prev_step_norm:.6f}")

                # Complete Hessian for the reduction ratio calculation
                H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian
                H = Calculationtools().project_out_hess_tr_and_rot_for_coord(H, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), display_eigval=False)
                # Compute reduction ratio
                reduction_ratio = self.compute_reduction_ratio(
                    self.prev_gradient, H, self.prev_move_vector, actual_energy_change)

                # Adjust trust radius based on step quality and length
                self.adjust_trust_radius(actual_energy_change, predicted_energy_change, prev_step_norm)

        # Check Hessian
        if self.hessian is None:
            raise ValueError("Hessian matrix must be set before running optimization")

        # Update Hessian if we have previous geometry and gradient information
        if self.prev_geometry is not None and self.prev_gradient is not None and len(pre_B_g) > 0 and len(pre_geom) > 0:
            self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)

        # Ensure gradient is properly shaped as a 1D array
        gradient = np.asarray(B_g).flatten()
        H = self.hessian + self.bias_hessian if self.bias_hessian is not None else self.hessian

        # Compute eigenvalues and eigenvectors of the hessian
        eigvals, eigvecs = np.linalg.eigh(H)

        # Count negative eigenvalues for diagnostic purposes
        neg_eigval_count = np.sum(eigvals < -1e-6)
        self.log(f"Found {neg_eigval_count} negative eigenvalues, target for this saddle order: {self.saddle_order}")

        # Store previous eigenvector information
        prev_eigvecs = None
        if len(self.eigvec_history) > 0:
            prev_eigvecs = self.eigvec_history[-1]

        # Standard mode selection (with mode following if enabled)
        if self.mode_following_enabled and self.saddle_order > 0:
            if self.ts_mode_idx is None:
                # For first run, select mode with most negative eigenvalue
                self.ts_mode_idx = np.argmin(eigvals)
                self.log(f"Initial TS mode selected: {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")

            # Find corresponding modes between steps
            mode_indices = self.find_corresponding_mode(eigvals, eigvecs, prev_eigvecs, self.ts_mode_idx)

            # Apply Eigenvector Following for cases with mode mixing
            if self.eigvec_following and len(mode_indices) > 1:
                mode_indices = self.apply_eigenvector_following(eigvals, eigvecs, gradient.dot(eigvecs), mode_indices)

            # Update tracked mode
            if mode_indices:
                self.ts_mode_idx = mode_indices[0]
                self.log(f"Mode following: tracking mode {self.ts_mode_idx} with eigenvalue {eigvals[self.ts_mode_idx]:.6f}")

                # Update max_indices (saddle point direction)
                max_indices = mode_indices
            else:
                # If no corresponding mode found, use standard approach
                self.log("No corresponding mode found, using default mode selection")
                max_indices = self.roots
        else:
            # Standard mode selection when mode following is disabled
            if self.saddle_order == 0:
                min_indices = list(range(len(gradient)))
                max_indices = []
            else:
                min_indices = [i for i in range(gradient.size) if i not in self.roots]
                max_indices = self.roots

        # Store eigenvectors in history
        self.eigvec_history.append(eigvecs)
        if len(self.eigvec_history) > 5:  # Keep only last 5 steps
            self.eigvec_history.pop(0)

        # Transform gradient to eigenvector space
        gradient_trans = eigvecs.T.dot(gradient).flatten()

        # Set minimization directions (all directions not in max_indices)
        min_indices = [i for i in range(gradient.size) if i not in max_indices]

        # Initialize alpha parameter
        alpha = self.alpha0

        # Tracking variables
        best_step = None
        best_step_norm_diff = float('inf')
        step_norm_history = []

        # NEW IMPLEMENTATION: Micro-cycle loop with improved alpha calculation
        for mu in range(self.max_micro_cycles):
            self.log(f"RS-PRFO micro cycle {mu:02d}, alpha={alpha:.6f}, trust radius={self.trust_radius:.6f}")

            try:
                # Make a fresh step vector for this cycle - essential to ensure proper recalculation
                step = np.zeros_like(gradient_trans)

                # Maximization subspace calculation
                step_max = np.array([])
                eigval_max = 0
                if len(max_indices) > 0:
                    # Calculate augmented Hessian
                    H_aug_max = self.get_augmented_hessian(
                        eigvals[max_indices], gradient_trans[max_indices], alpha
                    )

                    # Solve RFO equations
                    step_max, eigval_max, nu_max, eigvec_max = self.solve_rfo(
                        H_aug_max, "max", prev_eigvec=self.prev_eigvec_max
                    )

                    # Store eigenvector for next iteration
                    self.prev_eigvec_max = eigvec_max

                    # Copy step to the main step vector
                    step[max_indices] = step_max

                # Minimization subspace calculation
                step_min = np.array([])
                eigval_min = 0
                if len(min_indices) > 0:
                    # Calculate augmented Hessian
                    H_aug_min = self.get_augmented_hessian(
                        eigvals[min_indices], gradient_trans[min_indices], alpha
                    )

                    # Solve RFO equations
                    step_min, eigval_min, nu_min, eigvec_min = self.solve_rfo(
                        H_aug_min, "min", prev_eigvec=self.prev_eigvec_min
                    )

                    # Store eigenvector for next iteration
                    self.prev_eigvec_min = eigvec_min

                    # Copy step to the main step vector
                    step[min_indices] = step_min

                # Calculate norms of the current step
                step_max_norm = np.linalg.norm(step_max) if len(max_indices) > 0 else 0.0
                step_min_norm = np.linalg.norm(step_min) if len(min_indices) > 0 else 0.0
                step_norm = np.linalg.norm(step)

                # Log the current norms
                if len(max_indices) > 0:
                    self.log(f"norm(step_max)={step_max_norm:.6f}")
                if len(min_indices) > 0:
                    self.log(f"norm(step_min)={step_min_norm:.6f}")

                self.log(f"norm(step)={step_norm:.6f}")

                # Keep track of step norm history for convergence detection
                step_norm_history.append(step_norm)

                # Save this step if it's closest to trust radius (for later use)
                norm_diff = abs(step_norm - self.trust_radius)
                if norm_diff < best_step_norm_diff:
                    best_step = step.copy()
                    best_step_norm_diff = norm_diff

                # Check if step is already within trust radius
                if step_norm <= self.trust_radius:
                    self.log(f"Step satisfies trust radius {self.trust_radius:.6f}")
                    break

                # Calculate alpha update for each subspace
                # Max subspace
                alpha_step_max = 0.0
                if len(max_indices) > 0:
                    alpha_step_max = self.get_alpha_step(
                        alpha, eigval_max, step_max_norm, eigvals[max_indices],
                        gradient_trans[max_indices], "max"
                    )

                # Min subspace
                alpha_step_min = 0.0
                if len(min_indices) > 0:
                    alpha_step_min = self.get_alpha_step(
                        alpha, eigval_min, step_min_norm, eigvals[min_indices],
                        gradient_trans[min_indices], "min"
                    )

                # Combine alpha steps with appropriate weighting
                alpha_step = 0.0
                if alpha_step_max != 0.0 and alpha_step_min != 0.0:
                    # Weight by squared norms
                    w_max = step_max_norm**2 if step_max_norm > 0.0 else 0.0
                    w_min = step_min_norm**2 if step_min_norm > 0.0 else 0.0
                    if w_max + w_min > 0.0:
                        alpha_step = (w_max * alpha_step_max + w_min * alpha_step_min) / (w_max + w_min)
                    else:
                        alpha_step = alpha_step_max if abs(alpha_step_max) > abs(alpha_step_min) else alpha_step_min
                else:
                    alpha_step = alpha_step_max if alpha_step_max != 0.0 else alpha_step_min

                # If alpha_step is still 0, use a direct calculation with the total step
                if abs(alpha_step) < 1e-10 and step_norm > 0.0:
                    try:
                        # Calculate derivative directly using analytic formula
                        dstep2_dalpha = self.calculate_step_derivative(
                            alpha, eigval_max, eigval_min, eigvals,
                            max_indices, min_indices, gradient_trans, step_norm
                        )

                        if abs(dstep2_dalpha) > 1e-10:
                            alpha_step = 2.0 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
                            self.log(f"Direct alpha_step calculation: {alpha_step:.6f}")
                    except Exception as e:
                        self.log(f"Error in direct derivative calculation: {str(e)}")
                        alpha_step = 0.0

                # Update alpha with proper bounds
                old_alpha = alpha

                # If derivative-based approach fails, use heuristic
                if abs(alpha_step) < 1e-10:
                    # Apply a more aggressive heuristic - double alpha
                    alpha = min(alpha * 2.0, self.alpha_max)
                    self.log(f"Using heuristic alpha update: {old_alpha:.6f} -> {alpha:.6f}")
                else:
                    # Apply safety bounds to alpha_step
                    alpha_step_limited = np.clip(alpha_step, -self.alpha_step_max, self.alpha_step_max)

                    if abs(alpha_step_limited) != abs(alpha_step):
                        self.log(f"Limited alpha_step from {alpha_step:.6f} to {alpha_step_limited:.6f}")

                    # Ensure alpha remains positive and within bounds
                    alpha = min(max(old_alpha + alpha_step_limited, 1e-6), self.alpha_max)
                    self.log(f"Updated alpha: {old_alpha:.6f} -> {alpha:.6f}")

                # Check if alpha reached its maximum value
                if alpha == self.alpha_max:
                    self.log(f"Alpha reached maximum value ({self.alpha_max}), using best step found")
                    if best_step is not None:
                        step = best_step.copy()
                    break

                # Check for progress in step norm adjustments
                if len(step_norm_history) >= 3:
                    # Calculate consecutive changes in step norm
                    recent_changes = [abs(step_norm_history[-i] - step_norm_history[-(i+1)])
                                      for i in range(1, min(3, len(step_norm_history)))]

                    # If step norms are not changing significantly, break the loop
                    if all(change < 1e-6 for change in recent_changes):
                        self.log(f"Step norms not changing significantly: {step_norm_history[-3:]}")
                        self.log("Breaking micro-cycle loop")

                        # Use the best step found so far
                        if best_step is not None and best_step_norm_diff < norm_diff:
                            step = best_step.copy()
                            self.log("Using best step found so far")

                        break

            except Exception as e:
                self.log(f"Error in micro-cycle: {str(e)}")
                # Use best step if available, otherwise scale current step
                if best_step is not None:
                    self.log("Using best step due to error")
                    step = best_step.copy()
                else:
                    # Simple scaling fallback
                    if step_norm > 0 and step_norm > self.trust_radius:
                        scale_factor = self.trust_radius / step_norm
                        step = step * scale_factor
                        self.log(f"Scaled step to trust radius due to error")
                break

        else:
            # If micro-cycles did not converge
            self.log(f"Micro-cycles did not converge in {self.max_micro_cycles} iterations")
            # Use the best step if available
            if best_step is not None and best_step_norm_diff < abs(step_norm - self.trust_radius):
                self.log("Using best step found during micro-cycles")
                step = best_step.copy()

        # Transform step back to original coordinates
        move_vector = eigvecs.dot(step)
        step_norm = norm(move_vector)

        # Only scale down steps that exceed the trust radius
        if step_norm > self.trust_radius:
            self.log(f"Step norm {step_norm:.6f} exceeds trust radius {self.trust_radius:.6f}, scaling down")
            move_vector = move_vector * (self.trust_radius / step_norm)
            step_norm = self.trust_radius
        else:
            self.log(f"Step norm {step_norm:.6f} is within trust radius {self.trust_radius:.6f}, no scaling needed")

        self.log(f"Final norm(step)={norm(move_vector):.6f}")

        # Apply maxstep constraint if specified in config
        if self.config.get("maxstep") is not None:
            maxstep = self.config.get("maxstep")

            # Calculate step lengths
            if move_vector.size % 3 == 0 and move_vector.size > 3:  # Likely atomic coordinates in 3D
                move_vector_reshaped = move_vector.reshape(-1, 3)
                steplengths = np.sqrt((move_vector_reshaped**2).sum(axis=1))
                longest_step = np.max(steplengths)
            else:
                # Generic vector - just compute total norm
                longest_step = norm(move_vector)

            # Scale step if necessary
            if longest_step > maxstep:
                move_vector = move_vector * (maxstep / longest_step)
                self.log(f"Step constrained by maxstep={maxstep:.6f}")

        # Calculate predicted energy change
        predicted_energy_change = self.rfo_model(gradient, H, move_vector)
        self.predicted_energy_changes.append(predicted_energy_change)
        self.log(f"Predicted energy change: {predicted_energy_change:.6f}")

        # Store current geometry, gradient, energy, and move vector for next iteration
        self.prev_geometry = copy.deepcopy(geom_num_list)
        self.prev_gradient = copy.deepcopy(B_g)
        self.prev_energy = B_e
        self.prev_move_vector = copy.deepcopy(move_vector)

        # Increment iteration counter
        self.iter += 1

        return move_vector.reshape(-1, 1)

    def get_alpha_step(self, alpha, rfo_eigval, step_norm, eigvals, gradient, mode="min"):
        """
        Calculate alpha step update for a specific subspace using the improved method

        Parameters:
        alpha: float - Current alpha value
        rfo_eigval: float - RFO eigenvalue for this subspace
        step_norm: float - Norm of the step in this subspace
        eigvals: numpy.ndarray - Eigenvalues for this subspace
        gradient: numpy.ndarray - Gradient components in this subspace
        mode: str - "min" or "max" for minimization or maximization subspace

        Returns:
        float: Calculated alpha step update
        """
        try:
            # Calculate denominators with safety checks
            denominators = eigvals - rfo_eigval * alpha

            # Handle small denominators
            small_denoms = np.abs(denominators) < 1e-10
            if np.any(small_denoms):
                self.log(f"Small denominators detected in {mode} subspace: {np.sum(small_denoms)}")
                safe_denoms = denominators.copy()
                for i in np.where(small_denoms)[0]:
                    safe_denoms[i] = 1e-10 * np.sign(safe_denoms[i]) if safe_denoms[i] != 0 else 1e-10
                denominators = safe_denoms

            # Calculate quotient term
            numerator = gradient**2
            denominator = denominators**3
            quot = np.sum(numerator / denominator)
            self.log(f"{mode} subspace quot={quot:.6e}")

            # Calculate step term with safety
            step_term = 1.0 + step_norm**2 * alpha
            if abs(step_term) < 1e-10:
                step_term = 1e-10 * np.sign(step_term) if step_term != 0 else 1e-10

            # Calculate derivative of squared step norm with respect to alpha
            dstep2_dalpha = 2.0 * rfo_eigval / step_term * quot
            self.log(f"{mode} subspace d(step^2)/dα={dstep2_dalpha:.6e}")

            # Return 0 if derivative is too small
            if abs(dstep2_dalpha) < 1e-10:
                return 0.0

            # Calculate alpha step using the trust radius formula
            alpha_step = 2.0 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
            self.log(f"{mode} subspace alpha_step={alpha_step:.6f}")

            return alpha_step

        except Exception as e:
            self.log(f"Error in get_alpha_step ({mode}): {str(e)}")
            return 0.0

    def calculate_step_derivative(self, alpha, eigval_max, eigval_min, eigvals, max_indices, min_indices, gradient_trans, step_norm):
        """
        Calculate the derivative of the squared step norm with respect to alpha
        for the combined step from both subspaces

        Parameters:
        alpha: float - Current alpha value
        eigval_max, eigval_min: float - RFO eigenvalues from max and min subspaces
        eigvals: numpy.ndarray - All eigenvalues
        max_indices, min_indices: list - Indices of max and min subspaces
        gradient_trans: numpy.ndarray - Transformed gradient
        step_norm: float - Current total step norm

        Returns:
        float: Combined derivative of squared step norm with respect to alpha
        """
        try:
            dstep2_dalpha_max = 0.0
            if len(max_indices) > 0:
                # Calculate denominator for max subspace
                denom_max = 1.0 + np.dot(gradient_trans[max_indices], gradient_trans[max_indices]) * alpha
                if abs(denom_max) < 1e-10:
                    denom_max = 1e-10 * np.sign(denom_max) if denom_max != 0 else 1e-10

                # Handle small denominators in eigenvalue terms
                eigvals_max = eigvals[max_indices].copy()
                denom_terms_max = eigvals_max - eigval_max * alpha

                small_denoms = np.abs(denom_terms_max) < 1e-10
                if np.any(small_denoms):
                    for i in np.where(small_denoms)[0]:
                        denom_terms_max[i] = 1e-10 * np.sign(denom_terms_max[i]) if denom_terms_max[i] != 0 else 1e-10

                # Calculate derivative component for max subspace
                dstep2_dalpha_max = (
                    2.0 * eigval_max / denom_max * np.sum(gradient_trans[max_indices]**2 / denom_terms_max**3)
                )

            dstep2_dalpha_min = 0.0
            if len(min_indices) > 0:
                # Calculate denominator for min subspace
                denom_min = 1.0 + np.dot(gradient_trans[min_indices], gradient_trans[min_indices]) * alpha
                if abs(denom_min) < 1e-10:
                    denom_min = 1e-10 * np.sign(denom_min) if denom_min != 0 else 1e-10

                # Handle small denominators in eigenvalue terms
                eigvals_min = eigvals[min_indices].copy()
                denom_terms_min = eigvals_min - eigval_min * alpha

                small_denoms = np.abs(denom_terms_min) < 1e-10
                if np.any(small_denoms):
                    for i in np.where(small_denoms)[0]:
                        denom_terms_min[i] = 1e-10 * np.sign(denom_terms_min[i]) if denom_terms_min[i] != 0 else 1e-10

                # Calculate derivative component for min subspace
                dstep2_dalpha_min = (
                    2.0 * eigval_min / denom_min * np.sum(gradient_trans[min_indices]**2 / denom_terms_min**3)
                )

            # Combine derivatives from both subspaces
            dstep2_dalpha = dstep2_dalpha_max + dstep2_dalpha_min
            self.log(f"Combined dstep2_dalpha={dstep2_dalpha:.6e}")

            return dstep2_dalpha

        except Exception as e:
            self.log(f"Error in calculate_step_derivative: {str(e)}")
            return 0.0

    def find_corresponding_mode(self, eigvals, eigvecs, prev_eigvecs, target_mode_idx):
        """
        Find corresponding mode in current step based on eigenvector overlap

        Parameters:
        eigvals: numpy.ndarray - Current eigenvalues
        eigvecs: numpy.ndarray - Current eigenvectors as column vectors
        prev_eigvecs: numpy.ndarray - Previous eigenvectors
        target_mode_idx: int - Index of target mode from previous step

        Returns:
        list - List of indices of corresponding modes in current step
        """
        if prev_eigvecs is None or target_mode_idx is None:
            # For first step or reset, simply select by eigenvalue
            if self.saddle_order > 0:
                # For TS search, choose modes with most negative eigenvalues
                sorted_idx = np.argsort(eigvals)
                return sorted_idx[:self.saddle_order].tolist()
            else:
                # For minimization, no special mode
                return []

        # Calculate overlap between target mode from previous step and all current modes
        target_vec = prev_eigvecs[:, target_mode_idx].reshape(-1, 1)
        overlaps = np.abs(np.dot(eigvecs.T, target_vec)).flatten()

        # Sort by overlap magnitude (descending)
        sorted_idx = np.argsort(-overlaps)

        if self.display_flag:
            self.log(f"Mode overlaps with previous TS mode: {overlaps[sorted_idx[0]]:.4f}, {overlaps[sorted_idx[1]]:.4f}, {overlaps[sorted_idx[2]]:.4f}")

        # Return mode with overlap above threshold
        if overlaps[sorted_idx[0]] > self.overlap_threshold:
            return [sorted_idx[0]]

        # Consider mode mixing if no single mode has sufficient overlap
        mixed_modes = []
        cumulative_overlap = 0.0

        for idx in sorted_idx:
            mixed_modes.append(idx)
            cumulative_overlap += overlaps[idx]**2  # Sum of squares

            if cumulative_overlap > 0.8:  # 80% coverage
                break

        return mixed_modes

    def apply_eigenvector_following(self, eigvals, eigvecs, gradient_trans, mode_indices):
        """
        Apply Eigenvector Following method to handle mixed modes

        Parameters:
        eigvals: numpy.ndarray - Current eigenvalues
        eigvecs: numpy.ndarray - Current eigenvectors
        gradient_trans: numpy.ndarray - Gradient in eigenvector basis
        mode_indices: list - Indices of candidate modes

        Returns:
        list - Selected mode indices after eigenvector following
        """
        if not mode_indices or len(mode_indices) <= 1:
            # No mode mixing, apply standard RSPRFO processing
            return mode_indices

        # For mixed modes, build a weighted mode
        weights = np.zeros(len(eigvals))
        total_weight = 0.0

        for idx in mode_indices:
            # Use inverse of eigenvalue as weight (keep negative values as is)
            if eigvals[idx] < 0:
                weights[idx] = abs(1.0 / eigvals[idx])
            else:
                # Small weight for positive eigenvalues
                weights[idx] = 0.01

            total_weight += weights[idx]

        # Normalize weights
        if total_weight > 0:
            weights /= total_weight

        # Calculate centroid of mixed modes
        mixed_mode_idx = np.argmax(weights)

        self.log(f"Eigenvector following: selected mixed mode {mixed_mode_idx} from candidates {mode_indices}")
        self.log(f"Selected mode eigenvalue: {eigvals[mixed_mode_idx]:.6f}")

        return [mixed_mode_idx]

    def get_augmented_hessian(self, eigenvalues, gradient_components, alpha):
        """
        Create the augmented hessian matrix for RFO calculation

        Parameters:
        eigenvalues: numpy.ndarray - Eigenvalues for the selected subspace
        gradient_components: numpy.ndarray - Gradient components in the selected subspace
        alpha: float - Alpha parameter for RS-RFO

        Returns:
        numpy.ndarray - Augmented Hessian matrix for RFO calculation
        """
        n = len(eigenvalues)
        H_aug = np.zeros((n + 1, n + 1))

        # Fill the upper-left block with eigenvalues / alpha
        np.fill_diagonal(H_aug[:n, :n], eigenvalues / alpha)

        # Make sure gradient_components is flattened to the right shape
        gradient_components = np.asarray(gradient_components).flatten()

        # Fill the upper-right and lower-left blocks with gradient components / alpha
        H_aug[:n, n] = gradient_components / alpha
        H_aug[n, :n] = gradient_components / alpha

        return H_aug

    def solve_rfo(self, H_aug, mode="min", prev_eigvec=None):
        """
        Solve the RFO equations to get the step

        Parameters:
        H_aug: numpy.ndarray - Augmented Hessian matrix
        mode: str - "min" for energy minimization, "max" for maximization
        prev_eigvec: numpy.ndarray - Previous eigenvector for consistent direction

        Returns:
        tuple - (step, eigenvalue, nu parameter, eigenvector)
        """
        eigvals, eigvecs = np.linalg.eigh(H_aug)

        if mode == "min":
            idx = np.argmin(eigvals)
        else:  # mode == "max"
            idx = np.argmax(eigvals)

        # Check if we need to flip the eigenvector to maintain consistency
        if prev_eigvec is not None:
            try:
                overlap = np.dot(eigvecs[:, idx], prev_eigvec)
                if overlap < 0:
                    eigvecs[:, idx] *= -1
            except Exception as e:
                # Handle dimension mismatch or other errors
                self.log(f"Error in eigenvector consistency check: {str(e)}")
                # Continue without flipping

        eigval = eigvals[idx]
        eigvec = eigvecs[:, idx]

        # The last component is nu
        nu = eigvec[-1]

        # Add safeguard against very small nu values
        if abs(nu) < 1e-10:
            self.log(f"Warning: Very small nu value: {nu}. Using safe value.")
            nu = np.sign(nu) * max(1e-10, abs(nu))

        # The step is -p/nu where p are the first n components of the eigenvector
        step = -eigvec[:-1] / nu

        return step, eigval, nu, eigvec

    def rfo_model(self, gradient, hessian, step):
        """
        Estimate energy change based on RFO model

        Parameters:
        gradient: numpy.ndarray - Energy gradient
        hessian: numpy.ndarray - Hessian matrix
        step: numpy.ndarray - Step vector

        Returns:
        float - Predicted energy change
        """
        return np.dot(gradient, step) + 0.5 * np.dot(step, np.dot(hessian, step))

    def update_hessian(self, current_geom, current_grad, previous_geom, previous_grad):
        """
        Update the Hessian using the specified update method.
        WARNING: This version FORCES the update even if dot_product <= 0,
        which may lead to numerical instability or crashes.
        """
        displacement = np.asarray(current_geom - previous_geom).reshape(-1, 1)
        delta_grad = np.asarray(current_grad - previous_grad).reshape(-1, 1)

        disp_norm = np.linalg.norm(displacement)
        grad_diff_norm = np.linalg.norm(delta_grad)

        # This is a pre-check from the original code, kept for safety
        if disp_norm < 1e-10 or grad_diff_norm < 1e-10:
            self.log("Skipping Hessian update due to small changes")
            return

        dot_product = np.dot(displacement.T, delta_grad)[0, 0]

        # === [IMPROVEMENT 3] Selective Hessian update ===
        # Uncomment the following lines if should_update_hessian method is implemented
        # if not self.should_update_hessian(displacement, delta_grad, dot_product):
        #     return
        # === [END IMPROVEMENT 3] ===

        # === [MODIFICATION] Safety check removed per user request ===
        if dot_product <= 0:
            self.log(f"WARNING: Forcing Hessian update despite poor alignment (dot_product={dot_product:.6f}).", force=True)
            self.log("This may cause instability or errors in the update function.", force=True)
        # =======================================================
        else:
            self.log(f"Hessian update: displacement norm={disp_norm:.6f}, gradient diff norm={grad_diff_norm:.6f}, dot product={dot_product:.6f}")

        method_key_lower = self.hessian_update_method.lower()
        method_name, update_function = self.default_update_method
        found_method = False

        for key, name, func in self.updater_dispatch_list:
            if key in method_key_lower:
                method_name = name
                update_function = func
                found_method = True
                break

        if not found_method:
            self.log(f"Unknown Hessian update method: {self.hessian_update_method}. Using auto selection.")

        self.log(f"Hessian update method: {method_name}")

        try:
            delta_hess = update_function(
                self.hessian, displacement, delta_grad
            )
            self.hessian += delta_hess
            self.hessian = 0.5 * (self.hessian + self.hessian.T)
            self.log("Hessian update attempted.")

        except Exception as e:
            self.log(f"ERROR during forced Hessian update ({method_name}): {e}", force=True)
            self.log("Hessian may be corrupted. Proceeding with caution.", force=True)

    def log(self, message, force=False):
        """
        Print log message if display flag is enabled or force is True

        Parameters:
        message: str - Message to display
        force: bool - If True, display message regardless of display_flag
        """
        if self.display_flag or force:
            print(message)

    def set_hessian(self, hessian):
        """
        Set the Hessian matrix

        Parameters:
        hessian: numpy.ndarray - Hessian matrix
        """
        self.hessian = hessian
        return

    def set_bias_hessian(self, bias_hessian):
        """
        Set the bias Hessian matrix

        Parameters:
        bias_hessian: numpy.ndarray - Bias Hessian matrix
        """
        self.bias_hessian = bias_hessian
        return

    def get_hessian(self):
        """
        Get the current Hessian matrix

        Returns:
        numpy.ndarray - Hessian matrix
        """
        return self.hessian

    def get_bias_hessian(self):
        """
        Get the current bias Hessian matrix

        Returns:
        numpy.ndarray - Bias Hessian matrix
        """
        return self.bias_hessian

    def reset_trust_radius(self):
        """
        Reset trust radius to its initial value
        """
        self.trust_radius = self.trust_radius_initial
        self.log(f"Trust radius reset to initial value: {self.trust_radius:.6f}")

    def set_trust_radius(self, radius):
        """
        Manually set the trust radius

        Parameters:
        radius: float - New trust radius value
        """
        old_value = self.trust_radius
        self.trust_radius = max(min(radius, self.trust_radius_max), self.trust_radius_min)
        self.log(f"Trust radius manually set from {old_value:.6f} to {self.trust_radius:.6f}")

    def get_reduction_ratios(self):
        """
        Get the history of reduction ratios

        Returns:
        list - Reduction ratios for each iteration
        """
        return self.reduction_ratios

    def get_trust_radius_history(self):
        """
        Get the history of trust radius values

        Returns:
        list - Trust radius values for each iteration
        """
        return self.trust_radius_history
```