MultiOptPy 1.20.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/Calculator/__init__.py +0 -0
- multioptpy/Calculator/ase_calculation_tools.py +424 -0
- multioptpy/Calculator/ase_tools/__init__.py +0 -0
- multioptpy/Calculator/ase_tools/fairchem.py +28 -0
- multioptpy/Calculator/ase_tools/gamess.py +19 -0
- multioptpy/Calculator/ase_tools/gaussian.py +165 -0
- multioptpy/Calculator/ase_tools/mace.py +28 -0
- multioptpy/Calculator/ase_tools/mopac.py +19 -0
- multioptpy/Calculator/ase_tools/nwchem.py +31 -0
- multioptpy/Calculator/ase_tools/orca.py +22 -0
- multioptpy/Calculator/ase_tools/pygfn0.py +37 -0
- multioptpy/Calculator/dxtb_calculation_tools.py +344 -0
- multioptpy/Calculator/emt_calculation_tools.py +458 -0
- multioptpy/Calculator/gpaw_calculation_tools.py +183 -0
- multioptpy/Calculator/lj_calculation_tools.py +314 -0
- multioptpy/Calculator/psi4_calculation_tools.py +334 -0
- multioptpy/Calculator/pwscf_calculation_tools.py +189 -0
- multioptpy/Calculator/pyscf_calculation_tools.py +327 -0
- multioptpy/Calculator/sqm1_calculation_tools.py +611 -0
- multioptpy/Calculator/sqm2_calculation_tools.py +376 -0
- multioptpy/Calculator/tblite_calculation_tools.py +352 -0
- multioptpy/Calculator/tersoff_calculation_tools.py +818 -0
- multioptpy/Constraint/__init__.py +0 -0
- multioptpy/Constraint/constraint_condition.py +834 -0
- multioptpy/Coordinate/__init__.py +0 -0
- multioptpy/Coordinate/polar_coordinate.py +199 -0
- multioptpy/Coordinate/redundant_coordinate.py +638 -0
- multioptpy/IRC/__init__.py +0 -0
- multioptpy/IRC/converge_criteria.py +28 -0
- multioptpy/IRC/dvv.py +544 -0
- multioptpy/IRC/euler.py +439 -0
- multioptpy/IRC/hpc.py +564 -0
- multioptpy/IRC/lqa.py +540 -0
- multioptpy/IRC/modekill.py +662 -0
- multioptpy/IRC/rk4.py +579 -0
- multioptpy/Interpolation/__init__.py +0 -0
- multioptpy/Interpolation/adaptive_interpolation.py +283 -0
- multioptpy/Interpolation/binomial_interpolation.py +179 -0
- multioptpy/Interpolation/geodesic_interpolation.py +785 -0
- multioptpy/Interpolation/interpolation.py +156 -0
- multioptpy/Interpolation/linear_interpolation.py +473 -0
- multioptpy/Interpolation/savitzky_golay_interpolation.py +252 -0
- multioptpy/Interpolation/spline_interpolation.py +353 -0
- multioptpy/MD/__init__.py +0 -0
- multioptpy/MD/thermostat.py +185 -0
- multioptpy/MEP/__init__.py +0 -0
- multioptpy/MEP/pathopt_bneb_force.py +443 -0
- multioptpy/MEP/pathopt_dmf_force.py +448 -0
- multioptpy/MEP/pathopt_dneb_force.py +130 -0
- multioptpy/MEP/pathopt_ewbneb_force.py +207 -0
- multioptpy/MEP/pathopt_gpneb_force.py +512 -0
- multioptpy/MEP/pathopt_lup_force.py +113 -0
- multioptpy/MEP/pathopt_neb_force.py +225 -0
- multioptpy/MEP/pathopt_nesb_force.py +205 -0
- multioptpy/MEP/pathopt_om_force.py +153 -0
- multioptpy/MEP/pathopt_qsm_force.py +174 -0
- multioptpy/MEP/pathopt_qsmv2_force.py +304 -0
- multioptpy/ModelFunction/__init__.py +7 -0
- multioptpy/ModelFunction/avoiding_model_function.py +29 -0
- multioptpy/ModelFunction/binary_image_ts_search_model_function.py +47 -0
- multioptpy/ModelFunction/conical_model_function.py +26 -0
- multioptpy/ModelFunction/opt_meci.py +50 -0
- multioptpy/ModelFunction/opt_mesx.py +47 -0
- multioptpy/ModelFunction/opt_mesx_2.py +49 -0
- multioptpy/ModelFunction/seam_model_function.py +27 -0
- multioptpy/ModelHessian/__init__.py +0 -0
- multioptpy/ModelHessian/approx_hessian.py +147 -0
- multioptpy/ModelHessian/calc_params.py +227 -0
- multioptpy/ModelHessian/fischer.py +236 -0
- multioptpy/ModelHessian/fischerd3.py +360 -0
- multioptpy/ModelHessian/fischerd4.py +398 -0
- multioptpy/ModelHessian/gfn0xtb.py +633 -0
- multioptpy/ModelHessian/gfnff.py +709 -0
- multioptpy/ModelHessian/lindh.py +165 -0
- multioptpy/ModelHessian/lindh2007d2.py +707 -0
- multioptpy/ModelHessian/lindh2007d3.py +822 -0
- multioptpy/ModelHessian/lindh2007d4.py +1030 -0
- multioptpy/ModelHessian/morse.py +106 -0
- multioptpy/ModelHessian/schlegel.py +144 -0
- multioptpy/ModelHessian/schlegeld3.py +322 -0
- multioptpy/ModelHessian/schlegeld4.py +559 -0
- multioptpy/ModelHessian/shortrange.py +346 -0
- multioptpy/ModelHessian/swartd2.py +496 -0
- multioptpy/ModelHessian/swartd3.py +706 -0
- multioptpy/ModelHessian/swartd4.py +918 -0
- multioptpy/ModelHessian/tshess.py +40 -0
- multioptpy/Optimizer/QHAdam.py +61 -0
- multioptpy/Optimizer/__init__.py +0 -0
- multioptpy/Optimizer/abc_fire.py +83 -0
- multioptpy/Optimizer/adabelief.py +58 -0
- multioptpy/Optimizer/adabound.py +68 -0
- multioptpy/Optimizer/adadelta.py +65 -0
- multioptpy/Optimizer/adaderivative.py +56 -0
- multioptpy/Optimizer/adadiff.py +68 -0
- multioptpy/Optimizer/adafactor.py +70 -0
- multioptpy/Optimizer/adam.py +65 -0
- multioptpy/Optimizer/adamax.py +62 -0
- multioptpy/Optimizer/adamod.py +83 -0
- multioptpy/Optimizer/adamw.py +65 -0
- multioptpy/Optimizer/adiis.py +523 -0
- multioptpy/Optimizer/afire_neb.py +282 -0
- multioptpy/Optimizer/block_hessian_update.py +709 -0
- multioptpy/Optimizer/c2diis.py +491 -0
- multioptpy/Optimizer/component_wise_scaling.py +405 -0
- multioptpy/Optimizer/conjugate_gradient.py +82 -0
- multioptpy/Optimizer/conjugate_gradient_neb.py +345 -0
- multioptpy/Optimizer/coordinate_locking.py +405 -0
- multioptpy/Optimizer/dic_rsirfo.py +1015 -0
- multioptpy/Optimizer/ediis.py +417 -0
- multioptpy/Optimizer/eve.py +76 -0
- multioptpy/Optimizer/fastadabelief.py +61 -0
- multioptpy/Optimizer/fire.py +77 -0
- multioptpy/Optimizer/fire2.py +249 -0
- multioptpy/Optimizer/fire_neb.py +92 -0
- multioptpy/Optimizer/gan_step.py +486 -0
- multioptpy/Optimizer/gdiis.py +609 -0
- multioptpy/Optimizer/gediis.py +203 -0
- multioptpy/Optimizer/geodesic_step.py +433 -0
- multioptpy/Optimizer/gpmin.py +633 -0
- multioptpy/Optimizer/gpr_step.py +364 -0
- multioptpy/Optimizer/gradientdescent.py +78 -0
- multioptpy/Optimizer/gradientdescent_neb.py +52 -0
- multioptpy/Optimizer/hessian_update.py +433 -0
- multioptpy/Optimizer/hybrid_rfo.py +998 -0
- multioptpy/Optimizer/kdiis.py +625 -0
- multioptpy/Optimizer/lars.py +21 -0
- multioptpy/Optimizer/lbfgs.py +253 -0
- multioptpy/Optimizer/lbfgs_neb.py +355 -0
- multioptpy/Optimizer/linesearch.py +236 -0
- multioptpy/Optimizer/lookahead.py +40 -0
- multioptpy/Optimizer/nadam.py +64 -0
- multioptpy/Optimizer/newton.py +200 -0
- multioptpy/Optimizer/prodigy.py +70 -0
- multioptpy/Optimizer/purtubation.py +16 -0
- multioptpy/Optimizer/quickmin_neb.py +245 -0
- multioptpy/Optimizer/radam.py +75 -0
- multioptpy/Optimizer/rfo_neb.py +302 -0
- multioptpy/Optimizer/ric_rfo.py +842 -0
- multioptpy/Optimizer/rl_step.py +627 -0
- multioptpy/Optimizer/rmspropgrave.py +65 -0
- multioptpy/Optimizer/rsirfo.py +1647 -0
- multioptpy/Optimizer/rsprfo.py +1056 -0
- multioptpy/Optimizer/sadam.py +60 -0
- multioptpy/Optimizer/samsgrad.py +63 -0
- multioptpy/Optimizer/tr_lbfgs.py +678 -0
- multioptpy/Optimizer/trim.py +273 -0
- multioptpy/Optimizer/trust_radius.py +207 -0
- multioptpy/Optimizer/trust_radius_neb.py +121 -0
- multioptpy/Optimizer/yogi.py +60 -0
- multioptpy/OtherMethod/__init__.py +0 -0
- multioptpy/OtherMethod/addf.py +1150 -0
- multioptpy/OtherMethod/dimer.py +895 -0
- multioptpy/OtherMethod/elastic_image_pair.py +629 -0
- multioptpy/OtherMethod/modelfunction.py +456 -0
- multioptpy/OtherMethod/newton_traj.py +454 -0
- multioptpy/OtherMethod/twopshs.py +1095 -0
- multioptpy/PESAnalyzer/__init__.py +0 -0
- multioptpy/PESAnalyzer/calc_irc_curvature.py +125 -0
- multioptpy/PESAnalyzer/cmds_analysis.py +152 -0
- multioptpy/PESAnalyzer/koopman_analysis.py +268 -0
- multioptpy/PESAnalyzer/pca_analysis.py +314 -0
- multioptpy/Parameters/__init__.py +0 -0
- multioptpy/Parameters/atomic_mass.py +20 -0
- multioptpy/Parameters/atomic_number.py +22 -0
- multioptpy/Parameters/covalent_radii.py +44 -0
- multioptpy/Parameters/d2.py +61 -0
- multioptpy/Parameters/d3.py +63 -0
- multioptpy/Parameters/d4.py +103 -0
- multioptpy/Parameters/dreiding.py +34 -0
- multioptpy/Parameters/gfn0xtb_param.py +137 -0
- multioptpy/Parameters/gfnff_param.py +315 -0
- multioptpy/Parameters/gnb.py +104 -0
- multioptpy/Parameters/parameter.py +22 -0
- multioptpy/Parameters/uff.py +72 -0
- multioptpy/Parameters/unit_values.py +20 -0
- multioptpy/Potential/AFIR_potential.py +55 -0
- multioptpy/Potential/LJ_repulsive_potential.py +345 -0
- multioptpy/Potential/__init__.py +0 -0
- multioptpy/Potential/anharmonic_keep_potential.py +28 -0
- multioptpy/Potential/asym_elllipsoidal_potential.py +718 -0
- multioptpy/Potential/electrostatic_potential.py +69 -0
- multioptpy/Potential/flux_potential.py +30 -0
- multioptpy/Potential/gaussian_potential.py +101 -0
- multioptpy/Potential/idpp.py +516 -0
- multioptpy/Potential/keep_angle_potential.py +146 -0
- multioptpy/Potential/keep_dihedral_angle_potential.py +105 -0
- multioptpy/Potential/keep_outofplain_angle_potential.py +70 -0
- multioptpy/Potential/keep_potential.py +99 -0
- multioptpy/Potential/mechano_force_potential.py +74 -0
- multioptpy/Potential/nanoreactor_potential.py +52 -0
- multioptpy/Potential/potential.py +896 -0
- multioptpy/Potential/spacer_model_potential.py +221 -0
- multioptpy/Potential/switching_potential.py +258 -0
- multioptpy/Potential/universal_potential.py +34 -0
- multioptpy/Potential/value_range_potential.py +36 -0
- multioptpy/Potential/void_point_potential.py +25 -0
- multioptpy/SQM/__init__.py +0 -0
- multioptpy/SQM/sqm1/__init__.py +0 -0
- multioptpy/SQM/sqm1/sqm1_core.py +1792 -0
- multioptpy/SQM/sqm2/__init__.py +0 -0
- multioptpy/SQM/sqm2/calc_tools.py +95 -0
- multioptpy/SQM/sqm2/sqm2_basis.py +850 -0
- multioptpy/SQM/sqm2/sqm2_bond.py +119 -0
- multioptpy/SQM/sqm2/sqm2_core.py +303 -0
- multioptpy/SQM/sqm2/sqm2_data.py +1229 -0
- multioptpy/SQM/sqm2/sqm2_disp.py +65 -0
- multioptpy/SQM/sqm2/sqm2_eeq.py +243 -0
- multioptpy/SQM/sqm2/sqm2_overlapint.py +704 -0
- multioptpy/SQM/sqm2/sqm2_qm.py +578 -0
- multioptpy/SQM/sqm2/sqm2_rep.py +66 -0
- multioptpy/SQM/sqm2/sqm2_srb.py +70 -0
- multioptpy/Thermo/__init__.py +0 -0
- multioptpy/Thermo/normal_mode_analyzer.py +865 -0
- multioptpy/Utils/__init__.py +0 -0
- multioptpy/Utils/bond_connectivity.py +264 -0
- multioptpy/Utils/calc_tools.py +884 -0
- multioptpy/Utils/oniom.py +96 -0
- multioptpy/Utils/pbc.py +48 -0
- multioptpy/Utils/riemann_curvature.py +208 -0
- multioptpy/Utils/symmetry_analyzer.py +482 -0
- multioptpy/Visualization/__init__.py +0 -0
- multioptpy/Visualization/visualization.py +156 -0
- multioptpy/WFAnalyzer/MO_analysis.py +104 -0
- multioptpy/WFAnalyzer/__init__.py +0 -0
- multioptpy/Wrapper/__init__.py +0 -0
- multioptpy/Wrapper/autots.py +1239 -0
- multioptpy/Wrapper/ieip_wrapper.py +93 -0
- multioptpy/Wrapper/md_wrapper.py +92 -0
- multioptpy/Wrapper/neb_wrapper.py +94 -0
- multioptpy/Wrapper/optimize_wrapper.py +76 -0
- multioptpy/__init__.py +5 -0
- multioptpy/entrypoints.py +916 -0
- multioptpy/fileio.py +660 -0
- multioptpy/ieip.py +340 -0
- multioptpy/interface.py +1086 -0
- multioptpy/irc.py +529 -0
- multioptpy/moleculardynamics.py +432 -0
- multioptpy/neb.py +1267 -0
- multioptpy/optimization.py +1553 -0
- multioptpy/optimizer.py +709 -0
- multioptpy-1.20.2.dist-info/METADATA +438 -0
- multioptpy-1.20.2.dist-info/RECORD +246 -0
- multioptpy-1.20.2.dist-info/WHEEL +5 -0
- multioptpy-1.20.2.dist-info/entry_points.txt +9 -0
- multioptpy-1.20.2.dist-info/licenses/LICENSE +674 -0
- multioptpy-1.20.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from numpy.linalg import norm, inv, qr, eig, pinv
|
|
3
|
+
|
|
4
|
+
class TRLBFGS:
|
|
5
|
+
"""Trust-Region Limited-memory BFGS optimizer.
|
|
6
|
+
|
|
7
|
+
A trust region variant of the L-BFGS algorithm that approximates the inverse Hessian
|
|
8
|
+
matrix using a limited amount of memory while enforcing step length constraints
|
|
9
|
+
through a trust region approach.
|
|
10
|
+
|
|
11
|
+
Unlike standard L-BFGS with line search, this implementation adapts the trust region
|
|
12
|
+
radius dynamically based on the agreement between predicted and actual function
|
|
13
|
+
value reductions.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
def __init__(self, **config):
    """Set up the optimizer state from keyword configuration.

    Recognized keys (all optional): fc_count, memory, delta_hat, delta_min,
    initial_delta, eta, use_powell_damping, powell_theta, newton_max_iter,
    newton_tol, newton_alpha_min.
    """
    self.config = config

    # Run-state flags.
    self.Initialization = True
    self.iter = 0
    self.saddle_order = 0
    self.FC_COUNT = config.get("fc_count", -1)

    # How many (s, y) curvature pairs to retain.
    self.memory = config.get("memory", 30)

    # Trust-region radius bounds and the starting radius.
    self.delta_hat = config.get("delta_hat", 0.5)
    self.delta_min = config.get("delta_min", 0.01)
    self.delta_tr = config.get("initial_delta", self.delta_hat * 0.75)
    self.eta = config.get("eta", 0.25 * 0.9)  # acceptance threshold, in [0, 1/4)

    # Powell damping of the curvature pairs.
    self.use_powell_damping = config.get("use_powell_damping", True)
    self.powell_theta = config.get("powell_theta", 0.2)

    # Newton solver for the trust-region subproblem.
    self.newton_max_iter = config.get("newton_max_iter", 50)
    self.newton_tol = config.get("newton_tol", 1e-6)
    self.newton_alpha_min = config.get("newton_alpha_min", 1e-8)

    # L-BFGS history (position diffs, gradient diffs, 1/(y^T s)).
    self.s = []
    self.y = []
    self.rho = []
    self.gamma = 1.0  # scale of the initial Hessian approximation

    # Optional explicit Hessians supplied by the caller.
    self.hessian = None
    self.bias_hessian = None

    # Trust-region subproblem scratch state.
    self.P_ll = None
    self.Lambda_1 = None
    self.lambda_min = 0.0
    self.prev_move_vector = None
    self.tr_subproblem_solved = False

    print(f"Initialized TRLBFGS optimizer with memory={self.memory}, "
          f"initial trust region radius={self.delta_tr}, "
          f"bounds=[{self.delta_min}, {self.delta_hat}]")
    print(f"Powell damping: {self.use_powell_damping}, theta={self.powell_theta}")
    print(f"Newton solver: max_iter={self.newton_max_iter}, tol={self.newton_tol}")
67
|
+
def set_hessian(self, hessian):
    """Attach an explicit Hessian matrix to the optimizer."""
    self.hessian = hessian
72
|
+
def set_bias_hessian(self, bias_hessian):
    """Attach a bias Hessian matrix to the optimizer."""
    self.bias_hessian = bias_hessian
77
|
+
def get_hessian(self):
    """Return the explicit Hessian matrix, or None when not set."""
    return self.hessian
81
|
+
def get_bias_hessian(self):
    """Return the bias Hessian matrix, or None when not set."""
    return self.bias_hessian
85
|
+
def apply_powell_damping(self, s, y):
    """Damp the gradient-difference vector so the curvature condition holds.

    Powell's correction: when s^T y falls below ``powell_theta * s^T s``,
    replace y by a convex combination of y and ``gamma * s`` so that the
    L-BFGS update keeps a usable positive-curvature pair.

    NOTE(review): the threshold uses s^T s rather than s^T B s (with
    B ~ gamma*I), while the blended term does include ``self.gamma`` —
    presumably intentional, but worth confirming against the reference.

    Parameters
    ----------
    s : ndarray
        Position difference vector (x_{k+1} - x_k).
    y : ndarray
        Gradient difference vector (g_{k+1} - g_k).

    Returns
    -------
    (ndarray, bool)
        The (possibly damped) flattened y, and whether damping was applied.
    """
    sv = s.flatten()
    yv = y.flatten()

    sy = np.dot(sv, yv)
    ss = np.dot(sv, sv)
    threshold = self.powell_theta * ss

    # Curvature condition already satisfied -> nothing to do.
    if not sy < threshold:
        return yv, False

    # Blend y toward gamma*s until s^T y_corrected reaches the threshold.
    r = (1 - self.powell_theta) * ss / (ss - sy)
    yv_damped = r * yv + (1 - r) * self.gamma * sv

    print(f"Powell damping applied: s^T y = {sy:.4e} < {threshold:.4e}, r = {r:.4f}")
    return yv_damped, True
126
|
+
def check_curvature_condition(self, s, y, epsilon=1e-10):
    """Test whether y^T s > 0 (required for a positive-definite update).

    Parameters
    ----------
    s : ndarray
        Position difference vector.
    y : ndarray
        Gradient difference vector.
    epsilon : float
        Tolerance for positivity and for detecting degenerate vectors.

    Returns
    -------
    (bool, float)
        Whether the condition holds, and the value of y^T s. Also returns
        False when either vector is numerically zero, since such a pair
        carries no usable curvature information.
    """
    sv = s.flatten()
    yv = y.flatten()

    ys = np.dot(yv, sv)

    len_s = norm(sv)
    len_y = norm(yv)

    # Degenerate pair: too short to trust numerically.
    if len_s < epsilon or len_y < epsilon:
        print(f"Warning: Very small vector norms (||s||={len_s:.4e}, ||y||={len_y:.4e})")
        return False, ys

    ok = ys > epsilon
    if not ok:
        # Report the angle between s and y as an extra diagnostic.
        cosine = ys / (len_s * len_y)
        print(f"Curvature condition violated: y^T s = {ys:.4e}, cos(angle) = {cosine:.4f}")

    return ok, ys
171
|
+
def update_vectors(self, displacement, delta_grad):
    """Push a new (s, y) curvature pair into the L-BFGS history.

    Applies Powell damping when it is enabled and the raw pair violates
    the curvature condition; refuses the update when y^T s is still
    (numerically) zero after correction.

    Returns
    -------
    bool
        True when the pair was stored, False when it was rejected.
    """
    sv = displacement.flatten()
    yv = delta_grad.flatten()

    ok, ys = self.check_curvature_condition(sv, yv)

    # Try to repair a bad pair instead of discarding it outright.
    if self.use_powell_damping and not ok:
        yv, damped = self.apply_powell_damping(sv, yv)
        if damped:
            ys = np.dot(yv, sv)
            print(f"After damping: y^T s = {ys:.4e}")

    if abs(ys) < 1e-10:
        print("Warning: y^T s is still too small after correction, skipping update")
        return False

    # Store the pair together with rho = 1 / (y^T s).
    self.s.append(sv)
    self.y.append(yv)
    self.rho.append(1.0 / ys)

    # Drop the oldest pair(s) once the memory window is exceeded.
    while len(self.s) > self.memory:
        for history in (self.s, self.y, self.rho):
            history.pop(0)

    # Rescale the initial Hessian guess (Barzilai-Borwein style).
    self.gamma = np.dot(yv, yv) / ys
    print(f"Updated gamma = {self.gamma:.4f}, memory size = {len(self.s)}")
    return True
218
|
+
def solve_trust_region_newton(self, g_ll, g_NL_norm, delta):
    """Solve the trust region subproblem using improved Newton's method.

    This method finds the Lagrange multiplier sigma that satisfies:
    ||p(sigma)|| = delta, where p(sigma) is the step in the trust region.

    Relies on state prepared by the step computation: ``self.Lambda_1``
    (eigenvalues of the compact-representation Hessian), ``self.gamma``
    (initial-Hessian scale), and ``self.lambda_min``.

    Parameters:
    -----------
    g_ll : ndarray
        Projection of gradient onto eigenspace
    g_NL_norm : float
        Norm of gradient component orthogonal to eigenspace
    delta : float
        Trust region radius

    Returns:
    --------
    sigma_star : float
        Optimal Lagrange multiplier
    iterations : int
        Number of Newton iterations performed
    """
    # Define the trust region constraint function
    def phi_bar_func(sigma):
        """Compute phi_bar(sigma) = 1/||p(sigma)|| - 1/delta"""
        # ||p(sigma)||^2 split into the eigenspace part and the orthogonal part.
        u = np.sum((g_ll ** 2) / ((self.Lambda_1 + sigma) ** 2)) + \
            (g_NL_norm ** 2) / ((self.gamma + sigma) ** 2)
        v = np.sqrt(u)
        return 1.0 / v - 1.0 / delta

    def phi_bar_prime_func(sigma):
        """Compute the derivative of phi_bar function."""
        lambda_sigma = self.Lambda_1 + sigma
        gamma_sigma = self.gamma + sigma

        u = np.sum(g_ll ** 2 / lambda_sigma ** 2) + g_NL_norm ** 2 / gamma_sigma ** 2
        u_prime = -2.0 * np.sum(g_ll ** 2 / lambda_sigma ** 3) - 2.0 * g_NL_norm ** 2 / gamma_sigma ** 3

        # d/dsigma (u^{-1/2}) = -1/2 * u^{-3/2} * u'
        return -0.5 * u ** (-1.5) * u_prime

    # Initialize sigma: must keep Lambda_1 + sigma positive, hence >= -lambda_min.
    sigma = max(0.0, -self.lambda_min)
    phi_bar_0 = phi_bar_func(sigma)

    print(f"Newton solver: initial sigma = {sigma:.6e}, phi_bar = {phi_bar_0:.6e}")

    # Check if we're already at the solution (interior case)
    if abs(phi_bar_0) < self.newton_tol:
        print("Interior solution found (sigma = 0 or -lambda_min)")
        return sigma, 0

    # Need to solve for positive sigma if phi_bar_0 < 0
    if phi_bar_0 < 0:
        # Initialize with a better starting guess
        sigma_hat = max(np.max(np.abs(g_ll) / delta - self.Lambda_1), 0.0)
        sigma = max(sigma, sigma_hat)

        print(f"Boundary case: starting Newton from sigma = {sigma:.6e}")

        # Newton iterations with backtracking line search
        for iteration in range(self.newton_max_iter):
            phi_bar = phi_bar_func(sigma)

            # Check convergence
            if abs(phi_bar) < self.newton_tol:
                print(f"Newton converged in {iteration + 1} iterations, sigma = {sigma:.6e}")
                return sigma, iteration + 1

            # Compute Newton direction
            phi_bar_prime = phi_bar_prime_func(sigma)

            # Check for zero derivative (should not happen in practice)
            if abs(phi_bar_prime) < 1e-15:
                print(f"Warning: phi_bar_prime too small ({phi_bar_prime:.4e}), stopping Newton")
                break

            # Newton step
            delta_sigma = -phi_bar / phi_bar_prime

            # Backtracking line search to ensure progress
            alpha = 1.0
            sigma_new = sigma + alpha * delta_sigma

            # Ensure sigma stays non-negative
            while sigma_new < 0:
                alpha *= 0.5
                sigma_new = sigma + alpha * delta_sigma
                if alpha < self.newton_alpha_min:
                    # Cannot reach a non-negative point along the Newton
                    # direction; shrink sigma itself instead.
                    sigma_new = sigma * 0.5
                    break

            # Simple backtracking: ensure |phi_bar| decreases
            phi_bar_new = phi_bar_func(sigma_new)
            backtrack_count = 0
            max_backtrack = 10

            while abs(phi_bar_new) > abs(phi_bar) and backtrack_count < max_backtrack:
                alpha *= 0.5
                if alpha < self.newton_alpha_min:
                    # Can't make progress, accept current sigma
                    print(f"Backtracking failed, accepting sigma = {sigma:.6e}")
                    return sigma, iteration + 1

                sigma_new = max(0.0, sigma + alpha * delta_sigma)
                phi_bar_new = phi_bar_func(sigma_new)
                backtrack_count += 1

            if backtrack_count > 0:
                print(f"  Iter {iteration + 1}: backtracked {backtrack_count} times, alpha = {alpha:.4f}")

            # Update sigma
            sigma = sigma_new

            if iteration % 10 == 0 or iteration == self.newton_max_iter - 1:
                print(f"  Iter {iteration + 1}: sigma = {sigma:.6e}, phi_bar = {phi_bar:.6e}, alpha = {alpha:.4f}")

        print(f"Newton reached max iterations ({self.newton_max_iter}), sigma = {sigma:.6e}")
        return sigma, self.newton_max_iter

    elif self.lambda_min < 0:
        # Hard case: negative curvature, set sigma to make Hessian PSD
        sigma_star = -self.lambda_min
        print(f"Hard case (negative curvature), using sigma = {sigma_star:.6e}")
        return sigma_star, 0
    else:
        # Interior solution
        print("Interior solution (no constraint active)")
        return 0.0, 0
347
|
+
def _steepest_descent_step(self, g, delta, shape):
    """Clipped steepest-descent step, used before any curvature history
    exists or when the compact L-BFGS computation fails numerically.
    Resets the subproblem state so the reduction-ratio model falls back
    to the linear prediction."""
    direction = -g
    step_length = norm(direction)
    if step_length > delta:
        direction = direction * (delta / step_length)

    # Signal that no quadratic model is available for this step.
    self.P_ll = None
    self.Lambda_1 = None
    self.tr_subproblem_solved = False

    return direction.reshape(shape)


def compute_lbfgs_tr_step(self, gradient, delta):
    """Compute trust region step using the compact L-BFGS representation.

    Builds the compact-form matrices Psi and M from the stored (s, y)
    history, eigendecomposes the low-rank part to obtain Lambda_1 / P_ll,
    solves the trust-region subproblem for sigma via Newton's method, and
    forms the step via the Sherman-Morrison-Woodbury-style formula.

    Parameters:
    ----------
    gradient : ndarray
        Current gradient vector
    delta : float
        Current trust region radius

    Returns:
    -------
    ndarray
        Step vector satisfying the trust region constraint

    Notes
    -----
    On any LinAlgError/ValueError during the compact computation the
    method falls back to (clipped) steepest descent. Previously there
    were two nested except blocks with identical fallbacks; they are
    consolidated into one handler.
    """
    g = gradient.flatten()

    # No history yet: steepest descent clipped to the trust region.
    if len(self.s) == 0:
        return _steepest_descent_step(self, g, delta, gradient.shape)

    try:
        # Stack history vectors as columns: S = [s_0 ... s_k], Y = [y_0 ... y_k].
        S_matrix = np.column_stack(self.s)
        Y_matrix = np.column_stack(self.y)

        # Compact representation: B = gamma*I + Psi M Psi^T.
        Psi = np.hstack((self.gamma * S_matrix, Y_matrix))

        S_T_Y = np.dot(S_matrix.T, Y_matrix)
        L = np.tril(S_T_Y, k=-1)          # strictly lower-triangular part
        D = np.diag(np.diag(S_T_Y))       # diagonal part

        M_block = np.block([
            [self.gamma * np.dot(S_matrix.T, S_matrix), L],
            [L.T, -D]
        ])

        # Invert the middle block; fall back to the pseudoinverse when singular.
        try:
            M = -inv(M_block)
        except np.linalg.LinAlgError:
            print("Warning: M matrix is singular, using pseudoinverse")
            M = -pinv(M_block)

        # Thin QR of Psi; only R is needed for the eigen-reduction.
        _, R = qr(Psi, mode='reduced')
        R_inv = pinv(R)  # pseudoinverse for robustness against rank deficiency

        # Eigendecompose R M R^T; discard tiny imaginary parts from eig().
        eigen_values, eigen_vectors = eig(np.dot(np.dot(R, M), R.T))
        idx = eigen_values.argsort()
        eigen_values_sorted = eigen_values[idx].real
        eigen_vectors_sorted = eigen_vectors[:, idx].real

        # Eigenvalues of B restricted to range(Psi), and the overall minimum.
        self.Lambda_1 = self.gamma + eigen_values_sorted
        self.lambda_min = min(np.min(self.Lambda_1), self.gamma)

        # Orthonormal-ish basis of the parallel (eigen) subspace.
        self.P_ll = np.dot(Psi, np.dot(R_inv, eigen_vectors_sorted))

        # Split the gradient into parallel and orthogonal components.
        g_ll = np.dot(self.P_ll.T, g)
        g_NL_norm = np.sqrt(max(0, norm(g) ** 2 - norm(g_ll) ** 2))

        # Solve for the Lagrange multiplier sigma on the TR constraint.
        sigma_star, newton_iters = self.solve_trust_region_newton(g_ll, g_NL_norm, delta)

        tau_star = self.gamma + sigma_star

        # NOTE(review): since M = -inv(M_block) in the non-singular path,
        # inv(M) recomputes -M_block; kept as-is to preserve the pinv path's
        # behavior exactly.
        p_star = -1 / tau_star * (g - np.dot(Psi, np.dot(pinv(tau_star * inv(M) + np.dot(Psi.T, Psi)), np.dot(Psi.T, g))))

        # Guard against small numerical overshoot of the constraint.
        step_norm = norm(p_star)
        if step_norm > delta * (1 + 1e-6):
            print(f"Warning: Step length ({step_norm:.6e}) exceeds trust region radius ({delta:.6e})")
            p_star = p_star * (delta / step_norm)
            print(f"Rescaled step to length {norm(p_star):.6e}")

        self.tr_subproblem_solved = True
        print(f"Trust region step computed: ||p|| = {norm(p_star):.6e}, sigma = {sigma_star:.6e}")
        return p_star.reshape(gradient.shape)

    except (np.linalg.LinAlgError, ValueError) as e:
        print(f"Error in L-BFGS trust region calculation: {e}")
        print("Falling back to steepest descent")
        return _steepest_descent_step(self, g, delta, gradient.shape)
471
|
+
def compute_reduction_ratio(self, g, p, actual_reduction):
    """Compare the achieved reduction against the model-predicted one.

    Parameters
    ----------
    g : ndarray
        Gradient at the current point.
    p : ndarray
        Trial step vector.
    actual_reduction : float
        Observed decrease f(x) - f(x + p).

    Returns
    -------
    float
        actual_reduction / predicted_reduction, or 0.0 when the
        predicted reduction is numerically negligible.
    """
    step_vec = p.flatten()
    grad_vec = g.flatten()

    # First-order (linear) term of the quadratic model: g^T p.
    g_dot_p = np.dot(grad_vec, step_vec)

    # The compact L-BFGS model is usable only after at least one
    # curvature pair was stored and the TR subproblem was solved.
    have_model = (len(self.s) > 0 and self.tr_subproblem_solved
                  and self.P_ll is not None and self.Lambda_1 is not None)

    if not have_model:
        # First iteration / incomplete subproblem: linear model only.
        pred_reduction = -g_dot_p
    else:
        try:
            # Split the step into the span of P_ll and its orthogonal
            # complement (same decomposition used for the gradient).
            step_par = np.dot(self.P_ll.T, step_vec)
            perp_sq = max(0, norm(step_vec)**2 - norm(step_par)**2)
            perp_norm = np.sqrt(perp_sq)

            # Curvature term p^T B p: eigen-scaled parallel part plus
            # gamma-scaled complement.
            quad_term = np.sum(self.Lambda_1 * step_par**2) + self.gamma * perp_norm**2

            # Model-predicted reduction: -(g^T p + 0.5 p^T B p).
            pred_reduction = -(g_dot_p + 0.5 * quad_term)
        except Exception as e:
            print(f"Error computing quadratic model: {e}")
            # Fall back to the linear model on any numerical failure.
            pred_reduction = -g_dot_p

    # Guard against dividing by a vanishing predicted reduction.
    if abs(pred_reduction) < 1e-10:
        print("Warning: Predicted reduction is near zero")
        return 0.0

    ratio = actual_reduction / pred_reduction
    print(f"Actual reduction: {actual_reduction:.6e}, "
          f"Predicted reduction: {pred_reduction:.6e}, "
          f"Ratio: {ratio:.4f}")

    return ratio
|
|
527
|
+
|
|
528
|
+
def determine_step(self, dr):
    """Clip the step so no per-row displacement exceeds maxstep.

    Parameters
    ----------
    dr : ndarray
        Proposed step vector.

    Returns
    -------
    ndarray
        The step, uniformly rescaled when the longest per-row length
        exceeds the configured "maxstep"; otherwise unchanged.
    """
    max_step = self.config.get("maxstep")
    # No limit configured: pass the step through untouched.
    if max_step is None:
        return dr

    # View the step as rows of 3 components when possible, else one row.
    # NOTE(review): assumes Cartesian (x, y, z) triples per atom when
    # size is divisible by 3 — confirm caller layout.
    if dr.size % 3 == 0:
        rows = dr.reshape(-1, 3)
    else:
        rows = dr.reshape(-1, dr.size)

    row_lengths = np.sqrt((rows**2).sum(axis=1))
    worst = np.max(row_lengths)

    if worst > max_step:
        # Uniform scaling preserves the step direction.
        dr = dr * (max_step / worst)
        print(f"Step constrained by maxstep={max_step}")

    return dr
|
|
558
|
+
|
|
559
|
+
def trust_region_step(self, g, B_e, pre_B_e, actual_reduction=None):
    """Compute one trust-region step, adapting the radius when possible.

    Parameters
    ----------
    g : ndarray
        Current gradient.
    B_e : float
        Current energy/function value (kept for interface symmetry).
    pre_B_e : float
        Previous energy/function value (kept for interface symmetry).
    actual_reduction : float, optional
        Observed decrease in the objective; None before the first
        energy evaluation.

    Returns
    -------
    ndarray
        Step vector bounded by the current trust-region radius.
    """
    print("Trust region step calculation")

    # Before any reduction information exists (first iteration, or no
    # energy evaluation yet) just step with the current radius.
    if self.Initialization or actual_reduction is None:
        if self.Initialization:
            self.Initialization = False
            print("First iteration - using initial trust region radius")
        step = self.compute_lbfgs_tr_step(g, self.delta_tr)
        self.prev_move_vector = step
        return step

    # Classic trust-region radius update driven by the reduction ratio.
    rho = self.compute_reduction_ratio(g, self.prev_move_vector, actual_reduction)

    if rho < 0.25:
        # Poor model agreement: shrink, but never below delta_min.
        self.delta_tr = max(0.25 * self.delta_tr, self.delta_min)
        print(f"Shrinking trust region radius to {self.delta_tr:.4f}")
    elif rho > 0.75 and np.isclose(norm(self.prev_move_vector.flatten()), self.delta_tr, rtol=1e-2):
        # Excellent agreement on a boundary step: expand, capped at delta_hat.
        self.delta_tr = min(2.0 * self.delta_tr, self.delta_hat)
        print(f"Expanding trust region radius to {self.delta_tr:.4f}")
    else:
        print(f"Maintaining trust region radius at {self.delta_tr:.4f}")

    # Recompute the step under the (possibly updated) radius and keep it
    # for the next reduction-ratio evaluation.
    step = self.compute_lbfgs_tr_step(g, self.delta_tr)
    self.prev_move_vector = step
    return step
|
|
611
|
+
|
|
612
|
+
def run(self, geom_num_list, B_g, pre_B_g, pre_geom, B_e, pre_B_e, pre_move_vector, initial_geom_num_list, g, pre_g):
    """Advance the optimizer by a single iteration.

    Parameters
    ----------
    geom_num_list : ndarray
        Current geometry/position.
    B_g : ndarray
        Current (possibly biased) gradient.
    pre_B_g : ndarray
        Previous gradient (unused here; kept for interface symmetry).
    pre_geom : ndarray
        Previous geometry/position.
    B_e : float
        Current energy/function value.
    pre_B_e : float
        Previous energy/function value.
    pre_move_vector : ndarray
        Previous step vector (unused here; kept for interface symmetry).
    initial_geom_num_list : ndarray
        Initial geometry (unused here; kept for interface symmetry).
    g : ndarray
        Current raw gradient (used for the L-BFGS update).
    pre_g : ndarray
        Previous raw gradient (used for the L-BFGS update).

    Returns
    -------
    ndarray
        Negated step vector, following this class's sign convention.
    """
    print(f"\n{'='*50}\nIteration {self.iter}\n{'='*50}")

    # The observed energy decrease is only meaningful after the first
    # completed step.
    actual_reduction = None
    if not self.Initialization and self.iter > 0:
        actual_reduction = pre_B_e - B_e
        print(f"Energy change: {actual_reduction:.6e}")

    try:
        move_vector = self.trust_region_step(B_g, B_e, pre_B_e, actual_reduction)
    except Exception as err:
        print(f"Error in trust region step: {err}")
        print("Falling back to steepest descent")
        # Last-resort steepest descent, clipped to the TR radius.
        move_vector = -B_g
        sd_length = norm(move_vector)
        if sd_length > self.delta_tr:
            move_vector = move_vector * (self.delta_tr / sd_length)

    # Enforce the user-configured per-atom step cap, if any.
    move_vector = self.determine_step(move_vector)

    # Feed the new (s, y) pair to the L-BFGS memory once a previous
    # point exists.
    if self.iter > 0:
        # NOTE(review): reshaping to (len(geom_num_list), 1) assumes the
        # difference arrays hold one scalar per entry — confirm caller
        # layout.
        grad_diff = (g - pre_g).reshape(len(geom_num_list), 1)
        coord_diff = (geom_num_list - pre_geom).reshape(len(geom_num_list), 1)
        if not self.update_vectors(coord_diff, grad_diff):
            print("Warning: L-BFGS update skipped due to curvature condition failure")

    self.iter += 1
    return -move_vector
|