MultiOptPy 1.20.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multioptpy/Calculator/__init__.py +0 -0
- multioptpy/Calculator/ase_calculation_tools.py +424 -0
- multioptpy/Calculator/ase_tools/__init__.py +0 -0
- multioptpy/Calculator/ase_tools/fairchem.py +28 -0
- multioptpy/Calculator/ase_tools/gamess.py +19 -0
- multioptpy/Calculator/ase_tools/gaussian.py +165 -0
- multioptpy/Calculator/ase_tools/mace.py +28 -0
- multioptpy/Calculator/ase_tools/mopac.py +19 -0
- multioptpy/Calculator/ase_tools/nwchem.py +31 -0
- multioptpy/Calculator/ase_tools/orca.py +22 -0
- multioptpy/Calculator/ase_tools/pygfn0.py +37 -0
- multioptpy/Calculator/dxtb_calculation_tools.py +344 -0
- multioptpy/Calculator/emt_calculation_tools.py +458 -0
- multioptpy/Calculator/gpaw_calculation_tools.py +183 -0
- multioptpy/Calculator/lj_calculation_tools.py +314 -0
- multioptpy/Calculator/psi4_calculation_tools.py +334 -0
- multioptpy/Calculator/pwscf_calculation_tools.py +189 -0
- multioptpy/Calculator/pyscf_calculation_tools.py +327 -0
- multioptpy/Calculator/sqm1_calculation_tools.py +611 -0
- multioptpy/Calculator/sqm2_calculation_tools.py +376 -0
- multioptpy/Calculator/tblite_calculation_tools.py +352 -0
- multioptpy/Calculator/tersoff_calculation_tools.py +818 -0
- multioptpy/Constraint/__init__.py +0 -0
- multioptpy/Constraint/constraint_condition.py +834 -0
- multioptpy/Coordinate/__init__.py +0 -0
- multioptpy/Coordinate/polar_coordinate.py +199 -0
- multioptpy/Coordinate/redundant_coordinate.py +638 -0
- multioptpy/IRC/__init__.py +0 -0
- multioptpy/IRC/converge_criteria.py +28 -0
- multioptpy/IRC/dvv.py +544 -0
- multioptpy/IRC/euler.py +439 -0
- multioptpy/IRC/hpc.py +564 -0
- multioptpy/IRC/lqa.py +540 -0
- multioptpy/IRC/modekill.py +662 -0
- multioptpy/IRC/rk4.py +579 -0
- multioptpy/Interpolation/__init__.py +0 -0
- multioptpy/Interpolation/adaptive_interpolation.py +283 -0
- multioptpy/Interpolation/binomial_interpolation.py +179 -0
- multioptpy/Interpolation/geodesic_interpolation.py +785 -0
- multioptpy/Interpolation/interpolation.py +156 -0
- multioptpy/Interpolation/linear_interpolation.py +473 -0
- multioptpy/Interpolation/savitzky_golay_interpolation.py +252 -0
- multioptpy/Interpolation/spline_interpolation.py +353 -0
- multioptpy/MD/__init__.py +0 -0
- multioptpy/MD/thermostat.py +185 -0
- multioptpy/MEP/__init__.py +0 -0
- multioptpy/MEP/pathopt_bneb_force.py +443 -0
- multioptpy/MEP/pathopt_dmf_force.py +448 -0
- multioptpy/MEP/pathopt_dneb_force.py +130 -0
- multioptpy/MEP/pathopt_ewbneb_force.py +207 -0
- multioptpy/MEP/pathopt_gpneb_force.py +512 -0
- multioptpy/MEP/pathopt_lup_force.py +113 -0
- multioptpy/MEP/pathopt_neb_force.py +225 -0
- multioptpy/MEP/pathopt_nesb_force.py +205 -0
- multioptpy/MEP/pathopt_om_force.py +153 -0
- multioptpy/MEP/pathopt_qsm_force.py +174 -0
- multioptpy/MEP/pathopt_qsmv2_force.py +304 -0
- multioptpy/ModelFunction/__init__.py +7 -0
- multioptpy/ModelFunction/avoiding_model_function.py +29 -0
- multioptpy/ModelFunction/binary_image_ts_search_model_function.py +47 -0
- multioptpy/ModelFunction/conical_model_function.py +26 -0
- multioptpy/ModelFunction/opt_meci.py +50 -0
- multioptpy/ModelFunction/opt_mesx.py +47 -0
- multioptpy/ModelFunction/opt_mesx_2.py +49 -0
- multioptpy/ModelFunction/seam_model_function.py +27 -0
- multioptpy/ModelHessian/__init__.py +0 -0
- multioptpy/ModelHessian/approx_hessian.py +147 -0
- multioptpy/ModelHessian/calc_params.py +227 -0
- multioptpy/ModelHessian/fischer.py +236 -0
- multioptpy/ModelHessian/fischerd3.py +360 -0
- multioptpy/ModelHessian/fischerd4.py +398 -0
- multioptpy/ModelHessian/gfn0xtb.py +633 -0
- multioptpy/ModelHessian/gfnff.py +709 -0
- multioptpy/ModelHessian/lindh.py +165 -0
- multioptpy/ModelHessian/lindh2007d2.py +707 -0
- multioptpy/ModelHessian/lindh2007d3.py +822 -0
- multioptpy/ModelHessian/lindh2007d4.py +1030 -0
- multioptpy/ModelHessian/morse.py +106 -0
- multioptpy/ModelHessian/schlegel.py +144 -0
- multioptpy/ModelHessian/schlegeld3.py +322 -0
- multioptpy/ModelHessian/schlegeld4.py +559 -0
- multioptpy/ModelHessian/shortrange.py +346 -0
- multioptpy/ModelHessian/swartd2.py +496 -0
- multioptpy/ModelHessian/swartd3.py +706 -0
- multioptpy/ModelHessian/swartd4.py +918 -0
- multioptpy/ModelHessian/tshess.py +40 -0
- multioptpy/Optimizer/QHAdam.py +61 -0
- multioptpy/Optimizer/__init__.py +0 -0
- multioptpy/Optimizer/abc_fire.py +83 -0
- multioptpy/Optimizer/adabelief.py +58 -0
- multioptpy/Optimizer/adabound.py +68 -0
- multioptpy/Optimizer/adadelta.py +65 -0
- multioptpy/Optimizer/adaderivative.py +56 -0
- multioptpy/Optimizer/adadiff.py +68 -0
- multioptpy/Optimizer/adafactor.py +70 -0
- multioptpy/Optimizer/adam.py +65 -0
- multioptpy/Optimizer/adamax.py +62 -0
- multioptpy/Optimizer/adamod.py +83 -0
- multioptpy/Optimizer/adamw.py +65 -0
- multioptpy/Optimizer/adiis.py +523 -0
- multioptpy/Optimizer/afire_neb.py +282 -0
- multioptpy/Optimizer/block_hessian_update.py +709 -0
- multioptpy/Optimizer/c2diis.py +491 -0
- multioptpy/Optimizer/component_wise_scaling.py +405 -0
- multioptpy/Optimizer/conjugate_gradient.py +82 -0
- multioptpy/Optimizer/conjugate_gradient_neb.py +345 -0
- multioptpy/Optimizer/coordinate_locking.py +405 -0
- multioptpy/Optimizer/dic_rsirfo.py +1015 -0
- multioptpy/Optimizer/ediis.py +417 -0
- multioptpy/Optimizer/eve.py +76 -0
- multioptpy/Optimizer/fastadabelief.py +61 -0
- multioptpy/Optimizer/fire.py +77 -0
- multioptpy/Optimizer/fire2.py +249 -0
- multioptpy/Optimizer/fire_neb.py +92 -0
- multioptpy/Optimizer/gan_step.py +486 -0
- multioptpy/Optimizer/gdiis.py +609 -0
- multioptpy/Optimizer/gediis.py +203 -0
- multioptpy/Optimizer/geodesic_step.py +433 -0
- multioptpy/Optimizer/gpmin.py +633 -0
- multioptpy/Optimizer/gpr_step.py +364 -0
- multioptpy/Optimizer/gradientdescent.py +78 -0
- multioptpy/Optimizer/gradientdescent_neb.py +52 -0
- multioptpy/Optimizer/hessian_update.py +433 -0
- multioptpy/Optimizer/hybrid_rfo.py +998 -0
- multioptpy/Optimizer/kdiis.py +625 -0
- multioptpy/Optimizer/lars.py +21 -0
- multioptpy/Optimizer/lbfgs.py +253 -0
- multioptpy/Optimizer/lbfgs_neb.py +355 -0
- multioptpy/Optimizer/linesearch.py +236 -0
- multioptpy/Optimizer/lookahead.py +40 -0
- multioptpy/Optimizer/nadam.py +64 -0
- multioptpy/Optimizer/newton.py +200 -0
- multioptpy/Optimizer/prodigy.py +70 -0
- multioptpy/Optimizer/purtubation.py +16 -0
- multioptpy/Optimizer/quickmin_neb.py +245 -0
- multioptpy/Optimizer/radam.py +75 -0
- multioptpy/Optimizer/rfo_neb.py +302 -0
- multioptpy/Optimizer/ric_rfo.py +842 -0
- multioptpy/Optimizer/rl_step.py +627 -0
- multioptpy/Optimizer/rmspropgrave.py +65 -0
- multioptpy/Optimizer/rsirfo.py +1647 -0
- multioptpy/Optimizer/rsprfo.py +1056 -0
- multioptpy/Optimizer/sadam.py +60 -0
- multioptpy/Optimizer/samsgrad.py +63 -0
- multioptpy/Optimizer/tr_lbfgs.py +678 -0
- multioptpy/Optimizer/trim.py +273 -0
- multioptpy/Optimizer/trust_radius.py +207 -0
- multioptpy/Optimizer/trust_radius_neb.py +121 -0
- multioptpy/Optimizer/yogi.py +60 -0
- multioptpy/OtherMethod/__init__.py +0 -0
- multioptpy/OtherMethod/addf.py +1150 -0
- multioptpy/OtherMethod/dimer.py +895 -0
- multioptpy/OtherMethod/elastic_image_pair.py +629 -0
- multioptpy/OtherMethod/modelfunction.py +456 -0
- multioptpy/OtherMethod/newton_traj.py +454 -0
- multioptpy/OtherMethod/twopshs.py +1095 -0
- multioptpy/PESAnalyzer/__init__.py +0 -0
- multioptpy/PESAnalyzer/calc_irc_curvature.py +125 -0
- multioptpy/PESAnalyzer/cmds_analysis.py +152 -0
- multioptpy/PESAnalyzer/koopman_analysis.py +268 -0
- multioptpy/PESAnalyzer/pca_analysis.py +314 -0
- multioptpy/Parameters/__init__.py +0 -0
- multioptpy/Parameters/atomic_mass.py +20 -0
- multioptpy/Parameters/atomic_number.py +22 -0
- multioptpy/Parameters/covalent_radii.py +44 -0
- multioptpy/Parameters/d2.py +61 -0
- multioptpy/Parameters/d3.py +63 -0
- multioptpy/Parameters/d4.py +103 -0
- multioptpy/Parameters/dreiding.py +34 -0
- multioptpy/Parameters/gfn0xtb_param.py +137 -0
- multioptpy/Parameters/gfnff_param.py +315 -0
- multioptpy/Parameters/gnb.py +104 -0
- multioptpy/Parameters/parameter.py +22 -0
- multioptpy/Parameters/uff.py +72 -0
- multioptpy/Parameters/unit_values.py +20 -0
- multioptpy/Potential/AFIR_potential.py +55 -0
- multioptpy/Potential/LJ_repulsive_potential.py +345 -0
- multioptpy/Potential/__init__.py +0 -0
- multioptpy/Potential/anharmonic_keep_potential.py +28 -0
- multioptpy/Potential/asym_elllipsoidal_potential.py +718 -0
- multioptpy/Potential/electrostatic_potential.py +69 -0
- multioptpy/Potential/flux_potential.py +30 -0
- multioptpy/Potential/gaussian_potential.py +101 -0
- multioptpy/Potential/idpp.py +516 -0
- multioptpy/Potential/keep_angle_potential.py +146 -0
- multioptpy/Potential/keep_dihedral_angle_potential.py +105 -0
- multioptpy/Potential/keep_outofplain_angle_potential.py +70 -0
- multioptpy/Potential/keep_potential.py +99 -0
- multioptpy/Potential/mechano_force_potential.py +74 -0
- multioptpy/Potential/nanoreactor_potential.py +52 -0
- multioptpy/Potential/potential.py +896 -0
- multioptpy/Potential/spacer_model_potential.py +221 -0
- multioptpy/Potential/switching_potential.py +258 -0
- multioptpy/Potential/universal_potential.py +34 -0
- multioptpy/Potential/value_range_potential.py +36 -0
- multioptpy/Potential/void_point_potential.py +25 -0
- multioptpy/SQM/__init__.py +0 -0
- multioptpy/SQM/sqm1/__init__.py +0 -0
- multioptpy/SQM/sqm1/sqm1_core.py +1792 -0
- multioptpy/SQM/sqm2/__init__.py +0 -0
- multioptpy/SQM/sqm2/calc_tools.py +95 -0
- multioptpy/SQM/sqm2/sqm2_basis.py +850 -0
- multioptpy/SQM/sqm2/sqm2_bond.py +119 -0
- multioptpy/SQM/sqm2/sqm2_core.py +303 -0
- multioptpy/SQM/sqm2/sqm2_data.py +1229 -0
- multioptpy/SQM/sqm2/sqm2_disp.py +65 -0
- multioptpy/SQM/sqm2/sqm2_eeq.py +243 -0
- multioptpy/SQM/sqm2/sqm2_overlapint.py +704 -0
- multioptpy/SQM/sqm2/sqm2_qm.py +578 -0
- multioptpy/SQM/sqm2/sqm2_rep.py +66 -0
- multioptpy/SQM/sqm2/sqm2_srb.py +70 -0
- multioptpy/Thermo/__init__.py +0 -0
- multioptpy/Thermo/normal_mode_analyzer.py +865 -0
- multioptpy/Utils/__init__.py +0 -0
- multioptpy/Utils/bond_connectivity.py +264 -0
- multioptpy/Utils/calc_tools.py +884 -0
- multioptpy/Utils/oniom.py +96 -0
- multioptpy/Utils/pbc.py +48 -0
- multioptpy/Utils/riemann_curvature.py +208 -0
- multioptpy/Utils/symmetry_analyzer.py +482 -0
- multioptpy/Visualization/__init__.py +0 -0
- multioptpy/Visualization/visualization.py +156 -0
- multioptpy/WFAnalyzer/MO_analysis.py +104 -0
- multioptpy/WFAnalyzer/__init__.py +0 -0
- multioptpy/Wrapper/__init__.py +0 -0
- multioptpy/Wrapper/autots.py +1239 -0
- multioptpy/Wrapper/ieip_wrapper.py +93 -0
- multioptpy/Wrapper/md_wrapper.py +92 -0
- multioptpy/Wrapper/neb_wrapper.py +94 -0
- multioptpy/Wrapper/optimize_wrapper.py +76 -0
- multioptpy/__init__.py +5 -0
- multioptpy/entrypoints.py +916 -0
- multioptpy/fileio.py +660 -0
- multioptpy/ieip.py +340 -0
- multioptpy/interface.py +1086 -0
- multioptpy/irc.py +529 -0
- multioptpy/moleculardynamics.py +432 -0
- multioptpy/neb.py +1267 -0
- multioptpy/optimization.py +1553 -0
- multioptpy/optimizer.py +709 -0
- multioptpy-1.20.2.dist-info/METADATA +438 -0
- multioptpy-1.20.2.dist-info/RECORD +246 -0
- multioptpy-1.20.2.dist-info/WHEEL +5 -0
- multioptpy-1.20.2.dist-info/entry_points.txt +9 -0
- multioptpy-1.20.2.dist-info/licenses/LICENSE +674 -0
- multioptpy-1.20.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Block Hessian Update Class
|
|
5
|
+
|
|
6
|
+
ref: https://arxiv.org/pdf/1609.00318
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
def symm(A):
    """Return the symmetric part of the matrix A, i.e. (A + A^T) / 2."""
    transposed = A.T
    return 0.5 * (A + transposed)
|
|
11
|
+
|
|
12
|
+
def safe_inv(A, reg=1e-10):
    """
    Invert the square matrix A, degrading gracefully when inversion fails.

    The attempts are, in order:
      1. plain inverse of A;
      2. inverse of A + reg * I (diagonal shift);
      3. pseudo-inverse of A + reg * I.
    A later attempt is used only when the previous one raised LinAlgError.
    """
    try:
        return np.linalg.inv(A)
    except np.linalg.LinAlgError:
        pass

    shifted = A + reg * np.eye(A.shape[0])
    try:
        inverse = np.linalg.inv(shifted)
    except np.linalg.LinAlgError:
        inverse = np.linalg.pinv(shifted)
    return inverse
|
|
22
|
+
|
|
23
|
+
class BlockHessianUpdate:
|
|
24
|
+
def __init__(self, block_size=4, max_window=8, denom_threshold=1e-12, inv_reg=1e-10):
|
|
25
|
+
"""
|
|
26
|
+
block_size: number of stored steps to use when performing a block update
|
|
27
|
+
max_window: maximum history length to retain (>= block_size)
|
|
28
|
+
"""
|
|
29
|
+
assert max_window >= block_size
|
|
30
|
+
self.block_size = int(block_size)
|
|
31
|
+
self.max_window = int(max_window)
|
|
32
|
+
self.denom_threshold = denom_threshold
|
|
33
|
+
self.inv_reg = inv_reg
|
|
34
|
+
|
|
35
|
+
# history stored as lists of vectors (each vector shape (n,))
|
|
36
|
+
self.S_list = []
|
|
37
|
+
self.Y_list = []
|
|
38
|
+
|
|
39
|
+
# Default parameters for Double Damping
|
|
40
|
+
self.dd_mu1 = 0.2
|
|
41
|
+
self.dd_mu2 = 0.2
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def delete_old_data(self):
|
|
45
|
+
"""Drop the oldest history item (if any)."""
|
|
46
|
+
if self.S_list:
|
|
47
|
+
self.S_list.pop(0)
|
|
48
|
+
self.Y_list.pop(0)
|
|
49
|
+
|
|
50
|
+
def _push_history(self, s, y):
|
|
51
|
+
"""Append new step (s,y), maintain window."""
|
|
52
|
+
self.S_list.append(s.copy())
|
|
53
|
+
self.Y_list.append(y.copy())
|
|
54
|
+
if len(self.S_list) > self.max_window:
|
|
55
|
+
self.S_list.pop(0); self.Y_list.pop(0)
|
|
56
|
+
|
|
57
|
+
def _assemble_block(self, use_last_k=None):
|
|
58
|
+
"""Return S (n x q) and Y (n x q) matrices from most recent columns."""
|
|
59
|
+
if use_last_k is None:
|
|
60
|
+
use_last_k = min(self.block_size, len(self.S_list))
|
|
61
|
+
k = min(use_last_k, len(self.S_list))
|
|
62
|
+
if k == 0:
|
|
63
|
+
return None, None
|
|
64
|
+
# take last k entries
|
|
65
|
+
Scols = [self.S_list[-k + i] for i in range(k)]
|
|
66
|
+
Ycols = [self.Y_list[-k + i] for i in range(k)]
|
|
67
|
+
S = np.column_stack(Scols) # n x k
|
|
68
|
+
Y = np.column_stack(Ycols)
|
|
69
|
+
return S, Y
|
|
70
|
+
|
|
71
|
+
# -----------------------------------------------------------------
|
|
72
|
+
# --- Base Block Update Methods ---
|
|
73
|
+
# -----------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
def _block_BFGS_update(self, B, S, Y):
    """
    Block BFGS update of the Hessian approximation B:

        B <- B - B S (S^T B S)^{-1} S^T B + Y (S^T Y)^{-1} Y^T

    S, Y are n x q with columns as steps / gradient differences.

    Columns are filtered in two stages before the formula is applied:
      1. the number of singular values of S above 1e-8 fixes how many
         columns are kept; the columns of largest norm are retained;
      2. columns whose curvature y^T s does not exceed
         self.denom_threshold are dropped.
    Returns a symmetrized copy of the updated matrix, or a plain copy of B
    when no usable column remains.
    """
    if S is None or Y is None or S.shape[1] == 0:
        return B.copy()

    # Stage 1: estimate the numerical rank of S (only singular values are needed).
    svals = np.linalg.svd(S, compute_uv=False)
    rank = int(np.count_nonzero(svals > 1e-8))
    if rank == 0:
        return B.copy()
    col_norms = np.linalg.norm(S, axis=0)
    # Keep the `rank` largest-norm columns, restored to their original order.
    keep_idx = np.sort(np.argsort(-col_norms)[:rank])
    Sf = S[:, keep_idx]
    Yf = Y[:, keep_idx]

    # Stage 2: curvature condition y^T s > threshold, evaluated per column.
    curvature = np.sum(Yf * Sf, axis=0)
    good = curvature > self.denom_threshold
    if not np.any(good):
        return B.copy()
    # Actually apply the filter; previously the surviving column list was
    # computed but never used, so bad-curvature columns leaked into the update.
    Sf = Sf[:, good]
    Yf = Yf[:, good]

    BS = np.dot(B, Sf)
    M1 = np.dot(Sf.T, BS)   # q x q, S^T B S
    M2 = np.dot(Sf.T, Yf)   # q x q, S^T Y

    invM1 = safe_inv(M1, reg=self.inv_reg)
    invM2 = safe_inv(M2, reg=self.inv_reg)

    term1 = np.dot(np.dot(BS, invM1), np.dot(Sf.T, B))
    term2 = np.dot(np.dot(Yf, invM2), Yf.T)
    return symm(B - term1 + term2)
|
|
119
|
+
|
|
120
|
+
def _block_PSB_update(self, B, S, Y, denom_threshold=1e-8):
|
|
121
|
+
"""
|
|
122
|
+
Block PSB Hessian update. Applies single-step PSB for each column.
|
|
123
|
+
"""
|
|
124
|
+
if S is None or Y is None or S.shape[1] == 0:
|
|
125
|
+
return B.copy()
|
|
126
|
+
|
|
127
|
+
# SVD filtering for near linear dependence in S
|
|
128
|
+
_, svals, _ = np.linalg.svd(S, full_matrices=False)
|
|
129
|
+
keep = svals > denom_threshold
|
|
130
|
+
if not np.any(keep):
|
|
131
|
+
return B.copy()
|
|
132
|
+
|
|
133
|
+
rank = np.sum(keep)
|
|
134
|
+
col_norms = np.linalg.norm(S, axis=0)
|
|
135
|
+
idx_sorted = np.argsort(-col_norms)
|
|
136
|
+
keep_idx = np.sort(idx_sorted[:rank])
|
|
137
|
+
Sf = S[:, keep_idx]
|
|
138
|
+
Yf = Y[:, keep_idx]
|
|
139
|
+
|
|
140
|
+
# Calculate block_1 and denominator for each column
|
|
141
|
+
n, q = Sf.shape
|
|
142
|
+
delta_hess_total = np.zeros((n,n))
|
|
143
|
+
|
|
144
|
+
for i in range(q):
|
|
145
|
+
s = Sf[:, i:i+1] # column vector, shape (n, 1)
|
|
146
|
+
y = Yf[:, i:i+1]
|
|
147
|
+
block_1 = y - np.dot(B, s)
|
|
148
|
+
block_2_denominator = float(np.dot(s.T, s))
|
|
149
|
+
if np.abs(block_2_denominator) >= denom_threshold:
|
|
150
|
+
block_2 = np.dot(s, s.T) / (block_2_denominator ** 2)
|
|
151
|
+
delta_hess_P = (-np.dot(block_1.T, s) * block_2 +
|
|
152
|
+
(np.dot(block_1, s.T) + np.dot(s, block_1.T)) / block_2_denominator)
|
|
153
|
+
delta_hess_total += delta_hess_P
|
|
154
|
+
# else: delta_hess_P is zero, so do nothing
|
|
155
|
+
|
|
156
|
+
Bp = B + delta_hess_total
|
|
157
|
+
return Bp # Already symmetric if B is
|
|
158
|
+
|
|
159
|
+
def _block_SR1_update(self, B, S, Y):
    """
    Block generalization of the SR1 update.

    With the residual R = Y - B S, the correction is
        Delta = R (S^T R)^{-1} R^T
    and the symmetrized B + Delta is returned. A copy of B is returned
    when S or Y is missing or S has no columns.
    """
    nothing_to_do = S is None or Y is None or S.shape[1] == 0
    if nothing_to_do:
        return B.copy()

    residual = Y - np.dot(B, S)
    # Small q x q system; safe_inv handles a singular S^T R.
    middle = safe_inv(np.dot(S.T, residual), reg=self.inv_reg)
    correction = np.dot(np.dot(residual, middle), residual.T)
    return symm(B + correction)
|
|
172
|
+
|
|
173
|
+
def _block_CFD_SR1_update(self, B, S, Y):
    """
    Block CFD-SR1 update: identical in form to the block SR1 update, but
    built from the doubled residual R = 2.0 * (Y - B S).

    Returns the symmetrized B + R (S^T R)^{-1} R^T, or a copy of B when
    S or Y is missing or S has no columns.
    """
    nothing_to_do = S is None or Y is None or S.shape[1] == 0
    if nothing_to_do:
        return B.copy()

    residual = 2.0 * (Y - np.dot(B, S))
    # Small q x q system; safe_inv handles a singular S^T R.
    middle = safe_inv(np.dot(S.T, residual), reg=self.inv_reg)
    correction = np.dot(np.dot(residual, middle), residual.T)
    return symm(B + correction)
|
|
185
|
+
|
|
186
|
+
# -----------------------------------------------------------------
|
|
187
|
+
# --- Helper for Calculating Weights ---
|
|
188
|
+
# -----------------------------------------------------------------
|
|
189
|
+
|
|
190
|
+
def _get_individual_weights(self, B, S, Y, is_cfd=False, use_bofill_logic=False):
|
|
191
|
+
"""
|
|
192
|
+
Internal helper to calculate individual weights for mixing.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
is_cfd (bool): If True, use A = 2.0 * (y - Bs)
|
|
196
|
+
use_bofill_logic (bool):
|
|
197
|
+
If True (for Bofill/CFD-FSB), returns w_j = c_j
|
|
198
|
+
If False (for FSB), returns w_j = sqrt(c_j)
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
c_list (list of phi^2), w_list (list of mixing weights)
|
|
202
|
+
"""
|
|
203
|
+
if S is None or Y is None:
|
|
204
|
+
return [], []
|
|
205
|
+
|
|
206
|
+
q = S.shape[1]
|
|
207
|
+
c_list = [] # phi^2
|
|
208
|
+
w_list = [] # mixing weight (phi or phi^2)
|
|
209
|
+
|
|
210
|
+
for j in range(q):
|
|
211
|
+
s = S[:, j]
|
|
212
|
+
y = Y[:, j]
|
|
213
|
+
|
|
214
|
+
A = y - np.dot(B, s)
|
|
215
|
+
if is_cfd:
|
|
216
|
+
A = 2.0 * A
|
|
217
|
+
|
|
218
|
+
num = (np.dot(A.T, s)) ** 2
|
|
219
|
+
denom = (np.dot(A.T, A)) * (np.dot(s.T, s))
|
|
220
|
+
c = num / denom if np.abs(denom) > self.denom_threshold else 0.0
|
|
221
|
+
if np.isnan(c):
|
|
222
|
+
c = 0.0
|
|
223
|
+
c = float(max(0.0, min(1.0, c)))
|
|
224
|
+
c_list.append(c)
|
|
225
|
+
|
|
226
|
+
if use_bofill_logic:
|
|
227
|
+
w_list.append(c) # Use c (phi^2)
|
|
228
|
+
else:
|
|
229
|
+
w_list.append(np.sqrt(c)) # Use sqrt(c) (phi)
|
|
230
|
+
|
|
231
|
+
return c_list, w_list
|
|
232
|
+
|
|
233
|
+
# -----------------------------------------------------------------
|
|
234
|
+
# --- "Mean Weight" Mixed Methods ---
|
|
235
|
+
# -----------------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
def _block_FSB_update(self, B, S, Y):
    """
    Block-FSB (mean weight): blend the block-SR1 and block-BFGS corrections.

    The blend factor is the mean of the per-column weights sqrt(c_j)
    (0.0 when there are none); SR1 gets the factor, BFGS gets one minus it.
    Returns the symmetrized result, or a copy of B when S or Y is None.
    """
    if S is None or Y is None:
        return B.copy()

    # Corrections relative to the input B.
    sr1_step = self._block_SR1_update(B, S, Y) - B
    bfgs_step = self._block_BFGS_update(B, S, Y) - B

    # w_j = sqrt(c_j) for FSB; only the weights are needed here.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=False, use_bofill_logic=False)
    w_mean = float(np.mean(weights)) if weights else 0.0

    return symm(B + w_mean * sr1_step + (1.0 - w_mean) * bfgs_step)
|
|
256
|
+
|
|
257
|
+
def _block_CFD_FSB_update(self, B, S, Y):
    """
    Block-CFD-FSB (mean weight): blend the block-CFD-SR1 and block-BFGS
    corrections.

    The blend factor is the mean of the per-column weights c_j computed
    from the doubled residual (0.0 when there are none); CFD-SR1 gets the
    factor, BFGS gets one minus it.
    Returns the symmetrized result, or a copy of B when S or Y is None.
    """
    if S is None or Y is None:
        return B.copy()

    # Corrections relative to the input B.
    sr1_step = self._block_CFD_SR1_update(B, S, Y) - B
    bfgs_step = self._block_BFGS_update(B, S, Y) - B

    # w_j = c_j for CFD-FSB; only the weights are needed here.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=True, use_bofill_logic=True)
    w_mean = float(np.mean(weights)) if weights else 0.0

    return symm(B + w_mean * sr1_step + (1.0 - w_mean) * bfgs_step)
|
|
276
|
+
|
|
277
|
+
def _block_Bofill_update(self, B, S, Y):
    """
    Block-Bofill (mean weight): blend the block-SR1 and block-PSB
    corrections.

    The blend factor is the mean of the per-column weights c_j (0.0 when
    there are none); SR1 gets the factor, PSB gets one minus it.
    Returns the symmetrized result, or a copy of B when S or Y is None.
    """
    if S is None or Y is None:
        return B.copy()

    # Corrections relative to the input B.
    psb_step = self._block_PSB_update(B, S, Y) - B
    sr1_step = self._block_SR1_update(B, S, Y) - B

    # w_j = c_j for Bofill; only the weights are needed here.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=False, use_bofill_logic=True)
    w_mean = float(np.mean(weights)) if weights else 0.0

    return symm(B + w_mean * (sr1_step) + (1.0 - w_mean) * (psb_step))
|
|
295
|
+
|
|
296
|
+
def _block_CFD_Bofill_update(self, B, S, Y):
    """
    Block-CFD-Bofill (mean weight): blend the block-CFD-SR1 and block-PSB
    corrections.

    The blend factor is the mean of the per-column weights c_j computed
    from the doubled residual (0.0 when there are none); CFD-SR1 gets the
    factor, PSB gets one minus it.
    Returns the symmetrized result, or a copy of B when S or Y is None.
    """
    if S is None or Y is None:
        return B.copy()

    # Corrections relative to the input B.
    psb_step = self._block_PSB_update(B, S, Y) - B
    sr1_step = self._block_CFD_SR1_update(B, S, Y) - B

    # w_j = c_j for CFD-Bofill; only the weights are needed here.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=True, use_bofill_logic=True)
    w_mean = float(np.mean(weights)) if weights else 0.0

    return symm(B + w_mean * (sr1_step) + (1.0 - w_mean) * (psb_step))
|
|
314
|
+
|
|
315
|
+
# -----------------------------------------------------------------
|
|
316
|
+
# --- "Weighted Subspace" Mixed Methods ---
|
|
317
|
+
# -----------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
def _block_FSB_update_weighted(self, B, S, Y):
    """
    Block-FSB update, "Weighted Subspace" variant.

    Column j of S and Y is scaled by w_j = sqrt(c_j) for the SR1 part and
    by 1 - w_j for the BFGS part; the two resulting corrections are added
    to B and the sum is symmetrized. Returns a copy of B when S or Y is None.

    NOTE(review): both S and Y are scaled by the same diagonal matrix, and
    the block SR1/BFGS formulas look invariant under such a scaling when
    all weights are non-zero -- confirm the weighting has the intended effect.
    """
    if S is None or Y is None:
        return B.copy()

    print("Calculating Weighted Subspace FSB update")
    # Per-column weights (w_j = sqrt(c_j) for FSB) as diagonal scalings.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=False, use_bofill_logic=False)
    scale_sr1 = np.diag(weights)
    scale_bfgs = np.diag([1.0 - w for w in weights])

    # Correction from the SR1-weighted columns.
    print("... calculating weighted SR1 subspace ...")
    sr1_step = self._block_SR1_update(
        B, np.dot(S, scale_sr1), np.dot(Y, scale_sr1)) - B

    # Correction from the BFGS-weighted columns.
    print("... calculating weighted BFGS subspace ...")
    bfgs_step = self._block_BFGS_update(
        B, np.dot(S, scale_bfgs), np.dot(Y, scale_bfgs)) - B

    return symm(B + sr1_step + bfgs_step)
|
|
348
|
+
|
|
349
|
+
def _block_CFD_FSB_update_weighted(self, B, S, Y):
    """
    Block-CFD-FSB update, "Weighted Subspace" variant.

    Column j of S and Y is scaled by w_j = c_j (doubled-residual weights)
    for the CFD-SR1 part and by 1 - w_j for the BFGS part; the two
    resulting corrections are added to B and the sum is symmetrized.
    Returns a copy of B when S or Y is None.

    NOTE(review): both S and Y are scaled by the same diagonal matrix, and
    the block formulas look invariant under such a scaling when all
    weights are non-zero -- confirm the weighting has the intended effect.
    """
    if S is None or Y is None:
        return B.copy()

    print("Calculating Weighted Subspace CFD-FSB update")
    # Per-column weights (w_j = c_j for CFD-FSB) as diagonal scalings.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=True, use_bofill_logic=True)
    scale_sr1 = np.diag(weights)
    scale_bfgs = np.diag([1.0 - w for w in weights])

    # Correction from the CFD-SR1-weighted columns.
    print("... calculating weighted CFD-SR1 subspace ...")
    sr1_step = self._block_CFD_SR1_update(
        B, np.dot(S, scale_sr1), np.dot(Y, scale_sr1)) - B

    # Correction from the BFGS-weighted columns.
    print("... calculating weighted BFGS subspace ...")
    bfgs_step = self._block_BFGS_update(
        B, np.dot(S, scale_bfgs), np.dot(Y, scale_bfgs)) - B

    return symm(B + sr1_step + bfgs_step)
|
|
378
|
+
|
|
379
|
+
def _block_Bofill_update_weighted(self, B, S, Y):
    """
    Block-Bofill update, "Weighted Subspace" variant.

    Column j of S and Y is scaled by w_j = c_j for the SR1 part and by
    1 - w_j for the PSB part; the two resulting corrections are added to
    B and the sum is symmetrized. Returns a copy of B when S or Y is None.

    NOTE(review): both S and Y are scaled by the same diagonal matrix, and
    the SR1/PSB formulas look invariant under such a scaling when all
    weights are non-zero -- confirm the weighting has the intended effect.
    """
    if S is None or Y is None:
        return B.copy()

    print("Calculating Weighted Subspace Bofill update")
    # Per-column weights (w_j = c_j for Bofill) as diagonal scalings.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=False, use_bofill_logic=True)
    scale_sr1 = np.diag(weights)
    scale_psb = np.diag([1.0 - w for w in weights])

    # Correction from the SR1-weighted columns.
    print("... calculating weighted SR1 subspace ...")
    sr1_step = self._block_SR1_update(
        B, np.dot(S, scale_sr1), np.dot(Y, scale_sr1)) - B

    # Correction from the PSB-weighted columns.
    print("... calculating weighted PSB subspace ...")
    psb_step = self._block_PSB_update(
        B, np.dot(S, scale_psb), np.dot(Y, scale_psb)) - B

    return symm(B + sr1_step + psb_step)
|
|
408
|
+
|
|
409
|
+
def _block_CFD_Bofill_update_weighted(self, B, S, Y):
    """
    Block-CFD-Bofill update, "Weighted Subspace" variant.

    Column j of S and Y is scaled by w_j = c_j (doubled-residual weights)
    for the CFD-SR1 part and by 1 - w_j for the PSB part; the two
    resulting corrections are added to B and the sum is symmetrized.
    Returns a copy of B when S or Y is None.

    NOTE(review): both S and Y are scaled by the same diagonal matrix, and
    the SR1/PSB formulas look invariant under such a scaling when all
    weights are non-zero -- confirm the weighting has the intended effect.
    """
    if S is None or Y is None:
        return B.copy()

    print("Calculating Weighted Subspace CFD-Bofill update")
    # Per-column weights (w_j = c_j for CFD-Bofill) as diagonal scalings.
    _, weights = self._get_individual_weights(
        B, S, Y, is_cfd=True, use_bofill_logic=True)
    scale_sr1 = np.diag(weights)
    scale_psb = np.diag([1.0 - w for w in weights])

    # Correction from the CFD-SR1-weighted columns.
    print("... calculating weighted CFD-SR1 subspace ...")
    sr1_step = self._block_CFD_SR1_update(
        B, np.dot(S, scale_sr1), np.dot(Y, scale_sr1)) - B

    # Correction from the PSB-weighted columns.
    print("... calculating weighted PSB subspace ...")
    psb_step = self._block_PSB_update(
        B, np.dot(S, scale_psb), np.dot(Y, scale_psb)) - B

    return symm(B + sr1_step + psb_step)
|
|
438
|
+
|
|
439
|
+
# -----------------------------------------------------------------
|
|
440
|
+
# --- Public Methods ---
|
|
441
|
+
# -----------------------------------------------------------------
|
|
442
|
+
|
|
443
|
+
def block_BFGS_hessian_update(self, B, displacement, delta_grad):
|
|
444
|
+
print("Block BFGS update method")
|
|
445
|
+
s = displacement.reshape(-1)
|
|
446
|
+
y = delta_grad.reshape(-1)
|
|
447
|
+
self._push_history(s, y)
|
|
448
|
+
S, Y = self._assemble_block(self.block_size)
|
|
449
|
+
Bp = self._block_BFGS_update(B, S, Y)
|
|
450
|
+
self.delete_old_data()
|
|
451
|
+
return Bp - B # Return deltaB
|
|
452
|
+
|
|
453
|
+
def block_FSB_hessian_update(self, B, displacement, delta_grad):
|
|
454
|
+
print("Block FSB update method (Mean Weight)")
|
|
455
|
+
s = displacement.reshape(-1)
|
|
456
|
+
y = delta_grad.reshape(-1)
|
|
457
|
+
self._push_history(s, y)
|
|
458
|
+
S, Y = self._assemble_block(self.block_size)
|
|
459
|
+
Bp = self._block_FSB_update(B, S, Y)
|
|
460
|
+
self.delete_old_data()
|
|
461
|
+
return Bp - B # Return deltaB
|
|
462
|
+
|
|
463
|
+
def block_CFD_FSB_hessian_update(self, B, displacement, delta_grad):
|
|
464
|
+
print("Block CFD_FSB update method (Mean Weight)")
|
|
465
|
+
s = displacement.reshape(-1)
|
|
466
|
+
y = delta_grad.reshape(-1)
|
|
467
|
+
self._push_history(s, y)
|
|
468
|
+
S, Y = self._assemble_block(self.block_size)
|
|
469
|
+
Bp = self._block_CFD_FSB_update(B, S, Y)
|
|
470
|
+
self.delete_old_data()
|
|
471
|
+
return Bp - B # Return deltaB
|
|
472
|
+
|
|
473
|
+
def block_Bofill_hessian_update(self, B, displacement, delta_grad):
|
|
474
|
+
print("Block Bofill update method (Mean Weight)")
|
|
475
|
+
s = displacement.reshape(-1)
|
|
476
|
+
y = delta_grad.reshape(-1)
|
|
477
|
+
self._push_history(s, y)
|
|
478
|
+
S, Y = self._assemble_block(self.block_size)
|
|
479
|
+
Bp = self._block_Bofill_update(B, S, Y)
|
|
480
|
+
self.delete_old_data()
|
|
481
|
+
return Bp - B # Return deltaB
|
|
482
|
+
|
|
483
|
+
def block_CFD_Bofill_hessian_update(self, B, displacement, delta_grad):
|
|
484
|
+
print("Block CFD_Bofill update method (Mean Weight)")
|
|
485
|
+
s = displacement.reshape(-1)
|
|
486
|
+
y = delta_grad.reshape(-1)
|
|
487
|
+
self._push_history(s, y)
|
|
488
|
+
S, Y = self._assemble_block(self.block_size)
|
|
489
|
+
Bp = self._block_CFD_Bofill_update(B, S, Y)
|
|
490
|
+
self.delete_old_data()
|
|
491
|
+
return Bp - B # Return deltaB
|
|
492
|
+
|
|
493
|
+
# -----------------------------------------------------------------
|
|
494
|
+
# --- Public Methods (Weighted Subspace) ---
|
|
495
|
+
# -----------------------------------------------------------------
|
|
496
|
+
|
|
497
|
+
def block_FSB_hessian_update_weighted(self, B, displacement, delta_grad):
    """Entry point for the "Weighted Subspace" variant of the block FSB update.

    Offered as an alternative to ``block_FSB_hessian_update``. The inputs
    are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_FSB_update_weighted``. ``self.delete_old_data()`` is
    called before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_FSB_update_weighted``.
    """
    print("Block FSB update method (Weighted Subspace)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_FSB_update_weighted(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
512
|
+
|
|
513
|
+
def block_CFD_FSB_hessian_update_weighted(self, B, displacement, delta_grad):
    """Entry point for the "Weighted Subspace" variant of the block CFD-FSB update.

    Offered as an alternative to ``block_CFD_FSB_hessian_update``. The
    inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_CFD_FSB_update_weighted``. ``self.delete_old_data()`` is
    called before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_CFD_FSB_update_weighted``.
    """
    print("Block CFD_FSB update method (Weighted Subspace)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_CFD_FSB_update_weighted(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
528
|
+
|
|
529
|
+
def block_Bofill_hessian_update_weighted(self, B, displacement, delta_grad):
    """Entry point for the "Weighted Subspace" variant of the block Bofill update.

    Offered as an alternative to ``block_Bofill_hessian_update``. The
    inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_Bofill_update_weighted``. ``self.delete_old_data()`` is
    called before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_Bofill_update_weighted``.
    """
    print("Block Bofill update method (Weighted Subspace)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_Bofill_update_weighted(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
544
|
+
|
|
545
|
+
def block_CFD_Bofill_hessian_update_weighted(self, B, displacement, delta_grad):
    """Entry point for the "Weighted Subspace" variant of the block CFD-Bofill update.

    Offered as an alternative to ``block_CFD_Bofill_hessian_update``. The
    inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_CFD_Bofill_update_weighted``. ``self.delete_old_data()``
    is called before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_CFD_Bofill_update_weighted``.
    """
    print("Block CFD_Bofill update method (Weighted Subspace)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_CFD_Bofill_update_weighted(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
560
|
+
|
|
561
|
+
# -----------------------------------------------------------------
|
|
562
|
+
# --- Double Damping (DD) Methods: internal helpers and public entry points ---
|
|
563
|
+
# -----------------------------------------------------------------
|
|
564
|
+
|
|
565
|
+
def double_damping_step2_only(self, s, y, mu2):
    """Apply only Step 2 of the Double Damping (DD) procedure to one pair.

    This step needs no inverse Hessian; the original author describes it
    as equivalent to Powell's damping with ``B = I``.

    Damping is triggered when ``s.T @ y < mu2 * s.T @ s``. In that case
    ``theta2 = (1 - mu2) * s.T s / (s.T s - s.T y)`` is clipped to
    ``[0, 1]`` and ``y`` is replaced by ``theta2 * y + (1 - theta2) * s``.
    If the denominator's magnitude is below ``self.denom_threshold`` the
    fixed value ``theta2 = 0.1`` is used instead.

    Args:
        s: Step vector (assumed 1-D -- the scalar formatting below
            requires the dot products to be scalars).
        y: Gradient-difference vector of matching shape.
        mu2: Damping parameter used in the trigger test and in ``theta2``.

    Returns:
        Tuple ``(s, y_tilde)``: ``s`` is returned unchanged (same object);
        ``y_tilde`` is ``y`` itself when no damping was needed, otherwise
        the damped combination.
    """
    sy = np.dot(s.T, y)
    ss = np.dot(s.T, s)
    damping_floor = mu2 * ss

    damped_y = y
    if sy < damping_floor:
        print(f"DD Step 2 active: s_tilde.T*y ({sy:.4e}) < mu2*s_tilde.T*s_tilde ({damping_floor:.4e})")
        gap = ss - sy

        if np.abs(gap) < self.denom_threshold:
            # Division would be unstable; fall back to a fixed mixing factor.
            theta = 0.1
            print("Warning: DD Step 2 denominator near zero. Using default theta2=0.1.")
        else:
            theta = (1.0 - mu2) * ss / gap

        theta = np.clip(theta, 0.0, 1.0)
        damped_y = theta * y + (1.0 - theta) * s

    # Report (but do not correct) a non-positive curvature after damping.
    curvature = np.dot(s.T, damped_y)
    if curvature <= 0:
        print(f"Warning: Damping (Step 2 only) resulted in s.T * y_tilde = {curvature:.4e} <= 0.")

    return s, damped_y
|
|
596
|
+
|
|
597
|
+
def _apply_block_damping_step2(self, S, Y):
    """Run the H-free DD Step 2 on every column pair of ``S`` and ``Y``.

    Each column ``i`` of ``S`` and ``Y`` is passed to
    ``self.double_damping_step2_only`` together with ``self.dd_mu2``; the
    results are written into copies, so the inputs are not modified.

    Args:
        S: 2-D array whose columns are step vectors, or ``None``.
        Y: 2-D array whose columns are gradient differences, or ``None``.

    Returns:
        ``(S_tilde, Y_tilde)`` copies holding the damped columns, or
        ``(None, None)`` when either input is ``None``.
    """
    if S is None or Y is None:
        return None, None

    n_pairs = S.shape[1]
    damped_S = S.copy()
    damped_Y = Y.copy()

    print(f"Applying H-free DD (Step 2) to {n_pairs} pairs...")
    for col in range(n_pairs):
        # Damp one (s, y) column pair and store both returned vectors.
        damped_S[:, col], damped_Y[:, col] = self.double_damping_step2_only(
            S[:, col], Y[:, col], self.dd_mu2
        )

    return damped_S, damped_Y
|
|
618
|
+
|
|
619
|
+
def _block_BFGS_update_dd(self, B, S, Y):
    """Block BFGS update computed on DD (Step 2) damped pairs.

    Steps performed:
      1. Count the singular values of ``S`` above ``1e-8``; if none, a copy
         of ``B`` is returned unchanged.
      2. Keep that many columns of ``S``/``Y``, choosing the columns of
         ``S`` with the largest norms and restoring their original order.
      3. Damp the kept pairs with ``self._apply_block_damping_step2``.
      4. Form ``B - B S (S^T B S)^-1 S^T B + Y (S^T Y)^-1 Y^T`` using
         ``safe_inv`` (regularised by ``self.inv_reg``) and symmetrise the
         result with ``symm``.

    Args:
        B: Current matrix to update.
        S: Block of step vectors stored column-wise, or ``None``.
        Y: Block of gradient differences stored column-wise, or ``None``.

    Returns:
        The updated matrix, or ``B.copy()`` when no update can be made.
    """
    if S is None or Y is None:
        return B.copy()

    # Only the singular values are needed to estimate the numerical rank.
    _, singular_values, _ = np.linalg.svd(S, full_matrices=False)
    significant = singular_values > 1e-8
    if not np.any(significant):
        return B.copy()
    n_keep = np.sum(significant)

    # Pick the n_keep longest columns, then put them back in input order.
    by_norm_desc = np.argsort(-np.linalg.norm(S, axis=0))
    selected = np.sort(by_norm_desc[:n_keep])

    S_d, Y_d = self._apply_block_damping_step2(S[:, selected], Y[:, selected])

    StB = np.dot(S_d.T, B)
    inv_StBS = safe_inv(np.dot(StB, S_d), reg=self.inv_reg)
    inv_StY = safe_inv(np.dot(S_d.T, Y_d), reg=self.inv_reg)

    curvature_removed = np.dot(np.dot(np.dot(B, S_d), inv_StBS), StB)
    curvature_added = np.dot(np.dot(Y_d, inv_StY), Y_d.T)
    return symm(B - curvature_removed + curvature_added)
|
|
642
|
+
|
|
643
|
+
def _block_FSB_update_dd(self, B, S, Y):
    """Block FSB update computed on DD (Step 2) damped pairs.

    The pairs are damped with ``self._apply_block_damping_step2``; the
    per-pair weights come from ``self._get_individual_weights`` (called
    with ``is_cfd=False, use_bofill_logic=False``). The block SR1 and block
    BFGS increments are then mixed using the mean weight ``w``:
    ``B + w * dSR1 + (1 - w) * dBFGS``, and the result is passed through
    ``symm``.

    Args:
        B: Current matrix to update.
        S: Block of step vectors stored column-wise, or ``None``.
        Y: Block of gradient differences stored column-wise, or ``None``.

    Returns:
        The updated matrix, or ``B.copy()`` when ``S`` or ``Y`` is ``None``.
    """
    if S is None or Y is None:
        return B.copy()

    damped_S, damped_Y = self._apply_block_damping_step2(S, Y)
    # Only the second element (the weights) of the helper's result is used.
    _, weights = self._get_individual_weights(
        B, damped_S, damped_Y, is_cfd=False, use_bofill_logic=False
    )

    sr1_delta = self._block_SR1_update(B, damped_S, damped_Y) - B
    bfgs_delta = self._block_BFGS_update(B, damped_S, damped_Y) - B

    # With no weights available the update is pure BFGS (mix == 0).
    if len(weights) > 0:
        mix = float(np.mean(weights))
    else:
        mix = 0.0

    return symm(B + mix * sr1_delta + (1.0 - mix) * bfgs_delta)
|
|
656
|
+
|
|
657
|
+
def _block_CFD_FSB_update_dd(self, B, S, Y):
    """Block CFD-FSB update computed on DD (Step 2) damped pairs.

    The pairs are damped with ``self._apply_block_damping_step2``; the
    per-pair weights come from ``self._get_individual_weights`` (called
    with ``is_cfd=True, use_bofill_logic=True``). The block CFD-SR1 and
    block BFGS increments are then mixed using the mean weight ``w``:
    ``B + w * dCFD_SR1 + (1 - w) * dBFGS``, and the result is passed
    through ``symm``.

    Args:
        B: Current matrix to update.
        S: Block of step vectors stored column-wise, or ``None``.
        Y: Block of gradient differences stored column-wise, or ``None``.

    Returns:
        The updated matrix, or ``B.copy()`` when ``S`` or ``Y`` is ``None``.
    """
    if S is None or Y is None:
        return B.copy()

    damped_S, damped_Y = self._apply_block_damping_step2(S, Y)
    # NOTE(review): use_bofill_logic=True here, whereas the non-CFD FSB
    # variant passes False -- confirm this difference is intentional.
    _, weights = self._get_individual_weights(
        B, damped_S, damped_Y, is_cfd=True, use_bofill_logic=True
    )

    sr1_delta = self._block_CFD_SR1_update(B, damped_S, damped_Y) - B
    bfgs_delta = self._block_BFGS_update(B, damped_S, damped_Y) - B

    # With no weights available the update is pure BFGS (mix == 0).
    if len(weights) > 0:
        mix = float(np.mean(weights))
    else:
        mix = 0.0

    return symm(B + mix * sr1_delta + (1.0 - mix) * bfgs_delta)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def block_BFGS_hessian_update_dd(self, B, displacement, delta_grad):
    """Entry point for the block BFGS update with H-free DD (Step 2).

    The inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_BFGS_update_dd``. ``self.delete_old_data()`` is called
    before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_BFGS_update_dd``.
    """
    print("Block BFGS update method with DD (Step 2 only)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_BFGS_update_dd(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
684
|
+
|
|
685
|
+
def block_FSB_hessian_update_dd(self, B, displacement, delta_grad):
    """Entry point for the block FSB update with H-free DD (Step 2).

    The inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_FSB_update_dd``. ``self.delete_old_data()`` is called
    before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_FSB_update_dd``.
    """
    print("Block FSB update method with DD (Step 2 only)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_FSB_update_dd(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|
|
697
|
+
|
|
698
|
+
def block_CFD_FSB_hessian_update_dd(self, B, displacement, delta_grad):
    """Entry point for the block CFD-FSB update with H-free DD (Step 2).

    The inputs are flattened with ``reshape(-1)`` and recorded via
    ``self._push_history``; a block is then taken from
    ``self._assemble_block(self.block_size)`` and handed to
    ``self._block_CFD_FSB_update_dd``. ``self.delete_old_data()`` is called
    before returning.

    Args:
        B: Current matrix to be updated (assumed to be the Hessian
            approximation as an ndarray -- confirm against callers).
        displacement: Step taken; must expose ``reshape``.
        delta_grad: Gradient difference; must expose ``reshape``.

    Returns:
        ``Bp - B`` where ``Bp`` is the matrix returned by
        ``self._block_CFD_FSB_update_dd``.
    """
    print("Block CFD_FSB update method with DD (Step 2 only)")
    # Store the newest pair as flat vectors before building the block.
    self._push_history(displacement.reshape(-1), delta_grad.reshape(-1))
    step_block, grad_block = self._assemble_block(self.block_size)
    updated = self._block_CFD_FSB_update_dd(B, step_block, grad_block)
    self.delete_old_data()
    delta_B = updated - B
    return delta_B
|