numopt-js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODING_RULES.md +161 -0
- package/LICENSE +22 -0
- package/README.md +807 -0
- package/dist/core/adjointGradientDescent.d.ts +61 -0
- package/dist/core/adjointGradientDescent.d.ts.map +1 -0
- package/dist/core/adjointGradientDescent.js +764 -0
- package/dist/core/adjointGradientDescent.js.map +1 -0
- package/dist/core/constrainedGaussNewton.d.ts +44 -0
- package/dist/core/constrainedGaussNewton.d.ts.map +1 -0
- package/dist/core/constrainedGaussNewton.js +314 -0
- package/dist/core/constrainedGaussNewton.js.map +1 -0
- package/dist/core/constrainedLevenbergMarquardt.d.ts +46 -0
- package/dist/core/constrainedLevenbergMarquardt.d.ts.map +1 -0
- package/dist/core/constrainedLevenbergMarquardt.js +469 -0
- package/dist/core/constrainedLevenbergMarquardt.js.map +1 -0
- package/dist/core/constrainedUtils.d.ts +92 -0
- package/dist/core/constrainedUtils.d.ts.map +1 -0
- package/dist/core/constrainedUtils.js +364 -0
- package/dist/core/constrainedUtils.js.map +1 -0
- package/dist/core/convergence.d.ts +35 -0
- package/dist/core/convergence.d.ts.map +1 -0
- package/dist/core/convergence.js +51 -0
- package/dist/core/convergence.js.map +1 -0
- package/dist/core/createGradientFunction.d.ts +85 -0
- package/dist/core/createGradientFunction.d.ts.map +1 -0
- package/dist/core/createGradientFunction.js +93 -0
- package/dist/core/createGradientFunction.js.map +1 -0
- package/dist/core/effectiveJacobian.d.ts +90 -0
- package/dist/core/effectiveJacobian.d.ts.map +1 -0
- package/dist/core/effectiveJacobian.js +128 -0
- package/dist/core/effectiveJacobian.js.map +1 -0
- package/dist/core/finiteDiff.d.ts +171 -0
- package/dist/core/finiteDiff.d.ts.map +1 -0
- package/dist/core/finiteDiff.js +363 -0
- package/dist/core/finiteDiff.js.map +1 -0
- package/dist/core/gaussNewton.d.ts +29 -0
- package/dist/core/gaussNewton.d.ts.map +1 -0
- package/dist/core/gaussNewton.js +151 -0
- package/dist/core/gaussNewton.js.map +1 -0
- package/dist/core/gradientDescent.d.ts +35 -0
- package/dist/core/gradientDescent.d.ts.map +1 -0
- package/dist/core/gradientDescent.js +204 -0
- package/dist/core/gradientDescent.js.map +1 -0
- package/dist/core/jacobianComputation.d.ts +24 -0
- package/dist/core/jacobianComputation.d.ts.map +1 -0
- package/dist/core/jacobianComputation.js +38 -0
- package/dist/core/jacobianComputation.js.map +1 -0
- package/dist/core/levenbergMarquardt.d.ts +36 -0
- package/dist/core/levenbergMarquardt.d.ts.map +1 -0
- package/dist/core/levenbergMarquardt.js +286 -0
- package/dist/core/levenbergMarquardt.js.map +1 -0
- package/dist/core/lineSearch.d.ts +42 -0
- package/dist/core/lineSearch.d.ts.map +1 -0
- package/dist/core/lineSearch.js +106 -0
- package/dist/core/lineSearch.js.map +1 -0
- package/dist/core/logger.d.ts +77 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +162 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/types.d.ts +427 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +15 -0
- package/dist/core/types.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/utils/formatting.d.ts +27 -0
- package/dist/utils/formatting.d.ts.map +1 -0
- package/dist/utils/formatting.js +54 -0
- package/dist/utils/formatting.js.map +1 -0
- package/dist/utils/matrix.d.ts +63 -0
- package/dist/utils/matrix.d.ts.map +1 -0
- package/dist/utils/matrix.js +129 -0
- package/dist/utils/matrix.js.map +1 -0
- package/dist/utils/resultFormatter.d.ts +122 -0
- package/dist/utils/resultFormatter.d.ts.map +1 -0
- package/dist/utils/resultFormatter.js +342 -0
- package/dist/utils/resultFormatter.js.map +1 -0
- package/package.json +74 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file implements the adjoint method for constrained optimization problems.
|
|
3
|
+
*
|
|
4
|
+
* The adjoint method efficiently computes gradients for constrained optimization
|
|
5
|
+
* by solving for an adjoint variable λ instead of explicitly inverting matrices.
|
|
6
|
+
*
|
|
7
|
+
* Mathematical background:
|
|
8
|
+
* - For constraint c(p, x) = 0, the implicit function theorem gives:
|
|
9
|
+
* df/dp = ∂f/∂p - ∂f/∂x (∂c/∂x)^-1 ∂c/∂p
|
|
10
|
+
* - Instead of computing (∂c/∂x)^-1 ∂c/∂p explicitly, we solve:
|
|
11
|
+
* (∂c/∂x)^T λ = (∂f/∂x)^T
|
|
12
|
+
* Then: df/dp = ∂f/∂p - λ^T ∂c/∂p
|
|
13
|
+
* - This requires solving only one linear system per iteration instead of
|
|
14
|
+
* paramCount systems, making it much more efficient.
|
|
15
|
+
*
|
|
16
|
+
* For residual functions r(p, x) where f = 1/2 r^T r:
|
|
17
|
+
* - Solve: (∂c/∂x)^T λ = r^T ∂r/∂x
|
|
18
|
+
* - Then: df/dp = r^T ∂r/∂p - λ^T ∂c/∂p
|
|
19
|
+
*
|
|
20
|
+
* References:
|
|
21
|
+
* - Nocedal & Wright, "Numerical Optimization" (2nd ed.), Chapter 12 (constrained optimization)
|
|
22
|
+
* - Adjoint method is widely used in optimal control and shape optimization
|
|
23
|
+
*
|
|
24
|
+
* Role in system:
|
|
25
|
+
* - Provides efficient constrained optimization using adjoint method
|
|
26
|
+
* - Supports both cost functions and residual functions
|
|
27
|
+
* - Uses finite differences or analytical derivatives
|
|
28
|
+
* - For residual functions r(p, x), can compute dr/dp (Jacobian matrix) efficiently
|
|
29
|
+
* by reusing ∂c/∂x decomposition for all residual components. This is more efficient
|
|
30
|
+
* than BFGS or Lagrange multiplier methods. The Jacobian enables Gauss-Newton or
|
|
31
|
+
* Levenberg-Marquardt methods for quadratic convergence in constrained optimization
|
|
32
|
+
*
|
|
33
|
+
* For first-time readers:
|
|
34
|
+
* - Start with adjointGradientDescent function
|
|
35
|
+
* - Understand how adjoint variable λ is computed
|
|
36
|
+
* - Check how states x are updated using linear approximation
|
|
37
|
+
*/
|
|
38
|
+
// @ts-ignore - SingularValueDecomposition exists in ml-matrix but may not be in type definitions
|
|
39
|
+
import { SingularValueDecomposition } from 'ml-matrix';
|
|
40
|
+
import { finiteDiffPartialP, finiteDiffPartialX, finiteDiffConstraintPartialP, finiteDiffConstraintPartialX, finiteDiffResidualPartialP, finiteDiffResidualPartialX } from './finiteDiff.js';
|
|
41
|
+
import { backtrackingLineSearch } from './lineSearch.js';
|
|
42
|
+
import { vectorNorm, scaleVector, addVectors, subtractVectors } from '../utils/matrix.js';
|
|
43
|
+
import { checkGradientConvergence, checkStepSizeConvergence, createConvergenceResult } from './convergence.js';
|
|
44
|
+
import { Logger } from './logger.js';
|
|
45
|
+
import { float64ArrayToMatrix, matrixToFloat64Array } from '../utils/matrix.js';
|
|
46
|
+
import { solveAdjointEquation as solveAdjointEquationShared, solveLeastSquares as solveLeastSquaresShared } from './constrainedUtils.js';
|
|
47
|
+
// --- Optimizer defaults ---
const DEFAULT_MAX_ITERATIONS = 1000; // Iteration cap for the descent loop
const DEFAULT_TOLERANCE = 1e-6; // Gradient-norm convergence tolerance
const DEFAULT_STEP_SIZE = 0.01; // Fixed step size when line search is disabled
const DEFAULT_USE_LINE_SEARCH = true; // Backtracking line search enabled by default
const DEFAULT_CONSTRAINT_TOLERANCE = 1e-6; // Allowed ||c(p,x)|| before warning/blocking convergence
const DEFAULT_STEP_SIZE_P = 1e-6; // Finite-difference step for parameter derivatives
const DEFAULT_STEP_SIZE_X = 1e-6; // Finite-difference step for state derivatives
const ZERO_STEP_SIZE = 0.0; // Sentinel step size (no movement)
const NEGATIVE_GRADIENT_DIRECTION = -1.0; // Scale factor turning a gradient into a descent direction
const RESIDUAL_COST_COEFFICIENT = 0.5; // Coefficient for residual cost: f = 1/2 r^T r
const NEGATIVE_COEFFICIENT = -1.0; // Coefficient for negating vectors
const MAX_DIMENSION_FOR_DETAILED_LOGGING = 3; // Maximum dimension for detailed parameter/state logging
const DEFAULT_REGULARIZATION = 0.0; // Optional Tikhonov regularization for adjoint solve
const MAX_SVD_DIAGNOSTIC_DIMENSION = 50; // Limit for expensive SVD diagnostics in logs
const CONDITION_WARNING_THRESHOLD = 1e10; // Threshold to warn about ill-conditioning
const AUTO_REGULARIZATION = 1e-8; // Floor regularization injected when Jacobian is singular/ill-conditioned
const MAX_REGULARIZATION_RETRY_ATTEMPTS = 20; // Maximum number of retry attempts with increasing regularization
const REGULARIZATION_MULTIPLIER = 10; // Multiplier for increasing regularization on each retry
const FALLBACK_REGULARIZATION = 1e-6; // Fallback regularization value when current regularization is zero
const MAX_MATRIX_DIMENSION_FOR_SVD_DIAGNOSTICS = 300; // Maximum matrix dimension for expensive SVD diagnostics
const FLOATING_POINT_EQUALITY_TOLERANCE = 1e-15; // Tolerance for floating point equality comparisons
const INITIAL_ATTEMPT_NUMBER = 1; // Initial attempt number for logging
|
|
69
|
+
/**
 * Detects whether the supplied function is a residual function r(p, x)
 * (returning a Float64Array) rather than a scalar cost function f(p, x).
 * Detection works by invoking the function once and inspecting the runtime
 * type of its return value.
 */
function isResidualFunction(costFunction, parameters, states) {
    return costFunction(parameters, states) instanceof Float64Array;
}
|
|
76
|
+
/**
 * Computes the scalar cost from either a cost function or a residual function.
 * For residual functions r(p, x) the cost is f = 1/2 ||r||^2; for scalar cost
 * functions the returned value is used directly.
 *
 * Fix: the user function is now evaluated exactly once per call. The original
 * evaluated it twice (once inside isResidualFunction for type detection and
 * once for the value), doubling the cost of expensive model evaluations.
 */
function computeCost(costFunction, parameters, states) {
    const value = costFunction(parameters, states);
    if (value instanceof Float64Array) {
        // Residual function: f = 1/2 r^T r
        const residualNorm = vectorNorm(value);
        return RESIDUAL_COST_COEFFICIENT * residualNorm * residualNorm;
    }
    return value;
}
|
|
88
|
+
/**
 * Computes a gradient row from a residual vector: g = r^T J.
 * Derivation: for f = 1/2 r^T r, the chain rule gives df = r^T dr, hence
 * df/dp = r^T ∂r/∂p (and analogously for ∂r/∂x).
 */
function computeGradientFromResidual(residual, derivativeMatrix) {
    const residualColumn = float64ArrayToMatrix(residual);
    const residualRow = residualColumn.transpose();
    const gradientRow = residualRow.mmul(derivativeMatrix);
    return rowVectorToFloat64Array(gradientRow);
}
|
|
97
|
+
/**
 * Computes ∂f/∂p (gradient with respect to parameters).
 *
 * Priority:
 *   1. Analytical options.dfdp, when supplied.
 *   2. Residual form g = r^T ∂r/∂p (finite-difference Jacobian) when the
 *      cost function returns a Float64Array residual.
 *   3. Scalar finite differences otherwise.
 *
 * Fix: the user function is probed once and the probe value reused as the
 * residual; the original called costFunction separately for type detection
 * (inside isResidualFunction) and again for the residual value.
 */
function computeDfdp(parameters, states, costFunction, options) {
    if (options.dfdp) {
        // Caller-supplied analytical derivative takes precedence.
        return options.dfdp(parameters, states);
    }
    const stepSizeP = options.stepSizeP ?? DEFAULT_STEP_SIZE_P;
    const probe = costFunction(parameters, states);
    if (probe instanceof Float64Array) {
        // Residual function: contract r with the finite-difference Jacobian ∂r/∂p.
        const derivativeResidualPartialP = finiteDiffResidualPartialP(parameters, states, costFunction, { stepSize: stepSizeP });
        return computeGradientFromResidual(probe, derivativeResidualPartialP);
    }
    return finiteDiffPartialP(parameters, states, costFunction, { stepSize: stepSizeP });
}
|
|
113
|
+
/**
 * Computes ∂f/∂x (gradient with respect to states).
 *
 * Priority:
 *   1. Analytical options.dfdx, when supplied.
 *   2. Residual form g = r^T ∂r/∂x (finite-difference Jacobian) when the
 *      cost function returns a Float64Array residual.
 *   3. Scalar finite differences otherwise.
 *
 * Fix: the user function is probed once and the probe value reused as the
 * residual; the original called costFunction separately for type detection
 * (inside isResidualFunction) and again for the residual value.
 */
function computeDfdx(parameters, states, costFunction, options) {
    if (options.dfdx) {
        // Caller-supplied analytical derivative takes precedence.
        return options.dfdx(parameters, states);
    }
    const stepSizeX = options.stepSizeX ?? DEFAULT_STEP_SIZE_X;
    const probe = costFunction(parameters, states);
    if (probe instanceof Float64Array) {
        // Residual function: contract r with the finite-difference Jacobian ∂r/∂x.
        const derivativeResidualPartialX = finiteDiffResidualPartialX(parameters, states, costFunction, { stepSize: stepSizeX });
        return computeGradientFromResidual(probe, derivativeResidualPartialX);
    }
    return finiteDiffPartialX(parameters, states, costFunction, { stepSize: stepSizeX });
}
|
|
129
|
+
/**
 * Gathers the four partial derivatives the adjoint method needs:
 *   dfdp = ∂f/∂p, dfdx = ∂f/∂x, dcdp = ∂c/∂p, dcdx = ∂c/∂x.
 * Analytical callbacks in options take precedence; otherwise finite
 * differences are used with the configured (or default) step sizes.
 */
function computePartialDerivatives(parameters, states, costFunction, constraintFunction, options) {
    const dfdp = computeDfdp(parameters, states, costFunction, options);
    const dfdx = computeDfdx(parameters, states, costFunction, options);
    // ∂c/∂p feeds the adjoint gradient df/dp = ∂f/∂p - λ^T ∂c/∂p.
    let dcdp;
    if (options.dcdp) {
        dcdp = options.dcdp(parameters, states);
    } else {
        const stepSizeP = options.stepSizeP ?? DEFAULT_STEP_SIZE_P;
        dcdp = finiteDiffConstraintPartialP(parameters, states, constraintFunction, { stepSize: stepSizeP });
    }
    // ∂c/∂x feeds the adjoint equation (∂c/∂x)^T λ = (∂f/∂x)^T.
    let dcdx;
    if (options.dcdx) {
        dcdx = options.dcdx(parameters, states);
    } else {
        const stepSizeX = options.stepSizeX ?? DEFAULT_STEP_SIZE_X;
        dcdx = finiteDiffConstraintPartialX(parameters, states, constraintFunction, { stepSize: stepSizeX });
    }
    return { dfdp, dfdx, dcdp, dcdx };
}
|
|
147
|
+
/**
 * Computes cheap summary statistics of a matrix for diagnostic logging:
 * Frobenius norm, largest absolute entry, and smallest absolute entry
 * (reported as 0 for an empty matrix).
 */
function computeMatrixDiagnostics(matrix) {
    let sumOfSquares = 0;
    let largestAbs = 0;
    let smallestAbs = Number.POSITIVE_INFINITY;
    for (let row = 0; row < matrix.rows; row++) {
        for (let col = 0; col < matrix.columns; col++) {
            const entry = matrix.get(row, col);
            const magnitude = Math.abs(entry);
            sumOfSquares += entry * entry;
            largestAbs = Math.max(largestAbs, magnitude);
            smallestAbs = Math.min(smallestAbs, magnitude);
        }
    }
    const minAbs = smallestAbs === Number.POSITIVE_INFINITY ? 0 : smallestAbs;
    return { frobenius: Math.sqrt(sumOfSquares), maxAbs: largestAbs, minAbs };
}
|
|
166
|
+
/**
 * Converts a 1 × n row-vector matrix into a Float64Array of length n.
 * @throws {Error} when the matrix does not have exactly one row.
 */
function rowVectorToFloat64Array(matrix) {
    if (matrix.rows !== 1) {
        throw new Error('Expected row vector (1 x n)');
    }
    return Float64Array.from({ length: matrix.columns }, (_, col) => matrix.get(0, col));
}
|
|
176
|
+
/**
 * Computes the Euclidean norm and the largest absolute entry of a vector,
 * for inclusion in diagnostic log output.
 */
function computeVectorDiagnostics(vector) {
    let sumOfSquares = 0;
    let largestAbs = 0;
    for (const value of vector) {
        sumOfSquares += value * value;
        largestAbs = Math.max(largestAbs, Math.abs(value));
    }
    return { norm: Math.sqrt(sumOfSquares), maxAbs: largestAbs };
}
|
|
187
|
+
/**
 * Escalates the regularization for a retry: a positive value is multiplied by
 * REGULARIZATION_MULTIPLIER; starting from zero, FALLBACK_REGULARIZATION is
 * seeded instead. The exponential growth quickly stabilizes ill-conditioned
 * linear systems.
 */
function updateRegularizationWithMultiplier(currentRegularization) {
    if (currentRegularization > 0) {
        return currentRegularization * REGULARIZATION_MULTIPLIER;
    }
    return FALLBACK_REGULARIZATION;
}
|
|
194
|
+
/**
 * Computes SVD-based diagnostics for a matrix: extreme singular values, a
 * condition-number estimate, and a numerical rank estimate. Returns
 * undefined for matrices larger than MAX_SVD_DIAGNOSTIC_DIMENSION and on any
 * SVD failure, since these diagnostics are purely informational.
 */
function computeSvdDiagnostics(matrix) {
    const tooLarge = matrix.rows > MAX_SVD_DIAGNOSTIC_DIMENSION ||
        matrix.columns > MAX_SVD_DIAGNOSTIC_DIMENSION;
    if (tooLarge) {
        return undefined;
    }
    try {
        const singularValues = new SingularValueDecomposition(matrix).diagonal;
        const sigmaMax = Math.max(...singularValues);
        const sigmaMin = Math.min(...singularValues);
        const condEst = sigmaMin > 0 ? sigmaMax / sigmaMin : Infinity;
        // Numerical rank: singular values above sigma_max * eps * max(rows, cols).
        const rankThreshold = sigmaMax * Number.EPSILON * Math.max(matrix.rows, matrix.columns);
        const rankEst = singularValues.filter((sigma) => sigma > rankThreshold).length;
        return { sigmaMax, sigmaMin, condEst, rankEst };
    }
    catch {
        return undefined;
    }
}
|
|
212
|
+
/**
 * Emits one diagnostic log entry for the adjoint system A = ∂c/∂x and its
 * right-hand side: attempt/regularization context, matrix and vector summary
 * statistics, optional SVD diagnostics, raw entries for tiny systems, and the
 * triggering error (if any). Non-numeric detail values are filtered out
 * because the logger accepts numbers only.
 */
function logAdjointDiagnostics(dcdx, rightHandSide, logger, message, level, attempt, regularization, error, svdDiagnostics) {
    const matrixStats = computeMatrixDiagnostics(dcdx);
    const rhsStats = computeVectorDiagnostics(rightHandSide);
    const details = [
        { key: 'attempt', value: attempt },
        { key: 'reg', value: regularization },
        { key: 'rows', value: dcdx.rows },
        { key: 'columns', value: dcdx.columns },
        { key: 'frobenius', value: matrixStats.frobenius },
        { key: 'max_abs', value: matrixStats.maxAbs },
        { key: 'min_abs', value: matrixStats.minAbs },
        { key: 'rhs_norm', value: rhsStats.norm },
        { key: 'rhs_max_abs', value: rhsStats.maxAbs }
    ];
    const svd = svdDiagnostics ?? computeSvdDiagnostics(dcdx);
    if (svd) {
        details.push({ key: 'sigma_max', value: svd.sigmaMax });
        details.push({ key: 'sigma_min', value: svd.sigmaMin });
        details.push({ key: 'cond_est', value: svd.condEst });
        details.push({ key: 'rank_est', value: svd.rankEst });
    }
    // For tiny systems, dump raw entries to quickly spot zero/duplicate rows or columns.
    const isTinySystem = dcdx.rows <= MAX_DIMENSION_FOR_DETAILED_LOGGING &&
        dcdx.columns <= MAX_DIMENSION_FOR_DETAILED_LOGGING;
    if (isTinySystem) {
        for (let row = 0; row < dcdx.rows; row++) {
            for (let col = 0; col < dcdx.columns; col++) {
                details.push({ key: `A[${row},${col}]`, value: dcdx.get(row, col) });
            }
        }
    }
    if (error !== undefined) {
        details.push({ key: 'error', value: String(error) });
    }
    // The logger only accepts numeric values; drop string entries (e.g. 'error').
    const numericDetails = details.filter((detail) => typeof detail.value === 'number');
    const emit = level === 'warn' ? logger.warn.bind(logger) : logger.debug.bind(logger);
    emit('adjointGradientDescent', undefined, message, numericDetails);
}
|
|
250
|
+
/**
 * Logs SVD diagnostics (extreme singular values and a condition estimate) for
 * a matrix, skipping matrices above MAX_MATRIX_DIMENSION_FOR_SVD_DIAGNOSTICS
 * to bound the cost. SVD failures are logged at debug level, never thrown.
 */
function logSvdDiagnosticsForSmallMatrix(matrix, logger) {
    const withinLimit = matrix.rows <= MAX_MATRIX_DIMENSION_FOR_SVD_DIAGNOSTICS &&
        matrix.columns <= MAX_MATRIX_DIMENSION_FOR_SVD_DIAGNOSTICS;
    if (!withinLimit) {
        return;
    }
    try {
        const singularValues = new SingularValueDecomposition(matrix).diagonal;
        const sigmaMax = Math.max(...singularValues);
        const sigmaMin = Math.min(...singularValues);
        const condEst = sigmaMin > 0 ? sigmaMax / sigmaMin : Infinity;
        logger.debug('adjointGradientDescent', undefined, 'SVD diagnostics', [
            { key: 'sigma_max', value: sigmaMax },
            { key: 'sigma_min', value: sigmaMin },
            { key: 'cond_est', value: condEst }
        ]);
    }
    catch (svdError) {
        // The error text is a string, which the structured details array cannot carry.
        logger.debug('adjointGradientDescent', undefined, `SVD diagnostics failed: ${String(svdError)}`);
    }
}
|
|
274
|
+
/**
 * Wraps the shared least-squares solver with retry logic: on failure the
 * regularization is escalated exponentially and the solve is retried, which
 * rescues ill-conditioned matrices that fail on the first attempt. The last
 * error is rethrown when all attempts are exhausted.
 */
function solveLeastSquares(A, b, logger) {
    // Global escape hatch lets callers seed a base regularization externally.
    let regularization = globalThis.__ADJOINT_REGULARIZATION__ ?? DEFAULT_REGULARIZATION;
    let lastError;
    for (let attempt = 0; attempt < MAX_REGULARIZATION_RETRY_ATTEMPTS; attempt++) {
        try {
            return solveLeastSquaresShared(A, b, logger, 'adjointGradientDescent', regularization);
        }
        catch (error) {
            lastError = error;
            regularization = updateRegularizationWithMultiplier(regularization);
            if (logger) {
                logger.warn('adjointGradientDescent', undefined, 'solveLeastSquares failed, retrying with higher regularization', [
                    { key: 'attempt', value: attempt + 1 },
                    { key: 'rows', value: A.rows },
                    { key: 'columns', value: A.columns },
                    { key: 'reg', value: regularization },
                    { key: 'error', value: String(error) }
                ]);
                logSvdDiagnosticsForSmallMatrix(A, logger);
            }
        }
    }
    throw lastError instanceof Error ? lastError : new Error(String(lastError));
}
|
|
302
|
+
/**
 * Solves the adjoint equation (∂c/∂x)^T λ = (∂f/∂x)^T with retry logic:
 * pre-solve diagnostics are logged, a floor regularization is seeded when the
 * Jacobian is numerically singular, and each failed attempt escalates the
 * regularization exponentially before retrying. The last error is rethrown
 * when all attempts are exhausted.
 */
function solveAdjointEquation(dcdx, dfdx, logger) {
    let regularization = globalThis.__ADJOINT_REGULARIZATION__ ?? DEFAULT_REGULARIZATION;
    const svdDiagnostics = computeSvdDiagnostics(dcdx);
    const warnAboutCondition = dcdx.rows === dcdx.columns &&
        svdDiagnostics !== undefined &&
        svdDiagnostics.condEst > CONDITION_WARNING_THRESHOLD;
    // If the Jacobian is numerically singular, seed a tiny regularization up front.
    const looksSingular = svdDiagnostics &&
        (!isFinite(svdDiagnostics.condEst) || svdDiagnostics.rankEst === 0 || svdDiagnostics.sigmaMin === 0);
    if (looksSingular) {
        regularization = Math.max(regularization, AUTO_REGULARIZATION);
    }
    const preSolveMessage = warnAboutCondition
        ? 'Adjoint Jacobian appears ill-conditioned before solve'
        : 'Adjoint Jacobian diagnostics before solve';
    logAdjointDiagnostics(dcdx, dfdx, logger, preSolveMessage, warnAboutCondition ? 'warn' : 'debug', INITIAL_ATTEMPT_NUMBER, regularization, undefined, svdDiagnostics);
    let lastError;
    for (let attempt = 0; attempt < MAX_REGULARIZATION_RETRY_ATTEMPTS; attempt++) {
        try {
            return solveAdjointEquationShared(dcdx, dfdx, logger, 'adjointGradientDescent', regularization);
        }
        catch (error) {
            lastError = error;
            regularization = updateRegularizationWithMultiplier(regularization);
            logAdjointDiagnostics(dcdx, dfdx, logger, 'solveAdjointEquation failed, retrying with higher regularization', 'warn', attempt + 1, regularization, error, svdDiagnostics);
        }
    }
    throw lastError instanceof Error ? lastError : new Error(String(lastError));
}
|
|
329
|
+
/**
 * Computes the total (adjoint) gradient: df/dp = ∂f/∂p - λ^T ∂c/∂p.
 * Dimensions: λ is constraintCount × 1, so λ^T is 1 × constraintCount and,
 * with ∂c/∂p being constraintCount × parameterCount, λ^T ∂c/∂p is a
 * 1 × parameterCount row vector matching dfdp's length.
 */
function computeAdjointGradient(dfdp, lambda, dcdp) {
    const lambdaRow = float64ArrayToMatrix(lambda).transpose();
    const correctionRow = lambdaRow.mmul(dcdp);
    // Unpack the 1 × parameterCount correction row into a flat vector.
    const parameterCount = dfdp.length;
    const correction = new Float64Array(parameterCount);
    for (let i = 0; i < parameterCount; i++) {
        correction[i] = correctionRow.get(0, i);
    }
    // df/dp = ∂f/∂p - λ^T ∂c/∂p
    return subtractVectors(dfdp, correction);
}
|
|
349
|
+
/**
 * Advances the states with the linear approximation x_new = x + dx, where dx
 * solves (∂c/∂x) dx = -(∂c/∂p) Δp so that c(p + Δp, x + dx) ≈ 0 to first
 * order. A least-squares solve handles both square and non-square constraint
 * Jacobians.
 */
function updateStates(currentStates, dcdx, dcdp, deltaP, logger) {
    // Right-hand side: -(∂c/∂p) Δp — the first-order constraint drift caused by Δp,
    // negated so the state update compensates for it.
    const constraintDrift = dcdp.mmul(float64ArrayToMatrix(deltaP));
    const negatedDrift = scaleVector(matrixToFloat64Array(constraintDrift), NEGATIVE_COEFFICIENT);
    const rhsMatrix = float64ArrayToMatrix(negatedDrift);
    // Hierarchical solver copes with square and non-square ∂c/∂x alike.
    const dx = solveLeastSquares(dcdx, rhsMatrix, logger);
    // Linear update is far cheaper than re-solving the full nonlinear constraint
    // system every iteration.
    return addVectors(currentStates, dx);
}
|
|
369
|
+
/**
 * Builds a cost function of parameters only, for use by the line search.
 * States are tracked with the linear approximation x + dx, where dx solves
 * (∂c/∂x) dx = -(∂c/∂p) Δp; the constraint partials are computed once (or
 * taken from cachedPartials) rather than per line-search trial.
 */
function createCostFunctionWrapper(currentParameters, currentStates, costFunction, constraintFunction, options, logger, cachedPartials) {
    const { dcdx, dcdp } = cachedPartials ?? computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
    return (params) => {
        // Linear state update keeps constraints approximately satisfied without
        // the expense of re-solving the nonlinear system for every trial step.
        const deltaP = subtractVectors(params, currentParameters);
        const trialStates = updateStates(currentStates, dcdx, dcdp, deltaP, logger);
        return computeCost(costFunction, params, trialStates);
    };
}
|
|
386
|
+
/**
 * Returns true when two parameter vectors agree element-wise within
 * FLOATING_POINT_EQUALITY_TOLERANCE. Used to recognize when the line search
 * re-evaluates the starting point so the cached gradient can be reused.
 */
function areParametersEqual(parameters1, parameters2) {
    if (parameters1.length !== parameters2.length) {
        return false;
    }
    for (let index = 0; index < parameters1.length; index++) {
        const difference = Math.abs(parameters1[index] - parameters2[index]);
        if (difference > FLOATING_POINT_EQUALITY_TOLERANCE) {
            return false;
        }
    }
    return true;
}
|
|
401
|
+
/**
 * Builds a gradient function of parameters only, for use by the line search.
 * At the starting point the pre-computed currentGradient is returned (as a
 * fresh copy); at trial points, states are advanced via the linear
 * approximation and a fresh adjoint gradient is computed there.
 */
function createGradientFunctionWrapper(currentParameters, currentStates, currentGradient, costFunction, constraintFunction, options, logger, cachedPartials) {
    const { dcdx: baseDcdx, dcdp: baseDcdp } = cachedPartials ?? computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
    return (trialParams) => {
        // The line search queries the gradient at the starting point for its
        // directional derivative; reuse the cached value there to avoid
        // redundant computation.
        if (areParametersEqual(trialParams, currentParameters)) {
            return new Float64Array(currentGradient);
        }
        // Trial point: track states linearly (full nonlinear constraint solves
        // per trial would be too slow), then evaluate the adjoint gradient at
        // the shifted point.
        const deltaP = subtractVectors(trialParams, currentParameters);
        const trialStates = updateStates(currentStates, baseDcdx, baseDcdp, deltaP, logger);
        const trialPartials = computePartialDerivatives(trialParams, trialStates, costFunction, constraintFunction, options);
        const lambda = solveAdjointEquation(trialPartials.dcdx, trialPartials.dfdx, logger);
        return computeAdjointGradient(trialPartials.dfdp, lambda, trialPartials.dcdp);
    };
}
|
|
426
|
+
/**
 * Chooses the step size for one descent iteration: either the fixed step
 * (when line search is disabled or a fixed size is supplied) or a
 * backtracking line search along the negative-gradient direction. Cached
 * partial derivatives are shared by the cost and gradient wrappers to avoid
 * recomputation.
 */
function determineStepSize(currentGradient, currentParameters, currentStates, costFunction, constraintFunction, useLineSearch, fixedStepSize, options, logger, cachedPartials) {
    const lineSearchDisabled = !useLineSearch || fixedStepSize !== undefined;
    if (lineSearchDisabled) {
        return { stepSize: fixedStepSize ?? DEFAULT_STEP_SIZE, usedLineSearch: false };
    }
    const partials = cachedPartials ?? computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
    const costWrapper = createCostFunctionWrapper(currentParameters, currentStates, costFunction, constraintFunction, options, logger, { dcdx: partials.dcdx, dcdp: partials.dcdp });
    const gradientWrapper = createGradientFunctionWrapper(currentParameters, currentStates, currentGradient, costFunction, constraintFunction, options, logger, partials);
    const descentDirection = scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION);
    const stepSize = backtrackingLineSearch(costWrapper, gradientWrapper, currentParameters, descentDirection);
    return { stepSize, usedLineSearch: true };
}
|
|
441
|
+
/**
 * Evaluates the constraint c(p, x) and logs a warning when its norm exceeds
 * the tolerance. Returns both the raw constraint vector and its norm.
 */
function checkConstraintViolation(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger) {
    const constraint = constraintFunction(currentParameters, currentStates);
    const constraintNorm = vectorNorm(constraint);
    const isViolated = constraintNorm > constraintTolerance;
    if (isViolated) {
        logger.warn('adjointGradientDescent', iteration, 'Constraint violation detected', [
            { key: '||c(p,x)||:', value: constraintNorm },
            { key: 'Tolerance:', value: constraintTolerance }
        ]);
    }
    return { constraint, constraintNorm };
}
|
|
455
|
+
/**
 * Performs one full adjoint gradient evaluation: computes the partial
 * derivatives, solves for the adjoint variable λ, forms
 * df/dp = ∂f/∂p - λ^T ∂c/∂p, and returns the gradient, its norm, and the
 * partials (so callers such as the line search can reuse them).
 */
function computeAdjointGradientAndNorm(currentParameters, currentStates, costFunction, constraintFunction, options, logger) {
    const partials = computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
    const { dfdp, dfdx, dcdp, dcdx } = partials;
    const lambda = solveAdjointEquation(dcdx, dfdx, logger);
    const adjointGradient = computeAdjointGradient(dfdp, lambda, dcdp);
    return { adjointGradient, gradientNorm: vectorNorm(adjointGradient), partials };
}
|
|
466
|
+
/**
 * Declares convergence when the iterate is feasible (constraint norm within
 * tolerance) and the gradient norm test passes.
 * @returns A { converged: true, result } payload on convergence, otherwise
 * { converged: false } so iteration continues.
 */
function checkGradientConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, constraintNorm, constraintTolerance, tolerance, usedLineSearchFlag, logger) {
    const feasible = constraintNorm <= constraintTolerance;
    if (!feasible || !checkGradientConvergence(gradientNorm, tolerance, iteration)) {
        return { converged: false };
    }
    logger.info('adjointGradientDescent', iteration, 'Converged', [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm },
        { key: 'Constraint norm:', value: constraintNorm }
    ]);
    const base = createConvergenceResult(currentParameters, iteration, true, currentCost, gradientNorm);
    return {
        converged: true,
        result: {
            ...base,
            usedLineSearch: usedLineSearchFlag,
            finalStates: currentStates,
            finalConstraintNorm: constraintNorm
        }
    };
}
|
|
489
|
+
/**
 * Builds the terminal (non-converged) result used when backtracking line
 * search could not find any step that decreases the cost.
 * Note: the outer `converged: true` tells the driver loop to stop, while the
 * embedded result honestly reports `converged: false`.
 */
function handleLineSearchFailure(currentParameters, currentStates, iteration, currentCost, gradientNorm, constraintNorm, logger) {
    logger.warn('adjointGradientDescent', iteration, 'Line search failed', [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm }
    ]);
    const result = {
        parameters: currentParameters,
        iterations: iteration,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm: gradientNorm,
        usedLineSearch: true,
        finalStates: currentStates,
        finalConstraintNorm: constraintNorm
    };
    return { converged: true, result };
}
|
|
511
|
+
/**
 * Applies one descent step: p_new = p - stepSize * g, propagates the
 * parameter change to the states through the linearized constraint, and
 * re-evaluates the cost at the new point.
 */
function updateParametersAndStates(currentParameters, currentStates, adjointGradient, stepSize, partials, costFunction, logger) {
    const descentStep = scaleVector(adjointGradient, NEGATIVE_GRADIENT_DIRECTION * stepSize);
    const newParameters = addVectors(currentParameters, descentStep);
    // deltaP is recomputed from the vectors rather than reusing descentStep —
    // presumably to capture the exact floating-point change after addition.
    const deltaP = subtractVectors(newParameters, currentParameters);
    const newStates = updateStates(currentStates, partials.dcdx, partials.dcdp, deltaP, logger);
    const newCost = computeCost(costFunction, newParameters, newStates);
    return { newParameters, newStates, newCost };
}
|
|
523
|
+
/**
 * Declares convergence when the iterate is feasible and the measured step
 * norm has fallen below tolerance (optimization has stalled or finished).
 * @returns A { converged: true, result } payload on convergence, otherwise
 * { converged: false }.
 */
function checkStepSizeConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, stepNorm, constraintNorm, constraintTolerance, tolerance, newUsedLineSearch, logger) {
    const feasible = constraintNorm <= constraintTolerance;
    if (!feasible || !checkStepSizeConvergence(stepNorm, tolerance, iteration)) {
        return { converged: false };
    }
    logger.info('adjointGradientDescent', iteration, 'Converged', [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm },
        { key: 'Step size:', value: stepNorm }
    ]);
    const base = createConvergenceResult(currentParameters, iteration, true, currentCost, gradientNorm);
    return {
        converged: true,
        result: {
            ...base,
            usedLineSearch: newUsedLineSearch,
            finalStates: currentStates,
            finalConstraintNorm: constraintNorm
        }
    };
}
|
|
546
|
+
/**
|
|
547
|
+
* Creates detailed log information for progress logging.
|
|
548
|
+
*/
|
|
549
|
+
/**
 * Appends each element of `array` to the log-detail list as a
 * `prefix[index]:` / value pair, keeping the logging format uniform
 * across parameter, state, and constraint dumps.
 * Mutates `details` in place; returns nothing.
 */
function addArrayToLogDetails(details, array, prefix) {
    array.forEach((value, index) => {
        details.push({ key: `${prefix}[${index}]:`, value });
    });
}
|
|
558
|
+
/**
 * Assembles the per-iteration progress log: always cost / gradient norm /
 * step size / constraint norm, plus element-wise dumps of p, x (and c)
 * when the dimensions are small enough to keep output readable.
 */
function createProgressLogDetails(currentParameters, currentStates, constraint, currentCost, gradientNorm, stepSize, constraintNorm) {
    const logDetails = [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm },
        { key: 'Step size:', value: stepSize },
        { key: 'Constraint norm:', value: constraintNorm }
    ];
    const isSmall = (vector) => vector.length <= MAX_DIMENSION_FOR_DETAILED_LOGGING;
    if (isSmall(currentParameters) && isSmall(currentStates)) {
        addArrayToLogDetails(logDetails, currentParameters, 'p');
        addArrayToLogDetails(logDetails, currentStates, 'x');
        if (isSmall(constraint)) {
            addArrayToLogDetails(logDetails, constraint, 'c');
        }
    }
    return logDetails;
}
|
|
576
|
+
/**
 * Invokes the user's per-iteration callback (when provided) and then runs
 * the gradient-based convergence test.
 * @returns The convergence payload when converged with a result, otherwise
 * { converged: false }.
 */
function checkConvergenceAndHandleCallback(iteration, currentParameters, currentStates, currentCost, gradientNorm, constraintNorm, constraintTolerance, tolerance, usedLineSearchFlag, onIteration, logger) {
    if (onIteration) {
        onIteration(iteration, currentCost, currentParameters);
    }
    const outcome = checkGradientConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, constraintNorm, constraintTolerance, tolerance, usedLineSearchFlag, logger);
    if (outcome.converged && outcome.result) {
        return outcome;
    }
    return { converged: false };
}
|
|
592
|
+
/**
 * Core optimization step: picks a step size, applies the parameter/state
 * update, and tests for step-size convergence.
 * @returns Either a terminal { converged: true, result } payload, or the
 * updated iterate together with { converged: false } for the driver loop.
 */
function handleStepSizeAndUpdate(adjointGradient, currentParameters, currentStates, constraint, currentCost, gradientNorm, constraintNorm, iteration, constraintTolerance, tolerance, costFunction, constraintFunction, useLineSearch, fixedStepSize, usedLineSearchFlag, partials, options, logger) {
    // Pre-computed partials are passed through so line search does not
    // repeat the derivative computation.
    const stepSizeResult = determineStepSize(adjointGradient, currentParameters, currentStates, costFunction, constraintFunction, useLineSearch, fixedStepSize, options, logger, partials);
    // A zero step size means backtracking exhausted itself without progress.
    if (stepSizeResult.stepSize === ZERO_STEP_SIZE) {
        return handleLineSearchFailure(currentParameters, currentStates, iteration, currentCost, gradientNorm, constraintNorm, logger);
    }
    const newUsedLineSearch = usedLineSearchFlag || stepSizeResult.usedLineSearch;
    // Apply the step; states follow via the linearized constraint so the
    // iterate stays (approximately) feasible.
    const { newParameters, newStates, newCost } = updateParametersAndStates(currentParameters, currentStates, adjointGradient, stepSizeResult.stepSize, partials, costFunction, logger);
    const continuation = { converged: false, newParameters, newStates, newCost, newUsedLineSearch };
    if (!newParameters) {
        // Defensive guard: skip the convergence check when no update vector
        // came back — TODO confirm whether this branch is actually reachable.
        return continuation;
    }
    const stepSizeConvergenceResult = checkStepSizeConvergenceAndLog(currentParameters, currentStates, constraint, currentCost, gradientNorm, stepSizeResult.stepSize, constraintNorm, iteration, constraintTolerance, tolerance, newUsedLineSearch, newParameters, logger);
    if (stepSizeConvergenceResult.converged && stepSizeConvergenceResult.result) {
        return stepSizeConvergenceResult;
    }
    return continuation;
}
|
|
628
|
+
/**
 * Measures the actual parameter step, tests step-size convergence, and —
 * when not converged — emits a detailed debug progress record.
 */
function checkStepSizeConvergenceAndLog(currentParameters, currentStates, constraint, currentCost, gradientNorm, stepSize, constraintNorm, iteration, constraintTolerance, tolerance, newUsedLineSearch, newParameters, logger) {
    // A vanishingly small step indicates convergence (or a stall).
    const stepNorm = vectorNorm(subtractVectors(newParameters, currentParameters));
    const outcome = checkStepSizeConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, stepNorm, constraintNorm, constraintTolerance, tolerance, newUsedLineSearch, logger);
    if (outcome.converged && outcome.result) {
        return outcome;
    }
    // Not converged: log rich progress details to aid diagnosis.
    const logDetails = createProgressLogDetails(currentParameters, currentStates, constraint, currentCost, gradientNorm, stepSize, constraintNorm);
    logger.debug('adjointGradientDescent', iteration, 'Progress', logDetails);
    return { converged: false };
}
|
|
644
|
+
/**
 * Executes one full adjoint gradient descent iteration:
 * constraint check, adjoint gradient, callback + gradient convergence test,
 * then step-size selection and parameter/state update.
 * @returns A terminal { converged: true, result } payload, or the updated
 * iterate for the driver loop to adopt.
 */
function performAdjointGradientDescentIteration(iteration, currentParameters, currentStates, currentCost, costFunction, constraintFunction, tolerance, useLineSearch, fixedStepSize, constraintTolerance, onIteration, logger, usedLineSearchFlag, options) {
    // Warn early when the iterate has drifted off the constraint manifold;
    // this signals that the linearized state update is breaking down.
    const { constraint, constraintNorm } = checkConstraintViolation(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger);
    // The adjoint gradient drives the update; its norm measures optimality.
    const { adjointGradient, gradientNorm, partials } = computeAdjointGradientAndNorm(currentParameters, currentStates, costFunction, constraintFunction, options, logger);
    // Fire the user callback, then stop if the gradient test says we are done.
    const convergenceResult = checkConvergenceAndHandleCallback(iteration, currentParameters, currentStates, currentCost, gradientNorm, constraintNorm, constraintTolerance, tolerance, usedLineSearchFlag, onIteration, logger);
    if (convergenceResult.converged && convergenceResult.result) {
        return convergenceResult;
    }
    // Core step: determine step size, update, check step-size convergence.
    // Fix: the previous code re-tested `updateResult.converged` before
    // returning, but both branches returned the same object — the result is
    // now passed through directly.
    return handleStepSizeAndUpdate(adjointGradient, currentParameters, currentStates, constraint, currentCost, gradientNorm, constraintNorm, iteration, constraintTolerance, tolerance, costFunction, constraintFunction, useLineSearch, fixedStepSize, usedLineSearchFlag, partials, options, logger);
}
|
|
665
|
+
/**
 * Warns when the starting point (p0, x0) violates the constraint beyond
 * tolerance. Dimensions are intentionally not validated: the adjoint
 * method supports non-square constraint Jacobians, so constraint count
 * and state count no longer need to match.
 */
function validateInitialConditions(initialParameters, initialStates, constraintFunction, constraintTolerance, logger) {
    const initialConstraintNorm = vectorNorm(constraintFunction(initialParameters, initialStates));
    if (initialConstraintNorm <= constraintTolerance) {
        return;
    }
    logger.warn('adjointGradientDescent', undefined, 'Initial constraint violation', [
        { key: '||c(p0,x0)||:', value: initialConstraintNorm },
        { key: 'Tolerance:', value: constraintTolerance }
    ]);
}
|
|
680
|
+
/**
 * Evaluates the cost at the starting point (p0, x0) by delegating to the
 * shared cost-evaluation helper.
 */
function computeInitialCost(costFunction, initialParameters, initialStates) {
    const initialCost = computeCost(costFunction, initialParameters, initialStates);
    return initialCost;
}
|
|
686
|
+
/**
 * Builds the non-converged result returned when the iteration budget is
 * exhausted. Re-derives the final gradient and constraint norms at the last
 * iterate so the caller sees the true terminal state, and logs a warning.
 */
function createMaxIterationsResult(currentParameters, currentStates, currentCost, costFunction, constraintFunction, maxIterations, usedLineSearchFlag, options, logger) {
    const partials = computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
    const adjointVariables = solveAdjointEquation(partials.dcdx, partials.dfdx, logger);
    const finalGradientNorm = vectorNorm(computeAdjointGradient(partials.dfdp, adjointVariables, partials.dcdp));
    const finalConstraintNorm = vectorNorm(constraintFunction(currentParameters, currentStates));
    logger.warn('adjointGradientDescent', undefined, 'Maximum iterations reached', [
        { key: 'Iterations:', value: maxIterations },
        { key: 'Final cost:', value: currentCost },
        { key: 'Final gradient norm:', value: finalGradientNorm },
        { key: 'Final constraint norm:', value: finalConstraintNorm }
    ]);
    return {
        parameters: currentParameters,
        iterations: maxIterations,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm,
        usedLineSearch: usedLineSearchFlag,
        finalStates: currentStates,
        finalConstraintNorm
    };
}
|
|
713
|
+
/**
 * Adjoint gradient descent for constrained minimization of f(p, x)
 * subject to c(p, x) = 0.
 *
 * Each iteration:
 * 1. Evaluates partial derivatives ∂f/∂p, ∂f/∂x, ∂c/∂p, ∂c/∂x.
 * 2. Solves the adjoint equation (∂c/∂x)^T λ = (∂f/∂x)^T.
 * 3. Forms the reduced gradient df/dp = ∂f/∂p - λ^T ∂c/∂p.
 * 4. Steps the parameters: p ← p - stepSize · df/dp.
 * 5. Updates states by the linear approximation x ← x - (∂c/∂x)^-1 ∂c/∂p · Δp.
 * Iteration stops on gradient/step-size convergence or at maxIterations.
 *
 * Accepts either a scalar cost f(p,x) or a residual r(p,x) with f = 1/2 r^T r.
 *
 * @param initialParameters - Initial parameter vector p0.
 * @param initialStates - Initial state vector x0 (should satisfy c(p0, x0) = 0).
 * @param costFunction - Cost function f(p, x) or residual function r(p, x).
 * @param constraintFunction - Constraint function c(p, x) = 0.
 * @param options - Optimization options (tolerances, step size, line search, logging, callback).
 * @returns Optimization result with final parameters, states, and constraint norm.
 */
export function adjointGradientDescent(initialParameters, initialStates, costFunction, constraintFunction, options = {}) {
    // `??` (not destructuring defaults) so explicit nulls also fall back.
    const maxIterations = options.maxIterations ?? DEFAULT_MAX_ITERATIONS;
    const tolerance = options.tolerance ?? DEFAULT_TOLERANCE;
    const fixedStepSize = options.stepSize;
    const useLineSearch = options.useLineSearch ?? DEFAULT_USE_LINE_SEARCH;
    const constraintTolerance = options.constraintTolerance ?? DEFAULT_CONSTRAINT_TOLERANCE;
    const onIteration = options.onIteration;
    const logger = new Logger(options.logLevel, options.verbose);
    validateInitialConditions(initialParameters, initialStates, constraintFunction, constraintTolerance, logger);
    // Copy inputs so the caller's arrays are never mutated.
    let currentParameters = new Float64Array(initialParameters);
    let currentStates = new Float64Array(initialStates);
    let currentCost = computeInitialCost(costFunction, currentParameters, currentStates);
    let usedLineSearchFlag = false;
    for (let iteration = 0; iteration < maxIterations; iteration++) {
        const outcome = performAdjointGradientDescentIteration(iteration, currentParameters, currentStates, currentCost, costFunction, constraintFunction, tolerance, useLineSearch, fixedStepSize, constraintTolerance, onIteration, logger, usedLineSearchFlag, options);
        if (outcome.converged && outcome.result) {
            return outcome.result;
        }
        // Without a complete new iterate, keep the current one and retry.
        if (!outcome.newParameters || !outcome.newStates || outcome.newCost === undefined) {
            continue;
        }
        currentParameters = new Float64Array(outcome.newParameters);
        currentStates = new Float64Array(outcome.newStates);
        currentCost = outcome.newCost;
        if (outcome.newUsedLineSearch !== undefined) {
            usedLineSearchFlag = outcome.newUsedLineSearch;
        }
    }
    return createMaxIterationsResult(currentParameters, currentStates, currentCost, costFunction, constraintFunction, maxIterations, usedLineSearchFlag, options, logger);
}
|
|
764
|
+
//# sourceMappingURL=adjointGradientDescent.js.map
|