numopt-js 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -1
- package/dist/core/bfgs.d.ts +16 -0
- package/dist/core/bfgs.d.ts.map +1 -0
- package/dist/core/bfgs.js +167 -0
- package/dist/core/bfgs.js.map +1 -0
- package/dist/core/cmaEs.d.ts +17 -0
- package/dist/core/cmaEs.d.ts.map +1 -0
- package/dist/core/cmaEs.js +671 -0
- package/dist/core/cmaEs.js.map +1 -0
- package/dist/core/constrainedUtils.d.ts +5 -3
- package/dist/core/constrainedUtils.d.ts.map +1 -1
- package/dist/core/constrainedUtils.js +5 -3
- package/dist/core/constrainedUtils.js.map +1 -1
- package/dist/core/convergence.d.ts.map +1 -1
- package/dist/core/convergence.js +13 -6
- package/dist/core/convergence.js.map +1 -1
- package/dist/core/lbfgs.d.ts +17 -0
- package/dist/core/lbfgs.d.ts.map +1 -0
- package/dist/core/lbfgs.js +199 -0
- package/dist/core/lbfgs.js.map +1 -0
- package/dist/core/lineSearch.d.ts +15 -11
- package/dist/core/lineSearch.d.ts.map +1 -1
- package/dist/core/lineSearch.js +138 -15
- package/dist/core/lineSearch.js.map +1 -1
- package/dist/core/types.d.ts +215 -1
- package/dist/core/types.d.ts.map +1 -1
- package/dist/index.browser.js +1365 -111
- package/dist/index.browser.js.map +1 -1
- package/dist/index.cjs +1372 -112
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +6 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -2
- package/dist/index.js.map +1 -1
- package/dist/utils/random.d.ts +20 -0
- package/dist/utils/random.d.ts.map +1 -0
- package/dist/utils/random.js +71 -0
- package/dist/utils/random.js.map +1 -0
- package/dist/utils/resultFormatter.d.ts +11 -1
- package/dist/utils/resultFormatter.d.ts.map +1 -1
- package/dist/utils/resultFormatter.js +40 -0
- package/dist/utils/resultFormatter.js.map +1 -1
- package/package.json +3 -2
package/dist/index.cjs
CHANGED
|
@@ -59,7 +59,7 @@ function matrixToFloat64Array2D(matrix) {
|
|
|
59
59
|
* Computes the L2 (Euclidean) norm of a Float64Array vector.
|
|
60
60
|
* Used for convergence checks and gradient norm calculations.
|
|
61
61
|
*/
|
|
62
|
-
function vectorNorm(vector) {
|
|
62
|
+
function vectorNorm$1(vector) {
|
|
63
63
|
let sumOfSquares = 0.0;
|
|
64
64
|
for (let i = 0; i < vector.length; i++) {
|
|
65
65
|
const value = vector[i];
|
|
@@ -99,7 +99,7 @@ function addVectors(vectorA, vectorB) {
|
|
|
99
99
|
* Subtracts vectorB from vectorA element-wise.
|
|
100
100
|
* Returns a new Float64Array with the result.
|
|
101
101
|
*/
|
|
102
|
-
function subtractVectors(vectorA, vectorB) {
|
|
102
|
+
function subtractVectors$1(vectorA, vectorB) {
|
|
103
103
|
if (vectorA.length !== vectorB.length) {
|
|
104
104
|
throw new Error('Vectors must have the same length for subtraction');
|
|
105
105
|
}
|
|
@@ -131,20 +131,17 @@ function computeSumOfSquaredResiduals(residualNorm) {
|
|
|
131
131
|
}
|
|
132
132
|
|
|
133
133
|
/**
|
|
134
|
-
* This file implements line search algorithms for determining
|
|
135
|
-
* in optimization algorithms. The implementation follows the backtracking
|
|
136
|
-
* Armijo line search described in Nocedal & Wright, "Numerical Optimization"
|
|
137
|
-
* (2nd ed.), Algorithm 3.1.
|
|
134
|
+
* This file implements line search algorithms for determining step sizes.
|
|
138
135
|
*
|
|
139
136
|
* Role in system:
|
|
140
|
-
* -
|
|
141
|
-
* -
|
|
142
|
-
* -
|
|
137
|
+
* - Provides step size selection for gradient-based optimizers
|
|
138
|
+
* - Backtracking Armijo: simple and robust default (used by gradient descent)
|
|
139
|
+
* - Strong Wolfe: preferred for quasi-Newton methods (BFGS / L-BFGS) to help satisfy
|
|
140
|
+
* the curvature condition \(s^T y > 0\), improving Hessian approximation stability
|
|
143
141
|
*
|
|
144
142
|
* For first-time readers:
|
|
145
|
-
* - Start with backtrackingLineSearch
|
|
146
|
-
* -
|
|
147
|
-
* - Line search prevents overshooting the minimum
|
|
143
|
+
* - Start with `backtrackingLineSearch` (simpler)
|
|
144
|
+
* - Then read `strongWolfeLineSearch` and `zoom` (more subtle but more powerful)
|
|
148
145
|
*/
|
|
149
146
|
const DEFAULT_INITIAL_STEP_SIZE = 1.0;
|
|
150
147
|
const GRADIENT_NORM_THRESHOLD = 1e-10; // Threshold below which we use default step size to avoid numerical instability
|
|
@@ -152,8 +149,133 @@ const GRADIENT_NORM_THRESHOLD = 1e-10; // Threshold below which we use default s
|
|
|
152
149
|
const DEFAULT_CONTRACTION_FACTOR = 0.5;
|
|
153
150
|
const DEFAULT_ARMIJO_PARAMETER = 1e-4;
|
|
154
151
|
const DEFAULT_MAX_LINE_SEARCH_ITERATIONS = 50;
|
|
155
|
-
const INVALID_STEP_SIZE = 0.0; // Returned when search direction is not a descent direction
|
|
152
|
+
const INVALID_STEP_SIZE$2 = 0.0; // Returned when search direction is not a descent direction
|
|
156
153
|
const NON_DESCENT_DIRECTION_THRESHOLD = 0.0; // Threshold for directional derivative: >= 0 means not a descent direction
|
|
154
|
+
// Typical Strong Wolfe defaults (Nocedal & Wright, 2nd ed.)
|
|
155
|
+
const DEFAULT_WOLFE_C1 = 1e-4;
|
|
156
|
+
const DEFAULT_WOLFE_C2 = 0.9;
|
|
157
|
+
const DEFAULT_MAX_STRONG_WOLFE_ITERATIONS = 25;
|
|
158
|
+
const DEFAULT_MAX_ZOOM_ITERATIONS = 25;
|
|
159
|
+
const DEFAULT_STEP_SIZE_GROWTH_FACTOR = 2.0;
|
|
160
|
+
const DEFAULT_STRONG_WOLFE_INITIAL_STEP_SIZE = 1.0;
|
|
161
|
+
const MINIMUM_STEP_SIZE = 1e-20; // Prevents infinite loops when step size underflows
|
|
162
|
+
const MAXIMUM_STEP_SIZE = 1e20; // Prevents overflow in x + alpha * p
|
|
163
|
+
function clampStepSize(stepSize) {
|
|
164
|
+
if (!isFinite(stepSize))
|
|
165
|
+
return DEFAULT_INITIAL_STEP_SIZE;
|
|
166
|
+
if (stepSize < MINIMUM_STEP_SIZE)
|
|
167
|
+
return MINIMUM_STEP_SIZE;
|
|
168
|
+
if (stepSize > MAXIMUM_STEP_SIZE)
|
|
169
|
+
return MAXIMUM_STEP_SIZE;
|
|
170
|
+
return stepSize;
|
|
171
|
+
}
|
|
172
|
+
function computeTrialParameters(currentParameters, searchDirection, stepSize) {
|
|
173
|
+
const trialParameters = new Float64Array(currentParameters.length);
|
|
174
|
+
for (let index = 0; index < currentParameters.length; index++) {
|
|
175
|
+
trialParameters[index] = currentParameters[index] + stepSize * searchDirection[index];
|
|
176
|
+
}
|
|
177
|
+
return trialParameters;
|
|
178
|
+
}
|
|
179
|
+
function evaluateCostAndDirectionalDerivative(costFunction, gradientFunction, currentParameters, searchDirection, stepSize) {
|
|
180
|
+
const clampedStepSize = clampStepSize(stepSize);
|
|
181
|
+
const trialParameters = computeTrialParameters(currentParameters, searchDirection, clampedStepSize);
|
|
182
|
+
const trialCost = costFunction(trialParameters);
|
|
183
|
+
const trialGradient = gradientFunction(trialParameters);
|
|
184
|
+
const trialDirectionalDerivative = dotProduct(trialGradient, searchDirection);
|
|
185
|
+
return { stepSize: clampedStepSize, cost: trialCost, directionalDerivative: trialDirectionalDerivative };
|
|
186
|
+
}
|
|
187
|
+
function determineInitialStepSize(providedInitialStepSize, currentGradient) {
|
|
188
|
+
if (providedInitialStepSize !== undefined) {
|
|
189
|
+
return clampStepSize(providedInitialStepSize);
|
|
190
|
+
}
|
|
191
|
+
return DEFAULT_STRONG_WOLFE_INITIAL_STEP_SIZE;
|
|
192
|
+
}
|
|
193
|
+
function satisfiesArmijoCondition(trialCost, currentCost, wolfeC1, stepSize, directionalDerivativeAtZero) {
|
|
194
|
+
const armijoThreshold = currentCost + wolfeC1 * stepSize * directionalDerivativeAtZero;
|
|
195
|
+
return trialCost <= armijoThreshold;
|
|
196
|
+
}
|
|
197
|
+
function satisfiesStrongWolfeCurvatureCondition(trialDirectionalDerivative, wolfeC2, directionalDerivativeAtZero) {
|
|
198
|
+
const leftSide = Math.abs(trialDirectionalDerivative);
|
|
199
|
+
const rightSide = wolfeC2 * Math.abs(directionalDerivativeAtZero);
|
|
200
|
+
return leftSide <= rightSide;
|
|
201
|
+
}
|
|
202
|
+
function computeBisectionStepSize(stepSizeLow, stepSizeHigh) {
|
|
203
|
+
return 0.5 * (stepSizeLow + stepSizeHigh);
|
|
204
|
+
}
|
|
205
|
+
function zoom(costFunction, gradientFunction, currentParameters, searchDirection, currentCost, directionalDerivativeAtZero, wolfeC1, wolfeC2, stepSizeLowInitial, stepSizeHighInitial, costAtStepSizeLowInitial, maxZoomIterations) {
|
|
206
|
+
let stepSizeLow = stepSizeLowInitial;
|
|
207
|
+
let stepSizeHigh = stepSizeHighInitial;
|
|
208
|
+
let costAtStepSizeLow = costAtStepSizeLowInitial;
|
|
209
|
+
for (let zoomIteration = 0; zoomIteration < maxZoomIterations; zoomIteration++) {
|
|
210
|
+
const trialStepSize = computeBisectionStepSize(stepSizeLow, stepSizeHigh);
|
|
211
|
+
const evaluation = evaluateCostAndDirectionalDerivative(costFunction, gradientFunction, currentParameters, searchDirection, trialStepSize);
|
|
212
|
+
if (!satisfiesArmijoCondition(evaluation.cost, currentCost, wolfeC1, evaluation.stepSize, directionalDerivativeAtZero)) {
|
|
213
|
+
stepSizeHigh = evaluation.stepSize;
|
|
214
|
+
continue;
|
|
215
|
+
}
|
|
216
|
+
if (evaluation.cost >= costAtStepSizeLow) {
|
|
217
|
+
stepSizeHigh = evaluation.stepSize;
|
|
218
|
+
continue;
|
|
219
|
+
}
|
|
220
|
+
if (satisfiesStrongWolfeCurvatureCondition(evaluation.directionalDerivative, wolfeC2, directionalDerivativeAtZero)) {
|
|
221
|
+
return evaluation.stepSize;
|
|
222
|
+
}
|
|
223
|
+
const bracketWidth = stepSizeHigh - stepSizeLow;
|
|
224
|
+
const shouldSwapBracketSide = evaluation.directionalDerivative * bracketWidth >= 0.0;
|
|
225
|
+
if (shouldSwapBracketSide) {
|
|
226
|
+
stepSizeHigh = stepSizeLow;
|
|
227
|
+
}
|
|
228
|
+
stepSizeLow = evaluation.stepSize;
|
|
229
|
+
costAtStepSizeLow = evaluation.cost;
|
|
230
|
+
}
|
|
231
|
+
// If zoom fails to find a point satisfying Strong Wolfe, return the best-known lower bound.
|
|
232
|
+
return clampStepSize(stepSizeLow);
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Strong Wolfe line search (Nocedal & Wright, 2nd ed., Algorithm 3.5).
|
|
236
|
+
*
|
|
237
|
+
* WHY: For quasi-Newton methods, satisfying the curvature condition improves the chance that
|
|
238
|
+
* the update will maintain a stable approximation (e.g., positive definiteness).
|
|
239
|
+
*/
|
|
240
|
+
function strongWolfeLineSearch(costFunction, gradientFunction, currentParameters, searchDirection, options = {}) {
|
|
241
|
+
const wolfeC1 = options.wolfeC1 ?? DEFAULT_WOLFE_C1;
|
|
242
|
+
const wolfeC2 = options.wolfeC2 ?? DEFAULT_WOLFE_C2;
|
|
243
|
+
const maxIterations = options.maxIterations ?? DEFAULT_MAX_STRONG_WOLFE_ITERATIONS;
|
|
244
|
+
const maxZoomIterations = options.maxZoomIterations ?? DEFAULT_MAX_ZOOM_ITERATIONS;
|
|
245
|
+
const stepSizeGrowthFactor = options.stepSizeGrowthFactor ?? DEFAULT_STEP_SIZE_GROWTH_FACTOR;
|
|
246
|
+
const currentCost = costFunction(currentParameters);
|
|
247
|
+
const currentGradient = gradientFunction(currentParameters);
|
|
248
|
+
const directionalDerivativeAtZero = dotProduct(currentGradient, searchDirection);
|
|
249
|
+
// Strong Wolfe requires a descent direction (phi'(0) < 0). Otherwise, a line search is ill-posed.
|
|
250
|
+
if (directionalDerivativeAtZero >= NON_DESCENT_DIRECTION_THRESHOLD) {
|
|
251
|
+
return INVALID_STEP_SIZE$2;
|
|
252
|
+
}
|
|
253
|
+
let previousStepSize = 0.0;
|
|
254
|
+
let previousCost = currentCost;
|
|
255
|
+
let stepSize = determineInitialStepSize(options.initialStepSize);
|
|
256
|
+
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
257
|
+
const evaluation = evaluateCostAndDirectionalDerivative(costFunction, gradientFunction, currentParameters, searchDirection, stepSize);
|
|
258
|
+
const violatesArmijo = !satisfiesArmijoCondition(evaluation.cost, currentCost, wolfeC1, evaluation.stepSize, directionalDerivativeAtZero);
|
|
259
|
+
const isNotImprovingEnough = iteration > 0 && evaluation.cost >= previousCost;
|
|
260
|
+
if (violatesArmijo || isNotImprovingEnough) {
|
|
261
|
+
return zoom(costFunction, gradientFunction, currentParameters, searchDirection, currentCost, directionalDerivativeAtZero, wolfeC1, wolfeC2, previousStepSize, evaluation.stepSize, previousCost, maxZoomIterations);
|
|
262
|
+
}
|
|
263
|
+
if (satisfiesStrongWolfeCurvatureCondition(evaluation.directionalDerivative, wolfeC2, directionalDerivativeAtZero)) {
|
|
264
|
+
return evaluation.stepSize;
|
|
265
|
+
}
|
|
266
|
+
if (evaluation.directionalDerivative >= 0.0) {
|
|
267
|
+
const stepSizeLow = Math.min(previousStepSize, evaluation.stepSize);
|
|
268
|
+
const stepSizeHigh = Math.max(previousStepSize, evaluation.stepSize);
|
|
269
|
+
const costAtStepSizeLow = stepSizeLow === previousStepSize ? previousCost : evaluation.cost;
|
|
270
|
+
return zoom(costFunction, gradientFunction, currentParameters, searchDirection, currentCost, directionalDerivativeAtZero, wolfeC1, wolfeC2, stepSizeLow, stepSizeHigh, costAtStepSizeLow, maxZoomIterations);
|
|
271
|
+
}
|
|
272
|
+
previousStepSize = evaluation.stepSize;
|
|
273
|
+
previousCost = evaluation.cost;
|
|
274
|
+
stepSize = clampStepSize(evaluation.stepSize * stepSizeGrowthFactor);
|
|
275
|
+
}
|
|
276
|
+
// If we couldn't satisfy Strong Wolfe within the iteration limit, return the last tried step size.
|
|
277
|
+
return clampStepSize(stepSize);
|
|
278
|
+
}
|
|
157
279
|
/**
|
|
158
280
|
* Performs backtracking line search to find a step size that satisfies
|
|
159
281
|
* the Armijo condition (sufficient decrease). This follows the textbook
|
|
@@ -192,7 +314,7 @@ function backtrackingLineSearch(costFunction, gradientFunction, currentParameter
|
|
|
192
314
|
else {
|
|
193
315
|
// Scale initial step size by gradient norm: 1.0 / ||gradient||
|
|
194
316
|
// This prevents steps from being too large when gradients are large
|
|
195
|
-
const gradientNorm = vectorNorm(currentGradient);
|
|
317
|
+
const gradientNorm = vectorNorm$1(currentGradient);
|
|
196
318
|
// Handle edge cases: very small or zero gradient norm
|
|
197
319
|
if (gradientNorm < GRADIENT_NORM_THRESHOLD) {
|
|
198
320
|
initialStepSize = DEFAULT_INITIAL_STEP_SIZE;
|
|
@@ -206,14 +328,11 @@ function backtrackingLineSearch(costFunction, gradientFunction, currentParameter
|
|
|
206
328
|
}
|
|
207
329
|
}
|
|
208
330
|
// Compute directional derivative: ∇f(x)^T * d
|
|
209
|
-
|
|
210
|
-
for (let i = 0; i < currentGradient.length; i++) {
|
|
211
|
-
directionalDerivative += currentGradient[i] * searchDirection[i];
|
|
212
|
-
}
|
|
331
|
+
const directionalDerivative = dotProduct(currentGradient, searchDirection);
|
|
213
332
|
// Early return if search direction is not a descent direction
|
|
214
333
|
// Directional derivative >= 0 means moving in this direction increases the cost
|
|
215
334
|
if (directionalDerivative >= NON_DESCENT_DIRECTION_THRESHOLD) {
|
|
216
|
-
return INVALID_STEP_SIZE;
|
|
335
|
+
return INVALID_STEP_SIZE$2;
|
|
217
336
|
}
|
|
218
337
|
let stepSize = initialStepSize;
|
|
219
338
|
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
@@ -267,24 +386,21 @@ function createConvergenceResult(finalParameters, iteration, converged, finalCos
|
|
|
267
386
|
* Returns true if gradient is small enough (algorithm has found a stationary point).
|
|
268
387
|
*/
|
|
269
388
|
function checkGradientConvergence(gradientNorm, tolerance, iteration) {
|
|
270
|
-
|
|
271
|
-
return iteration > 0 && gradientNorm < tolerance;
|
|
389
|
+
return gradientNorm < tolerance;
|
|
272
390
|
}
|
|
273
391
|
/**
|
|
274
392
|
* Checks if step size indicates convergence.
|
|
275
393
|
* Returns true if step is small enough (algorithm is making minimal progress).
|
|
276
394
|
*/
|
|
277
395
|
function checkStepSizeConvergence(stepNorm, tolerance, iteration) {
|
|
278
|
-
|
|
279
|
-
return iteration > 0 && stepNorm < tolerance;
|
|
396
|
+
return stepNorm < tolerance;
|
|
280
397
|
}
|
|
281
398
|
/**
|
|
282
399
|
* Checks if residual norm indicates convergence.
|
|
283
400
|
* Returns true if residual is small enough (problem is solved to desired accuracy).
|
|
284
401
|
*/
|
|
285
402
|
function checkResidualConvergence(residualNorm, tolerance, iteration) {
|
|
286
|
-
|
|
287
|
-
return iteration > 0 && residualNorm < tolerance;
|
|
403
|
+
return residualNorm < tolerance;
|
|
288
404
|
}
|
|
289
405
|
|
|
290
406
|
/**
|
|
@@ -519,12 +635,12 @@ class Logger {
|
|
|
519
635
|
* - Understand how it uses line search or fixed step size
|
|
520
636
|
* - Check convergence criteria implementation
|
|
521
637
|
*/
|
|
522
|
-
const DEFAULT_MAX_ITERATIONS$
|
|
523
|
-
const DEFAULT_TOLERANCE$
|
|
638
|
+
const DEFAULT_MAX_ITERATIONS$8 = 1000;
|
|
639
|
+
const DEFAULT_TOLERANCE$5 = 1e-6;
|
|
524
640
|
const DEFAULT_STEP_SIZE$2 = 0.01;
|
|
525
|
-
const DEFAULT_USE_LINE_SEARCH$
|
|
641
|
+
const DEFAULT_USE_LINE_SEARCH$3 = true;
|
|
526
642
|
const ZERO_STEP_SIZE$1 = 0.0; // Indicates line search found no valid step (not a descent direction)
|
|
527
|
-
const NEGATIVE_GRADIENT_DIRECTION$
|
|
643
|
+
const NEGATIVE_GRADIENT_DIRECTION$3 = -1; // Multiplier for negative gradient direction (steepest descent)
|
|
528
644
|
/**
|
|
529
645
|
* Determines the step size for gradient descent iteration.
|
|
530
646
|
* Uses line search if enabled, otherwise uses fixed step size.
|
|
@@ -536,7 +652,7 @@ function determineStepSize$1(currentGradient, currentParameters, costFunction, g
|
|
|
536
652
|
return { stepSize: fixedStepSize ?? DEFAULT_STEP_SIZE$2, usedLineSearch: false };
|
|
537
653
|
}
|
|
538
654
|
// Use line search when enabled and no fixed step size provided
|
|
539
|
-
const searchDirection = scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION$
|
|
655
|
+
const searchDirection = scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION$3);
|
|
540
656
|
// Backtracking line search with Armijo condition (Boyd & Vandenberghe, Sec. 9.3)
|
|
541
657
|
// to choose a step satisfying sufficient decrease (Nocedal & Wright, Ch. 2)
|
|
542
658
|
const stepSize = backtrackingLineSearch(costFunction, gradientFunction, currentParameters, searchDirection);
|
|
@@ -547,7 +663,7 @@ function determineStepSize$1(currentGradient, currentParameters, costFunction, g
|
|
|
547
663
|
* Returns the new parameters and the step vector.
|
|
548
664
|
*/
|
|
549
665
|
function updateParametersWithGradientStep(currentParameters, currentGradient, stepSize) {
|
|
550
|
-
const negativeStepSize = NEGATIVE_GRADIENT_DIRECTION$
|
|
666
|
+
const negativeStepSize = NEGATIVE_GRADIENT_DIRECTION$3 * stepSize;
|
|
551
667
|
const step = scaleVector(currentGradient, negativeStepSize);
|
|
552
668
|
const newParameters = addVectors(currentParameters, step);
|
|
553
669
|
return { newParameters, step };
|
|
@@ -557,7 +673,7 @@ function updateParametersWithGradientStep(currentParameters, currentGradient, st
|
|
|
557
673
|
* Early return pattern to reduce nesting.
|
|
558
674
|
*/
|
|
559
675
|
function checkGradientConvergenceAndReturn$1(currentParameters, iteration, currentCost, gradientNorm, tolerance, usedLineSearchFlag, logger) {
|
|
560
|
-
if (checkGradientConvergence(gradientNorm, tolerance
|
|
676
|
+
if (checkGradientConvergence(gradientNorm, tolerance)) {
|
|
561
677
|
logger.info('gradientDescent', iteration, 'Converged', [
|
|
562
678
|
{ key: 'Cost:', value: currentCost },
|
|
563
679
|
{ key: 'Gradient norm:', value: gradientNorm }
|
|
@@ -571,7 +687,7 @@ function checkGradientConvergenceAndReturn$1(currentParameters, iteration, curre
|
|
|
571
687
|
* Handles line search failure case.
|
|
572
688
|
* Returns convergence result indicating failure.
|
|
573
689
|
*/
|
|
574
|
-
function handleLineSearchFailure$
|
|
690
|
+
function handleLineSearchFailure$3(currentParameters, iteration, currentCost, gradientNorm, logger) {
|
|
575
691
|
logger.warn('gradientDescent', iteration, 'Line search failed', [
|
|
576
692
|
{ key: 'Cost:', value: currentCost },
|
|
577
693
|
{ key: 'Gradient norm:', value: gradientNorm }
|
|
@@ -594,7 +710,7 @@ function handleLineSearchFailure$1(currentParameters, iteration, currentCost, gr
|
|
|
594
710
|
* Early return pattern to reduce nesting.
|
|
595
711
|
*/
|
|
596
712
|
function checkStepSizeConvergenceAndReturn$1(currentParameters, iteration, currentCost, gradientNorm, stepNorm, tolerance, newUsedLineSearch, logger) {
|
|
597
|
-
if (checkStepSizeConvergence(stepNorm, tolerance
|
|
713
|
+
if (checkStepSizeConvergence(stepNorm, tolerance)) {
|
|
598
714
|
logger.info('gradientDescent', iteration, 'Converged', [
|
|
599
715
|
{ key: 'Cost:', value: currentCost },
|
|
600
716
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -611,7 +727,7 @@ function checkStepSizeConvergenceAndReturn$1(currentParameters, iteration, curre
|
|
|
611
727
|
*/
|
|
612
728
|
function performGradientDescentIteration(iteration, currentParameters, currentCost, costFunction, gradientFunction, tolerance, useLineSearch, fixedStepSize, onIteration, logger, usedLineSearchFlag) {
|
|
613
729
|
const currentGradient = gradientFunction(currentParameters);
|
|
614
|
-
const gradientNorm = vectorNorm(currentGradient); // Uses Euclidean norm for steepest descent direction (Nocedal & Wright, Ch. 2)
|
|
730
|
+
const gradientNorm = vectorNorm$1(currentGradient); // Uses Euclidean norm for steepest descent direction (Nocedal & Wright, Ch. 2)
|
|
615
731
|
// Handle callback (different behavior for first iteration)
|
|
616
732
|
if (onIteration) {
|
|
617
733
|
const callbackIteration = iteration === 0 ? 0 : iteration;
|
|
@@ -626,7 +742,7 @@ function performGradientDescentIteration(iteration, currentParameters, currentCo
|
|
|
626
742
|
const stepSizeResult = determineStepSize$1(currentGradient, currentParameters, costFunction, gradientFunction, useLineSearch, fixedStepSize);
|
|
627
743
|
// Early return: line search failed
|
|
628
744
|
if (stepSizeResult.stepSize === ZERO_STEP_SIZE$1) {
|
|
629
|
-
const failureResult = handleLineSearchFailure$
|
|
745
|
+
const failureResult = handleLineSearchFailure$3(currentParameters, iteration, currentCost, gradientNorm, logger);
|
|
630
746
|
return failureResult;
|
|
631
747
|
}
|
|
632
748
|
const newUsedLineSearch = usedLineSearchFlag || stepSizeResult.usedLineSearch;
|
|
@@ -634,7 +750,7 @@ function performGradientDescentIteration(iteration, currentParameters, currentCo
|
|
|
634
750
|
const { newParameters, step } = updateParametersWithGradientStep(currentParameters, currentGradient, stepSizeResult.stepSize);
|
|
635
751
|
const newCost = costFunction(newParameters);
|
|
636
752
|
// Check step size convergence - early return
|
|
637
|
-
const stepNorm = vectorNorm(step); // Step length via 2-norm for step-size convergence (Boyd & Vandenberghe, Sec. 9.3)
|
|
753
|
+
const stepNorm = vectorNorm$1(step); // Step length via 2-norm for step-size convergence (Boyd & Vandenberghe, Sec. 9.3)
|
|
638
754
|
const stepSizeConvergenceResult = checkStepSizeConvergenceAndReturn$1(currentParameters, iteration, currentCost, gradientNorm, stepNorm, tolerance, newUsedLineSearch, logger);
|
|
639
755
|
if (stepSizeConvergenceResult.converged && stepSizeConvergenceResult.result) {
|
|
640
756
|
return { converged: true, result: stepSizeConvergenceResult.result };
|
|
@@ -663,10 +779,10 @@ function performGradientDescentIteration(iteration, currentParameters, currentCo
|
|
|
663
779
|
* - Maximum iterations reached
|
|
664
780
|
*/
|
|
665
781
|
function gradientDescent(initialParameters, costFunction, gradientFunction, options = {}) {
|
|
666
|
-
const maxIterations = options.maxIterations ?? DEFAULT_MAX_ITERATIONS$
|
|
667
|
-
const tolerance = options.tolerance ?? DEFAULT_TOLERANCE$
|
|
782
|
+
const maxIterations = options.maxIterations ?? DEFAULT_MAX_ITERATIONS$8;
|
|
783
|
+
const tolerance = options.tolerance ?? DEFAULT_TOLERANCE$5;
|
|
668
784
|
const stepSize = options.stepSize;
|
|
669
|
-
const useLineSearch = options.useLineSearch ?? DEFAULT_USE_LINE_SEARCH$
|
|
785
|
+
const useLineSearch = options.useLineSearch ?? DEFAULT_USE_LINE_SEARCH$3;
|
|
670
786
|
const onIteration = options.onIteration;
|
|
671
787
|
const logger = new Logger(options.logLevel, options.verbose);
|
|
672
788
|
let currentParameters = new Float64Array(initialParameters);
|
|
@@ -687,7 +803,7 @@ function gradientDescent(initialParameters, costFunction, gradientFunction, opti
|
|
|
687
803
|
}
|
|
688
804
|
// Maximum iterations reached
|
|
689
805
|
const finalGradient = gradientFunction(currentParameters);
|
|
690
|
-
const finalGradientNorm = vectorNorm(finalGradient);
|
|
806
|
+
const finalGradientNorm = vectorNorm$1(finalGradient);
|
|
691
807
|
logger.warn('gradientDescent', undefined, 'Maximum iterations reached', [
|
|
692
808
|
{ key: 'Iterations:', value: maxIterations },
|
|
693
809
|
{ key: 'Final cost:', value: currentCost },
|
|
@@ -1116,8 +1232,8 @@ function computeJacobianMatrix(jacobianFunction, residualFunction, parameters, u
|
|
|
1116
1232
|
* - Understand how it solves normal equations: (J^T J) δ = -J^T r
|
|
1117
1233
|
* - This is a special case of Newton's method for least squares
|
|
1118
1234
|
*/
|
|
1119
|
-
const DEFAULT_MAX_ITERATIONS$
|
|
1120
|
-
const DEFAULT_TOLERANCE$
|
|
1235
|
+
const DEFAULT_MAX_ITERATIONS$7 = 1000;
|
|
1236
|
+
const DEFAULT_TOLERANCE$4 = 1e-6;
|
|
1121
1237
|
const DEFAULT_USE_NUMERIC_JACOBIAN$1 = true;
|
|
1122
1238
|
const DEFAULT_JACOBIAN_STEP$1 = 1e-6;
|
|
1123
1239
|
const NEGATIVE_COEFFICIENT$5 = -1; // Coefficient for negative right-hand side in normal equations: (J^T J) δ = -J^T r
|
|
@@ -1137,8 +1253,8 @@ const NEGATIVE_COEFFICIENT$5 = -1; // Coefficient for negative right-hand side i
|
|
|
1137
1253
|
function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
1138
1254
|
const actualOptions = options;
|
|
1139
1255
|
const jacobianFunction = actualOptions.jacobian;
|
|
1140
|
-
const maxIterations = actualOptions.maxIterations ?? DEFAULT_MAX_ITERATIONS$
|
|
1141
|
-
const tolerance = actualOptions.tolerance ?? DEFAULT_TOLERANCE$
|
|
1256
|
+
const maxIterations = actualOptions.maxIterations ?? DEFAULT_MAX_ITERATIONS$7;
|
|
1257
|
+
const tolerance = actualOptions.tolerance ?? DEFAULT_TOLERANCE$4;
|
|
1142
1258
|
const useNumericJacobian = actualOptions.useNumericJacobian ?? DEFAULT_USE_NUMERIC_JACOBIAN$1;
|
|
1143
1259
|
const jacobianStep = actualOptions.jacobianStep ?? DEFAULT_JACOBIAN_STEP$1;
|
|
1144
1260
|
const onIteration = actualOptions.onIteration;
|
|
@@ -1147,7 +1263,7 @@ function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
|
1147
1263
|
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
1148
1264
|
// Compute residual vector
|
|
1149
1265
|
const residual = residualFunction(currentParameters);
|
|
1150
|
-
const residualNorm = vectorNorm(residual);
|
|
1266
|
+
const residualNorm = vectorNorm$1(residual);
|
|
1151
1267
|
const cost = computeSumOfSquaredResiduals(residualNorm);
|
|
1152
1268
|
// Call progress callback if provided
|
|
1153
1269
|
if (onIteration) {
|
|
@@ -1195,8 +1311,8 @@ function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
|
1195
1311
|
return { ...result, finalResidualNorm: residualNorm };
|
|
1196
1312
|
}
|
|
1197
1313
|
// Check convergence: step size is small enough
|
|
1198
|
-
const stepNorm = vectorNorm(step);
|
|
1199
|
-
if (checkStepSizeConvergence(stepNorm, tolerance
|
|
1314
|
+
const stepNorm = vectorNorm$1(step);
|
|
1315
|
+
if (checkStepSizeConvergence(stepNorm, tolerance)) {
|
|
1200
1316
|
logger.info('gaussNewton', iteration, 'Converged', [
|
|
1201
1317
|
{ key: 'Cost:', value: cost },
|
|
1202
1318
|
{ key: 'Residual norm:', value: residualNorm },
|
|
@@ -1212,10 +1328,10 @@ function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
|
1212
1328
|
}
|
|
1213
1329
|
// Compute residual for new parameters
|
|
1214
1330
|
const newResidual = residualFunction(newParameters);
|
|
1215
|
-
const newResidualNorm = vectorNorm(newResidual);
|
|
1331
|
+
const newResidualNorm = vectorNorm$1(newResidual);
|
|
1216
1332
|
const newCost = computeSumOfSquaredResiduals(newResidualNorm);
|
|
1217
1333
|
// Check convergence: residual norm is small enough
|
|
1218
|
-
if (checkResidualConvergence(newResidualNorm, tolerance
|
|
1334
|
+
if (checkResidualConvergence(newResidualNorm, tolerance)) {
|
|
1219
1335
|
logger.info('gaussNewton', iteration, 'Converged', [
|
|
1220
1336
|
{ key: 'Cost:', value: newCost },
|
|
1221
1337
|
{ key: 'Residual norm:', value: newResidualNorm }
|
|
@@ -1232,7 +1348,7 @@ function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
|
1232
1348
|
}
|
|
1233
1349
|
// Maximum iterations reached
|
|
1234
1350
|
const finalResidual = residualFunction(currentParameters);
|
|
1235
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
1351
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
1236
1352
|
const finalCost = computeSumOfSquaredResiduals(finalResidualNorm);
|
|
1237
1353
|
logger.warn('gaussNewton', undefined, 'Maximum iterations reached', [
|
|
1238
1354
|
{ key: 'Iterations:', value: maxIterations },
|
|
@@ -1267,7 +1383,7 @@ function gaussNewton(initialParameters, residualFunction, options = {}) {
|
|
|
1267
1383
|
* - Check convergence criteria implementation
|
|
1268
1384
|
* - Debug features (callbacks, verbose logging) are top priority
|
|
1269
1385
|
*/
|
|
1270
|
-
const DEFAULT_MAX_ITERATIONS$
|
|
1386
|
+
const DEFAULT_MAX_ITERATIONS$6 = 1000;
|
|
1271
1387
|
const DEFAULT_LAMBDA_INITIAL$1 = 1e-3;
|
|
1272
1388
|
const DEFAULT_LAMBDA_FACTOR$1 = 10.0;
|
|
1273
1389
|
const DEFAULT_TOL_GRADIENT$1 = 1e-6;
|
|
@@ -1341,7 +1457,7 @@ function tryLevenbergMarquardtStep(jtj, jtr, currentParameters, currentLambda, l
|
|
|
1341
1457
|
stepMatrix = mlMatrix.solve(dampedHessian, negativeJtr);
|
|
1342
1458
|
}
|
|
1343
1459
|
const step = matrixToFloat64Array(stepMatrix);
|
|
1344
|
-
const stepNorm = vectorNorm(step);
|
|
1460
|
+
const stepNorm = vectorNorm$1(step);
|
|
1345
1461
|
// Check step size convergence (termination test suggested in Lourakis 2005, Section 5)
|
|
1346
1462
|
if (checkStepSizeConvergence(stepNorm, tolStep, iteration)) {
|
|
1347
1463
|
return { stepAccepted: false, newLambda: currentLambda, stepNorm };
|
|
@@ -1352,7 +1468,7 @@ function tryLevenbergMarquardtStep(jtj, jtr, currentParameters, currentLambda, l
|
|
|
1352
1468
|
newParameters[i] = currentParameters[i] + step[i];
|
|
1353
1469
|
}
|
|
1354
1470
|
const newResidual = residualFunction(newParameters);
|
|
1355
|
-
const newResidualNorm = vectorNorm(newResidual);
|
|
1471
|
+
const newResidualNorm = vectorNorm$1(newResidual);
|
|
1356
1472
|
const newCost = computeSumOfSquaredResiduals(newResidualNorm);
|
|
1357
1473
|
// Check if step improved the cost
|
|
1358
1474
|
if (newCost < currentCost) {
|
|
@@ -1405,7 +1521,7 @@ function tryLevenbergMarquardtStep(jtj, jtr, currentParameters, currentLambda, l
|
|
|
1405
1521
|
function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
1406
1522
|
const actualOptions = options;
|
|
1407
1523
|
const jacobianFunction = actualOptions.jacobian;
|
|
1408
|
-
const maxIterations = actualOptions.maxIterations ?? DEFAULT_MAX_ITERATIONS$
|
|
1524
|
+
const maxIterations = actualOptions.maxIterations ?? DEFAULT_MAX_ITERATIONS$6;
|
|
1409
1525
|
const lambdaInitial = actualOptions.lambdaInitial ?? DEFAULT_LAMBDA_INITIAL$1;
|
|
1410
1526
|
const lambdaFactor = actualOptions.lambdaFactor ?? DEFAULT_LAMBDA_FACTOR$1;
|
|
1411
1527
|
const tolGradient = actualOptions.tolGradient ?? DEFAULT_TOL_GRADIENT$1;
|
|
@@ -1422,7 +1538,7 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1422
1538
|
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
1423
1539
|
// Compute residual vector
|
|
1424
1540
|
const residual = residualFunction(currentParameters);
|
|
1425
|
-
const residualNorm = vectorNorm(residual);
|
|
1541
|
+
const residualNorm = vectorNorm$1(residual);
|
|
1426
1542
|
const cost = computeSumOfSquaredResiduals(residualNorm);
|
|
1427
1543
|
// Track best solution so far
|
|
1428
1544
|
if (cost < bestCost) {
|
|
@@ -1440,9 +1556,9 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1440
1556
|
const { jtj, jtr } = computeNormalEquationsMatrices$1(jacobianMatrix, residual);
|
|
1441
1557
|
// Compute gradient norm: ||J^T r||
|
|
1442
1558
|
const gradientVector = matrixToFloat64Array(jtr);
|
|
1443
|
-
const gradientNorm = vectorNorm(gradientVector);
|
|
1559
|
+
const gradientNorm = vectorNorm$1(gradientVector);
|
|
1444
1560
|
// Check convergence: gradient norm is small enough (Moré 1978, Section 4 termination test; Lourakis 2005, Section 5)
|
|
1445
|
-
if (checkGradientConvergence(gradientNorm, tolGradient
|
|
1561
|
+
if (checkGradientConvergence(gradientNorm, tolGradient)) {
|
|
1446
1562
|
logger.info('levenbergMarquardt', iteration, 'Converged', [
|
|
1447
1563
|
{ key: 'Cost:', value: cost },
|
|
1448
1564
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -1460,7 +1576,7 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1460
1576
|
return createConvergenceResultForLM$1(bestParameters, iteration, false, bestCost, gradientNorm, residualNorm, stepResult.newLambda);
|
|
1461
1577
|
}
|
|
1462
1578
|
// Early return: step size convergence (Lourakis 2005, Section 5)
|
|
1463
|
-
if (stepResult.stepNorm !== undefined && checkStepSizeConvergence(stepResult.stepNorm, tolStep
|
|
1579
|
+
if (stepResult.stepNorm !== undefined && checkStepSizeConvergence(stepResult.stepNorm, tolStep)) {
|
|
1464
1580
|
logger.info('levenbergMarquardt', iteration, 'Converged', [
|
|
1465
1581
|
{ key: 'Cost:', value: cost },
|
|
1466
1582
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -1486,18 +1602,18 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1486
1602
|
{ key: 'Best cost:', value: bestCost }
|
|
1487
1603
|
]);
|
|
1488
1604
|
const finalResidual = residualFunction(bestParameters);
|
|
1489
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
1605
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
1490
1606
|
const finalGradient = jacobianFunction
|
|
1491
1607
|
? matrixToFloat64Array(jacobianFunction(bestParameters).transpose().mmul(float64ArrayToMatrix(finalResidual)))
|
|
1492
1608
|
: undefined;
|
|
1493
|
-
const finalGradientNorm = finalGradient ? vectorNorm(finalGradient) : undefined;
|
|
1609
|
+
const finalGradientNorm = finalGradient ? vectorNorm$1(finalGradient) : undefined;
|
|
1494
1610
|
return createConvergenceResultForLM$1(bestParameters, iteration, false, bestCost, finalGradientNorm ?? gradientNorm, finalResidualNorm, currentLambda);
|
|
1495
1611
|
}
|
|
1496
1612
|
// Check residual norm convergence (Moré 1978, Section 4 stopping rule; Lourakis 2005, Section 5)
|
|
1497
1613
|
const currentResidual = residualFunction(currentParameters);
|
|
1498
|
-
const currentResidualNorm = vectorNorm(currentResidual);
|
|
1614
|
+
const currentResidualNorm = vectorNorm$1(currentResidual);
|
|
1499
1615
|
const currentCost = computeSumOfSquaredResiduals(currentResidualNorm);
|
|
1500
|
-
if (checkResidualConvergence(currentResidualNorm, tolResidual
|
|
1616
|
+
if (checkResidualConvergence(currentResidualNorm, tolResidual)) {
|
|
1501
1617
|
logger.info('levenbergMarquardt', iteration, 'Converged', [
|
|
1502
1618
|
{ key: 'Cost:', value: currentCost },
|
|
1503
1619
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -1509,11 +1625,11 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1509
1625
|
}
|
|
1510
1626
|
// Maximum iterations reached - return best solution found
|
|
1511
1627
|
const finalResidual = residualFunction(bestParameters);
|
|
1512
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
1628
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
1513
1629
|
const finalGradient = jacobianFunction
|
|
1514
1630
|
? matrixToFloat64Array(jacobianFunction(bestParameters).transpose().mmul(float64ArrayToMatrix(finalResidual)))
|
|
1515
1631
|
: undefined;
|
|
1516
|
-
const finalGradientNorm = finalGradient ? vectorNorm(finalGradient) : undefined;
|
|
1632
|
+
const finalGradientNorm = finalGradient ? vectorNorm$1(finalGradient) : undefined;
|
|
1517
1633
|
logger.warn('levenbergMarquardt', undefined, 'Maximum iterations reached', [
|
|
1518
1634
|
{ key: 'Iterations:', value: maxIterations },
|
|
1519
1635
|
{ key: 'Final cost:', value: bestCost },
|
|
@@ -1533,6 +1649,1102 @@ function levenbergMarquardt(initialParameters, residualFunction, options = {}) {
|
|
|
1533
1649
|
};
|
|
1534
1650
|
}
|
|
1535
1651
|
|
|
1652
|
+
/**
|
|
1653
|
+
* This file implements the (dense) BFGS algorithm for unconstrained smooth optimization.
|
|
1654
|
+
*
|
|
1655
|
+
* Role in system:
|
|
1656
|
+
* - Quasi-Newton optimizer for scalar cost functions with user-provided gradients
|
|
1657
|
+
* - Uses Strong Wolfe line search to encourage curvature conditions needed for stable updates
|
|
1658
|
+
* - Dense method: stores a full inverse Hessian approximation (O(n^2) memory)
|
|
1659
|
+
*
|
|
1660
|
+
* For first-time readers:
|
|
1661
|
+
* - Start with `bfgs` (main entry point)
|
|
1662
|
+
* - Then read `updateInverseHessianApproximation` (core BFGS update)
|
|
1663
|
+
* - Finally, check safeguard helpers (descent direction / curvature checks)
|
|
1664
|
+
*/
|
|
1665
|
+
// Tunable defaults and numeric guards for the dense BFGS implementation below.
const DEFAULT_MAX_ITERATIONS$5 = 1000;
const DEFAULT_TOLERANCE$3 = 1e-6; // Gradient-norm convergence tolerance
const DEFAULT_USE_LINE_SEARCH$2 = true;
const DEFAULT_FIXED_STEP_SIZE$1 = 1.0; // Used only when line search is disabled
const INVALID_STEP_SIZE$1 = 0.0; // Sentinel returned by the line search on failure
const NEGATIVE_GRADIENT_DIRECTION$2 = -1; // Scale factor that flips a vector downhill
const MINIMUM_CURVATURE_THRESHOLD$1 = 1e-10; // s·y at or below this is treated as unsafe curvature
|
|
1672
|
+
/** Returns a fresh dimension×dimension identity matrix (used to (re)seed the inverse Hessian). */
function createIdentityMatrix$1(dimension) {
    // NOTE: pass `columns` explicitly to stay compatible with our older local
    // typing history and with ml-matrix's API, where the second argument is optional.
    const identity = mlMatrix.Matrix.eye(dimension, dimension);
    return identity;
}
|
|
1677
|
+
/**
 * Computes the dense matrix-vector product `matrix * vector`.
 *
 * FIX: the result buffer was sized by `vector.length` (the column count); the
 * product of an m×n matrix with an n-vector has m entries, so it must be sized
 * by `matrix.rows`. Identical for the square matrices BFGS feeds in here, but
 * correct for any rectangular input.
 *
 * @param {Matrix} matrix - ml-matrix style matrix exposing rows/columns/get.
 * @param {Float64Array|number[]} vector - Length must equal matrix.columns.
 * @returns {Float64Array} Product vector of length matrix.rows.
 */
function multiplyMatrixVector(matrix, vector) {
    const result = new Float64Array(matrix.rows);
    for (let rowIndex = 0; rowIndex < matrix.rows; rowIndex++) {
        let sum = 0.0;
        for (let columnIndex = 0; columnIndex < matrix.columns; columnIndex++) {
            sum += matrix.get(rowIndex, columnIndex) * vector[columnIndex];
        }
        result[rowIndex] = sum;
    }
    return result;
}
|
|
1688
|
+
/**
 * Computes the quasi-Newton search direction d = -H·g from the current
 * inverse Hessian approximation H and gradient g.
 */
function computeBfgsSearchDirection(inverseHessianApproximation, currentGradient) {
    const newtonLikeDirection = multiplyMatrixVector(inverseHessianApproximation, currentGradient);
    // Negate so the direction points downhill.
    return scaleVector(newtonLikeDirection, NEGATIVE_GRADIENT_DIRECTION$2);
}
|
|
1692
|
+
/**
 * Verifies that the proposed BFGS direction is a descent direction (gᵀd < 0).
 * On success the direction and inverse Hessian pass through untouched;
 * otherwise the approximation is reset to the identity and steepest descent
 * (-g) is used instead.
 */
function ensureDescentDirectionOrFallback$1(currentGradient, proposedSearchDirection, currentInverseHessianApproximation, logger, iteration, currentCost) {
    const slopeAlongDirection = dotProduct(currentGradient, proposedSearchDirection);
    if (slopeAlongDirection < 0.0) {
        return { searchDirection: proposedSearchDirection, inverseHessianApproximation: currentInverseHessianApproximation };
    }
    // WHY: numerical trouble can yield an uphill direction; restarting from the
    // identity plus negative gradient is the standard recovery.
    logger.warn('bfgs', iteration, 'Non-descent direction detected; resetting inverse Hessian and using negative gradient.', [
        { key: 'Cost:', value: currentCost },
        { key: 'Directional derivative:', value: slopeAlongDirection }
    ]);
    return {
        searchDirection: scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION$2),
        inverseHessianApproximation: createIdentityMatrix$1(currentGradient.length)
    };
}
|
|
1708
|
+
/**
 * Applies the BFGS inverse-Hessian update:
 *   H⁺ = (I − ρ s yᵀ) H (I − ρ y sᵀ) + ρ s sᵀ,  ρ = 1 / (sᵀy),
 * with s = stepVector and y = gradientChangeVector. Resets H to the identity
 * when the curvature sᵀy is too weak to keep the update positive definite.
 *
 * FIX: ml-matrix's instance `sub()` mutates its receiver, so subtracting both
 * outer products from the SAME identity matrix left `leftFactor` and
 * `rightFactor` aliasing one matrix equal to (I − ρsyᵀ − ρysᵀ). Each factor is
 * now built from its own fresh identity, as the formula requires.
 */
function updateInverseHessianApproximation(inverseHessianApproximation, stepVector, gradientChangeVector, logger, iteration, currentCost) {
    const stepDotGradientChange = dotProduct(stepVector, gradientChangeVector);
    const curvatureIsTooWeak = stepDotGradientChange <= MINIMUM_CURVATURE_THRESHOLD$1;
    if (curvatureIsTooWeak) {
        // WHY: If curvature is weak/negative, the BFGS update can break positive definiteness.
        logger.warn('bfgs', iteration, 'Curvature condition too weak; resetting inverse Hessian approximation.', [
            { key: 'Cost:', value: currentCost },
            { key: 'stepDotGradientChange:', value: stepDotGradientChange }
        ]);
        return createIdentityMatrix$1(stepVector.length);
    }
    const curvatureScaling = 1.0 / stepDotGradientChange; // ρ
    const stepMatrix = mlMatrix.Matrix.columnVector(Array.from(stepVector)); // s as a column
    const gradientChangeMatrix = mlMatrix.Matrix.columnVector(Array.from(gradientChangeVector)); // y as a column
    const stepGradientOuterProduct = stepMatrix.mmul(gradientChangeMatrix.transpose()).mul(curvatureScaling); // ρ s yᵀ
    const gradientStepOuterProduct = gradientChangeMatrix.mmul(stepMatrix.transpose()).mul(curvatureScaling); // ρ y sᵀ
    // Each factor gets its own identity because instance sub() works in place.
    const leftFactor = createIdentityMatrix$1(stepVector.length).sub(stepGradientOuterProduct);
    const rightFactor = createIdentityMatrix$1(stepVector.length).sub(gradientStepOuterProduct);
    const rankTwoPart = leftFactor.mmul(inverseHessianApproximation).mmul(rightFactor);
    const rankOnePart = stepMatrix.mmul(stepMatrix.transpose()).mul(curvatureScaling); // ρ s sᵀ
    return rankTwoPart.add(rankOnePart);
}
|
|
1731
|
+
/** Takes one optimization step: x⁺ = x + stepSize · d. */
function computeNextParameters$1(currentParameters, searchDirection, stepSize) {
    return addVectors(currentParameters, scaleVector(searchDirection, stepSize));
}
|
|
1735
|
+
/**
 * Builds the non-converged result returned when the line search reports a
 * zero step size, logging the failure at warn level.
 */
function handleLineSearchFailure$2(currentParameters, iteration, currentCost, gradientNorm, logger) {
    logger.warn('bfgs', iteration, 'Line search failed (non-descent direction).', [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm }
    ]);
    const failureResult = {
        finalParameters: currentParameters,
        parameters: currentParameters,
        iterations: iteration + 1,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm: gradientNorm
    };
    return failureResult;
}
|
|
1749
|
+
/**
 * Minimizes a smooth scalar function with the dense BFGS quasi-Newton method.
 *
 * Maintains a full inverse Hessian approximation H, steps along d = -H·g sized
 * by a Strong Wolfe line search (or a fixed step), and refreshes H with the
 * rank-two BFGS update each iteration.
 *
 * @param {Float64Array|number[]} initialParameters - Starting point (copied, not mutated).
 * @param {(x: Float64Array) => number} costFunction - Scalar objective to minimize.
 * @param {(x: Float64Array) => Float64Array} gradientFunction - Gradient of the objective.
 * @param {object} [options] - maxIterations, tolerance, useLineSearch, stepSize,
 *   lineSearchOptions, onIteration, logLevel, verbose.
 * @returns {object} Result with finalParameters/parameters, iterations,
 *   converged, finalCost and finalGradientNorm.
 */
function bfgs(initialParameters, costFunction, gradientFunction, options = {}) {
    const maxIterations = options.maxIterations ?? DEFAULT_MAX_ITERATIONS$5;
    const tolerance = options.tolerance ?? DEFAULT_TOLERANCE$3;
    const useLineSearch = options.useLineSearch ?? DEFAULT_USE_LINE_SEARCH$2;
    const fixedStepSize = options.stepSize ?? DEFAULT_FIXED_STEP_SIZE$1;
    const onIteration = options.onIteration;
    const logger = new Logger(options.logLevel, options.verbose);
    let currentParameters = new Float64Array(initialParameters);
    let currentCost = costFunction(currentParameters);
    // H starts at the identity, so the first step is plain steepest descent.
    let inverseHessianApproximation = createIdentityMatrix$1(currentParameters.length);
    for (let iteration = 0; iteration < maxIterations; iteration++) {
        const currentGradient = gradientFunction(currentParameters);
        const gradientNorm = vectorNorm$1(currentGradient);
        if (onIteration)
            onIteration(iteration, currentCost, currentParameters);
        // Converged: gradient norm fell below the tolerance.
        if (checkGradientConvergence(gradientNorm, tolerance)) {
            logger.info('bfgs', iteration, 'Converged', [
                { key: 'Cost:', value: currentCost },
                { key: 'Gradient norm:', value: gradientNorm }
            ]);
            return createConvergenceResult(currentParameters, iteration, true, currentCost, gradientNorm);
        }
        const proposedSearchDirection = computeBfgsSearchDirection(inverseHessianApproximation, currentGradient);
        // Safeguard: may swap in -g plus a reset H if the proposal points uphill.
        const descentResult = ensureDescentDirectionOrFallback$1(currentGradient, proposedSearchDirection, inverseHessianApproximation, logger, iteration, currentCost);
        const searchDirection = descentResult.searchDirection;
        inverseHessianApproximation = descentResult.inverseHessianApproximation;
        const stepSize = useLineSearch
            ? strongWolfeLineSearch(costFunction, gradientFunction, currentParameters, searchDirection, options.lineSearchOptions)
            : fixedStepSize;
        // A zero step size is the line search's failure sentinel.
        if (stepSize === INVALID_STEP_SIZE$1) {
            return handleLineSearchFailure$2(currentParameters, iteration, currentCost, gradientNorm, logger);
        }
        const newParameters = computeNextParameters$1(currentParameters, searchDirection, stepSize);
        const stepVector = subtractVectors$1(newParameters, currentParameters);
        const stepNorm = vectorNorm$1(stepVector);
        const newCost = costFunction(newParameters);
        const newGradient = gradientFunction(newParameters);
        const gradientChangeVector = subtractVectors$1(newGradient, currentGradient);
        // Rank-two BFGS update of H (resets to identity on weak curvature).
        inverseHessianApproximation = updateInverseHessianApproximation(inverseHessianApproximation, stepVector, gradientChangeVector, logger, iteration, newCost);
        logger.debug('bfgs', iteration, 'Progress', [
            { key: 'Cost:', value: currentCost },
            { key: 'Gradient norm:', value: gradientNorm },
            { key: 'Step size:', value: stepSize },
            { key: 'Step norm:', value: stepNorm }
        ]);
        currentParameters = new Float64Array(newParameters);
        currentCost = newCost;
    }
    // Iteration budget exhausted: report the last iterate, not converged.
    const finalGradient = gradientFunction(currentParameters);
    const finalGradientNorm = vectorNorm$1(finalGradient);
    logger.warn('bfgs', undefined, 'Maximum iterations reached', [
        { key: 'Iterations:', value: maxIterations },
        { key: 'Final cost:', value: currentCost },
        { key: 'Final gradient norm:', value: finalGradientNorm }
    ]);
    return {
        finalParameters: currentParameters,
        parameters: currentParameters,
        iterations: maxIterations,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm: finalGradientNorm
    };
}
|
|
1813
|
+
|
|
1814
|
+
/**
|
|
1815
|
+
* This file implements the L-BFGS (Limited-memory BFGS) algorithm for unconstrained
|
|
1816
|
+
* smooth optimization.
|
|
1817
|
+
*
|
|
1818
|
+
* Role in system:
|
|
1819
|
+
* - Quasi-Newton optimizer for scalar cost functions with user-provided gradients
|
|
1820
|
+
* - Uses Strong Wolfe line search to obtain steps that typically satisfy curvature conditions
|
|
1821
|
+
* - Memory-efficient alternative to dense BFGS for medium/large parameter counts
|
|
1822
|
+
*
|
|
1823
|
+
* For first-time readers:
|
|
1824
|
+
* - Start with `lbfgs` (main entry point)
|
|
1825
|
+
* - Then read `computeLbfgsSearchDirection` (two-loop recursion)
|
|
1826
|
+
* - Finally, check safeguard helpers (descent direction / curvature checks)
|
|
1827
|
+
*/
|
|
1828
|
+
// Tunable defaults and numeric guards for the L-BFGS implementation below.
const DEFAULT_MAX_ITERATIONS$4 = 1000;
const DEFAULT_TOLERANCE$2 = 1e-6; // Gradient-norm convergence tolerance
const DEFAULT_HISTORY_SIZE = 10; // Number of (s, y) pairs kept for the two-loop recursion
const DEFAULT_USE_LINE_SEARCH$1 = true;
const DEFAULT_FIXED_STEP_SIZE = 1.0; // Used only when line search is disabled
const INVALID_STEP_SIZE = 0.0; // Sentinel returned by the line search on failure
const NEGATIVE_GRADIENT_DIRECTION$1 = -1; // Scale factor that flips a vector downhill
const MINIMUM_CURVATURE_THRESHOLD = 1e-10; // s·y at or below this is treated as unsafe curvature
const DEFAULT_INITIAL_SCALING_FACTOR = 1.0; // Fallback γ when history is empty or curvature is invalid
|
|
1837
|
+
/** Creates fresh, empty L-BFGS memory: parallel arrays of s, y and 1/(s·y). */
function createEmptyHistory() {
    return {
        stepVectorHistory: [],
        gradientChangeVectorHistory: [],
        reciprocalCurvatureHistory: []
    };
}
|
|
1840
|
+
/** Empties all three history arrays in place so shared references see the reset. */
function clearHistory(history) {
    history.stepVectorHistory.splice(0);
    history.gradientChangeVectorHistory.splice(0);
    history.reciprocalCurvatureHistory.splice(0);
}
|
|
1845
|
+
/**
 * Computes the initial Hessian scaling γ = (sᵀy)/(yᵀy) from the most recent
 * history pair, falling back to 1.0 whenever history is empty or the ratio is
 * non-positive or non-finite.
 */
function computeInitialScalingFactor(history) {
    const count = history.stepVectorHistory.length;
    if (count === 0)
        return DEFAULT_INITIAL_SCALING_FACTOR;
    const newestStep = history.stepVectorHistory[count - 1];
    const newestGradientChange = history.gradientChangeVectorHistory[count - 1];
    const stepDotGradientChange = dotProduct(newestStep, newestGradientChange);
    const gradientChangeDotSelf = dotProduct(newestGradientChange, newestGradientChange);
    if (stepDotGradientChange <= 0.0 || gradientChangeDotSelf <= 0.0)
        return DEFAULT_INITIAL_SCALING_FACTOR;
    const gamma = stepDotGradientChange / gradientChangeDotSelf;
    return isFinite(gamma) && gamma > 0.0 ? gamma : DEFAULT_INITIAL_SCALING_FACTOR;
}
|
|
1863
|
+
/**
 * Computes the L-BFGS search direction via the standard two-loop recursion,
 * implicitly applying the inverse Hessian approximation stored as
 * (step, gradient-change, 1/(s·y)) history triplets.
 *
 * With empty history this degenerates to steepest descent (-g).
 *
 * @param {Float64Array} currentGradient - Gradient at the current iterate.
 * @param {object} history - Parallel arrays stepVectorHistory /
 *   gradientChangeVectorHistory / reciprocalCurvatureHistory.
 * @returns {Float64Array} Search direction (already negated to point downhill).
 */
function computeLbfgsSearchDirection(currentGradient, history) {
    const historyLength = history.stepVectorHistory.length;
    if (historyLength === 0) {
        return scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION$1);
    }
    // NOTE: Avoid `new Float64Array(existingFloat64Array)` because TS can infer
    // `ArrayBufferLike` for the resulting buffer, which conflicts with stricter lib types.
    const qVectorInitial = new Float64Array(currentGradient.length);
    qVectorInitial.set(currentGradient);
    let qVector = qVectorInitial;
    const alphaCoefficients = new Array(historyLength);
    // First (backward) loop: newest to oldest, peel off α_i = ρ_i sᵢᵀq.
    for (let index = historyLength - 1; index >= 0; index--) {
        const stepVector = history.stepVectorHistory[index];
        const reciprocalCurvature = history.reciprocalCurvatureHistory[index];
        const alphaCoefficient = reciprocalCurvature * dotProduct(stepVector, qVector);
        alphaCoefficients[index] = alphaCoefficient;
        qVector = subtractVectors$1(qVector, scaleVector(history.gradientChangeVectorHistory[index], alphaCoefficient));
    }
    // Apply the initial Hessian approximation H0 = γ·I.
    const initialScalingFactor = computeInitialScalingFactor(history);
    let rVector = scaleVector(qVector, initialScalingFactor);
    // Second (forward) loop: oldest to newest, add back (α_i − β_i) sᵢ.
    for (let index = 0; index < historyLength; index++) {
        const gradientChangeVector = history.gradientChangeVectorHistory[index];
        const reciprocalCurvature = history.reciprocalCurvatureHistory[index];
        const betaCoefficient = reciprocalCurvature * dotProduct(gradientChangeVector, rVector);
        const correctionCoefficient = alphaCoefficients[index] - betaCoefficient;
        rVector = addVectors(rVector, scaleVector(history.stepVectorHistory[index], correctionCoefficient));
    }
    return scaleVector(rVector, NEGATIVE_GRADIENT_DIRECTION$1);
}
|
|
1892
|
+
/**
 * Checks that the proposed L-BFGS direction satisfies gᵀd < 0 (descent).
 * If not, the history is cleared and the steepest-descent direction -g is
 * returned instead.
 */
function ensureDescentDirectionOrFallback(currentGradient, proposedSearchDirection, history, logger, iteration, currentCost) {
    const slopeAlongDirection = dotProduct(currentGradient, proposedSearchDirection);
    if (slopeAlongDirection < 0.0)
        return proposedSearchDirection;
    // WHY: if numerical issues break descent, dropping the memory and taking
    // the negative gradient is the simplest safe recovery.
    clearHistory(history);
    logger.warn('lbfgs', iteration, 'Non-descent direction detected; falling back to negative gradient and clearing history.', [
        { key: 'Cost:', value: currentCost },
        { key: 'Directional derivative:', value: slopeAlongDirection }
    ]);
    return scaleVector(currentGradient, NEGATIVE_GRADIENT_DIRECTION$1);
}
|
|
1905
|
+
/**
 * Appends an (s, y, 1/(s·y)) triplet to the bounded L-BFGS memory when the
 * curvature s·y is strong enough; otherwise clears the memory entirely.
 */
function updateHistoryIfCurvatureIsValid(history, historySize, stepVector, gradientChangeVector, logger, iteration, currentCost) {
    const curvature = dotProduct(stepVector, gradientChangeVector);
    if (curvature <= MINIMUM_CURVATURE_THRESHOLD) {
        // WHY: weak or negative curvature can destabilize the two-loop recursion;
        // dropping all memory is the simplest safe recovery.
        clearHistory(history);
        logger.warn('lbfgs', iteration, 'Curvature condition too weak; clearing history to regain robustness.', [
            { key: 'Cost:', value: currentCost },
            { key: 'stepDotGradientChange:', value: curvature }
        ]);
        return;
    }
    history.stepVectorHistory.push(stepVector);
    history.gradientChangeVectorHistory.push(gradientChangeVector);
    history.reciprocalCurvatureHistory.push(1.0 / curvature);
    // Evict the oldest pairs until the bounded memory fits again.
    while (history.stepVectorHistory.length > historySize) {
        history.stepVectorHistory.shift();
        history.gradientChangeVectorHistory.shift();
        history.reciprocalCurvatureHistory.shift();
    }
}
|
|
1927
|
+
/** Takes one optimization step: x⁺ = x + stepSize · d. */
function computeNextParameters(currentParameters, searchDirection, stepSize) {
    return addVectors(currentParameters, scaleVector(searchDirection, stepSize));
}
|
|
1931
|
+
/**
 * Builds the non-converged result returned when the line search reports a
 * zero step size, logging the failure at warn level.
 */
function handleLineSearchFailure$1(currentParameters, iteration, currentCost, gradientNorm, logger) {
    logger.warn('lbfgs', iteration, 'Line search failed (non-descent direction).', [
        { key: 'Cost:', value: currentCost },
        { key: 'Gradient norm:', value: gradientNorm }
    ]);
    const failureResult = {
        finalParameters: currentParameters,
        parameters: currentParameters,
        iterations: iteration + 1,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm: gradientNorm
    };
    return failureResult;
}
|
|
1945
|
+
/**
 * Minimizes a smooth scalar function with L-BFGS (limited-memory BFGS).
 *
 * Keeps a bounded history of (s, y) pairs instead of a dense inverse Hessian,
 * computes directions with the two-loop recursion, and sizes steps with a
 * Strong Wolfe line search (or a fixed step).
 *
 * @param {Float64Array|number[]} initialParameters - Starting point (copied, not mutated).
 * @param {(x: Float64Array) => number} costFunction - Scalar objective to minimize.
 * @param {(x: Float64Array) => Float64Array} gradientFunction - Gradient of the objective.
 * @param {object} [options] - maxIterations, tolerance, historySize, useLineSearch,
 *   stepSize, lineSearchOptions, onIteration, logLevel, verbose.
 * @returns {object} Result with finalParameters/parameters, iterations,
 *   converged, finalCost and finalGradientNorm.
 */
function lbfgs(initialParameters, costFunction, gradientFunction, options = {}) {
    const maxIterations = options.maxIterations ?? DEFAULT_MAX_ITERATIONS$4;
    const tolerance = options.tolerance ?? DEFAULT_TOLERANCE$2;
    const historySize = options.historySize ?? DEFAULT_HISTORY_SIZE;
    const useLineSearch = options.useLineSearch ?? DEFAULT_USE_LINE_SEARCH$1;
    const fixedStepSize = options.stepSize ?? DEFAULT_FIXED_STEP_SIZE;
    const onIteration = options.onIteration;
    const logger = new Logger(options.logLevel, options.verbose);
    let currentParameters = new Float64Array(initialParameters);
    let currentCost = costFunction(currentParameters);
    // Empty history means the first direction is plain steepest descent.
    const history = createEmptyHistory();
    for (let iteration = 0; iteration < maxIterations; iteration++) {
        const currentGradient = gradientFunction(currentParameters);
        const gradientNorm = vectorNorm$1(currentGradient);
        if (onIteration)
            onIteration(iteration, currentCost, currentParameters);
        // Converged: gradient norm fell below the tolerance.
        if (checkGradientConvergence(gradientNorm, tolerance)) {
            logger.info('lbfgs', iteration, 'Converged', [
                { key: 'Cost:', value: currentCost },
                { key: 'Gradient norm:', value: gradientNorm }
            ]);
            return createConvergenceResult(currentParameters, iteration, true, currentCost, gradientNorm);
        }
        const proposedSearchDirection = computeLbfgsSearchDirection(currentGradient, history);
        // Safeguard: may swap in -g (and clear history) if the proposal points uphill.
        const searchDirection = ensureDescentDirectionOrFallback(currentGradient, proposedSearchDirection, history, logger, iteration, currentCost);
        const stepSize = useLineSearch
            ? strongWolfeLineSearch(costFunction, gradientFunction, currentParameters, searchDirection, options.lineSearchOptions)
            : fixedStepSize;
        // A zero step size is the line search's failure sentinel.
        if (stepSize === INVALID_STEP_SIZE) {
            return handleLineSearchFailure$1(currentParameters, iteration, currentCost, gradientNorm, logger);
        }
        const newParameters = computeNextParameters(currentParameters, searchDirection, stepSize);
        const stepVector = subtractVectors$1(newParameters, currentParameters);
        const stepNorm = vectorNorm$1(stepVector);
        const newCost = costFunction(newParameters);
        const newGradient = gradientFunction(newParameters);
        const gradientChangeVector = subtractVectors$1(newGradient, currentGradient);
        // Records the new (s, y) pair, or clears the memory on weak curvature.
        updateHistoryIfCurvatureIsValid(history, historySize, stepVector, gradientChangeVector, logger, iteration, newCost);
        logger.debug('lbfgs', iteration, 'Progress', [
            { key: 'Cost:', value: currentCost },
            { key: 'Gradient norm:', value: gradientNorm },
            { key: 'Step size:', value: stepSize },
            { key: 'Step norm:', value: stepNorm }
        ]);
        currentParameters = new Float64Array(newParameters);
        currentCost = newCost;
    }
    // Iteration budget exhausted: report the last iterate, not converged.
    const finalGradient = gradientFunction(currentParameters);
    const finalGradientNorm = vectorNorm$1(finalGradient);
    logger.warn('lbfgs', undefined, 'Maximum iterations reached', [
        { key: 'Iterations:', value: maxIterations },
        { key: 'Final cost:', value: currentCost },
        { key: 'Final gradient norm:', value: finalGradientNorm }
    ]);
    return {
        finalParameters: currentParameters,
        parameters: currentParameters,
        iterations: maxIterations,
        converged: false,
        finalCost: currentCost,
        finalGradientNorm: finalGradientNorm
    };
}
|
|
2008
|
+
|
|
2009
|
+
/**
|
|
2010
|
+
* This file provides deterministic random number generation utilities.
|
|
2011
|
+
*
|
|
2012
|
+
* Role in system:
|
|
2013
|
+
* - Enables reproducible stochastic optimizers (e.g., CMA-ES) via explicit seeds
|
|
2014
|
+
* - Provides uniform and standard normal sampling without external dependencies
|
|
2015
|
+
* - Keeps browser compatibility (no Node-specific APIs required)
|
|
2016
|
+
*
|
|
2017
|
+
* For first-time readers:
|
|
2018
|
+
* - Use createSeededRandom(seed) to get a generator
|
|
2019
|
+
* - Call nextUniform() for U[0,1), nextStandardNormal() for N(0,1)
|
|
2020
|
+
* - Seed behavior: seed > 0 is deterministic; seed is auto-generated if 0/undefined
|
|
2021
|
+
*/
|
|
2022
|
+
// Constants shared by the seeded random utilities below.
const UINT32_MAX_PLUS_ONE = 2 ** 32; // Divisor mapping a uint32 into [0, 1)
const AUTO_SEED_MULTIPLIER = 1664525; // Classic LCG multiplier (as popularized by Numerical Recipes)
const AUTO_SEED_INCREMENT = 1013904223; // Classic LCG increment (as popularized by Numerical Recipes)
const TWO_PI = 2.0 * Math.PI;
const MINIMUM_POSITIVE_UNIFORM = 1e-12; // Guard against log(0) in Box–Muller
|
|
2027
|
+
/** Wraps a number into the unsigned 32-bit range. */
function coerceToUint32(value) {
    // WHY: uint32 arithmetic keeps the PRNG state deterministic and portable.
    const wrapped = value >>> 0;
    return wrapped;
}
|
|
2031
|
+
/**
 * Produces a fresh uint32 seed for runs that did not force one.
 * Mixes the clock with Math.random() so environments with coarse timers
 * still vary between runs, then diffuses the bits with one LCG step.
 */
function computeAutoSeed() {
    const clockBits = coerceToUint32(Date.now());
    const entropyBits = coerceToUint32(Math.floor(Math.random() * UINT32_MAX_PLUS_ONE));
    const combined = coerceToUint32(clockBits ^ entropyBits);
    // One LCG step diffuses obvious time-only patterns.
    return coerceToUint32(combined * AUTO_SEED_MULTIPLIER + AUTO_SEED_INCREMENT);
}
|
|
2040
|
+
/**
 * One Mulberry32 PRNG step: advances `nextState.value` and returns a uniform
 * sample in [0, 1). State lives in an object so multiple closures can share
 * and advance the same stream.
 */
function mulberry32(nextState) {
    // Reference: Mulberry32 (fast, decent quality for optimization sampling).
    let word = (nextState.value += 0x6d2b79f5);
    word = Math.imul(word ^ (word >>> 15), word | 1);
    word ^= word + Math.imul(word ^ (word >>> 7), word | 61);
    return ((word ^ (word >>> 14)) >>> 0) / UINT32_MAX_PLUS_ONE;
}
|
|
2049
|
+
/** Draws a uniform sample clamped away from 0 so Box–Muller's log() never sees it. */
function computeUniformOpenInterval(nextUniform) {
    const sample = nextUniform();
    if (sample <= 0.0)
        return MINIMUM_POSITIVE_UNIFORM;
    return sample;
}
|
|
2054
|
+
/**
 * Creates a deterministic PRNG exposing uniform and standard-normal sampling.
 * seed > 0 gives a reproducible stream; 0/undefined auto-generates a seed.
 *
 * @param {number} [seed] - Optional positive integer seed.
 * @returns {{ seed: number, nextUniform: () => number, nextStandardNormal: () => number }}
 */
function createSeededRandom(seed) {
    const resolvedSeed = seed && seed > 0 ? coerceToUint32(seed) : computeAutoSeed();
    const state = { value: resolvedSeed };
    let pendingNormal;
    const nextUniform = () => mulberry32(state);
    const nextStandardNormal = () => {
        // Box–Muller yields two independent normals per pair of uniforms;
        // the second one is cached and handed out on the next call.
        if (pendingNormal !== undefined) {
            const queued = pendingNormal;
            pendingNormal = undefined;
            return queued;
        }
        const radius = Math.sqrt(-2 * Math.log(computeUniformOpenInterval(nextUniform)));
        const angle = TWO_PI * nextUniform();
        pendingNormal = radius * Math.sin(angle);
        return radius * Math.cos(angle);
    };
    return { seed: resolvedSeed, nextUniform, nextStandardNormal };
}
|
|
2079
|
+
|
|
2080
|
+
/**
|
|
2081
|
+
* This file implements vanilla CMA-ES and IPOP-CMA-ES restart strategy
|
|
2082
|
+
* for unconstrained black-box optimization (no gradients required).
|
|
2083
|
+
*
|
|
2084
|
+
* Role in system:
|
|
2085
|
+
* - Provides a derivative-free optimizer for scalar cost functions
|
|
2086
|
+
* - Adds IPOP restarts (λ doubles per restart) while preserving libcmaes semantics
|
|
2087
|
+
* - Mirrors libcmaes default parameter formulas and core stop criteria
|
|
2088
|
+
*
|
|
2089
|
+
* For first-time readers:
|
|
2090
|
+
* - Start with `cmaEs()` (public entry point)
|
|
2091
|
+
* - `runSingleCmaEs()` executes one CMA-ES run (no restarts)
|
|
2092
|
+
* - Restart logic wraps `runSingleCmaEs()` when `restartStrategy: "ipop"`
|
|
2093
|
+
*/
|
|
2094
|
+
// Defaults and guards for CMA-ES / IPOP-CMA-ES; several mirror libcmaes choices.
const DEFAULT_MAX_ITERATIONS$3 = 1000;
const DEFAULT_MAX_RESTARTS = 9; // libcmaes default
const DEFAULT_RESTART_STRATEGY = 'none';
const DEFAULT_FUNCTION_TOLERANCE = 1e-12; // libcmaes default
const DEFAULT_PARAMETER_TOLERANCE = 1e-12; // libcmaes default
const MINIMUM_FUNCTION_TOLERANCE = 1e-12;
const MINIMUM_PARAMETER_TOLERANCE = 1e-12;
const MINIMUM_POPULATION_SIZE = 2; // λ below this is rejected by normalizePopulationSize
const DEFAULT_STEP_SIZE_FALLBACK_SCALE = 1.0; // used to compute 1 / dim fallback
const DEFAULT_COVARIANCE_REGULARIZATION = 1e-12;
const MAX_REGULARIZATION_ATTEMPTS$1 = 8;
const REGULARIZATION_GROWTH_BASE = 10;
const H_SIGMA_BASE = 1.4;
const H_SIGMA_DIMENSION_FACTOR_NUMERATOR = 2.0;
const H_SIGMA_POWER_FACTOR = 2.0;
const LARGE_DIMENSION_THRESHOLD_FOR_CSIGMA = 1000;
const IPOPN_LAMBDA_MULTIPLIER = 2; // IPOP restarts double λ each restart (see file header)
|
|
2111
|
+
/**
 * Returns a millisecond timestamp, preferring the high-resolution
 * `performance.now()` clock when available and falling back to `Date.now()`.
 */
function nowMs() {
    const hasPerfClock = typeof performance !== 'undefined' && typeof performance.now === 'function';
    return hasPerfClock ? performance.now() : Date.now();
}
|
|
2117
|
+
/**
 * Guard: CMA-ES requires at least one integer parameter dimension.
 * Throws an Error for non-integers and values below 1.
 */
function assertValidDimension(dimension) {
    const isValid = Number.isInteger(dimension) && dimension >= 1;
    if (!isValid) {
        throw new Error(`CMA-ES requires dimension >= 1, got ${dimension}`);
    }
}
|
|
2123
|
+
/**
 * Validates the user-supplied population size (lambda).
 * Returns the libcmaes default when unset; warns and falls back to the
 * default for non-finite values or values below MINIMUM_POPULATION_SIZE;
 * otherwise floors to an integer.
 */
function normalizePopulationSize(dimension, populationSize, logger) {
    const defaultValue = computeDefaultPopulationSize(dimension);
    if (populationSize === undefined) {
        return defaultValue;
    }
    const isUsable = Number.isFinite(populationSize) && populationSize >= MINIMUM_POPULATION_SIZE;
    if (isUsable) {
        return Math.floor(populationSize);
    }
    logger.warn('cmaEs', undefined, 'Invalid populationSize; falling back to default.', [
        { key: 'populationSize:', value: populationSize },
        { key: 'default:', value: defaultValue }
    ]);
    return defaultValue;
}
|
|
2136
|
+
/**
 * Validates the maxIterations option: default when unset, warn-and-default
 * for non-finite or non-positive values, floored integer otherwise.
 */
function normalizeMaxIterations(value, logger) {
    if (value === undefined) {
        return DEFAULT_MAX_ITERATIONS$3;
    }
    if (Number.isFinite(value) && value > 0) {
        return Math.floor(value);
    }
    logger.warn('cmaEs', undefined, 'Invalid maxIterations; falling back to default.', [
        { key: 'maxIterations:', value }
    ]);
    return DEFAULT_MAX_ITERATIONS$3;
}
|
|
2147
|
+
/**
 * Validates the maxFunctionEvaluations option. `undefined` means "no budget";
 * invalid (non-finite or non-positive) values warn and disable the budget;
 * valid values are floored to an integer.
 */
function normalizeMaxFunctionEvaluations(value, logger) {
    if (value === undefined) {
        return undefined;
    }
    if (Number.isFinite(value) && value > 0) {
        return Math.floor(value);
    }
    logger.warn('cmaEs', undefined, 'Invalid maxFunctionEvaluations; disabling evaluation budget.', [
        { key: 'maxFunctionEvaluations:', value }
    ]);
    return undefined;
}
|
|
2158
|
+
/**
 * Validates the maxRestarts option (0 is allowed — it disables restarts).
 * Default when unset; warn-and-default for non-finite or negative values;
 * floored integer otherwise.
 */
function normalizeMaxRestarts(value, logger) {
    if (value === undefined) {
        return DEFAULT_MAX_RESTARTS;
    }
    if (Number.isFinite(value) && value >= 0) {
        return Math.floor(value);
    }
    logger.warn('cmaEs', undefined, 'Invalid maxRestarts; falling back to default.', [
        { key: 'maxRestarts:', value }
    ]);
    return DEFAULT_MAX_RESTARTS;
}
|
|
2169
|
+
/**
 * Validates the restartStrategy option. Accepts 'none' and 'ipop';
 * anything else warns and falls back to the default ('none').
 */
function normalizeRestartStrategy(value, logger) {
    if (value === undefined)
        return DEFAULT_RESTART_STRATEGY;
    if (value === 'none' || value === 'ipop')
        return value;
    // Bug fix: previously this logged `Number.NaN` as the metadata value,
    // hiding what the caller actually passed. Report the rejected value,
    // consistent with the other normalize* warnings.
    logger.warn('cmaEs', undefined, 'Unknown restartStrategy; falling back to "none".', [
        { key: 'restartStrategy:', value }
    ]);
    return DEFAULT_RESTART_STRATEGY;
}
|
|
2179
|
+
/**
 * libcmaes default population size when lambda is unspecified or < 2:
 * lambda = 4 + floor(3 * ln(dim)), clamped below at MINIMUM_POPULATION_SIZE.
 */
function computeDefaultPopulationSize(dimension) {
    const suggested = 4 + Math.floor(3.0 * Math.log(dimension));
    return Math.max(MINIMUM_POPULATION_SIZE, suggested);
}
|
|
2185
|
+
/**
 * Assembles the full set of libcmaes-style strategy parameters for a given
 * dimension and population size: parent count mu, recombination weights,
 * learning rates (csigma, cc, c1, cmu), step-size damping dsigma, the
 * evolution-path update factors, and chiN (E||N(0,I)||).
 */
function computeLibcmaesDefaults(dimension, populationSize) {
    const parentCount = Math.floor(populationSize / 2.0);
    const weights = computeLibcmaesWeights(parentCount);
    const mueff = computeEffectiveParentCount(weights);
    const csigma = computeLibcmaesCsigma(dimension, mueff);
    const cc = computeLibcmaesCc(dimension, mueff);
    const c1 = computeLibcmaesC1(dimension, mueff);
    const cmu = computeLibcmaesCmu(dimension, mueff, c1);
    const dsigma = computeLibcmaesDsigma(dimension, mueff, csigma);
    // Normalization factors for the psigma / pc evolution-path updates.
    const psFactor = Math.sqrt(csigma * (2.0 - csigma) * mueff);
    const pcFactor = Math.sqrt(cc * (2.0 - cc) * mueff);
    const chiN = computeLibcmaesChiN(dimension);
    return {
        populationSize,
        parentCount,
        weights,
        csigma,
        cc,
        c1,
        cmu,
        dsigma,
        psFactor,
        pcFactor,
        chiN
    };
}
|
|
2208
|
+
/**
 * Computes the log-rank recombination weights
 * w_i = ln(mu + 1) - ln(i + 1), normalized to sum to 1.
 */
function computeLibcmaesWeights(parentCount) {
    const rawWeights = Array.from({ length: parentCount }, (_, rank) => Math.log(parentCount + 1) - Math.log(rank + 1));
    const total = rawWeights.reduce((accumulated, weight) => accumulated + weight, 0.0);
    return Float64Array.from(rawWeights, (weight) => weight / total);
}
|
|
2221
|
+
/**
 * Effective selection mass mueff = (sum w)^2 / sum(w^2).
 * Equals mu for uniform weights, smaller for skewed weights.
 */
function computeEffectiveParentCount(weights) {
    let total = 0.0;
    let totalOfSquares = 0.0;
    for (const weight of weights) {
        total += weight;
        totalOfSquares += weight * weight;
    }
    return (total * total) / totalOfSquares;
}
|
|
2230
|
+
/**
 * Step-size path learning rate csigma. Uses the standard formula for small
 * dimensions and a sqrt-based variant at/above the large-dimension threshold.
 */
function computeLibcmaesCsigma(dimension, effectiveParentCount) {
    const useLargeDimensionFormula = dimension >= LARGE_DIMENSION_THRESHOLD_FOR_CSIGMA;
    if (useLargeDimensionFormula) {
        return (Math.sqrt(effectiveParentCount) + 2.0) / (Math.sqrt(dimension) + Math.sqrt(effectiveParentCount) + 3.0);
    }
    return (effectiveParentCount + 2.0) / (dimension + effectiveParentCount + 5.0);
}
|
|
2236
|
+
/** Covariance evolution-path learning rate cc (libcmaes default formula). */
function computeLibcmaesCc(dimension, effectiveParentCount) {
    const numerator = 4.0 + effectiveParentCount / dimension;
    const denominator = dimension + 4.0 + (2.0 * effectiveParentCount) / dimension;
    return numerator / denominator;
}
|
|
2239
|
+
/** Rank-one covariance update learning rate c1 (libcmaes default formula). */
function computeLibcmaesC1(dimension, effectiveParentCount) {
    const denominator = Math.pow(dimension + 1.3, 2) + effectiveParentCount;
    return 2.0 / denominator;
}
|
|
2242
|
+
/**
 * Rank-mu covariance update learning rate cmu, clamped so that c1 + cmu <= 1.
 */
function computeLibcmaesCmu(dimension, effectiveParentCount, c1) {
    const numerator = 2.0 * (effectiveParentCount - 2.0 + 1.0 / effectiveParentCount);
    const denominator = Math.pow(dimension + 2.0, 2) + effectiveParentCount;
    return Math.min(1.0 - c1, numerator / denominator);
}
|
|
2247
|
+
/**
 * Step-size damping dsigma = 1 + csigma + 2 * max(0, sqrt((mueff-1)/(n+1)) - 1).
 * The max() term only kicks in when mueff is large relative to the dimension.
 */
function computeLibcmaesDsigma(dimension, effectiveParentCount, csigma) {
    const imbalance = Math.sqrt((effectiveParentCount - 1.0) / (dimension + 1.0)) - 1.0;
    return 1.0 + csigma + 2.0 * Math.max(0.0, imbalance);
}
|
|
2251
|
+
/**
 * Series approximation of chiN = E||N(0, I_n)||:
 * sqrt(n) * (1 - 1/(4n) + 1/(21 n^2)).
 */
function computeLibcmaesChiN(dimension) {
    const correction = 1.0 - 1.0 / (4.0 * dimension) + 1.0 / (21.0 * dimension * dimension);
    return Math.sqrt(dimension) * correction;
}
|
|
2255
|
+
/**
 * Resolves sigma0: uses the caller's positive initialStepSize if given,
 * otherwise warns and falls back to 1/dim.
 */
function computeInitialStepSize(initialStepSize, dimension, logger) {
    const isUsable = initialStepSize !== undefined && initialStepSize > 0.0;
    if (isUsable) {
        return initialStepSize;
    }
    logger.warn('cmaEs', undefined, 'initialStepSize is missing or non-positive; falling back to 1/dim.', [
        { key: 'dim:', value: dimension }
    ]);
    return DEFAULT_STEP_SIZE_FALLBACK_SCALE / dimension;
}
|
|
2263
|
+
/**
 * Maps NaN and +/-Infinity costs to +Infinity so a misbehaving objective
 * simply ranks last instead of corrupting the selection.
 */
function sanitizeCost(rawCost) {
    if (Number.isFinite(rawCost)) {
        return rawCost;
    }
    return Number.POSITIVE_INFINITY;
}
|
|
2266
|
+
/** Returns a dimension x dimension identity matrix (ml-matrix `Matrix`). */
function createIdentityMatrix(dimension) {
    return mlMatrix.Matrix.eye(dimension, dimension);
}
|
|
2269
|
+
/**
 * Largest diagonal entry of a square matrix, floored at 0
 * (an all-negative diagonal therefore reports 0).
 */
function computeMaxDiagonalElement(matrix) {
    let largest = 0.0;
    for (let diagIndex = 0; diagIndex < matrix.rows; diagIndex += 1) {
        largest = Math.max(largest, matrix.get(diagIndex, diagIndex));
    }
    return largest;
}
|
|
2276
|
+
/**
 * Forces exact symmetry on a (nearly symmetric) matrix in place by
 * averaging each off-diagonal pair; counters floating-point drift in the
 * covariance update.
 */
function symmetrizeMatrixInPlace(matrix) {
    for (let upper = 0; upper < matrix.rows; upper += 1) {
        for (let lower = upper + 1; lower < matrix.columns; lower += 1) {
            const mirrored = 0.5 * (matrix.get(upper, lower) + matrix.get(lower, upper));
            matrix.set(upper, lower, mirrored);
            matrix.set(lower, upper, mirrored);
        }
    }
}
|
|
2285
|
+
/** Regularization shift for a given retry: base * 10^attempt. */
function computeRegularizationLambda$1(base, attempt) {
    const growth = Math.pow(REGULARIZATION_GROWTH_BASE, attempt);
    return base * growth;
}
|
|
2288
|
+
/**
 * Computes the lower-triangular Cholesky factor of the covariance matrix.
 * If the matrix is not SPD, retries with an escalating diagonal shift
 * (base * 10^attempt) up to MAX_REGULARIZATION_ATTEMPTS$1 times; as a last
 * resort it warns and returns the factor of the identity matrix.
 *
 * NOTE(review): ml-matrix's `Matrix.add` mutates its receiver — if so,
 * each attempt adds lambda*I into the caller's covariance matrix itself,
 * not a copy. Verify this accumulation is intended.
 */
function computeCholeskyLowerOrRegularize(covarianceMatrix, regularizationBase, logger) {
    for (let attempt = 0; attempt < MAX_REGULARIZATION_ATTEMPTS$1; attempt++) {
        const lambda = computeRegularizationLambda$1(regularizationBase, attempt);
        const regularized = covarianceMatrix.add(createIdentityMatrix(covarianceMatrix.rows).mul(lambda));
        try {
            const decomposition = new mlMatrix.CholeskyDecomposition(regularized);
            if (decomposition.isPositiveDefinite()) {
                // Only warn when the unshifted matrix failed at least once.
                if (attempt > 0) {
                    logger.warn('cmaEs', undefined, 'Covariance not SPD; recovered via diagonal regularization.', [
                        { key: 'regularization:', value: lambda }
                    ]);
                }
                return decomposition.lowerTriangularMatrix;
            }
        }
        catch {
            // Decomposition threw; try again with a larger shift.
            continue;
        }
    }
    logger.warn('cmaEs', undefined, 'Covariance Cholesky failed; resetting covariance to identity.', []);
    return new mlMatrix.CholeskyDecomposition(createIdentityMatrix(covarianceMatrix.rows)).lowerTriangularMatrix;
}
|
|
2310
|
+
/**
 * Solves L x = rhs for a lower-triangular L via forward substitution.
 * Returns a new Float64Array; rhs is not modified.
 */
function solveLowerTriangularSystem(lowerTriangular, rhs) {
    const size = rhs.length;
    const solution = new Float64Array(size);
    for (let row = 0; row < size; row += 1) {
        let residual = rhs[row];
        for (let col = 0; col < row; col += 1) {
            residual -= lowerTriangular.get(row, col) * solution[col];
        }
        solution[row] = residual / lowerTriangular.get(row, row);
    }
    return solution;
}
|
|
2322
|
+
/** Sum of squared components (squared Euclidean norm). */
function vectorNormSquared(vector) {
    let total = 0.0;
    for (const component of vector) {
        total += component * component;
    }
    return total;
}
|
|
2329
|
+
/** Euclidean (L2) norm of a vector. */
function vectorNorm(vector) {
    const squaredLength = vectorNormSquared(vector);
    return Math.sqrt(squaredLength);
}
|
|
2332
|
+
/** In-place AXPY: target[i] += scale * source[i]. */
function addScaledInPlace(target, source, scale) {
    const count = target.length;
    for (let position = 0; position < count; position += 1) {
        const increment = scale * source[position];
        target[position] += increment;
    }
}
|
|
2337
|
+
/** In-place scalar multiply: vector[i] *= scale. */
function scaleInPlace(vector, scale) {
    const count = vector.length;
    for (let position = 0; position < count; position += 1) {
        vector[position] = vector[position] * scale;
    }
}
|
|
2342
|
+
/** Element-wise difference a - b as a new Float64Array. */
function subtractVectors(a, b) {
    const difference = new Float64Array(a.length);
    for (let position = 0; position < difference.length; position += 1) {
        difference[position] = a[position] - b[position];
    }
    return difference;
}
|
|
2349
|
+
/**
 * Weighted recombination mean of the best `parentCount` (pre-sorted)
 * candidates: mean = sum_i weights[i] * candidates[i].parameters.
 */
function computeWeightedMean(candidates, weights, parentCount) {
    const dimension = candidates[0].parameters.length;
    const mean = new Float64Array(dimension);
    for (let rank = 0; rank < parentCount; rank += 1) {
        const weight = weights[rank];
        const parameters = candidates[rank].parameters;
        for (let axis = 0; axis < dimension; axis += 1) {
            mean[axis] += weight * parameters[axis];
        }
    }
    return mean;
}
|
|
2357
|
+
/**
 * Dense rank-one outer product pc * pc^T as an ml-matrix `Matrix`;
 * used as the rank-one term of the covariance update.
 */
function computePcOuterProduct(pc) {
    const dimension = pc.length;
    const result = mlMatrix.Matrix.zeros(dimension, dimension);
    for (let rowIndex = 0; rowIndex < dimension; rowIndex++) {
        const vRow = pc[rowIndex];
        for (let colIndex = 0; colIndex < dimension; colIndex++) {
            result.set(rowIndex, colIndex, vRow * pc[colIndex]);
        }
    }
    return result;
}
|
|
2368
|
+
/**
 * Accumulates weight * (vector * vector^T) into `accumulator` in place;
 * used to build the rank-mu covariance update term.
 */
function addWeightedOuterProductInPlace(accumulator, vector, weight) {
    const dimension = vector.length;
    for (let row = 0; row < dimension; row += 1) {
        const scaledRow = weight * vector[row];
        for (let col = 0; col < dimension; col += 1) {
            accumulator.set(row, col, accumulator.get(row, col) + scaledRow * vector[col]);
        }
    }
}
|
|
2377
|
+
/**
 * Default best-cost history horizon for TOLHISTFUN:
 * 10 + ceil(30 * dim / lambda).
 */
function computeDefaultMaxHistorySize(dimension, populationSize) {
    return 10 + Math.ceil((30 * dimension) / populationSize);
}
|
|
2382
|
+
/**
 * MAXFEVALS criterion: stop (non-converged) once the evaluation budget is
 * exhausted; an undefined budget never stops.
 */
function checkStopMaxFevals(functionEvaluations, maxFunctionEvaluations) {
    const budgetActive = maxFunctionEvaluations !== undefined;
    if (budgetActive && functionEvaluations >= maxFunctionEvaluations) {
        return { shouldStop: true, converged: false, reason: 'MAXFEVALS' };
    }
    return { shouldStop: false, converged: false, reason: 'CONT' };
}
|
|
2388
|
+
/**
 * FTARGET criterion: converged stop once the best cost reaches the target;
 * an undefined target never stops.
 */
function checkStopFtarget(bestCost, targetCost) {
    const targetActive = targetCost !== undefined;
    if (targetActive && bestCost <= targetCost) {
        return { shouldStop: true, converged: true, reason: 'FTARGET' };
    }
    return { shouldStop: false, converged: false, reason: 'CONT' };
}
|
|
2394
|
+
/** MAXITER criterion: non-converged stop at the iteration cap. */
function checkStopMaxIter(iteration, maxIterations) {
    if (iteration < maxIterations) {
        return { shouldStop: false, converged: false, reason: 'CONT' };
    }
    return { shouldStop: true, converged: false, reason: 'MAXITER' };
}
|
|
2400
|
+
/**
 * TOLHISTFUN criterion: once the history window is full, converged stop
 * when the spread (max - min) of the recent best costs falls below
 * functionTolerance.
 */
function checkStopTolHistFun(bestCostHistory, maxHistorySize, functionTolerance) {
    const historyLength = bestCostHistory.length;
    if (historyLength < maxHistorySize) {
        return { shouldStop: false, converged: false, reason: 'CONT' };
    }
    const window = bestCostHistory.slice(historyLength - maxHistorySize);
    const recentMin = Math.min(...window);
    const recentMax = Math.max(...window);
    if (Math.abs(recentMax - recentMin) < functionTolerance) {
        return { shouldStop: true, converged: true, reason: 'TOLHISTFUN' };
    }
    return { shouldStop: false, converged: false, reason: 'CONT' };
}
|
|
2416
|
+
/**
 * TOLX criterion: converged stop when both the covariance evolution path
 * `pc` and the per-coordinate standard deviations sqrt(diag(C)) have all
 * collapsed below parameterTolerance scaled by sigma / sigmaInit.
 * Never stops before the first iteration.
 */
function checkStopTolX(args) {
    if (args.iteration <= 0)
        return { shouldStop: false, converged: false, reason: 'CONT' };
    const factor = args.sigma / args.sigmaInit;
    const thresholdFactor = args.parameterTolerance * factor;
    for (let index = 0; index < args.pc.length; index++) {
        // Bug fix: pc components are signed, so compare magnitudes.
        // Previously a large NEGATIVE component compared below the positive
        // threshold and could falsely signal convergence.
        if (Math.abs(args.pc[index]) >= thresholdFactor) {
            return { shouldStop: false, converged: false, reason: 'CONT' };
        }
    }
    for (let index = 0; index < args.covariance.rows; index++) {
        const diagonalStd = Math.sqrt(args.covariance.get(index, index));
        if (diagonalStd >= thresholdFactor) {
            return { shouldStop: false, converged: false, reason: 'CONT' };
        }
    }
    return { shouldStop: true, converged: true, reason: 'TOLX' };
}
|
|
2434
|
+
/**
 * Evaluates the libcmaes-style stop criteria in priority order
 * (MAXFEVALS, FTARGET, MAXITER, TOLHISTFUN, TOLX) and returns the first
 * verdict that requests a stop, else the TOLX result (CONT when none fire).
 */
function checkLibcmaesStopCriteria(args) {
    const prioritizedChecks = [
        () => checkStopMaxFevals(args.functionEvaluations, args.maxFunctionEvaluations),
        () => checkStopFtarget(args.bestCost, args.targetCost),
        () => checkStopMaxIter(args.iteration, args.maxIterations),
        () => checkStopTolHistFun(args.bestCostHistory, args.maxHistorySize, args.functionTolerance)
    ];
    for (const runCheck of prioritizedChecks) {
        const verdict = runCheck();
        if (verdict.shouldStop) {
            return verdict;
        }
    }
    return checkStopTolX({
        iteration: args.iteration,
        sigma: args.sigma,
        sigmaInit: args.sigmaInit,
        parameterTolerance: args.parameterTolerance,
        pc: args.pc,
        covariance: args.covariance
    });
}
|
|
2456
|
+
/**
 * Threshold for the h_sigma stall indicator: the step-size path update is
 * suppressed when ||psigma|| exceeds this value, which accounts for the
 * path's warm-up via the (1 - csigma)^(2(t+1)) decay term.
 */
function computeHsigThreshold(iteration, csigma, chiN, dimension) {
    const exponent = H_SIGMA_POWER_FACTOR * (iteration + 1);
    const decay = Math.pow(1.0 - csigma, exponent);
    const normalization = Math.sqrt(1.0 - decay);
    const dimensionFactor = H_SIGMA_BASE + H_SIGMA_DIMENSION_FACTOR_NUMERATOR / (dimension + 1.0);
    return normalization * dimensionFactor * chiN;
}
|
|
2462
|
+
/**
 * Draws one candidate x = mean + sigma * L z with z ~ N(0, I), exploiting
 * the lower-triangular structure of the Cholesky factor L.
 * Returns the candidate parameters and the normalized step y = L z.
 */
function sampleCandidate(mean, sigma, lowerTriangular, nextStandardNormal) {
    const dimension = mean.length;
    // Standard-normal draws, one per coordinate (index order 0..n-1).
    const z = Float64Array.from({ length: dimension }, () => nextStandardNormal());
    // y = L z via a truncated dot product per row.
    const y = new Float64Array(dimension);
    for (let row = 0; row < dimension; row += 1) {
        let dot = 0.0;
        for (let col = 0; col <= row; col += 1) {
            dot += lowerTriangular.get(row, col) * z[col];
        }
        y[row] = dot;
    }
    // x = mean + sigma * y
    const parameters = Float64Array.from(mean, (meanValue, index) => meanValue + sigma * y[index]);
    return { parameters, normalizedStep: y };
}
|
|
2480
|
+
/**
 * Builds the initial CMA-ES state around the starting point: identity
 * covariance, zero evolution paths, sigma = sigmaInit, and the (sanitized)
 * cost of the initial mean as the incumbent best. Counts one function
 * evaluation and, when profiling is enabled, attributes its wall time.
 */
function initializeState(initialParameters, sigmaInit, costFunction, counters, profiling) {
    const mean = new Float64Array(initialParameters);
    const costStart = profiling ? nowMs() : 0;
    const bestCost = sanitizeCost(costFunction(mean));
    if (profiling)
        profiling.costMs += nowMs() - costStart;
    counters.functionEvaluations += 1;
    return {
        mean,
        covariance: createIdentityMatrix(initialParameters.length),
        psigma: new Float64Array(initialParameters.length), // step-size evolution path (zeros)
        pc: new Float64Array(initialParameters.length), // covariance evolution path (zeros)
        sigma: sigmaInit,
        sigmaInit,
        bestCost,
        bestParameters: new Float64Array(mean), // defensive copy of the start point
        bestCostHistory: [bestCost]
    };
}
|
|
2499
|
+
/**
 * Records a strictly better candidate as the incumbent best
 * (ties keep the existing best); copies the parameters defensively.
 */
function updateBestIfImproved(state, bestCandidate) {
    if (bestCandidate.cost >= state.bestCost) {
        return;
    }
    state.bestCost = bestCandidate.cost;
    state.bestParameters = Float64Array.from(bestCandidate.parameters);
}
|
|
2505
|
+
/**
 * Appends the latest best cost and trims the history from the front so it
 * holds at most maxHistorySize entries.
 */
function pushBestCostHistory(state, bestCost, maxHistorySize) {
    const history = state.bestCostHistory;
    history.push(bestCost);
    const excess = history.length - maxHistorySize;
    if (excess > 0) {
        history.splice(0, excess);
    }
}
|
|
2510
|
+
/**
 * Assembles the public optimizer result from the final state.
 * finalMaxStdDev = sigma * sqrt(max diagonal of C), floored at 0 to guard
 * against tiny negative diagonals from round-off.
 */
function buildResult(state, defaults, iterations, converged, stopReason, functionEvaluations, profiling) {
    const maxVariance = Math.max(0.0, computeMaxDiagonalElement(state.covariance));
    const finalMaxStdDev = state.sigma * Math.sqrt(maxVariance);
    return {
        finalParameters: state.bestParameters,
        parameters: state.bestParameters, // alias kept for API compatibility
        iterations,
        converged,
        finalCost: state.bestCost,
        populationSize: defaults.populationSize,
        functionEvaluations: functionEvaluations ?? 0,
        finalStepSize: state.sigma,
        finalMaxStdDev,
        stopReason,
        profiling
    };
}
|
|
2526
|
+
/**
 * Samples one generation: factors the covariance once, draws up to
 * populationSize candidates, evaluates (and sanitizes) their costs, and
 * returns them sorted ascending by cost together with the Cholesky factor.
 * Sampling stops early (possibly mid-generation) when the evaluation budget
 * is exhausted. Profiling buckets, when enabled, attribute wall time to
 * cholesky / sampling / cost phases.
 */
function runOneGeneration(context, state) {
    const choleskyStart = context.profiling ? nowMs() : 0;
    const lowerTriangular = computeCholeskyLowerOrRegularize(state.covariance, context.covarianceRegularizationBase, context.logger);
    if (context.profiling) {
        context.profiling.choleskyMs += nowMs() - choleskyStart;
    }
    const candidates = [];
    for (let sampleIndex = 0; sampleIndex < context.defaults.populationSize; sampleIndex++) {
        const sampleStart = context.profiling ? nowMs() : 0;
        const sampled = sampleCandidate(state.mean, state.sigma, lowerTriangular, context.nextStandardNormal);
        if (context.profiling) {
            context.profiling.samplingMs += nowMs() - sampleStart;
        }
        const costStart = context.profiling ? nowMs() : 0;
        const cost = sanitizeCost(context.costFunction(sampled.parameters));
        if (context.profiling) {
            context.profiling.costMs += nowMs() - costStart;
        }
        context.counters.functionEvaluations += 1;
        candidates.push({ parameters: sampled.parameters, normalizedStep: sampled.normalizedStep, cost });
        // Honor the evaluation budget even inside a generation.
        if (context.maxFunctionEvaluations !== undefined &&
            context.counters.functionEvaluations >= context.maxFunctionEvaluations) {
            break;
        }
    }
    // Ascending cost: candidates[0] is the generation's best.
    candidates.sort((a, b) => a.cost - b.cost);
    return { candidates, lowerTriangular };
}
|
|
2554
|
+
/**
 * Performs one CMA-ES distribution update from the sorted candidates:
 * weighted mean recombination, the psigma / pc evolution-path updates
 * (with the h_sigma stall gate), the rank-one + rank-mu covariance update,
 * and the cumulative step-size adaptation of sigma.
 * Mutates state.mean, state.psigma, state.pc, state.covariance, state.sigma.
 */
function updateDistributionParameters(context, state, candidates, lowerTriangular, iteration) {
    const updateStart = context.profiling ? nowMs() : 0;
    // Generation may be truncated by the eval budget, so cap mu accordingly.
    const parentCount = Math.min(context.defaults.parentCount, candidates.length);
    const xmean = computeWeightedMean(candidates, context.defaults.weights, parentCount);
    // Mean shift in sigma-normalized coordinates: (xmean - mean) / sigma.
    const diffxmean = subtractVectors(xmean, state.mean);
    scaleInPlace(diffxmean, 1.0 / state.sigma);
    // psigma <- (1 - csigma) psigma + psFactor * C^{-1/2} (xmean - mean)/sigma,
    // where C^{-1/2} v is obtained by forward-solving L y = v.
    scaleInPlace(state.psigma, 1.0 - context.defaults.csigma);
    const csqinvDiff = solveLowerTriangularSystem(lowerTriangular, diffxmean);
    addScaledInPlace(state.psigma, csqinvDiff, context.defaults.psFactor);
    const normPs = vectorNorm(state.psigma);
    // h_sigma gate: freeze the pc update while the step-size path is too long.
    const hsigThreshold = computeHsigThreshold(iteration, context.defaults.csigma, context.defaults.chiN, context.dimension);
    const hsig = normPs < hsigThreshold ? 1.0 : 0.0;
    // pc <- (1 - cc) pc + hsig * pcFactor * (xmean - mean)/sigma
    scaleInPlace(state.pc, 1.0 - context.defaults.cc);
    addScaledInPlace(state.pc, diffxmean, hsig * context.defaults.pcFactor);
    // Rank-one term pc pc^T and rank-mu term sum_i w_i y_i y_i^T.
    const spc = computePcOuterProduct(state.pc);
    const wdiff = mlMatrix.Matrix.zeros(context.dimension, context.dimension);
    for (let index = 0; index < parentCount; index++) {
        addWeightedOuterProductInPlace(wdiff, candidates[index].normalizedStep, context.defaults.weights[index]);
    }
    // Decay factor includes the (1 - hsig) correction for the frozen pc update.
    const covarianceScale = 1.0 -
        context.defaults.c1 -
        context.defaults.cmu +
        (1.0 - hsig) * context.defaults.c1 * context.defaults.cc * (2.0 - context.defaults.cc);
    state.covariance = state.covariance.mul(covarianceScale).add(spc.mul(context.defaults.c1)).add(wdiff.mul(context.defaults.cmu));
    symmetrizeMatrixInPlace(state.covariance);
    // CSA: sigma <- sigma * exp((csigma/dsigma) (||psigma||/chiN - 1)).
    const sigmaExponent = (context.defaults.csigma / context.defaults.dsigma) * (normPs / context.defaults.chiN - 1.0);
    state.sigma *= Math.exp(sigmaExponent);
    state.mean = xmean;
    if (context.profiling) {
        context.profiling.updateMs += nowMs() - updateStart;
    }
}
|
|
2586
|
+
/**
 * Executes one CMA-ES run (no restarts): loops generations until a stop
 * criterion fires. Stop criteria are checked before the first generation
 * (counters may already be non-zero after earlier IPOP runs) and again
 * after best-tracking but BEFORE the distribution update, so the state
 * returned is the one the stopping decision was based on.
 * Returns the stop verdict and the (mutated) run state.
 */
function runSingleCmaEs(context, state) {
    const initialStop = checkLibcmaesStopCriteria({
        iteration: context.counters.iterations,
        maxIterations: context.maxIterations,
        functionEvaluations: context.counters.functionEvaluations,
        maxFunctionEvaluations: context.maxFunctionEvaluations,
        bestCost: state.bestCost,
        targetCost: context.targetCost,
        bestCostHistory: state.bestCostHistory,
        maxHistorySize: context.maxHistorySize,
        functionTolerance: context.functionTolerance,
        sigma: state.sigma,
        sigmaInit: state.sigmaInit,
        parameterTolerance: context.parameterTolerance,
        pc: state.pc,
        covariance: state.covariance
    });
    if (initialStop.shouldStop) {
        return { stop: initialStop, state };
    }
    while (true) {
        const { candidates, lowerTriangular } = runOneGeneration(context, state);
        // An empty generation means the eval budget ran out before any sample.
        if (candidates.length === 0) {
            const budgetStop = checkStopMaxFevals(context.counters.functionEvaluations, context.maxFunctionEvaluations);
            return { stop: budgetStop, state };
        }
        // candidates are sorted ascending by cost; [0] is the generation best.
        updateBestIfImproved(state, candidates[0]);
        pushBestCostHistory(state, state.bestCost, context.maxHistorySize);
        if (context.onIteration) {
            context.onIteration(context.counters.iterations, state.bestCost, state.bestParameters);
        }
        // iteration + 1: the just-finished generation counts toward MAXITER.
        const stop = checkLibcmaesStopCriteria({
            iteration: context.counters.iterations + 1,
            maxIterations: context.maxIterations,
            functionEvaluations: context.counters.functionEvaluations,
            maxFunctionEvaluations: context.maxFunctionEvaluations,
            bestCost: state.bestCost,
            targetCost: context.targetCost,
            bestCostHistory: state.bestCostHistory,
            maxHistorySize: context.maxHistorySize,
            functionTolerance: context.functionTolerance,
            sigma: state.sigma,
            sigmaInit: state.sigmaInit,
            parameterTolerance: context.parameterTolerance,
            pc: state.pc,
            covariance: state.covariance
        });
        if (stop.shouldStop) {
            return { stop, state };
        }
        updateDistributionParameters(context, state, candidates, lowerTriangular, context.counters.iterations);
        context.counters.iterations += 1;
        context.logger.debug('cmaEs', context.counters.iterations, 'Progress', [
            { key: 'bestCost:', value: state.bestCost },
            { key: 'sigma:', value: state.sigma },
            { key: 'fevals:', value: context.counters.functionEvaluations }
        ]);
    }
}
|
|
2645
|
+
/**
 * Public CMA-ES entry point.
 *
 * Minimizes `costFunction` starting from `initialParameters`. Normalizes and
 * validates all options, then runs either a single CMA-ES run or, with
 * `restartStrategy: 'ipop'`, up to maxRestarts + 1 runs with the population
 * size doubling between runs. Iteration and evaluation counters are shared
 * across restarts, and the best solution over all runs is returned.
 *
 * @param initialParameters starting point (array-like of numbers)
 * @param costFunction scalar objective; non-finite returns rank last
 * @param options see types.d.ts (populationSize, maxIterations, targetCost,
 *        restartStrategy, randomSeed, profiling, onIteration, ...)
 * @returns result object from buildResult (finalParameters, finalCost,
 *          iterations, converged, stopReason, profiling, ...)
 */
function cmaEs(initialParameters, costFunction, options = {}) {
    const dimension = initialParameters.length;
    assertValidDimension(dimension);
    const logger = new Logger(options.logLevel, options.verbose);
    // Option normalization: each helper warns and substitutes a safe default.
    const restartStrategy = normalizeRestartStrategy(options.restartStrategy, logger);
    const maxRestarts = normalizeMaxRestarts(options.maxRestarts, logger);
    const maxIterations = normalizeMaxIterations(options.maxIterations, logger);
    const maxFunctionEvaluations = normalizeMaxFunctionEvaluations(options.maxFunctionEvaluations, logger);
    // Tolerances are floored so user values cannot go below machine scale.
    const functionTolerance = Math.max(options.functionTolerance ?? DEFAULT_FUNCTION_TOLERANCE, MINIMUM_FUNCTION_TOLERANCE);
    const parameterTolerance = Math.max(options.parameterTolerance ?? DEFAULT_PARAMETER_TOLERANCE, MINIMUM_PARAMETER_TOLERANCE);
    const covarianceRegularizationBase = options.covarianceRegularization ?? DEFAULT_COVARIANCE_REGULARIZATION;
    const sigmaInit = computeInitialStepSize(options.initialStepSize, dimension, logger);
    const targetCost = options.targetCost;
    const onIteration = options.onIteration;
    // One RNG stream for the whole optimization (shared across restarts).
    const seededRandom = createSeededRandom(options.randomSeed);
    const counters = { iterations: 0, functionEvaluations: 0 };
    const profiling = options.profiling
        ? { totalMs: 0, costMs: 0, choleskyMs: 0, samplingMs: 0, updateMs: 0 }
        : undefined;
    const totalStart = profiling ? nowMs() : 0;
    let populationSize = normalizePopulationSize(dimension, options.populationSize, logger);
    let defaults = computeLibcmaesDefaults(dimension, populationSize);
    let maxHistorySize = options.maxHistorySize && options.maxHistorySize > 0
        ? options.maxHistorySize
        : computeDefaultMaxHistorySize(dimension, defaults.populationSize);
    logger.info('cmaEs', 0, 'Starting', [
        { key: 'dim:', value: dimension },
        { key: 'lambda:', value: defaults.populationSize },
        { key: 'mu:', value: defaults.parentCount },
        { key: 'sigma0:', value: sigmaInit }
    ]);
    // Best-so-far across all restarts.
    let globalBestCost = Number.POSITIVE_INFINITY;
    let globalBestParameters = new Float64Array(initialParameters);
    let globalStopReason;
    let globalConverged = false;
    let globalState = null;
    const totalRuns = restartStrategy === 'ipop' ? maxRestarts + 1 : 1;
    for (let runIndex = 0; runIndex < totalRuns; runIndex++) {
        // Recompute per-run parameters: lambda (and thus mu, weights, rates)
        // changes between IPOP restarts.
        defaults = computeLibcmaesDefaults(dimension, populationSize);
        maxHistorySize =
            options.maxHistorySize && options.maxHistorySize > 0
                ? options.maxHistorySize
                : computeDefaultMaxHistorySize(dimension, defaults.populationSize);
        // Each run restarts from the original initial point and sigma0.
        const state = initializeState(initialParameters, sigmaInit, costFunction, counters, profiling);
        const context = {
            dimension,
            defaults,
            maxHistorySize,
            functionTolerance,
            parameterTolerance,
            covarianceRegularizationBase,
            maxIterations,
            maxFunctionEvaluations,
            targetCost,
            costFunction,
            logger,
            nextStandardNormal: seededRandom.nextStandardNormal,
            onIteration,
            counters,
            profiling
        };
        const runResult = runSingleCmaEs(context, state);
        globalState = runResult.state;
        if (globalState.bestCost < globalBestCost) {
            globalBestCost = globalState.bestCost;
            globalBestParameters = new Float64Array(globalState.bestParameters);
        }
        // Target reached: converged, no further restarts.
        if (runResult.stop.reason === 'FTARGET') {
            globalStopReason = 'FTARGET';
            globalConverged = true;
            break;
        }
        // Global budgets exhausted: stop regardless of restart strategy.
        if (runResult.stop.reason === 'MAXFEVALS' || runResult.stop.reason === 'MAXITER') {
            globalStopReason = runResult.stop.reason;
            globalConverged = false;
            break;
        }
        // No restarts configured: report this run's verdict directly.
        if (restartStrategy !== 'ipop') {
            globalStopReason = runResult.stop.reason === 'CONT' ? undefined : runResult.stop.reason;
            globalConverged = runResult.stop.converged;
            break;
        }
        if (runIndex >= maxRestarts) {
            globalStopReason = 'IPOP_MAX_RESTARTS';
            globalConverged = false;
            break;
        }
        // IPOP: double lambda for the next run.
        populationSize *= IPOPN_LAMBDA_MULTIPLIER;
    }
    // Defensive fallback; with totalRuns >= 1 the loop always sets globalState.
    if (!globalState) {
        const fallbackDefaults = computeLibcmaesDefaults(dimension, populationSize);
        const fallbackState = initializeState(initialParameters, sigmaInit, costFunction, counters, profiling);
        if (profiling)
            profiling.totalMs = nowMs() - totalStart;
        return buildResult(fallbackState, fallbackDefaults, counters.iterations, false, globalStopReason, counters.functionEvaluations, profiling);
    }
    // Surface the cross-restart best through the final state.
    globalState.bestCost = globalBestCost;
    globalState.bestParameters = globalBestParameters;
    if (profiling)
        profiling.totalMs = nowMs() - totalStart;
    return buildResult(globalState, defaults, counters.iterations, globalConverged, globalStopReason, counters.functionEvaluations, profiling);
}
|
|
2747
|
+
|
|
1536
2748
|
/**
|
|
1537
2749
|
* This file provides shared utility functions for constrained optimization algorithms
|
|
1538
2750
|
* using the adjoint method.
|
|
@@ -1846,7 +3058,7 @@ function projectStatesToConstraints(parameters, states, constraintFunction, step
|
|
|
1846
3058
|
let projectedStates = new Float64Array(states);
|
|
1847
3059
|
for (let i = 0; i < maxIterations; i++) {
|
|
1848
3060
|
const constraint = constraintFunction(parameters, projectedStates);
|
|
1849
|
-
const constraintNorm = vectorNorm(constraint);
|
|
3061
|
+
const constraintNorm = vectorNorm$1(constraint);
|
|
1850
3062
|
if (constraintNorm <= constraintTolerance) {
|
|
1851
3063
|
break;
|
|
1852
3064
|
}
|
|
@@ -1869,8 +3081,11 @@ function projectStatesToConstraints(parameters, states, constraintFunction, step
|
|
|
1869
3081
|
* Validates initial conditions including constraint satisfaction and dimensions.
|
|
1870
3082
|
*
|
|
1871
3083
|
* Checks that:
|
|
1872
|
-
* 1.
|
|
1873
|
-
*
|
|
3084
|
+
* 1. Initial constraint violation is within tolerance (warns if not)
|
|
3085
|
+
*
|
|
3086
|
+
* Note:
|
|
3087
|
+
* - Constraint count and state count do not need to match.
|
|
3088
|
+
* - Both square and non-square constraint Jacobians are supported by the shared solvers.
|
|
1874
3089
|
*
|
|
1875
3090
|
* @param initialParameters - Initial parameter vector p0
|
|
1876
3091
|
* @param initialStates - Initial state vector x0
|
|
@@ -1878,11 +3093,10 @@ function projectStatesToConstraints(parameters, states, constraintFunction, step
|
|
|
1878
3093
|
* @param constraintTolerance - Tolerance for constraint violation
|
|
1879
3094
|
* @param logger - Logger instance for warnings
|
|
1880
3095
|
* @param algorithmName - Name of calling algorithm (for error messages)
|
|
1881
|
-
* @throws Error if constraint count != state count
|
|
1882
3096
|
*/
|
|
1883
3097
|
function validateInitialConditions$1(initialParameters, initialStates, constraintFunction, constraintTolerance, logger, algorithmName = 'constrainedOptimization') {
|
|
1884
3098
|
const initialConstraint = constraintFunction(initialParameters, initialStates);
|
|
1885
|
-
const initialConstraintNorm = vectorNorm(initialConstraint);
|
|
3099
|
+
const initialConstraintNorm = vectorNorm$1(initialConstraint);
|
|
1886
3100
|
if (initialConstraintNorm > constraintTolerance) {
|
|
1887
3101
|
logger.warn(algorithmName, undefined, 'Initial constraint violation', [
|
|
1888
3102
|
{ key: '||c(p0,x0)||:', value: initialConstraintNorm },
|
|
@@ -1967,7 +3181,7 @@ function isResidualFunction(costFunction, parameters, states) {
|
|
|
1967
3181
|
function computeCost(costFunction, parameters, states) {
|
|
1968
3182
|
if (isResidualFunction(costFunction, parameters, states)) {
|
|
1969
3183
|
const residual = costFunction(parameters, states);
|
|
1970
|
-
const residualNorm = vectorNorm(residual);
|
|
3184
|
+
const residualNorm = vectorNorm$1(residual);
|
|
1971
3185
|
return RESIDUAL_COST_COEFFICIENT * residualNorm * residualNorm;
|
|
1972
3186
|
}
|
|
1973
3187
|
return costFunction(parameters, states);
|
|
@@ -2231,7 +3445,7 @@ function computeAdjointGradient(dfdp, lambda, dcdp) {
|
|
|
2231
3445
|
lambdaTdcdpVector[i] = lambdaTdcdp.get(0, i);
|
|
2232
3446
|
}
|
|
2233
3447
|
// df/dp = ∂f/∂p - λ^T ∂c/∂p
|
|
2234
|
-
return subtractVectors(dfdp, lambdaTdcdpVector);
|
|
3448
|
+
return subtractVectors$1(dfdp, lambdaTdcdpVector);
|
|
2235
3449
|
}
|
|
2236
3450
|
/**
|
|
2237
3451
|
* Updates states using linear approximation: x_new = x_old + dx
|
|
@@ -2265,7 +3479,7 @@ function createCostFunctionWrapper(currentParameters, currentStates, costFunctio
|
|
|
2265
3479
|
// Update states during line search to maintain constraint satisfaction.
|
|
2266
3480
|
// We use linear approximation (x_new = x_old + dx) where dx solves (∂c/∂x) dx = -∂c/∂p · Δp
|
|
2267
3481
|
// because solving the full nonlinear constraint system for each line search step would be too expensive.
|
|
2268
|
-
const deltaP = subtractVectors(params, currentParameters);
|
|
3482
|
+
const deltaP = subtractVectors$1(params, currentParameters);
|
|
2269
3483
|
const newStates = updateStates(currentStates, dcdx, dcdp, deltaP, logger);
|
|
2270
3484
|
return computeCost(costFunction, params, newStates);
|
|
2271
3485
|
};
|
|
@@ -2302,7 +3516,7 @@ function createGradientFunctionWrapper(currentParameters, currentStates, current
|
|
|
2302
3516
|
}
|
|
2303
3517
|
// For different trial parameters, update states to maintain constraints and compute gradient.
|
|
2304
3518
|
// We use linear approximation for efficiency: solving full nonlinear constraints for each trial would be too slow.
|
|
2305
|
-
const deltaP = subtractVectors(trialParams, currentParameters);
|
|
3519
|
+
const deltaP = subtractVectors$1(trialParams, currentParameters);
|
|
2306
3520
|
const trialStates = updateStates(currentStates, currentDcdx, currentDcdp, deltaP, logger);
|
|
2307
3521
|
// Compute gradient at trial point to evaluate search direction quality in line search.
|
|
2308
3522
|
const trialPartials = computePartialDerivatives(trialParams, trialStates, costFunction, constraintFunction, options);
|
|
@@ -2330,7 +3544,7 @@ function determineStepSize(currentGradient, currentParameters, currentStates, co
|
|
|
2330
3544
|
*/
|
|
2331
3545
|
function checkConstraintViolation$2(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger) {
|
|
2332
3546
|
const constraint = constraintFunction(currentParameters, currentStates);
|
|
2333
|
-
const constraintNorm = vectorNorm(constraint);
|
|
3547
|
+
const constraintNorm = vectorNorm$1(constraint);
|
|
2334
3548
|
if (constraintNorm > constraintTolerance) {
|
|
2335
3549
|
logger.warn('adjointGradientDescent', iteration, 'Constraint violation detected', [
|
|
2336
3550
|
{ key: '||c(p,x)||:', value: constraintNorm },
|
|
@@ -2347,14 +3561,14 @@ function computeAdjointGradientAndNorm(currentParameters, currentStates, costFun
|
|
|
2347
3561
|
const partials = computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
|
|
2348
3562
|
const lambda = solveAdjointEquation(partials.dcdx, partials.dfdx, logger);
|
|
2349
3563
|
const adjointGradient = computeAdjointGradient(partials.dfdp, lambda, partials.dcdp);
|
|
2350
|
-
const gradientNorm = vectorNorm(adjointGradient);
|
|
3564
|
+
const gradientNorm = vectorNorm$1(adjointGradient);
|
|
2351
3565
|
return { adjointGradient, gradientNorm, partials };
|
|
2352
3566
|
}
|
|
2353
3567
|
/**
|
|
2354
3568
|
* Checks gradient convergence and returns result if converged.
|
|
2355
3569
|
*/
|
|
2356
3570
|
function checkGradientConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, constraintNorm, constraintTolerance, tolerance, usedLineSearchFlag, logger) {
|
|
2357
|
-
if (constraintNorm <= constraintTolerance && checkGradientConvergence(gradientNorm, tolerance
|
|
3571
|
+
if (constraintNorm <= constraintTolerance && checkGradientConvergence(gradientNorm, tolerance)) {
|
|
2358
3572
|
logger.info('adjointGradientDescent', iteration, 'Converged', [
|
|
2359
3573
|
{ key: 'Cost:', value: currentCost },
|
|
2360
3574
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -2403,7 +3617,7 @@ function updateParametersAndStates(currentParameters, currentStates, adjointGrad
|
|
|
2403
3617
|
const negativeStepSize = NEGATIVE_GRADIENT_DIRECTION * stepSize;
|
|
2404
3618
|
const step = scaleVector(adjointGradient, negativeStepSize);
|
|
2405
3619
|
const newParameters = addVectors(currentParameters, step);
|
|
2406
|
-
const deltaP = subtractVectors(newParameters, currentParameters);
|
|
3620
|
+
const deltaP = subtractVectors$1(newParameters, currentParameters);
|
|
2407
3621
|
const newStates = updateStates(currentStates, partials.dcdx, partials.dcdp, deltaP, logger);
|
|
2408
3622
|
const newCost = computeCost(costFunction, newParameters, newStates);
|
|
2409
3623
|
return { newParameters, newStates, newCost };
|
|
@@ -2412,7 +3626,7 @@ function updateParametersAndStates(currentParameters, currentStates, adjointGrad
|
|
|
2412
3626
|
* Checks step size convergence and returns result if converged.
|
|
2413
3627
|
*/
|
|
2414
3628
|
function checkStepSizeConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, stepNorm, constraintNorm, constraintTolerance, tolerance, newUsedLineSearch, logger) {
|
|
2415
|
-
if (constraintNorm <= constraintTolerance && checkStepSizeConvergence(stepNorm, tolerance
|
|
3629
|
+
if (constraintNorm <= constraintTolerance && checkStepSizeConvergence(stepNorm, tolerance)) {
|
|
2416
3630
|
logger.info('adjointGradientDescent', iteration, 'Converged', [
|
|
2417
3631
|
{ key: 'Cost:', value: currentCost },
|
|
2418
3632
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -2518,8 +3732,8 @@ function handleStepSizeAndUpdate(adjointGradient, currentParameters, currentStat
|
|
|
2518
3732
|
*/
|
|
2519
3733
|
function checkStepSizeConvergenceAndLog(currentParameters, currentStates, constraint, currentCost, gradientNorm, stepSize, constraintNorm, iteration, constraintTolerance, tolerance, newUsedLineSearch, newParameters, logger) {
|
|
2520
3734
|
// Check step size convergence: if step is too small, optimization has likely converged or stalled.
|
|
2521
|
-
const step = subtractVectors(newParameters, currentParameters);
|
|
2522
|
-
const stepNorm = vectorNorm(step);
|
|
3735
|
+
const step = subtractVectors$1(newParameters, currentParameters);
|
|
3736
|
+
const stepNorm = vectorNorm$1(step);
|
|
2523
3737
|
const stepSizeConvergenceResult = checkStepSizeConvergenceAndReturn(currentParameters, currentStates, iteration, currentCost, gradientNorm, stepNorm, constraintNorm, constraintTolerance, tolerance, newUsedLineSearch, logger);
|
|
2524
3738
|
if (stepSizeConvergenceResult.converged && stepSizeConvergenceResult.result) {
|
|
2525
3739
|
return stepSizeConvergenceResult;
|
|
@@ -2555,7 +3769,7 @@ function performAdjointGradientDescentIteration(iteration, currentParameters, cu
|
|
|
2555
3769
|
*/
|
|
2556
3770
|
function validateInitialConditions(initialParameters, initialStates, constraintFunction, constraintTolerance, logger) {
|
|
2557
3771
|
const initialConstraint = constraintFunction(initialParameters, initialStates);
|
|
2558
|
-
const initialConstraintNorm = vectorNorm(initialConstraint);
|
|
3772
|
+
const initialConstraintNorm = vectorNorm$1(initialConstraint);
|
|
2559
3773
|
if (initialConstraintNorm > constraintTolerance) {
|
|
2560
3774
|
logger.warn('adjointGradientDescent', undefined, 'Initial constraint violation', [
|
|
2561
3775
|
{ key: '||c(p0,x0)||:', value: initialConstraintNorm },
|
|
@@ -2578,9 +3792,9 @@ function createMaxIterationsResult(currentParameters, currentStates, currentCost
|
|
|
2578
3792
|
const partials = computePartialDerivatives(currentParameters, currentStates, costFunction, constraintFunction, options);
|
|
2579
3793
|
const lambda = solveAdjointEquation(partials.dcdx, partials.dfdx, logger);
|
|
2580
3794
|
const finalGradient = computeAdjointGradient(partials.dfdp, lambda, partials.dcdp);
|
|
2581
|
-
const finalGradientNorm = vectorNorm(finalGradient);
|
|
3795
|
+
const finalGradientNorm = vectorNorm$1(finalGradient);
|
|
2582
3796
|
const finalConstraint = constraintFunction(currentParameters, currentStates);
|
|
2583
|
-
const finalConstraintNorm = vectorNorm(finalConstraint);
|
|
3797
|
+
const finalConstraintNorm = vectorNorm$1(finalConstraint);
|
|
2584
3798
|
logger.warn('adjointGradientDescent', undefined, 'Maximum iterations reached', [
|
|
2585
3799
|
{ key: 'Iterations:', value: maxIterations },
|
|
2586
3800
|
{ key: 'Final cost:', value: currentCost },
|
|
@@ -2807,7 +4021,7 @@ const DEFAULT_RIDGE_REGULARIZATION = 1e-8; // Small ridge regularization to reco
|
|
|
2807
4021
|
*/
|
|
2808
4022
|
function checkConstraintViolation$1(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger) {
|
|
2809
4023
|
const constraint = constraintFunction(currentParameters, currentStates);
|
|
2810
|
-
const constraintNorm = vectorNorm(constraint);
|
|
4024
|
+
const constraintNorm = vectorNorm$1(constraint);
|
|
2811
4025
|
if (constraintNorm > constraintTolerance) {
|
|
2812
4026
|
logger.warn('constrainedGaussNewton', iteration, 'Constraint violation detected', [
|
|
2813
4027
|
{ key: '||c(p,x)||:', value: constraintNorm },
|
|
@@ -2906,7 +4120,7 @@ function computeStepForGN(currentParameters, currentStates, residualFunction, co
|
|
|
2906
4120
|
* Returns convergence result if converged, null otherwise.
|
|
2907
4121
|
*/
|
|
2908
4122
|
function checkStepSizeConvergenceForGN(stepNorm, constraintSatisfied, tolerance, iteration, currentParameters, currentStates, cost, residualNorm, constraintNorm, logger) {
|
|
2909
|
-
if (constraintSatisfied && checkStepSizeConvergence(stepNorm, tolerance
|
|
4123
|
+
if (constraintSatisfied && checkStepSizeConvergence(stepNorm, tolerance)) {
|
|
2910
4124
|
logger.info('constrainedGaussNewton', iteration, 'Converged', [
|
|
2911
4125
|
{ key: 'Cost:', value: cost },
|
|
2912
4126
|
{ key: 'Residual norm:', value: residualNorm },
|
|
@@ -2928,14 +4142,14 @@ function checkStepSizeConvergenceForGN(stepNorm, constraintSatisfied, tolerance,
|
|
|
2928
4142
|
* Returns convergence result if converged, null otherwise.
|
|
2929
4143
|
*/
|
|
2930
4144
|
function checkResidualConvergenceForGN(newParameters, newStates, newResidualNorm, newCost, constraintFunction, constraintTolerance, tolerance, iteration, constraintNorm, logger) {
|
|
2931
|
-
if (checkResidualConvergence(newResidualNorm, tolerance
|
|
4145
|
+
if (checkResidualConvergence(newResidualNorm, tolerance)) {
|
|
2932
4146
|
logger.info('constrainedGaussNewton', iteration, 'Converged', [
|
|
2933
4147
|
{ key: 'Cost:', value: newCost },
|
|
2934
4148
|
{ key: 'Residual norm:', value: newResidualNorm },
|
|
2935
4149
|
{ key: 'Constraint norm:', value: constraintNorm }
|
|
2936
4150
|
]);
|
|
2937
4151
|
const finalConstraint = constraintFunction(newParameters, newStates);
|
|
2938
|
-
const finalConstraintNorm = vectorNorm(finalConstraint);
|
|
4152
|
+
const finalConstraintNorm = vectorNorm$1(finalConstraint);
|
|
2939
4153
|
if (finalConstraintNorm <= constraintTolerance) {
|
|
2940
4154
|
const result = createConvergenceResult(newParameters, iteration, true, newCost, undefined);
|
|
2941
4155
|
return {
|
|
@@ -2955,7 +4169,7 @@ function checkResidualConvergenceForGN(newParameters, newStates, newResidualNorm
|
|
|
2955
4169
|
function performConstrainedGaussNewtonIteration(currentParameters, currentStates, residualFunction, constraintFunction, effectiveJacobianOptions, tolerance, constraintTolerance, stepSizeP, stepSizeX, iteration, logger, onIteration, dcdp, dcdx) {
|
|
2956
4170
|
const { constraintNorm } = checkConstraintViolation$1(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger);
|
|
2957
4171
|
const residual = residualFunction(currentParameters, currentStates);
|
|
2958
|
-
const residualNorm = vectorNorm(residual);
|
|
4172
|
+
const residualNorm = vectorNorm$1(residual);
|
|
2959
4173
|
const cost = computeSumOfSquaredResiduals(residualNorm);
|
|
2960
4174
|
const constraintSatisfied = constraintNorm <= constraintTolerance;
|
|
2961
4175
|
if (onIteration) {
|
|
@@ -2966,7 +4180,7 @@ function performConstrainedGaussNewtonIteration(currentParameters, currentStates
|
|
|
2966
4180
|
return stepResult;
|
|
2967
4181
|
}
|
|
2968
4182
|
const step = stepResult.step;
|
|
2969
|
-
const stepNorm = vectorNorm(step);
|
|
4183
|
+
const stepNorm = vectorNorm$1(step);
|
|
2970
4184
|
const stepSizeConvergenceResult = checkStepSizeConvergenceForGN(stepNorm, constraintSatisfied, tolerance, iteration, currentParameters, currentStates, cost, residualNorm, constraintNorm, logger);
|
|
2971
4185
|
if (stepSizeConvergenceResult) {
|
|
2972
4186
|
return {
|
|
@@ -2976,7 +4190,7 @@ function performConstrainedGaussNewtonIteration(currentParameters, currentStates
|
|
|
2976
4190
|
}
|
|
2977
4191
|
const { newParameters, newStates } = updateParametersAndStatesForConstrainedGN(currentParameters, currentStates, step, constraintFunction, stepSizeP, stepSizeX, constraintTolerance, logger, dcdp, dcdx);
|
|
2978
4192
|
const newResidual = residualFunction(newParameters, newStates);
|
|
2979
|
-
const newResidualNorm = vectorNorm(newResidual);
|
|
4193
|
+
const newResidualNorm = vectorNorm$1(newResidual);
|
|
2980
4194
|
const newCost = computeSumOfSquaredResiduals(newResidualNorm);
|
|
2981
4195
|
const residualConvergenceResult = checkResidualConvergenceForGN(newParameters, newStates, newResidualNorm, newCost, constraintFunction, constraintTolerance, tolerance, iteration, constraintNorm, logger);
|
|
2982
4196
|
if (residualConvergenceResult) {
|
|
@@ -3060,10 +4274,10 @@ function constrainedGaussNewton(initialParameters, initialStates, residualFuncti
|
|
|
3060
4274
|
const currentParameters = iterationResult.finalParameters;
|
|
3061
4275
|
const currentStates = iterationResult.finalStates;
|
|
3062
4276
|
const finalResidual = residualFunction(currentParameters, currentStates);
|
|
3063
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
4277
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
3064
4278
|
const finalCost = computeSumOfSquaredResiduals(finalResidualNorm);
|
|
3065
4279
|
const finalConstraint = constraintFunction(currentParameters, currentStates);
|
|
3066
|
-
const finalConstraintNorm = vectorNorm(finalConstraint);
|
|
4280
|
+
const finalConstraintNorm = vectorNorm$1(finalConstraint);
|
|
3067
4281
|
logger.warn('constrainedGaussNewton', undefined, 'Maximum iterations reached', [
|
|
3068
4282
|
{ key: 'Iterations:', value: maxIterations },
|
|
3069
4283
|
{ key: 'Final cost:', value: finalCost },
|
|
@@ -3148,7 +4362,7 @@ function createConvergenceResultForLM(finalParameters, states, iteration, conver
|
|
|
3148
4362
|
*/
|
|
3149
4363
|
function checkConstraintViolation(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger) {
|
|
3150
4364
|
const constraint = constraintFunction(currentParameters, currentStates);
|
|
3151
|
-
const constraintNorm = vectorNorm(constraint);
|
|
4365
|
+
const constraintNorm = vectorNorm$1(constraint);
|
|
3152
4366
|
if (constraintNorm > constraintTolerance) {
|
|
3153
4367
|
logger.warn('constrainedLevenbergMarquardt', iteration, 'Constraint violation detected', [
|
|
3154
4368
|
{ key: '||c(p,x)||:', value: constraintNorm },
|
|
@@ -3183,7 +4397,7 @@ function solveDampedNormalEquations(jtj, jtr, currentLambda, lambdaFactor) {
|
|
|
3183
4397
|
return { newLambda };
|
|
3184
4398
|
}
|
|
3185
4399
|
const step = matrixToFloat64Array(stepMatrix);
|
|
3186
|
-
const stepNorm = vectorNorm(step);
|
|
4400
|
+
const stepNorm = vectorNorm$1(step);
|
|
3187
4401
|
return { step, stepNorm };
|
|
3188
4402
|
}
|
|
3189
4403
|
/**
|
|
@@ -3244,7 +4458,7 @@ function tryConstrainedLevenbergMarquardtStep(jtj, jtr, currentParameters, curre
|
|
|
3244
4458
|
const newStates = updateStates$1(currentStates, c_x, c_p, step, logger, 'constrainedLevenbergMarquardt');
|
|
3245
4459
|
const projectedStates = projectStatesToConstraints(newParameters, newStates, constraintFunction, stepSizeX, constraintTolerance, logger, 'constrainedLevenbergMarquardt');
|
|
3246
4460
|
const newResidual = residualFunction(newParameters, projectedStates);
|
|
3247
|
-
const newResidualNorm = vectorNorm(newResidual);
|
|
4461
|
+
const newResidualNorm = vectorNorm$1(newResidual);
|
|
3248
4462
|
const newCost = computeSumOfSquaredResiduals(newResidualNorm);
|
|
3249
4463
|
const evaluationResult = evaluateStepQuality(newParameters, projectedStates, newCost, currentCost, currentLambda, lambdaFactor, iteration, logger);
|
|
3250
4464
|
return {
|
|
@@ -3271,7 +4485,7 @@ function tryConstrainedLevenbergMarquardtStep(jtj, jtr, currentParameters, curre
|
|
|
3271
4485
|
* Returns convergence result if converged, null otherwise.
|
|
3272
4486
|
*/
|
|
3273
4487
|
function checkConvergenceForLM(gradientNorm, constraintSatisfied, tolGradient, iteration, currentParameters, currentStates, cost, residualNorm, constraintNorm, currentLambda, logger) {
|
|
3274
|
-
if (constraintSatisfied && checkGradientConvergence(gradientNorm, tolGradient
|
|
4488
|
+
if (constraintSatisfied && checkGradientConvergence(gradientNorm, tolGradient)) {
|
|
3275
4489
|
logger.info('constrainedLevenbergMarquardt', iteration, 'Converged', [
|
|
3276
4490
|
{ key: 'Cost:', value: cost },
|
|
3277
4491
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -3299,7 +4513,7 @@ function tryStepWithLambda(jtj, jtr, currentParameters, currentStates, currentLa
|
|
|
3299
4513
|
}
|
|
3300
4514
|
if (stepResult.stepNorm !== undefined &&
|
|
3301
4515
|
constraintSatisfied &&
|
|
3302
|
-
checkStepSizeConvergence(stepResult.stepNorm, tolStep
|
|
4516
|
+
checkStepSizeConvergence(stepResult.stepNorm, tolStep)) {
|
|
3303
4517
|
logger.info('constrainedLevenbergMarquardt', iteration, 'Converged', [
|
|
3304
4518
|
{ key: 'Cost:', value: cost },
|
|
3305
4519
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -3332,11 +4546,11 @@ function tryStepWithLambda(jtj, jtr, currentParameters, currentStates, currentLa
|
|
|
3332
4546
|
*/
|
|
3333
4547
|
function processStepResult(updatedParameters, updatedStates, updatedLambda, residualFunction, constraintFunction, constraintTolerance, tolResidual, iteration, gradientNorm, logger) {
|
|
3334
4548
|
const currentResidual = residualFunction(updatedParameters, updatedStates);
|
|
3335
|
-
const currentResidualNorm = vectorNorm(currentResidual);
|
|
4549
|
+
const currentResidualNorm = vectorNorm$1(currentResidual);
|
|
3336
4550
|
const currentCost = computeSumOfSquaredResiduals(currentResidualNorm);
|
|
3337
4551
|
const currentConstraint = constraintFunction(updatedParameters, updatedStates);
|
|
3338
|
-
const currentConstraintNorm = vectorNorm(currentConstraint);
|
|
3339
|
-
if (currentConstraintNorm <= constraintTolerance && checkResidualConvergence(currentResidualNorm, tolResidual
|
|
4552
|
+
const currentConstraintNorm = vectorNorm$1(currentConstraint);
|
|
4553
|
+
if (currentConstraintNorm <= constraintTolerance && checkResidualConvergence(currentResidualNorm, tolResidual)) {
|
|
3340
4554
|
logger.info('constrainedLevenbergMarquardt', iteration, 'Converged', [
|
|
3341
4555
|
{ key: 'Cost:', value: currentCost },
|
|
3342
4556
|
{ key: 'Gradient norm:', value: gradientNorm },
|
|
@@ -3366,7 +4580,7 @@ function processStepResult(updatedParameters, updatedStates, updatedLambda, resi
|
|
|
3366
4580
|
function performConstrainedLevenbergMarquardtIteration(currentParameters, currentStates, currentLambda, residualFunction, constraintFunction, effectiveJacobianOptions, tolGradient, tolStep, tolResidual, constraintTolerance, stepSizeP, stepSizeX, lambdaFactor, iteration, logger, onIteration, dcdp, dcdx) {
|
|
3367
4581
|
const { constraintNorm } = checkConstraintViolation(currentParameters, currentStates, constraintFunction, constraintTolerance, iteration, logger);
|
|
3368
4582
|
const residual = residualFunction(currentParameters, currentStates);
|
|
3369
|
-
const residualNorm = vectorNorm(residual);
|
|
4583
|
+
const residualNorm = vectorNorm$1(residual);
|
|
3370
4584
|
const cost = computeSumOfSquaredResiduals(residualNorm);
|
|
3371
4585
|
if (onIteration) {
|
|
3372
4586
|
onIteration(iteration, cost, currentParameters);
|
|
@@ -3374,7 +4588,7 @@ function performConstrainedLevenbergMarquardtIteration(currentParameters, curren
|
|
|
3374
4588
|
const effectiveJacobian = computeEffectiveJacobian(currentParameters, currentStates, residualFunction, constraintFunction, effectiveJacobianOptions, logger, 'constrainedLevenbergMarquardt');
|
|
3375
4589
|
const { jtj, jtr } = computeNormalEquationsMatrices(effectiveJacobian, residual);
|
|
3376
4590
|
const gradientVector = matrixToFloat64Array(jtr);
|
|
3377
|
-
const gradientNorm = vectorNorm(gradientVector);
|
|
4591
|
+
const gradientNorm = vectorNorm$1(gradientVector);
|
|
3378
4592
|
const constraintSatisfied = constraintNorm <= constraintTolerance;
|
|
3379
4593
|
const gradientConvergenceResult = checkConvergenceForLM(gradientNorm, constraintSatisfied, tolGradient, iteration, currentParameters, currentStates, cost, residualNorm, constraintNorm, currentLambda, logger);
|
|
3380
4594
|
if (gradientConvergenceResult) {
|
|
@@ -3423,7 +4637,7 @@ function initializeLMState(initialParameters, initialStates, residualFunction) {
|
|
|
3423
4637
|
const bestParameters = new Float64Array(initialParameters);
|
|
3424
4638
|
const bestStates = new Float64Array(initialStates);
|
|
3425
4639
|
const initialResidual = residualFunction(initialParameters, initialStates);
|
|
3426
|
-
const initialResidualNorm = vectorNorm(initialResidual);
|
|
4640
|
+
const initialResidualNorm = vectorNorm$1(initialResidual);
|
|
3427
4641
|
const bestCost = computeSumOfSquaredResiduals(initialResidualNorm);
|
|
3428
4642
|
return { currentParameters, currentStates, bestParameters, bestStates, bestCost };
|
|
3429
4643
|
}
|
|
@@ -3473,12 +4687,12 @@ function runLMIterations(initialParameters, initialStates, residualFunction, con
|
|
|
3473
4687
|
*/
|
|
3474
4688
|
function createFinalLMResult(bestParameters, bestStates, bestCost, currentLambda, actualIterations, residualFunction, constraintFunction, effectiveJacobianOptions, logger) {
|
|
3475
4689
|
const finalResidual = residualFunction(bestParameters, bestStates);
|
|
3476
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
4690
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
3477
4691
|
const finalConstraint = constraintFunction(bestParameters, bestStates);
|
|
3478
|
-
const finalConstraintNorm = vectorNorm(finalConstraint);
|
|
4692
|
+
const finalConstraintNorm = vectorNorm$1(finalConstraint);
|
|
3479
4693
|
const finalEffectiveJacobian = computeEffectiveJacobian(bestParameters, bestStates, residualFunction, constraintFunction, effectiveJacobianOptions, logger, 'constrainedLevenbergMarquardt');
|
|
3480
4694
|
const { jtr: finalJtr } = computeNormalEquationsMatrices(finalEffectiveJacobian, finalResidual);
|
|
3481
|
-
const finalGradientNorm = vectorNorm(matrixToFloat64Array(finalJtr));
|
|
4695
|
+
const finalGradientNorm = vectorNorm$1(matrixToFloat64Array(finalJtr));
|
|
3482
4696
|
return createConvergenceResultForLM(bestParameters, bestStates, actualIterations - 1, false, bestCost, finalGradientNorm, finalResidualNorm, finalConstraintNorm, currentLambda);
|
|
3483
4697
|
}
|
|
3484
4698
|
/**
|
|
@@ -3530,14 +4744,14 @@ function constrainedLevenbergMarquardt(initialParameters, initialStates, residua
|
|
|
3530
4744
|
return iterationResult.result;
|
|
3531
4745
|
}
|
|
3532
4746
|
const finalResidual = residualFunction(iterationResult.bestParameters, iterationResult.bestStates);
|
|
3533
|
-
const finalResidualNorm = vectorNorm(finalResidual);
|
|
4747
|
+
const finalResidualNorm = vectorNorm$1(finalResidual);
|
|
3534
4748
|
const finalConstraint = constraintFunction(iterationResult.bestParameters, iterationResult.bestStates);
|
|
3535
|
-
const finalConstraintNorm = vectorNorm(finalConstraint);
|
|
4749
|
+
const finalConstraintNorm = vectorNorm$1(finalConstraint);
|
|
3536
4750
|
logger.warn('constrainedLevenbergMarquardt', undefined, 'Maximum iterations reached', [
|
|
3537
4751
|
{ key: 'Iterations:', value: iterationResult.actualIterations },
|
|
3538
4752
|
{ key: 'Final cost:', value: iterationResult.bestCost },
|
|
3539
4753
|
{
|
|
3540
|
-
key: 'Final gradient norm:', value: vectorNorm(matrixToFloat64Array(computeNormalEquationsMatrices(computeEffectiveJacobian(iterationResult.bestParameters, iterationResult.bestStates, residualFunction, constraintFunction, effectiveJacobianOptions, logger, 'constrainedLevenbergMarquardt'), finalResidual).jtr))
|
|
4754
|
+
key: 'Final gradient norm:', value: vectorNorm$1(matrixToFloat64Array(computeNormalEquationsMatrices(computeEffectiveJacobian(iterationResult.bestParameters, iterationResult.bestStates, residualFunction, constraintFunction, effectiveJacobianOptions, logger, 'constrainedLevenbergMarquardt'), finalResidual).jtr))
|
|
3541
4755
|
},
|
|
3542
4756
|
{ key: 'Final residual norm:', value: finalResidualNorm },
|
|
3543
4757
|
{ key: 'Final constraint norm:', value: finalConstraintNorm },
|
|
@@ -3777,6 +4991,37 @@ function formatLevenbergMarquardtResult(result, options) {
|
|
|
3777
4991
|
}
|
|
3778
4992
|
return lines.join('\n');
|
|
3779
4993
|
}
|
|
4994
|
+
/**
|
|
4995
|
+
* Formats a CMA-ES result.
|
|
4996
|
+
*/
|
|
4997
|
+
function formatCmaEsResult(result, options) {
|
|
4998
|
+
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
4999
|
+
const lines = formatBasicResult(result, opts);
|
|
5000
|
+
const insertionIndex = lines.findIndex(line => line.includes('Final gradient norm') || line.includes('Final residual norm'));
|
|
5001
|
+
const extraLines = [
|
|
5002
|
+
` Population size (λ): ${result.populationSize}`,
|
|
5003
|
+
` Function evaluations: ${result.functionEvaluations}`,
|
|
5004
|
+
` Final step size (σ): ${formatNumberWithPrecision(result.finalStepSize, 6)}`,
|
|
5005
|
+
` Final max std dev: ${formatNumberWithPrecision(result.finalMaxStdDev, opts.costPrecision)}`
|
|
5006
|
+
];
|
|
5007
|
+
if (result.stopReason) {
|
|
5008
|
+
extraLines.push(` Stop reason: ${result.stopReason}`);
|
|
5009
|
+
}
|
|
5010
|
+
if (result.profiling) {
|
|
5011
|
+
extraLines.push(` Profiling (ms): total=${formatNumberWithPrecision(result.profiling.totalMs, 2)}, ` +
|
|
5012
|
+
`cost=${formatNumberWithPrecision(result.profiling.costMs, 2)}, ` +
|
|
5013
|
+
`cholesky=${formatNumberWithPrecision(result.profiling.choleskyMs, 2)}, ` +
|
|
5014
|
+
`sampling=${formatNumberWithPrecision(result.profiling.samplingMs, 2)}, ` +
|
|
5015
|
+
`update=${formatNumberWithPrecision(result.profiling.updateMs, 2)}`);
|
|
5016
|
+
}
|
|
5017
|
+
if (insertionIndex >= 0) {
|
|
5018
|
+
lines.splice(insertionIndex + 1, 0, ...extraLines);
|
|
5019
|
+
}
|
|
5020
|
+
else {
|
|
5021
|
+
lines.push(...extraLines);
|
|
5022
|
+
}
|
|
5023
|
+
return lines.join('\n');
|
|
5024
|
+
}
|
|
3780
5025
|
/**
|
|
3781
5026
|
* Formats a ConstrainedGaussNewtonResult.
|
|
3782
5027
|
*/
|
|
@@ -3934,6 +5179,9 @@ function formatResult(result, options) {
|
|
|
3934
5179
|
if ('finalLambda' in result) {
|
|
3935
5180
|
return formatLevenbergMarquardtResult(result, options);
|
|
3936
5181
|
}
|
|
5182
|
+
if ('populationSize' in result && 'functionEvaluations' in result && 'finalStepSize' in result) {
|
|
5183
|
+
return formatCmaEsResult(result, options);
|
|
5184
|
+
}
|
|
3937
5185
|
if ('usedLineSearch' in result) {
|
|
3938
5186
|
return formatGradientDescentResult(result, options);
|
|
3939
5187
|
}
|
|
@@ -3957,6 +5205,12 @@ function printGradientDescentResult(result, options) {
|
|
|
3957
5205
|
/**
 * Prints a Levenberg-Marquardt result directly to console.
 */
function printLevenbergMarquardtResult(result, options) {
    // Format first, then emit — keeps the console call trivially simple.
    const formatted = formatLevenbergMarquardtResult(result, options);
    console.log(formatted);
}
|
|
5208
|
+
/**
 * Prints a CMA-ES result directly to console.
 */
function printCmaEsResult(result, options) {
    // Delegate all formatting to formatCmaEsResult; this wrapper only emits.
    const text = formatCmaEsResult(result, options);
    console.log(text);
}
|
|
3960
5214
|
/**
|
|
3961
5215
|
* Prints a constrained Gauss-Newton result directly to console.
|
|
3962
5216
|
*/
|
|
@@ -3982,6 +5236,8 @@ function printResult(result, options) {
|
|
|
3982
5236
|
exports.addVectors = addVectors;
|
|
3983
5237
|
exports.adjointGradientDescent = adjointGradientDescent;
|
|
3984
5238
|
exports.backtrackingLineSearch = backtrackingLineSearch;
|
|
5239
|
+
exports.bfgs = bfgs;
|
|
5240
|
+
exports.cmaEs = cmaEs;
|
|
3985
5241
|
exports.constrainedGaussNewton = constrainedGaussNewton;
|
|
3986
5242
|
exports.constrainedLevenbergMarquardt = constrainedLevenbergMarquardt;
|
|
3987
5243
|
exports.createFiniteDiffGradient = createFiniteDiffGradient;
|
|
@@ -3997,6 +5253,7 @@ exports.finiteDiffResidualPartialP = finiteDiffResidualPartialP;
|
|
|
3997
5253
|
exports.finiteDiffResidualPartialX = finiteDiffResidualPartialX;
|
|
3998
5254
|
exports.float64ArrayToMatrix = float64ArrayToMatrix;
|
|
3999
5255
|
exports.formatAdjointGradientDescentResult = formatAdjointGradientDescentResult;
|
|
5256
|
+
exports.formatCmaEsResult = formatCmaEsResult;
|
|
4000
5257
|
exports.formatConstrainedGaussNewtonResult = formatConstrainedGaussNewtonResult;
|
|
4001
5258
|
exports.formatConstrainedLevenbergMarquardtResult = formatConstrainedLevenbergMarquardtResult;
|
|
4002
5259
|
exports.formatGradientDescentResult = formatGradientDescentResult;
|
|
@@ -4005,10 +5262,12 @@ exports.formatOptimizationResult = formatOptimizationResult;
|
|
|
4005
5262
|
exports.formatResult = formatResult;
|
|
4006
5263
|
exports.gaussNewton = gaussNewton;
|
|
4007
5264
|
exports.gradientDescent = gradientDescent;
|
|
5265
|
+
exports.lbfgs = lbfgs;
|
|
4008
5266
|
exports.levenbergMarquardt = levenbergMarquardt;
|
|
4009
5267
|
exports.matrixToFloat64Array = matrixToFloat64Array;
|
|
4010
5268
|
exports.matrixToFloat64Array2D = matrixToFloat64Array2D;
|
|
4011
5269
|
exports.printAdjointGradientDescentResult = printAdjointGradientDescentResult;
|
|
5270
|
+
exports.printCmaEsResult = printCmaEsResult;
|
|
4012
5271
|
exports.printConstrainedGaussNewtonResult = printConstrainedGaussNewtonResult;
|
|
4013
5272
|
exports.printConstrainedLevenbergMarquardtResult = printConstrainedLevenbergMarquardtResult;
|
|
4014
5273
|
exports.printGradientDescentResult = printGradientDescentResult;
|
|
@@ -4016,6 +5275,7 @@ exports.printLevenbergMarquardtResult = printLevenbergMarquardtResult;
|
|
|
4016
5275
|
exports.printOptimizationResult = printOptimizationResult;
|
|
4017
5276
|
exports.printResult = printResult;
|
|
4018
5277
|
exports.scaleVector = scaleVector;
|
|
4019
|
-
exports.
|
|
4020
|
-
exports.
|
|
5278
|
+
exports.strongWolfeLineSearch = strongWolfeLineSearch;
|
|
5279
|
+
exports.subtractVectors = subtractVectors$1;
|
|
5280
|
+
exports.vectorNorm = vectorNorm$1;
|
|
4021
5281
|
//# sourceMappingURL=index.cjs.map
|