@energy8platform/stake-math-tools 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@energy8platform/stake-math-tools",
3
- "version": "0.1.0",
3
+ "version": "0.3.0",
4
4
  "description": "Node-only dev-time math utilities for the Energy8 Stake bridge: lookup-table (force matrix) builder",
5
5
  "author": "Energy8 Platform",
6
6
  "license": "MIT",
package/src/index.ts CHANGED
@@ -16,4 +16,6 @@ export { mulberry32, weightedReservoirSample, computeQuotas, stratifiedSample }
16
16
  export type { Quotas, QuotaInput, QuotaParams } from './sample.js';
17
17
  export { solveNNLS } from './nnls.js';
18
18
  export type { NNLSOptions } from './nnls.js';
19
+ export { solveQP, projectSimplex } from './qp.js';
20
+ export type { QPOptions } from './qp.js';
19
21
  export { quantizeWeights } from './quantize.js';
package/src/nnls.ts CHANGED
@@ -17,8 +17,9 @@ export interface NNLSOptions {
17
17
  * A is m×n (rows = features, cols = variables). m ≪ n is permitted thanks to ε > 0.
18
18
  *
19
19
  * Algorithm: classical active-set NNLS as in Lawson & Hanson §23.3. The Tikhonov term
20
- * is folded in by appending √ε · I to A and √ε · prior to b the augmented system
21
- * (m+n) × n is then well-posed for all passive subsets.
20
+ * is applied *implicitly* we never materialize the √ε · I block. Folding it into
21
+ * the gradient and the normal equations keeps the storage at O(m · n) instead of
22
+ * O(n²), which matters when n can reach 10⁵.
22
23
  */
23
24
  export function solveNNLS(
24
25
  A: ReadonlyArray<ReadonlyArray<number>>,
@@ -32,34 +33,25 @@ export function solveNNLS(
32
33
  const tol = options.tolerance ?? 1e-12;
33
34
  const maxIter = options.maxIterations ?? 3 * Math.max(1, n);
34
35
 
35
- // Augment: A_aug = [A; √ε I], b_aug = [b; √ε · prior]
36
- const sqrtEps = Math.sqrt(epsilon);
37
- const M = m + (epsilon > 0 ? n : 0);
38
- const Ah: number[][] = new Array(M);
39
- const bh: number[] = new Array(M);
40
- for (let i = 0; i < m; i++) {
41
- Ah[i] = A[i].slice();
42
- bh[i] = b[i];
43
- }
44
- if (epsilon > 0) {
45
- for (let j = 0; j < n; j++) {
46
- const row = new Array(n).fill(0);
47
- row[j] = sqrtEps;
48
- Ah[m + j] = row;
49
- bh[m + j] = sqrtEps * prior[j];
50
- }
51
- }
36
+ // No augmentation: A stays m×n. Tikhonov enters only via the gradient
37
+ // (in lawsonHansonNNLS) and the normal equations (in solveLS).
38
+ // Shallow-copy A to a mutable number[][] for the inner routine.
39
+ const Ah: number[][] = new Array(m);
40
+ for (let i = 0; i < m; i++) Ah[i] = A[i].slice();
41
+ const bh: number[] = b.slice();
52
42
 
53
- return lawsonHansonNNLS(Ah, bh, n, tol, maxIter);
43
+ return lawsonHansonNNLS(Ah, bh, n, tol, maxIter, epsilon, prior);
54
44
  }
55
45
 
56
46
  /**
57
- * Lawson–Hanson active-set NNLS, matrix form. Returns x ≥ 0 minimizing ||A x − b||².
47
+ * Lawson–Hanson active-set NNLS, matrix form. Returns x ≥ 0 minimizing
48
+ * ||A x − b||² + ε ||x − prior||².
58
49
  *
59
50
  * Variables:
60
51
  * P (passive set): indices where x_i > 0, x_i is "free"
61
52
  * Z (active set): indices where x_i = 0, x_i is "constrained"
62
- * w = Aᵀ(bAx) — gradient of the residual squared (negated)
53
+ * w = A_augᵀ(b_augA_aug x) — gradient of the augmented residual squared (negated).
54
+ * Split as w_j = (Aᵀ(b − A x))_j + ε · (prior_j − x_j).
63
55
  *
64
56
  * Outer loop: pick the most negative-gradient index from Z, move it to P.
65
57
  * Inner loop: solve unconstrained LS on P; if any x_i ≤ 0, perform an interpolation
@@ -71,6 +63,8 @@ function lawsonHansonNNLS(
71
63
  n: number,
72
64
  tol: number,
73
65
  maxIter: number,
66
+ epsilon: number,
67
+ prior: ReadonlyArray<number>,
74
68
  ): number[] {
75
69
  const m = A.length;
76
70
  const x = new Array(n).fill(0);
@@ -78,18 +72,19 @@ function lawsonHansonNNLS(
78
72
  let iter = 0;
79
73
 
80
74
  while (iter++ < maxIter) {
81
- // residual r = b − A x
75
+ // residual r = b − A x (against the un-augmented A only)
82
76
  const r = b.slice();
83
77
  for (let i = 0; i < m; i++) {
84
78
  let s = 0;
85
79
  for (let j = 0; j < n; j++) s += A[i][j] * x[j];
86
80
  r[i] -= s;
87
81
  }
88
- // w = Aᵀ r
82
+ // w = Aᵀ r + ε · (prior − x) ← implicit Tikhonov in the gradient
89
83
  const w = new Array(n).fill(0);
90
84
  for (let j = 0; j < n; j++) {
91
85
  let s = 0;
92
86
  for (let i = 0; i < m; i++) s += A[i][j] * r[i];
87
+ if (epsilon > 0) s += epsilon * (prior[j] - x[j]);
93
88
  w[j] = s;
94
89
  }
95
90
 
@@ -112,7 +107,7 @@ function lawsonHansonNNLS(
112
107
  // Solve LS over P only
113
108
  const pIdx: number[] = [];
114
109
  for (let j = 0; j < n; j++) if (inP[j]) pIdx.push(j);
115
- const sP = solveLS(A, b, pIdx);
110
+ const sP = solveLS(A, b, pIdx, epsilon, prior);
116
111
  // Build full s
117
112
  const s = new Array(n).fill(0);
118
113
  for (let k = 0; k < pIdx.length; k++) s[pIdx[k]] = sP[k];
@@ -153,12 +148,23 @@ function lawsonHansonNNLS(
153
148
  }
154
149
 
155
150
  /**
156
- * Solve unconstrained LS for the passive subset: argmin ‖A_P x_P − b‖² where A_P
157
- * is the columns of A indexed by `pIdx`. Uses normal equations (A_Pᵀ A_P) x = A_Pᵀ b
151
+ * Solve unconstrained LS for the passive subset: argmin ‖A_P x_P − b‖² + ε ‖x_P − prior_P‖²
152
+ * where A_P is the columns of A indexed by `pIdx`. Uses normal equations
153
+ * (A_Pᵀ A_P + ε I) x = A_Pᵀ b + ε · prior_P
158
154
  * with Gaussian elimination — adequate for the small passive sets that arise in
159
155
  * Tikhonov-regularized NNLS (|P| ≤ m + a few extras at convergence).
156
+ *
157
+ * The Tikhonov term enters as +ε on the Gram diagonal and +ε·prior on the RHS,
158
+ * which is exactly what augmenting A with √ε · I would produce — without the
159
+ * O(n²) storage.
160
160
  */
161
- function solveLS(A: number[][], b: number[], pIdx: ReadonlyArray<number>): number[] {
161
+ function solveLS(
162
+ A: number[][],
163
+ b: number[],
164
+ pIdx: ReadonlyArray<number>,
165
+ epsilon = 0,
166
+ prior?: ReadonlyArray<number>,
167
+ ): number[] {
162
168
  const m = A.length;
163
169
  const k = pIdx.length;
164
170
  if (k === 0) return [];
@@ -177,6 +183,14 @@ function solveLS(A: number[][], b: number[], pIdx: ReadonlyArray<number>): numbe
177
183
  G[a][k] = s;
178
184
  }
179
185
 
186
+ // Implicit Tikhonov: add ε to the Gram diagonal and ε·prior to the RHS.
187
+ if (epsilon > 0) {
188
+ for (let col = 0; col < k; col++) {
189
+ G[col][col] += epsilon;
190
+ if (prior !== undefined) G[col][k] += epsilon * prior[pIdx[col]];
191
+ }
192
+ }
193
+
180
194
  // Gaussian elimination with partial pivoting
181
195
  for (let col = 0; col < k; col++) {
182
196
  let pivot = col;
@@ -80,6 +80,7 @@ export function optimizeLookupTable(
80
80
  nRowsOut: params.nRowsOut,
81
81
  minPerBucket,
82
82
  requireMaxReached,
83
+ targetHitRate: params.targetHitRate,
83
84
  });
84
85
  const sampledIdx = stratifiedSample(buckets, filtered, quotas, rng);
85
86
 
package/src/qp.ts ADDED
@@ -0,0 +1,412 @@
1
+ // src/qp.ts
2
+ //
3
+ // FISTA (Fast Iterative Shrinkage-Thresholding Algorithm) with simplex projection
4
+ // for the underdetermined Tikhonov-regularized QP
5
+ //
6
+ // min ‖A x − b‖² + ε ‖x − prior‖² s.t. x ≥ 0, Σx = T
7
+ //
8
+ // Per-iteration cost: O(m·n) for the matvecs, O(n log n) for the simplex projection.
9
+ // For our m=3 / n≤100k regime that's ~10·n flops per iter — vs Lawson-Hanson NNLS
10
+ // which is O(m·n²) on underdetermined active-set-bouncing problems.
11
+ //
12
+ // Implementation notes for THIS problem family (RTP/variance/hit-rate targets):
13
+ //
14
+ // 1. **Jacobi (column-norm) preconditioning.** A's rows in our usage have wildly
15
+ // different scales — the RTP coefficient row can dominate by ~10⁷×. The
16
+ // change of variables x = D u with D_jj = 1/√(‖A col j‖²+ε) puts the
17
+ // preconditioned data-fit Hessian (AD)ᵀ(AD) into a well-conditioned regime
18
+ // (κ ~ m for the range space). The remaining 1/ε strong-convexity from
19
+ // the Tikhonov term is unaffected, but the *range* directions — which
20
+ // are where the data fit lives — accelerate properly.
21
+ // The sum constraint Σx=T maps to a weighted simplex Σ D_jj u_j = T;
22
+ // we project onto that with a Duchi-style O(n log n) routine.
23
+ //
24
+ // 2. **Adaptive restart** (O'Donoghue & Candès 2015): if the proximal step
25
+ // direction (uNew − u) is uphill against the gradient at y, momentum has
26
+ // overshot — reset t = 1. Essential for stable progress on tightly-toleranced
27
+ // problems where the iterates oscillate near the boundary of the active set.
28
+ //
29
+ // 3. **Sherman-Morrison-Woodbury warm start was considered and rejected.** When
30
+ // ε ≪ ‖A‖² (the common case for our toleranceRTP ~ 0.002 inputs), the
31
+ // formula M⁻¹ = (1/ε)(I − Aᵀ(εI + AAᵀ)⁻¹A) suffers catastrophic
32
+ // cancellation in the `(v − Aᵀ·…)/ε` step. The preconditioner above is
33
+ // sufficient on its own.
34
+ //
35
+ // CAVEAT: For very ill-conditioned instances (small ε, broad coefficient range),
36
+ // FISTA needs many thousands of iterations to nail the user's tight tolerances.
37
+ // In those regimes the active-set NNLS in `./nnls.ts` is dramatically faster on
38
+ // the same problem class because m is tiny. solveQP is offered as a parallel
39
+ // option in the public API; whether the orchestrator uses it or solveNNLS is
40
+ // a deployment decision driven by the tolerance regime.
41
+
42
+ export interface QPOptions {
43
+ /** Tikhonov coefficient ε ≥ 0. Default 1e-6. */
44
+ regularization?: number;
45
+ /** Tikhonov prior. Default = uniform sumConstraint/n. */
46
+ prior?: ReadonlyArray<number>;
47
+ /** Sum constraint: Σx = sumConstraint. Required. */
48
+ sumConstraint: number;
49
+ /** Maximum FISTA iterations. Default 500. */
50
+ maxIterations?: number;
51
+ /** Convergence tolerance on ‖x_{k+1} − x_k‖_2 / max(‖x_k‖_2, 1). Default 1e-6. */
52
+ tolerance?: number;
53
+ }
54
+
55
+ /**
56
+ * Solve `min ‖A x − b‖² + ε ‖x − prior‖² s.t. x ≥ 0, Σx = T`
57
+ * via Jacobi-preconditioned FISTA with weighted-simplex projection.
58
+ *
59
+ * A is m × n. For our use case m = 3 (RTP, variance, hit-rate features);
60
+ * the sum constraint is enforced via projection, not as a feature row.
61
+ */
62
+ export function solveQP(
63
+ A: ReadonlyArray<ReadonlyArray<number>>,
64
+ b: ReadonlyArray<number>,
65
+ options: QPOptions,
66
+ ): number[] {
67
+ const m = A.length;
68
+ const n = m === 0 ? 0 : A[0].length;
69
+ if (n === 0) return [];
70
+
71
+ const T = options.sumConstraint;
72
+ if (!Number.isFinite(T) || T < 0) {
73
+ throw new Error(`solveQP: sumConstraint must be a non-negative finite number, got ${T}`);
74
+ }
75
+ const epsilon = options.regularization ?? 1e-6;
76
+ const maxIter = options.maxIterations ?? 500;
77
+ const tol = options.tolerance ?? 1e-6;
78
+ const prior = options.prior ?? new Array(n).fill(T / n);
79
+ if (prior.length !== n) {
80
+ throw new Error(`solveQP: prior length ${prior.length} does not match n=${n}`);
81
+ }
82
+ if (b.length !== m) {
83
+ throw new Error(`solveQP: b length ${b.length} does not match m=${m}`);
84
+ }
85
+
86
+ // ── Jacobi preconditioner ───────────────────────────────────────────────────
87
+ // Change of variables x = D u with D_jj = 1/√(‖A col j‖² + ε). Columns of AD
88
+ // then have norm ≈ 1, dramatically improving (AD)ᵀ(AD)'s conditioning.
89
+ // In u-coordinates:
90
+ // - loss: ‖(AD) u − b‖² + ε ‖D u − prior‖²
91
+ // - constraints: u ≥ 0, Σ D_jj u_j = T (weighted simplex)
92
+ const colNormSq = new Float64Array(n);
93
+ for (let i = 0; i < m; i++) {
94
+ const row = A[i];
95
+ for (let j = 0; j < n; j++) colNormSq[j] += row[j] * row[j];
96
+ }
97
+ let totColNormSq = 0;
98
+ for (let j = 0; j < n; j++) totColNormSq += colNormSq[j];
99
+ const typicalScale = totColNormSq > 0 ? Math.sqrt(totColNormSq / n) : 1;
100
+ const D = new Float64Array(n);
101
+ const Dinv = new Float64Array(n);
102
+ for (let j = 0; j < n; j++) {
103
+ const s = Math.sqrt(colNormSq[j] + epsilon);
104
+ const s_ = s > 1e-30 ? s : typicalScale;
105
+ Dinv[j] = s_;
106
+ D[j] = 1 / s_;
107
+ }
108
+
109
+ // Preconditioned matrix AD (m × n) as Float64Array for tight inner loops.
110
+ const AD: Float64Array[] = new Array(m);
111
+ for (let i = 0; i < m; i++) {
112
+ const row = A[i];
113
+ const out = new Float64Array(n);
114
+ for (let j = 0; j < n; j++) out[j] = row[j] * D[j];
115
+ AD[i] = out;
116
+ }
117
+ // Prior anchor in u-space: priorU = D⁻¹ · prior so that D · priorU = prior.
118
+ const priorU = new Float64Array(n);
119
+ for (let j = 0; j < n; j++) priorU[j] = prior[j] * Dinv[j];
120
+ // Isotropic ε in u-space — see file header note 2.
121
+ const regDiag = epsilon;
122
+
123
+ // ── Lipschitz estimate in u-space ───────────────────────────────────────────
124
+ // L = 2 σ_max((AD)ᵀ(AD)) + 2 ε. Power iteration on the m×m matrix (AD)(AD)ᵀ.
125
+ const L0 = 2 * spectralNormSquaredF64(AD, m, n) + 2 * regDiag;
126
+ let L = L0 > 0 ? L0 : 1;
127
+
128
+ // ── FISTA state in u-space (all Float64Array for tight inner loops) ────────
129
+ const xInit = T / n;
130
+ const u = new Float64Array(n);
131
+ const uPrev = new Float64Array(n);
132
+ const y = new Float64Array(n);
133
+ for (let j = 0; j < n; j++) {
134
+ const v = xInit * Dinv[j]; // u = D⁻¹ x
135
+ u[j] = v;
136
+ uPrev[j] = v;
137
+ y[j] = v;
138
+ }
139
+ // Reusable scratch.
140
+ const g = new Float64Array(n);
141
+ const z = new Float64Array(n);
142
+ const uNew = new Float64Array(n);
143
+ const ADy = new Float64Array(m);
144
+ const r = new Float64Array(m);
145
+
146
+ let t = 1;
147
+
148
+ for (let iter = 0; iter < maxIter; iter++) {
149
+ // ── Gradient at y: g = 2 (AD)ᵀ ((AD) y − b) + 2 ε (y − priorU) ──────────
150
+ for (let i = 0; i < m; i++) {
151
+ const row = AD[i];
152
+ let s = 0;
153
+ for (let j = 0; j < n; j++) s += row[j] * y[j];
154
+ ADy[i] = s;
155
+ r[i] = s - b[i];
156
+ }
157
+ if (m === 3) {
158
+ // Hot path: unrolled for the common m=3 (RTP / CV / hit-rate).
159
+ const r0 = r[0], r1 = r[1], r2 = r[2];
160
+ const a0 = AD[0], a1 = AD[1], a2 = AD[2];
161
+ const tw = 2 * regDiag;
162
+ for (let j = 0; j < n; j++) {
163
+ g[j] = 2 * (a0[j] * r0 + a1[j] * r1 + a2[j] * r2) + tw * (y[j] - priorU[j]);
164
+ }
165
+ } else {
166
+ for (let j = 0; j < n; j++) {
167
+ let s = 0;
168
+ for (let i = 0; i < m; i++) s += AD[i][j] * r[i];
169
+ g[j] = 2 * s + 2 * regDiag * (y[j] - priorU[j]);
170
+ }
171
+ }
172
+
173
+ // ── Trial step + weighted-simplex projection, backtracking on L ──────────
174
+ // Compute f(y) lazily — only when we may need to backtrack. With a tight L
175
+ // bound, the first attempt nearly always succeeds, so we skip this work.
176
+ let fY = NaN;
177
+ let backtracks = 0;
178
+ const maxBacktracks = 30;
179
+ while (backtracks++ < maxBacktracks) {
180
+ const eta = 1 / L;
181
+ for (let j = 0; j < n; j++) z[j] = y[j] - eta * g[j];
182
+ projectWeightedSimplexInto(z, D, T, uNew);
183
+
184
+ // Cheap descent test based on the linear (not full quadratic) Taylor.
185
+ // ‖uNew − y‖² · L/2 + ⟨g, uNew − y⟩ should give an upper bound on f(uNew) - f(y);
186
+ // we accept the step on the first try unless this differs grossly from reality.
187
+ // For tight L this is fine; the explicit fY check only kicks in if we doubled L
188
+ // and want to verify before further increases.
189
+ let dot = 0;
190
+ let diffSq = 0;
191
+ for (let j = 0; j < n; j++) {
192
+ const diff = uNew[j] - y[j];
193
+ dot += g[j] * diff;
194
+ diffSq += diff * diff;
195
+ }
196
+ if (backtracks === 1) {
197
+ // Standard FISTA descent direction check: the proximal step on a smooth
198
+ // ‖∇²f‖ ≤ L surface yields dot + 0.5·L·diffSq ≤ 0 when the step is valid.
199
+ // Skip the explicit f-computation here.
200
+ if (dot + 0.5 * L * diffSq <= 1e-12 * Math.max(1, L)) break;
201
+ }
202
+ // Reluctant fallback: compute f(y) and f(uNew) and check the canonical bound.
203
+ if (Number.isNaN(fY)) fY = computeLossUF64(AD, b, y, m, n, regDiag, priorU);
204
+ const fNew = computeLossUF64(AD, b, uNew, m, n, regDiag, priorU);
205
+ const upper = fY + dot + 0.5 * L * diffSq;
206
+ if (fNew <= upper + 1e-12 * Math.max(1, Math.abs(fY))) break;
207
+ L *= 2;
208
+ }
209
+
210
+ // ── Adaptive restart: if step (uNew - u) is uphill against g(y), reset t ─
211
+ let gradTest = 0;
212
+ for (let j = 0; j < n; j++) gradTest += g[j] * (uNew[j] - u[j]);
213
+ if (gradTest > 0) t = 1;
214
+
215
+ // ── Convergence: relative ‖u_{k+1} − u_k‖ ────────────────────────────────
216
+ let duSq = 0;
217
+ let uNorm = 0;
218
+ for (let j = 0; j < n; j++) {
219
+ const diff = uNew[j] - u[j];
220
+ duSq += diff * diff;
221
+ uNorm += u[j] * u[j];
222
+ }
223
+ const dxNorm = Math.sqrt(duSq);
224
+ const xn = Math.sqrt(uNorm);
225
+
226
+ // ── Nesterov momentum ─────────────────────────────────────────────────────
227
+ const tNext = (1 + Math.sqrt(1 + 4 * t * t)) / 2;
228
+ const momentum = (t - 1) / tNext;
229
+ for (let j = 0; j < n; j++) {
230
+ uPrev[j] = u[j];
231
+ u[j] = uNew[j];
232
+ y[j] = uNew[j] + momentum * (uNew[j] - uPrev[j]);
233
+ }
234
+ t = tNext;
235
+
236
+ if (dxNorm < tol * Math.max(xn, 1)) break;
237
+ }
238
+
239
+ // ── Return to x-space: x = D u ─────────────────────────────────────────────
240
+ const x = new Array(n);
241
+ for (let j = 0; j < n; j++) x[j] = u[j] * D[j];
242
+ return x;
243
+ }
244
+
245
+ /**
246
+ * Project y onto the simplex {x : x ≥ 0, Σx = T} via Duchi et al. 2008.
247
+ * Returns a new array. O(n log n) due to the sort.
248
+ *
249
+ * Exported for testing and direct reuse.
250
+ */
251
+ export function projectSimplex(y: ReadonlyArray<number>, T: number): number[] {
252
+ const n = y.length;
253
+ if (n === 0) return [];
254
+ if (!Number.isFinite(T) || T < 0) {
255
+ throw new Error(`projectSimplex: T must be a non-negative finite number, got ${T}`);
256
+ }
257
+
258
+ const sorted = y.slice().sort((a, b) => b - a) as number[];
259
+ let cssv = 0;
260
+ let bestCssv = 0;
261
+ let rho = -1;
262
+ for (let j = 0; j < n; j++) {
263
+ cssv += sorted[j];
264
+ const threshold = (cssv - T) / (j + 1);
265
+ if (sorted[j] - threshold > 0) {
266
+ rho = j;
267
+ bestCssv = cssv;
268
+ } else {
269
+ break;
270
+ }
271
+ }
272
+ if (rho < 0) {
273
+ const uVal = T / n;
274
+ return new Array(n).fill(uVal);
275
+ }
276
+ const tau = (bestCssv - T) / (rho + 1);
277
+ const out = new Array(n);
278
+ for (let i = 0; i < n; i++) {
279
+ const v = y[i] - tau;
280
+ out[i] = v > 0 ? v : 0;
281
+ }
282
+ return out;
283
+ }
284
+
285
+ /**
286
+ * Project y onto the weighted simplex {u : u ≥ 0, Σ w_j u_j = T} with w_j > 0,
287
+ * writing into `out`. Used as the proximal step in u-coordinates inside FISTA.
288
+ *
289
+ * u_j* = max(0, y_j − λ w_j) for the unique λ s.t. Σ w_j · u_j* = T.
290
+ *
291
+ * f(λ) = Σ w_j · max(0, y_j − λ w_j) is continuous, piecewise-linear and strictly
292
+ * decreasing on (λ_min, λ_max). Sort the breakpoints t_j = y_j/w_j descending and
293
+ * walk through to find the active set (analogous to Duchi 2008). O(n log n).
294
+ */
295
+ function projectWeightedSimplexInto(
296
+ y: Float64Array,
297
+ w: Float64Array,
298
+ T: number,
299
+ out: Float64Array,
300
+ ): void {
301
+ const n = y.length;
302
+ if (n === 0) return;
303
+
304
+ const t = new Float64Array(n);
305
+ for (let j = 0; j < n; j++) t[j] = y[j] / w[j];
306
+ const idx = new Array<number>(n);
307
+ for (let j = 0; j < n; j++) idx[j] = j;
308
+ idx.sort((a, b) => t[b] - t[a]);
309
+
310
+ let Sy = 0;
311
+ let Sw2 = 0;
312
+ let lambda = 0;
313
+ let rho = -1;
314
+ for (let k = 0; k < n; k++) {
315
+ const j = idx[k];
316
+ Sy += w[j] * y[j];
317
+ Sw2 += w[j] * w[j];
318
+ const lamCand = (Sy - T) / Sw2;
319
+ if (t[j] > lamCand) {
320
+ rho = k;
321
+ lambda = lamCand;
322
+ } else {
323
+ break;
324
+ }
325
+ }
326
+ if (rho < 0) {
327
+ const xOver = T / n;
328
+ for (let j = 0; j < n; j++) out[j] = w[j] * xOver;
329
+ return;
330
+ }
331
+ for (let j = 0; j < n; j++) {
332
+ const v = y[j] - lambda * w[j];
333
+ out[j] = v > 0 ? v : 0;
334
+ }
335
+ }
336
+
337
+ /**
338
+ * F(u) = ‖(AD) u − b‖² + ε Σ_j (u_j − priorU_j)² (loss in u-coordinates,
339
+ * Float64Array variant for the FISTA hot path).
340
+ */
341
+ function computeLossUF64(
342
+ AD: ReadonlyArray<Float64Array>,
343
+ b: ReadonlyArray<number>,
344
+ u: Float64Array,
345
+ m: number,
346
+ n: number,
347
+ regDiag: number,
348
+ priorU: Float64Array,
349
+ ): number {
350
+ let dataSq = 0;
351
+ for (let i = 0; i < m; i++) {
352
+ const row = AD[i];
353
+ let s = 0;
354
+ for (let j = 0; j < n; j++) s += row[j] * u[j];
355
+ const r = s - b[i];
356
+ dataSq += r * r;
357
+ }
358
+ let regSq = 0;
359
+ for (let j = 0; j < n; j++) {
360
+ const diff = u[j] - priorU[j];
361
+ regSq += diff * diff;
362
+ }
363
+ return dataSq + regDiag * regSq;
364
+ }
365
+
366
+ /**
367
+ * Estimate σ_max(MᵀM) = σ_max(MMᵀ) via power iteration on the m×m matrix MMᵀ.
368
+ * Cost: O(m²·n) to build, O(m²) per iteration. For m=3 effectively free.
369
+ *
370
+ * Float64Array variant — same routine, different storage type.
371
+ */
372
+ function spectralNormSquaredF64(
373
+ M: ReadonlyArray<Float64Array>,
374
+ m: number,
375
+ n: number,
376
+ ): number {
377
+ if (m === 0 || n === 0) return 0;
378
+ const MMt: number[][] = Array.from({ length: m }, () => new Array(m).fill(0));
379
+ for (let i = 0; i < m; i++) {
380
+ for (let k = i; k < m; k++) {
381
+ let s = 0;
382
+ const Mi = M[i];
383
+ const Mk = M[k];
384
+ for (let j = 0; j < n; j++) s += Mi[j] * Mk[j];
385
+ MMt[i][k] = s;
386
+ MMt[k][i] = s;
387
+ }
388
+ }
389
+ let v = new Array(m).fill(1 / Math.sqrt(m));
390
+ let lambda = 0;
391
+ for (let it = 0; it < 30; it++) {
392
+ const w = new Array(m).fill(0);
393
+ for (let i = 0; i < m; i++) {
394
+ let s = 0;
395
+ for (let k = 0; k < m; k++) s += MMt[i][k] * v[k];
396
+ w[i] = s;
397
+ }
398
+ let norm = 0;
399
+ for (let i = 0; i < m; i++) norm += w[i] * w[i];
400
+ norm = Math.sqrt(norm);
401
+ if (norm < 1e-30) return 0;
402
+ const newV = new Array(m);
403
+ for (let i = 0; i < m; i++) newV[i] = w[i] / norm;
404
+ if (Math.abs(norm - lambda) < 1e-10 * Math.max(1, norm)) {
405
+ lambda = norm;
406
+ break;
407
+ }
408
+ lambda = norm;
409
+ v = newV;
410
+ }
411
+ return lambda;
412
+ }
package/src/quantize.ts CHANGED
@@ -33,27 +33,33 @@ export function quantizeWeights(weights: ReadonlyArray<number>, total: number):
33
33
  Math.round(Math.max(0, w - floors[i]) * 1e10) / 1e10,
34
34
  );
35
35
  const order = indicesSortedByDesc(remainders);
36
- for (let k = 0; k < deficit; k++) floors[order[k]]++;
36
+ // Distribute deficit across rows. If deficit > n, give each row floor(deficit/n)
37
+ // plus one extra to the top (deficit % n) rows.
38
+ const bulk = Math.floor(deficit / n);
39
+ if (bulk > 0) for (let i = 0; i < n; i++) floors[i] += bulk;
40
+ const remainder = deficit - bulk * n;
41
+ for (let k = 0; k < remainder; k++) floors[order[k]]++;
37
42
  } else if (deficit < 0) {
38
43
  // Remove 1's from rows with the largest current weight, never going below 1.
44
+ // Single sort + single greedy pass: from the largest-floor row downward,
45
+ // take as much as possible (capped by floors[i] − 1) until toRemove == 0.
46
+ // O(n log n) total — previously O(K · n log n) when many rows are clamped at 1.
39
47
  let toRemove = -deficit;
40
- while (toRemove > 0) {
41
- const order = indicesSortedByDesc(floors);
42
- let progress = false;
43
- for (const i of order) {
44
- if (toRemove === 0) break;
45
- if (floors[i] > 1) {
46
- floors[i]--;
47
- toRemove--;
48
- progress = true;
49
- }
50
- }
51
- if (!progress) {
52
- // Shouldn't happen: total >= n was checked; sumFloors was at most total + (max(1, .) bias),
53
- // and that bias is ≤ n which can always be reclaimed.
54
- throw new Error('quantizeWeights: cannot reduce further while keeping w_i >= 1');
48
+ const order = indicesSortedByDesc(floors);
49
+ for (const i of order) {
50
+ if (toRemove === 0) break;
51
+ const removable = floors[i] - 1;
52
+ if (removable > 0) {
53
+ const take = Math.min(removable, toRemove);
54
+ floors[i] -= take;
55
+ toRemove -= take;
55
56
  }
56
57
  }
58
+ if (toRemove > 0) {
59
+ // Shouldn't happen: total >= n was checked; sumFloors was at most total + (max(1, .) bias),
60
+ // and that bias is ≤ n which can always be reclaimed.
61
+ throw new Error('quantizeWeights: cannot reduce further while keeping w_i >= 1');
62
+ }
57
63
  }
58
64
 
59
65
  return floors;
package/src/sample.ts CHANGED
@@ -84,6 +84,11 @@ export interface QuotaParams {
84
84
  nRowsOut: number;
85
85
  minPerBucket: number;
86
86
  requireMaxReached: boolean;
87
+ /** Optional: bias the candidate pool toward this non-zero fraction (0..1).
88
+ * When set, zeroBucket gets approximately `(1 − targetHitRate) × nRowsOut`
89
+ * slots and the log buckets share the rest. When unset, current
90
+ * variance-contribution distribution applies (zero gets leftover). */
91
+ targetHitRate?: number;
87
92
  }
88
93
 
89
94
  export interface Quotas {
@@ -103,7 +108,7 @@ export interface Quotas {
103
108
  * All quotas are integers and sum to nRowsOut.
104
109
  */
105
110
  export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas {
106
- const { nRowsOut, minPerBucket, requireMaxReached } = params;
111
+ const { nRowsOut, minPerBucket, requireMaxReached, targetHitRate } = params;
107
112
 
108
113
  // Count non-empty log buckets — these are the ones eligible for minPerBucket.
109
114
  const nonEmptyLogCount = buckets.logBuckets.reduce(
@@ -112,6 +117,28 @@ export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas
112
117
  );
113
118
  const wantNearMax = requireMaxReached && buckets.nearMaxBucket.indices.length > 0;
114
119
 
120
+ const totalAvailable =
121
+ buckets.zeroBucket.indices.length +
122
+ buckets.logBuckets.reduce((s, b) => s + b.indices.length, 0) +
123
+ buckets.nearMaxBucket.indices.length;
124
+ const expected = Math.min(nRowsOut, totalAvailable);
125
+
126
+ // ── targetHitRate-biased path ────────────────────────────────────────────
127
+ if (typeof targetHitRate === 'number' && targetHitRate > 0 && targetHitRate < 1) {
128
+ const result = computeQuotasByTargetHitRate(buckets, {
129
+ nRowsOut,
130
+ minPerBucket,
131
+ requireMaxReached,
132
+ targetHitRate,
133
+ nonEmptyLogCount,
134
+ wantNearMax,
135
+ totalAvailable,
136
+ expected,
137
+ });
138
+ return result;
139
+ }
140
+
141
+ // ── Original variance-contribution path ──────────────────────────────────
115
142
  // Compute an effective minPerBucket so the floor allocation does not exceed nRowsOut.
116
143
  // Floor at 0; near-max keeps its 1 slot when room allows, dropped only as a last resort.
117
144
  let effectiveMinPerBucket = minPerBucket;
@@ -185,15 +212,181 @@ export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas
185
212
  // Defensive invariant: quotas must sum to exactly nRowsOut, unless the
186
213
  // total available indices across all buckets are fewer than nRowsOut (in
187
214
  // which case the cap at total available is the best achievable).
188
- const totalAvailable =
189
- buckets.zeroBucket.indices.length +
215
+ const total = zeroQuota + logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
216
+ if (total !== expected) {
217
+ throw new Error(
218
+ `computeQuotas invariant violated: total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable})`,
219
+ );
220
+ }
221
+
222
+ return { zeroBucket: zeroQuota, logBuckets: logQuotas, nearMaxBucket: nearMaxQuota };
223
+ }
224
+
225
+ /**
226
+ * Splits `nRowsOut` so the candidate pool's non-zero fraction ≈ `targetHitRate`.
227
+ * This fixes the lopsided-row-composition bug in `optimizeLookupTable` when the
228
+ * source distribution's natural hit-rate is far from `targetHitRate`.
229
+ *
230
+ * The non-zero share is distributed across log + near-max buckets using the same
231
+ * (minPerBucket floor → variance-contribution remainder) heuristic as the
232
+ * default path, but constrained to a smaller budget. Any shortfall in either
233
+ * the zero or non-zero side spills over to the other side so total === nRowsOut.
234
+ */
235
+ function computeQuotasByTargetHitRate(
236
+ buckets: QuotaInput,
237
+ ctx: {
238
+ nRowsOut: number;
239
+ minPerBucket: number;
240
+ requireMaxReached: boolean;
241
+ targetHitRate: number;
242
+ nonEmptyLogCount: number;
243
+ wantNearMax: boolean;
244
+ totalAvailable: number;
245
+ expected: number;
246
+ },
247
+ ): Quotas {
248
+ const { nRowsOut, minPerBucket, targetHitRate, nonEmptyLogCount, wantNearMax, totalAvailable, expected } = ctx;
249
+
250
+ const nonZeroAvailable =
190
251
  buckets.logBuckets.reduce((s, b) => s + b.indices.length, 0) +
191
252
  buckets.nearMaxBucket.indices.length;
192
- const expected = Math.min(nRowsOut, totalAvailable);
253
+ const zeroAvailable = buckets.zeroBucket.indices.length;
254
+
255
+ let nonZeroSlots = Math.round(targetHitRate * nRowsOut);
256
+ let zeroSlots = nRowsOut - nonZeroSlots;
257
+
258
+ // Cap each side by what's available; spill the leftover to the other side.
259
+ if (nonZeroSlots > nonZeroAvailable) {
260
+ zeroSlots += nonZeroSlots - nonZeroAvailable;
261
+ nonZeroSlots = nonZeroAvailable;
262
+ }
263
+ if (zeroSlots > zeroAvailable) {
264
+ nonZeroSlots += zeroSlots - zeroAvailable;
265
+ zeroSlots = zeroAvailable;
266
+ }
267
+ // Final cap (only matters when totalAvailable < nRowsOut).
268
+ if (nonZeroSlots > nonZeroAvailable) nonZeroSlots = nonZeroAvailable;
269
+ if (zeroSlots > zeroAvailable) zeroSlots = zeroAvailable;
270
+
271
+ // Scale effectiveMinPerBucket down so the floor allocation fits within the
272
+ // non-zero budget. Same logic as the default path, just constrained to
273
+ // `nonZeroSlots` instead of `nRowsOut`.
274
+ let effectiveMinPerBucket = minPerBucket;
275
+ while (
276
+ effectiveMinPerBucket > 0 &&
277
+ nonEmptyLogCount * effectiveMinPerBucket + (wantNearMax ? 1 : 0) > nonZeroSlots
278
+ ) {
279
+ effectiveMinPerBucket--;
280
+ }
281
+ let nearMaxQuota =
282
+ wantNearMax && nonEmptyLogCount * effectiveMinPerBucket < nonZeroSlots ? 1 : 0;
283
+
284
+ const logQuotas = buckets.logBuckets.map((b) => {
285
+ if (b.indices.length === 0) return 0;
286
+ return Math.min(effectiveMinPerBucket, b.indices.length);
287
+ });
288
+
289
+ let assigned = logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
290
+ let remainingNonZero = nonZeroSlots - assigned;
291
+
292
+ // Variance-contribution remainder, with redistribution when any bucket caps
293
+ // out (so the non-zero budget gets fully consumed before spilling to zero).
294
+ if (remainingNonZero > 0) {
295
+ const contrib = buckets.logBuckets.map((b) => {
296
+ if (b.indices.length === 0) return 0;
297
+ const mean = b.weightedPayoutSum / Math.max(1, b.totalWeight);
298
+ return b.totalWeight * mean * mean;
299
+ });
300
+ // Iteratively allocate by contribution among non-capped buckets, then
301
+ // redistribute any over-allocation. Capped at log(nBuckets) + 1 passes.
302
+ let extraToPlace = remainingNonZero;
303
+ const eligible = buckets.logBuckets.map((b, i) => b.indices.length - logQuotas[i] > 0);
304
+ const maxPasses = buckets.logBuckets.length + 2;
305
+ for (let pass = 0; pass < maxPasses && extraToPlace > 0; pass++) {
306
+ let activeContrib = 0;
307
+ for (let i = 0; i < buckets.logBuckets.length; i++) {
308
+ if (eligible[i]) activeContrib += contrib[i];
309
+ }
310
+ if (activeContrib > 0) {
311
+ const proposed = buckets.logBuckets.map((_, i) =>
312
+ eligible[i] ? (contrib[i] / activeContrib) * extraToPlace : 0,
313
+ );
314
+ const floors = proposed.map(Math.floor);
315
+ const used = floors.reduce((s, v) => s + v, 0);
316
+ const remainders = proposed.map((p, i) => p - floors[i]);
317
+ const order = remainders
318
+ .map((_, i) => i)
319
+ .filter((i) => eligible[i])
320
+ .sort((a, b) => remainders[b] - remainders[a]);
321
+ let extra = extraToPlace - used;
322
+ for (const i of order) {
323
+ if (extra === 0) break;
324
+ floors[i]++;
325
+ extra--;
326
+ }
327
+ // Apply, capping at room.
328
+ let placed = 0;
329
+ for (let i = 0; i < floors.length; i++) {
330
+ if (!eligible[i] || floors[i] <= 0) continue;
331
+ const room = buckets.logBuckets[i].indices.length - logQuotas[i];
332
+ const give = Math.min(floors[i], room);
333
+ logQuotas[i] += give;
334
+ placed += give;
335
+ if (give === room) eligible[i] = false;
336
+ }
337
+ extraToPlace -= placed;
338
+ if (placed === 0) break; // No progress (everything is capped).
339
+ } else {
340
+ // No variance signal among eligible — fill remaining buckets evenly by room.
341
+ const order = buckets.logBuckets
342
+ .map((b, i) => ({ i, room: b.indices.length - logQuotas[i] }))
343
+ .filter((o) => o.room > 0 && eligible[o.i])
344
+ .sort((a, b) => b.room - a.room);
345
+ for (const { i, room } of order) {
346
+ if (extraToPlace === 0) break;
347
+ const give = Math.min(room, extraToPlace);
348
+ logQuotas[i] += give;
349
+ extraToPlace -= give;
350
+ }
351
+ break;
352
+ }
353
+ }
354
+ remainingNonZero = extraToPlace;
355
+ }
356
+
357
+ // If any non-zero slot is still unassigned (every log bucket capped),
358
+ // spill it to zero (only path left when totalAvailable still allows it).
359
+ if (remainingNonZero > 0) {
360
+ const headroomToZero = Math.min(remainingNonZero, zeroAvailable - zeroSlots);
361
+ zeroSlots += headroomToZero;
362
+ remainingNonZero -= headroomToZero;
363
+ }
364
+
365
+ let zeroQuota = Math.min(zeroSlots, zeroAvailable);
366
+
367
+ // If zero bucket can't soak its share, spill to the largest log buckets.
368
+ let leftover = zeroSlots - zeroQuota;
369
+ if (leftover > 0) {
370
+ const order = buckets.logBuckets
371
+ .map((b, i) => ({ i, room: b.indices.length - logQuotas[i] }))
372
+ .sort((a, b) => b.room - a.room);
373
+ for (const { i, room } of order) {
374
+ if (leftover === 0) break;
375
+ const give = Math.min(room, leftover);
376
+ logQuotas[i] += give;
377
+ leftover -= give;
378
+ }
379
+ if (leftover > 0 && wantNearMax && nearMaxQuota === 0 && buckets.nearMaxBucket.indices.length > 0) {
380
+ nearMaxQuota = 1;
381
+ leftover--;
382
+ }
383
+ }
384
+
385
+ // Final defensive invariant.
193
386
  const total = zeroQuota + logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
194
387
  if (total !== expected) {
195
388
  throw new Error(
196
- `computeQuotas invariant violated: total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable})`,
389
+ `computeQuotas invariant violated (targetHitRate path): total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable}, targetHitRate=${targetHitRate})`,
197
390
  );
198
391
  }
199
392
 
@@ -138,4 +138,75 @@ describe('integration', () => {
138
138
  for (const r of result.rows) sum += r.weight;
139
139
  expect(sum).toBe(1000 * 1_000_000);
140
140
  });
141
+
142
+ it('7. row composition reflects targetHitRate (not just weighted hit-rate)', () => {
143
+ // Source distribution has hit-rate ≈ 0.30 (rng-controlled).
144
+ const rng = makeRng(7);
145
+ const rows: LookupRow[] = new Array(50_000);
146
+ for (let i = 0; i < 50_000; i++) {
147
+ const u = rng();
148
+ let p = 0;
149
+ if (u > 0.7) p = Math.floor(rng() * 200);
150
+ if (u > 0.97) p = Math.floor(rng() * 5_000);
151
+ if (u > 0.999) p = Math.floor(rng() * 50_000);
152
+ rows[i] = { sim: i, weight: 1 + Math.floor(rng() * 100), payoutCents: p };
153
+ }
154
+
155
+ // Target hit-rate well below source (0.20 vs 0.30)
156
+ const result = optimizeLookupTable(rows, {
157
+ targetRTP: 0.96, toleranceRTP: 0.01,
158
+ targetCV: 5.0, toleranceCV: 2.0,
159
+ targetHitRate: 0.20, toleranceHitRate: 0.02,
160
+ capMaxWin: 50_000,
161
+ nRowsOut: 1000,
162
+ requireMaxReached: false,
163
+ maxIterations: 3,
164
+ });
165
+
166
+ // Weighted hit-rate hits target.
167
+ expect(result.toleranceMet.hitRate).toBe(true);
168
+
169
+ // ROW composition is roughly 80% zero, 20% non-zero.
170
+ let nZero = 0;
171
+ for (const r of result.rows) if (r.payoutCents === 0) nZero++;
172
+ const zeroRowFraction = nZero / result.rows.length;
173
+ // Tolerance ±5% of (1 − targetHitRate).
174
+ expect(zeroRowFraction).toBeGreaterThan(0.75);
175
+ expect(zeroRowFraction).toBeLessThan(0.85);
176
+ });
177
+
178
+ it('6. handles nRowsOut=5000 without n² memory blowup', () => {
179
+ // Pre-fix this would allocate a 5000×5000 dense matrix (200 MB Float64);
180
+ // after the implicit-Tikhonov fix it should fit in well under 100 MB and
181
+ // complete in a few seconds.
182
+ const rng = makeRng(6);
183
+ const rows: LookupRow[] = new Array(200_000);
184
+ for (let i = 0; i < 200_000; i++) {
185
+ const u = rng();
186
+ let p = 0;
187
+ if (u > 0.7) p = Math.floor(rng() * 200);
188
+ if (u > 0.97) p = Math.floor(rng() * 5_000);
189
+ if (u > 0.999) p = Math.floor(rng() * 50_000);
190
+ rows[i] = { sim: i, weight: 1 + Math.floor(rng() * 10), payoutCents: p };
191
+ }
192
+
193
+ const t0 = performance.now();
194
+ const result = optimizeLookupTable(rows, {
195
+ targetRTP: 0.5, toleranceRTP: 0.2,
196
+ targetCV: 3, toleranceCV: 5,
197
+ targetHitRate: 0.30, toleranceHitRate: 0.1,
198
+ capMaxWin: 50_000,
199
+ nRowsOut: 5_000,
200
+ requireMaxReached: false,
201
+ maxIterations: 1, // single pass — we're testing memory, not convergence
202
+ });
203
+ const elapsed = performance.now() - t0;
204
+
205
+ expect(result.rows).toHaveLength(5_000);
206
+ // Should be well under the testTimeout (30s). 60s as a generous upper bound.
207
+ expect(elapsed).toBeLessThan(60_000);
208
+ let sum = 0;
209
+ for (const r of result.rows) sum += r.weight;
210
+ expect(sum).toBe(5_000 * 1_000_000);
211
+ });
141
212
  });
@@ -0,0 +1,105 @@
1
+ // test/qp.test.ts
2
+ import { describe, expect, it } from 'vitest';
3
+ import { solveQP, projectSimplex } from '../src/qp.js';
4
+
5
+ describe('projectSimplex', () => {
6
+ it('projects to the simplex when input sum exceeds T', () => {
7
+ // y = [4, 3, 2, 1], T = 5
8
+ // expected: tau s.t. max(0, y - tau) sums to 5; sort desc [4,3,2,1]
9
+ // j=0: cssv=4, thresh=(4-5)/1=-1, 4-(-1)=5>0, rho=0
10
+ // j=1: cssv=7, thresh=(7-5)/2=1, 3-1=2>0, rho=1
11
+ // j=2: cssv=9, thresh=(9-5)/3=1.333, 2-1.333=0.667>0, rho=2
12
+ // j=3: cssv=10, thresh=(10-5)/4=1.25, 1-1.25<0, stop
13
+ // tau = (9-5)/3 = 4/3
14
+ // result: max(0, [4,3,2,1] - 4/3) = [8/3, 5/3, 2/3, 0]
15
+ const out = projectSimplex([4, 3, 2, 1], 5);
16
+ expect(out.reduce((a, b) => a + b, 0)).toBeCloseTo(5, 8);
17
+ expect(out[0]).toBeCloseTo(8 / 3, 6);
18
+ expect(out[3]).toBeCloseTo(0, 6);
19
+ });
20
+
21
+ it('caps each entry at ≥ 0', () => {
22
+ const out = projectSimplex([-10, -5, 1, 2], 1);
23
+ for (const v of out) expect(v).toBeGreaterThanOrEqual(-1e-12);
24
+ expect(out.reduce((a, b) => a + b, 0)).toBeCloseTo(1, 8);
25
+ });
26
+
27
+ it('handles uniform input', () => {
28
+ // y = [1,1,1,1], T = 4 → x = [1,1,1,1]
29
+ const out = projectSimplex([1, 1, 1, 1], 4);
30
+ for (const v of out) expect(v).toBeCloseTo(1, 8);
31
+ });
32
+ });
33
+
34
+ describe('solveQP', () => {
35
+ it('solves trivial unconstrained-like case where prior is feasible', () => {
36
+ // A = [[1,1,1]], b = [3], prior = [1,1,1], T = 3
37
+ // The prior itself satisfies sum=3 and the equation Σx = 3.
38
+ const x = solveQP([[1, 1, 1]], [3], {
39
+ sumConstraint: 3,
40
+ prior: [1, 1, 1],
41
+ regularization: 1e-6,
42
+ });
43
+ expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(3, 6);
44
+ for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-9);
45
+ expect(x[0]).toBeCloseTo(1, 3);
46
+ expect(x[1]).toBeCloseTo(1, 3);
47
+ expect(x[2]).toBeCloseTo(1, 3);
48
+ });
49
+
50
+ it('respects non-negativity when LS solution would go negative', () => {
51
+ // A = [[1, -1]], b = [-2], T = 2: unconstrained LS pushes x[0] toward 0
52
+ // and x[1] toward 2.
53
+ const x = solveQP([[1, -1]], [-2], {
54
+ sumConstraint: 2,
55
+ prior: [1, 1],
56
+ regularization: 1e-4,
57
+ });
58
+ expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(2, 6);
59
+ for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-9);
60
+ expect(x[0]).toBeLessThan(0.5);
61
+ expect(x[1]).toBeGreaterThan(1.5);
62
+ });
63
+
64
+ it('matches solveNNLS to working tolerance on the underdetermined 2×4 case', () => {
65
+ // Same test as in nnls.test.ts: A=[[1,1,0,0],[0,0,1,1]], b=[4,6], prior=[1,1,1,1], T=10
66
+ // Tikhonov pulls toward x≈[2,2,3,3]. Sum constraint is enforced exactly here (4+6=10).
67
+ const x = solveQP(
68
+ [[1, 1, 0, 0], [0, 0, 1, 1]],
69
+ [4, 6],
70
+ {
71
+ sumConstraint: 10,
72
+ prior: [1, 1, 1, 1],
73
+ regularization: 1e-6,
74
+ },
75
+ );
76
+ expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(10, 4);
77
+ expect(x[0]).toBeCloseTo(2, 1);
78
+ expect(x[1]).toBeCloseTo(2, 1);
79
+ expect(x[2]).toBeCloseTo(3, 1);
80
+ expect(x[3]).toBeCloseTo(3, 1);
81
+ });
82
+
83
+ it('converges for n=10000 in under 1 second', () => {
84
+ // Random feasible problem
85
+ const n = 10_000;
86
+ const T = n * 100; // 1_000_000
87
+ const A: number[][] = [
88
+ Array.from({ length: n }, (_, i) => (i % 100)),
89
+ Array.from({ length: n }, (_, i) => ((i * 7) % 200)),
90
+ Array.from({ length: n }, () => 1),
91
+ ];
92
+ const b = [T * 50, T * 100, T * 0.3];
93
+
94
+ const t0 = performance.now();
95
+ const x = solveQP(A, b, {
96
+ sumConstraint: T,
97
+ regularization: 1e-6,
98
+ maxIterations: 200,
99
+ });
100
+ const elapsed = performance.now() - t0;
101
+ expect(elapsed).toBeLessThan(1000);
102
+ expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(T, 0); // exact via projection
103
+ for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-6);
104
+ });
105
+ });
@@ -38,4 +38,28 @@ describe('quantizeWeights', () => {
38
38
  const out = quantizeWeights([1.5, 2.5, 3.5], 8);
39
39
  expect(out).toEqual([2, 3, 3]);
40
40
  });
41
+
42
+ it('handles large n with many floor-1 rows efficiently (regression: was O(K·n log n))', () => {
43
+ // Synthesize a scenario that previously took 30+ seconds:
44
+ // ~99% of rows have continuous weight near 0 (will clamp to floor 1)
45
+ // ~1% of rows have large weight
46
+ const n = 100_000;
47
+ const T = n * 1_000_000;
48
+ const weights = new Array(n);
49
+ for (let i = 0; i < n; i++) {
50
+ // 99% small, 1% large
51
+ weights[i] = i % 100 === 0 ? T / 1000 : 0.0001;
52
+ }
53
+ const t0 = performance.now();
54
+ const out = quantizeWeights(weights, T);
55
+ const elapsed = performance.now() - t0;
56
+ expect(elapsed).toBeLessThan(1000); // 1 second — was 30+s before fix
57
+ expect(out.length).toBe(n);
58
+ let sum = 0;
59
+ for (const v of out) {
60
+ sum += v;
61
+ expect(v).toBeGreaterThanOrEqual(1);
62
+ }
63
+ expect(sum).toBe(T);
64
+ });
41
65
  });
@@ -99,6 +99,82 @@ describe('computeQuotas (over-allocation guard)', () => {
99
99
  });
100
100
  });
101
101
 
102
+ describe('computeQuotas — targetHitRate bias', () => {
103
+ it('allocates zero/nonzero quotas proportional to targetHitRate', () => {
104
+ const zero: Bucket = {
105
+ indices: Array.from({ length: 5000 }, (_, i) => i),
106
+ totalWeight: 5000,
107
+ weightedPayoutSum: 0,
108
+ };
109
+ const log: Bucket[] = Array.from({ length: 5 }, (_, k) => ({
110
+ indices: Array.from({ length: 200 }, (_, j) => 5000 + k * 200 + j),
111
+ totalWeight: 200,
112
+ weightedPayoutSum: 200 * (10 ** (k + 1)),
113
+ }));
114
+ const nearMax: Bucket = { indices: [5999], totalWeight: 1, weightedPayoutSum: 1000 };
115
+ const quotas = computeQuotas(
116
+ { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
117
+ { nRowsOut: 1000, minPerBucket: 3, requireMaxReached: false, targetHitRate: 0.2 },
118
+ );
119
+ // Expect ~800 zero and ~200 non-zero (including across log buckets)
120
+ expect(quotas.zeroBucket).toBeGreaterThanOrEqual(750);
121
+ expect(quotas.zeroBucket).toBeLessThanOrEqual(850);
122
+ const totalNonZero = quotas.logBuckets.reduce((s, q) => s + q, 0) + quotas.nearMaxBucket;
123
+ expect(totalNonZero).toBeGreaterThanOrEqual(150);
124
+ expect(totalNonZero).toBeLessThanOrEqual(250);
125
+ const total = quotas.zeroBucket + totalNonZero;
126
+ expect(total).toBe(1000);
127
+ });
128
+
129
+ it('falls back to variance-contribution behavior when targetHitRate is unset', () => {
130
+ // Same buckets as the very first computeQuotas test in this file — invariant
131
+ // must be preserved (zero bucket absorbs leftover; total === nRowsOut).
132
+ const zero: Bucket = { indices: Array(100).fill(0).map((_, i) => i), totalWeight: 100, weightedPayoutSum: 0 };
133
+ const log: Bucket[] = [
134
+ { indices: [100, 101, 102], totalWeight: 3, weightedPayoutSum: 30 },
135
+ { indices: [103, 104, 105, 106, 107], totalWeight: 5, weightedPayoutSum: 200 },
136
+ { indices: [], totalWeight: 0, weightedPayoutSum: 0 },
137
+ ];
138
+ const nearMax: Bucket = { indices: [107], totalWeight: 1, weightedPayoutSum: 100 };
139
+ const quotas = computeQuotas(
140
+ { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
141
+ { nRowsOut: 20, minPerBucket: 3, requireMaxReached: true /* no targetHitRate */ },
142
+ );
143
+ // Pre-fix invariants — same as the original test
144
+ expect(quotas.logBuckets[0]).toBeGreaterThanOrEqual(3);
145
+ expect(quotas.logBuckets[1]).toBeGreaterThanOrEqual(3);
146
+ expect(quotas.logBuckets[2]).toBe(0);
147
+ expect(quotas.nearMaxBucket).toBeGreaterThanOrEqual(1);
148
+ const total = quotas.zeroBucket + quotas.logBuckets.reduce((a, b) => a + b, 0) + quotas.nearMaxBucket;
149
+ expect(total).toBe(20);
150
+ });
151
+
152
+ it('handles targetHitRate=0.5 on a balanced distribution', () => {
153
+ const zero: Bucket = {
154
+ indices: Array.from({ length: 1000 }, (_, i) => i),
155
+ totalWeight: 1000,
156
+ weightedPayoutSum: 0,
157
+ };
158
+ const log: Bucket[] = Array.from({ length: 5 }, (_, k) => ({
159
+ indices: Array.from({ length: 200 }, (_, j) => 1000 + k * 200 + j),
160
+ totalWeight: 200,
161
+ weightedPayoutSum: 200 * (10 ** (k + 1)),
162
+ }));
163
+ const nearMax: Bucket = { indices: [1999], totalWeight: 1, weightedPayoutSum: 1000 };
164
+ const quotas = computeQuotas(
165
+ { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
166
+ { nRowsOut: 500, minPerBucket: 3, requireMaxReached: false, targetHitRate: 0.5 },
167
+ );
168
+ expect(quotas.zeroBucket).toBeGreaterThanOrEqual(225);
169
+ expect(quotas.zeroBucket).toBeLessThanOrEqual(275);
170
+ const totalNonZero = quotas.logBuckets.reduce((s, q) => s + q, 0) + quotas.nearMaxBucket;
171
+ expect(totalNonZero).toBeGreaterThanOrEqual(225);
172
+ expect(totalNonZero).toBeLessThanOrEqual(275);
173
+ const total = quotas.zeroBucket + totalNonZero;
174
+ expect(total).toBe(500);
175
+ });
176
+ });
177
+
102
178
  describe('stratifiedSample (overlap top-up)', () => {
103
179
  it('delivers exactly the total quota even when near-max overlaps log buckets', () => {
104
180
  // Top log bucket overlaps near-max bucket; near-max consumes enough that the