npm - @energy8platform/stake-math-tools - Versions diffs - 0.2.0 → 0.3.0 - Mend

@energy8platform/stake-math-tools 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/src/index.ts +2 -0
package/src/optimize-lookup.ts +1 -0
package/src/qp.ts +412 -0
package/src/quantize.ts +22 -16
package/src/sample.ts +198 -5
package/test/optimize-lookup.integration.test.ts +36 -0
package/test/qp.test.ts +105 -0
package/test/quantize.test.ts +24 -0
package/test/sample.test.ts +76 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@energy8platform/stake-math-tools",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Node-only dev-time math utilities for the Energy8 Stake bridge: lookup-table (force matrix) builder",
   "author": "Energy8 Platform",
   "license": "MIT",

package/src/index.ts CHANGED Viewed

@@ -16,4 +16,6 @@ export { mulberry32, weightedReservoirSample, computeQuotas, stratifiedSample }
 export type { Quotas, QuotaInput, QuotaParams } from './sample.js';
 export { solveNNLS } from './nnls.js';
 export type { NNLSOptions } from './nnls.js';
+export { solveQP, projectSimplex } from './qp.js';
+export type { QPOptions } from './qp.js';
 export { quantizeWeights } from './quantize.js';

package/src/optimize-lookup.ts CHANGED Viewed

@@ -80,6 +80,7 @@ export function optimizeLookupTable(
       nRowsOut: params.nRowsOut,
       minPerBucket,
       requireMaxReached,
+      targetHitRate: params.targetHitRate,
     });
     const sampledIdx = stratifiedSample(buckets, filtered, quotas, rng);

package/src/qp.ts ADDED Viewed

@@ -0,0 +1,412 @@
+// src/qp.ts
+//
+// FISTA (Fast Iterative Shrinkage-Thresholding Algorithm) with simplex projection
+// for the underdetermined Tikhonov-regularized QP
+//
+//   min ‖A x − b‖² + ε ‖x − prior‖²   s.t.   x ≥ 0,  Σx = T
+//
+// Per-iteration cost: O(m·n) for the matvecs, O(n log n) for the simplex projection.
+// For our m=3 / n≤100k regime that's ~10·n flops per iter — vs Lawson-Hanson NNLS
+// which is O(m·n²) on underdetermined active-set-bouncing problems.
+//
+// Implementation notes for THIS problem family (RTP/variance/hit-rate targets):
+//
+//   1. **Jacobi (column-norm) preconditioning.** A's rows in our usage have wildly
+//      different scales — the RTP coefficient row can dominate by ~10⁷×. The
+//      change of variables x = D u with D_jj = 1/√(‖A col j‖²+ε) puts the
+//      preconditioned data-fit Hessian (AD)ᵀ(AD) into a well-conditioned regime
+//      (κ ~ m for the range space). The remaining 1/ε strong-convexity from
+//      the Tikhonov term is unaffected, but the *range* directions — which
+//      are where the data fit lives — accelerate properly.
+//      The sum constraint Σx=T maps to a weighted simplex Σ D_jj u_j = T;
+//      we project onto that with a Duchi-style O(n log n) routine.
+//
+//   2. **Adaptive restart** (O'Donoghue & Candès 2015): if the proximal step
+//      direction (uNew − u) is uphill against the gradient at y, momentum has
+//      overshot — reset t = 1. Essential for stable progress on tightly-toleranced
+//      problems where the iterates oscillate near the boundary of the active set.
+//
+//   3. **Sherman-Morrison-Woodbury warm start was considered and rejected.** When
+//      ε ≪ ‖A‖² (the common case for our toleranceRTP ~ 0.002 inputs), the
+//      formula M⁻¹ = (1/ε)(I − Aᵀ(εI + AAᵀ)⁻¹A) suffers catastrophic
+//      cancellation in the `(v − Aᵀ·…)/ε` step. The preconditioner above is
+//      sufficient on its own.
+//
+// CAVEAT: For very ill-conditioned instances (small ε, broad coefficient range),
+// FISTA needs many thousands of iterations to nail the user's tight tolerances.
+// In those regimes the active-set NNLS in `./nnls.ts` is dramatically faster on
+// the same problem class because m is tiny. solveQP is offered as a parallel
+// option in the public API; whether the orchestrator uses it or solveNNLS is
+// a deployment decision driven by the tolerance regime.
+export interface QPOptions { // Options bag consumed by solveQP.
+  /** Tikhonov coefficient ε (intended ≥ 0; solveQP does not validate it). Default 1e-6. It is also added to each squared column norm when the preconditioner is built. */
+  regularization?: number;
+  /** Tikhonov prior; solveQP throws unless it has one entry per column of A. Default = uniform sumConstraint/n. */
+  prior?: ReadonlyArray<number>;
+  /** Sum constraint: Σx = sumConstraint. Required; solveQP throws unless it is finite and ≥ 0. */
+  sumConstraint: number;
+  /** Maximum FISTA iterations. Default 500. */
+  maxIterations?: number;
+  /** Stop once ‖u_{k+1} − u_k‖_2 < tolerance · max(‖u_k‖_2, 1), measured on the preconditioned iterate u = D⁻¹x (not on x itself). Default 1e-6. */
+  tolerance?: number;
+}
+/**
+ * Solve `min ‖A x − b‖² + ε ‖x − prior‖²  s.t.  x ≥ 0, Σx = T` (nominal objective)
+ * via Jacobi-preconditioned FISTA with weighted-simplex projection. Returns x in the original coordinates (`[]` when A has no rows or no columns).
+ * For non-empty A, throws if sumConstraint is negative or non-finite, if prior.length ≠ n, or if b.length ≠ m.
+ * A is m × n. For our use case m = 3 (RTP, variance, hit-rate features);
+ * the sum constraint is enforced via projection, not as a feature row.
+ */
+export function solveQP(
+  A: ReadonlyArray<ReadonlyArray<number>>,
+  b: ReadonlyArray<number>,
+  options: QPOptions,
+): number[] {
+  const m = A.length;
+  const n = m === 0 ? 0 : A[0].length; // n comes from the first row only; other rows are not length-checked
+  if (n === 0) return [];
+  const T = options.sumConstraint;
+  if (!Number.isFinite(T) || T < 0) {
+    throw new Error(`solveQP: sumConstraint must be a non-negative finite number, got ${T}`);
+  }
+  const epsilon = options.regularization ?? 1e-6;
+  const maxIter = options.maxIterations ?? 500;
+  const tol = options.tolerance ?? 1e-6;
+  const prior = options.prior ?? new Array(n).fill(T / n);
+  if (prior.length !== n) {
+    throw new Error(`solveQP: prior length ${prior.length} does not match n=${n}`);
+  }
+  if (b.length !== m) {
+    throw new Error(`solveQP: b length ${b.length} does not match m=${m}`);
+  }
+  // ── Jacobi preconditioner ───────────────────────────────────────────────────
+  // Change of variables x = D u with D_jj = 1/√(‖A col j‖² + ε). Columns of AD
+  // then have norm ≈ 1, dramatically improving (AD)ᵀ(AD)'s conditioning.
+  // In u-coordinates:
+  //   - loss as coded below: ‖(AD) u − b‖² + ε ‖u − D⁻¹ prior‖²  (NOTE(review): in x-space this weights column j by 1/D_jj², not the plain ε‖x − prior‖² — confirm intended)
+  //   - constraints: u ≥ 0, Σ D_jj u_j = T (weighted simplex)
+  const colNormSq = new Float64Array(n);
+  for (let i = 0; i < m; i++) {
+    const row = A[i];
+    for (let j = 0; j < n; j++) colNormSq[j] += row[j] * row[j];
+  }
+  let totColNormSq = 0;
+  for (let j = 0; j < n; j++) totColNormSq += colNormSq[j];
+  const typicalScale = totColNormSq > 0 ? Math.sqrt(totColNormSq / n) : 1; // RMS column norm; only used as a fallback scale below
+  const D = new Float64Array(n);
+  const Dinv = new Float64Array(n);
+  for (let j = 0; j < n; j++) {
+    const s = Math.sqrt(colNormSq[j] + epsilon);
+    const s_ = s > 1e-30 ? s : typicalScale; // avoid dividing by a vanishing scale (e.g. all-zero column with ε = 0)
+    Dinv[j] = s_;
+    D[j] = 1 / s_;
+  }
+  // Preconditioned matrix AD (m × n) as Float64Array for tight inner loops.
+  const AD: Float64Array[] = new Array(m);
+  for (let i = 0; i < m; i++) {
+    const row = A[i];
+    const out = new Float64Array(n);
+    for (let j = 0; j < n; j++) out[j] = row[j] * D[j];
+    AD[i] = out;
+  }
+  // Prior anchor in u-space: priorU = D⁻¹ · prior so that D · priorU = prior.
+  const priorU = new Float64Array(n);
+  for (let j = 0; j < n; j++) priorU[j] = prior[j] * Dinv[j];
+  // The penalty is applied isotropically in u-space: ε ‖u − priorU‖², with the same ε for every coordinate.
+  const regDiag = epsilon;
+  // ── Lipschitz estimate in u-space ───────────────────────────────────────────
+  // L = 2 λ_max((AD)ᵀ(AD)) + 2 ε. Power iteration on the m×m matrix (AD)(AD)ᵀ, which shares its non-zero eigenvalues.
+  const L0 = 2 * spectralNormSquaredF64(AD, m, n) + 2 * regDiag;
+  let L = L0 > 0 ? L0 : 1; // fall back to 1 so the step size 1/L stays finite
+  // ── FISTA state in u-space (all Float64Array for tight inner loops) ────────
+  const xInit = T / n; // start from the uniform point x_j = T/n
+  const u = new Float64Array(n);
+  const uPrev = new Float64Array(n);
+  const y = new Float64Array(n);
+  for (let j = 0; j < n; j++) {
+    const v = xInit * Dinv[j]; // u = D⁻¹ x
+    u[j] = v;
+    uPrev[j] = v;
+    y[j] = v;
+  }
+  // Reusable scratch buffers, allocated once outside the iteration loop.
+  const g = new Float64Array(n);
+  const z = new Float64Array(n);
+  const uNew = new Float64Array(n);
+  const ADy = new Float64Array(m); // (AD)·y; written each iteration but not read anywhere else in this function
+  const r = new Float64Array(m);
+  let t = 1;
+  for (let iter = 0; iter < maxIter; iter++) {
+    // ── Gradient at y: g = 2 (AD)ᵀ ((AD) y − b) + 2 ε (y − priorU) ──────────
+    for (let i = 0; i < m; i++) {
+      const row = AD[i];
+      let s = 0;
+      for (let j = 0; j < n; j++) s += row[j] * y[j];
+      ADy[i] = s;
+      r[i] = s - b[i];
+    }
+    if (m === 3) {
+      // Hot path: unrolled for the common m=3 (RTP / CV / hit-rate).
+      const r0 = r[0], r1 = r[1], r2 = r[2];
+      const a0 = AD[0], a1 = AD[1], a2 = AD[2];
+      const tw = 2 * regDiag;
+      for (let j = 0; j < n; j++) {
+        g[j] = 2 * (a0[j] * r0 + a1[j] * r1 + a2[j] * r2) + tw * (y[j] - priorU[j]);
+      }
+    } else {
+      for (let j = 0; j < n; j++) {
+        let s = 0;
+        for (let i = 0; i < m; i++) s += AD[i][j] * r[i];
+        g[j] = 2 * s + 2 * regDiag * (y[j] - priorU[j]);
+      }
+    }
+    // ── Trial step + weighted-simplex projection, backtracking on L ──────────
+    // Compute f(y) lazily — only when we may need to backtrack. With a tight L
+    // bound, the first attempt nearly always succeeds, so we skip this work.
+    let fY = NaN;
+    let backtracks = 0;
+    const maxBacktracks = 30;
+    while (backtracks++ < maxBacktracks) {
+      const eta = 1 / L;
+      for (let j = 0; j < n; j++) z[j] = y[j] - eta * g[j];
+      projectWeightedSimplexInto(z, D, T, uNew);
+      // Cheap descent test based on the linear (not full quadratic) Taylor.
+      // ‖uNew − y‖² · L/2 + ⟨g, uNew − y⟩ should give an upper bound on f(uNew) - f(y);
+      // we accept the step on the first try unless this differs grossly from reality.
+      // For tight L this is fine; the explicit fY check only kicks in if we doubled L
+      // and want to verify before further increases.
+      let dot = 0;
+      let diffSq = 0;
+      for (let j = 0; j < n; j++) {
+        const diff = uNew[j] - y[j];
+        dot += g[j] * diff;
+        diffSq += diff * diff;
+      }
+      if (backtracks === 1) { // first attempt: the counter was already post-incremented by the loop condition
+        // Standard FISTA descent direction check: the proximal step on a smooth
+        // ‖∇²f‖ ≤ L surface yields dot + 0.5·L·diffSq ≤ 0 when the step is valid.
+        // Skip the explicit f-computation here. NOTE(review): this shortcut never evaluates f, so it leans on L being a true upper bound — confirm.
+        if (dot + 0.5 * L * diffSq <= 1e-12 * Math.max(1, L)) break;
+      }
+      // Reluctant fallback: compute f(y) and f(uNew) and check the canonical bound.
+      if (Number.isNaN(fY)) fY = computeLossUF64(AD, b, y, m, n, regDiag, priorU);
+      const fNew = computeLossUF64(AD, b, uNew, m, n, regDiag, priorU);
+      const upper = fY + dot + 0.5 * L * diffSq;
+      if (fNew <= upper + 1e-12 * Math.max(1, Math.abs(fY))) break;
+      L *= 2; // L is never reduced afterwards, so a doubled value carries into later iterations
+    }
+    // ── Adaptive restart: if step (uNew - u) is uphill against g(y), reset t ─
+    let gradTest = 0;
+    for (let j = 0; j < n; j++) gradTest += g[j] * (uNew[j] - u[j]);
+    if (gradTest > 0) t = 1; // with t = 1 the momentum coefficient computed below is 0
+    // ── Convergence: relative ‖u_{k+1} − u_k‖ ────────────────────────────────
+    let duSq = 0;
+    let uNorm = 0;
+    for (let j = 0; j < n; j++) {
+      const diff = uNew[j] - u[j];
+      duSq += diff * diff;
+      uNorm += u[j] * u[j];
+    }
+    const dxNorm = Math.sqrt(duSq); // ‖u_{k+1} − u_k‖ — a u-space quantity despite the name
+    const xn = Math.sqrt(uNorm); // ‖u_k‖
+    // ── Nesterov momentum ─────────────────────────────────────────────────────
+    const tNext = (1 + Math.sqrt(1 + 4 * t * t)) / 2;
+    const momentum = (t - 1) / tNext;
+    for (let j = 0; j < n; j++) {
+      uPrev[j] = u[j];
+      u[j] = uNew[j];
+      y[j] = uNew[j] + momentum * (uNew[j] - uPrev[j]);
+    }
+    t = tNext;
+    if (dxNorm < tol * Math.max(xn, 1)) break; // tested after the state update, so u already holds the accepted iterate
+  }
+  // ── Return to x-space: x = D u ─────────────────────────────────────────────
+  const x = new Array(n);
+  for (let j = 0; j < n; j++) x[j] = u[j] * D[j];
+  return x;
+}
+/**
+ * Project y onto the simplex {x : x ≥ 0, Σx = T} via Duchi et al. 2008.
+ * Returns a new array (y itself is only read; `[]` for empty y). O(n log n) due to the sort.
+ * Throws if T is negative or non-finite; the check runs only when y is non-empty.
+ * Exported for testing and direct reuse.
+ */
+export function projectSimplex(y: ReadonlyArray<number>, T: number): number[] {
+  const n = y.length;
+  if (n === 0) return [];
+  if (!Number.isFinite(T) || T < 0) {
+    throw new Error(`projectSimplex: T must be a non-negative finite number, got ${T}`);
+  }
+  const sorted = y.slice().sort((a, b) => b - a) as number[]; // sorted copy, largest first
+  let cssv = 0; // running sum of the sorted prefix
+  let bestCssv = 0; // prefix sum at the last index that passed the test
+  let rho = -1; // last index that passed; -1 means none did
+  for (let j = 0; j < n; j++) {
+    cssv += sorted[j];
+    const threshold = (cssv - T) / (j + 1);
+    if (sorted[j] - threshold > 0) {
+      rho = j;
+      bestCssv = cssv;
+    } else {
+      break; // stop at the first entry that fails the test
+    }
+  }
+  if (rho < 0) { // nothing qualified (this is what happens for T = 0): return the uniform point T/n
+    const uVal = T / n;
+    return new Array(n).fill(uVal);
+  }
+  const tau = (bestCssv - T) / (rho + 1); // shift applied to every entry before clipping at 0
+  const out = new Array(n);
+  for (let i = 0; i < n; i++) {
+    const v = y[i] - tau;
+    out[i] = v > 0 ? v : 0;
+  }
+  return out;
+}
+/**
+ * Project y onto the weighted simplex {u : u ≥ 0, Σ w_j u_j = T} with w_j > 0,
+ * writing into `out` (y and w are only read). Used as the proximal step in u-coordinates inside FISTA.
+ * The length is taken from y; w and out are assumed to be at least that long — not checked here.
+ *   u_j* = max(0, y_j − λ w_j)     for the unique λ s.t. Σ w_j · u_j* = T.
+ *
+ * f(λ) = Σ w_j · max(0, y_j − λ w_j) is continuous, piecewise-linear and strictly
+ * decreasing on (λ_min, λ_max). Sort the breakpoints t_j = y_j/w_j descending and
+ * walk through to find the active set (analogous to Duchi 2008). O(n log n).
+ */
+function projectWeightedSimplexInto(
+  y: Float64Array,
+  w: Float64Array,
+  T: number,
+  out: Float64Array,
+): void {
+  const n = y.length;
+  if (n === 0) return;
+  const t = new Float64Array(n); // breakpoints y_j / w_j; allocated on every call
+  for (let j = 0; j < n; j++) t[j] = y[j] / w[j];
+  const idx = new Array<number>(n); // index permutation; also allocated on every call
+  for (let j = 0; j < n; j++) idx[j] = j;
+  idx.sort((a, b) => t[b] - t[a]); // order indices by breakpoint, largest first
+  let Sy = 0; // running Σ w_j·y_j over the visited prefix
+  let Sw2 = 0; // running Σ w_j² over the visited prefix
+  let lambda = 0;
+  let rho = -1; // last prefix position that passed; -1 means none did
+  for (let k = 0; k < n; k++) {
+    const j = idx[k];
+    Sy += w[j] * y[j];
+    Sw2 += w[j] * w[j];
+    const lamCand = (Sy - T) / Sw2;
+    if (t[j] > lamCand) {
+      rho = k;
+      lambda = lamCand;
+    } else {
+      break;
+    }
+  }
+  if (rho < 0) { // nothing qualified (this is what happens for T = 0, where the result is all zeros)
+    const xOver = T / n;
+    for (let j = 0; j < n; j++) out[j] = w[j] * xOver; // NOTE(review): gives Σ w_j·out_j = (T/n)·Σ w_j², not T; the uniform x_j = T/n would be out_j = xOver / w[j] — confirm intent
+    return;
+  }
+  for (let j = 0; j < n; j++) {
+    const v = y[j] - lambda * w[j];
+    out[j] = v > 0 ? v : 0;
+  }
+}
+/**
+ * F(u) = ‖(AD) u − b‖² + ε Σ_j (u_j − priorU_j)²   (loss in u-coordinates,
+ * Float64Array variant for the FISTA hot path). `regDiag` is the ε multiplier; only the first m rows and first n entries are read, and no input is modified.
+ */
+function computeLossUF64(
+  AD: ReadonlyArray<Float64Array>,
+  b: ReadonlyArray<number>,
+  u: Float64Array,
+  m: number,
+  n: number,
+  regDiag: number,
+  priorU: Float64Array,
+): number {
+  let dataSq = 0; // accumulates the squared residual of the data-fit term
+  for (let i = 0; i < m; i++) {
+    const row = AD[i];
+    let s = 0;
+    for (let j = 0; j < n; j++) s += row[j] * u[j];
+    const r = s - b[i]; // residual of row i
+    dataSq += r * r;
+  }
+  let regSq = 0; // accumulates ‖u − priorU‖²
+  for (let j = 0; j < n; j++) {
+    const diff = u[j] - priorU[j];
+    regSq += diff * diff;
+  }
+  return dataSq + regDiag * regSq;
+}
+/**
+ * Estimate λ_max(MᵀM) = λ_max(MMᵀ) (the squared largest singular value of M) via power iteration on the m×m matrix MMᵀ.
+ * Cost: O(m²·n) to build, O(m²) per iteration. For m=3 effectively free.
+ * Runs at most 30 iterations; returns 0 when m or n is 0 or when MMᵀ·v vanishes. The value returned is ‖MMᵀ·v‖ for a unit v, so it cannot exceed the true λ_max.
+ * Float64Array variant — same routine, different storage type.
+ */
+function spectralNormSquaredF64(
+  M: ReadonlyArray<Float64Array>,
+  m: number,
+  n: number,
+): number {
+  if (m === 0 || n === 0) return 0;
+  const MMt: number[][] = Array.from({ length: m }, () => new Array(m).fill(0)); // Gram matrix of the rows of M
+  for (let i = 0; i < m; i++) {
+    for (let k = i; k < m; k++) { // compute the upper triangle and mirror it
+      let s = 0;
+      const Mi = M[i];
+      const Mk = M[k];
+      for (let j = 0; j < n; j++) s += Mi[j] * Mk[j];
+      MMt[i][k] = s;
+      MMt[k][i] = s;
+    }
+  }
+  let v = new Array(m).fill(1 / Math.sqrt(m)); // unit-norm start vector with equal entries
+  let lambda = 0;
+  for (let it = 0; it < 30; it++) {
+    const w = new Array(m).fill(0);
+    for (let i = 0; i < m; i++) {
+      let s = 0;
+      for (let k = 0; k < m; k++) s += MMt[i][k] * v[k];
+      w[i] = s;
+    }
+    let norm = 0;
+    for (let i = 0; i < m; i++) norm += w[i] * w[i];
+    norm = Math.sqrt(norm);
+    if (norm < 1e-30) return 0; // MMᵀ·v is numerically zero; report 0 rather than divide by it
+    const newV = new Array(m);
+    for (let i = 0; i < m; i++) newV[i] = w[i] / norm;
+    if (Math.abs(norm - lambda) < 1e-10 * Math.max(1, norm)) { // estimate stopped changing (relative 1e-10)
+      lambda = norm;
+      break;
+    }
+    lambda = norm;
+    v = newV;
+  }
+  return lambda;
+}

package/src/quantize.ts CHANGED Viewed

@@ -33,27 +33,33 @@ export function quantizeWeights(weights: ReadonlyArray<number>, total: number):
       Math.round(Math.max(0, w - floors[i]) * 1e10) / 1e10,
     );
     const order = indicesSortedByDesc(remainders);
-    for (let k = 0; k < deficit; k++) floors[order[k]]++;
+    // Distribute deficit across rows. If deficit > n, give each row floor(deficit/n)
+    // plus one extra to the top (deficit % n) rows.
+    const bulk = Math.floor(deficit / n);
+    if (bulk > 0) for (let i = 0; i < n; i++) floors[i] += bulk;
+    const remainder = deficit - bulk * n;
+    for (let k = 0; k < remainder; k++) floors[order[k]]++;
   } else if (deficit < 0) {
     // Remove 1's from rows with the largest current weight, never going below 1.
+    // Single sort + single greedy pass: from the largest-floor row downward,
+    // take as much as possible (capped by floors[i] − 1) until toRemove == 0.
+    // O(n log n) total — previously O(K · n log n) when many rows are clamped at 1.
     let toRemove = -deficit;
-    while (toRemove > 0) {
-      const order = indicesSortedByDesc(floors);
-      let progress = false;
-      for (const i of order) {
-        if (toRemove === 0) break;
-        if (floors[i] > 1) {
-          floors[i]--;
-          toRemove--;
-          progress = true;
-        }
-      }
-      if (!progress) {
-        // Shouldn't happen: total >= n was checked; sumFloors was at most total + (max(1, .) bias),
-        // and that bias is ≤ n which can always be reclaimed.
-        throw new Error('quantizeWeights: cannot reduce further while keeping w_i >= 1');
+    const order = indicesSortedByDesc(floors);
+    for (const i of order) {
+      if (toRemove === 0) break;
+      const removable = floors[i] - 1;
+      if (removable > 0) {
+        const take = Math.min(removable, toRemove);
+        floors[i] -= take;
+        toRemove -= take;
       }
     }
+    if (toRemove > 0) {
+      // Shouldn't happen: total >= n was checked; sumFloors was at most total + (max(1, .) bias),
+      // and that bias is ≤ n which can always be reclaimed.
+      throw new Error('quantizeWeights: cannot reduce further while keeping w_i >= 1');
+    }
   }
   return floors;

package/src/sample.ts CHANGED Viewed

@@ -84,6 +84,11 @@ export interface QuotaParams {
   nRowsOut: number;
   minPerBucket: number;
   requireMaxReached: boolean;
+  /** Optional: bias the candidate pool toward this non-zero fraction (0..1).
+   *  When set, zeroBucket gets approximately `(1 − targetHitRate) × nRowsOut`
+   *  slots and the log buckets share the rest. When unset, current
+   *  variance-contribution distribution applies (zero gets leftover). */
+  targetHitRate?: number;
 }
 export interface Quotas {
@@ -103,7 +108,7 @@ export interface Quotas {
  * All quotas are integers and sum to nRowsOut.
  */
 export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas {
-  const { nRowsOut, minPerBucket, requireMaxReached } = params;
+  const { nRowsOut, minPerBucket, requireMaxReached, targetHitRate } = params;
   // Count non-empty log buckets — these are the ones eligible for minPerBucket.
   const nonEmptyLogCount = buckets.logBuckets.reduce(
@@ -112,6 +117,28 @@ export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas
   );
   const wantNearMax = requireMaxReached && buckets.nearMaxBucket.indices.length > 0;
+  const totalAvailable =
+    buckets.zeroBucket.indices.length +
+    buckets.logBuckets.reduce((s, b) => s + b.indices.length, 0) +
+    buckets.nearMaxBucket.indices.length;
+  const expected = Math.min(nRowsOut, totalAvailable);
+  // ── targetHitRate-biased path ────────────────────────────────────────────
+  if (typeof targetHitRate === 'number' && targetHitRate > 0 && targetHitRate < 1) {
+    const result = computeQuotasByTargetHitRate(buckets, {
+      nRowsOut,
+      minPerBucket,
+      requireMaxReached,
+      targetHitRate,
+      nonEmptyLogCount,
+      wantNearMax,
+      totalAvailable,
+      expected,
+    });
+    return result;
+  }
+  // ── Original variance-contribution path ──────────────────────────────────
   // Compute an effective minPerBucket so the floor allocation does not exceed nRowsOut.
   // Floor at 0; near-max keeps its 1 slot when room allows, dropped only as a last resort.
   let effectiveMinPerBucket = minPerBucket;
@@ -185,15 +212,181 @@ export function computeQuotas(buckets: QuotaInput, params: QuotaParams): Quotas
   // Defensive invariant: quotas must sum to exactly nRowsOut, unless the
   // total available indices across all buckets are fewer than nRowsOut (in
   // which case the cap at total available is the best achievable).
-  const totalAvailable =
-    buckets.zeroBucket.indices.length +
+  const total = zeroQuota + logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
+  if (total !== expected) {
+    throw new Error(
+      `computeQuotas invariant violated: total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable})`,
+    );
+  }
+  return { zeroBucket: zeroQuota, logBuckets: logQuotas, nearMaxBucket: nearMaxQuota };
+}
+/**
+ * Splits `nRowsOut` so the candidate pool's non-zero fraction ≈ `targetHitRate`.
+ * This fixes the lopsided-row-composition bug in `optimizeLookupTable` when the
+ * source distribution's natural hit-rate is far from `targetHitRate`.
+ *
+ * The non-zero share is distributed across log + near-max buckets using the same
+ * (minPerBucket floor → variance-contribution remainder) heuristic as the
+ * default path, but constrained to a smaller budget. Any shortfall in either
+ * the zero or non-zero side spills over to the other side so total === nRowsOut.
+ */
+function computeQuotasByTargetHitRate(
+  buckets: QuotaInput,
+  ctx: {
+    nRowsOut: number;
+    minPerBucket: number;
+    requireMaxReached: boolean;
+    targetHitRate: number;
+    nonEmptyLogCount: number;
+    wantNearMax: boolean;
+    totalAvailable: number;
+    expected: number;
+  },
+): Quotas {
+  const { nRowsOut, minPerBucket, targetHitRate, nonEmptyLogCount, wantNearMax, totalAvailable, expected } = ctx;
+  const nonZeroAvailable =
     buckets.logBuckets.reduce((s, b) => s + b.indices.length, 0) +
     buckets.nearMaxBucket.indices.length;
-  const expected = Math.min(nRowsOut, totalAvailable);
+  const zeroAvailable = buckets.zeroBucket.indices.length;
+  let nonZeroSlots = Math.round(targetHitRate * nRowsOut);
+  let zeroSlots = nRowsOut - nonZeroSlots;
+  // Cap each side by what's available; spill the leftover to the other side.
+  if (nonZeroSlots > nonZeroAvailable) {
+    zeroSlots += nonZeroSlots - nonZeroAvailable;
+    nonZeroSlots = nonZeroAvailable;
+  }
+  if (zeroSlots > zeroAvailable) {
+    nonZeroSlots += zeroSlots - zeroAvailable;
+    zeroSlots = zeroAvailable;
+  }
+  // Final cap (only matters when totalAvailable < nRowsOut).
+  if (nonZeroSlots > nonZeroAvailable) nonZeroSlots = nonZeroAvailable;
+  if (zeroSlots > zeroAvailable) zeroSlots = zeroAvailable;
+  // Scale effectiveMinPerBucket down so the floor allocation fits within the
+  // non-zero budget. Same logic as the default path, just constrained to
+  // `nonZeroSlots` instead of `nRowsOut`.
+  let effectiveMinPerBucket = minPerBucket;
+  while (
+    effectiveMinPerBucket > 0 &&
+    nonEmptyLogCount * effectiveMinPerBucket + (wantNearMax ? 1 : 0) > nonZeroSlots
+  ) {
+    effectiveMinPerBucket--;
+  }
+  let nearMaxQuota =
+    wantNearMax && nonEmptyLogCount * effectiveMinPerBucket < nonZeroSlots ? 1 : 0;
+  const logQuotas = buckets.logBuckets.map((b) => {
+    if (b.indices.length === 0) return 0;
+    return Math.min(effectiveMinPerBucket, b.indices.length);
+  });
+  let assigned = logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
+  let remainingNonZero = nonZeroSlots - assigned;
+  // Variance-contribution remainder, with redistribution when any bucket caps
+  // out (so the non-zero budget gets fully consumed before spilling to zero).
+  if (remainingNonZero > 0) {
+    const contrib = buckets.logBuckets.map((b) => {
+      if (b.indices.length === 0) return 0;
+      const mean = b.weightedPayoutSum / Math.max(1, b.totalWeight);
+      return b.totalWeight * mean * mean;
+    });
+    // Iteratively allocate by contribution among non-capped buckets, then
+    // redistribute any over-allocation. Capped at logBuckets.length + 2 passes.
+    let extraToPlace = remainingNonZero;
+    const eligible = buckets.logBuckets.map((b, i) => b.indices.length - logQuotas[i] > 0);
+    const maxPasses = buckets.logBuckets.length + 2;
+    for (let pass = 0; pass < maxPasses && extraToPlace > 0; pass++) {
+      let activeContrib = 0;
+      for (let i = 0; i < buckets.logBuckets.length; i++) {
+        if (eligible[i]) activeContrib += contrib[i];
+      }
+      if (activeContrib > 0) {
+        const proposed = buckets.logBuckets.map((_, i) =>
+          eligible[i] ? (contrib[i] / activeContrib) * extraToPlace : 0,
+        );
+        const floors = proposed.map(Math.floor);
+        const used = floors.reduce((s, v) => s + v, 0);
+        const remainders = proposed.map((p, i) => p - floors[i]);
+        const order = remainders
+          .map((_, i) => i)
+          .filter((i) => eligible[i])
+          .sort((a, b) => remainders[b] - remainders[a]);
+        let extra = extraToPlace - used;
+        for (const i of order) {
+          if (extra === 0) break;
+          floors[i]++;
+          extra--;
+        }
+        // Apply, capping at room.
+        let placed = 0;
+        for (let i = 0; i < floors.length; i++) {
+          if (!eligible[i] || floors[i] <= 0) continue;
+          const room = buckets.logBuckets[i].indices.length - logQuotas[i];
+          const give = Math.min(floors[i], room);
+          logQuotas[i] += give;
+          placed += give;
+          if (give === room) eligible[i] = false;
+        }
+        extraToPlace -= placed;
+        if (placed === 0) break; // No progress (everything is capped).
+      } else {
+        // No variance signal among eligible — fill remaining buckets evenly by room.
+        const order = buckets.logBuckets
+          .map((b, i) => ({ i, room: b.indices.length - logQuotas[i] }))
+          .filter((o) => o.room > 0 && eligible[o.i])
+          .sort((a, b) => b.room - a.room);
+        for (const { i, room } of order) {
+          if (extraToPlace === 0) break;
+          const give = Math.min(room, extraToPlace);
+          logQuotas[i] += give;
+          extraToPlace -= give;
+        }
+        break;
+      }
+    }
+    remainingNonZero = extraToPlace;
+  }
+  // If any non-zero slot is still unassigned (every log bucket capped),
+  // spill it to zero (only path left when totalAvailable still allows it).
+  if (remainingNonZero > 0) {
+    const headroomToZero = Math.min(remainingNonZero, zeroAvailable - zeroSlots);
+    zeroSlots += headroomToZero;
+    remainingNonZero -= headroomToZero;
+  }
+  let zeroQuota = Math.min(zeroSlots, zeroAvailable);
+  // If zero bucket can't soak its share, spill to the largest log buckets.
+  let leftover = zeroSlots - zeroQuota;
+  if (leftover > 0) {
+    const order = buckets.logBuckets
+      .map((b, i) => ({ i, room: b.indices.length - logQuotas[i] }))
+      .sort((a, b) => b.room - a.room);
+    for (const { i, room } of order) {
+      if (leftover === 0) break;
+      const give = Math.min(room, leftover);
+      logQuotas[i] += give;
+      leftover -= give;
+    }
+    if (leftover > 0 && wantNearMax && nearMaxQuota === 0 && buckets.nearMaxBucket.indices.length > 0) {
+      nearMaxQuota = 1;
+      leftover--;
+    }
+  }
+  // Final defensive invariant.
   const total = zeroQuota + logQuotas.reduce((s, q) => s + q, 0) + nearMaxQuota;
   if (total !== expected) {
     throw new Error(
-      `computeQuotas invariant violated: total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable})`,
+      `computeQuotas invariant violated (targetHitRate path): total=${total}, expected=${expected} (nRowsOut=${nRowsOut}, totalAvailable=${totalAvailable}, targetHitRate=${targetHitRate})`,
     );
   }

package/test/optimize-lookup.integration.test.ts CHANGED Viewed

@@ -139,6 +139,42 @@ describe('integration', () => {
     expect(sum).toBe(1000 * 1_000_000);
   });
+  it('7. row composition reflects targetHitRate (not just weighted hit-rate)', () => {
+    // Source distribution: a row only gets a payout draw when u > 0.7, so its unweighted hit-rate is at most ≈ 0.30 (rng-controlled).
+    const rng = makeRng(7);
+    const rows: LookupRow[] = new Array(50_000);
+    for (let i = 0; i < 50_000; i++) {
+      const u = rng();
+      let p = 0;
+      if (u > 0.7) p = Math.floor(rng() * 200); // the draw itself can still floor to 0
+      if (u > 0.97) p = Math.floor(rng() * 5_000); // later tiers overwrite the earlier draw
+      if (u > 0.999) p = Math.floor(rng() * 50_000);
+      rows[i] = { sim: i, weight: 1 + Math.floor(rng() * 100), payoutCents: p };
+    }
+    // Target hit-rate well below source (0.20 vs 0.30)
+    const result = optimizeLookupTable(rows, {
+      targetRTP: 0.96, toleranceRTP: 0.01,
+      targetCV: 5.0, toleranceCV: 2.0,
+      targetHitRate: 0.20, toleranceHitRate: 0.02,
+      capMaxWin: 50_000,
+      nRowsOut: 1000,
+      requireMaxReached: false,
+      maxIterations: 3,
+    });
+    // Weighted hit-rate hits target.
+    expect(result.toleranceMet.hitRate).toBe(true);
+    // ROW composition is roughly 80% zero, 20% non-zero.
+    let nZero = 0;
+    for (const r of result.rows) if (r.payoutCents === 0) nZero++;
+    const zeroRowFraction = nZero / result.rows.length;
+    // Accept a zero-row fraction strictly between 0.75 and 0.85, i.e. (1 − targetHitRate) = 0.80 ± 0.05 absolute.
+    expect(zeroRowFraction).toBeGreaterThan(0.75);
+    expect(zeroRowFraction).toBeLessThan(0.85);
+  });
   it('6. handles nRowsOut=5000 without n² memory blowup', () => {
     // Pre-fix this would allocate a 5000×5000 dense matrix (200 MB Float64);
     // after the implicit-Tikhonov fix it should fit in well under 100 MB and

package/test/qp.test.ts ADDED Viewed

@@ -0,0 +1,105 @@
+// test/qp.test.ts
+import { describe, expect, it } from 'vitest';
+import { solveQP, projectSimplex } from '../src/qp.js';
+describe('projectSimplex', () => {
+  it('projects to the simplex when input sum exceeds T', () => {
+    // y = [4, 3, 2, 1], T = 5
+    // expected: tau s.t. max(0, y - tau) sums to 5; sort desc [4,3,2,1]
+    //   j=0: cssv=4, thresh=(4-5)/1=-1, 4-(-1)=5>0, rho=0
+    //   j=1: cssv=7, thresh=(7-5)/2=1, 3-1=2>0, rho=1
+    //   j=2: cssv=9, thresh=(9-5)/3=1.333, 2-1.333=0.667>0, rho=2
+    //   j=3: cssv=10, thresh=(10-5)/4=1.25, 1-1.25<0, stop
+    // tau = (9-5)/3 = 4/3
+    // result: max(0, [4,3,2,1] - 4/3) = [8/3, 5/3, 2/3, 0]
+    const out = projectSimplex([4, 3, 2, 1], 5);
+    expect(out.reduce((a, b) => a + b, 0)).toBeCloseTo(5, 8);
+    expect(out[0]).toBeCloseTo(8 / 3, 6);
+    expect(out[3]).toBeCloseTo(0, 6);
+  });
+  it('caps each entry at ≥ 0', () => {
+    const out = projectSimplex([-10, -5, 1, 2], 1);
+    for (const v of out) expect(v).toBeGreaterThanOrEqual(-1e-12);
+    expect(out.reduce((a, b) => a + b, 0)).toBeCloseTo(1, 8);
+  });
+  it('handles uniform input', () => {
+    // y = [1,1,1,1], T = 4 → x = [1,1,1,1]
+    const out = projectSimplex([1, 1, 1, 1], 4);
+    for (const v of out) expect(v).toBeCloseTo(1, 8);
+  });
+});
+describe('solveQP', () => {
+  it('solves trivial unconstrained-like case where prior is feasible', () => {
+    // A = [[1,1,1]], b = [3], prior = [1,1,1], T = 3
+    // The prior itself satisfies sum=3 and the equation Σx = 3.
+    const x = solveQP([[1, 1, 1]], [3], {
+      sumConstraint: 3,
+      prior: [1, 1, 1],
+      regularization: 1e-6,
+    });
+    expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(3, 6);
+    for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-9);
+    expect(x[0]).toBeCloseTo(1, 3);
+    expect(x[1]).toBeCloseTo(1, 3);
+    expect(x[2]).toBeCloseTo(1, 3);
+  });
+  it('respects non-negativity when LS solution would go negative', () => {
+    // A = [[1, -1]], b = [-2], T = 2: unconstrained LS pushes x[0] toward 0
+    // and x[1] toward 2.
+    const x = solveQP([[1, -1]], [-2], {
+      sumConstraint: 2,
+      prior: [1, 1],
+      regularization: 1e-4,
+    });
+    expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(2, 6);
+    for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-9);
+    expect(x[0]).toBeLessThan(0.5);
+    expect(x[1]).toBeGreaterThan(1.5);
+  });
+  it('matches solveNNLS to working tolerance on the underdetermined 2×4 case', () => {
+    // Same test as in nnls.test.ts: A=[[1,1,0,0],[0,0,1,1]], b=[4,6], prior=[1,1,1,1], T=10
+    // Tikhonov pulls toward x≈[2,2,3,3]. Sum constraint is enforced exactly here (4+6=10).
+    const x = solveQP(
+      [[1, 1, 0, 0], [0, 0, 1, 1]],
+      [4, 6],
+      {
+        sumConstraint: 10,
+        prior: [1, 1, 1, 1],
+        regularization: 1e-6,
+      },
+    );
+    expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(10, 4);
+    expect(x[0]).toBeCloseTo(2, 1);
+    expect(x[1]).toBeCloseTo(2, 1);
+    expect(x[2]).toBeCloseTo(3, 1);
+    expect(x[3]).toBeCloseTo(3, 1);
+  });
+  it('converges for n=10000 in under 1 second', () => {
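+    // Deterministic synthetic problem; row 3 (Σx = 0.3·T) conflicts with sumConstraint = T, so only runtime, sum, and non-negativity are checked.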
+    const n = 10_000;
+    const T = n * 100; // 1_000_000
+    const A: number[][] = [
+      Array.from({ length: n }, (_, i) => (i % 100)),
+      Array.from({ length: n }, (_, i) => ((i * 7) % 200)),
+      Array.from({ length: n }, () => 1),
+    ];
+    const b = [T * 50, T * 100, T * 0.3];
+    const t0 = performance.now();
+    const x = solveQP(A, b, {
+      sumConstraint: T,
+      regularization: 1e-6,
+      maxIterations: 200,
+    });
+    const elapsed = performance.now() - t0;
+    expect(elapsed).toBeLessThan(1000);
+    expect(x.reduce((a, b) => a + b, 0)).toBeCloseTo(T, 0); // exact via projection
+    for (const v of x) expect(v).toBeGreaterThanOrEqual(-1e-6);
+  });
+});

package/test/quantize.test.ts CHANGED Viewed

@@ -38,4 +38,28 @@ describe('quantizeWeights', () => {
     const out = quantizeWeights([1.5, 2.5, 3.5], 8);
     expect(out).toEqual([2, 3, 3]);
   });
+  it('handles large n with many floor-1 rows efficiently (regression: was O(K·n log n))', () => {
+    // Synthesize a scenario that previously took 30+ seconds:
+    // ~99% of rows have continuous weight near 0 (will clamp to floor 1)
+    // ~1% of rows have large weight
+    const n = 100_000;
+    const T = n * 1_000_000;
+    const weights = new Array(n);
+    for (let i = 0; i < n; i++) {
+      // 99% small, 1% large
+      weights[i] = i % 100 === 0 ? T / 1000 : 0.0001;
+    }
+    const t0 = performance.now();
+    const out = quantizeWeights(weights, T);
+    const elapsed = performance.now() - t0;
+    expect(elapsed).toBeLessThan(1000); // 1 second — was 30+s before fix
+    expect(out.length).toBe(n);
+    let sum = 0;
+    for (const v of out) {
+      sum += v;
+      expect(v).toBeGreaterThanOrEqual(1);
+    }
+    expect(sum).toBe(T);
+  });
 });

package/test/sample.test.ts CHANGED Viewed

@@ -99,6 +99,82 @@ describe('computeQuotas (over-allocation guard)', () => {
   });
 });
+describe('computeQuotas — targetHitRate bias', () => {
+  it('allocates zero/nonzero quotas proportional to targetHitRate', () => {
+    const zero: Bucket = {
+      indices: Array.from({ length: 5000 }, (_, i) => i),
+      totalWeight: 5000,
+      weightedPayoutSum: 0,
+    };
+    const log: Bucket[] = Array.from({ length: 5 }, (_, k) => ({
+      indices: Array.from({ length: 200 }, (_, j) => 5000 + k * 200 + j),
+      totalWeight: 200,
+      weightedPayoutSum: 200 * (10 ** (k + 1)),
+    }));
+    const nearMax: Bucket = { indices: [5999], totalWeight: 1, weightedPayoutSum: 1000 };
+    const quotas = computeQuotas(
+      { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
+      { nRowsOut: 1000, minPerBucket: 3, requireMaxReached: false, targetHitRate: 0.2 },
+    );
+    // Expect ~800 zero and ~200 non-zero (summed over all log buckets plus the near-max bucket)
+    expect(quotas.zeroBucket).toBeGreaterThanOrEqual(750);
+    expect(quotas.zeroBucket).toBeLessThanOrEqual(850);
+    const totalNonZero = quotas.logBuckets.reduce((s, q) => s + q, 0) + quotas.nearMaxBucket;
+    expect(totalNonZero).toBeGreaterThanOrEqual(150);
+    expect(totalNonZero).toBeLessThanOrEqual(250);
+    const total = quotas.zeroBucket + totalNonZero;
+    expect(total).toBe(1000);
+  });
+  it('falls back to variance-contribution behavior when targetHitRate is unset', () => {
+    // Same buckets as the very first computeQuotas test in this file — invariant
+    // must be preserved (zero bucket absorbs leftover; total === nRowsOut).
+    const zero: Bucket = { indices: Array(100).fill(0).map((_, i) => i), totalWeight: 100, weightedPayoutSum: 0 };
+    const log: Bucket[] = [
+      { indices: [100, 101, 102], totalWeight: 3, weightedPayoutSum: 30 },
+      { indices: [103, 104, 105, 106, 107], totalWeight: 5, weightedPayoutSum: 200 },
+      { indices: [], totalWeight: 0, weightedPayoutSum: 0 },
+    ];
+    const nearMax: Bucket = { indices: [107], totalWeight: 1, weightedPayoutSum: 100 };
+    const quotas = computeQuotas(
+      { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
+      { nRowsOut: 20, minPerBucket: 3, requireMaxReached: true /* no targetHitRate */ },
+    );
+    // Pre-fix invariants — same as the original test
+    expect(quotas.logBuckets[0]).toBeGreaterThanOrEqual(3);
+    expect(quotas.logBuckets[1]).toBeGreaterThanOrEqual(3);
+    expect(quotas.logBuckets[2]).toBe(0);
+    expect(quotas.nearMaxBucket).toBeGreaterThanOrEqual(1);
+    const total = quotas.zeroBucket + quotas.logBuckets.reduce((a, b) => a + b, 0) + quotas.nearMaxBucket;
+    expect(total).toBe(20);
+  });
+  it('handles targetHitRate=0.5 on a balanced distribution', () => {
+    const zero: Bucket = {
+      indices: Array.from({ length: 1000 }, (_, i) => i),
+      totalWeight: 1000,
+      weightedPayoutSum: 0,
+    };
+    const log: Bucket[] = Array.from({ length: 5 }, (_, k) => ({
+      indices: Array.from({ length: 200 }, (_, j) => 1000 + k * 200 + j),
+      totalWeight: 200,
+      weightedPayoutSum: 200 * (10 ** (k + 1)),
+    }));
+    const nearMax: Bucket = { indices: [1999], totalWeight: 1, weightedPayoutSum: 1000 };
+    const quotas = computeQuotas(
+      { zeroBucket: zero, logBuckets: log, nearMaxBucket: nearMax },
+      { nRowsOut: 500, minPerBucket: 3, requireMaxReached: false, targetHitRate: 0.5 },
+    );
+    expect(quotas.zeroBucket).toBeGreaterThanOrEqual(225);
+    expect(quotas.zeroBucket).toBeLessThanOrEqual(275);
+    const totalNonZero = quotas.logBuckets.reduce((s, q) => s + q, 0) + quotas.nearMaxBucket;
+    expect(totalNonZero).toBeGreaterThanOrEqual(225);
+    expect(totalNonZero).toBeLessThanOrEqual(275);
+    const total = quotas.zeroBucket + totalNonZero;
+    expect(total).toBe(500);
+  });
+});
 describe('stratifiedSample (overlap top-up)', () => {
   it('delivers exactly the total quota even when near-max overlaps log buckets', () => {
     // Top log bucket overlaps near-max bucket; near-max consumes enough that the