@energy8platform/stake-math-tools 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ /**
2
+ * Streaming `*.jsonl.zst → *.jsonl.zst` transformer.
3
+ *
4
+ * Decompresses the input with zstd, runs each line through an optional user
5
+ * mapper, recompresses the result. Pipes everything — no temp files, memory
6
+ * footprint stays at one line regardless of input size, so gigabyte books
7
+ * files work fine.
8
+ *
9
+ * Why not `readline.createInterface`? readline accumulates each line as a
10
+ * growing JS string (`buffer += chunk`), which hits V8's ~512MB string-length
11
+ * limit on books files that contain very long event arrays — manifests as
12
+ * `RangeError: Invalid string length` deep in node:internal/readline. The
13
+ * Buffer-based splitter here keeps incomplete-line state as raw bytes and
14
+ * only materializes a string at the LF boundary (mapper mode) or never
15
+ * (identity mode), so any line that fits in OS memory is fine.
16
+ *
17
+ * In identity mode (no mapper) we don't split at all — we pipe decompressor
18
+ * output directly into compressor input as raw bytes. That's the fastest
19
+ * path: ~25 MB/s of compressed input on a single core, dominated by the
20
+ * zstd subprocesses, with zero JS string allocation.
21
+ *
22
+ * Requires the `zstd` binary on PATH (same precondition as the rest of the
23
+ * stake-bridge tooling).
24
+ */
25
+
26
+ import { spawn } from 'node:child_process';
27
+
28
/**
 * Callback applied to every input line after it has been decoded to a string.
 *
 * @param line  The line's text, without its LF terminator (and without a
 *              trailing CR, which is stripped before the mapper is called).
 * @param index Zero-based position of the line in the input.
 * @returns A `string` to emit in place of the line, a `string[]` to emit
 *          several lines, or `null` / `undefined` to emit nothing.
 */
export type LineMapper = (line: string, index: number) => string | string[] | null | undefined;
32
+
33
/**
 * Callback applied to every input line as raw bytes (no string decoding).
 *
 * @param line  The line's bytes, without its LF terminator (and without a
 *              trailing CR, which is stripped before the mapper is called).
 *              The buffer can be a view onto a larger stream chunk, so copy
 *              it if you need to keep it after the call returns.
 * @param index Zero-based position of the line in the input.
 * @returns A `Buffer` or `string` to emit in place of the line, an array of
 *          those to emit several lines, or `null` / `undefined` to emit
 *          nothing.
 */
export type BinaryLineMapper = (
  line: Buffer,
  index: number,
) => Buffer | string | Array<Buffer | string> | null | undefined;
37
+
38
/** Options accepted by `transformJsonlZst`. */
export interface TransformJsonlZstParams {
  /** Path to a zstd-compressed `.jsonl.zst` file. */
  inputPath: string;

  /** Path where the transformed `.jsonl.zst` will be written (overwritten). */
  outputPath: string;

  /**
   * Per-line transform with the line decoded as a JS string. When neither
   * `mapper` nor `binaryMapper` is given, the transformer runs in identity
   * mode: a raw byte pipe with no per-line allocations.
   *
   * - Return a `string` to replace the line with that content.
   * - Return a `string[]` to expand one input line into several output lines.
   * - Return `null` / `undefined` to drop the line entirely.
   *
   * Mutually exclusive with `binaryMapper`. Use `binaryMapper` instead when
   * any single line could exceed V8's ~512 MB string-length cap (e.g. bonus
   * game books with massive event arrays) — `toString('utf8')` will throw
   * `ERR_STRING_TOO_LONG` on lines above that limit.
   */
  mapper?: LineMapper;

  /**
   * Per-line transform with the line passed as a raw `Buffer`. Use this for
   * any line that may exceed V8's ~512 MB string limit, or when you only need
   * to peek at a small prefix (`line.subarray(0, 64).toString()`) and want to
   * pass the rest of the bytes through verbatim.
   *
   * Return shape:
   * - `Buffer` or `string` — replace the line with that content.
   * - array of `Buffer | string` — expand to N output lines.
   * - `null` / `undefined` — drop.
   *
   * Mutually exclusive with `mapper`.
   */
  binaryMapper?: BinaryLineMapper;

  /**
   * zstd compression level for the output. 1 = fastest, 22 = smallest.
   * Default 9 — same level the kitsune optimize pipeline uses.
   */
  zstdLevel?: number;

  /**
   * Progress callback, invoked every `progressEveryLines` input lines with
   * the running `linesRead` / `linesWritten` counts. Useful for progress bars
   * on multi-million-row files. A mapper run may also report the final totals
   * once more when it completes.
   *
   * Never invoked in identity mode: the byte pipe does not split the stream
   * into lines, so there is nothing to count.
   */
  onProgress?: (linesRead: number, linesWritten: number) => void;

  /** How often to fire `onProgress`, in input lines. Default 100_000. */
  progressEveryLines?: number;
}
77
+
78
/** Summary returned by `transformJsonlZst` once both zstd processes have exited. */
export interface TransformJsonlZstResult {
  /** Number of input lines handed to the mapper (0 in identity mode). */
  linesRead: number;

  /** Number of output lines emitted by the mapper (0 in identity mode). */
  linesWritten: number;

  /**
   * True when identity mode was used: byte-pipe passthrough without per-line
   * counting (so `linesRead` / `linesWritten` will be 0).
   */
  identityPassthrough: boolean;
}
85
+
86
/** Byte value of the line-feed character used as the line delimiter. */
const LF = 0x0a;

/** One-byte buffer holding a single line feed, written after every mapped line. */
const LF_BUFFER = Buffer.alloc(1, LF);
88
+
89
/**
 * Stream a zstd-compressed JSONL file through an optional per-line mapper and
 * write the result as a new zstd-compressed JSONL file.
 *
 * Two `zstd` subprocesses do the (de)compression; this function only moves
 * bytes between them. Without a mapper the decompressed bytes are piped
 * straight into the compressor (identity mode). With `mapper` or
 * `binaryMapper` the stream is split on LF, each line (minus a trailing CR)
 * is handed to the mapper, and every returned item is written followed by LF.
 *
 * @param params See `TransformJsonlZstParams`.
 * @returns Line counts plus a flag telling whether identity mode was used.
 * @throws Error when both `mapper` and `binaryMapper` are supplied, when a
 *         line is too long to decode as a JS string in `mapper` mode, when
 *         writing to the compressor fails, or when either `zstd` process
 *         fails to spawn or exits unsuccessfully. Errors thrown by the mapper
 *         or by `onProgress` propagate unchanged.
 */
export async function transformJsonlZst(
  params: TransformJsonlZstParams,
): Promise<TransformJsonlZstResult> {
  const {
    inputPath,
    outputPath,
    mapper,
    binaryMapper,
    zstdLevel = 9,
    onProgress,
    progressEveryLines = 100_000,
  } = params;

  if (mapper && binaryMapper) {
    throw new Error(
      'transformJsonlZst: pass either `mapper` (string) or `binaryMapper` (Buffer), not both',
    );
  }
  const anyMapper = mapper ?? binaryMapper;

  // `--` ends option parsing so an input path that starts with `-` is still
  // treated as a file name.
  const decompress = spawn('zstd', ['-dc', '-q', '--', inputPath], {
    stdio: ['ignore', 'pipe', 'inherit'],
  });

  // Levels 20–22 are only honoured with `--ultra`; without it the zstd CLI
  // clamps to 19, and `-q` hides the warning about doing so.
  const compressArgs = [`-${zstdLevel}`, '-q', '-f', '-o', outputPath];
  if (zstdLevel > 19) compressArgs.unshift('--ultra');
  const compress = spawn('zstd', compressArgs, {
    stdio: ['pipe', 'inherit', 'inherit'],
  });

  const decompressDone = waitForExit(decompress, 'zstd -d');
  const compressDone = waitForExit(compress, 'zstd -c');
  // Either promise may reject long before it is awaited (spawn failure, or a
  // child dying after we abort). Attach no-op handlers now so that never
  // surfaces as an unhandled rejection; the real `await` below still sees it.
  void decompressDone.catch(() => {});
  void compressDone.catch(() => {});

  // Keep a permanent 'error' listener on the compressor's stdin. Without one,
  // an asynchronous EPIPE that arrives while nobody is waiting for 'drain'
  // would be thrown as an uncaught exception.
  let stdinError: Error | undefined;
  compress.stdin.on('error', (err: Error) => {
    if (stdinError === undefined) stdinError = err;
  });

  // Write one chunk, honouring backpressure: resolves immediately when the
  // stream accepts more data, otherwise once it drains.
  const writeChunk = (chunk: Buffer | string): Promise<void> => {
    if (stdinError !== undefined) return Promise.reject(stdinError);
    if (compress.stdin.write(chunk)) return Promise.resolve();
    return new Promise<void>((resolve, reject) => {
      const cleanup = () => {
        compress.stdin.off('drain', onDrain);
        compress.stdin.off('error', onError);
        compress.stdin.off('close', onClose);
      };
      const onDrain = () => {
        cleanup();
        resolve();
      };
      const onError = (err: Error) => {
        cleanup();
        reject(err);
      };
      // A stream that closes without draining would otherwise leave this
      // promise pending forever.
      const onClose = () => {
        cleanup();
        reject(
          stdinError ??
            new Error('transformJsonlZst: zstd compressor stdin closed before draining'),
        );
      };
      compress.stdin.once('drain', onDrain);
      compress.stdin.once('error', onError);
      compress.stdin.once('close', onClose);
    });
  };

  let linesRead = 0;
  let linesWritten = 0;

  try {
    if (!anyMapper) {
      // Identity mode: byte-pipe. Never split into lines, never materialize
      // strings, never accumulate buffers. Constant memory regardless of how
      // long individual lines are.
      for await (const chunk of decompress.stdout!) {
        await writeChunk(chunk);
      }
    } else {
      // Mapper mode: split on LF boundaries by scanning raw bytes. Bytes of a
      // line that spans several chunks are parked in `pending` and joined
      // with a single `Buffer.concat` once the LF is seen.
      let pending: Buffer[] = [];
      let pendingLen = 0;

      // Emit one mapper output item followed by the line terminator.
      const writeMapperResult = async (out: Buffer | string): Promise<void> => {
        await writeChunk(out);
        await writeChunk(LF_BUFFER);
        linesWritten++;
      };

      // Run the mapper on one complete line and write whatever it returns.
      const flushLine = async (lineBuf: Buffer): Promise<void> => {
        // Strip trailing CR for CRLF tolerance, matching readline behaviour.
        const trimmed =
          lineBuf.length > 0 && lineBuf[lineBuf.length - 1] === 0x0d
            ? lineBuf.subarray(0, lineBuf.length - 1)
            : lineBuf;

        let result: string | string[] | Buffer | Array<Buffer | string> | null | undefined;
        if (binaryMapper) {
          result = binaryMapper(trimmed, linesRead);
        } else {
          // String mapper: decode the line. Lines above V8's ~512 MB string
          // cap throw ERR_STRING_TOO_LONG here — re-throw with a pointer to
          // `binaryMapper` so the failure mode is obvious.
          let lineStr: string;
          try {
            lineStr = trimmed.toString('utf8');
          } catch (err) {
            if (
              err instanceof Error &&
              (err as NodeJS.ErrnoException).code === 'ERR_STRING_TOO_LONG'
            ) {
              const wrapped = new Error(
                `transformJsonlZst: line ${linesRead} is ${trimmed.length} bytes — ` +
                  `exceeds V8 max JS string length (~512 MB). Use the ` +
                  '`binaryMapper` option to receive the line as a Buffer.',
              );
              (wrapped as { cause?: unknown }).cause = err;
              throw wrapped;
            }
            throw err;
          }
          result = mapper!(lineStr, linesRead);
        }
        linesRead++;

        if (result === null || result === undefined) {
          // drop
        } else if (Array.isArray(result)) {
          for (const out of result) {
            await writeMapperResult(out);
          }
        } else {
          await writeMapperResult(result);
        }

        if (onProgress && linesRead % progressEveryLines === 0) {
          onProgress(linesRead, linesWritten);
        }
      };

      for await (const chunk of decompress.stdout! as AsyncIterable<Buffer>) {
        let start = 0;
        // Buffer.indexOf(LF) is a native scan, much faster than a JS byte loop.
        while (start < chunk.length) {
          const lf = chunk.indexOf(LF, start);
          if (lf < 0) {
            // No LF in the remainder — stash a private copy and wait for the
            // next chunk.
            const owned = Buffer.from(chunk.subarray(start));
            pending.push(owned);
            pendingLen += owned.length;
            break;
          }
          const tail = chunk.subarray(start, lf);
          let lineBuf: Buffer;
          if (pendingLen === 0) {
            lineBuf = tail;
          } else {
            pending.push(tail);
            lineBuf = Buffer.concat(pending, pendingLen + tail.length);
            pending = [];
            pendingLen = 0;
          }
          await flushLine(lineBuf);
          start = lf + 1;
        }
      }

      // Trailing line without a terminating LF — emit it the same way readline
      // would (so callers don't silently lose data when the input lacks a
      // final newline).
      if (pendingLen > 0) {
        await flushLine(Buffer.concat(pending, pendingLen));
      }
    }
  } catch (err) {
    // Abort: close the compressor's input so it exits, and stop the
    // decompressor in case it is still producing output.
    compress.stdin.destroy();
    decompress.kill();
    throw err;
  }

  compress.stdin.end();

  await Promise.all([decompressDone, compressDone]);

  // Use `anyMapper`, not `mapper`: a `binaryMapper` run is a mapper run too,
  // so it gets the final progress report and is not flagged as passthrough.
  if (anyMapper && onProgress) onProgress(linesRead, linesWritten);

  return { linesRead, linesWritten, identityPassthrough: !anyMapper };
}
266
+
267
/**
 * Turn a child process's termination into a promise.
 *
 * @param child Process to observe.
 * @param label Human-readable name used as the prefix of error messages.
 * @returns A promise that resolves when the process closes with exit code 0.
 *          It rejects when the process emits `'error'`, or closes with a
 *          non-zero code or because of a signal.
 */
function waitForExit(
  child: ReturnType<typeof spawn>,
  label: string,
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    child.on('error', (err) => {
      reject(new Error(`${label} failed to spawn: ${err.message}`));
    });

    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      // A null code means the process was terminated by a signal.
      const reason = code === null ? `signal ${signal}` : `code ${code}`;
      reject(new Error(`${label} exited with ${reason}`));
    });
  });
}
package/src/types.ts CHANGED
@@ -20,6 +20,10 @@ export interface OptimizeParams {
20
20
  /** Hard cap. Rows with payoutCents > capMaxWin are dropped. */
21
21
  capMaxWin: number;
22
22
 
23
+ /** Cost of a single bet in cents. Used to convert payouts to "bet multiplier" units
24
+ * for the Stake-style report. Default 100 (1.0 bet = 100 cents). */
25
+ betCostCents?: number;
26
+
23
27
  /** When true, force ≥ 1 row with payoutCents ≥ maxReachedFraction × capMaxWin. Default true. */
24
28
  requireMaxReached?: boolean;
25
29
  /** Default 0.95. */
@@ -43,6 +47,73 @@ export interface OptimizeParams {
43
47
  * Default 0.05 (5%). Set to 1.0 to disable.
44
48
  */
45
49
  maxRowRtpShare?: number;
50
+
51
+ /** Maximum integer weight allowed for any single output row, as a multiple of the
52
+ * uniform prior weight (totalWeightOut / nRowsOut). E.g., 10 means no row can have
53
+ * weight greater than 10 × (totalWeightOut / nRowsOut). This prevents Stake's ETL
54
+ * ("Within Liability Limits") check from failing due to over-concentrated weight.
55
+ * Default 10. Set to Infinity to disable. */
56
+ maxWeightPerRow?: number;
57
+
58
+ /** Algorithm for compressing source rows into a weighted lookup table.
59
+ * - 'tiered' (default): tier-based rarity weighting (cap/large rows get weight=1,
60
+ * small rows get calculated weight W). Preserves source distribution rates;
61
+ * passes Stake Engine's "Within Liability Limits" check.
62
+ * - 'nnls': legacy NNLS optimization; hits RTP/CV/HR targets exactly but may
63
+ * concentrate weight on few rows and fail Stake's Liability check. */
64
+ algorithm?: 'tiered' | 'nnls';
65
+
66
+ /** Tier-based only: payout multiplier (payoutCents / betCostCents) above which
67
+ * a row is in the "cap" tier (weight=1, rare). Default: 0.95 × max source pm. */
68
+ capPmThreshold?: number;
69
+
70
+ /** Tier-based only: payout multiplier threshold for the "large" tier.
71
+ * Rows with capPmThreshold > pm >= largePmThreshold get weight=1.
72
+ * Default: undefined (no large tier — only cap vs small). */
73
+ largePmThreshold?: number;
74
+
75
+ /** Tier-based only: target effective probability for cap+large rows in output.
76
+ * Default: natural rate from source = (n_cap + n_large) / n_source. */
77
+ largeTarget?: number;
78
+
79
+ /** Tier-based only: when true, ensure every Stake hit-rate distribution range
80
+ * up to the actual max payout has ≥ 1 output row when source has rows in
81
+ * that range. Prevents Stake's "Gaps in the Hit Rate Table" rejection.
82
+ * Default true. */
83
+ ensureRangeCoverage?: boolean;
84
+
85
+ /** Tier-based only: reshape the high-tier sampling so the per-bucket row
86
+ * counts follow a log-decay curve across Stake hit-rate ranges — each
87
+ * bucket above the lowest one in the high tier targets `ratio × prev`
88
+ * rows. Turns the typical sparse tail of `…18 → 1 → 1 → 1 → 4` into
89
+ * a smooth `…18 → 9 → 4 → 2 → 1` instead.
90
+ *
91
+ * When true and `largePmThreshold` is unset, auto-sets it to
92
+ * `max(50, capPmThreshold / 20)` so the decay covers multiple Stake
93
+ * buckets, not just the single cap bucket. Default false. */
94
+ shapeDistribution?: boolean;
95
+
96
+ /** Tier-based only: ratio between adjacent Stake-bucket row counts when
97
+ * `shapeDistribution=true`. 0.5 = each higher bucket has half the rows
98
+ * of the one below it. Default 0.5. */
99
+ shapeDecayRatio?: number;
100
+
101
+ /** Tier-based only: auto-pick `shapeDecayRatio` by binary search so the
102
+ * achieved CV lands at `targetCV` within `toleranceCV`. Requires
103
+ * `shapeDistribution=true` and a `targetCV > 0`. Runs the full pipeline
104
+ * up to 6 times (one per bisection step) — expect a 5×-6× wall-clock
105
+ * hit on builds where it triggers. Default false. */
106
+ shapeAutoMatchCV?: boolean;
107
+
108
+ /** Tier-based only: minimum fraction of nRowsOut that must be distinct payoutCents
109
+ * values in the output. Stake Engine rejects "Insufficient Unique Events" when
110
+ * too few distinct outcomes exist (same events repeat in a session). Default 0.01
111
+ * (1%). For 100K output → 1K unique payouts required. Set to 0 to disable.
112
+ *
113
+ * When the target cannot be reached (source lacks enough distinct payouts, or
114
+ * RTP-drift budget exhausts), the optimizer falls back to maximizing unique
115
+ * count under the budget and emits a warning. */
116
+ minUniqueEventsRate?: number;
46
117
  }
47
118
 
48
119
  export interface OptimizeAchieved {
@@ -60,6 +131,71 @@ export interface ToleranceMet {
60
131
  maxReached: boolean;
61
132
  /** True if no output row contributes more than maxRowRtpShare of total RTP. */
62
133
  rtpConcentration: boolean;
134
+ /** True if no output row's weight exceeds maxWeightPerRow × (totalWeightOut / nRowsOut). */
135
+ weightCap: boolean;
136
+ }
137
+
138
/**
 * One point on the RTP-concentration curve: how much of total RTP the K
 * heaviest rows account for, with rows ordered by w·payout descending.
 */
export interface TopKShare {
  /** Number of top rows included (the K in "top-K"). */
  k: number;
  /** Cumulative share of total RTP coming from those top-K rows. */
  share: number;
}
143
+
144
/** One payout-multiplier range `[low, high)` of the hit-rate distribution table. */
export interface HitRateBucket {
  /** Lower bound of the payout-multiplier range (inclusive). */
  low: number;

  /** Upper bound of the range (exclusive); `Infinity` for the open top bucket. */
  high: number;

  /** How many distinct output rows have a payout multiplier in `[low, high)`. */
  count: number;

  /**
   * Player-facing probability of landing in this range:
   * Σ weight in this range / Σ weight total.
   */
  effectiveHitRate: number;
}
154
+
155
/** Stake-style summary of the optimized output, expressed in bet-multiplier units. */
export interface StakeReport {
  /** Largest payout in the output as a bet multiplier (payoutCents / betCostCents). */
  payoutMultMax: number;

  /**
   * Standard deviation of payouts in bet-cost units
   * (= stddev_payout_cents / betCostCents). Equivalent to
   * cv × rtp × (100 / betCostCents); for bet=100 cents that is cv × rtp × 1.
   */
  baseStd: number;

  /** Probability that a sampled spin pays ≥ 5000 × betCost. */
  prob5K: number;

  /** Probability that a sampled spin pays ≥ 10000 × betCost. */
  prob10K: number;

  /**
   * Top-K cumulative RTP shares, with rows sorted by per-row (w × payout)
   * descending. Standard K values reported: 1, 5, 10, 100.
   */
  topKShare: TopKShare[];

  /**
   * Stake's hit-rate-distribution table: payout-multiplier ranges with row
   * count and effective probability. Ranges are: [0, 0.1), [0.1, 1), [1, 2),
   * [2, 5), [5, 10), [10, 20), [20, 50), [50, 100), [100, 200), [200, 500),
   * [500, 1000), [1000, 2000), [2000, 5000), [5000, 10000), [10000, 20000),
   * [20000, ∞). Stake fails publication when any intermediate range is empty
   * (a gap).
   */
  hitRateDistribution: HitRateBucket[];

  /**
   * Number of distinct payoutCents values in the output. Stake flags
   * "Insufficient Unique Events" when this is too low — the same outcomes
   * repeat in a session.
   */
  uniqueEvents: number;

  /** Bet cost in cents used for the multiplier conversions (echoed from params). */
  betCostCents: number;
}
187
+
188
/** Swap counters collected from the optimizer's refinement passes. */
export interface RefinementStats {
  /** Single-row swaps applied during refineRtpBySwap to close the residual RTP gap. */
  rtpSwaps: number;

  /** Σ-preserving 2-swaps applied during refineCvBySwap to nudge CV. */
  cvSwaps: number;

  /** Swaps applied to fill empty Stake distribution ranges (ensureRangeCoverage). */
  gapFillSwaps: number;

  /** Stake distribution ranges where the source has no rows — gaps that cannot be filled. */
  gapsUnfillable: number;

  /** Swaps applied to introduce new distinct payoutCents into the output (minUniqueEventsRate). */
  diversifySwaps: number;
}
64
200
 
65
201
  export interface OptimizeResult {
@@ -68,5 +204,10 @@ export interface OptimizeResult {
68
204
  toleranceMet: ToleranceMet;
69
205
  /** The single output row's largest fraction of total RTP. */
70
206
  maxRowRtpShare: number;
207
+ /** Maximum integer weight observed in output, as a multiple of uniform prior. */
208
+ maxWeightRatio: number;
209
+ /** Per-pass swap counters from the refinement loops. */
210
+ refinement: RefinementStats;
71
211
  warnings: string[];
212
+ stakeReport: StakeReport;
72
213
  }