@energy8platform/stake-math-tools 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -56
- package/package.json +1 -1
- package/src/index.ts +13 -0
- package/src/optimize-lookup.ts +174 -19
- package/src/stake-report.ts +145 -0
- package/src/tiered.ts +1832 -0
- package/src/transform-jsonl-zst.ts +285 -0
- package/src/types.ts +141 -0
- package/test/optimize-lookup.integration.test.ts +423 -0
- package/test/optimize-lookup.unit.test.ts +2 -0
- package/test/transform-jsonl-zst.test.ts +343 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { execFileSync } from 'node:child_process';
|
|
3
|
+
import { mkdtempSync, rmSync, writeFileSync, readFileSync, existsSync, statSync } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
|
|
7
|
+
import { transformJsonlZst } from '../src/transform-jsonl-zst.js';
|
|
8
|
+
|
|
9
|
+
// Scratch directory for the test that is currently running. It is assigned in
// `beforeEach` and deleted again in `afterEach`.
let workDir: string;

beforeEach(() => {
  // mkdtempSync appends a random suffix to the prefix, so each test gets a
  // directory of its own.
  const prefix = join(tmpdir(), 'transform-jsonl-zst-');
  workDir = mkdtempSync(prefix);
});

afterEach(() => {
  // `force` keeps the cleanup from throwing if the directory is already gone.
  rmSync(workDir, { force: true, recursive: true });
});
|
|
18
|
+
|
|
19
|
+
/**
 * Writes `lines` as a JSONL file inside `workDir` and compresses it with the
 * `zstd` command-line tool.
 *
 * @param name  Base name used for both `<name>.jsonl` and `<name>.jsonl.zst`.
 * @param lines Records to write, one per line. A non-empty list is terminated
 *              with a final newline; an empty list produces an empty file.
 * @returns Path of the compressed `<name>.jsonl.zst` file.
 */
function writeJsonlZst(name: string, lines: string[]): string {
  const plainPath = join(workDir, name + '.jsonl');
  const compressedPath = join(workDir, name + '.jsonl.zst');

  const body = lines.length > 0 ? lines.join('\n') + '\n' : '';
  writeFileSync(plainPath, body);

  // -q: quiet, -f: overwrite an existing target, -o: explicit output path.
  const zstdArgs = ['-q', '-f', '-o', compressedPath, plainPath];
  execFileSync('zstd', zstdArgs);

  return compressedPath;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Decompresses a `.jsonl.zst` file with the `zstd` command-line tool and
 * returns its content split into lines.
 *
 * @param zstPath Path of the compressed file to read.
 * @returns The decoded lines. A single trailing newline does not produce an
 *          extra empty entry, and empty output yields `[]`.
 */
function readJsonlZst(zstPath: string): string[] {
  // Some tests round-trip multi-megabyte payloads, which would overflow the
  // 1 MiB default output buffer of execFileSync; allow 64 MiB instead.
  const maxOutputBytes = 64 * 1024 * 1024;

  const decompressed = execFileSync('zstd', ['-dc', '-q', zstPath], {
    maxBuffer: maxOutputBytes,
  });
  const text = decompressed.toString('utf8');

  if (text.length === 0) {
    return [];
  }

  // Strip exactly one terminating newline before splitting.
  const withoutFinalNewline = text.endsWith('\n') ? text.slice(0, -1) : text;
  return withoutFinalNewline.split('\n');
}
|
|
36
|
+
|
|
37
|
+
describe('transformJsonlZst', () => {
  /** Resolves a file name inside the scratch directory of the running test. */
  const outPath = (fileName = 'out.jsonl.zst'): string => join(workDir, fileName);

  it('round-trips identity as a pure byte passthrough', async () => {
    const records = [
      '{"id":0,"payoutMultiplier":120}',
      '{"id":1,"payoutMultiplier":0}',
      '{"id":2,"payoutMultiplier":500}',
    ];
    const src = writeJsonlZst('in', records);
    const dest = outPath();

    const stats = await transformJsonlZst({ inputPath: src, outputPath: dest });

    // With no mapper supplied the call is expected to report a passthrough and
    // to leave both line counters at zero (the original author's note: identity
    // mode skips line splitting on purpose to stay allocation-free).
    expect(stats.identityPassthrough).toBe(true);
    expect(stats.linesRead).toBe(0);
    expect(stats.linesWritten).toBe(0);
    expect(readJsonlZst(dest)).toEqual(records);
  });

  it('applies a line mapper as a 1:1 transform', async () => {
    const src = writeJsonlZst('in', [
      '{"id":0,"v":1}',
      '{"id":1,"v":2}',
      '{"id":2,"v":3}',
    ]);
    const dest = outPath();

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw, index) => {
        const parsed = JSON.parse(raw);
        const enriched = { ...parsed, idx: index, doubled: parsed.v * 2 };
        return JSON.stringify(enriched);
      },
    });

    expect(stats.identityPassthrough).toBe(false);
    expect(stats.linesWritten).toBe(3);
    expect(readJsonlZst(dest)).toEqual([
      '{"id":0,"v":1,"idx":0,"doubled":2}',
      '{"id":1,"v":2,"idx":1,"doubled":4}',
      '{"id":2,"v":3,"idx":2,"doubled":6}',
    ]);
  });

  it('drops lines when mapper returns null', async () => {
    const src = writeJsonlZst('in', [
      '{"keep":true,"id":0}',
      '{"keep":false,"id":1}',
      '{"keep":true,"id":2}',
      '{"keep":false,"id":3}',
    ]);
    const dest = outPath();

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => {
        // Returning null asks for the record to be omitted from the output.
        const { keep } = JSON.parse(raw);
        return keep ? raw : null;
      },
    });

    expect(stats.linesRead).toBe(4);
    expect(stats.linesWritten).toBe(2);
    expect(readJsonlZst(dest)).toEqual([
      '{"keep":true,"id":0}',
      '{"keep":true,"id":2}',
    ]);
  });

  it('expands a single input line into multiple outputs when mapper returns an array', async () => {
    const src = writeJsonlZst('in', ['{"id":0}', '{"id":1}']);
    const dest = outPath();

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => {
        const duplicated = [raw, raw];
        return duplicated;
      },
    });

    expect(stats.linesRead).toBe(2);
    expect(stats.linesWritten).toBe(4);
    expect(readJsonlZst(dest)).toEqual([
      '{"id":0}',
      '{"id":0}',
      '{"id":1}',
      '{"id":1}',
    ]);
  });

  it('handles an empty input file', async () => {
    const src = writeJsonlZst('in', []);
    const dest = outPath();

    const stats = await transformJsonlZst({ inputPath: src, outputPath: dest });

    expect(stats.linesRead).toBe(0);
    expect(stats.linesWritten).toBe(0);
    // An output archive must still be produced, and it must decode to nothing.
    expect(existsSync(dest)).toBe(true);
    expect(readJsonlZst(dest)).toEqual([]);
  });

  it('emits a trailing line that lacks a final newline', async () => {
    // The shared helper always terminates the file with "\n", so this fixture
    // is written and compressed by hand to leave the last record unterminated.
    const plainPath = join(workDir, 'no-final-lf.jsonl');
    writeFileSync(plainPath, '{"id":0}\n{"id":1}');
    const src = join(workDir, 'no-final-lf.jsonl.zst');
    execFileSync('zstd', ['-q', '-f', '-o', src, plainPath]);
    const dest = outPath();

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => raw,
    });

    expect(stats.linesRead).toBe(2);
    expect(stats.linesWritten).toBe(2);
    expect(readJsonlZst(dest)).toEqual(['{"id":0}', '{"id":1}']);
  });

  it('rejects when the input file does not exist', async () => {
    const dest = outPath();
    const missingInput = join(workDir, 'does-not-exist.jsonl.zst');

    const pending = transformJsonlZst({
      inputPath: missingInput,
      outputPath: dest,
    });

    // The rejection message is expected to mention the decompression command.
    await expect(pending).rejects.toThrow(/zstd -d/);
  });

  it('calls onProgress with the running counts (mapper mode)', async () => {
    const records: string[] = [];
    for (let i = 0; i < 250; i += 1) {
      records.push(`{"i":${i}}`);
    }
    const src = writeJsonlZst('in', records);
    const dest = outPath();
    const ticks: Array<[number, number]> = [];

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => raw,
      progressEveryLines: 100,
      onProgress: (read, written) => ticks.push([read, written]),
    });

    expect(stats.linesRead).toBe(250);
    // Two periodic reports (100, 200) followed by a closing report at 250.
    expect(ticks).toEqual([
      [100, 100],
      [200, 200],
      [250, 250],
    ]);
  });

  it('supports a large stream (identity byte-pipe) without per-line allocations', async () => {
    const N = 50_000;
    const filler = 'x'.repeat(40);
    const records = Array.from({ length: N }, (_, id) =>
      JSON.stringify({ id, payload: filler }),
    );
    const src = writeJsonlZst('in', records);
    const dest = outPath();

    const stats = await transformJsonlZst({ inputPath: src, outputPath: dest });

    expect(stats.identityPassthrough).toBe(true);
    // Spot-check the count and both ends instead of comparing all N entries.
    const roundTripped = readJsonlZst(dest);
    expect(roundTripped.length).toBe(N);
    expect(roundTripped[0]).toBe(records[0]);
    expect(roundTripped[N - 1]).toBe(records[N - 1]);
  });

  it('processes a single line larger than a default stream chunk (128 KiB) via mapper', async () => {
    // One record of roughly 512 KiB followed by a tiny one: the mapper must
    // still receive each record whole, exactly once.
    const bigLine = `{"id":0,"payload":"${'x'.repeat(500_000)}"}`;
    const smallLine = '{"id":1}';
    const src = writeJsonlZst('in', [bigLine, smallLine]);
    const dest = outPath();

    const observedLengths: number[] = [];
    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => {
        observedLengths.push(raw.length);
        return raw;
      },
    });

    expect(stats.linesRead).toBe(2);
    expect(stats.linesWritten).toBe(2);
    expect(observedLengths[0]).toBe(bigLine.length);
    expect(observedLengths[1]).toBe(smallLine.length);
    const roundTripped = readJsonlZst(dest);
    expect(roundTripped[0].length).toBe(bigLine.length);
    expect(roundTripped[1]).toBe(smallLine);
  });

  it('passes raw Buffer to binaryMapper and lets it emit Buffer or string', async () => {
    const src = writeJsonlZst('in', [
      '{"id":0,"v":1}',
      '{"id":1,"v":2}',
      '{"id":2,"v":3}',
    ]);
    const dest = outPath();

    const seen: Array<{ isBuffer: boolean; byteLength: number; firstByte: number }> = [];
    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      binaryMapper: (chunk, index) => {
        seen.push({
          isBuffer: Buffer.isBuffer(chunk),
          byteLength: chunk.length,
          firstByte: chunk[0],
        });
        // Alternate the return type: Buffer for even indices, string for odd.
        if (index % 2 === 0) {
          return chunk;
        }
        return chunk.toString('utf8');
      },
    });

    expect(stats.linesRead).toBe(3);
    expect(stats.linesWritten).toBe(3);
    expect(seen.every((entry) => entry.isBuffer)).toBe(true);
    expect(seen[0].firstByte).toBe('{'.charCodeAt(0));
    expect(readJsonlZst(dest)).toEqual([
      '{"id":0,"v":1}',
      '{"id":1,"v":2}',
      '{"id":2,"v":3}',
    ]);
  });

  it('binaryMapper rewrites a multi-megabyte line via prefix-only string conversion', async () => {
    // Fixture: a ~3 MB record between two small ones. The mapper decodes only
    // the leading bytes to find the id and reuses the rest of the Buffer as-is.
    const bigTail = '"events":[' + '0,'.repeat(1_500_000) + '0]';
    const bigLine = `{"id":42,${bigTail}}`;
    const src = writeJsonlZst('in', [
      `{"id":1,"keep":false}`,
      bigLine,
      `{"id":99,"keep":true}`,
    ]);
    const dest = outPath();

    // Old id → new id. Records whose id is not listed are dropped.
    const selected = new Map<number, number>([
      [42, 0],
      [99, 1],
    ]);
    const idPrefix = /^\{"id":(\d+),/;

    const stats = await transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      binaryMapper: (chunk) => {
        // 32 bytes are enough to cover the `{"id":N,` prefix of any record here.
        const leading = chunk.subarray(0, 32).toString('utf8');
        const match = idPrefix.exec(leading);
        if (match === null) {
          return null;
        }
        const replacementId = selected.get(Number(match[1]));
        if (replacementId === undefined) {
          return null;
        }
        const newPrefix = Buffer.from(`{"id":${replacementId},`);
        const remainder = chunk.subarray(match[0].length);
        const totalBytes = newPrefix.length + remainder.length;
        return Buffer.concat([newPrefix, remainder], totalBytes);
      },
    });

    expect(stats.linesRead).toBe(3);
    expect(stats.linesWritten).toBe(2);
    const roundTripped = readJsonlZst(dest);
    expect(roundTripped.length).toBe(2);
    // The big record comes out first, with its id rewritten from 42 to 0.
    expect(roundTripped[0].startsWith('{"id":0,"events":[')).toBe(true);
    expect(roundTripped[0].length).toBe(
      bigLine.length - `{"id":42,`.length + `{"id":0,`.length,
    );
    expect(roundTripped[1]).toBe('{"id":1,"keep":true}');
  });

  it('rejects when both mapper and binaryMapper are provided', async () => {
    const src = writeJsonlZst('in', ['{"id":0}']);
    const dest = outPath();

    const pending = transformJsonlZst({
      inputPath: src,
      outputPath: dest,
      mapper: (raw) => raw,
      binaryMapper: (chunk) => chunk,
    });

    await expect(pending).rejects.toThrow(/either.*mapper.*binaryMapper/);
  });

  it('honors the zstdLevel parameter', async () => {
    // Highly repetitive payload so the compression level has something to bite on.
    const repeated = 'of repeating text '.repeat(5);
    const records = Array.from({ length: 1000 }, (_, id) =>
      JSON.stringify({ id, lots: repeated }),
    );
    const src = writeJsonlZst('in', records);
    const outFast = outPath('fast.jsonl.zst');
    const outSmall = outPath('small.jsonl.zst');

    await transformJsonlZst({ inputPath: src, outputPath: outFast, zstdLevel: 1 });
    await transformJsonlZst({ inputPath: src, outputPath: outSmall, zstdLevel: 19 });

    // Both archives must decode to the same lines; only their size may differ.
    expect(readJsonlZst(outFast)).toEqual(readJsonlZst(outSmall));
    const fastBytes = statSync(outFast).size;
    const smallBytes = statSync(outSmall).size;
    expect(smallBytes).toBeLessThanOrEqual(fastBytes);
  });
});
|