@prisma/streams-server 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +68 -0
- package/LICENSE +201 -0
- package/README.md +39 -2
- package/SECURITY.md +33 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +29 -34
- package/src/app.ts +74 -0
- package/src/app_core.ts +1706 -0
- package/src/app_local.ts +46 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +239 -0
- package/src/config.ts +251 -0
- package/src/db/db.ts +1386 -0
- package/src/db/schema.ts +625 -0
- package/src/expiry_sweeper.ts +44 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +745 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/ingest.ts +655 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +114 -0
- package/src/memory.ts +155 -0
- package/src/metrics.ts +161 -0
- package/src/metrics_emitter.ts +50 -0
- package/src/notifier.ts +64 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +128 -0
- package/src/offset.ts +70 -0
- package/src/reader.ts +454 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/registry.ts +405 -0
- package/src/segment/cache.ts +179 -0
- package/src/segment/format.ts +331 -0
- package/src/segment/segmenter.ts +326 -0
- package/src/segment/segmenter_worker.ts +43 -0
- package/src/segment/segmenter_workers.ts +94 -0
- package/src/server.ts +326 -0
- package/src/sqlite/adapter.ts +164 -0
- package/src/stats.ts +205 -0
- package/src/touch/engine.ts +41 -0
- package/src/touch/interpreter_worker.ts +442 -0
- package/src/touch/live_keys.ts +118 -0
- package/src/touch/live_metrics.ts +827 -0
- package/src/touch/live_templates.ts +619 -0
- package/src/touch/manager.ts +1199 -0
- package/src/touch/spec.ts +456 -0
- package/src/touch/touch_journal.ts +671 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +189 -0
- package/src/touch/worker_protocol.ts +56 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +317 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +45 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +31 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/build/index.d.mts +0 -1
- package/build/index.d.ts +0 -1
- package/build/index.js +0 -0
- package/build/index.mjs +0 -1
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
import { Result } from "better-result";
|
|
2
|
+
import { zstdCompressSync, zstdDecompressSync } from "node:zlib";
|
|
3
|
+
import { Bloom256 } from "../util/bloom256";
|
|
4
|
+
import { crc32c } from "../util/crc32c";
|
|
5
|
+
import { concatBytes, readU32BE, readU64BE, writeU32BE, writeU64BE } from "../util/endian";
|
|
6
|
+
import { dsError } from "../util/ds_error.ts";
|
|
7
|
+
|
|
8
|
+
// One logical record inside a segment block. This is the unit written by
// encodeRecord and recovered by decodeBlockResult.
export type SegmentRecord = {
  appendNs: bigint; // append timestamp in nanoseconds (big-endian u64 on disk)
  routingKey: Uint8Array; // UTF8 bytes (may be empty)
  payload: Uint8Array; // opaque record body
};

// A fully decoded DSB3 block: the header fields plus every parsed record.
export type DecodedBlock = {
  recordCount: number; // record count as declared in the block header
  firstAppendNs: bigint; // appendNs of the first record, taken from the header
  lastAppendNs: bigint; // appendNs of the last record, taken from the header
  bloom: Uint8Array; // 32 bytes
  records: SegmentRecord[];
};

// One entry of the segment footer's block index (40 bytes on disk,
// see FOOTER_ENTRY_BYTES).
export type BlockIndexEntry = {
  blockOffset: number; // byte offset of the block within the segment file
  firstOffset: bigint; // stream offset of the first record in the block
  recordCount: number;
  compressedLen: number; // compressed payload length (excludes the 68-byte header)
  firstAppendNs: bigint;
  lastAppendNs: bigint;
};

// Parsed segment footer: format version plus the block index.
export type SegmentFooter = {
  version: number;
  blocks: BlockIndexEntry[];
};

// Result of locating/parsing a footer inside a whole segment buffer.
// `footer` is null when the footer bytes were found but failed to parse.
export type ParsedFooter = {
  footer: SegmentFooter | null;
  footerStart: number; // byte offset where the footer begins in the segment
};

// The fixed 68-byte DSB3 block header, parsed without touching the payload.
export type BlockHeader = {
  uncompressedLen: number;
  compressedLen: number;
  recordCount: number;
  bloom: Uint8Array;
  firstAppendNs: bigint;
  lastAppendNs: bigint;
  crc32c: number; // CRC32C over the compressed payload
};

// Error value returned by the *Result variants in this module.
export type SegmentFormatError = {
  kind: "invalid_segment_format";
  message: string;
};

// One item yielded by iterateBlocks / iterateBlocksResult.
export type IterateBlockEntry = {
  blockOffset: number; // byte offset of the block within the segment
  blockBytes: Uint8Array; // raw header + compressed payload
  decoded: DecodedBlock;
};
|
|
61
|
+
|
|
62
|
+
function invalidSegment<T = never>(message: string): Result<T, SegmentFormatError> {
|
|
63
|
+
return Result.err({ kind: "invalid_segment_format", message });
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Fixed DSB3 block header size:
// 4 (magic "DSB3") + 4 (uncompressedLen) + 4 (compressedLen) + 4 (recordCount)
// + 32 (bloom) + 8 (firstAppendNs) + 8 (lastAppendNs) + 4 (crc32c) = 68.
export const DSB3_HEADER_BYTES = 68;

// Footer magic, written both at the start of the footer and in the trailer
// so the footer can be located by reading the end of the file.
const FOOTER_MAGIC = "DSF1";
// Footer format version stored at byte offset 4 of the footer.
const FOOTER_VERSION = 1;
const FOOTER_ENTRY_BYTES = 40; // 8+8+4+4+8+8
const FOOTER_TRAILER_BYTES = 8; // u32 len + 4-byte magic
|
|
72
|
+
|
|
73
|
+
export function encodeRecord(rec: SegmentRecord): Uint8Array {
|
|
74
|
+
const keyLen = rec.routingKey.byteLength;
|
|
75
|
+
const dataLen = rec.payload.byteLength;
|
|
76
|
+
const out = new Uint8Array(8 + 4 + keyLen + 4 + dataLen);
|
|
77
|
+
writeU64BE(out, 0, rec.appendNs);
|
|
78
|
+
writeU32BE(out, 8, keyLen);
|
|
79
|
+
out.set(rec.routingKey, 12);
|
|
80
|
+
writeU32BE(out, 12 + keyLen, dataLen);
|
|
81
|
+
out.set(rec.payload, 16 + keyLen);
|
|
82
|
+
return out;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
export function encodeBlock(records: SegmentRecord[]): Uint8Array {
|
|
86
|
+
const res = encodeBlockResult(records);
|
|
87
|
+
if (Result.isError(res)) throw dsError(res.error.message);
|
|
88
|
+
return res.value;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
export function encodeBlockResult(records: SegmentRecord[]): Result<Uint8Array, SegmentFormatError> {
|
|
92
|
+
if (records.length === 0) return invalidSegment("empty block");
|
|
93
|
+
|
|
94
|
+
const bloom = new Bloom256();
|
|
95
|
+
for (const r of records) bloom.add(r.routingKey);
|
|
96
|
+
|
|
97
|
+
const recBytes = records.map(encodeRecord);
|
|
98
|
+
const uncompressed = concatBytes(recBytes);
|
|
99
|
+
const compressed = new Uint8Array(zstdCompressSync(uncompressed));
|
|
100
|
+
const crc = crc32c(compressed);
|
|
101
|
+
|
|
102
|
+
const header = new Uint8Array(DSB3_HEADER_BYTES);
|
|
103
|
+
header[0] = "D".charCodeAt(0);
|
|
104
|
+
header[1] = "S".charCodeAt(0);
|
|
105
|
+
header[2] = "B".charCodeAt(0);
|
|
106
|
+
header[3] = "3".charCodeAt(0);
|
|
107
|
+
writeU32BE(header, 4, uncompressed.byteLength);
|
|
108
|
+
writeU32BE(header, 8, compressed.byteLength);
|
|
109
|
+
writeU32BE(header, 12, records.length);
|
|
110
|
+
header.set(bloom.toBytes(), 16);
|
|
111
|
+
|
|
112
|
+
const firstTs = records[0].appendNs;
|
|
113
|
+
const lastTs = records[records.length - 1].appendNs;
|
|
114
|
+
writeU64BE(header, 48, firstTs);
|
|
115
|
+
writeU64BE(header, 56, lastTs);
|
|
116
|
+
writeU32BE(header, 64, crc);
|
|
117
|
+
|
|
118
|
+
return Result.ok(concatBytes([header, compressed]));
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
export function decodeBlock(blockBytes: Uint8Array): DecodedBlock {
|
|
122
|
+
const res = decodeBlockResult(blockBytes);
|
|
123
|
+
if (Result.isError(res)) throw dsError(res.error.message);
|
|
124
|
+
return res.value;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
export function decodeBlockResult(blockBytes: Uint8Array): Result<DecodedBlock, SegmentFormatError> {
|
|
128
|
+
if (blockBytes.byteLength < DSB3_HEADER_BYTES) return invalidSegment("block too small");
|
|
129
|
+
if (
|
|
130
|
+
blockBytes[0] !== "D".charCodeAt(0) ||
|
|
131
|
+
blockBytes[1] !== "S".charCodeAt(0) ||
|
|
132
|
+
blockBytes[2] !== "B".charCodeAt(0) ||
|
|
133
|
+
blockBytes[3] !== "3".charCodeAt(0)
|
|
134
|
+
) {
|
|
135
|
+
return invalidSegment("bad block magic");
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const uncompressedLen = readU32BE(blockBytes, 4);
|
|
139
|
+
const compressedLen = readU32BE(blockBytes, 8);
|
|
140
|
+
const recordCount = readU32BE(blockBytes, 12);
|
|
141
|
+
const bloom = blockBytes.slice(16, 48);
|
|
142
|
+
const firstAppendNs = readU64BE(blockBytes, 48);
|
|
143
|
+
const lastAppendNs = readU64BE(blockBytes, 56);
|
|
144
|
+
const expectedCrc = readU32BE(blockBytes, 64);
|
|
145
|
+
|
|
146
|
+
const payload = blockBytes.slice(DSB3_HEADER_BYTES, DSB3_HEADER_BYTES + compressedLen);
|
|
147
|
+
if (payload.byteLength !== compressedLen) return invalidSegment("truncated block");
|
|
148
|
+
const actualCrc = crc32c(payload);
|
|
149
|
+
if (actualCrc !== expectedCrc) return invalidSegment("crc mismatch");
|
|
150
|
+
|
|
151
|
+
let uncompressed: Uint8Array;
|
|
152
|
+
try {
|
|
153
|
+
uncompressed = new Uint8Array(zstdDecompressSync(payload));
|
|
154
|
+
} catch (e: any) {
|
|
155
|
+
return invalidSegment(String(e?.message ?? e));
|
|
156
|
+
}
|
|
157
|
+
if (uncompressed.byteLength !== uncompressedLen) {
|
|
158
|
+
return invalidSegment(`bad uncompressed len: got=${uncompressed.byteLength} expected=${uncompressedLen}`);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
const records: SegmentRecord[] = [];
|
|
162
|
+
let off = 0;
|
|
163
|
+
for (let i = 0; i < recordCount; i++) {
|
|
164
|
+
if (off + 8 + 4 > uncompressed.byteLength) return invalidSegment("truncated record");
|
|
165
|
+
const appendNs = readU64BE(uncompressed, off);
|
|
166
|
+
off += 8;
|
|
167
|
+
const keyLen = readU32BE(uncompressed, off);
|
|
168
|
+
off += 4;
|
|
169
|
+
if (off + keyLen + 4 > uncompressed.byteLength) return invalidSegment("truncated key");
|
|
170
|
+
const routingKey = uncompressed.slice(off, off + keyLen);
|
|
171
|
+
off += keyLen;
|
|
172
|
+
const dataLen = readU32BE(uncompressed, off);
|
|
173
|
+
off += 4;
|
|
174
|
+
if (off + dataLen > uncompressed.byteLength) return invalidSegment("truncated payload");
|
|
175
|
+
const payload = uncompressed.slice(off, off + dataLen);
|
|
176
|
+
off += dataLen;
|
|
177
|
+
records.push({ appendNs, routingKey, payload });
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return Result.ok({ recordCount, firstAppendNs, lastAppendNs, bloom, records });
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
export function encodeFooter(entries: BlockIndexEntry[]): Uint8Array {
|
|
184
|
+
const footerLen = 12 + entries.length * FOOTER_ENTRY_BYTES;
|
|
185
|
+
const footer = new Uint8Array(footerLen + FOOTER_TRAILER_BYTES);
|
|
186
|
+
footer[0] = FOOTER_MAGIC.charCodeAt(0);
|
|
187
|
+
footer[1] = FOOTER_MAGIC.charCodeAt(1);
|
|
188
|
+
footer[2] = FOOTER_MAGIC.charCodeAt(2);
|
|
189
|
+
footer[3] = FOOTER_MAGIC.charCodeAt(3);
|
|
190
|
+
writeU32BE(footer, 4, FOOTER_VERSION);
|
|
191
|
+
writeU32BE(footer, 8, entries.length);
|
|
192
|
+
|
|
193
|
+
let off = 12;
|
|
194
|
+
for (const e of entries) {
|
|
195
|
+
writeU64BE(footer, off, BigInt(e.blockOffset));
|
|
196
|
+
off += 8;
|
|
197
|
+
writeU64BE(footer, off, e.firstOffset);
|
|
198
|
+
off += 8;
|
|
199
|
+
writeU32BE(footer, off, e.recordCount);
|
|
200
|
+
off += 4;
|
|
201
|
+
writeU32BE(footer, off, e.compressedLen);
|
|
202
|
+
off += 4;
|
|
203
|
+
writeU64BE(footer, off, e.firstAppendNs);
|
|
204
|
+
off += 8;
|
|
205
|
+
writeU64BE(footer, off, e.lastAppendNs);
|
|
206
|
+
off += 8;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Trailer: footer length + magic
|
|
210
|
+
writeU32BE(footer, footerLen, footerLen);
|
|
211
|
+
footer[footerLen + 4] = FOOTER_MAGIC.charCodeAt(0);
|
|
212
|
+
footer[footerLen + 5] = FOOTER_MAGIC.charCodeAt(1);
|
|
213
|
+
footer[footerLen + 6] = FOOTER_MAGIC.charCodeAt(2);
|
|
214
|
+
footer[footerLen + 7] = FOOTER_MAGIC.charCodeAt(3);
|
|
215
|
+
|
|
216
|
+
return footer;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
export function parseFooter(segmentBytes: Uint8Array): ParsedFooter | null {
|
|
220
|
+
if (segmentBytes.byteLength < FOOTER_TRAILER_BYTES) return null;
|
|
221
|
+
const tail = segmentBytes.slice(segmentBytes.byteLength - 4);
|
|
222
|
+
const tailMagic = String.fromCharCode(tail[0], tail[1], tail[2], tail[3]);
|
|
223
|
+
if (tailMagic !== FOOTER_MAGIC) return null;
|
|
224
|
+
|
|
225
|
+
const footerLen = readU32BE(segmentBytes, segmentBytes.byteLength - 8);
|
|
226
|
+
if (footerLen <= 0 || footerLen + FOOTER_TRAILER_BYTES > segmentBytes.byteLength) return null;
|
|
227
|
+
|
|
228
|
+
const footerStart = segmentBytes.byteLength - FOOTER_TRAILER_BYTES - footerLen;
|
|
229
|
+
if (footerStart < 0) return null;
|
|
230
|
+
const footer = segmentBytes.slice(footerStart, footerStart + footerLen);
|
|
231
|
+
const parsed = parseFooterBytes(footer);
|
|
232
|
+
return { footer: parsed, footerStart };
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
export function* iterateBlocksResult(
|
|
236
|
+
segmentBytes: Uint8Array
|
|
237
|
+
): Generator<Result<IterateBlockEntry, SegmentFormatError>, void, void> {
|
|
238
|
+
const parsed = parseFooter(segmentBytes);
|
|
239
|
+
const limit = parsed ? parsed.footerStart : segmentBytes.byteLength;
|
|
240
|
+
let off = 0;
|
|
241
|
+
while (off < limit) {
|
|
242
|
+
if (off + DSB3_HEADER_BYTES > limit) {
|
|
243
|
+
yield invalidSegment("truncated segment (block header)");
|
|
244
|
+
return;
|
|
245
|
+
}
|
|
246
|
+
const header = segmentBytes.slice(off, off + DSB3_HEADER_BYTES);
|
|
247
|
+
const compressedLen = readU32BE(header, 8);
|
|
248
|
+
const totalLen = DSB3_HEADER_BYTES + compressedLen;
|
|
249
|
+
if (off + totalLen > limit) {
|
|
250
|
+
yield invalidSegment("truncated segment (block payload)");
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
const blockBytes = segmentBytes.slice(off, off + totalLen);
|
|
254
|
+
const decodedRes = decodeBlockResult(blockBytes);
|
|
255
|
+
if (Result.isError(decodedRes)) {
|
|
256
|
+
yield decodedRes;
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
yield Result.ok({ blockOffset: off, blockBytes, decoded: decodedRes.value });
|
|
260
|
+
off += totalLen;
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
export function* iterateBlocks(segmentBytes: Uint8Array): Generator<IterateBlockEntry, void, void> {
|
|
265
|
+
for (const itemRes of iterateBlocksResult(segmentBytes)) {
|
|
266
|
+
if (Result.isError(itemRes)) throw dsError(itemRes.error.message);
|
|
267
|
+
yield itemRes.value;
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/**
 * Parses the footer body produced by encodeFooter.
 * Returns null (rather than an error) on any structural mismatch so callers
 * can treat a bad footer as "no footer" and fall back to a linear block scan.
 *
 * NOTE(review): `version` is read and returned but never compared against
 * FOOTER_VERSION here; the length check below assumes version-1 entry sizes.
 * Confirm whether callers are expected to validate the version.
 */
export function parseFooterBytes(footer: Uint8Array): SegmentFooter | null {
  if (footer.byteLength < 12) return null;
  const magic = String.fromCharCode(footer[0], footer[1], footer[2], footer[3]);
  if (magic !== FOOTER_MAGIC) return null;
  const version = readU32BE(footer, 4);
  const blockCount = readU32BE(footer, 8);
  // The body must be exactly header + blockCount fixed-width entries.
  const expectedLen = 12 + blockCount * FOOTER_ENTRY_BYTES;
  if (footer.byteLength !== expectedLen) return null;
  const blocks: BlockIndexEntry[] = [];
  let off = 12;
  for (let i = 0; i < blockCount; i++) {
    // Entry layout (40 bytes): u64 blockOffset | u64 firstOffset |
    // u32 recordCount | u32 compressedLen | u64 firstAppendNs | u64 lastAppendNs.
    const blockOffset = Number(readU64BE(footer, off));
    off += 8;
    const firstOffset = readU64BE(footer, off);
    off += 8;
    const recordCount = readU32BE(footer, off);
    off += 4;
    const compressedLen = readU32BE(footer, off);
    off += 4;
    const firstAppendNs = readU64BE(footer, off);
    off += 8;
    const lastAppendNs = readU64BE(footer, off);
    off += 8;
    blocks.push({ blockOffset, firstOffset, recordCount, compressedLen, firstAppendNs, lastAppendNs });
  }
  return { version, blocks };
}
|
|
298
|
+
|
|
299
|
+
export function parseBlockHeader(header: Uint8Array): BlockHeader {
|
|
300
|
+
const res = parseBlockHeaderResult(header);
|
|
301
|
+
if (Result.isError(res)) throw dsError(res.error.message);
|
|
302
|
+
return res.value;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
export function parseBlockHeaderResult(header: Uint8Array): Result<BlockHeader, SegmentFormatError> {
|
|
306
|
+
if (header.byteLength < DSB3_HEADER_BYTES) return invalidSegment("block header too small");
|
|
307
|
+
if (
|
|
308
|
+
header[0] !== "D".charCodeAt(0) ||
|
|
309
|
+
header[1] !== "S".charCodeAt(0) ||
|
|
310
|
+
header[2] !== "B".charCodeAt(0) ||
|
|
311
|
+
header[3] !== "3".charCodeAt(0)
|
|
312
|
+
) {
|
|
313
|
+
return invalidSegment("bad block magic");
|
|
314
|
+
}
|
|
315
|
+
const uncompressedLen = readU32BE(header, 4);
|
|
316
|
+
const compressedLen = readU32BE(header, 8);
|
|
317
|
+
const recordCount = readU32BE(header, 12);
|
|
318
|
+
const bloom = header.slice(16, 48);
|
|
319
|
+
const firstAppendNs = readU64BE(header, 48);
|
|
320
|
+
const lastAppendNs = readU64BE(header, 56);
|
|
321
|
+
const crc32cVal = readU32BE(header, 64);
|
|
322
|
+
return Result.ok({
|
|
323
|
+
uncompressedLen,
|
|
324
|
+
compressedLen,
|
|
325
|
+
recordCount,
|
|
326
|
+
bloom,
|
|
327
|
+
firstAppendNs,
|
|
328
|
+
lastAppendNs,
|
|
329
|
+
crc32c: crc32cVal,
|
|
330
|
+
});
|
|
331
|
+
}
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
import { mkdirSync, openSync, closeSync, writeSync, fsyncSync, renameSync, existsSync, unlinkSync } from "node:fs";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
import type { Config } from "../config";
|
|
4
|
+
import type { SqliteDurableStore } from "../db/db";
|
|
5
|
+
import { encodeBlock, encodeFooter, type BlockIndexEntry, type SegmentRecord } from "./format";
|
|
6
|
+
import { readU32BE } from "../util/endian";
|
|
7
|
+
import { localSegmentPath, streamHash16Hex } from "../util/stream_paths";
|
|
8
|
+
import { LruCache } from "../util/lru";
|
|
9
|
+
import { yieldToEventLoop } from "../util/yield";
|
|
10
|
+
|
|
11
|
+
// Tuning knobs for the Segmenter; every field falls back to a Config value
// (or a built-in constant) in the Segmenter constructor.
export type SegmenterOptions = {
  minCandidateBytes?: number; // default: segmentMaxBytes
  minCandidateRows?: number; // default: segmentTargetRows
  maxIntervalMs?: number; // default: segmentMaxIntervalMs
  candidatesPerTick?: number; // max streams considered per tick (default 8)
  maxRowsPerSegment?: number; // hard cap on rows sealed into one segment (default 250_000)
};

// Observer callbacks invoked by the Segmenter.
export type SegmenterHooks = {
  // Called after a segment is durably committed; receives the WAL payload
  // byte count and the final on-disk segment size.
  onSegmentSealed?: (payloadBytes: number, segmentBytes: number) => void;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Background worker that periodically seals WAL rows into immutable segment
 * files. Each tick asks the store for candidate streams, then for each one:
 * claims the stream, streams its WAL rows into DSB3 blocks in a temp file,
 * appends a footer, fsyncs, renames into place, and commits the seal to the
 * database. The tmp-write -> fsync -> rename -> DB-commit ordering is the
 * crash-safety contract: the ordering of these statements must not change.
 */
export class Segmenter {
  private readonly config: Config;
  private readonly db: SqliteDurableStore;
  private readonly opts: Required<SegmenterOptions>;
  private readonly hooks?: SegmenterHooks;
  // Interval handle; also used as the "started" flag.
  private timer: any | null = null;
  // Re-entrancy guard: at most one tick body runs at a time.
  private running = false;
  // Hard-stop flag: suppresses new work and error logging during shutdown.
  private stopping = false;
  // Per-stream backoff so a repeatedly failing stream can't hog every tick.
  private readonly failures = new FailureTracker(1024);

  constructor(config: Config, db: SqliteDurableStore, opts: SegmenterOptions = {}, hooks?: SegmenterHooks) {
    this.config = config;
    this.db = db;
    this.opts = {
      minCandidateBytes: opts.minCandidateBytes ?? config.segmentMaxBytes,
      minCandidateRows: opts.minCandidateRows ?? config.segmentTargetRows,
      maxIntervalMs: opts.maxIntervalMs ?? config.segmentMaxIntervalMs,
      candidatesPerTick: opts.candidatesPerTick ?? 8,
      maxRowsPerSegment: opts.maxRowsPerSegment ?? 250_000,
    };
    this.hooks = hooks;
  }

  /** Starts the periodic tick; idempotent while already running. */
  start(): void {
    this.stopping = false;
    if (this.timer) return;
    this.timer = setInterval(() => {
      // Fire-and-forget: tick() handles and logs its own errors.
      void this.tick();
    }, this.config.segmentCheckIntervalMs);
  }

  /**
   * Cancels the periodic tick. With hard=true, also flags in-flight work to
   * bail out early and suppresses shutdown-noise error logging.
   */
  stop(hard = false): void {
    if (hard) this.stopping = true;
    else this.stopping = false;
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
  }

  /** One scheduler pass: fetch candidate streams and try to seal each. */
  private async tick(): Promise<void> {
    if (this.stopping) return;
    if (this.running) return;
    this.running = true;
    try {
      const candidates = this.db.candidates(
        BigInt(this.opts.minCandidateBytes),
        BigInt(this.opts.minCandidateRows),
        BigInt(this.opts.maxIntervalMs),
        this.opts.candidatesPerTick
      );
      for (const c of candidates) {
        if (this.failures.shouldSkip(c.stream)) continue;
        try {
          await this.buildOne(c.stream);
          this.failures.recordSuccess(c.stream);
        } catch (e) {
          this.failures.recordFailure(c.stream);
          // During shutdown the sqlite handle may close under us; those
          // errors are expected and not worth logging.
          const msg = String((e as any)?.message ?? e);
          const lower = msg.toLowerCase();
          if (!this.stopping && !lower.includes("database has closed") && !lower.includes("closed database") && !lower.includes("statement has finalized")) {
            // eslint-disable-next-line no-console
            console.error("segment build failed", c.stream, e);
          }
        }
      }
    } catch (e) {
      // Same shutdown-noise filter as above, for the candidate query itself.
      const msg = String((e as any)?.message ?? e);
      const lower = msg.toLowerCase();
      if (!this.stopping && !lower.includes("database has closed") && !lower.includes("closed database") && !lower.includes("statement has finalized")) {
        // eslint-disable-next-line no-console
        console.error("segmenter tick error", e);
      }
    } finally {
      this.running = false;
    }
  }

  /** Recognizes sqlite busy/locked errors by code or errno (5, 517). */
  private isSqliteBusy(err: any): boolean {
    const code = String(err?.code ?? "");
    const errno = Number(err?.errno ?? -1);
    return code === "SQLITE_BUSY" || code === "SQLITE_BUSY_SNAPSHOT" || errno === 5 || errno === 517;
  }

  /**
   * Runs fn, retrying SQLITE_BUSY with exponential backoff (5ms doubling,
   * capped at 200ms per sleep) until ingestBusyTimeoutMs has elapsed.
   * Non-busy errors propagate immediately.
   */
  private async runWithBusyRetry<T>(fn: () => T): Promise<T> {
    const maxBusyMs = Math.max(0, this.config.ingestBusyTimeoutMs);
    if (maxBusyMs <= 0) return fn();
    const startMs = Date.now();
    let attempt = 0;
    for (;;) {
      try {
        return fn();
      } catch (e) {
        if (!this.isSqliteBusy(e)) throw e;
        const elapsed = Date.now() - startMs;
        if (elapsed >= maxBusyMs) throw e;
        const delay = Math.min(200, 5 * 2 ** attempt);
        attempt += 1;
        await new Promise((res) => setTimeout(res, delay));
      }
    }
  }

  /** Best-effort removal of a leftover temp file; never throws. */
  private cleanupTmp(tmpPath: string): void {
    try {
      if (existsSync(tmpPath)) unlinkSync(tmpPath);
    } catch {
      // ignore
    }
  }

  /**
   * Seals one segment for the given stream, if it has unsealed WAL rows.
   * Claims the stream, writes blocks + footer to a temp file, fsyncs,
   * renames into place, then commits the seal (with busy retry). On commit
   * failure the renamed file is removed so disk and DB stay consistent.
   */
  private async buildOne(stream: string): Promise<void> {
    if (this.stopping) return;
    const row = this.db.getStream(stream);
    if (!row || this.db.isDeleted(row)) return;
    if (row.segment_in_progress) return;

    // Seal the half-open range (sealed_through, next_offset).
    const startOffset = row.sealed_through + 1n;
    const maxOffset = row.next_offset - 1n;
    if (startOffset > maxOffset) return;

    // Claim the stream so concurrent segmenters don't double-build.
    if (!this.db.tryClaimSegment(stream)) return;

    try {
      const segmentIndex = this.db.nextSegmentIndexForStream(stream);
      const shash = streamHash16Hex(stream);
      const localPath = localSegmentPath(this.config.rootDir, shash, segmentIndex);
      const tmpPath = `${localPath}.tmp`;
      mkdirSync(dirname(localPath), { recursive: true });

      // Build blocks and stream-write to temp file.
      const fd = openSync(tmpPath, "w");
      try {
        let blockRecords: SegmentRecord[] = [];
        let blockBytesApprox = 0;
        let fileBytes = 0;
        let blockCount = 0;
        let blockFirstOffset = startOffset;
        const blockIndex: BlockIndexEntry[] = [];

        // Decide endOffset by scanning WAL rows until threshold.
        // IMPORTANT: pending_bytes tracks WAL payload bytes only (not record/block overhead).
        let payloadBytes = 0n;
        let rowsSealed = 0n;
        let endOffset = startOffset - 1n;
        let lastAppendMs = 0n;

        // Cooperative yielding so a long build doesn't starve the event loop.
        let lastYieldMs = Date.now();
        let recordsSinceYield = 0;
        for (const rec of this.db.iterWalRange(stream, startOffset, maxOffset)) {
          const offset = BigInt(rec.offset);
          const payload: Uint8Array = rec.payload;
          const routingKey: Uint8Array | null = rec.routing_key ?? null;
          const appendMs = BigInt(rec.ts_ms);
          lastAppendMs = appendMs;

          const keyBytes = routingKey ?? new Uint8Array(0);
          const segRec: SegmentRecord = {
            // WAL timestamps are milliseconds; segment records use nanoseconds.
            appendNs: appendMs * 1_000_000n,
            routingKey: keyBytes,
            payload,
          };
          // Serialized record size: u64 ts + u32 keyLen + key + u32 dataLen + data.
          const recSize = 8 + 4 + keyBytes.byteLength + 4 + payload.byteLength;

          // Flush the current block before it would exceed blockMaxBytes.
          if (blockRecords.length > 0 && blockBytesApprox + recSize > this.config.blockMaxBytes) {
            const blockOffset = fileBytes;
            const block = encodeBlock(blockRecords);
            // compressedLen sits at byte offset 8 of the DSB3 header.
            const compressedLen = readU32BE(block, 8);
            blockIndex.push({
              blockOffset,
              firstOffset: blockFirstOffset,
              recordCount: blockRecords.length,
              compressedLen,
              firstAppendNs: blockRecords[0].appendNs,
              lastAppendNs: blockRecords[blockRecords.length - 1].appendNs,
            });
            writeSync(fd, block);
            fileBytes += block.byteLength;
            blockCount += 1;
            blockRecords = [];
            blockBytesApprox = 0;
            await yieldToEventLoop();
          }

          if (blockRecords.length === 0) blockFirstOffset = offset;
          blockRecords.push(segRec);
          blockBytesApprox += recSize;

          payloadBytes += BigInt(payload.byteLength);
          rowsSealed += 1n;
          endOffset = offset;

          recordsSinceYield += 1;
          if (recordsSinceYield >= 512 || Date.now() - lastYieldMs >= 10) {
            await yieldToEventLoop();
            lastYieldMs = Date.now();
            recordsSinceYield = 0;
          }

          // Stop growing this segment once either threshold is reached.
          if (payloadBytes >= BigInt(this.config.segmentMaxBytes)) break;
          if (rowsSealed >= BigInt(this.opts.maxRowsPerSegment)) break;
        }

        // Nothing to seal (race with deletion/expiry); tmp file is cleaned
        // up in the finally block below.
        if (rowsSealed === 0n) return;

        // Flush the final partial block.
        if (blockRecords.length > 0) {
          const blockOffset = fileBytes;
          const block = encodeBlock(blockRecords);
          const compressedLen = readU32BE(block, 8);
          blockIndex.push({
            blockOffset,
            firstOffset: blockFirstOffset,
            recordCount: blockRecords.length,
            compressedLen,
            firstAppendNs: blockRecords[0].appendNs,
            lastAppendNs: blockRecords[blockRecords.length - 1].appendNs,
          });
          writeSync(fd, block);
          fileBytes += block.byteLength;
          blockCount += 1;
        }

        const footer = encodeFooter(blockIndex);
        writeSync(fd, footer);
        fileBytes += footer.byteLength;

        // Durability point: data must hit disk before the rename publishes it.
        fsyncSync(fd);

        const segmentId = `${shash}-${segmentIndex}-${startOffset.toString()}-${endOffset.toString()}`;
        renameSync(tmpPath, localPath);

        if (!this.stopping) {
          try {
            await this.runWithBusyRetry(() => {
              this.db.commitSealedSegment({
                segmentId,
                stream,
                segmentIndex,
                startOffset,
                endOffset,
                blockCount,
                lastAppendMs,
                sizeBytes: fileBytes,
                localPath,
                payloadBytes,
                rowsSealed,
              });
            });
            if (this.hooks?.onSegmentSealed) this.hooks.onSegmentSealed(Number(payloadBytes), fileBytes);
          } catch (e) {
            // Commit failed: remove the published file so disk matches the DB.
            try {
              if (existsSync(localPath)) unlinkSync(localPath);
            } catch {
              // ignore
            }
            throw e;
          }
        }
      } finally {
        closeSync(fd);
        this.cleanupTmp(tmpPath);
      }
    } finally {
      // Release claim.
      if (!this.stopping) {
        try {
          this.db.setSegmentInProgress(stream, 0);
        } catch {
          // ignore
        }
      }
    }
  }
}
|
|
296
|
+
|
|
297
|
+
class FailureTracker {
|
|
298
|
+
private readonly cache: LruCache<string, { attempts: number; untilMs: number }>;
|
|
299
|
+
|
|
300
|
+
constructor(maxEntries: number) {
|
|
301
|
+
this.cache = new LruCache(maxEntries);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
shouldSkip(stream: string): boolean {
|
|
305
|
+
const item = this.cache.get(stream);
|
|
306
|
+
if (!item) return false;
|
|
307
|
+
if (Date.now() >= item.untilMs) {
|
|
308
|
+
this.cache.delete(stream);
|
|
309
|
+
return false;
|
|
310
|
+
}
|
|
311
|
+
return true;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
recordFailure(stream: string): void {
|
|
315
|
+
const now = Date.now();
|
|
316
|
+
const item = this.cache.get(stream) ?? { attempts: 0, untilMs: now };
|
|
317
|
+
item.attempts += 1;
|
|
318
|
+
const backoff = Math.min(60_000, 500 * 2 ** (item.attempts - 1));
|
|
319
|
+
item.untilMs = now + backoff;
|
|
320
|
+
this.cache.set(stream, item);
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
recordSuccess(stream: string): void {
|
|
324
|
+
this.cache.delete(stream);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// Worker-thread entry point: runs a Segmenter against the shared sqlite file,
// reporting sealed segments to the parent and shutting down on request.
import { parentPort, workerData } from "node:worker_threads";
import type { Config } from "../config.ts";
import { SqliteDurableStore } from "../db/db.ts";
import { Segmenter, type SegmenterHooks, type SegmenterOptions } from "./segmenter.ts";
import { initConsoleLogging } from "../util/log.ts";

initConsoleLogging();

// Config and options are passed by the parent via workerData.
const data = workerData as { config: Config; opts?: SegmenterOptions };
const cfg = data.config;
// The main server process initializes/migrates schema; workers should avoid
// concurrent migrations on the same sqlite file.
const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.sqliteCacheBytes, skipMigrations: true });

// Forward each sealed segment to the parent thread as a "sealed" message.
const hooks: SegmenterHooks = {
  onSegmentSealed: (payloadBytes, segmentBytes) => {
    parentPort?.postMessage({ type: "sealed", payloadBytes, segmentBytes });
  },
};

const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks);
segmenter.start();

// Shutdown protocol: on {type:"stop"} stop the segmenter, close the db, and
// acknowledge with {type:"stopped"}. Each step is best-effort so a failure in
// one never prevents the acknowledgement.
parentPort?.on("message", (msg: any) => {
  if (!msg || typeof msg !== "object") return;
  if (msg.type === "stop") {
    try {
      segmenter.stop();
    } catch {
      // ignore
    }
    try {
      db.close();
    } catch {
      // ignore
    }
    try {
      parentPort?.postMessage({ type: "stopped" });
    } catch {
      // ignore
    }
  }
});
|