@prisma/streams-server 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/CODE_OF_CONDUCT.md +45 -0
  2. package/CONTRIBUTING.md +68 -0
  3. package/LICENSE +201 -0
  4. package/README.md +39 -2
  5. package/SECURITY.md +33 -0
  6. package/bin/prisma-streams-server +2 -0
  7. package/package.json +29 -34
  8. package/src/app.ts +74 -0
  9. package/src/app_core.ts +1983 -0
  10. package/src/app_local.ts +46 -0
  11. package/src/backpressure.ts +66 -0
  12. package/src/bootstrap.ts +239 -0
  13. package/src/config.ts +251 -0
  14. package/src/db/db.ts +1440 -0
  15. package/src/db/schema.ts +619 -0
  16. package/src/expiry_sweeper.ts +44 -0
  17. package/src/hist.ts +169 -0
  18. package/src/index/binary_fuse.ts +379 -0
  19. package/src/index/indexer.ts +745 -0
  20. package/src/index/run_cache.ts +84 -0
  21. package/src/index/run_format.ts +213 -0
  22. package/src/ingest.ts +655 -0
  23. package/src/lens/lens.ts +501 -0
  24. package/src/manifest.ts +114 -0
  25. package/src/memory.ts +155 -0
  26. package/src/metrics.ts +161 -0
  27. package/src/metrics_emitter.ts +50 -0
  28. package/src/notifier.ts +64 -0
  29. package/src/objectstore/interface.ts +13 -0
  30. package/src/objectstore/mock_r2.ts +269 -0
  31. package/src/objectstore/null.ts +32 -0
  32. package/src/objectstore/r2.ts +128 -0
  33. package/src/offset.ts +70 -0
  34. package/src/reader.ts +454 -0
  35. package/src/runtime/hash.ts +156 -0
  36. package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
  37. package/src/runtime/hash_vendor/NOTICE.md +8 -0
  38. package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
  39. package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
  40. package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
  41. package/src/schema/lens_schema.ts +290 -0
  42. package/src/schema/proof.ts +547 -0
  43. package/src/schema/registry.ts +405 -0
  44. package/src/segment/cache.ts +179 -0
  45. package/src/segment/format.ts +331 -0
  46. package/src/segment/segmenter.ts +326 -0
  47. package/src/segment/segmenter_worker.ts +43 -0
  48. package/src/segment/segmenter_workers.ts +94 -0
  49. package/src/server.ts +326 -0
  50. package/src/sqlite/adapter.ts +164 -0
  51. package/src/stats.ts +205 -0
  52. package/src/touch/engine.ts +41 -0
  53. package/src/touch/interpreter_worker.ts +459 -0
  54. package/src/touch/live_keys.ts +118 -0
  55. package/src/touch/live_metrics.ts +858 -0
  56. package/src/touch/live_templates.ts +619 -0
  57. package/src/touch/manager.ts +1341 -0
  58. package/src/touch/naming.ts +13 -0
  59. package/src/touch/routing_key_notifier.ts +275 -0
  60. package/src/touch/spec.ts +526 -0
  61. package/src/touch/touch_journal.ts +671 -0
  62. package/src/touch/touch_key_id.ts +20 -0
  63. package/src/touch/worker_pool.ts +189 -0
  64. package/src/touch/worker_protocol.ts +58 -0
  65. package/src/types/proper-lockfile.d.ts +1 -0
  66. package/src/uploader.ts +317 -0
  67. package/src/util/base32_crockford.ts +81 -0
  68. package/src/util/bloom256.ts +67 -0
  69. package/src/util/cleanup.ts +22 -0
  70. package/src/util/crc32c.ts +29 -0
  71. package/src/util/ds_error.ts +15 -0
  72. package/src/util/duration.ts +17 -0
  73. package/src/util/endian.ts +53 -0
  74. package/src/util/json_pointer.ts +148 -0
  75. package/src/util/log.ts +25 -0
  76. package/src/util/lru.ts +45 -0
  77. package/src/util/retry.ts +35 -0
  78. package/src/util/siphash.ts +71 -0
  79. package/src/util/stream_paths.ts +31 -0
  80. package/src/util/time.ts +14 -0
  81. package/src/util/yield.ts +3 -0
  82. package/build/index.d.mts +0 -1
  83. package/build/index.d.ts +0 -1
  84. package/build/index.js +0 -0
  85. package/build/index.mjs +0 -1
package/src/reader.ts ADDED
@@ -0,0 +1,454 @@
1
+ import { existsSync, openSync, readSync, closeSync } from "node:fs";
2
+ import type { Config } from "./config";
3
+ import type { SqliteDurableStore, SegmentRow } from "./db/db";
4
+ import type { ObjectStore } from "./objectstore/interface";
5
+ import { parseOffsetResult, offsetToSeqOrNeg1, encodeOffset } from "./offset";
6
+ import { decodeBlockResult, iterateBlocksResult, parseBlockHeaderResult, parseFooterBytes, DSB3_HEADER_BYTES, type SegmentFooter } from "./segment/format";
7
+ import { SegmentDiskCache, type SegmentCacheStats } from "./segment/cache";
8
+ import { Bloom256 } from "./util/bloom256";
9
+ import { segmentObjectKey, streamHash16Hex } from "./util/stream_paths";
10
+ import { readU32BE } from "./util/endian";
11
+ import { retry, type RetryOptions } from "./util/retry";
12
+ import { LruCache } from "./util/lru";
13
+ import type { IndexManager } from "./index/indexer";
14
+ import { dsError } from "./util/ds_error.ts";
15
+ import { Result } from "better-result";
16
+
17
+ export type ReadFormat = "raw" | "json";
18
+
19
+ export type ReadBatch = {
20
+ stream: string;
21
+ format: ReadFormat;
22
+ key: string | null;
23
+ requestOffset: string;
24
+ endOffset: string; // checkpoint at end of stream
25
+ nextOffset: string; // checkpoint after this response
26
+ endOffsetSeq: bigint;
27
+ nextOffsetSeq: bigint;
28
+ records: Array<{ offset: bigint; payload: Uint8Array }>; // payload bytes in wire order
29
+ };
30
+
31
+ export type ReaderError =
32
+ | { kind: "not_found"; message: string }
33
+ | { kind: "gone"; message: string }
34
+ | { kind: "invalid_offset"; message: string }
35
+ | { kind: "internal"; message: string };
36
+
37
+ type FooterCacheEntry = { footer: SegmentFooter | null; footerStart: number };
38
+
39
+ function errorMessage(e: unknown): string {
40
+ return String((e as any)?.message ?? e);
41
+ }
42
+
43
+ function utf8Bytes(s: string): Uint8Array {
44
+ return new TextEncoder().encode(s);
45
+ }
46
+
47
+ function objectKeyForSegment(seg: SegmentRow): string {
48
+ const streamHash = streamHash16Hex(seg.stream);
49
+ return segmentObjectKey(streamHash, seg.segment_index);
50
+ }
51
+
52
+ function readRangeFromFile(path: string, start: number, end: number): Uint8Array {
53
+ const len = end - start + 1;
54
+ const fd = openSync(path, "r");
55
+ try {
56
+ const buf = Buffer.alloc(len);
57
+ const bytesRead = readSync(fd, buf, 0, len, start);
58
+ if (bytesRead !== len) throw dsError("short read");
59
+ return new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
60
+ } finally {
61
+ closeSync(fd);
62
+ }
63
+ }
64
+
65
+ async function readSegmentRange(
66
+ os: ObjectStore,
67
+ seg: SegmentRow,
68
+ start: number,
69
+ end: number,
70
+ diskCache?: SegmentDiskCache,
71
+ retryOpts?: RetryOptions
72
+ ): Promise<Uint8Array> {
73
+ const local = seg.local_path;
74
+ if (existsSync(local)) return readRangeFromFile(local, start, end);
75
+
76
+ const objectKey = objectKeyForSegment(seg);
77
+ if (diskCache && diskCache.has(objectKey)) {
78
+ diskCache.recordHit();
79
+ diskCache.touch(objectKey);
80
+ return readRangeFromFile(diskCache.getPath(objectKey), start, end);
81
+ }
82
+ if (diskCache) diskCache.recordMiss();
83
+
84
+ const bytes = await retry(
85
+ async () => {
86
+ const res = await os.get(objectKey, { range: { start, end } });
87
+ if (!res) throw dsError(`object store missing segment: ${objectKey}`);
88
+ return res;
89
+ },
90
+ retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
91
+ );
92
+ if (diskCache && start === 0 && end === seg.size_bytes - 1) {
93
+ diskCache.put(objectKey, bytes);
94
+ }
95
+ return bytes;
96
+ }
97
+
98
+ async function loadSegmentBytes(
99
+ os: ObjectStore,
100
+ seg: SegmentRow,
101
+ diskCache?: SegmentDiskCache,
102
+ retryOpts?: RetryOptions
103
+ ): Promise<Uint8Array> {
104
+ return readSegmentRange(os, seg, 0, seg.size_bytes - 1, diskCache, retryOpts);
105
+ }
106
+
107
+ async function loadSegmentFooter(
108
+ os: ObjectStore,
109
+ seg: SegmentRow,
110
+ diskCache?: SegmentDiskCache,
111
+ retryOpts?: RetryOptions,
112
+ footerCache?: LruCache<string, FooterCacheEntry>
113
+ ): Promise<{ footer: SegmentFooter | null; footerStart: number } | null> {
114
+ const cacheKey = seg.segment_id;
115
+ if (footerCache) {
116
+ const cached = footerCache.get(cacheKey);
117
+ if (cached) return cached;
118
+ }
119
+ if (seg.size_bytes < 8) return null;
120
+ const tail = await readSegmentRange(os, seg, seg.size_bytes - 8, seg.size_bytes - 1, diskCache, retryOpts);
121
+ const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]);
122
+ if (magic !== "DSF1") return null;
123
+ const footerLen = readU32BE(tail, 0);
124
+ const footerStart = seg.size_bytes - 8 - footerLen;
125
+ if (footerStart < 0) return null;
126
+ const footerBytes = await readSegmentRange(os, seg, footerStart, footerStart + footerLen - 1, diskCache, retryOpts);
127
+ const footer = parseFooterBytes(footerBytes);
128
+ const result = { footer, footerStart };
129
+ if (footerCache) footerCache.set(cacheKey, result);
130
+ return result;
131
+ }
132
+
133
+ export class StreamReader {
134
+ private readonly config: Config;
135
+ private readonly db: SqliteDurableStore;
136
+ private readonly os: ObjectStore;
137
+ private readonly diskCache?: SegmentDiskCache;
138
+ private readonly footerCache?: LruCache<string, FooterCacheEntry>;
139
+ private readonly index?: IndexManager;
140
+
141
+ constructor(config: Config, db: SqliteDurableStore, os: ObjectStore, diskCache?: SegmentDiskCache, index?: IndexManager) {
142
+ this.config = config;
143
+ this.db = db;
144
+ this.os = os;
145
+ this.diskCache = diskCache;
146
+ this.index = index;
147
+ if (config.segmentFooterCacheEntries > 0) {
148
+ this.footerCache = new LruCache(config.segmentFooterCacheEntries);
149
+ }
150
+ }
151
+
152
+ cacheStats(): SegmentCacheStats | null {
153
+ return this.diskCache ? this.diskCache.stats() : null;
154
+ }
155
+
156
+ private retryOpts(): RetryOptions {
157
+ return {
158
+ retries: this.config.objectStoreRetries,
159
+ baseDelayMs: this.config.objectStoreBaseDelayMs,
160
+ maxDelayMs: this.config.objectStoreMaxDelayMs,
161
+ timeoutMs: this.config.objectStoreTimeoutMs,
162
+ };
163
+ }
164
+
165
+ async seekOffsetByTimestampResult(stream: string, sinceMs: bigint, key: string | null): Promise<Result<string, ReaderError>> {
166
+ const srow = this.db.getStream(stream);
167
+ if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
168
+ if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
169
+ return Result.err({ kind: "gone", message: "stream expired" });
170
+ }
171
+ try {
172
+ const sinceNs = sinceMs * 1_000_000n;
173
+ const keyBytes = key ? utf8Bytes(key) : null;
174
+
175
+ // Scan segments in order.
176
+ const segments = this.db.listSegmentsForStream(stream);
177
+ for (const seg of segments) {
178
+ const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
179
+ let curOffset = seg.start_offset;
180
+ for (const blockRes of iterateBlocksResult(segBytes)) {
181
+ if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
182
+ const { decoded } = blockRes.value;
183
+ if (decoded.lastAppendNs < sinceNs) {
184
+ curOffset += BigInt(decoded.recordCount);
185
+ continue;
186
+ }
187
+ for (const r of decoded.records) {
188
+ if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
189
+ curOffset += 1n;
190
+ continue;
191
+ }
192
+ if (r.appendNs >= sinceNs) {
193
+ const prev = curOffset - 1n;
194
+ return Result.ok(encodeOffset(srow.epoch, prev));
195
+ }
196
+ curOffset += 1n;
197
+ }
198
+ }
199
+ }
200
+
201
+ // Scan WAL tail.
202
+ const start = srow.sealed_through + 1n;
203
+ const end = srow.next_offset - 1n;
204
+ if (start <= end) {
205
+ for (const rec of this.db.iterWalRange(stream, start, end, keyBytes ?? undefined)) {
206
+ const tsNs = BigInt(rec.ts_ms) * 1_000_000n;
207
+ if (tsNs >= sinceNs) {
208
+ const off = BigInt(rec.offset) - 1n;
209
+ return Result.ok(encodeOffset(srow.epoch, off));
210
+ }
211
+ }
212
+ }
213
+
214
+ const endOffsetNum = srow.next_offset - 1n;
215
+ return Result.ok(encodeOffset(srow.epoch, endOffsetNum));
216
+ } catch (e: unknown) {
217
+ return Result.err({ kind: "internal", message: errorMessage(e) });
218
+ }
219
+ }
220
+
221
+ async seekOffsetByTimestamp(stream: string, sinceMs: bigint, key: string | null): Promise<string> {
222
+ const res = await this.seekOffsetByTimestampResult(stream, sinceMs, key);
223
+ if (Result.isError(res)) throw dsError(res.error.message);
224
+ return res.value;
225
+ }
226
+
227
+ async readResult(args: { stream: string; offset: string; key: string | null; format: ReadFormat }): Promise<Result<ReadBatch, ReaderError>> {
228
+ const { stream, offset, key, format } = args;
229
+ const srow = this.db.getStream(stream);
230
+ if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
231
+ if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
232
+ return Result.err({ kind: "gone", message: "stream expired" });
233
+ }
234
+ const epoch = srow.epoch;
235
+
236
+ try {
237
+ const parsed = parseOffsetResult(offset);
238
+ if (Result.isError(parsed)) {
239
+ return Result.err({ kind: "invalid_offset", message: parsed.error.message });
240
+ }
241
+ const startOffsetExclusive = offsetToSeqOrNeg1(parsed.value);
242
+ const desiredOffset = startOffsetExclusive + 1n;
243
+
244
+ const endOffsetNum = srow.next_offset - 1n;
245
+ const endOffset = encodeOffset(srow.epoch, endOffsetNum);
246
+
247
+ const results: Array<{ offset: bigint; payload: Uint8Array }> = [];
248
+ let bytesOut = 0;
249
+
250
+ // Nothing to read.
251
+ if (desiredOffset > endOffsetNum) {
252
+ return Result.ok({
253
+ stream,
254
+ format,
255
+ key,
256
+ requestOffset: offset,
257
+ endOffset,
258
+ nextOffset: encodeOffset(srow.epoch, startOffsetExclusive),
259
+ endOffsetSeq: endOffsetNum,
260
+ nextOffsetSeq: startOffsetExclusive,
261
+ records: [],
262
+ });
263
+ }
264
+
265
+ let seq = desiredOffset;
266
+ const keyBytes = key ? utf8Bytes(key) : null;
267
+ const indexInfo = keyBytes && this.index ? await this.index.candidateSegments(stream, keyBytes) : null;
268
+ const candidateSegments = indexInfo?.segments ?? null;
269
+ const indexedThrough = indexInfo?.indexedThrough ?? 0;
270
+
271
+ const scanSegmentBytes = async (segBytes: Uint8Array, seg: SegmentRow): Promise<Result<void, ReaderError>> => {
272
+ let curOffset = seg.start_offset;
273
+ for (const blockRes of iterateBlocksResult(segBytes)) {
274
+ if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
275
+ const { decoded } = blockRes.value;
276
+ if (keyBytes) {
277
+ const bloom = new Bloom256(decoded.bloom);
278
+ if (!bloom.maybeHas(keyBytes)) {
279
+ curOffset += BigInt(decoded.recordCount);
280
+ continue;
281
+ }
282
+ }
283
+ for (const r of decoded.records) {
284
+ if (curOffset < seq) {
285
+ curOffset += 1n;
286
+ continue;
287
+ }
288
+ if (curOffset > endOffsetNum) break;
289
+ if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
290
+ curOffset += 1n;
291
+ continue;
292
+ }
293
+ results.push({ offset: curOffset, payload: r.payload });
294
+ bytesOut += r.payload.byteLength;
295
+ curOffset += 1n;
296
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
297
+ seq = curOffset;
298
+ return Result.ok(undefined);
299
+ }
300
+ }
301
+ }
302
+ return Result.ok(undefined);
303
+ };
304
+
305
+ // 1) Read from sealed segments.
306
+ while (seq <= endOffsetNum && seq <= srow.sealed_through) {
307
+ const seg = this.db.findSegmentForOffset(stream, seq);
308
+ if (!seg) {
309
+ // Corruption in local metadata: sealed_through points past segments table.
310
+ break;
311
+ }
312
+ if (keyBytes && candidateSegments && seg.segment_index < indexedThrough && !candidateSegments.has(seg.segment_index)) {
313
+ seq = seg.end_offset + 1n;
314
+ continue;
315
+ }
316
+ const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
317
+ if (preferFull) {
318
+ const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
319
+ const scanRes = await scanSegmentBytes(segBytes, seg);
320
+ if (Result.isError(scanRes)) return scanRes;
321
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
322
+ } else {
323
+ const footerInfo = await loadSegmentFooter(this.os, seg, this.diskCache, this.retryOpts(), this.footerCache);
324
+ if (!footerInfo || !footerInfo.footer) {
325
+ const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
326
+ const scanRes = await scanSegmentBytes(segBytes, seg);
327
+ if (Result.isError(scanRes)) return scanRes;
328
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
329
+ } else {
330
+ const footer = footerInfo.footer;
331
+ for (const entry of footer.blocks) {
332
+ const blockStart = entry.firstOffset;
333
+ const blockEnd = entry.firstOffset + BigInt(entry.recordCount) - 1n;
334
+ if (blockEnd < seq) continue;
335
+ if (blockStart > endOffsetNum) break;
336
+
337
+ if (keyBytes) {
338
+ const headerBytes = await readSegmentRange(
339
+ this.os,
340
+ seg,
341
+ entry.blockOffset,
342
+ entry.blockOffset + DSB3_HEADER_BYTES - 1,
343
+ this.diskCache,
344
+ this.retryOpts()
345
+ );
346
+ const headerRes = parseBlockHeaderResult(headerBytes);
347
+ if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
348
+ const header = headerRes.value;
349
+ const bloom = new Bloom256(header.bloom);
350
+ if (!bloom.maybeHas(keyBytes)) continue;
351
+ }
352
+
353
+ const totalLen = DSB3_HEADER_BYTES + entry.compressedLen;
354
+ const blockBytes = await readSegmentRange(
355
+ this.os,
356
+ seg,
357
+ entry.blockOffset,
358
+ entry.blockOffset + totalLen - 1,
359
+ this.diskCache,
360
+ this.retryOpts()
361
+ );
362
+ const decodedRes = decodeBlockResult(blockBytes);
363
+ if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
364
+ const decoded = decodedRes.value;
365
+ let curOffset = entry.firstOffset;
366
+ for (const r of decoded.records) {
367
+ if (curOffset < seq) {
368
+ curOffset += 1n;
369
+ continue;
370
+ }
371
+ if (curOffset > endOffsetNum) break;
372
+ if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
373
+ curOffset += 1n;
374
+ continue;
375
+ }
376
+ results.push({ offset: curOffset, payload: r.payload });
377
+ bytesOut += r.payload.byteLength;
378
+ curOffset += 1n;
379
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
380
+ seq = curOffset;
381
+ return Result.ok(finalize());
382
+ }
383
+ }
384
+ }
385
+ }
386
+ }
387
+
388
+ // Move to next segment.
389
+ seq = seg.end_offset + 1n;
390
+ }
391
+
392
+ // 2) Read remaining from WAL tail.
393
+ if (seq <= endOffsetNum) {
394
+ let hitLimit = false;
395
+ for (const rec of this.db.iterWalRange(stream, seq, endOffsetNum, keyBytes ?? undefined)) {
396
+ const s = BigInt(rec.offset);
397
+ const payload: Uint8Array = rec.payload;
398
+ results.push({ offset: s, payload });
399
+ bytesOut += payload.byteLength;
400
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
401
+ hitLimit = true;
402
+ // We only emitted payloads up through this offset (key-filtered reads
403
+ // may skip offsets in SQL). Resume from the next offset.
404
+ seq = s + 1n;
405
+ break;
406
+ }
407
+ }
408
+ if (!hitLimit) {
409
+ // We exhausted the iterator for this [seq, endOffsetNum] range. Even if
410
+ // it yielded zero records (common for key-filtered reads), we have
411
+ // scanned through endOffsetNum and should advance the stream cursor to
412
+ // avoid tight catchup loops.
413
+ seq = endOffsetNum + 1n;
414
+ }
415
+ }
416
+
417
+ return Result.ok(finalize());
418
+
419
+ function finalize(): ReadBatch {
420
+ // nextOffset is a stream cursor, not a "last matching record" cursor. For
421
+ // key-filtered reads, this must still advance past non-matching offsets,
422
+ // otherwise SSE/long-poll can spin forever when the stream advances but no
423
+ // matching keys appear.
424
+ const scannedThrough = seq - 1n;
425
+ const nextOffset = encodeOffset(epoch, scannedThrough);
426
+ return {
427
+ stream,
428
+ format,
429
+ key,
430
+ requestOffset: offset,
431
+ endOffset,
432
+ nextOffset,
433
+ endOffsetSeq: endOffsetNum,
434
+ nextOffsetSeq: scannedThrough,
435
+ records: results,
436
+ };
437
+ }
438
+ } catch (e: unknown) {
439
+ return Result.err({ kind: "internal", message: errorMessage(e) });
440
+ }
441
+ }
442
+
443
+ async read(args: { stream: string; offset: string; key: string | null; format: ReadFormat }): Promise<ReadBatch> {
444
+ const res = await this.readResult(args);
445
+ if (Result.isError(res)) throw dsError(res.error.message);
446
+ return res.value;
447
+ }
448
+ }
449
+
450
+ function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
451
+ if (a.byteLength !== b.byteLength) return false;
452
+ for (let i = 0; i < a.byteLength; i++) if (a[i] !== b[i]) return false;
453
+ return true;
454
+ }
@@ -0,0 +1,156 @@
1
+ import { Result } from "better-result";
2
+ import { createRequire } from "node:module";
3
+ import { fileURLToPath } from "node:url";
4
+ import { dsError } from "../util/ds_error.ts";
5
+ type HashInput = string | Uint8Array;
6
+
7
+ export type HashError = { kind: "hasher_not_initialized"; message: string };
8
+
9
+ type Hash32Hasher = {
10
+ init(seed?: number): void;
11
+ update(input: HashInput): void;
12
+ digest(outputType?: "hex"): string | number | bigint;
13
+ };
14
+
15
+ type Hash64Hasher = {
16
+ init(seed?: number | bigint): void;
17
+ update(input: HashInput): void;
18
+ digest(outputType?: "hex"): string | number | bigint;
19
+ };
20
+
21
+ type XXH3Module = {
22
+ createXXHash3(seedLow?: number, seedHigh?: number): Promise<Hash64Hasher>;
23
+ };
24
+
25
+ type XXH64Module = {
26
+ createXXHash64(seed?: number | bigint): Promise<Hash64Hasher>;
27
+ };
28
+
29
+ type XXH32Module = {
30
+ createXXHash32(seed?: number): Promise<Hash32Hasher>;
31
+ };
32
+
33
+ let xxh3Hasher: Hash64Hasher | null = null;
34
+ let xxh64Hasher: Hash64Hasher | null = null;
35
+ let xxh32Hasher: Hash32Hasher | null = null;
36
+
37
+ const isBunRuntime = typeof (globalThis as any).Bun !== "undefined";
38
+ const require = createRequire(import.meta.url);
39
+
40
+ function loadVendoredModule<T>(name: string): T {
41
+ const path = fileURLToPath(new URL(`./hash_vendor/${name}`, import.meta.url));
42
+ return require(path) as T;
43
+ }
44
+
45
+ if (!isBunRuntime) {
46
+ const xxh3Module = loadVendoredModule<XXH3Module>("xxhash3.umd.min.cjs");
47
+ const xxh64Module = loadVendoredModule<XXH64Module>("xxhash64.umd.min.cjs");
48
+ const xxh32Module = loadVendoredModule<XXH32Module>("xxhash32.umd.min.cjs");
49
+ xxh3Hasher = (await xxh3Module.createXXHash3()) as Hash64Hasher;
50
+ xxh64Hasher = (await xxh64Module.createXXHash64()) as Hash64Hasher;
51
+ xxh32Hasher = (await xxh32Module.createXXHash32()) as Hash32Hasher;
52
+ }
53
+
54
+ function toBigIntDigest(value: string | number | bigint): bigint {
55
+ if (typeof value === "bigint") return value;
56
+ if (typeof value === "number") return BigInt(value >>> 0);
57
+ const hex = value.startsWith("0x") ? value.slice(2) : value;
58
+ if (hex.length === 0) return 0n;
59
+ return BigInt(`0x${hex}`);
60
+ }
61
+
62
+ function toHex16(value: bigint): string {
63
+ const masked = value & 0xffff_ffff_ffff_ffffn;
64
+ return masked.toString(16).padStart(16, "0");
65
+ }
66
+
67
+ function bunHash64(input: HashInput, fn: (x: HashInput) => bigint): bigint {
68
+ return fn(input);
69
+ }
70
+
71
+ function nodeHash64Result(input: HashInput, hasher: Hash64Hasher | null, label: string): Result<bigint, HashError> {
72
+ if (!hasher) return Result.err({ kind: "hasher_not_initialized", message: `${label} hasher not initialized` });
73
+ hasher.init();
74
+ hasher.update(input);
75
+ const digest = hasher.digest("hex");
76
+ return Result.ok(toBigIntDigest(digest));
77
+ }
78
+
79
+ function nodeHash32Result(input: HashInput): Result<number, HashError> {
80
+ if (!xxh32Hasher) return Result.err({ kind: "hasher_not_initialized", message: "xxh32 hasher not initialized" });
81
+ xxh32Hasher.init();
82
+ xxh32Hasher.update(input);
83
+ const digest = xxh32Hasher.digest("hex");
84
+ if (typeof digest === "number") return Result.ok(digest >>> 0);
85
+ const asBigInt = toBigIntDigest(digest);
86
+ return Result.ok(Number(asBigInt & 0xffff_ffffn) >>> 0);
87
+ }
88
+
89
+ export function xxh3BigIntResult(input: HashInput): Result<bigint, HashError> {
90
+ if (isBunRuntime) return Result.ok(bunHash64(input, (x) => Bun.hash.xxHash3(x)));
91
+ return nodeHash64Result(input, xxh3Hasher, "xxh3");
92
+ }
93
+
94
+ export function xxh64BigIntResult(input: HashInput): Result<bigint, HashError> {
95
+ if (isBunRuntime) return Result.ok(bunHash64(input, (x) => Bun.hash.xxHash64(x)));
96
+ return nodeHash64Result(input, xxh64Hasher, "xxh64");
97
+ }
98
+
99
+ export function wyhashBigIntResult(input: HashInput): Result<bigint, HashError> {
100
+ if (isBunRuntime) return Result.ok(bunHash64(input, (x) => Bun.hash.wyhash(x)));
101
+ // Node fallback: use xxh3 for deterministic cross-runtime behavior.
102
+ return xxh3BigIntResult(input);
103
+ }
104
+
105
+ export function xxh3HexResult(input: HashInput): Result<string, HashError> {
106
+ const res = xxh3BigIntResult(input);
107
+ if (Result.isError(res)) return res;
108
+ return Result.ok(toHex16(res.value));
109
+ }
110
+
111
+ export function xxh64HexResult(input: HashInput): Result<string, HashError> {
112
+ const res = xxh64BigIntResult(input);
113
+ if (Result.isError(res)) return res;
114
+ return Result.ok(toHex16(res.value));
115
+ }
116
+
117
+ export function xxh32Result(input: HashInput): Result<number, HashError> {
118
+ if (isBunRuntime) return Result.ok((Bun.hash.xxHash32(input) as number) >>> 0);
119
+ return nodeHash32Result(input);
120
+ }
121
+
122
+ export function xxh3BigInt(input: HashInput): bigint {
123
+ const res = xxh3BigIntResult(input);
124
+ if (Result.isError(res)) throw dsError(res.error.message);
125
+ return res.value;
126
+ }
127
+
128
+ export function xxh64BigInt(input: HashInput): bigint {
129
+ const res = xxh64BigIntResult(input);
130
+ if (Result.isError(res)) throw dsError(res.error.message);
131
+ return res.value;
132
+ }
133
+
134
+ export function wyhashBigInt(input: HashInput): bigint {
135
+ const res = wyhashBigIntResult(input);
136
+ if (Result.isError(res)) throw dsError(res.error.message);
137
+ return res.value;
138
+ }
139
+
140
+ export function xxh3Hex(input: HashInput): string {
141
+ const res = xxh3HexResult(input);
142
+ if (Result.isError(res)) throw dsError(res.error.message);
143
+ return res.value;
144
+ }
145
+
146
+ export function xxh64Hex(input: HashInput): string {
147
+ const res = xxh64HexResult(input);
148
+ if (Result.isError(res)) throw dsError(res.error.message);
149
+ return res.value;
150
+ }
151
+
152
+ export function xxh32(input: HashInput): number {
153
+ const res = xxh32Result(input);
154
+ if (Result.isError(res)) throw dsError(res.error.message);
155
+ return res.value;
156
+ }
@@ -0,0 +1,38 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Dani Biró
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ Embedded C implementations might use other, similarly permissive licenses.
24
+ Check the beginning of the files from the /src directory.
25
+
26
+ Special thank you to the authors of original C algorithms:
27
+ - Alexander Peslyak <solar@openwall.com>
28
+ - Aleksey Kravchenko <rhash.admin@gmail.com>
29
+ - Colin Percival
30
+ - Stephan Brumme <create@stephan-brumme.com>
31
+ - Steve Reid <steve@edmweb.com>
32
+ - Samuel Neves <sneves@dei.uc.pt>
33
+ - Solar Designer <solar@openwall.com>
34
+ - Project Nayuki
35
+ - ARM Limited
36
+ - Yanbo Li dreamfly281@gmail.com, goldboar@163.com
37
+ - Mark Adler
38
+ - Yann Collet
@@ -0,0 +1,8 @@
1
+ Vendored xxhash runtime files in this directory are copied from `hash-wasm@4.12.0`:
2
+
3
+ - `dist/xxhash3.umd.min.js`
4
+ - `dist/xxhash32.umd.min.js`
5
+ - `dist/xxhash64.umd.min.js`
6
+
7
+ These files are used only for the Node runtime path in local development builds.
8
+ The upstream license is preserved in `LICENSE.hash-wasm` in this directory.