@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
@@ -0,0 +1,245 @@
1
+ import { Result } from "better-result";
2
+ import type { SchemaRegistry, SearchConfig, SearchFieldBinding, SearchFieldConfig } from "../schema/registry";
3
+ import { resolvePointerResult } from "../util/json_pointer";
4
+ import { schemaVersionForOffset } from "../schema/read_json";
5
+
6
+ export type SearchExactTerm = {
7
+ field: string;
8
+ config: SearchFieldConfig;
9
+ canonical: string;
10
+ bytes: Uint8Array;
11
+ };
12
+
13
+ export function resolveSearchAlias(search: SearchConfig | undefined, fieldName: string): string {
14
+ return search?.aliases?.[fieldName] ?? fieldName;
15
+ }
16
+
17
+ export function getSearchFieldConfig(search: SearchConfig | undefined, fieldName: string): SearchFieldConfig | null {
18
+ const resolved = resolveSearchAlias(search, fieldName);
19
+ return search?.fields?.[resolved] ?? null;
20
+ }
21
+
22
+ export function getSearchFieldBinding(config: SearchFieldConfig, version: number): SearchFieldBinding | null {
23
+ let selected: SearchFieldBinding | null = null;
24
+ for (const binding of config.bindings) {
25
+ if (binding.version <= version && (!selected || binding.version > selected.version)) {
26
+ selected = binding;
27
+ }
28
+ }
29
+ return selected;
30
+ }
31
+
32
+ export function normalizeKeywordValue(value: unknown, normalizer: SearchFieldConfig["normalizer"]): string | null {
33
+ if (typeof value !== "string") return null;
34
+ return normalizer === "lowercase_v1" ? value.toLowerCase() : value;
35
+ }
36
+
37
+ export function canonicalizeExactValue(config: SearchFieldConfig, value: unknown): string | null {
38
+ switch (config.kind) {
39
+ case "keyword":
40
+ return normalizeKeywordValue(value, config.normalizer);
41
+ case "integer":
42
+ if (typeof value === "bigint") return value.toString();
43
+ if (typeof value === "number" && Number.isFinite(value) && Number.isInteger(value)) return String(value);
44
+ if (typeof value === "string" && /^-?(0|[1-9][0-9]*)$/.test(value.trim())) return String(BigInt(value.trim()));
45
+ return null;
46
+ case "float":
47
+ if (typeof value === "bigint") return value.toString();
48
+ if (typeof value === "number" && Number.isFinite(value)) return String(value);
49
+ if (typeof value === "string" && value.trim() !== "") {
50
+ const n = Number(value);
51
+ if (Number.isFinite(n)) return String(n);
52
+ }
53
+ return null;
54
+ case "date":
55
+ if (typeof value === "number" && Number.isFinite(value)) return String(Math.trunc(value));
56
+ if (typeof value === "bigint") return value.toString();
57
+ if (typeof value === "string" && value.trim() !== "") {
58
+ const parsed = Date.parse(value);
59
+ if (Number.isFinite(parsed)) return String(Math.trunc(parsed));
60
+ if (/^-?(0|[1-9][0-9]*)$/.test(value.trim())) return String(BigInt(value.trim()));
61
+ }
62
+ return null;
63
+ case "bool":
64
+ if (typeof value === "boolean") return value ? "true" : "false";
65
+ if (typeof value === "string") {
66
+ const lowered = value.trim().toLowerCase();
67
+ if (lowered === "true" || lowered === "false") return lowered;
68
+ }
69
+ return null;
70
+ default:
71
+ return null;
72
+ }
73
+ }
74
+
75
+ export function canonicalizeColumnValue(config: SearchFieldConfig, value: unknown): bigint | number | boolean | null {
76
+ switch (config.kind) {
77
+ case "integer": {
78
+ const canonical = canonicalizeExactValue(config, value);
79
+ return canonical == null ? null : BigInt(canonical);
80
+ }
81
+ case "date": {
82
+ const canonical = canonicalizeExactValue(config, value);
83
+ return canonical == null ? null : BigInt(canonical);
84
+ }
85
+ case "float": {
86
+ const canonical = canonicalizeExactValue(config, value);
87
+ if (canonical == null) return null;
88
+ const parsed = Number(canonical);
89
+ return Number.isFinite(parsed) ? parsed : null;
90
+ }
91
+ case "bool":
92
+ return canonicalizeExactValue(config, value) === "true"
93
+ ? true
94
+ : canonicalizeExactValue(config, value) === "false"
95
+ ? false
96
+ : null;
97
+ default:
98
+ return null;
99
+ }
100
+ }
101
+
102
+ export function analyzeTextValue(value: string, analyzer: SearchFieldConfig["analyzer"]): string[] {
103
+ if (analyzer !== "unicode_word_v1") return [];
104
+ const matches = value.toLowerCase().match(/[\p{L}\p{N}]+/gu);
105
+ return matches ? matches.filter((token) => token.length > 0) : [];
106
+ }
107
+
108
+ function addRawValues(out: unknown[], value: unknown): void {
109
+ if (Array.isArray(value)) {
110
+ for (const item of value) addRawValues(out, item);
111
+ return;
112
+ }
113
+ out.push(value);
114
+ }
115
+
116
/**
 * Extracts the raw (un-canonicalized) values for the requested search fields from
 * one decoded record. Each field's value is resolved through the binding whose
 * version is the newest not exceeding the record's schema version; array values
 * are flattened. Fields with no config, no applicable binding, or an absent
 * pointer target are silently skipped. Errs when the record is not a plain JSON
 * object or a JSON-pointer resolution fails (first failure aborts the whole call).
 */
export function extractRawSearchValuesForFieldsResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown,
  fieldNames: Iterable<string>
): Result<Map<string, unknown[]>, { message: string }> {
  // No search config on the registry means there is nothing to extract.
  if (!reg.search) return Result.ok(new Map());
  // Only plain objects are indexable; arrays and scalars are rejected outright.
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return Result.err({ message: "search fields require JSON object records" });
  }
  const version = schemaVersionForOffset(reg, offset);
  const out = new Map<string, unknown[]>();
  for (const fieldName of fieldNames) {
    const config = reg.search.fields[fieldName];
    if (!config) continue;
    // Pick the binding applicable to this record's schema version.
    const binding = getSearchFieldBinding(config, version);
    if (!binding) continue;
    const resolvedRes = resolvePointerResult(value, binding.jsonPointer);
    // A malformed pointer is an error; a pointer that resolves to "absent" is not.
    if (Result.isError(resolvedRes)) return Result.err({ message: resolvedRes.error.message });
    if (!resolvedRes.value.exists) continue;
    const values: unknown[] = [];
    // Flatten nested arrays so multi-valued fields index every element.
    addRawValues(values, resolvedRes.value.value);
    if (values.length > 0) out.set(fieldName, values);
  }
  return Result.ok(out);
}
142
+
143
+ export function extractRawSearchValuesResult(
144
+ reg: SchemaRegistry,
145
+ offset: bigint,
146
+ value: unknown
147
+ ): Result<Map<string, unknown[]>, { message: string }> {
148
+ return extractRawSearchValuesForFieldsResult(reg, offset, value, Object.keys(reg.search?.fields ?? {}));
149
+ }
150
+
151
+ export function extractSearchExactTermsResult(
152
+ reg: SchemaRegistry,
153
+ offset: bigint,
154
+ value: unknown
155
+ ): Result<SearchExactTerm[], { message: string }> {
156
+ const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
157
+ if (Result.isError(rawValuesRes)) return rawValuesRes;
158
+ const out: SearchExactTerm[] = [];
159
+ const seen = new Set<string>();
160
+ for (const [fieldName, values] of rawValuesRes.value) {
161
+ const config = reg.search?.fields[fieldName];
162
+ if (!config?.exact) continue;
163
+ for (const rawValue of values) {
164
+ const canonical = canonicalizeExactValue(config, rawValue);
165
+ if (canonical == null) continue;
166
+ const dedupeKey = `${fieldName}\u0000${canonical}`;
167
+ if (seen.has(dedupeKey)) continue;
168
+ seen.add(dedupeKey);
169
+ out.push({
170
+ field: fieldName,
171
+ config,
172
+ canonical,
173
+ bytes: new TextEncoder().encode(canonical),
174
+ });
175
+ }
176
+ }
177
+ return Result.ok(out);
178
+ }
179
+
180
+ export function extractSearchExactValuesResult(
181
+ reg: SchemaRegistry,
182
+ offset: bigint,
183
+ value: unknown
184
+ ): Result<Map<string, string[]>, { message: string }> {
185
+ const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
186
+ if (Result.isError(rawValuesRes)) return rawValuesRes;
187
+ const out = new Map<string, string[]>();
188
+ for (const [fieldName, values] of rawValuesRes.value) {
189
+ const config = reg.search?.fields[fieldName];
190
+ if (!config) continue;
191
+ const exactValues: string[] = [];
192
+ for (const rawValue of values) {
193
+ const canonical = canonicalizeExactValue(config, rawValue);
194
+ if (canonical != null) exactValues.push(canonical);
195
+ }
196
+ if (exactValues.length > 0) out.set(fieldName, exactValues);
197
+ }
198
+ return Result.ok(out);
199
+ }
200
+
201
+ export function extractSearchColumnValuesResult(
202
+ reg: SchemaRegistry,
203
+ offset: bigint,
204
+ value: unknown
205
+ ): Result<Map<string, Array<bigint | number | boolean>>, { message: string }> {
206
+ const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
207
+ if (Result.isError(rawValuesRes)) return rawValuesRes;
208
+ const out = new Map<string, Array<bigint | number | boolean>>();
209
+ for (const [fieldName, values] of rawValuesRes.value) {
210
+ const config = reg.search?.fields[fieldName];
211
+ if (!config?.column) continue;
212
+ const colValues: Array<bigint | number | boolean> = [];
213
+ for (const rawValue of values) {
214
+ const normalized = canonicalizeColumnValue(config, rawValue);
215
+ if (normalized != null) colValues.push(normalized);
216
+ }
217
+ if (colValues.length > 0) out.set(fieldName, colValues);
218
+ }
219
+ return Result.ok(out);
220
+ }
221
+
222
+ export function extractSearchTextValuesResult(
223
+ reg: SchemaRegistry,
224
+ offset: bigint,
225
+ value: unknown
226
+ ): Result<Map<string, string[]>, { message: string }> {
227
+ const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
228
+ if (Result.isError(rawValuesRes)) return rawValuesRes;
229
+ const out = new Map<string, string[]>();
230
+ for (const [fieldName, values] of rawValuesRes.value) {
231
+ const config = reg.search?.fields[fieldName];
232
+ if (!config) continue;
233
+ const textValues: string[] = [];
234
+ for (const rawValue of values) {
235
+ if (config.kind === "keyword") {
236
+ const normalized = normalizeKeywordValue(rawValue, config.normalizer);
237
+ if (normalized != null) textValues.push(normalized);
238
+ } else if (config.kind === "text" && typeof rawValue === "string") {
239
+ textValues.push(rawValue);
240
+ }
241
+ }
242
+ if (textValues.length > 0) out.set(fieldName, textValues);
243
+ }
244
+ return Result.ok(out);
245
+ }
@@ -1,5 +1,13 @@
1
1
  import { mkdirSync, readdirSync, statSync, unlinkSync, renameSync, existsSync, writeFileSync, readFileSync } from "node:fs";
2
2
  import { dirname, join, relative } from "node:path";
3
+ import { LruCache } from "../util/lru";
4
+
5
+ export type MappedSegmentFile = {
6
+ objectKey: string;
7
+ path: string;
8
+ bytes: Uint8Array;
9
+ sizeBytes: number;
10
+ };
3
11
 
4
12
  export type SegmentCacheStats = {
5
13
  hits: number;
@@ -9,21 +17,27 @@ export type SegmentCacheStats = {
9
17
  usedBytes: number;
10
18
  maxBytes: number;
11
19
  entryCount: number;
20
+ mappedBytes: number;
21
+ mappedEntryCount: number;
22
+ pinnedEntryCount: number;
12
23
  };
13
24
 
14
25
  export class SegmentDiskCache {
15
26
  private readonly rootDir: string;
16
27
  private readonly maxBytes: number;
17
28
  private readonly entries = new Map<string, { path: string; size: number }>();
29
+ private readonly pinnedKeys = new Set<string>();
30
+ private readonly mappedFiles: LruCache<string, MappedSegmentFile>;
18
31
  private totalBytes = 0;
19
32
  private hits = 0;
20
33
  private misses = 0;
21
34
  private evictions = 0;
22
35
  private bytesAdded = 0;
23
36
 
24
- constructor(rootDir: string, maxBytes: number) {
37
+ constructor(rootDir: string, maxBytes: number, mappedEntries = 64) {
25
38
  this.rootDir = rootDir;
26
39
  this.maxBytes = maxBytes;
40
+ this.mappedFiles = new LruCache(Math.max(1, mappedEntries));
27
41
  if (this.maxBytes > 0) {
28
42
  mkdirSync(this.rootDir, { recursive: true });
29
43
  this.loadIndex();
@@ -86,8 +100,43 @@ export class SegmentDiskCache {
86
100
  }
87
101
  this.recordHit();
88
102
  this.touch(objectKey);
103
+ const mapped = this.getMapped(objectKey);
104
+ if (mapped) return mapped.bytes;
105
+ const path = this.getPath(objectKey);
106
+ return readFileSync(path);
107
+ }
108
+
109
+ getMapped(objectKey: string): MappedSegmentFile | null {
110
+ if (!this.has(objectKey)) return null;
111
+ const cached = this.mappedFiles.get(objectKey);
112
+ if (cached) {
113
+ this.pinnedKeys.add(objectKey);
114
+ this.touch(objectKey);
115
+ return cached;
116
+ }
117
+
89
118
  const path = this.getPath(objectKey);
90
- return new Uint8Array(readFileSync(path));
119
+ let sizeBytes: number;
120
+ try {
121
+ sizeBytes = statSync(path).size;
122
+ } catch {
123
+ this.entries.delete(objectKey);
124
+ return null;
125
+ }
126
+
127
+ let bytes: Uint8Array;
128
+ try {
129
+ bytes = (Bun as any).mmap(path, { shared: true }) as Uint8Array;
130
+ } catch {
131
+ return null;
132
+ }
133
+ if (bytes.byteLength !== sizeBytes) return null;
134
+
135
+ const mapped = { objectKey, path, bytes, sizeBytes };
136
+ this.mappedFiles.set(objectKey, mapped);
137
+ this.pinnedKeys.add(objectKey);
138
+ this.touch(objectKey);
139
+ return mapped;
91
140
  }
92
141
 
93
142
  put(objectKey: string, bytes: Uint8Array): boolean {
@@ -111,6 +160,7 @@ export class SegmentDiskCache {
111
160
  }
112
161
  const existing = this.entries.get(objectKey);
113
162
  if (existing) this.totalBytes = Math.max(0, this.totalBytes - existing.size);
163
+ this.mappedFiles.delete(objectKey);
114
164
  this.entries.set(objectKey, { path: dest, size: sizeBytes });
115
165
  this.totalBytes += sizeBytes;
116
166
  this.bytesAdded += sizeBytes;
@@ -130,6 +180,7 @@ export class SegmentDiskCache {
130
180
  }
131
181
  const existing = this.entries.get(objectKey);
132
182
  if (existing) this.totalBytes = Math.max(0, this.totalBytes - existing.size);
183
+ this.mappedFiles.delete(objectKey);
133
184
  this.entries.set(objectKey, { path: dest, size: sizeBytes });
134
185
  this.totalBytes += sizeBytes;
135
186
  this.bytesAdded += sizeBytes;
@@ -137,6 +188,7 @@ export class SegmentDiskCache {
137
188
  }
138
189
 
139
190
  remove(objectKey: string): void {
191
+ if (this.pinnedKeys.has(objectKey)) return;
140
192
  const entry = this.entries.get(objectKey);
141
193
  if (!entry) return;
142
194
  try {
@@ -151,6 +203,27 @@ export class SegmentDiskCache {
151
203
  private evictIfNeeded(incomingBytes: number): void {
152
204
  while (this.totalBytes + incomingBytes > this.maxBytes && this.entries.size > 0) {
153
205
  const oldestKey = this.entries.keys().next().value as string;
206
+ if (this.pinnedKeys.has(oldestKey)) {
207
+ let removed = false;
208
+ for (const candidateKey of this.entries.keys()) {
209
+ if (this.pinnedKeys.has(candidateKey)) continue;
210
+ const candidate = this.entries.get(candidateKey);
211
+ if (!candidate) continue;
212
+ try {
213
+ unlinkSync(candidate.path);
214
+ } catch {
215
+ // ignore
216
+ }
217
+ this.totalBytes = Math.max(0, this.totalBytes - candidate.size);
218
+ this.entries.delete(candidateKey);
219
+ this.mappedFiles.delete(candidateKey);
220
+ this.evictions += 1;
221
+ removed = true;
222
+ break;
223
+ }
224
+ if (!removed) break;
225
+ continue;
226
+ }
154
227
  const entry = this.entries.get(oldestKey);
155
228
  if (entry) {
156
229
  try {
@@ -159,6 +232,7 @@ export class SegmentDiskCache {
159
232
  // ignore
160
233
  }
161
234
  this.totalBytes = Math.max(0, this.totalBytes - entry.size);
235
+ this.mappedFiles.delete(oldestKey);
162
236
  this.evictions += 1;
163
237
  }
164
238
  this.entries.delete(oldestKey);
@@ -166,6 +240,12 @@ export class SegmentDiskCache {
166
240
  }
167
241
 
168
242
  stats(): SegmentCacheStats {
243
+ let mappedBytes = 0;
244
+ let mappedEntryCount = 0;
245
+ for (const mapped of this.mappedFiles.values()) {
246
+ mappedBytes += mapped.sizeBytes;
247
+ mappedEntryCount += 1;
248
+ }
169
249
  return {
170
250
  hits: this.hits,
171
251
  misses: this.misses,
@@ -174,6 +254,17 @@ export class SegmentDiskCache {
174
254
  usedBytes: this.totalBytes,
175
255
  maxBytes: this.maxBytes,
176
256
  entryCount: this.entries.size,
257
+ mappedBytes,
258
+ mappedEntryCount,
259
+ pinnedEntryCount: this.pinnedKeys.size,
177
260
  };
178
261
  }
262
+
263
+ bytesForObjectKeyPrefix(prefix: string): number {
264
+ let total = 0;
265
+ for (const [objectKey, entry] of this.entries.entries()) {
266
+ if (objectKey.startsWith(prefix)) total += entry.size;
267
+ }
268
+ return total;
269
+ }
179
270
  }
@@ -0,0 +1,89 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import type { SegmentRow } from "../db/db";
3
+ import type { ObjectStore } from "../objectstore/interface";
4
+ import type { SegmentDiskCache } from "./cache";
5
+ import { dsError } from "../util/ds_error";
6
+ import type { RetryOptions } from "../util/retry";
7
+ import { retry } from "../util/retry";
8
+ import { segmentObjectKey, streamHash16Hex } from "../util/stream_paths";
9
+
10
+ export type SegmentReadSource =
11
+ | { kind: "mapped"; path: string; bytes: Uint8Array }
12
+ | { kind: "bytes"; bytes: Uint8Array };
13
+
14
+ function readRangeFromBytes(bytes: Uint8Array, start: number, end: number): Uint8Array {
15
+ const boundedStart = Math.max(0, Math.min(start, bytes.byteLength));
16
+ const boundedEnd = Math.max(boundedStart, Math.min(end + 1, bytes.byteLength));
17
+ return bytes.subarray(boundedStart, boundedEnd);
18
+ }
19
+
20
+ export function readRangeFromSource(source: SegmentReadSource, start: number, end: number): Uint8Array {
21
+ return readRangeFromBytes(source.bytes, start, end);
22
+ }
23
+
24
/**
 * Resolves the bytes for a segment, preferring the cheapest source in order:
 *   1. the segment's local file (mmap'd when Bun.mmap succeeds, plain read otherwise),
 *   2. the shared disk cache (mapped entry first, then the cached file on disk),
 *   3. the object store, optionally retried, with a write-back into the disk cache.
 * Returns a tagged source so callers can distinguish mapped memory from owned bytes.
 */
export async function loadSegmentSource(
  os: ObjectStore,
  seg: SegmentRow,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<SegmentReadSource> {
  if (seg.local_path && seg.local_path.length > 0 && existsSync(seg.local_path)) {
    try {
      // Bun-only API ('as any' sidesteps missing typings); shared mapping avoids copying the file.
      const bytes = (Bun as any).mmap(seg.local_path, { shared: true }) as Uint8Array;
      return { kind: "mapped", path: seg.local_path, bytes };
    } catch {
      // mmap can fail (e.g. non-Bun runtime); fall back to reading the file into memory.
      return { kind: "bytes", bytes: readFileSync(seg.local_path) };
    }
  }

  const objectKey = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);
  if (diskCache && diskCache.has(objectKey)) {
    diskCache.recordHit();
    diskCache.touch(objectKey);
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
    const cachedPath = diskCache.getPath(objectKey);
    if (existsSync(cachedPath)) return { kind: "bytes", bytes: readFileSync(cachedPath) };
    // Index said present but the file is gone: drop the stale entry and fetch fresh below.
    diskCache.remove(objectKey);
  }

  if (diskCache) diskCache.recordMiss();

  const bytes = await retry(
    async () => {
      const res = await os.get(objectKey);
      if (!res) throw dsError(`object store missing segment: ${objectKey}`);
      return res;
    },
    // Default disables retries; NOTE(review): presumably 0 also means "no timeout" in retry() — confirm.
    retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
  );

  if (diskCache?.put(objectKey, bytes)) {
    // Re-serve from the cache so subsequent readers can share the mapped copy.
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
    return { kind: "bytes", bytes: readFileSync(diskCache.getPath(objectKey)) };
  }
  return { kind: "bytes", bytes };
}
68
+
69
+ export async function loadSegmentBytesCached(
70
+ os: ObjectStore,
71
+ seg: SegmentRow,
72
+ diskCache?: SegmentDiskCache,
73
+ retryOpts?: RetryOptions
74
+ ): Promise<Uint8Array> {
75
+ const source = await loadSegmentSource(os, seg, diskCache, retryOpts);
76
+ return source.bytes;
77
+ }
78
+
79
+ export async function readSegmentRangeCached(
80
+ os: ObjectStore,
81
+ seg: SegmentRow,
82
+ start: number,
83
+ end: number,
84
+ diskCache?: SegmentDiskCache,
85
+ retryOpts?: RetryOptions
86
+ ): Promise<Uint8Array> {
87
+ const source = await loadSegmentSource(os, seg, diskCache, retryOpts);
88
+ return readRangeFromSource(source, start, end);
89
+ }
@@ -59,6 +59,14 @@ export type IterateBlockEntry = {
59
59
  decoded: DecodedBlock;
60
60
  };
61
61
 
62
+ export type IterateBlockRecordEntry = {
63
+ blockOffset: number;
64
+ recordIndex: number;
65
+ appendNs: bigint;
66
+ routingKey: Uint8Array;
67
+ payload: Uint8Array;
68
+ };
69
+
62
70
  function invalidSegment<T = never>(message: string): Result<T, SegmentFormatError> {
63
71
  return Result.err({ kind: "invalid_segment_format", message });
64
72
  }
@@ -125,59 +133,39 @@ export function decodeBlock(blockBytes: Uint8Array): DecodedBlock {
125
133
  }
126
134
 
127
135
  export function decodeBlockResult(blockBytes: Uint8Array): Result<DecodedBlock, SegmentFormatError> {
128
- if (blockBytes.byteLength < DSB3_HEADER_BYTES) return invalidSegment("block too small");
129
- if (
130
- blockBytes[0] !== "D".charCodeAt(0) ||
131
- blockBytes[1] !== "S".charCodeAt(0) ||
132
- blockBytes[2] !== "B".charCodeAt(0) ||
133
- blockBytes[3] !== "3".charCodeAt(0)
134
- ) {
135
- return invalidSegment("bad block magic");
136
- }
137
-
138
- const uncompressedLen = readU32BE(blockBytes, 4);
139
- const compressedLen = readU32BE(blockBytes, 8);
140
- const recordCount = readU32BE(blockBytes, 12);
141
- const bloom = blockBytes.slice(16, 48);
142
- const firstAppendNs = readU64BE(blockBytes, 48);
143
- const lastAppendNs = readU64BE(blockBytes, 56);
144
- const expectedCrc = readU32BE(blockBytes, 64);
145
-
146
- const payload = blockBytes.slice(DSB3_HEADER_BYTES, DSB3_HEADER_BYTES + compressedLen);
147
- if (payload.byteLength !== compressedLen) return invalidSegment("truncated block");
148
- const actualCrc = crc32c(payload);
149
- if (actualCrc !== expectedCrc) return invalidSegment("crc mismatch");
150
-
151
- let uncompressed: Uint8Array;
152
- try {
153
- uncompressed = new Uint8Array(zstdDecompressSync(payload));
154
- } catch (e: any) {
155
- return invalidSegment(String(e?.message ?? e));
156
- }
157
- if (uncompressed.byteLength !== uncompressedLen) {
158
- return invalidSegment(`bad uncompressed len: got=${uncompressed.byteLength} expected=${uncompressedLen}`);
159
- }
136
+ const headerRes = parseBlockHeaderResult(blockBytes);
137
+ if (Result.isError(headerRes)) return headerRes;
138
+ const header = headerRes.value;
139
+ const uncompressedRes = decompressBlockPayloadResult(blockBytes, header);
140
+ if (Result.isError(uncompressedRes)) return uncompressedRes;
141
+ const uncompressed = uncompressedRes.value;
160
142
 
161
143
  const records: SegmentRecord[] = [];
162
144
  let off = 0;
163
- for (let i = 0; i < recordCount; i++) {
145
+ for (let i = 0; i < header.recordCount; i++) {
164
146
  if (off + 8 + 4 > uncompressed.byteLength) return invalidSegment("truncated record");
165
147
  const appendNs = readU64BE(uncompressed, off);
166
148
  off += 8;
167
149
  const keyLen = readU32BE(uncompressed, off);
168
150
  off += 4;
169
151
  if (off + keyLen + 4 > uncompressed.byteLength) return invalidSegment("truncated key");
170
- const routingKey = uncompressed.slice(off, off + keyLen);
152
+ const routingKey = uncompressed.subarray(off, off + keyLen);
171
153
  off += keyLen;
172
154
  const dataLen = readU32BE(uncompressed, off);
173
155
  off += 4;
174
156
  if (off + dataLen > uncompressed.byteLength) return invalidSegment("truncated payload");
175
- const payload = uncompressed.slice(off, off + dataLen);
157
+ const payload = uncompressed.subarray(off, off + dataLen);
176
158
  off += dataLen;
177
159
  records.push({ appendNs, routingKey, payload });
178
160
  }
179
161
 
180
- return Result.ok({ recordCount, firstAppendNs, lastAppendNs, bloom, records });
162
+ return Result.ok({
163
+ recordCount: header.recordCount,
164
+ firstAppendNs: header.firstAppendNs,
165
+ lastAppendNs: header.lastAppendNs,
166
+ bloom: header.bloom.slice(),
167
+ records,
168
+ });
181
169
  }
182
170
 
183
171
  export function encodeFooter(entries: BlockIndexEntry[]): Uint8Array {
@@ -261,6 +249,65 @@ export function* iterateBlocksResult(
261
249
  }
262
250
  }
263
251
 
252
+ export function* iterateBlockRecordsResult(
253
+ segmentBytes: Uint8Array
254
+ ): Generator<Result<IterateBlockRecordEntry, SegmentFormatError>, void, void> {
255
+ const parsed = parseFooter(segmentBytes);
256
+ const limit = parsed ? parsed.footerStart : segmentBytes.byteLength;
257
+ let off = 0;
258
+ while (off < limit) {
259
+ if (off + DSB3_HEADER_BYTES > limit) {
260
+ yield invalidSegment("truncated segment (block header)");
261
+ return;
262
+ }
263
+ const headerRes = parseBlockHeaderResult(segmentBytes.subarray(off, off + DSB3_HEADER_BYTES));
264
+ if (Result.isError(headerRes)) {
265
+ yield headerRes;
266
+ return;
267
+ }
268
+ const header = headerRes.value;
269
+ const totalLen = DSB3_HEADER_BYTES + header.compressedLen;
270
+ if (off + totalLen > limit) {
271
+ yield invalidSegment("truncated segment (block payload)");
272
+ return;
273
+ }
274
+ const blockBytes = segmentBytes.subarray(off, off + totalLen);
275
+ const uncompressedRes = decompressBlockPayloadResult(blockBytes, header);
276
+ if (Result.isError(uncompressedRes)) {
277
+ yield uncompressedRes;
278
+ return;
279
+ }
280
+ const uncompressed = uncompressedRes.value;
281
+ let recOff = 0;
282
+ for (let recordIndex = 0; recordIndex < header.recordCount; recordIndex++) {
283
+ if (recOff + 8 + 4 > uncompressed.byteLength) {
284
+ yield invalidSegment("truncated record");
285
+ return;
286
+ }
287
+ const appendNs = readU64BE(uncompressed, recOff);
288
+ recOff += 8;
289
+ const keyLen = readU32BE(uncompressed, recOff);
290
+ recOff += 4;
291
+ if (recOff + keyLen + 4 > uncompressed.byteLength) {
292
+ yield invalidSegment("truncated key");
293
+ return;
294
+ }
295
+ const routingKey = uncompressed.subarray(recOff, recOff + keyLen);
296
+ recOff += keyLen;
297
+ const dataLen = readU32BE(uncompressed, recOff);
298
+ recOff += 4;
299
+ if (recOff + dataLen > uncompressed.byteLength) {
300
+ yield invalidSegment("truncated payload");
301
+ return;
302
+ }
303
+ const payload = uncompressed.subarray(recOff, recOff + dataLen);
304
+ recOff += dataLen;
305
+ yield Result.ok({ blockOffset: off, recordIndex, appendNs, routingKey, payload });
306
+ }
307
+ off += totalLen;
308
+ }
309
+ }
310
+
264
311
  export function* iterateBlocks(segmentBytes: Uint8Array): Generator<IterateBlockEntry, void, void> {
265
312
  for (const itemRes of iterateBlocksResult(segmentBytes)) {
266
313
  if (Result.isError(itemRes)) throw dsError(itemRes.error.message);
@@ -268,6 +315,13 @@ export function* iterateBlocks(segmentBytes: Uint8Array): Generator<IterateBlock
268
315
  }
269
316
  }
270
317
 
318
+ export function* iterateBlockRecords(segmentBytes: Uint8Array): Generator<IterateBlockRecordEntry, void, void> {
319
+ for (const itemRes of iterateBlockRecordsResult(segmentBytes)) {
320
+ if (Result.isError(itemRes)) throw dsError(itemRes.error.message);
321
+ yield itemRes.value;
322
+ }
323
+ }
324
+
271
325
  export function parseFooterBytes(footer: Uint8Array): SegmentFooter | null {
272
326
  if (footer.byteLength < 12) return null;
273
327
  const magic = String.fromCharCode(footer[0], footer[1], footer[2], footer[3]);
@@ -329,3 +383,21 @@ export function parseBlockHeaderResult(header: Uint8Array): Result<BlockHeader,
329
383
  crc32c: crc32cVal,
330
384
  });
331
385
  }
386
+
387
+ function decompressBlockPayloadResult(blockBytes: Uint8Array, header: BlockHeader): Result<Uint8Array, SegmentFormatError> {
388
+ const payload = blockBytes.subarray(DSB3_HEADER_BYTES, DSB3_HEADER_BYTES + header.compressedLen);
389
+ if (payload.byteLength !== header.compressedLen) return invalidSegment("truncated block");
390
+ const actualCrc = crc32c(payload);
391
+ if (actualCrc !== header.crc32c) return invalidSegment("crc mismatch");
392
+
393
+ let uncompressed: Uint8Array;
394
+ try {
395
+ uncompressed = new Uint8Array(zstdDecompressSync(payload));
396
+ } catch (e: any) {
397
+ return invalidSegment(String(e?.message ?? e));
398
+ }
399
+ if (uncompressed.byteLength !== header.uncompressedLen) {
400
+ return invalidSegment(`bad uncompressed len: got=${uncompressed.byteLength} expected=${header.uncompressedLen}`);
401
+ }
402
+ return Result.ok(uncompressed);
403
+ }