@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
@@ -0,0 +1,824 @@
1
+ import { randomBytes } from "node:crypto";
2
+ import { readFileSync } from "node:fs";
3
+ import { Result } from "better-result";
4
+ import type { Config } from "../config";
5
+ import type { SecondaryIndexRunRow, SegmentRow, SqliteDurableStore } from "../db/db";
6
+ import type { ObjectStore } from "../objectstore/interface";
7
+ import { SchemaRegistryStore } from "../schema/registry";
8
+ import { SegmentDiskCache } from "../segment/cache";
9
+ import { loadSegmentBytesCached } from "../segment/cached_segment";
10
+ import { iterateBlockRecordsResult } from "../segment/format";
11
+ import { retry } from "../util/retry";
12
+ import { dsError } from "../util/ds_error.ts";
13
+ import { secondaryIndexRunObjectKey, streamHash16Hex } from "../util/stream_paths";
14
+ import { siphash24 } from "../util/siphash";
15
+ import { yieldToEventLoop } from "../util/yield";
16
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler";
17
+ import { ConcurrencyGate } from "../concurrency_gate";
18
+ import type { ForegroundActivityTracker } from "../foreground_activity";
19
+ import { binaryFuseContains, buildBinaryFuseResult } from "./binary_fuse";
20
+ import { IndexRunCache } from "./run_cache";
21
+ import {
22
+ decodeIndexRunResult,
23
+ encodeIndexRunResult,
24
+ RUN_TYPE_MASK16,
25
+ RUN_TYPE_POSTINGS,
26
+ type IndexRun,
27
+ } from "./run_format";
28
+ import {
29
+ extractSecondaryIndexValuesForFieldResult,
30
+ extractSecondaryIndexValuesResult,
31
+ getConfiguredSecondaryIndexes,
32
+ hashSecondaryIndexField,
33
+ type SecondaryIndexField,
34
+ } from "./secondary_schema";
35
+
36
/** Error payload carried by every failed Result produced in this module. */
type SecondaryIndexBuildError = { kind: "invalid_index_build"; message: string };

/**
 * Wraps `message` in a failed Result tagged `invalid_index_build`.
 *
 * @param message - human-readable description of what went wrong
 * @returns an error Result; the success type `T` defaults to `never`
 */
function invalidIndexBuild<T = never>(message: string): Result<T, SecondaryIndexBuildError> {
  const failure: SecondaryIndexBuildError = { kind: "invalid_index_build", message };
  return Result.err(failure);
}
41
+
42
/**
 * Looks up `needle` in an ascending-sorted array of bigints.
 *
 * @param values - sorted fingerprints to search
 * @param needle - fingerprint to locate
 * @returns the index of an element equal to `needle`, or -1 when none exists
 */
function binarySearch(values: bigint[], needle: bigint): number {
  // Half-open window [left, right); the probe is the midpoint of the
  // equivalent closed window [left, right - 1].
  let left = 0;
  let right = values.length;
  while (left < right) {
    const probe = (left + right - 1) >>> 1;
    const candidate = values[probe];
    if (candidate === needle) {
      return probe;
    }
    if (candidate < needle) {
      left = probe + 1;
    } else {
      right = probe;
    }
  }
  return -1;
}
54
+
55
// Shared decoder: turns record payload bytes into text before JSON parsing.
const PAYLOAD_DECODER = new TextDecoder();
// Shared encoder: turns extracted index values into bytes before they are hashed.
const TERM_ENCODER = new TextEncoder();
57
+ export class SecondaryIndexManager {
58
+ private readonly cfg: Config;
59
+ private readonly db: SqliteDurableStore;
60
+ private readonly os: ObjectStore;
61
+ private readonly registry: SchemaRegistryStore;
62
+ private readonly segmentCache?: SegmentDiskCache;
63
+ private readonly runDiskCache?: SegmentDiskCache;
64
+ private readonly runCache: IndexRunCache;
65
+ private readonly span: number;
66
+ private readonly buildConcurrency: number;
67
+ private readonly compactionFanout: number;
68
+ private readonly maxLevel: number;
69
+ private readonly compactionConcurrency: number;
70
+ private readonly retireGenWindow: number;
71
+ private readonly retireMinMs: number;
72
+ private readonly queue = new Set<string>();
73
+ private readonly building = new Set<string>();
74
+ private readonly compacting = new Set<string>();
75
+ private readonly streamIdleTicks = new Map<string, { logicalSizeBytes: bigint; nextOffset: bigint; flatTicks: number }>();
76
+ private timer: any | null = null;
77
+ private running = false;
78
+ private readonly publishManifest?: (stream: string) => Promise<void>;
79
+ private readonly onMetadataChanged?: (stream: string) => void;
80
+ private readonly memorySampler?: RuntimeMemorySampler;
81
+ private readonly asyncGate: ConcurrencyGate;
82
+ private readonly foregroundActivity?: ForegroundActivityTracker;
83
+
84
/**
 * Stores the collaborators and derives tuning values from `cfg`.
 *
 * Build/compaction concurrency is floored at 1 and the retirement settings at
 * 0. When no `asyncGate` is passed, a gate constructed with `1` is used. The
 * on-disk run cache is created only when `cfg.indexRunCacheMaxBytes` is
 * positive.
 */
constructor(
  cfg: Config,
  db: SqliteDurableStore,
  os: ObjectStore,
  registry: SchemaRegistryStore,
  segmentCache?: SegmentDiskCache,
  publishManifest?: (stream: string) => Promise<void>,
  onMetadataChanged?: (stream: string) => void,
  memorySampler?: RuntimeMemorySampler,
  asyncGate?: ConcurrencyGate,
  foregroundActivity?: ForegroundActivityTracker
) {
  // Required collaborators.
  this.cfg = cfg;
  this.db = db;
  this.os = os;
  this.registry = registry;

  // Optional collaborators and hooks.
  this.segmentCache = segmentCache;
  this.publishManifest = publishManifest;
  this.onMetadataChanged = onMetadataChanged;
  this.memorySampler = memorySampler;
  this.asyncGate = asyncGate ?? new ConcurrencyGate(1);
  this.foregroundActivity = foregroundActivity;

  // Tuning values taken from configuration.
  this.span = cfg.indexL0SpanSegments;
  this.buildConcurrency = Math.max(cfg.indexBuildConcurrency, 1);
  this.compactionFanout = cfg.indexCompactionFanout;
  this.maxLevel = cfg.indexMaxLevel;
  this.compactionConcurrency = Math.max(cfg.indexCompactionConcurrency, 1);
  this.retireGenWindow = Math.max(cfg.indexRetireGenWindow, 0);
  this.retireMinMs = Math.max(cfg.indexRetireMinMs, 0);

  // Run caches: always in memory, on disk only with a positive byte budget.
  this.runCache = new IndexRunCache(cfg.indexRunMemoryCacheBytes);
  let diskCache: SegmentDiskCache | undefined;
  if (cfg.indexRunCacheMaxBytes > 0) {
    diskCache = new SegmentDiskCache(`${cfg.rootDir}/cache/secondary-index`, cfg.indexRunCacheMaxBytes);
  }
  this.runDiskCache = diskCache;
}
119
+
120
/**
 * Gives other work a chance to run: delegates to the foreground-activity
 * tracker when one was supplied, otherwise yields to the event loop.
 */
private async yieldBackgroundWork(): Promise<void> {
  const tracker = this.foregroundActivity;
  await (tracker ? tracker.yieldBackgroundWork() : yieldToEventLoop());
}
127
+
128
/**
 * Starts the periodic tick at `cfg.indexCheckIntervalMs`.
 * Does nothing when the span is not positive or a timer is already active.
 */
start(): void {
  const disabled = this.span <= 0;
  const alreadyStarted = Boolean(this.timer);
  if (disabled || alreadyStarted) return;

  const runTick = (): void => {
    void this.tick();
  };
  this.timer = setInterval(runTick, this.cfg.indexCheckIntervalMs);
}
135
+
136
/** Cancels the periodic tick (if any) and forgets all per-stream idle tracking. */
stop(): void {
  const handle = this.timer;
  this.timer = null;
  if (handle) {
    clearInterval(handle);
  }
  this.streamIdleTicks.clear();
}
141
+
142
/**
 * Marks `stream` for processing on a later tick.
 * Ignored when the span is not positive.
 */
enqueue(stream: string): void {
  const disabled = this.span <= 0;
  if (!disabled) {
    this.queue.add(stream);
  }
}
146
+
147
/**
 * Resolves which segments may contain `keyBytes` according to the runs of the
 * named secondary index.
 *
 * @param stream - stream whose index is consulted
 * @param indexName - name of the configured secondary index
 * @param keyBytes - raw bytes of the lookup term; hashed with the index secret
 * @returns `null` when the index cannot answer (span not positive, registry
 *   lookup failed, index not configured, no state row, config hash mismatch,
 *   nothing built yet, or a run could not be loaded). Otherwise the absolute
 *   indexes of candidate segments plus `indexedThrough` from the state row.
 */
async candidateSegmentsForSecondaryIndex(
  stream: string,
  indexName: string,
  keyBytes: Uint8Array
): Promise<{ segments: Set<number>; indexedThrough: number } | null> {
  if (this.span <= 0) return null;
  const regRes = this.registry.getRegistryResult(stream);
  if (Result.isError(regRes)) return null;
  const configured = getConfiguredSecondaryIndexes(regRes.value).find((entry) => entry.name === indexName);
  if (!configured) return null;
  const state = this.db.getSecondaryIndexState(stream, indexName);
  if (!state) return null;
  // Runs built under a different field configuration must not be consulted.
  if (state.config_hash !== hashSecondaryIndexField(configured)) return null;
  const runs = this.db.listSecondaryIndexRuns(stream, indexName);
  if (runs.length === 0 && state.indexed_through === 0) return null;

  const fp = siphash24(state.index_secret, keyBytes);
  const segments = new Set<number>();
  for (const meta of runs) {
    const runRes = await this.loadRunResult(meta);
    // Fail closed: the result reports `indexedThrough`, so silently skipping a
    // run we could not read would claim coverage for segments never checked.
    // NOTE(review): assumes callers treat `null` as "index unavailable", like
    // the other `null` returns above — confirm against the reader.
    if (Result.isError(runRes) || !runRes.value) return null;
    const run = runRes.value;
    if (run.filter && !binaryFuseContains(run.filter, fp)) continue;
    const idx = binarySearch(run.fingerprints, fp);
    if (idx < 0) continue;

    if (run.runType === RUN_TYPE_MASK16 && run.masks) {
      // Bit N of the mask marks segment `startSegment + N`.
      const mask = run.masks[idx];
      for (let bit = 0; bit < 16; bit++) {
        if ((mask & (1 << bit)) !== 0) segments.add(run.meta.startSegment + bit);
      }
    } else if (run.postings) {
      // Postings are offsets relative to the run's first segment (compaction
      // stores `seg - startSegment`), so convert back to absolute indexes.
      for (const rel of run.postings[idx]) segments.add(run.meta.startSegment + rel);
    }
  }
  return { segments, indexedThrough: state.indexed_through };
}
188
+
189
/**
 * Reports how many bytes the on-disk run cache holds under this stream's
 * secondary-index key prefix; 0 when there is no disk cache.
 */
getLocalCacheBytes(stream: string): number {
  const disk = this.runDiskCache;
  if (!disk) {
    return 0;
  }
  const prefix = `streams/${streamHash16Hex(stream)}/secondary-index/`;
  return disk.bytesForObjectKeyPrefix(prefix);
}
193
+
194
/**
 * Snapshot of cache usage: in-memory run cache, on-disk run cache (all zeros
 * when it is absent) and the number of streams with idle-tick tracking.
 */
getMemoryStats(): {
  runCacheBytes: number;
  runCacheEntries: number;
  runDiskCacheBytes: number;
  runDiskCacheEntries: number;
  runDiskMappedBytes: number;
  runDiskMappedEntries: number;
  runDiskPinnedEntries: number;
  streamIdleTickEntries: number;
} {
  const memory = this.runCache.stats();
  const disk = this.runDiskCache ? this.runDiskCache.stats() : undefined;

  const runDiskCacheBytes = disk?.usedBytes ?? 0;
  const runDiskCacheEntries = disk?.entryCount ?? 0;
  const runDiskMappedBytes = disk?.mappedBytes ?? 0;
  const runDiskMappedEntries = disk?.mappedEntryCount ?? 0;
  const runDiskPinnedEntries = disk?.pinnedEntryCount ?? 0;

  return {
    runCacheBytes: memory.usedBytes,
    runCacheEntries: memory.entries,
    runDiskCacheBytes,
    runDiskCacheEntries,
    runDiskMappedBytes,
    runDiskMappedEntries,
    runDiskPinnedEntries,
    streamIdleTickEntries: this.streamIdleTicks.size,
  };
}
217
+
218
/**
 * Drains the pending-stream queue once. For every queued stream it removes
 * index state for indexes that are no longer configured, then tries to build
 * and compact runs for each configured index.
 *
 * Re-entrancy is prevented with the `running` flag. Any failure while handling
 * a stream is caught, logged (unless it looks like a closed database or a
 * finalized statement) and the stream is put back on the queue, so one failing
 * stream neither rejects the timer-driven promise nor drops the streams that
 * were already taken off the queue.
 */
private async tick(): Promise<void> {
  if (this.running) return;
  this.running = true;

  // Messages matching these fragments are not logged.
  // NOTE(review): presumably these occur during shutdown — confirm.
  const isClosedDbError = (e: unknown): boolean => {
    const msg = String((e as { message?: unknown } | null | undefined)?.message ?? e).toLowerCase();
    return (
      msg.includes("database has closed") ||
      msg.includes("closed database") ||
      msg.includes("statement has finalized")
    );
  };

  try {
    const streams = Array.from(this.queue);
    this.queue.clear();
    for (const stream of streams) {
      try {
        const regRes = this.registry.getRegistryResult(stream);
        if (Result.isError(regRes)) continue;
        if (this.shouldPauseExactBackgroundWork(stream)) {
          this.queue.add(stream);
          continue;
        }
        const configured = getConfiguredSecondaryIndexes(regRes.value);
        const configuredNames = new Set(configured.map((entry) => entry.name));

        // Drop state for indexes that were removed from the configuration.
        const existing = this.db.listSecondaryIndexStates(stream);
        let removedAny = false;
        for (const state of existing) {
          if (configuredNames.has(state.index_name)) continue;
          this.db.deleteSecondaryIndex(stream, state.index_name);
          removedAny = true;
        }
        if (removedAny) {
          this.onMetadataChanged?.(stream);
          if (this.publishManifest) {
            try {
              await this.publishManifest(stream);
            } catch {
              // ignore and retry on next enqueue
            }
          }
        }

        for (const index of configured) {
          try {
            const buildRes = await this.maybeBuildRuns(stream, index);
            if (Result.isError(buildRes)) {
              this.queue.add(stream);
              continue;
            }
            const compactRes = await this.maybeCompactRuns(stream, index.name);
            if (Result.isError(compactRes)) {
              this.queue.add(stream);
              continue;
            }
          } catch (e) {
            if (!isClosedDbError(e)) {
              // eslint-disable-next-line no-console
              console.error("secondary index build failed", stream, index.name, e);
            }
            this.queue.add(stream);
          }
        }
      } catch (e) {
        // The registry lookup, pause check or stale-index cleanup threw. Keep
        // going with the remaining streams and retry this one on a later tick.
        if (!isClosedDbError(e)) {
          // eslint-disable-next-line no-console
          console.error("secondary index tick failed", stream, e);
        }
        this.queue.add(stream);
      }
    }
  } finally {
    this.running = false;
  }
}
276
+
277
/**
 * Builds at most one new level-0 run for `index` on `stream`.
 *
 * Inside the shared async gate it: creates the index state if missing (or
 * drops and recreates it when the stored config hash differs), bails out while
 * background work should pause, waits until a full span of uploaded segments
 * with an `r2_etag` exists past `indexed_through`, then builds, persists and
 * registers the run and advances `indexed_through`. The stream is re-queued
 * when another full span is already available.
 *
 * @returns ok when nothing needed doing or the run was built; an error Result
 *   when building or persisting the run failed
 */
private async maybeBuildRuns(stream: string, index: SecondaryIndexField): Promise<Result<void, SecondaryIndexBuildError>> {
  if (this.span <= 0) return Result.ok(undefined);
  const lockKey = `${stream}:${index.name}`;
  // Only one build per (stream, index) at a time.
  if (this.building.has(lockKey)) return Result.ok(undefined);
  this.building.add(lockKey);
  try {
    return await this.asyncGate.run(async () => {
      const wantedHash = hashSecondaryIndexField(index);
      const existing = this.db.getSecondaryIndexState(stream, index.name);
      let state = existing;
      if (!existing || existing.config_hash !== wantedHash) {
        // No state yet: create it. Stale config: discard the index first.
        if (existing) this.db.deleteSecondaryIndex(stream, index.name);
        this.db.upsertSecondaryIndexState(stream, index.name, randomBytes(16), wantedHash, 0);
        state = this.db.getSecondaryIndexState(stream, index.name);
        if (existing) {
          this.onMetadataChanged?.(stream);
          if (this.publishManifest) {
            try {
              await this.publishManifest(stream);
            } catch {
              // ignore and retry later
            }
          }
        }
      }
      if (!state) return Result.ok(undefined);
      const current = state;

      if (this.shouldPauseExactBackgroundWork(stream)) {
        this.queue.add(stream);
        return Result.ok(undefined);
      }

      const firstSegment = current.indexed_through;
      const uploaded = this.db.countUploadedSegments(stream);
      // Wait until a whole span of uploaded segments is available.
      if (uploaded < firstSegment + this.span) return Result.ok(undefined);
      const lastSegment = firstSegment + this.span - 1;

      const inputs: SegmentRow[] = [];
      for (let segIndex = firstSegment; segIndex <= lastSegment; segIndex++) {
        const row = this.db.getSegmentByIndex(stream, segIndex);
        if (!row || !row.r2_etag) return Result.ok(undefined);
        inputs.push(row);
      }

      const buildRun = () => this.buildL0RunResult(stream, index, firstSegment, inputs, current.index_secret);
      const built = this.memorySampler
        ? await this.memorySampler.track(
            "exact_l0",
            { stream, index_name: index.name, start_segment: firstSegment, end_segment: lastSegment },
            buildRun
          )
        : await buildRun();
      if (Result.isError(built)) return built;
      const run = built.value;

      const stored = await this.persistRunResult(run);
      if (Result.isError(stored)) return stored;
      const { runId, level, startSegment, endSegment, objectKey, filterLen, recordCount } = run.meta;
      this.db.insertSecondaryIndexRun({
        run_id: runId,
        stream,
        index_name: index.name,
        level,
        start_segment: startSegment,
        end_segment: endSegment,
        object_key: objectKey,
        size_bytes: stored.value,
        filter_len: filterLen,
        record_count: recordCount,
      });

      const advancedTo = lastSegment + 1;
      this.db.updateSecondaryIndexedThrough(stream, index.name, advancedTo);
      current.indexed_through = advancedTo;
      this.onMetadataChanged?.(stream);
      if (this.publishManifest) {
        try {
          await this.publishManifest(stream);
        } catch {
          // ignore and retry later
        }
      }

      // Another full span is already waiting: come back on a later tick.
      const moreReady = this.db.countUploadedSegments(stream) >= advancedTo + this.span;
      if (moreReady) this.queue.add(stream);
      return Result.ok(undefined);
    });
  } finally {
    this.building.delete(lockKey);
  }
}
361
+
362
/**
 * Compacts one group of adjacent same-level runs into a single run one level
 * higher, if such a group exists.
 *
 * Skipped when the span is not positive, the fanout is 1 or less, a compaction
 * for the same (stream, index) is already in flight, or foreground activity
 * was seen within the last 2000 ms (the stream is re-queued in that case).
 * When no group qualifies, only retired-run garbage collection runs. After a
 * successful compaction the inputs are retired against the next manifest
 * generation, the manifest is published best-effort, retired runs are
 * collected and the stream is re-queued.
 *
 * @returns ok when nothing needed doing or the compaction finished; an error
 *   Result when building or persisting the compacted run failed
 */
private async maybeCompactRuns(stream: string, indexName: string): Promise<Result<void, SecondaryIndexBuildError>> {
  if (this.span <= 0) return Result.ok(undefined);
  if (this.compactionFanout <= 1) return Result.ok(undefined);
  const lockKey = `${stream}:${indexName}`;
  if (this.compacting.has(lockKey)) return Result.ok(undefined);
  if (this.foregroundActivity?.wasActiveWithin(2000)) {
    this.queue.add(stream);
    return Result.ok(undefined);
  }

  this.compacting.add(lockKey);
  try {
    return await this.asyncGate.run(async () => {
      if (this.shouldPauseExactBackgroundWork(stream)) {
        this.queue.add(stream);
        return Result.ok(undefined);
      }

      const group = this.findCompactionGroup(stream, indexName);
      if (!group) {
        await this.gcRetiredRuns(stream, indexName);
        return Result.ok(undefined);
      }

      const built = await this.buildCompactedRunResult(stream, indexName, group.level + 1, group.runs);
      if (Result.isError(built)) return built;
      const merged = built.value;

      const stored = await this.persistRunResult(merged);
      if (Result.isError(stored)) return stored;
      const { runId, level, startSegment, endSegment, objectKey, filterLen, recordCount } = merged.meta;
      this.db.insertSecondaryIndexRun({
        run_id: runId,
        stream,
        index_name: indexName,
        level,
        start_segment: startSegment,
        end_segment: endSegment,
        object_key: objectKey,
        size_bytes: stored.value,
        filter_len: filterLen,
        record_count: recordCount,
      });

      // Advance the watermark if the merged run reaches past it.
      const state = this.db.getSecondaryIndexState(stream, indexName);
      const coveredThrough = merged.meta.endSegment + 1;
      if (state && coveredThrough > state.indexed_through) {
        this.db.updateSecondaryIndexedThrough(stream, indexName, coveredThrough);
      }

      // Retire the inputs against the next manifest generation.
      const manifestRow = this.db.getManifestRow(stream);
      const retiredIds = group.runs.map((input) => input.run_id);
      const retireGen = manifestRow.generation + 1;
      this.db.retireSecondaryIndexRuns(retiredIds, retireGen, this.db.nowMs());

      this.onMetadataChanged?.(stream);
      if (this.publishManifest) {
        try {
          await this.publishManifest(stream);
        } catch {
          // ignore and retry later
        }
      }
      await this.gcRetiredRuns(stream, indexName);
      this.queue.add(stream);
      return Result.ok(undefined);
    });
  } finally {
    this.compacting.delete(lockKey);
  }
}
428
+
429
/**
 * Finds the lowest level (0..maxLevel) that has `compactionFanout` consecutive
 * runs, each exactly one level-span wide and laid end to end.
 *
 * The runs of a level are examined in the order the database returned them.
 * NOTE(review): this presumably relies on that order being ascending by
 * `start_segment` — confirm against `listSecondaryIndexRuns`.
 *
 * @returns the level and the matching runs, or `null` when none qualifies
 */
private findCompactionGroup(stream: string, indexName: string): { level: number; runs: SecondaryIndexRunRow[] } | null {
  const fanout = this.compactionFanout;
  const allRuns = this.db.listSecondaryIndexRuns(stream, indexName);
  if (allRuns.length < fanout) return null;

  // Bucket runs by level.
  const byLevel = new Map<number, SecondaryIndexRunRow[]>();
  for (const run of allRuns) {
    let bucket = byLevel.get(run.level);
    if (!bucket) {
      bucket = [];
      byLevel.set(run.level, bucket);
    }
    bucket.push(run);
  }

  for (let level = 0; level <= this.maxLevel; level++) {
    const candidates = byLevel.get(level);
    if (!candidates || candidates.length < fanout) continue;
    const width = this.levelSpan(level);

    // True when the `fanout` runs starting at `from` tile a contiguous range.
    const tilesFrom = (from: number): boolean => {
      const base = candidates[from].start_segment;
      for (let j = 0; j < fanout; j++) {
        const run = candidates[from + j];
        const expectStart = base + j * width;
        const aligned = run.start_segment === expectStart && run.end_segment === expectStart + width - 1;
        if (!aligned) return false;
      }
      return true;
    };

    for (let from = 0; from + fanout <= candidates.length; from++) {
      if (tilesFrom(from)) {
        return { level, runs: candidates.slice(from, from + fanout) };
      }
    }
  }
  return null;
}
458
+
459
/**
 * Number of segments covered by one run at `level`: the L0 span multiplied by
 * the compaction fanout once per level.
 */
private levelSpan(level: number): number {
  let width = this.span;
  let applied = 0;
  while (applied < level) {
    width *= this.compactionFanout;
    applied += 1;
  }
  return width;
}
464
+
465
/**
 * Merges `inputs` into one postings-type run at `level`.
 *
 * Input runs are loaded by up to `compactionConcurrency` concurrent workers.
 * Every (fingerprint, absolute segment) pair they contain is collected, then
 * emitted as sorted fingerprints with sorted, de-duplicated postings stored
 * relative to the first input's start segment.
 *
 * @param stream - stream the runs belong to
 * @param indexName - secondary index name
 * @param level - level assigned to the merged run
 * @param inputs - runs to merge; the first and last define the covered range
 * @returns the merged run, or an error Result when there are no inputs, an
 *   input cannot be loaded or has an unknown type, or the filter build fails
 */
private async buildCompactedRunResult(
  stream: string,
  indexName: string,
  level: number,
  inputs: SecondaryIndexRunRow[]
): Promise<Result<IndexRun, SecondaryIndexBuildError>> {
  if (inputs.length === 0) return invalidIndexBuild("compact: missing inputs");

  // fingerprint -> absolute segment indexes seen in any input run
  const segmentsByFp = new Map<bigint, number[]>();
  const record = (fp: bigint, seg: number): void => {
    const known = segmentsByFp.get(fp);
    if (known) {
      known.push(seg);
    } else {
      segmentsByFp.set(fp, [seg]);
    }
  };

  // Folds one loaded run into the map, converting to absolute segments.
  const absorb = (meta: SecondaryIndexRunRow, run: IndexRun): void => {
    const origin = meta.start_segment;
    const count = run.fingerprints.length;
    if (run.runType === RUN_TYPE_MASK16 && run.masks) {
      for (let i = 0; i < count; i++) {
        const mask = run.masks[i];
        for (let bit = 0; bit < 16; bit++) {
          if ((mask & (1 << bit)) !== 0) record(run.fingerprints[i], origin + bit);
        }
      }
      return;
    }
    if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
      for (let i = 0; i < count; i++) {
        for (const rel of run.postings[i]) record(run.fingerprints[i], origin + rel);
      }
      return;
    }
    throw dsError(`unknown run type ${run.runType}`);
  };

  const backlog = inputs.slice();
  const workerCount = Math.min(this.compactionConcurrency, backlog.length);
  let buildError: string | null = null;

  // Each worker pulls inputs off the shared backlog until it is empty or any
  // worker has recorded an error.
  const drain = async (): Promise<void> => {
    while (!buildError) {
      const meta = backlog.shift();
      if (!meta) return;
      const loaded = await this.loadRunResult(meta);
      if (Result.isError(loaded)) {
        buildError = loaded.error.message;
        return;
      }
      if (!loaded.value) {
        buildError = `missing run ${meta.run_id}`;
        return;
      }
      try {
        absorb(meta, loaded.value);
      } catch (e: unknown) {
        buildError = String((e as any)?.message ?? e);
        return;
      }
      await this.yieldBackgroundWork();
    }
  };
  const tasks: Promise<void>[] = [];
  for (let w = 0; w < workerCount; w++) {
    tasks.push(drain());
  }
  await Promise.all(tasks);
  if (buildError) return invalidIndexBuild(buildError);

  const startSegment = inputs[0].start_segment;
  const endSegment = inputs[inputs.length - 1].end_segment;

  const ascending = (a: bigint, b: bigint): number => {
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  };
  const fingerprints = Array.from(segmentsByFp.keys()).sort(ascending);
  const postings: number[][] = fingerprints.map((fp) => {
    const absolute = segmentsByFp.get(fp) ?? [];
    absolute.sort((a, b) => a - b);
    // Sorted input: a duplicate always equals its immediate predecessor.
    const relative: number[] = [];
    absolute.forEach((seg, k) => {
      if (k > 0 && seg === absolute[k - 1]) return;
      relative.push(seg - startSegment);
    });
    return relative;
  });

  const fuseRes = buildBinaryFuseResult(fingerprints);
  if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
  const { bytes: filterBytes, filter } = fuseRes.value;

  const shash = streamHash16Hex(stream);
  const pad16 = (n: number): string => n.toString().padStart(16, "0");
  const runId = `${indexName}-l${level}-${pad16(startSegment)}-${pad16(endSegment)}-${Date.now()}`;
  return Result.ok({
    meta: {
      runId,
      level,
      startSegment,
      endSegment,
      objectKey: secondaryIndexRunObjectKey(shash, indexName, runId),
      filterLen: filterBytes.byteLength,
      recordCount: fingerprints.length,
    },
    runType: RUN_TYPE_POSTINGS,
    filterBytes,
    filter,
    fingerprints,
    postings,
  });
}
575
+
576
/**
 * Builds a level-0 (mask-type) run over `segments`.
 *
 * Every record of every segment is JSON-parsed, the values of `index` are
 * extracted, hashed with `secret`, and the bit of the containing segment is
 * set in that fingerprint's mask. Segments are processed by up to
 * `buildConcurrency` concurrent workers that yield regularly.
 *
 * @param stream - stream being indexed
 * @param index - secondary index field definition
 * @param startSegment - segment index that maps to mask bit 0
 * @param segments - segments to scan; each must fall within 16 segments of
 *   `startSegment` because masks are read as 16 bits elsewhere in this class
 * @param secret - per-index key for the fingerprint hash
 * @returns the run, or an error Result when the registry lookup fails, a
 *   segment does not fit the mask, a segment cannot be loaded or iterated, or
 *   the filter build fails
 */
private async buildL0RunResult(
  stream: string,
  index: SecondaryIndexField,
  startSegment: number,
  segments: SegmentRow[],
  secret: Uint8Array
): Promise<Result<IndexRun, SecondaryIndexBuildError>> {
  const regRes = this.registry.getRegistryResult(stream);
  if (Result.isError(regRes)) return invalidIndexBuild(regRes.error.message);
  const registry = regRes.value;

  // Readers and compaction only inspect bits 0..15 of a mask, and `1 << bit`
  // wraps at 32, so a segment outside that window would be dropped from the
  // index without any error. Reject it before doing any I/O.
  const MASK_BITS = 16;
  for (const seg of segments) {
    const bit = seg.segment_index - startSegment;
    if (!Number.isInteger(bit) || bit < 0 || bit >= MASK_BITS) {
      return invalidIndexBuild(
        `l0: segment ${seg.segment_index} does not fit a ${MASK_BITS}-bit mask starting at ${startSegment}`
      );
    }
  }

  const maskByFp = new Map<bigint, number>();
  const pending = segments.slice();
  const concurrency = Math.max(1, Math.min(this.buildConcurrency, pending.length));
  let buildError: string | null = null;
  const workers: Promise<void>[] = [];
  for (let i = 0; i < concurrency; i++) {
    workers.push(
      (async () => {
        for (;;) {
          // Stop as soon as any worker has recorded a failure.
          if (buildError) return;
          const seg = pending.shift();
          if (!seg) return;
          const segBytesRes = await this.loadSegmentBytesResult(seg);
          if (Result.isError(segBytesRes)) {
            buildError = segBytesRes.error.message;
            return;
          }
          const segBytes = segBytesRes.value;
          const bit = seg.segment_index - startSegment;
          const maskBit = 1 << bit;
          // Per-segment map, merged into the shared map once the segment is done.
          const local = new Map<bigint, number>();
          let offset = seg.start_offset;
          let processedRecords = 0;
          for (const recRes of iterateBlockRecordsResult(segBytes)) {
            if (Result.isError(recRes)) {
              buildError = recRes.error.message;
              return;
            }
            let parsed: unknown;
            try {
              parsed = JSON.parse(PAYLOAD_DECODER.decode(recRes.value.payload));
            } catch {
              // Payload is not valid JSON: skip it but still advance the offset.
              offset += 1n;
              continue;
            }
            const valuesRes = extractSecondaryIndexValuesForFieldResult(registry, offset, parsed, index);
            // Records whose values cannot be extracted contribute nothing.
            if (!Result.isError(valuesRes)) {
              for (const value of valuesRes.value) {
                const fp = siphash24(secret, TERM_ENCODER.encode(value));
                const prev = local.get(fp) ?? 0;
                local.set(fp, prev | maskBit);
              }
            }
            offset += 1n;
            processedRecords += 1;
            if (processedRecords % 64 === 0) {
              await this.yieldBackgroundWork();
            }
          }
          for (const [fp, mask] of local.entries()) {
            const prev = maskByFp.get(fp) ?? 0;
            maskByFp.set(fp, prev | mask);
          }
          local.clear();
          await this.yieldBackgroundWork();
        }
      })()
    );
  }
  await Promise.all(workers);
  if (buildError) return invalidIndexBuild(buildError);

  const fingerprints = Array.from(maskByFp.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  const masks = fingerprints.map((fp) => maskByFp.get(fp) ?? 0);
  const fuseRes = buildBinaryFuseResult(fingerprints);
  if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
  const shash = streamHash16Hex(stream);
  const endSegment = startSegment + this.span - 1;
  const runId = `${index.name}-l0-${startSegment.toString().padStart(16, "0")}-${endSegment.toString().padStart(16, "0")}-${Date.now()}`;
  return Result.ok({
    meta: {
      runId,
      level: 0,
      startSegment,
      endSegment,
      objectKey: secondaryIndexRunObjectKey(shash, index.name, runId),
      filterLen: fuseRes.value.bytes.byteLength,
      recordCount: fingerprints.length,
    },
    runType: RUN_TYPE_MASK16,
    filterBytes: fuseRes.value.bytes,
    filter: fuseRes.value.filter,
    fingerprints,
    masks,
  });
}
671
+
672
/**
 * Deletes retired runs that have expired, either because their retirement
 * generation is at or below the cutoff (current generation minus the
 * configured window) or because `retireMinMs` has passed since retirement.
 *
 * Object-store deletion failures are ignored; the caches and the database
 * rows are cleaned up regardless.
 */
private async gcRetiredRuns(stream: string, indexName: string): Promise<void> {
  const retired = this.db.listRetiredSecondaryIndexRuns(stream, indexName);
  if (retired.length === 0) return;

  const manifest = this.db.getManifestRow(stream);
  const nowMs = this.db.nowMs();
  // A cutoff of 0 disables generation-based expiry (retired_gen must be > 0).
  let cutoffGen = 0;
  if (this.retireGenWindow > 0 && manifest.generation > this.retireGenWindow) {
    cutoffGen = manifest.generation - this.retireGenWindow;
  }

  const expired: SecondaryIndexRunRow[] = retired.filter((run) => {
    const byGeneration = run.retired_gen != null && run.retired_gen > 0 && run.retired_gen <= cutoffGen;
    const byAge = run.retired_at_ms != null && run.retired_at_ms + BigInt(this.retireMinMs) <= nowMs;
    return byGeneration || byAge;
  });
  if (expired.length === 0) return;

  for (const { object_key: objectKey } of expired) {
    try {
      await this.os.delete(objectKey);
    } catch {
      // ignore deletion errors
    }
    this.runCache.remove(objectKey);
    this.runDiskCache?.remove(objectKey);
  }
  const expiredIds = expired.map((run) => run.run_id);
  this.db.deleteSecondaryIndexRuns(expiredIds);
}
699
+
700
/**
 * Tells whether any uploaded segment lacks a search companion row for the
 * current companion plan generation. Always false when the stream has no plan.
 */
private hasCompanionBacklog(stream: string): boolean {
  const plan = this.db.getSearchCompanionPlan(stream);
  if (!plan) return false;

  const uploaded = this.db.countUploadedSegments(stream);
  let segmentIndex = 0;
  while (segmentIndex < uploaded) {
    const companion = this.db.getSearchSegmentCompanion(stream, segmentIndex);
    const upToDate = companion && companion.plan_generation === plan.generation;
    if (!upToDate) return true;
    segmentIndex += 1;
  }
  return false;
}
710
+
711
/**
 * Decides whether index work for `stream` should wait.
 *
 * Work pauses (and idle tracking resets) while there is a companion backlog,
 * a segment in progress, pending bytes, or more segments than uploaded
 * segments. Otherwise it pauses until the stream's logical size and next
 * offset have stayed unchanged for enough consecutive calls: at least 3, or
 * 60 000 ms divided by the check interval, rounded up. An unknown stream
 * never pauses.
 */
private shouldPauseExactBackgroundWork(stream: string): boolean {
  const pauseAndReset = (): boolean => {
    this.streamIdleTicks.delete(stream);
    return true;
  };

  if (this.hasCompanionBacklog(stream)) return pauseAndReset();

  const row = this.db.getStream(stream);
  if (!row) return false;

  const stillActive =
    row.segment_in_progress !== 0 ||
    row.pending_bytes > 0n ||
    this.db.countSegmentsForStream(stream) > this.db.countUploadedSegments(stream);
  if (stillActive) return pauseAndReset();

  const requiredFlatTicks = Math.max(3, Math.ceil(60_000 / this.cfg.indexCheckIntervalMs));
  const prior = this.streamIdleTicks.get(stream) ?? {
    logicalSizeBytes: -1n,
    nextOffset: -1n,
    flatTicks: 0,
  };
  const unchanged = prior.logicalSizeBytes === row.logical_size_bytes && prior.nextOffset === row.next_offset;
  const flatTicks = unchanged ? prior.flatTicks + 1 : 0;
  this.streamIdleTicks.set(stream, {
    logicalSizeBytes: row.logical_size_bytes,
    nextOffset: row.next_offset,
    flatTicks,
  });
  return flatTicks < requiredFlatTicks;
}
747
+
748
/**
 * Encodes `run`, uploads it to the object store (with the configured retry
 * policy) and seeds both run caches with it.
 *
 * @returns the encoded size in bytes, or an error Result when encoding or the
 *   upload fails
 */
private async persistRunResult(run: IndexRun): Promise<Result<number, SecondaryIndexBuildError>> {
  const encoded = encodeIndexRunResult(run);
  if (Result.isError(encoded)) return invalidIndexBuild(encoded.error.message);
  const payload = encoded.value;
  const objectKey = run.meta.objectKey;

  const retryPolicy = {
    retries: this.cfg.objectStoreRetries,
    baseDelayMs: this.cfg.objectStoreBaseDelayMs,
    maxDelayMs: this.cfg.objectStoreMaxDelayMs,
    timeoutMs: this.cfg.objectStoreTimeoutMs,
  };
  try {
    await retry(() => this.os.put(objectKey, payload, { contentLength: payload.byteLength }), retryPolicy);
  } catch (e: unknown) {
    return invalidIndexBuild(String((e as any)?.message ?? e));
  }

  // Seed the caches so the fresh run does not need to be fetched back.
  this.runDiskCache?.put(objectKey, payload);
  this.runCache.put(objectKey, run, payload.byteLength);
  return Result.ok(payload.byteLength);
}
768
+
769
/**
 * Loads and decodes the run described by `meta`, trying the in-memory cache,
 * then the on-disk cache, then the object store (with the configured retry
 * policy).
 *
 * A disk-cache entry that cannot be read or decoded is evicted and the run is
 * fetched from the object store instead, so one corrupt cache file cannot make
 * every later load fail. Fetched bytes are written to the disk cache only
 * after they decode successfully.
 *
 * @returns the decoded run, or an error Result when the object is missing,
 *   the fetch fails, or the fetched bytes cannot be decoded
 */
private async loadRunResult(meta: SecondaryIndexRunRow): Promise<Result<IndexRun | null, SecondaryIndexBuildError>> {
  const cached = this.runCache.get(meta.object_key);
  if (cached) return Result.ok(cached);

  if (this.runDiskCache) {
    let diskBytes: Uint8Array | null = null;
    try {
      diskBytes = this.runDiskCache.get(meta.object_key);
    } catch {
      this.runDiskCache.remove(meta.object_key);
    }
    if (diskBytes) {
      const diskDecodeRes = decodeIndexRunResult(diskBytes);
      if (!Result.isError(diskDecodeRes)) {
        this.runCache.put(meta.object_key, diskDecodeRes.value, meta.size_bytes);
        return Result.ok(diskDecodeRes.value);
      }
      // Undecodable local copy: drop it and fall through to a fresh fetch.
      this.runDiskCache.remove(meta.object_key);
    }
  }

  let bytes: Uint8Array;
  try {
    bytes = await retry(
      async () => {
        const data = await this.os.get(meta.object_key);
        if (!data) throw dsError(`missing secondary index run ${meta.object_key}`);
        return data;
      },
      {
        retries: this.cfg.objectStoreRetries,
        baseDelayMs: this.cfg.objectStoreBaseDelayMs,
        maxDelayMs: this.cfg.objectStoreMaxDelayMs,
        timeoutMs: this.cfg.objectStoreTimeoutMs,
      }
    );
  } catch (e: unknown) {
    return invalidIndexBuild(String((e as any)?.message ?? e));
  }

  const decodeRes = decodeIndexRunResult(bytes);
  if (Result.isError(decodeRes)) return invalidIndexBuild(decodeRes.error.message);
  // Only cache bytes that are known to decode.
  this.runDiskCache?.put(meta.object_key, bytes);
  this.runCache.put(meta.object_key, decodeRes.value, meta.size_bytes);
  return Result.ok(decodeRes.value);
}
805
+
806
/**
 * Fetches the bytes of `seg` through the cached segment loader using the
 * configured object-store retry policy, converting a thrown error into an
 * error Result.
 */
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, SecondaryIndexBuildError>> {
  const retryPolicy = {
    retries: this.cfg.objectStoreRetries,
    baseDelayMs: this.cfg.objectStoreBaseDelayMs,
    maxDelayMs: this.cfg.objectStoreMaxDelayMs,
    timeoutMs: this.cfg.objectStoreTimeoutMs,
  };
  try {
    const bytes = await loadSegmentBytesCached(this.os, seg, this.segmentCache, retryPolicy);
    return Result.ok(bytes);
  } catch (e: unknown) {
    const reason = String((e as any)?.message ?? e);
    return invalidIndexBuild(reason);
  }
}
824
+ }