@prisma/streams-server 0.1.1 → 0.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
@@ -5,7 +5,8 @@ import type { Config } from "../config";
5
5
  import type { IndexRunRow, SegmentRow, SqliteDurableStore } from "../db/db";
6
6
  import type { ObjectStore } from "../objectstore/interface";
7
7
  import { SegmentDiskCache } from "../segment/cache";
8
- import { iterateBlocksResult } from "../segment/format";
8
+ import { loadSegmentBytesCached } from "../segment/cached_segment";
9
+ import { iterateBlockRecordsResult } from "../segment/format";
9
10
  import { siphash24 } from "../util/siphash";
10
11
  import { retry } from "../util/retry";
11
12
  import { indexRunObjectKey, segmentObjectKey, streamHash16Hex } from "../util/stream_paths";
@@ -14,9 +15,46 @@ import { decodeIndexRunResult, encodeIndexRunResult, RUN_TYPE_MASK16, RUN_TYPE_P
14
15
  import { IndexRunCache } from "./run_cache";
15
16
  import type { Metrics } from "../metrics";
16
17
  import { dsError } from "../util/ds_error.ts";
18
+ import { yieldToEventLoop } from "../util/yield";
19
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler";
20
+ import { ConcurrencyGate } from "../concurrency_gate";
21
+ import type { ForegroundActivityTracker } from "../foreground_activity";
22
+ import type { AggSectionView } from "../search/agg_format";
23
+ import type { ColSectionView } from "../search/col_format";
24
+ import type { FtsSectionView } from "../search/fts_format";
25
+ import type { MetricsBlockSectionView } from "../profiles/metrics/block_format";
26
+ import type { SchemaRegistryStore } from "../schema/registry";
27
+ import type { RoutingKeyLexiconListResult } from "./lexicon_indexer";
17
28
 
18
29
  export type IndexCandidate = { segments: Set<number>; indexedThrough: number };
19
30
  type IndexBuildError = { kind: "invalid_index_build"; message: string };
31
+ export type CompanionSectionLookupStats = {
32
+ sectionGetMs: number;
33
+ decodeMs: number;
34
+ };
35
+
36
+ export type StreamIndexLookup = {
37
+ start(): void;
38
+ stop(): void;
39
+ enqueue(stream: string): void;
40
+ candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise<IndexCandidate | null>;
41
+ candidateSegmentsForSecondaryIndex(stream: string, indexName: string, keyBytes: Uint8Array): Promise<IndexCandidate | null>;
42
+ getAggSegmentCompanion(stream: string, segmentIndex: number): Promise<AggSectionView | null>;
43
+ getColSegmentCompanion(stream: string, segmentIndex: number): Promise<ColSectionView | null>;
44
+ getFtsSegmentCompanion(stream: string, segmentIndex: number): Promise<FtsSectionView | null>;
45
+ getFtsSegmentCompanionWithStats?(
46
+ stream: string,
47
+ segmentIndex: number
48
+ ): Promise<{ companion: FtsSectionView | null; stats: CompanionSectionLookupStats }>;
49
+ getMetricsBlockSegmentCompanion(stream: string, segmentIndex: number): Promise<MetricsBlockSectionView | null>;
50
+ listRoutingKeysResult?(stream: string, after: string | null, limit: number): Promise<Result<RoutingKeyLexiconListResult, { kind: string; message: string }>>;
51
+ getLocalStorageUsage?(stream: string): {
52
+ routing_index_cache_bytes: number;
53
+ exact_index_cache_bytes: number;
54
+ companion_cache_bytes: number;
55
+ lexicon_index_cache_bytes: number;
56
+ };
57
+ };
20
58
 
21
59
  function invalidIndexBuild<T = never>(message: string): Result<T, IndexBuildError> {
22
60
  return Result.err({ kind: "invalid_index_build", message });
@@ -54,6 +92,11 @@ export class IndexManager {
54
92
  private timer: any | null = null;
55
93
  private running = false;
56
94
  private readonly publishManifest?: (stream: string) => Promise<void>;
95
+ private readonly onMetadataChanged?: (stream: string) => void;
96
+ private readonly memorySampler?: RuntimeMemorySampler;
97
+ private readonly registry?: SchemaRegistryStore;
98
+ private readonly asyncGate: ConcurrencyGate;
99
+ private readonly foregroundActivity?: ForegroundActivityTracker;
57
100
 
58
101
  constructor(
59
102
  cfg: Config,
@@ -61,7 +104,12 @@ export class IndexManager {
61
104
  os: ObjectStore,
62
105
  segmentCache: SegmentDiskCache | undefined,
63
106
  publishManifest?: (stream: string) => Promise<void>,
64
- metrics?: Metrics
107
+ metrics?: Metrics,
108
+ onMetadataChanged?: (stream: string) => void,
109
+ memorySampler?: RuntimeMemorySampler,
110
+ registry?: SchemaRegistryStore,
111
+ asyncGate?: ConcurrencyGate,
112
+ foregroundActivity?: ForegroundActivityTracker
65
113
  ) {
66
114
  this.cfg = cfg;
67
115
  this.db = db;
@@ -76,10 +124,23 @@ export class IndexManager {
76
124
  this.retireGenWindow = Math.max(0, cfg.indexRetireGenWindow);
77
125
  this.retireMinMs = Math.max(0, cfg.indexRetireMinMs);
78
126
  this.metrics = metrics;
127
+ this.onMetadataChanged = onMetadataChanged;
128
+ this.memorySampler = memorySampler;
129
+ this.registry = registry;
130
+ this.asyncGate = asyncGate ?? new ConcurrencyGate(1);
131
+ this.foregroundActivity = foregroundActivity;
79
132
  this.runCache = new IndexRunCache(cfg.indexRunMemoryCacheBytes);
80
133
  this.runDiskCache = cfg.indexRunCacheMaxBytes > 0 ? new SegmentDiskCache(`${cfg.rootDir}/cache/index`, cfg.indexRunCacheMaxBytes) : undefined;
81
134
  }
82
135
 
136
+ private async yieldBackgroundWork(): Promise<void> {
137
+ if (this.foregroundActivity) {
138
+ await this.foregroundActivity.yieldBackgroundWork();
139
+ return;
140
+ }
141
+ await yieldToEventLoop();
142
+ }
143
+
83
144
  start(): void {
84
145
  if (this.span <= 0) return;
85
146
  if (this.timer) return;
@@ -98,8 +159,9 @@ export class IndexManager {
98
159
  this.queue.add(stream);
99
160
  }
100
161
 
101
- async candidateSegments(stream: string, keyBytes: Uint8Array): Promise<IndexCandidate | null> {
162
+ async candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise<IndexCandidate | null> {
102
163
  if (this.span <= 0) return null;
164
+ if (!this.isRoutingConfigured(stream)) return null;
103
165
  const state = this.db.getIndexState(stream);
104
166
  if (!state) return null;
105
167
  const runs = this.db.listIndexRuns(stream);
@@ -131,6 +193,53 @@ export class IndexManager {
131
193
  return { segments, indexedThrough: state.indexed_through };
132
194
  }
133
195
 
196
+ async candidateSegmentsForSecondaryIndex(_stream: string, _indexName: string, _keyBytes: Uint8Array): Promise<IndexCandidate | null> {
197
+ return null;
198
+ }
199
+
200
+ async getColSegmentCompanion(_stream: string, _segmentIndex: number): Promise<ColSectionView | null> {
201
+ return null;
202
+ }
203
+
204
+ async getAggSegmentCompanion(_stream: string, _segmentIndex: number): Promise<AggSectionView | null> {
205
+ return null;
206
+ }
207
+
208
+ async getFtsSegmentCompanion(_stream: string, _segmentIndex: number): Promise<FtsSectionView | null> {
209
+ return null;
210
+ }
211
+
212
+ async getMetricsBlockSegmentCompanion(_stream: string, _segmentIndex: number): Promise<MetricsBlockSectionView | null> {
213
+ return null;
214
+ }
215
+
216
+ getLocalCacheBytes(stream: string): number {
217
+ if (!this.runDiskCache) return 0;
218
+ return this.runDiskCache.bytesForObjectKeyPrefix(`streams/${streamHash16Hex(stream)}/index/`);
219
+ }
220
+
221
+ getMemoryStats(): {
222
+ runCacheBytes: number;
223
+ runCacheEntries: number;
224
+ runDiskCacheBytes: number;
225
+ runDiskCacheEntries: number;
226
+ runDiskMappedBytes: number;
227
+ runDiskMappedEntries: number;
228
+ runDiskPinnedEntries: number;
229
+ } {
230
+ const mem = this.runCache.stats();
231
+ const disk = this.runDiskCache?.stats();
232
+ return {
233
+ runCacheBytes: mem.usedBytes,
234
+ runCacheEntries: mem.entries,
235
+ runDiskCacheBytes: disk?.usedBytes ?? 0,
236
+ runDiskCacheEntries: disk?.entryCount ?? 0,
237
+ runDiskMappedBytes: disk?.mappedBytes ?? 0,
238
+ runDiskMappedEntries: disk?.mappedEntryCount ?? 0,
239
+ runDiskPinnedEntries: disk?.pinnedEntryCount ?? 0,
240
+ };
241
+ }
242
+
134
243
  private async tick(): Promise<void> {
135
244
  if (this.running) return;
136
245
  this.running = true;
@@ -142,6 +251,21 @@ export class IndexManager {
142
251
  const streams = Array.from(this.queue);
143
252
  this.queue.clear();
144
253
  for (const stream of streams) {
254
+ if (!this.isRoutingConfigured(stream)) {
255
+ const hadRoutingState = !!this.db.getIndexState(stream) || this.db.listIndexRunsAll(stream).length > 0;
256
+ if (hadRoutingState) {
257
+ this.db.deleteIndex(stream);
258
+ this.onMetadataChanged?.(stream);
259
+ if (this.publishManifest) {
260
+ try {
261
+ await this.publishManifest(stream);
262
+ } catch {
263
+ // ignore and retry on next enqueue
264
+ }
265
+ }
266
+ }
267
+ continue;
268
+ }
145
269
  try {
146
270
  const buildRes = await this.maybeBuildRuns(stream);
147
271
  if (Result.isError(buildRes)) {
@@ -179,41 +303,43 @@ export class IndexManager {
179
303
  if (this.building.has(stream)) return Result.ok(undefined);
180
304
  this.building.add(stream);
181
305
  try {
182
- let state = this.db.getIndexState(stream);
183
- if (!state) {
184
- const secret = randomBytes(16);
185
- this.db.upsertIndexState(stream, secret, 0);
186
- state = this.db.getIndexState(stream);
187
- }
188
- if (!state) return Result.ok(undefined);
189
- if (this.metrics) {
190
- const lag = Math.max(0, this.db.countUploadedSegments(stream) - state.indexed_through);
191
- this.metrics.record("tieredstore.index.lag.segments", lag, "count", undefined, stream);
192
- }
193
- let indexedThrough = state.indexed_through;
194
- for (;;) {
306
+ return await this.asyncGate.run(async () => {
307
+ let state = this.db.getIndexState(stream);
308
+ if (!state) {
309
+ const secret = randomBytes(16);
310
+ this.db.upsertIndexState(stream, secret, 0);
311
+ state = this.db.getIndexState(stream);
312
+ }
313
+ if (!state) return Result.ok(undefined);
314
+ if (this.metrics) {
315
+ const lag = Math.max(0, this.db.countUploadedSegments(stream) - state.indexed_through);
316
+ this.metrics.record("tieredstore.index.lag.segments", lag, "count", undefined, stream);
317
+ }
318
+ const indexedThrough = state.indexed_through;
195
319
  const uploadedCount = this.db.countUploadedSegments(stream);
196
320
  if (uploadedCount < indexedThrough + this.span) return Result.ok(undefined);
197
321
  const start = indexedThrough;
198
322
  const end = start + this.span - 1;
199
323
  const segments: SegmentRow[] = [];
200
- let ok = true;
201
324
  for (let i = start; i <= end; i++) {
202
325
  const seg = this.db.getSegmentByIndex(stream, i);
203
- if (!seg || !seg.r2_etag) {
204
- ok = false;
205
- break;
206
- }
326
+ if (!seg || !seg.r2_etag) return Result.ok(undefined);
207
327
  segments.push(seg);
208
328
  }
209
- if (!ok) return Result.ok(undefined);
210
329
  const t0 = Date.now();
211
- const runRes = await this.buildL0RunResult(stream, start, segments, state.index_secret);
330
+ const runRes = this.memorySampler
331
+ ? await this.memorySampler.track(
332
+ "routing_l0",
333
+ { stream, start_segment: start, end_segment: end },
334
+ () => this.buildL0RunResult(stream, start, segments, state.index_secret)
335
+ )
336
+ : await this.buildL0RunResult(stream, start, segments, state.index_secret);
212
337
  if (Result.isError(runRes)) return runRes;
213
338
  const run = runRes.value;
214
339
  const elapsedNs = BigInt(Date.now() - t0) * 1_000_000n;
215
340
  const persistRes = await this.persistRunResult(run, stream);
216
341
  if (Result.isError(persistRes)) return persistRes;
342
+ const sizeBytes = persistRes.value;
217
343
  this.db.insertIndexRun({
218
344
  run_id: run.meta.runId,
219
345
  stream,
@@ -221,6 +347,7 @@ export class IndexManager {
221
347
  start_segment: run.meta.startSegment,
222
348
  end_segment: run.meta.endSegment,
223
349
  object_key: run.meta.objectKey,
350
+ size_bytes: sizeBytes,
224
351
  filter_len: run.meta.filterLen,
225
352
  record_count: run.meta.recordCount,
226
353
  });
@@ -229,9 +356,10 @@ export class IndexManager {
229
356
  this.metrics.record("tieredstore.index.runs.built", 1, "count", { level: String(run.meta.level) }, stream);
230
357
  this.recordActiveRuns(stream);
231
358
  }
232
- indexedThrough = end + 1;
233
- this.db.updateIndexedThrough(stream, indexedThrough);
234
- state.indexed_through = indexedThrough;
359
+ const nextIndexedThrough = end + 1;
360
+ this.db.updateIndexedThrough(stream, nextIndexedThrough);
361
+ state.indexed_through = nextIndexedThrough;
362
+ this.onMetadataChanged?.(stream);
235
363
  if (this.publishManifest) {
236
364
  try {
237
365
  await this.publishManifest(stream);
@@ -239,7 +367,9 @@ export class IndexManager {
239
367
  // ignore manifest publish errors; will be retried by uploader/indexer
240
368
  }
241
369
  }
242
- }
370
+ if (this.db.countUploadedSegments(stream) >= nextIndexedThrough + this.span) this.queue.add(stream);
371
+ return Result.ok(undefined);
372
+ });
243
373
  } finally {
244
374
  this.building.delete(stream);
245
375
  }
@@ -249,9 +379,13 @@ export class IndexManager {
249
379
  if (this.span <= 0) return Result.ok(undefined);
250
380
  if (this.compactionFanout <= 1) return Result.ok(undefined);
251
381
  if (this.compacting.has(stream)) return Result.ok(undefined);
382
+ if (this.foregroundActivity?.wasActiveWithin(2000)) {
383
+ this.queue.add(stream);
384
+ return Result.ok(undefined);
385
+ }
252
386
  this.compacting.add(stream);
253
387
  try {
254
- for (;;) {
388
+ return await this.asyncGate.run(async () => {
255
389
  const group = this.findCompactionGroup(stream);
256
390
  if (!group) {
257
391
  await this.gcRetiredRuns(stream);
@@ -265,6 +399,7 @@ export class IndexManager {
265
399
  const elapsedNs = BigInt(Date.now() - t0) * 1_000_000n;
266
400
  const persistRes = await this.persistRunResult(run, stream);
267
401
  if (Result.isError(persistRes)) return persistRes;
402
+ const sizeBytes = persistRes.value;
268
403
  this.db.insertIndexRun({
269
404
  run_id: run.meta.runId,
270
405
  stream,
@@ -272,6 +407,7 @@ export class IndexManager {
272
407
  start_segment: run.meta.startSegment,
273
408
  end_segment: run.meta.endSegment,
274
409
  object_key: run.meta.objectKey,
410
+ size_bytes: sizeBytes,
275
411
  filter_len: run.meta.filterLen,
276
412
  record_count: run.meta.recordCount,
277
413
  });
@@ -288,6 +424,7 @@ export class IndexManager {
288
424
  retiredGen,
289
425
  nowMs
290
426
  );
427
+ this.onMetadataChanged?.(stream);
291
428
  if (this.metrics) {
292
429
  this.metrics.record("tieredstore.index.compact.latency", Number(elapsedNs), "ns", { level: String(run.meta.level) }, stream);
293
430
  this.metrics.record("tieredstore.index.runs.compacted", 1, "count", { level: String(run.meta.level) }, stream);
@@ -305,7 +442,9 @@ export class IndexManager {
305
442
  }
306
443
  }
307
444
  await this.gcRetiredRuns(stream);
308
- }
445
+ this.queue.add(stream);
446
+ return Result.ok(undefined);
447
+ });
309
448
  } finally {
310
449
  this.compacting.delete(stream);
311
450
  }
@@ -353,18 +492,39 @@ export class IndexManager {
353
492
  inputs: IndexRunRow[]
354
493
  ): Promise<Result<IndexRun, IndexBuildError>> {
355
494
  if (inputs.length === 0) return invalidIndexBuild("compact: missing inputs");
356
- const segments = new Map<bigint, Set<number>>();
495
+ const segments = new Map<bigint, number[]>();
357
496
  const addSegment = (fp: bigint, seg: number) => {
358
- let set = segments.get(fp);
359
- if (!set) {
360
- set = new Set<number>();
361
- segments.set(fp, set);
497
+ let list = segments.get(fp);
498
+ if (!list) {
499
+ list = [];
500
+ segments.set(fp, list);
362
501
  }
363
- set.add(seg);
502
+ list.push(seg);
503
+ };
504
+ const mergeRun = (meta: IndexRunRow, run: IndexRun): void => {
505
+ if (run.runType === RUN_TYPE_MASK16 && run.masks) {
506
+ for (let i = 0; i < run.fingerprints.length; i++) {
507
+ const fp = run.fingerprints[i];
508
+ const mask = run.masks[i];
509
+ for (let bit = 0; bit < 16; bit++) {
510
+ if ((mask & (1 << bit)) === 0) continue;
511
+ addSegment(fp, meta.start_segment + bit);
512
+ }
513
+ }
514
+ return;
515
+ }
516
+ if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
517
+ for (let i = 0; i < run.fingerprints.length; i++) {
518
+ const fp = run.fingerprints[i];
519
+ const postings = run.postings[i];
520
+ for (const rel of postings) addSegment(fp, meta.start_segment + rel);
521
+ }
522
+ return;
523
+ }
524
+ throw dsError(`unknown run type ${run.runType}`);
364
525
  };
365
526
 
366
527
  const pending = inputs.slice();
367
- const results: Array<{ meta: IndexRunRow; run: IndexRun }> = [];
368
528
  const workers = Math.min(this.compactionConcurrency, pending.length);
369
529
  let buildError: string | null = null;
370
530
  const workerTasks: Promise<void>[] = [];
@@ -385,7 +545,13 @@ export class IndexManager {
385
545
  buildError = `missing run ${meta.run_id}`;
386
546
  return;
387
547
  }
388
- results.push({ meta, run });
548
+ try {
549
+ mergeRun(meta, run);
550
+ } catch (e: unknown) {
551
+ buildError = errorMessage(e);
552
+ return;
553
+ }
554
+ await this.yieldBackgroundWork();
389
555
  }
390
556
  })()
391
557
  );
@@ -393,45 +559,22 @@ export class IndexManager {
393
559
  await Promise.all(workerTasks);
394
560
  if (buildError) return invalidIndexBuild(buildError);
395
561
 
396
- for (const res of results) {
397
- const run = res.run;
398
- const meta = res.meta;
399
- if (run.runType === RUN_TYPE_MASK16 && run.masks) {
400
- for (let i = 0; i < run.fingerprints.length; i++) {
401
- const fp = run.fingerprints[i];
402
- const mask = run.masks[i];
403
- for (let bit = 0; bit < 16; bit++) {
404
- if ((mask & (1 << bit)) === 0) continue;
405
- addSegment(fp, meta.start_segment + bit);
406
- }
407
- }
408
- } else if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
409
- for (let i = 0; i < run.fingerprints.length; i++) {
410
- const fp = run.fingerprints[i];
411
- const postings = run.postings[i];
412
- for (const rel of postings) addSegment(fp, meta.start_segment + rel);
413
- }
414
- } else {
415
- return invalidIndexBuild(`unknown run type ${run.runType}`);
416
- }
417
- }
418
-
419
562
  const startSegment = inputs[0].start_segment;
420
563
  const endSegment = inputs[inputs.length - 1].end_segment;
421
- const pairs = Array.from(segments.entries())
422
- .map(([fp, set]) => {
423
- const list = Array.from(set);
424
- list.sort((a, b) => a - b);
425
- const rel = list.map((seg) => seg - startSegment);
426
- return { fp, rel };
427
- })
428
- .sort((a, b) => (a.fp < b.fp ? -1 : a.fp > b.fp ? 1 : 0));
429
-
430
- const fingerprints: bigint[] = [];
431
- const postings: number[][] = [];
432
- for (const p of pairs) {
433
- fingerprints.push(p.fp);
434
- postings.push(p.rel);
564
+ const fingerprints = Array.from(segments.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
565
+ const postings: number[][] = new Array(fingerprints.length);
566
+ for (let i = 0; i < fingerprints.length; i++) {
567
+ const fp = fingerprints[i]!;
568
+ const list = segments.get(fp) ?? [];
569
+ list.sort((a, b) => a - b);
570
+ const rel: number[] = [];
571
+ let lastSeg = Number.NaN;
572
+ for (const seg of list) {
573
+ if (seg === lastSeg) continue;
574
+ rel.push(seg - startSegment);
575
+ lastSeg = seg;
576
+ }
577
+ postings[i] = rel;
435
578
  }
436
579
 
437
580
  const fuseRes = buildBinaryFuseResult(fingerprints);
@@ -492,7 +635,6 @@ export class IndexManager {
492
635
  const maskByFp = new Map<bigint, number>();
493
636
  const pending = segments.slice();
494
637
  const concurrency = Math.max(1, Math.min(this.buildConcurrency, pending.length));
495
- const results: Array<Map<bigint, number>> = [];
496
638
  let buildError: string | null = null;
497
639
  const workers: Promise<void>[] = [];
498
640
  for (let i = 0; i < concurrency; i++) {
@@ -511,35 +653,35 @@ export class IndexManager {
511
653
  const bit = seg.segment_index - startSegment;
512
654
  const maskBit = 1 << bit;
513
655
  const local = new Map<bigint, number>();
514
- for (const blockRes of iterateBlocksResult(segBytes)) {
515
- if (Result.isError(blockRes)) {
516
- buildError = blockRes.error.message;
656
+ let processedRecords = 0;
657
+ for (const recRes of iterateBlockRecordsResult(segBytes)) {
658
+ if (Result.isError(recRes)) {
659
+ buildError = recRes.error.message;
517
660
  return;
518
661
  }
519
- const { decoded } = blockRes.value;
520
- for (const rec of decoded.records) {
521
- if (rec.routingKey.byteLength === 0) continue;
522
- const fp = siphash24(secret, rec.routingKey);
523
- const prev = local.get(fp) ?? 0;
524
- local.set(fp, prev | maskBit);
662
+ if (recRes.value.routingKey.byteLength === 0) continue;
663
+ const fp = siphash24(secret, recRes.value.routingKey);
664
+ const prev = local.get(fp) ?? 0;
665
+ local.set(fp, prev | maskBit);
666
+ processedRecords += 1;
667
+ if (processedRecords % 256 === 0) {
668
+ await this.yieldBackgroundWork();
525
669
  }
526
670
  }
527
- results.push(local);
671
+ for (const [fp, mask] of local.entries()) {
672
+ const prev = maskByFp.get(fp) ?? 0;
673
+ maskByFp.set(fp, prev | mask);
674
+ }
675
+ local.clear();
676
+ await this.yieldBackgroundWork();
528
677
  }
529
678
  })()
530
679
  );
531
680
  }
532
681
  await Promise.all(workers);
533
682
  if (buildError) return invalidIndexBuild(buildError);
534
- for (const local of results) {
535
- for (const [fp, mask] of local.entries()) {
536
- const prev = maskByFp.get(fp) ?? 0;
537
- maskByFp.set(fp, prev | mask);
538
- }
539
- }
540
- const entries = Array.from(maskByFp.entries()).sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0));
541
- const fingerprints = entries.map(([fp]) => fp);
542
- const masks = entries.map(([, mask]) => mask);
683
+ const fingerprints = Array.from(maskByFp.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
684
+ const masks = fingerprints.map((fp) => maskByFp.get(fp) ?? 0);
543
685
  const fuseRes = buildBinaryFuseResult(fingerprints);
544
686
  if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
545
687
  const { filter, bytes } = fuseRes.value;
@@ -566,7 +708,17 @@ export class IndexManager {
566
708
  return Result.ok(run);
567
709
  }
568
710
 
569
- private async persistRunResult(run: IndexRun, stream?: string): Promise<Result<void, IndexBuildError>> {
711
+ private isRoutingConfigured(stream: string): boolean {
712
+ const streamRow = this.db.getStream(stream);
713
+ const contentType = streamRow?.content_type.split(";")[0]?.trim().toLowerCase() ?? null;
714
+ if (contentType != null && contentType !== "application/json") return true;
715
+ if (!this.registry) return false;
716
+ const regRes = this.registry.getRegistryResult(stream);
717
+ if (Result.isError(regRes)) return false;
718
+ return !!regRes.value.routingKey;
719
+ }
720
+
721
+ private async persistRunResult(run: IndexRun, stream?: string): Promise<Result<number, IndexBuildError>> {
570
722
  const payloadRes = encodeIndexRunResult(run);
571
723
  if (Result.isError(payloadRes)) return invalidIndexBuild(payloadRes.error.message);
572
724
  const payload = payloadRes.value;
@@ -587,8 +739,8 @@ export class IndexManager {
587
739
  return invalidIndexBuild(String(e?.message ?? e));
588
740
  }
589
741
  this.runDiskCache?.put(run.meta.objectKey, payload);
590
- this.runCache.put(run.meta.objectKey, run);
591
- return Result.ok(undefined);
742
+ this.runCache.put(run.meta.objectKey, run, payload.byteLength);
743
+ return Result.ok(payload.byteLength);
592
744
  }
593
745
 
594
746
  private async loadRunResult(meta: IndexRunRow): Promise<Result<IndexRun | null, IndexBuildError>> {
@@ -638,38 +790,16 @@ export class IndexManager {
638
790
  run.meta.endSegment = meta.end_segment;
639
791
  run.meta.filterLen = meta.filter_len;
640
792
  run.meta.recordCount = meta.record_count;
641
- this.runCache.put(meta.object_key, run);
793
+ this.runCache.put(meta.object_key, run, meta.size_bytes);
642
794
  return Result.ok(run);
643
795
  }
644
796
 
645
797
  private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, IndexBuildError>> {
646
- if (seg.local_path && seg.local_path.length > 0) {
647
- try {
648
- return Result.ok(new Uint8Array(readFileSync(seg.local_path)));
649
- } catch {
650
- // fall through
651
- }
652
- }
653
- const diskCache = this.segmentCache;
654
- const key = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);
655
- if (diskCache && diskCache.has(key)) {
656
- diskCache.recordHit();
657
- diskCache.touch(key);
658
- try {
659
- return Result.ok(new Uint8Array(readFileSync(diskCache.getPath(key))));
660
- } catch {
661
- diskCache.remove(key);
662
- }
663
- }
664
- if (diskCache) diskCache.recordMiss();
665
798
  try {
666
- const data = await retry(
667
- async () => {
668
- const objectBytes = await this.os.get(key);
669
- if (!objectBytes) throw dsError(`missing segment ${seg.segment_id}`);
670
- if (diskCache) diskCache.put(key, objectBytes);
671
- return objectBytes;
672
- },
799
+ const data = await loadSegmentBytesCached(
800
+ this.os,
801
+ seg,
802
+ this.segmentCache,
673
803
  {
674
804
  retries: this.cfg.objectStoreRetries,
675
805
  baseDelayMs: this.cfg.objectStoreBaseDelayMs,