@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
package/src/app_local.ts CHANGED
@@ -3,9 +3,15 @@ import { createAppCore, type App } from "./app_core";
3
3
  import type { ObjectStore } from "./objectstore/interface";
4
4
  import { NullObjectStore } from "./objectstore/null";
5
5
  import { StreamReader } from "./reader";
6
+ import type { StreamIndexLookup } from "./index/indexer";
7
+ import type { RoutingKeyLexiconListResult } from "./index/lexicon_indexer";
6
8
  import type { StatsCollector } from "./stats";
7
- import type { UploaderController } from "./uploader";
9
+ import type { UploaderController, UploaderHooks } from "./uploader";
8
10
  import type { SegmenterController } from "./segment/segmenter_workers";
11
+ import { readSqliteRuntimeMemoryStats } from "./sqlite/runtime_stats";
12
+ import { Result } from "better-result";
13
+
14
+ const TEXT_DECODER = new TextDecoder();
9
15
 
10
16
  class NoopUploader implements UploaderController {
11
17
  start(): void {}
@@ -13,7 +19,7 @@ class NoopUploader implements UploaderController {
13
19
  countSegmentsWaiting(): number {
14
20
  return 0;
15
21
  }
16
- setHooks(_hooks: { onSegmentsUploaded?: (stream: string) => void } | undefined): void {}
22
+ setHooks(_hooks: UploaderHooks | undefined): void {}
17
23
  async publishManifest(_stream: string): Promise<void> {}
18
24
  }
19
25
 
@@ -22,6 +28,97 @@ const noopSegmenter: SegmenterController = {
22
28
  stop(_hard?: boolean): void {},
23
29
  };
24
30
 
31
+ class LocalIndexLookup implements StreamIndexLookup {
32
+ constructor(private readonly db: App["deps"]["db"]) {}
33
+
34
+ start(): void {}
35
+
36
+ stop(): void {}
37
+
38
+ enqueue(_stream: string): void {}
39
+
40
+ async candidateSegmentsForRoutingKey(_stream: string, _keyBytes: Uint8Array): Promise<null> {
41
+ return null;
42
+ }
43
+
44
+ async candidateSegmentsForSecondaryIndex(_stream: string, _indexName: string, _keyBytes: Uint8Array): Promise<null> {
45
+ return null;
46
+ }
47
+
48
+ async getAggSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
49
+ return null;
50
+ }
51
+
52
+ async getColSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
53
+ return null;
54
+ }
55
+
56
+ async getFtsSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
57
+ return null;
58
+ }
59
+
60
+ async getMetricsBlockSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
61
+ return null;
62
+ }
63
+
64
+ async listRoutingKeysResult(
65
+ stream: string,
66
+ after: string | null,
67
+ limit: number
68
+ ): Promise<Result<RoutingKeyLexiconListResult, { kind: string; message: string }>> {
69
+ const srow = this.db.getStream(stream);
70
+ if (!srow || this.db.isDeleted(srow)) {
71
+ return Result.err({ kind: "invalid_lexicon_index", message: "stream not found" });
72
+ }
73
+ const safeLimit = Math.max(1, Math.min(limit, 500));
74
+ const keys = new Set<string>();
75
+ let scannedWalRows = 0;
76
+ for (const rec of this.db.iterWalRange(stream, 0n, srow.next_offset - 1n)) {
77
+ scannedWalRows += 1;
78
+ const rawKey = rec.routing_key == null ? null : rec.routing_key instanceof Uint8Array ? rec.routing_key : new Uint8Array(rec.routing_key);
79
+ if (!rawKey || rawKey.byteLength === 0) continue;
80
+ keys.add(TEXT_DECODER.decode(rawKey));
81
+ }
82
+ const sorted = Array.from(keys).sort();
83
+ const filtered = after == null ? sorted : sorted.filter((key) => key > after);
84
+ const page = filtered.slice(0, safeLimit);
85
+ const nextAfter = filtered.length > safeLimit ? page[page.length - 1] ?? null : null;
86
+ return Result.ok({
87
+ keys: page,
88
+ nextAfter,
89
+ tookMs: 0,
90
+ coverage: {
91
+ complete: true,
92
+ indexedSegments: 0,
93
+ scannedUploadedSegments: 0,
94
+ scannedLocalSegments: 0,
95
+ scannedWalRows,
96
+ possibleMissingUploadedSegments: 0,
97
+ possibleMissingLocalSegments: 0,
98
+ },
99
+ timing: {
100
+ lexiconRunGetMs: 0,
101
+ lexiconDecodeMs: 0,
102
+ lexiconEnumerateMs: 0,
103
+ lexiconMergeMs: 0,
104
+ fallbackScanMs: 0,
105
+ fallbackSegmentGetMs: 0,
106
+ fallbackWalScanMs: 0,
107
+ lexiconRunsLoaded: 0,
108
+ },
109
+ });
110
+ }
111
+
112
+ getLocalStorageUsage(_stream: string) {
113
+ return {
114
+ routing_index_cache_bytes: 0,
115
+ exact_index_cache_bytes: 0,
116
+ companion_cache_bytes: 0,
117
+ lexicon_index_cache_bytes: 0,
118
+ };
119
+ }
120
+ }
121
+
25
122
  export type CreateLocalAppOptions = {
26
123
  stats?: StatsCollector;
27
124
  };
@@ -29,16 +126,59 @@ export type CreateLocalAppOptions = {
29
126
  export function createLocalApp(cfg: Config, os?: ObjectStore, opts: CreateLocalAppOptions = {}): App {
30
127
  return createAppCore(cfg, {
31
128
  stats: opts.stats,
32
- createRuntime: ({ config, db }) => {
129
+ createRuntime: ({ config, db, registry, memorySampler, memory }) => {
33
130
  const store = os ?? new NullObjectStore();
34
- const reader = new StreamReader(config, db, store);
131
+ const indexer = new LocalIndexLookup(db);
132
+ const reader = new StreamReader(config, db, store, registry, undefined, indexer, memorySampler, memory);
35
133
 
36
134
  return {
37
135
  store,
38
136
  reader,
39
137
  segmenter: noopSegmenter,
40
138
  uploader: new NoopUploader(),
139
+ indexer,
41
140
  uploadSchemaRegistry: async (): Promise<void> => {},
141
+ getRuntimeMemorySnapshot: () => {
142
+ const sqliteRuntime = readSqliteRuntimeMemoryStats();
143
+ return {
144
+ subsystems: {
145
+ heap_estimates: {
146
+ ingest_queue_payload_bytes: 0,
147
+ },
148
+ mapped_files: {},
149
+ disk_caches: {},
150
+ configured_budgets: {
151
+ sqlite_cache_budget_bytes: config.sqliteCacheBytes,
152
+ worker_sqlite_cache_budget_bytes: config.workerSqliteCacheBytes,
153
+ },
154
+ pipeline_buffers: {},
155
+ sqlite_runtime: {
156
+ sqlite_memory_used_bytes: sqliteRuntime.memory_used_bytes,
157
+ sqlite_memory_highwater_bytes: sqliteRuntime.memory_highwater_bytes,
158
+ sqlite_pagecache_overflow_bytes: sqliteRuntime.pagecache_overflow_bytes,
159
+ sqlite_pagecache_overflow_highwater_bytes: sqliteRuntime.pagecache_overflow_highwater_bytes,
160
+ },
161
+ counts: {
162
+ ingest_queue_requests: 0,
163
+ pending_upload_segments: 0,
164
+ sqlite_pagecache_used_slots: sqliteRuntime.pagecache_used_slots,
165
+ sqlite_pagecache_used_slots_highwater: sqliteRuntime.pagecache_used_slots_highwater,
166
+ sqlite_malloc_count: sqliteRuntime.malloc_count,
167
+ sqlite_malloc_count_highwater: sqliteRuntime.malloc_count_highwater,
168
+ sqlite_open_connections: sqliteRuntime.open_connections,
169
+ sqlite_prepared_statements: sqliteRuntime.prepared_statements,
170
+ },
171
+ },
172
+ totals: {
173
+ heap_estimate_bytes: 0,
174
+ mapped_file_bytes: 0,
175
+ disk_cache_bytes: 0,
176
+ configured_budget_bytes: config.sqliteCacheBytes + config.workerSqliteCacheBytes,
177
+ pipeline_buffer_bytes: 0,
178
+ sqlite_runtime_bytes: sqliteRuntime.memory_used_bytes + sqliteRuntime.pagecache_overflow_bytes,
179
+ },
180
+ };
181
+ },
42
182
  start: (): void => {},
43
183
  };
44
184
  },
@@ -0,0 +1,62 @@
1
+ export type AutoTuneConfig = {
2
+ segmentMaxMiB: number;
3
+ segmentTargetRows: number;
4
+ sqliteCacheMb: number;
5
+ workerSqliteCacheMb: number;
6
+ indexMemMb: number;
7
+ lexiconIndexCacheMb: number;
8
+ searchCompanionTocCacheMb: number;
9
+ searchCompanionSectionCacheMb: number;
10
+ ingestBatchMb: number;
11
+ ingestQueueMb: number;
12
+ ingestConcurrency: number;
13
+ readConcurrency: number;
14
+ searchConcurrency: number;
15
+ asyncIndexConcurrency: number;
16
+ indexBuildConcurrency: number;
17
+ indexCompactConcurrency: number;
18
+ segmenterWorkers: number;
19
+ uploadConcurrency: number;
20
+ searchCompanionBatchSegments: number;
21
+ searchCompanionYieldBlocks: number;
22
+ };
23
+
24
+ export const AUTO_TUNE_PRESETS = [256, 512, 1024, 2048, 4096, 8192] as const;
25
+
26
+ export function memoryLimitForPreset(preset: number): number {
27
+ return preset === 256 ? 300 : preset;
28
+ }
29
+
30
+ export function tuneForPreset(p: number): AutoTuneConfig {
31
+ return {
32
+ // Segment geometry is fixed across presets. Smaller hosts still scale
33
+ // concurrency and cache budgets down, but they keep the same 16 MiB /
34
+ // 100k-row seal thresholds so upload throughput is not dominated by many
35
+ // tiny compressed segment objects.
36
+ segmentMaxMiB: 16,
37
+ segmentTargetRows: 100_000,
38
+ sqliteCacheMb: Math.max(8, Math.floor(p / 16)),
39
+ workerSqliteCacheMb: Math.max(8, Math.min(32, Math.floor(p / 128))),
40
+ indexMemMb: Math.max(4, Math.floor(p / 64)),
41
+ lexiconIndexCacheMb: p >= 8192 ? 256 : p >= 4096 ? 128 : p >= 2048 ? 64 : p >= 1024 ? 32 : p >= 512 ? 16 : 8,
42
+ searchCompanionTocCacheMb: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
43
+ searchCompanionSectionCacheMb: p >= 8192 ? 128 : p >= 4096 ? 64 : p >= 2048 ? 32 : p >= 1024 ? 16 : 8,
44
+ // Keep append working sets tighter on <=2 GiB presets because the request path
45
+ // still holds multiple copies of JSON batches while normalizing and queuing.
46
+ ingestBatchMb: p >= 8192 ? 64 : p >= 4096 ? 16 : p >= 2048 ? 8 : p >= 1024 ? 4 : 2,
47
+ ingestQueueMb: p >= 8192 ? 128 : p >= 4096 ? 64 : p >= 2048 ? 32 : p >= 1024 ? 16 : 8,
48
+ ingestConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
49
+ readConcurrency: p >= 8192 ? 16 : p >= 4096 ? 8 : p >= 1024 ? 4 : 2,
50
+ searchConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
51
+ asyncIndexConcurrency: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
52
+ // Keep <=2 GiB presets single-lane for background work. These hosts do not
53
+ // have enough headroom for append, segment cut, upload, and companion work
54
+ // to overlap aggressively under the GH Archive "all" workload.
55
+ indexBuildConcurrency: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
56
+ indexCompactConcurrency: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
57
+ segmenterWorkers: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
58
+ uploadConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
59
+ searchCompanionBatchSegments: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
60
+ searchCompanionYieldBlocks: p >= 8192 ? 4 : p >= 4096 ? 2 : 1,
61
+ };
62
+ }
package/src/bootstrap.ts CHANGED
@@ -59,8 +59,11 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
59
59
  const epoch = typeof manifest.epoch === "number" ? manifest.epoch : 0;
60
60
  const nextOffsetNum = typeof manifest.next_offset === "number" ? manifest.next_offset : 0;
61
61
  const nextOffset = BigInt(nextOffsetNum);
62
+ const logicalSizeBytes = parseManifestBigInt(manifest.logical_size_bytes) ?? 0n;
62
63
 
63
64
  const contentType = typeof manifest.content_type === "string" ? manifest.content_type : "application/octet-stream";
65
+ const profile = typeof manifest.profile === "string" && manifest.profile !== "" ? manifest.profile : "generic";
66
+ const profileJson = manifest.profile_json && typeof manifest.profile_json === "object" ? manifest.profile_json : null;
64
67
  const streamSeq = typeof manifest.stream_seq === "string" ? manifest.stream_seq : null;
65
68
  const closed = typeof manifest.closed === "number" ? manifest.closed : 0;
66
69
  const closedProducerId = typeof manifest.closed_producer_id === "string" ? manifest.closed_producer_id : null;
@@ -92,6 +95,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
92
95
  created_at_ms: createdAtMs,
93
96
  updated_at_ms: nowMs,
94
97
  content_type: contentType,
98
+ profile,
95
99
  stream_seq: streamSeq,
96
100
  closed,
97
101
  closed_producer_id: closedProducerId,
@@ -105,6 +109,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
105
109
  uploaded_segment_count: uploadedPrefix,
106
110
  pending_rows: 0n,
107
111
  pending_bytes: 0n,
112
+ logical_size_bytes: logicalSizeBytes,
108
113
  wal_rows: 0n,
109
114
  wal_bytes: 0n,
110
115
  last_append_ms: lastAppendMs,
@@ -113,6 +118,11 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
113
118
  expires_at_ms: expiresAtMs,
114
119
  stream_flags: streamFlags,
115
120
  });
121
+ if (profileJson) {
122
+ db.upsertStreamProfile(stream, JSON.stringify(profileJson));
123
+ } else {
124
+ db.deleteStreamProfile(stream);
125
+ }
116
126
 
117
127
  db.upsertSegmentMeta(stream, segmentCount, segmentOffsetsBytes, segmentBlocksBytes, segmentLastTsBytes);
118
128
 
@@ -121,7 +131,14 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
121
131
  if (!head) throw dsError(`missing manifest head ${mkey}`);
122
132
  return head;
123
133
  }, retryOpts);
124
- db.upsertManifestRow(stream, Number(manifest.generation ?? 0), Number(manifest.generation ?? 0), nowMs, manifestHead?.etag ?? null);
134
+ db.upsertManifestRow(
135
+ stream,
136
+ Number(manifest.generation ?? 0),
137
+ Number(manifest.generation ?? 0),
138
+ nowMs,
139
+ manifestHead?.etag ?? null,
140
+ manifestHead?.size ?? null
141
+ );
125
142
 
126
143
  for (let i = 0; i < segmentCount; i++) {
127
144
  const startOffset = i === 0 ? 0n : segmentOffsets[i - 1];
@@ -145,6 +162,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
145
162
  endOffset,
146
163
  blockCount: segmentBlocks[i],
147
164
  lastAppendMs: lastTsMs,
165
+ payloadBytes: 0n,
148
166
  sizeBytes: head.size,
149
167
  localPath,
150
168
  });
@@ -168,6 +186,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
168
186
  start_segment: Number(r.start_segment),
169
187
  end_segment: Number(r.end_segment),
170
188
  object_key: String(r.object_key),
189
+ size_bytes: Number(r.size_bytes ?? 0),
171
190
  filter_len: Number(r.filter_len ?? 0),
172
191
  record_count: Number(r.record_count ?? 0),
173
192
  });
@@ -181,6 +200,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
181
200
  start_segment: Number(r.start_segment),
182
201
  end_segment: Number(r.end_segment),
183
202
  object_key: String(r.object_key),
203
+ size_bytes: Number(r.size_bytes ?? 0),
184
204
  filter_len: Number(r.filter_len ?? 0),
185
205
  record_count: Number(r.record_count ?? 0),
186
206
  });
@@ -189,6 +209,136 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
189
209
  db.retireIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
190
210
  }
191
211
 
212
+ const secondaryIndexes = manifest.secondary_indexes && typeof manifest.secondary_indexes === "object" ? manifest.secondary_indexes : {};
213
+ for (const [indexName, rawState] of Object.entries(secondaryIndexes)) {
214
+ if (!rawState || typeof rawState !== "object") continue;
215
+ const indexSecretB64 = typeof (rawState as any).index_secret === "string" ? (rawState as any).index_secret : "";
216
+ if (!indexSecretB64) continue;
217
+ const secret = new Uint8Array(Buffer.from(indexSecretB64, "base64"));
218
+ const configHash = typeof (rawState as any).config_hash === "string" ? (rawState as any).config_hash : "";
219
+ const indexedThrough =
220
+ typeof (rawState as any).indexed_through === "number" ? Number((rawState as any).indexed_through) : 0;
221
+ db.upsertSecondaryIndexState(stream, indexName, secret, configHash, indexedThrough);
222
+
223
+ const activeSecondaryRuns = Array.isArray((rawState as any).active_runs) ? (rawState as any).active_runs : [];
224
+ const retiredSecondaryRuns = Array.isArray((rawState as any).retired_runs) ? (rawState as any).retired_runs : [];
225
+ for (const run of activeSecondaryRuns) {
226
+ db.insertSecondaryIndexRun({
227
+ run_id: String(run.run_id),
228
+ stream,
229
+ index_name: indexName,
230
+ level: Number(run.level),
231
+ start_segment: Number(run.start_segment),
232
+ end_segment: Number(run.end_segment),
233
+ object_key: String(run.object_key),
234
+ size_bytes: Number(run.size_bytes ?? 0),
235
+ filter_len: Number(run.filter_len ?? 0),
236
+ record_count: Number(run.record_count ?? 0),
237
+ });
238
+ }
239
+ for (const run of retiredSecondaryRuns) {
240
+ const runId = String(run.run_id);
241
+ db.insertSecondaryIndexRun({
242
+ run_id: runId,
243
+ stream,
244
+ index_name: indexName,
245
+ level: Number(run.level),
246
+ start_segment: Number(run.start_segment),
247
+ end_segment: Number(run.end_segment),
248
+ object_key: String(run.object_key),
249
+ size_bytes: Number(run.size_bytes ?? 0),
250
+ filter_len: Number(run.filter_len ?? 0),
251
+ record_count: Number(run.record_count ?? 0),
252
+ });
253
+ const retiredGen = typeof run.retired_gen === "number" ? run.retired_gen : Number(manifest.generation ?? 0);
254
+ const retiredAtUnix = typeof run.retired_at_unix === "number" ? run.retired_at_unix : Math.floor(Number(nowMs) / 1000);
255
+ db.retireSecondaryIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
256
+ }
257
+ }
258
+
259
+ const lexiconIndexes = Array.isArray(manifest.lexicon_indexes) ? manifest.lexicon_indexes : [];
260
+ for (const rawState of lexiconIndexes) {
261
+ if (!rawState || typeof rawState !== "object") continue;
262
+ const sourceKind = typeof (rawState as any).source_kind === "string" ? (rawState as any).source_kind : "";
263
+ if (sourceKind === "") continue;
264
+ const sourceName = typeof (rawState as any).source_name === "string" ? (rawState as any).source_name : "";
265
+ const indexedThrough =
266
+ typeof (rawState as any).indexed_through === "number" ? Number((rawState as any).indexed_through) : 0;
267
+ db.upsertLexiconIndexState(stream, sourceKind, sourceName, indexedThrough);
268
+
269
+ const activeLexiconRuns = Array.isArray((rawState as any).active_runs) ? (rawState as any).active_runs : [];
270
+ const retiredLexiconRuns = Array.isArray((rawState as any).retired_runs) ? (rawState as any).retired_runs : [];
271
+ for (const run of activeLexiconRuns) {
272
+ db.insertLexiconIndexRun({
273
+ run_id: String(run.run_id),
274
+ stream,
275
+ source_kind: sourceKind,
276
+ source_name: sourceName,
277
+ level: Number(run.level),
278
+ start_segment: Number(run.start_segment),
279
+ end_segment: Number(run.end_segment),
280
+ object_key: String(run.object_key),
281
+ size_bytes: Number(run.size_bytes ?? 0),
282
+ record_count: Number(run.record_count ?? 0),
283
+ });
284
+ }
285
+ for (const run of retiredLexiconRuns) {
286
+ const runId = String(run.run_id);
287
+ db.insertLexiconIndexRun({
288
+ run_id: runId,
289
+ stream,
290
+ source_kind: sourceKind,
291
+ source_name: sourceName,
292
+ level: Number(run.level),
293
+ start_segment: Number(run.start_segment),
294
+ end_segment: Number(run.end_segment),
295
+ object_key: String(run.object_key),
296
+ size_bytes: Number(run.size_bytes ?? 0),
297
+ record_count: Number(run.record_count ?? 0),
298
+ });
299
+ const retiredGen = typeof run.retired_gen === "number" ? run.retired_gen : Number(manifest.generation ?? 0);
300
+ const retiredAtUnix = typeof run.retired_at_unix === "number" ? run.retired_at_unix : Math.floor(Number(nowMs) / 1000);
301
+ db.retireLexiconIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
302
+ }
303
+ }
304
+
305
+ const searchCompanions =
306
+ manifest.search_companions && typeof manifest.search_companions === "object" ? manifest.search_companions : null;
307
+ if (searchCompanions) {
308
+ const generation = typeof searchCompanions.generation === "number" ? searchCompanions.generation : 0;
309
+ const planHash = typeof searchCompanions.plan_hash === "string" ? searchCompanions.plan_hash : "";
310
+ const planJson =
311
+ searchCompanions.plan_json && typeof searchCompanions.plan_json === "object"
312
+ ? JSON.stringify(searchCompanions.plan_json)
313
+ : JSON.stringify({ families: {}, summary: {} });
314
+ if (generation > 0 && planHash) {
315
+ db.upsertSearchCompanionPlan(stream, generation, planHash, planJson);
316
+ }
317
+ const segments = Array.isArray(searchCompanions.segments) ? searchCompanions.segments : [];
318
+ for (const segment of segments) {
319
+ if (!segment || typeof segment !== "object") continue;
320
+ if (
321
+ typeof (segment as any).segment_index !== "number" ||
322
+ typeof (segment as any).object_key !== "string" ||
323
+ typeof (segment as any).plan_generation !== "number"
324
+ ) {
325
+ continue;
326
+ }
327
+ const sections = Array.isArray((segment as any).sections) ? (segment as any).sections : [];
328
+ db.upsertSearchSegmentCompanion(
329
+ stream,
330
+ Number((segment as any).segment_index),
331
+ String((segment as any).object_key),
332
+ Number((segment as any).plan_generation),
333
+ JSON.stringify(sections),
334
+ JSON.stringify((segment as any).section_sizes ?? {}),
335
+ Number((segment as any).size_bytes ?? 0),
336
+ parseManifestBigInt((segment as any).primary_timestamp_min_ms),
337
+ parseManifestBigInt((segment as any).primary_timestamp_max_ms)
338
+ );
339
+ }
340
+ }
341
+
192
342
  const schemaKey = schemaObjectKey(shash);
193
343
  const schemaBytes = await retry(async () => {
194
344
  const data = await store.get(schemaKey);
@@ -197,6 +347,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
197
347
  }, retryOpts);
198
348
  if (schemaBytes) {
199
349
  db.upsertSchemaRegistry(stream, new TextDecoder().decode(schemaBytes));
350
+ db.setSchemaUploadedSizeBytes(stream, schemaBytes.byteLength);
200
351
  }
201
352
  }
202
353
  } finally {
@@ -204,6 +355,13 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
204
355
  }
205
356
  }
206
357
 
358
+ function parseManifestBigInt(value: unknown): bigint | null {
359
+ if (typeof value === "bigint") return value;
360
+ if (typeof value === "number" && Number.isFinite(value)) return BigInt(Math.trunc(value));
361
+ if (typeof value === "string" && /^-?[0-9]+$/.test(value)) return BigInt(value);
362
+ return null;
363
+ }
364
+
207
365
  function decodeZstdBase64(value: string): Uint8Array {
208
366
  if (!value) return new Uint8Array(0);
209
367
  const raw = Buffer.from(value, "base64");
@@ -0,0 +1,108 @@
1
+ export type GateRelease = () => void;
2
+
3
+ type Waiter = {
4
+ resolve: (release: GateRelease) => void;
5
+ reject: (error: unknown) => void;
6
+ signal: AbortSignal | null;
7
+ onAbort: (() => void) | null;
8
+ };
9
+
10
+ function abortError(): Error {
11
+ const err = new Error("operation aborted");
12
+ err.name = "AbortError";
13
+ return err;
14
+ }
15
+
16
+ export class ConcurrencyGate {
17
+ private limit: number;
18
+ private active = 0;
19
+ private readonly waiters: Waiter[] = [];
20
+
21
+ constructor(limit: number) {
22
+ this.limit = Math.max(1, Math.floor(limit));
23
+ }
24
+
25
+ getLimit(): number {
26
+ return this.limit;
27
+ }
28
+
29
+ getActive(): number {
30
+ return this.active;
31
+ }
32
+
33
+ getQueued(): number {
34
+ return this.waiters.length;
35
+ }
36
+
37
+ setLimit(nextLimit: number): void {
38
+ this.limit = Math.max(1, Math.floor(nextLimit));
39
+ this.drain();
40
+ }
41
+
42
+ async acquire(signal?: AbortSignal | null): Promise<GateRelease> {
43
+ if (signal?.aborted) throw abortError();
44
+ if (this.active < this.limit) {
45
+ this.active += 1;
46
+ return this.releaseFactory();
47
+ }
48
+ return await new Promise<GateRelease>((resolve, reject) => {
49
+ const waiter: Waiter = {
50
+ resolve,
51
+ reject,
52
+ signal: signal ?? null,
53
+ onAbort: null,
54
+ };
55
+ if (signal) {
56
+ waiter.onAbort = () => {
57
+ this.removeWaiter(waiter);
58
+ reject(abortError());
59
+ };
60
+ signal.addEventListener("abort", waiter.onAbort, { once: true });
61
+ }
62
+ this.waiters.push(waiter);
63
+ });
64
+ }
65
+
66
+ async run<T>(fn: () => Promise<T>, signal?: AbortSignal | null): Promise<T> {
67
+ const release = await this.acquire(signal);
68
+ try {
69
+ return await fn();
70
+ } finally {
71
+ release();
72
+ }
73
+ }
74
+
75
+ private releaseFactory(): GateRelease {
76
+ let released = false;
77
+ return () => {
78
+ if (released) return;
79
+ released = true;
80
+ this.active = Math.max(0, this.active - 1);
81
+ this.drain();
82
+ };
83
+ }
84
+
85
+ private removeWaiter(waiter: Waiter): void {
86
+ const idx = this.waiters.indexOf(waiter);
87
+ if (idx >= 0) this.waiters.splice(idx, 1);
88
+ if (waiter.signal && waiter.onAbort) {
89
+ waiter.signal.removeEventListener("abort", waiter.onAbort);
90
+ waiter.onAbort = null;
91
+ }
92
+ }
93
+
94
+ private drain(): void {
95
+ while (this.active < this.limit && this.waiters.length > 0) {
96
+ const waiter = this.waiters.shift()!;
97
+ if (waiter.signal?.aborted) {
98
+ if (waiter.signal && waiter.onAbort) waiter.signal.removeEventListener("abort", waiter.onAbort);
99
+ waiter.reject(abortError());
100
+ continue;
101
+ }
102
+ if (waiter.signal && waiter.onAbort) waiter.signal.removeEventListener("abort", waiter.onAbort);
103
+ waiter.onAbort = null;
104
+ this.active += 1;
105
+ waiter.resolve(this.releaseFactory());
106
+ }
107
+ }
108
+ }