@tungthedev/streams-server 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/CODE_OF_CONDUCT.md +45 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/LICENSE +201 -0
  4. package/README.md +58 -0
  5. package/SECURITY.md +42 -0
  6. package/bin/prisma-streams-server +2 -0
  7. package/package.json +46 -0
  8. package/src/app.ts +583 -0
  9. package/src/app_core.ts +3144 -0
  10. package/src/app_local.ts +206 -0
  11. package/src/auth.ts +124 -0
  12. package/src/auto_tune.ts +69 -0
  13. package/src/backpressure.ts +66 -0
  14. package/src/bootstrap.ts +613 -0
  15. package/src/compute/demo_entry.ts +415 -0
  16. package/src/compute/demo_site.ts +1242 -0
  17. package/src/compute/entry.ts +19 -0
  18. package/src/compute/package_entry.ts +4 -0
  19. package/src/compute/virtual-modules.d.ts +15 -0
  20. package/src/compute/worker_module_url.ts +9 -0
  21. package/src/concurrency_gate.ts +108 -0
  22. package/src/config.ts +402 -0
  23. package/src/db/bootstrap_store.ts +9 -0
  24. package/src/db/db.ts +2424 -0
  25. package/src/db/schema.ts +925 -0
  26. package/src/db/sqlite_manifest_snapshot.ts +81 -0
  27. package/src/db/sqlite_touch_store.ts +491 -0
  28. package/src/db/sqlite_wal_store.ts +472 -0
  29. package/src/details/full_mode_details.ts +568 -0
  30. package/src/expiry_sweeper.ts +47 -0
  31. package/src/foreground_activity.ts +55 -0
  32. package/src/hist.ts +169 -0
  33. package/src/index/binary_fuse.ts +379 -0
  34. package/src/index/indexer.ts +947 -0
  35. package/src/index/lexicon_file_cache.ts +261 -0
  36. package/src/index/lexicon_format.ts +93 -0
  37. package/src/index/lexicon_indexer.ts +863 -0
  38. package/src/index/run_cache.ts +84 -0
  39. package/src/index/run_format.ts +213 -0
  40. package/src/index/schedule.ts +28 -0
  41. package/src/index/secondary_indexer.ts +901 -0
  42. package/src/index/secondary_schema.ts +105 -0
  43. package/src/ingest.ts +309 -0
  44. package/src/lens/lens.ts +501 -0
  45. package/src/manifest.ts +249 -0
  46. package/src/memory.ts +334 -0
  47. package/src/metrics.ts +147 -0
  48. package/src/metrics_emitter.ts +83 -0
  49. package/src/notifier.ts +180 -0
  50. package/src/objectstore/accounting.ts +151 -0
  51. package/src/objectstore/interface.ts +13 -0
  52. package/src/objectstore/mock_r2.ts +269 -0
  53. package/src/objectstore/null.ts +32 -0
  54. package/src/objectstore/r2.ts +318 -0
  55. package/src/observe/pairing.ts +61 -0
  56. package/src/observe/request.ts +772 -0
  57. package/src/offset.ts +70 -0
  58. package/src/postgres/bootstrap.ts +269 -0
  59. package/src/postgres/companions.ts +197 -0
  60. package/src/postgres/control_restore.ts +109 -0
  61. package/src/postgres/details.ts +189 -0
  62. package/src/postgres/lexicon_index.ts +260 -0
  63. package/src/postgres/routing_index.ts +189 -0
  64. package/src/postgres/rows.ts +132 -0
  65. package/src/postgres/schema.ts +355 -0
  66. package/src/postgres/secondary_index.ts +238 -0
  67. package/src/postgres/segments.ts +900 -0
  68. package/src/postgres/stats.ts +103 -0
  69. package/src/postgres/store.ts +947 -0
  70. package/src/postgres/touch.ts +591 -0
  71. package/src/postgres/types.ts +32 -0
  72. package/src/profiles/evlog/schema.ts +234 -0
  73. package/src/profiles/evlog.ts +473 -0
  74. package/src/profiles/generic.ts +51 -0
  75. package/src/profiles/index.ts +237 -0
  76. package/src/profiles/metrics/block_format.ts +109 -0
  77. package/src/profiles/metrics/normalize.ts +366 -0
  78. package/src/profiles/metrics/schema.ts +319 -0
  79. package/src/profiles/metrics.ts +83 -0
  80. package/src/profiles/otelTraces/normalize.ts +955 -0
  81. package/src/profiles/otelTraces/otlp.ts +1002 -0
  82. package/src/profiles/otelTraces/schema.ts +408 -0
  83. package/src/profiles/otelTraces.ts +390 -0
  84. package/src/profiles/profile.ts +284 -0
  85. package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
  86. package/src/profiles/stateProtocol/changes.ts +24 -0
  87. package/src/profiles/stateProtocol/ingest.ts +115 -0
  88. package/src/profiles/stateProtocol/routes.ts +511 -0
  89. package/src/profiles/stateProtocol/types.ts +6 -0
  90. package/src/profiles/stateProtocol/validation.ts +51 -0
  91. package/src/profiles/stateProtocol.ts +107 -0
  92. package/src/read_filter.ts +468 -0
  93. package/src/reader.ts +2986 -0
  94. package/src/runtime/hash.ts +156 -0
  95. package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
  96. package/src/runtime/hash_vendor/NOTICE.md +8 -0
  97. package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
  98. package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
  99. package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
  100. package/src/runtime/host_runtime.ts +5 -0
  101. package/src/runtime_memory.ts +200 -0
  102. package/src/runtime_memory_sampler.ts +237 -0
  103. package/src/schema/lens_schema.ts +290 -0
  104. package/src/schema/proof.ts +547 -0
  105. package/src/schema/read_json.ts +51 -0
  106. package/src/schema/registry.ts +966 -0
  107. package/src/search/agg_format.ts +638 -0
  108. package/src/search/aggregate.ts +409 -0
  109. package/src/search/binary/codec.ts +162 -0
  110. package/src/search/binary/docset.ts +67 -0
  111. package/src/search/binary/restart_strings.ts +181 -0
  112. package/src/search/binary/varint.ts +34 -0
  113. package/src/search/bitset.ts +19 -0
  114. package/src/search/col_format.ts +382 -0
  115. package/src/search/col_runtime.ts +59 -0
  116. package/src/search/column_encoding.ts +43 -0
  117. package/src/search/companion_file_cache.ts +319 -0
  118. package/src/search/companion_format.ts +327 -0
  119. package/src/search/companion_manager.ts +1305 -0
  120. package/src/search/companion_plan.ts +229 -0
  121. package/src/search/exact_format.ts +281 -0
  122. package/src/search/exact_runtime.ts +55 -0
  123. package/src/search/fts_format.ts +423 -0
  124. package/src/search/fts_runtime.ts +333 -0
  125. package/src/search/query.ts +875 -0
  126. package/src/search/schema.ts +245 -0
  127. package/src/segment/cache.ts +270 -0
  128. package/src/segment/cached_segment.ts +89 -0
  129. package/src/segment/format.ts +403 -0
  130. package/src/segment/segmenter.ts +412 -0
  131. package/src/segment/segmenter_worker.ts +72 -0
  132. package/src/segment/segmenter_workers.ts +130 -0
  133. package/src/server.ts +264 -0
  134. package/src/server_auto_tune.ts +158 -0
  135. package/src/sqlite/adapter.ts +335 -0
  136. package/src/sqlite/runtime_stats.ts +163 -0
  137. package/src/stats.ts +205 -0
  138. package/src/store/append.ts +50 -0
  139. package/src/store/bootstrap_restore_store.ts +71 -0
  140. package/src/store/capabilities.ts +86 -0
  141. package/src/store/full_mode_details_store.ts +71 -0
  142. package/src/store/index_store.ts +104 -0
  143. package/src/store/profile_touch_store.ts +1 -0
  144. package/src/store/rows.ts +144 -0
  145. package/src/store/schema_profile_store.ts +73 -0
  146. package/src/store/schema_publication.ts +6 -0
  147. package/src/store/segment_manifest_store.ts +129 -0
  148. package/src/store/segment_read_store.ts +22 -0
  149. package/src/store/stats_accounting_store.ts +83 -0
  150. package/src/store/touch_store.ts +98 -0
  151. package/src/store/wal_store.ts +21 -0
  152. package/src/stream_size_reconciler.ts +100 -0
  153. package/src/touch/canonical_change.ts +7 -0
  154. package/src/touch/live_keys.ts +158 -0
  155. package/src/touch/live_metrics.ts +841 -0
  156. package/src/touch/live_templates.ts +449 -0
  157. package/src/touch/manager.ts +1292 -0
  158. package/src/touch/process_batch.ts +576 -0
  159. package/src/touch/processor_worker.ts +85 -0
  160. package/src/touch/spec.ts +459 -0
  161. package/src/touch/touch_journal.ts +771 -0
  162. package/src/touch/touch_key_id.ts +20 -0
  163. package/src/touch/worker_pool.ts +191 -0
  164. package/src/touch/worker_protocol.ts +57 -0
  165. package/src/types/proper-lockfile.d.ts +1 -0
  166. package/src/uploader.ts +358 -0
  167. package/src/util/base32_crockford.ts +81 -0
  168. package/src/util/bloom256.ts +67 -0
  169. package/src/util/byte_lru.ts +73 -0
  170. package/src/util/cleanup.ts +22 -0
  171. package/src/util/crc32c.ts +29 -0
  172. package/src/util/ds_error.ts +15 -0
  173. package/src/util/duration.ts +17 -0
  174. package/src/util/endian.ts +53 -0
  175. package/src/util/json_pointer.ts +148 -0
  176. package/src/util/log.ts +25 -0
  177. package/src/util/lru.ts +53 -0
  178. package/src/util/retry.ts +35 -0
  179. package/src/util/siphash.ts +71 -0
  180. package/src/util/stream_paths.ts +50 -0
  181. package/src/util/time.ts +14 -0
  182. package/src/util/yield.ts +3 -0
  183. package/src/util/zstd.ts +24 -0
@@ -0,0 +1,863 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { Result } from "better-result";
3
+ import type { Config } from "../config";
4
+ import type { LexiconIndexRunRow, LexiconIndexStateRow, SegmentRow } from "../store/rows";
5
+ import type { LexiconIndexStore } from "../store/index_store";
6
+ import type { Metrics } from "../metrics";
7
+ import type { ObjectStore } from "../objectstore/interface";
8
+ import type { SchemaRegistryStore } from "../schema/registry";
9
+ import { iterateBlockRecordsResult } from "../segment/format";
10
+ import { SegmentDiskCache } from "../segment/cache";
11
+ import { loadSegmentBytesCached } from "../segment/cached_segment";
12
+ import { RestartStringTableView } from "../search/binary/restart_strings";
13
+ import { retry } from "../util/retry";
14
+ import { dsError } from "../util/ds_error.ts";
15
+ import { streamHash16Hex, lexiconRunObjectKey } from "../util/stream_paths";
16
+ import { yieldToEventLoop } from "../util/yield";
17
+ import { ConcurrencyGate } from "../concurrency_gate";
18
+ import type { ForegroundActivityTracker } from "../foreground_activity";
19
+ import { LexiconFileCache } from "./lexicon_file_cache";
20
+ import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "./schedule";
21
+ import {
22
+ buildLexiconRunPayload,
23
+ decodeLexiconRunResult,
24
+ encodeLexiconRunResult,
25
+ type LexiconRun,
26
+ } from "./lexicon_format";
27
+
28
+ const TEXT_DECODER = new TextDecoder();
29
+ const ROUTING_KEY_SOURCE_KIND = "routing_key";
30
+ const ROUTING_KEY_SOURCE_NAME = "";
31
+
32
+ export type RoutingKeyLexiconListResult = {
33
+ keys: string[];
34
+ nextAfter: string | null;
35
+ tookMs: number;
36
+ coverage: {
37
+ complete: boolean;
38
+ indexedSegments: number;
39
+ scannedUploadedSegments: number;
40
+ scannedLocalSegments: number;
41
+ scannedWalRows: number;
42
+ possibleMissingUploadedSegments: number;
43
+ possibleMissingLocalSegments: number;
44
+ };
45
+ timing: {
46
+ lexiconRunGetMs: number;
47
+ lexiconDecodeMs: number;
48
+ lexiconEnumerateMs: number;
49
+ lexiconMergeMs: number;
50
+ fallbackScanMs: number;
51
+ fallbackSegmentGetMs: number;
52
+ fallbackWalScanMs: number;
53
+ lexiconRunsLoaded: number;
54
+ };
55
+ };
56
+
57
+ type LexiconIndexError = {
58
+ kind: "invalid_lexicon_index";
59
+ message: string;
60
+ };
61
+
62
+ function invalidLexiconIndex<T = never>(message: string): Result<T, LexiconIndexError> {
63
+ return Result.err({ kind: "invalid_lexicon_index", message });
64
+ }
65
+
66
+ function errorMessage(error: unknown): string {
67
+ return String((error as any)?.message ?? error);
68
+ }
69
+
70
+ function compareKeys(left: string, right: string): number {
71
+ return left < right ? -1 : left > right ? 1 : 0;
72
+ }
73
+
74
+ function nextLexiconTerm(view: RestartStringTableView, after: string | null): { ordinal: number; term: string | null } {
75
+ let ordinal = after == null ? 0 : view.lowerBoundOrdinal(after);
76
+ while (ordinal < view.count()) {
77
+ const term = view.termAt(ordinal);
78
+ if (term == null) break;
79
+ if (after == null || compareKeys(term, after) > 0) return { ordinal, term };
80
+ ordinal += 1;
81
+ }
82
+ return { ordinal: view.count(), term: null };
83
+ }
84
+
85
+ export class LexiconIndexManager {
86
+ private readonly span: number;
87
+ private readonly compactionFanout: number;
88
+ private readonly maxLevel: number;
89
+ private readonly retireGenWindow: number;
90
+ private readonly retireMinMs: number;
91
+ private readonly fileCache?: LexiconFileCache;
92
+ private readonly foregroundActivity?: ForegroundActivityTracker;
93
+ private readonly queue = new Set<string>();
94
+ private readonly building = new Set<string>();
95
+ private readonly compacting = new Set<string>();
96
+ private timer: any | null = null;
97
+ private wakeTimer: any | null = null;
98
+ private running = false;
99
+ private stopped = false;
100
+ private tickPromise: Promise<void> | null = null;
101
+ private firstQueuedAtMs: number | null = null;
102
+
103
+ constructor(
104
+ private readonly cfg: Config,
105
+ private readonly db: LexiconIndexStore,
106
+ private readonly os: ObjectStore,
107
+ private readonly segmentCache: SegmentDiskCache | undefined,
108
+ private readonly publishManifest: ((stream: string) => Promise<void>) | undefined,
109
+ private readonly onMetadataChanged: ((stream: string) => void) | undefined,
110
+ private readonly metrics: Metrics | undefined,
111
+ private readonly registry: SchemaRegistryStore | undefined,
112
+ private readonly asyncGate: ConcurrencyGate,
113
+ foregroundActivity?: ForegroundActivityTracker
114
+ ) {
115
+ this.span = cfg.indexL0SpanSegments;
116
+ this.compactionFanout = cfg.indexCompactionFanout;
117
+ this.maxLevel = cfg.indexMaxLevel;
118
+ this.retireGenWindow = Math.max(0, cfg.indexRetireGenWindow);
119
+ this.retireMinMs = Math.max(0, cfg.indexRetireMinMs);
120
+ this.foregroundActivity = foregroundActivity;
121
+ this.fileCache =
122
+ cfg.lexiconIndexCacheMaxBytes > 0
123
+ ? new LexiconFileCache(`${cfg.rootDir}/cache/lexicon`, cfg.lexiconIndexCacheMaxBytes, cfg.lexiconMappedCacheEntries)
124
+ : undefined;
125
+ }
126
+
127
+ private async yieldBackgroundWork(): Promise<void> {
128
+ if (this.foregroundActivity) {
129
+ await this.foregroundActivity.yieldBackgroundWork();
130
+ return;
131
+ }
132
+ await yieldToEventLoop();
133
+ }
134
+
135
+ start(): void {
136
+ if (this.span <= 0 || this.timer) return;
137
+ this.stopped = false;
138
+ this.timer = setInterval(() => {
139
+ if (!this.stopped) this.runTick();
140
+ }, this.cfg.indexCheckIntervalMs);
141
+ }
142
+
143
+ async stop(): Promise<void> {
144
+ this.stopped = true;
145
+ if (this.timer) clearInterval(this.timer);
146
+ if (this.wakeTimer) clearTimeout(this.wakeTimer);
147
+ this.timer = null;
148
+ this.wakeTimer = null;
149
+ while (this.tickPromise) await this.tickPromise;
150
+ this.firstQueuedAtMs = null;
151
+ this.fileCache?.clearMapped();
152
+ }
153
+
154
+ enqueue(stream: string): void {
155
+ if (this.span <= 0 || this.stopped) return;
156
+ if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now();
157
+ this.queue.add(stream);
158
+ if (shouldDeferEnqueuedIndexWork(this.cfg)) {
159
+ this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS);
160
+ return;
161
+ }
162
+ this.scheduleTick();
163
+ }
164
+
165
+ private scheduleTick(delayMs = 0): void {
166
+ if (this.stopped || !this.timer || this.wakeTimer) return;
167
+ this.wakeTimer = setTimeout(() => {
168
+ this.wakeTimer = null;
169
+ if (this.stopped) return;
170
+ if (
171
+ shouldWaitForLowMemoryIndexQuiet(
172
+ this.cfg,
173
+ this.firstQueuedAtMs,
174
+ this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false
175
+ )
176
+ ) {
177
+ this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS);
178
+ return;
179
+ }
180
+ if (this.running) {
181
+ this.scheduleTick(250);
182
+ return;
183
+ }
184
+ this.runTick();
185
+ }, delayMs);
186
+ (this.wakeTimer as { unref?: () => void }).unref?.();
187
+ }
188
+
189
+ private runTick(): void {
190
+ if (this.tickPromise) return;
191
+ const promise = this.tick()
192
+ .catch((e) => {
193
+ const lower = errorMessage(e).toLowerCase();
194
+ const shutdownError =
195
+ lower.includes("database has closed") ||
196
+ lower.includes("closed database") ||
197
+ lower.includes("statement has finalized") ||
198
+ lower.includes("disk i/o error");
199
+ if (!this.stopped || !shutdownError) {
200
+ // eslint-disable-next-line no-console
201
+ console.error("lexicon tick failed", e);
202
+ }
203
+ })
204
+ .finally(() => {
205
+ if (this.tickPromise === promise) this.tickPromise = null;
206
+ });
207
+ this.tickPromise = promise;
208
+ }
209
+
210
+ getLocalCacheBytes(stream: string): number {
211
+ return this.fileCache?.bytesForObjectKeyPrefix(`streams/${streamHash16Hex(stream)}/lexicon/`) ?? 0;
212
+ }
213
+
214
+ getMemoryStats(): {
215
+ fileCacheBytes: number;
216
+ fileCacheEntries: number;
217
+ mappedFileBytes: number;
218
+ mappedFileEntries: number;
219
+ pinnedFileEntries: number;
220
+ } {
221
+ const stats = this.fileCache?.stats();
222
+ return {
223
+ fileCacheBytes: stats?.usedBytes ?? 0,
224
+ fileCacheEntries: stats?.entryCount ?? 0,
225
+ mappedFileBytes: stats?.mappedBytes ?? 0,
226
+ mappedFileEntries: stats?.mappedEntryCount ?? 0,
227
+ pinnedFileEntries: stats?.pinnedEntryCount ?? 0,
228
+ };
229
+ }
230
+
231
+ async listRoutingKeysResult(stream: string, after: string | null, limit: number): Promise<Result<RoutingKeyLexiconListResult, LexiconIndexError>> {
232
+ const safeLimit = Math.max(1, Math.min(limit, 500));
233
+ const startedAt = Date.now();
234
+ const timing = {
235
+ lexiconRunGetMs: 0,
236
+ lexiconDecodeMs: 0,
237
+ lexiconEnumerateMs: 0,
238
+ lexiconMergeMs: 0,
239
+ fallbackScanMs: 0,
240
+ fallbackSegmentGetMs: 0,
241
+ fallbackWalScanMs: 0,
242
+ lexiconRunsLoaded: 0,
243
+ };
244
+ const sourceState = await this.db.getLexiconIndexState(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
245
+ const uploadedSegmentCount = await this.db.countUploadedSegments(stream);
246
+ const indexedThrough = Math.max(0, Math.min(sourceState?.indexed_through ?? 0, uploadedSegmentCount));
247
+ const fallbackScan = await this.scanFallbackKeysResult(stream, indexedThrough, uploadedSegmentCount, after, timing);
248
+ if (Result.isError(fallbackScan)) return fallbackScan;
249
+
250
+ const indexedRuns = await this.db.listLexiconIndexRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
251
+ const indexedPage = await this.listKeysFromRunsResult(indexedRuns, after, safeLimit + 1, timing);
252
+ if (Result.isError(indexedPage)) return indexedPage;
253
+
254
+ const mergeStartedAt = Date.now();
255
+ const merged = mergeSortedUnique(indexedPage.value, fallbackScan.value.keys, safeLimit + 1);
256
+ timing.lexiconMergeMs += Date.now() - mergeStartedAt;
257
+ const keys = merged.length > safeLimit ? merged.slice(0, safeLimit) : merged;
258
+ const complete =
259
+ fallbackScan.value.possibleMissingUploadedSegments === 0 && fallbackScan.value.possibleMissingLocalSegments === 0;
260
+ const nextAfter = keys.length === 0 ? null : merged.length > safeLimit || !complete ? keys[keys.length - 1] ?? null : null;
261
+ return Result.ok({
262
+ keys,
263
+ nextAfter,
264
+ tookMs: Date.now() - startedAt,
265
+ coverage: {
266
+ complete,
267
+ indexedSegments: indexedThrough,
268
+ scannedUploadedSegments: fallbackScan.value.scannedUploadedSegments,
269
+ scannedLocalSegments: fallbackScan.value.scannedLocalSegments,
270
+ scannedWalRows: fallbackScan.value.scannedWalRows,
271
+ possibleMissingUploadedSegments: fallbackScan.value.possibleMissingUploadedSegments,
272
+ possibleMissingLocalSegments: fallbackScan.value.possibleMissingLocalSegments,
273
+ },
274
+ timing,
275
+ });
276
+ }
277
+
278
+ private async tick(): Promise<void> {
279
+ if (this.running || this.stopped) return;
280
+ this.running = true;
281
+ try {
282
+ const streams = Array.from(this.queue);
283
+ this.queue.clear();
284
+ for (const stream of streams) {
285
+ if (this.stopped) break;
286
+ if (!(await this.isRoutingLexiconConfigured(stream))) {
287
+ const hadState =
288
+ (await this.db.getLexiconIndexState(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME)) != null ||
289
+ (await this.db.listLexiconIndexRunsAll(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME)).length > 0;
290
+ if (hadState) {
291
+ await this.db.deleteLexiconIndexSource(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
292
+ this.onMetadataChanged?.(stream);
293
+ if (this.publishManifest) {
294
+ try {
295
+ await this.publishManifest(stream);
296
+ } catch {
297
+ // retry on next enqueue
298
+ }
299
+ }
300
+ }
301
+ continue;
302
+ }
303
+ const buildRes = await this.maybeBuildRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
304
+ if (Result.isError(buildRes)) {
305
+ // eslint-disable-next-line no-console
306
+ console.error("lexicon build failed", stream, buildRes.error.message);
307
+ this.queue.add(stream);
308
+ continue;
309
+ }
310
+ const compactRes = await this.maybeCompactRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
311
+ if (Result.isError(compactRes)) {
312
+ // eslint-disable-next-line no-console
313
+ console.error("lexicon compaction failed", stream, compactRes.error.message);
314
+ this.queue.add(stream);
315
+ continue;
316
+ }
317
+ }
318
+ } finally {
319
+ this.running = false;
320
+ if (!this.stopped && this.queue.size > 0) {
321
+ if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now();
322
+ this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0);
323
+ } else {
324
+ this.firstQueuedAtMs = null;
325
+ }
326
+ }
327
+ }
328
+
329
+ private async maybeBuildRuns(
330
+ stream: string,
331
+ sourceKind: string,
332
+ sourceName: string
333
+ ): Promise<Result<void, LexiconIndexError>> {
334
+ if (this.building.has(stream)) return Result.ok(undefined);
335
+ this.building.add(stream);
336
+ try {
337
+ return await this.asyncGate.run(async () => {
338
+ let state = await this.db.getLexiconIndexState(stream, sourceKind, sourceName);
339
+ if (!state) {
340
+ await this.db.upsertLexiconIndexState(stream, sourceKind, sourceName, 0);
341
+ state = await this.db.getLexiconIndexState(stream, sourceKind, sourceName);
342
+ }
343
+ if (!state) return Result.ok(undefined);
344
+ const uploadedCount = await this.db.countUploadedSegments(stream);
345
+ if (uploadedCount < state.indexed_through + this.span) return Result.ok(undefined);
346
+ const startSegment = state.indexed_through;
347
+ const endSegment = startSegment + this.span - 1;
348
+ const segments: SegmentRow[] = [];
349
+ for (let segmentIndex = startSegment; segmentIndex <= endSegment; segmentIndex += 1) {
350
+ const segment = await this.db.getSegmentByIndex(stream, segmentIndex);
351
+ if (!segment || !segment.r2_etag) return Result.ok(undefined);
352
+ segments.push(segment);
353
+ }
354
+ const runRes = await this.buildL0RunResult(stream, sourceKind, sourceName, startSegment, segments);
355
+ if (Result.isError(runRes)) return runRes;
356
+ const persistRes = await this.persistRunResult(runRes.value, stream);
357
+ if (Result.isError(persistRes)) return persistRes;
358
+ await this.db.insertLexiconIndexRun({
359
+ run_id: runRes.value.meta.runId,
360
+ stream,
361
+ source_kind: sourceKind,
362
+ source_name: sourceName,
363
+ level: runRes.value.meta.level,
364
+ start_segment: runRes.value.meta.startSegment,
365
+ end_segment: runRes.value.meta.endSegment,
366
+ object_key: runRes.value.meta.objectKey,
367
+ size_bytes: persistRes.value,
368
+ record_count: runRes.value.meta.recordCount,
369
+ });
370
+ await this.db.updateLexiconIndexedThrough(stream, sourceKind, sourceName, endSegment + 1);
371
+ this.onMetadataChanged?.(stream);
372
+ if (this.publishManifest) {
373
+ try {
374
+ await this.publishManifest(stream);
375
+ } catch {
376
+ // retry on next publish
377
+ }
378
+ }
379
+ if ((await this.db.countUploadedSegments(stream)) >= endSegment + 1 + this.span) {
380
+ this.queue.add(stream);
381
+ }
382
+ return Result.ok(undefined);
383
+ });
384
+ } catch (error) {
385
+ return invalidLexiconIndex(errorMessage(error));
386
+ } finally {
387
+ this.building.delete(stream);
388
+ }
389
+ }
390
+
391
+ private async maybeCompactRuns(
392
+ stream: string,
393
+ sourceKind: string,
394
+ sourceName: string
395
+ ): Promise<Result<void, LexiconIndexError>> {
396
+ if (this.compactionFanout <= 1) return Result.ok(undefined);
397
+ if (this.compacting.has(stream)) return Result.ok(undefined);
398
+ if (this.foregroundActivity?.wasActiveWithin(2000)) {
399
+ this.queue.add(stream);
400
+ return Result.ok(undefined);
401
+ }
402
+ this.compacting.add(stream);
403
+ try {
404
+ return await this.asyncGate.run(async () => {
405
+ const group = await this.findCompactionGroup(stream, sourceKind, sourceName);
406
+ if (!group) {
407
+ await this.gcRetiredRuns(stream, sourceKind, sourceName);
408
+ return Result.ok(undefined);
409
+ }
410
+ const runRes = await this.buildCompactedRunResult(stream, sourceKind, sourceName, group.level + 1, group.runs);
411
+ if (Result.isError(runRes)) return runRes;
412
+ const persistRes = await this.persistRunResult(runRes.value, stream);
413
+ if (Result.isError(persistRes)) return persistRes;
414
+ await this.db.insertLexiconIndexRun({
415
+ run_id: runRes.value.meta.runId,
416
+ stream,
417
+ source_kind: sourceKind,
418
+ source_name: sourceName,
419
+ level: runRes.value.meta.level,
420
+ start_segment: runRes.value.meta.startSegment,
421
+ end_segment: runRes.value.meta.endSegment,
422
+ object_key: runRes.value.meta.objectKey,
423
+ size_bytes: persistRes.value,
424
+ record_count: runRes.value.meta.recordCount,
425
+ });
426
+ const state = await this.db.getLexiconIndexState(stream, sourceKind, sourceName);
427
+ if (state && runRes.value.meta.endSegment + 1 > state.indexed_through) {
428
+ await this.db.updateLexiconIndexedThrough(stream, sourceKind, sourceName, runRes.value.meta.endSegment + 1);
429
+ }
430
+ const manifestRow = await this.db.getManifestRow(stream);
431
+ await this.db.retireLexiconIndexRuns(group.runs.map((run) => run.run_id), manifestRow.generation + 1, this.db.nowMs());
432
+ this.onMetadataChanged?.(stream);
433
+ if (this.publishManifest) {
434
+ try {
435
+ await this.publishManifest(stream);
436
+ } catch {
437
+ // retry on next publish
438
+ }
439
+ }
440
+ await this.gcRetiredRuns(stream, sourceKind, sourceName);
441
+ this.queue.add(stream);
442
+ return Result.ok(undefined);
443
+ });
444
+ } catch (error) {
445
+ return invalidLexiconIndex(errorMessage(error));
446
+ } finally {
447
+ this.compacting.delete(stream);
448
+ }
449
+ }
450
+
451
+ private async findCompactionGroup(
452
+ stream: string,
453
+ sourceKind: string,
454
+ sourceName: string
455
+ ): Promise<{ level: number; runs: LexiconIndexRunRow[] } | null> {
456
+ const runs = await this.db.listLexiconIndexRuns(stream, sourceKind, sourceName);
457
+ if (runs.length < this.compactionFanout) return null;
458
+ const byLevel = new Map<number, LexiconIndexRunRow[]>();
459
+ for (const run of runs) {
460
+ const entries = byLevel.get(run.level) ?? [];
461
+ entries.push(run);
462
+ byLevel.set(run.level, entries);
463
+ }
464
+ for (let level = 0; level <= this.maxLevel; level += 1) {
465
+ const levelRuns = byLevel.get(level);
466
+ if (!levelRuns || levelRuns.length < this.compactionFanout) continue;
467
+ const span = this.levelSpan(level);
468
+ for (let offset = 0; offset + this.compactionFanout <= levelRuns.length; offset += 1) {
469
+ const baseStart = levelRuns[offset]!.start_segment;
470
+ let matches = true;
471
+ for (let i = 0; i < this.compactionFanout; i += 1) {
472
+ const run = levelRuns[offset + i]!;
473
+ const expectedStart = baseStart + i * span;
474
+ if (run.level !== level || run.start_segment !== expectedStart || run.end_segment !== expectedStart + span - 1) {
475
+ matches = false;
476
+ break;
477
+ }
478
+ }
479
+ if (matches) return { level, runs: levelRuns.slice(offset, offset + this.compactionFanout) };
480
+ }
481
+ }
482
+ return null;
483
+ }
484
+
485
+ private levelSpan(level: number): number {
486
+ let span = this.span;
487
+ for (let i = 0; i < level; i += 1) span *= this.compactionFanout;
488
+ return span;
489
+ }
490
+
491
+ private async buildL0RunResult(
492
+ stream: string,
493
+ sourceKind: string,
494
+ sourceName: string,
495
+ startSegment: number,
496
+ segments: SegmentRow[]
497
+ ): Promise<Result<LexiconRun, LexiconIndexError>> {
498
+ const keys = new Set<string>();
499
+ for (const segment of segments) {
500
+ const segmentBytesRes = await this.loadSegmentBytesResult(segment);
501
+ if (Result.isError(segmentBytesRes)) return segmentBytesRes;
502
+ let processedRecords = 0;
503
+ for (const recordRes of iterateBlockRecordsResult(segmentBytesRes.value)) {
504
+ if (Result.isError(recordRes)) return invalidLexiconIndex(recordRes.error.message);
505
+ if (recordRes.value.routingKey.byteLength === 0) continue;
506
+ keys.add(TEXT_DECODER.decode(recordRes.value.routingKey));
507
+ processedRecords += 1;
508
+ if (processedRecords % 256 === 0) {
509
+ await this.yieldBackgroundWork();
510
+ }
511
+ }
512
+ await this.yieldBackgroundWork();
513
+ }
514
+ return Result.ok(this.createRun(stream, sourceKind, sourceName, 0, startSegment, startSegment + this.span - 1, Array.from(keys).sort(compareKeys)));
515
+ }
516
+
517
+ private async buildCompactedRunResult(
518
+ stream: string,
519
+ sourceKind: string,
520
+ sourceName: string,
521
+ level: number,
522
+ runs: LexiconIndexRunRow[]
523
+ ): Promise<Result<LexiconRun, LexiconIndexError>> {
524
+ const merged = await this.listKeysFromRunsResult(runs, null, Number.MAX_SAFE_INTEGER, {
525
+ lexiconRunGetMs: 0,
526
+ lexiconDecodeMs: 0,
527
+ lexiconEnumerateMs: 0,
528
+ lexiconMergeMs: 0,
529
+ fallbackScanMs: 0,
530
+ fallbackSegmentGetMs: 0,
531
+ fallbackWalScanMs: 0,
532
+ lexiconRunsLoaded: 0,
533
+ });
534
+ if (Result.isError(merged)) return merged;
535
+ return Result.ok(
536
+ this.createRun(
537
+ stream,
538
+ sourceKind,
539
+ sourceName,
540
+ level,
541
+ runs[0]!.start_segment,
542
+ runs[runs.length - 1]!.end_segment,
543
+ merged.value
544
+ )
545
+ );
546
+ }
547
+
548
+ private createRun(
549
+ stream: string,
550
+ sourceKind: string,
551
+ sourceName: string,
552
+ level: number,
553
+ startSegment: number,
554
+ endSegment: number,
555
+ keys: string[]
556
+ ): LexiconRun {
557
+ const streamHash = streamHash16Hex(stream);
558
+ const runId = `${sourceKind}-${sourceName || "default"}-l${level}-${startSegment.toString().padStart(16, "0")}-${endSegment
559
+ .toString()
560
+ .padStart(16, "0")}-${Date.now()}`;
561
+ const objectKey = lexiconRunObjectKey(streamHash, sourceKind, sourceName, runId);
562
+ const payloadBytes = buildLexiconRunPayload(keys);
563
+ return {
564
+ meta: {
565
+ runId,
566
+ level,
567
+ startSegment,
568
+ endSegment,
569
+ objectKey,
570
+ recordCount: keys.length,
571
+ },
572
+ payloadBytes,
573
+ terms: new RestartStringTableView(payloadBytes),
574
+ };
575
+ }
576
+
577
/**
 * Encode `run` and upload it to the object store under `run.meta.objectKey`.
 *
 * The upload goes through `retry` with the configured object-store retry/timeout settings.
 * After a successful upload the bytes are offered to the file cache (when one is configured)
 * and a "bytes written" metric is recorded for `stream`.
 *
 * @returns the encoded payload size in bytes, or a lexicon index error when encoding fails or
 *          anything inside the upload/cache/metric sequence throws.
 */
private async persistRunResult(run: LexiconRun, stream: string): Promise<Result<number, LexiconIndexError>> {
  const encoded = encodeLexiconRunResult(run);
  if (Result.isError(encoded)) return invalidLexiconIndex(encoded.error.message);
  const bytes = encoded.value;
  try {
    const objectKey = run.meta.objectKey;
    const retryOptions = {
      retries: this.cfg.objectStoreRetries,
      baseDelayMs: this.cfg.objectStoreBaseDelayMs,
      maxDelayMs: this.cfg.objectStoreMaxDelayMs,
      timeoutMs: this.cfg.objectStoreTimeoutMs,
    };
    await retry(() => this.os.put(objectKey, bytes, { contentLength: bytes.byteLength }), retryOptions);
    // The return value of the cache store is intentionally not inspected.
    this.fileCache?.storeBytesResult(objectKey, bytes);
    this.metrics?.record(
      "tieredstore.lexicon.bytes.written",
      bytes.byteLength,
      "bytes",
      { source: ROUTING_KEY_SOURCE_KIND },
      stream
    );
    return Result.ok(bytes.byteLength);
  } catch (error) {
    return invalidLexiconIndex(errorMessage(error));
  }
}
598
+
599
/**
 * Merge the terms of several lexicon runs into one ascending, de-duplicated list.
 *
 * Each run is loaded, positioned via `nextLexiconTerm(terms, after)`, and then all runs are
 * merged by repeatedly taking the smallest current term (per `compareKeys`) until `limit`
 * terms were collected or every run is exhausted.
 *
 * @param runs run rows to load and merge, in the order given.
 * @param after cursor passed to `nextLexiconTerm`; `null` means no cursor.
 * @param limit maximum number of terms to return.
 * @param timing mutable timing counters; `lexiconEnumerateMs` is increased by the elapsed time
 *               of this whole call, which includes the time spent loading the runs.
 * @returns the merged terms, or the first error produced while loading a run.
 */
private async listKeysFromRunsResult(
  runs: LexiconIndexRunRow[],
  after: string | null,
  limit: number,
  timing: RoutingKeyLexiconListResult["timing"]
): Promise<Result<string[], LexiconIndexError>> {
  const startedAt = Date.now();

  // One merge head per successfully loaded run.
  const heads: Array<{ run: LexiconRun; ordinal: number; current: string | null }> = [];
  for (const row of runs) {
    const loaded = await this.loadRunResult(row, timing);
    if (Result.isError(loaded)) return loaded;
    const run = loaded.value;
    if (!run) continue;
    const first = nextLexiconTerm(run.terms, after);
    heads.push({ run, ordinal: first.ordinal, current: first.term });
  }

  const merged: string[] = [];
  let previous: string | null = null;
  let roundsSinceYield = 0;
  while (merged.length < limit) {
    // Pick the smallest term currently exposed by any head.
    let minimum: string | null = null;
    for (const head of heads) {
      const candidate = head.current;
      if (candidate == null) continue;
      if (minimum == null || compareKeys(candidate, minimum) < 0) minimum = candidate;
    }
    if (minimum == null) break;

    if (minimum !== previous) {
      merged.push(minimum);
      previous = minimum;
    }

    // Advance every head that sits on the emitted term so duplicates across runs collapse.
    for (const head of heads) {
      while (head.current != null && head.current === minimum) {
        head.ordinal += 1;
        head.current = head.ordinal < head.run.terms.count() ? head.run.terms.termAt(head.ordinal) : null;
      }
    }

    // Hand control back to other background work every 256 merge rounds.
    roundsSinceYield += 1;
    if (roundsSinceYield >= 256) {
      roundsSinceYield = 0;
      await this.yieldBackgroundWork();
    }
  }

  timing.lexiconEnumerateMs += Date.now() - startedAt;
  return Result.ok(merged);
}
643
+
644
/**
 * Fetch and decode the lexicon run described by `meta`.
 *
 * Bytes come from the file cache when one is configured (which falls back to the object store
 * through the supplied loader), otherwise straight from the object store. Object-store reads
 * are wrapped in `retry`, and a missing object is treated as a failure. After decoding, the
 * run's metadata fields are overwritten with the values from `meta`.
 *
 * @param meta index row naming the object key, expected size and run attributes.
 * @param timing mutable counters; `lexiconRunGetMs`, `lexiconDecodeMs` and `lexiconRunsLoaded`
 *               are updated on the success path.
 * @returns the decoded run, or a lexicon index error when fetching or decoding fails.
 */
private async loadRunResult(
  meta: LexiconIndexRunRow,
  timing: RoutingKeyLexiconListResult["timing"]
): Promise<Result<LexiconRun | null, LexiconIndexError>> {
  try {
    // Single retrying reader shared by the cached and uncached paths.
    const fetchFromObjectStore = () =>
      retry(
        async () => {
          const data = await this.os.get(meta.object_key);
          if (!data) throw dsError(`missing lexicon run ${meta.object_key}`);
          return data;
        },
        {
          retries: this.cfg.objectStoreRetries,
          baseDelayMs: this.cfg.objectStoreBaseDelayMs,
          maxDelayMs: this.cfg.objectStoreMaxDelayMs,
          timeoutMs: this.cfg.objectStoreTimeoutMs,
        }
      );

    let bytes: Uint8Array;
    const fetchStartedAt = Date.now();
    if (!this.fileCache) {
      bytes = await fetchFromObjectStore();
    } else {
      const mapped = await this.fileCache.loadMappedFileResult({
        objectKey: meta.object_key,
        expectedSize: meta.size_bytes,
        loadBytes: () => fetchFromObjectStore(),
      });
      if (Result.isError(mapped)) return invalidLexiconIndex(mapped.error.message);
      bytes = mapped.value.bytes;
    }
    timing.lexiconRunGetMs += Date.now() - fetchStartedAt;

    const decodeStartedAt = Date.now();
    const decoded = decodeLexiconRunResult(bytes);
    if (Result.isError(decoded)) return invalidLexiconIndex(decoded.error.message);
    timing.lexiconDecodeMs += Date.now() - decodeStartedAt;
    timing.lexiconRunsLoaded += 1;

    // The index row is authoritative for the run's identity and bounds.
    const run = decoded.value;
    Object.assign(run.meta, {
      runId: meta.run_id,
      level: meta.level,
      startSegment: meta.start_segment,
      endSegment: meta.end_segment,
      objectKey: meta.object_key,
      recordCount: meta.record_count,
    });
    this.metrics?.record(
      "tieredstore.lexicon.bytes.read",
      bytes.byteLength,
      "bytes",
      { source: ROUTING_KEY_SOURCE_KIND },
      meta.stream
    );
    return Result.ok(run);
  } catch (error) {
    return invalidLexiconIndex(errorMessage(error));
  }
}
706
+
707
/**
 * Collect routing keys that the lexicon runs do not cover yet by scanning raw data.
 *
 * Two sources are read:
 *  1. Segments: scanning starts at `indexedThrough` when that value is 0, otherwise at
 *     `uploadedSegmentCount`, and stops after one segment has been scanned per call.
 *  2. WAL rows in the offset range `sealed_through + 1` .. `next_offset - 1` of the stream row.
 *
 * Empty routing keys are ignored, and when `after` is given only keys that do not compare
 * less-than-or-equal to it are kept.
 *
 * @param timing mutable counters; the `fallback*Ms` fields are increased by this call.
 * @returns the sorted unique keys plus scan counters and the number of uncovered uploaded/local
 *          segments that were not scanned, or a lexicon index error.
 */
private async scanFallbackKeysResult(
  stream: string,
  indexedThrough: number,
  uploadedSegmentCount: number,
  after: string | null,
  timing: RoutingKeyLexiconListResult["timing"]
): Promise<
  Result<
    {
      keys: string[];
      scannedUploadedSegments: number;
      scannedLocalSegments: number;
      scannedWalRows: number;
      possibleMissingUploadedSegments: number;
      possibleMissingLocalSegments: number;
    },
    LexiconIndexError
  >
> {
  const scanStartedAt = Date.now();
  const streamRow = await this.db.getStream(stream);
  if (!streamRow) return invalidLexiconIndex(`missing stream ${stream}`);
  const segmentCount = await this.db.countSegmentsForStream(stream);

  const collected = new Set<string>();
  // A key is kept unless a cursor exists and the key sorts at or before it.
  const isPastCursor = (key: string): boolean => after == null || !(compareKeys(key, after) <= 0);

  let scannedUploadedSegments = 0;
  let scannedLocalSegments = 0;
  let segmentsScanned = 0;
  const segmentScanLimit = 1;
  const firstSegment = indexedThrough === 0 ? indexedThrough : uploadedSegmentCount;
  for (
    let segmentIndex = firstSegment;
    segmentIndex < segmentCount && segmentsScanned < segmentScanLimit;
    segmentIndex += 1
  ) {
    const segment = await this.db.getSegmentByIndex(stream, segmentIndex);
    if (!segment) continue;

    const loadStartedAt = Date.now();
    const loaded = await this.loadSegmentBytesResult(segment);
    if (Result.isError(loaded)) return loaded;
    timing.fallbackSegmentGetMs += Date.now() - loadStartedAt;

    for (const record of iterateBlockRecordsResult(loaded.value)) {
      if (Result.isError(record)) return invalidLexiconIndex(record.error.message);
      const rawKey = record.value.routingKey;
      if (rawKey.byteLength === 0) continue;
      const key = TEXT_DECODER.decode(rawKey);
      if (isPastCursor(key)) collected.add(key);
    }

    if (segmentIndex < uploadedSegmentCount) {
      scannedUploadedSegments += 1;
    } else {
      scannedLocalSegments += 1;
    }
    segmentsScanned += 1;
    await this.yieldBackgroundWork();
  }

  let scannedWalRows = 0;
  const walStart = streamRow.sealed_through + 1n;
  const walEnd = streamRow.next_offset - 1n;
  if (walStart <= walEnd) {
    const walStartedAt = Date.now();
    for await (const row of this.db.readWalRange(stream, walStart, walEnd)) {
      scannedWalRows += 1;
      // Normalise the stored routing key to a Uint8Array (or null when absent).
      let routingKey: Uint8Array | null;
      if (row.routingKey == null) {
        routingKey = null;
      } else if (row.routingKey instanceof Uint8Array) {
        routingKey = row.routingKey;
      } else {
        routingKey = new Uint8Array(row.routingKey);
      }
      if (!routingKey || routingKey.byteLength === 0) continue;
      const key = TEXT_DECODER.decode(routingKey);
      if (isPastCursor(key)) collected.add(key);
    }
    timing.fallbackWalScanMs += Date.now() - walStartedAt;
  }

  const uncoveredUploaded = Math.max(0, uploadedSegmentCount - indexedThrough);
  const uncoveredLocal = Math.max(0, segmentCount - uploadedSegmentCount);
  timing.fallbackScanMs += Date.now() - scanStartedAt;

  return Result.ok({
    keys: [...collected].sort(compareKeys),
    scannedUploadedSegments,
    scannedLocalSegments,
    scannedWalRows,
    possibleMissingUploadedSegments: Math.max(0, uncoveredUploaded - scannedUploadedSegments),
    possibleMissingLocalSegments: Math.max(0, uncoveredLocal - scannedLocalSegments),
  });
}
787
+
788
/**
 * Delete retired lexicon runs that have aged out.
 *
 * A retired run is removed when either
 *  - its `retired_gen` is positive and not newer than `manifest.generation - retireGenWindow`
 *    (this rule is inactive while the cutoff is 0), or
 *  - its `retired_at_ms` plus `retireMinMs` is not later than the database's current time.
 *
 * Object-store deletes are best effort: a failing delete is ignored and the run's index row is
 * still removed afterwards.
 */
private async gcRetiredRuns(stream: string, sourceKind: string, sourceName: string): Promise<void> {
  const retired = await this.db.listRetiredLexiconIndexRuns(stream, sourceKind, sourceName);
  if (retired.length === 0) return;

  const manifest = await this.db.getManifestRow(stream);
  const nowMs = this.db.nowMs();
  const generationWindowActive = this.retireGenWindow > 0 && manifest.generation > this.retireGenWindow;
  const cutoffGen = generationWindowActive ? manifest.generation - this.retireGenWindow : 0;

  const expired = retired.filter((run) => {
    const byGeneration = run.retired_gen != null && run.retired_gen > 0 && run.retired_gen <= cutoffGen;
    const byAge = run.retired_at_ms != null && run.retired_at_ms + BigInt(this.retireMinMs) <= nowMs;
    return byGeneration || byAge;
  });
  if (expired.length === 0) return;

  const expiredRunIds: string[] = [];
  for (const run of expired) {
    expiredRunIds.push(run.run_id);
    try {
      await this.os.delete(run.object_key);
    } catch {
      // best effort
    }
  }
  await this.db.deleteLexiconIndexRuns(expiredRunIds);
}
810
+
811
/**
 * Report whether the registry entry for `stream` has a non-null `routingKey`.
 *
 * @returns `false` when no registry is attached or the registry lookup fails.
 */
private async isRoutingLexiconConfigured(stream: string): Promise<boolean> {
  const registry = this.registry;
  if (!registry) return false;
  const lookup = await registry.getRegistryResult(stream);
  return Result.isError(lookup) ? false : lookup.value.routingKey != null;
}
817
+
818
/**
 * Load the bytes of segment `seg` through `loadSegmentBytesCached`, using the configured
 * object-store retry/timeout settings and this indexer's segment cache.
 *
 * @returns the segment bytes, or a lexicon index error carrying the message of whatever the
 *          loader threw.
 */
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, LexiconIndexError>> {
  try {
    const retryOptions = {
      retries: this.cfg.objectStoreRetries,
      baseDelayMs: this.cfg.objectStoreBaseDelayMs,
      maxDelayMs: this.cfg.objectStoreMaxDelayMs,
      timeoutMs: this.cfg.objectStoreTimeoutMs,
    };
    return Result.ok(await loadSegmentBytesCached(this.os, seg, this.segmentCache, retryOptions));
  } catch (error) {
    return invalidLexiconIndex(errorMessage(error));
  }
}
836
+ }
837
+
838
/**
 * Merge two lists that are already sorted by `compareKeys` into one sorted list without
 * repeated values, stopping once `limit` entries were produced.
 *
 * @param left first sorted input; wins ties against `right`.
 * @param right second sorted input.
 * @param limit maximum number of entries in the result.
 * @returns a new array; neither input is modified.
 */
function mergeSortedUnique(left: string[], right: string[], limit: number): string[] {
  const output: string[] = [];
  let leftPos = 0;
  let rightPos = 0;
  let previous: string | null = null;

  while (output.length < limit && (leftPos < left.length || rightPos < right.length)) {
    let candidate: string;
    if (leftPos >= left.length) {
      // Left is exhausted: drain right.
      candidate = right[rightPos]!;
      rightPos += 1;
    } else if (rightPos >= right.length) {
      // Right is exhausted: drain left.
      candidate = left[leftPos]!;
      leftPos += 1;
    } else {
      const order = compareKeys(left[leftPos]!, right[rightPos]!);
      if (order <= 0) {
        candidate = left[leftPos]!;
        leftPos += 1;
        // Equal heads: consume the matching right entry as well.
        if (order === 0) rightPos += 1;
      } else {
        candidate = right[rightPos]!;
        rightPos += 1;
      }
    }

    // Skip values repeated within a single input.
    if (candidate !== previous) {
      output.push(candidate);
      previous = candidate;
    }
  }
  return output;
}