@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
|
@@ -0,0 +1,789 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { Result } from "better-result";
|
|
3
|
+
import type { Config } from "../config";
|
|
4
|
+
import type { LexiconIndexRunRow, LexiconIndexStateRow, SegmentRow, SqliteDurableStore } from "../db/db";
|
|
5
|
+
import type { Metrics } from "../metrics";
|
|
6
|
+
import type { ObjectStore } from "../objectstore/interface";
|
|
7
|
+
import type { SchemaRegistryStore } from "../schema/registry";
|
|
8
|
+
import { iterateBlockRecordsResult } from "../segment/format";
|
|
9
|
+
import { SegmentDiskCache } from "../segment/cache";
|
|
10
|
+
import { loadSegmentBytesCached } from "../segment/cached_segment";
|
|
11
|
+
import { RestartStringTableView } from "../search/binary/restart_strings";
|
|
12
|
+
import { retry } from "../util/retry";
|
|
13
|
+
import { dsError } from "../util/ds_error.ts";
|
|
14
|
+
import { streamHash16Hex, lexiconRunObjectKey } from "../util/stream_paths";
|
|
15
|
+
import { yieldToEventLoop } from "../util/yield";
|
|
16
|
+
import { ConcurrencyGate } from "../concurrency_gate";
|
|
17
|
+
import type { ForegroundActivityTracker } from "../foreground_activity";
|
|
18
|
+
import { LexiconFileCache } from "./lexicon_file_cache";
|
|
19
|
+
import {
|
|
20
|
+
buildLexiconRunPayload,
|
|
21
|
+
decodeLexiconRunResult,
|
|
22
|
+
encodeLexiconRunResult,
|
|
23
|
+
type LexiconRun,
|
|
24
|
+
} from "./lexicon_format";
|
|
25
|
+
|
|
26
|
+
// Shared UTF-8 decoder reused when turning routing-key bytes into strings
// (module-level so per-record decoding does not allocate a new decoder).
const TEXT_DECODER = new TextDecoder();
// The only lexicon source kind this indexer maintains: per-stream routing keys.
const ROUTING_KEY_SOURCE_KIND = "routing_key";
// Routing-key lexicons carry no sub-name; the empty string is the canonical source name.
const ROUTING_KEY_SOURCE_NAME = "";
|
|
29
|
+
|
|
30
|
+
/**
 * Result of a paginated routing-key listing, combining indexed lexicon runs
 * with a fallback scan over not-yet-indexed segments/WAL rows.
 */
export type RoutingKeyLexiconListResult = {
  // One page of distinct routing keys, sorted ascending.
  keys: string[];
  // Cursor for the next page; null when this page exhausts the key space.
  nextAfter: string | null;
  // Total wall-clock time for the listing call.
  tookMs: number;
  coverage: {
    // True when no segments were possibly missed by the fallback scan.
    complete: boolean;
    // Number of segments covered by lexicon index runs.
    indexedSegments: number;
    scannedUploadedSegments: number;
    scannedLocalSegments: number;
    scannedWalRows: number;
    // Segments that could not be scanned and may contain unseen keys.
    possibleMissingUploadedSegments: number;
    possibleMissingLocalSegments: number;
  };
  // Per-phase wall-clock accounting, accumulated across helper calls.
  timing: {
    lexiconRunGetMs: number;
    lexiconDecodeMs: number;
    lexiconEnumerateMs: number;
    lexiconMergeMs: number;
    fallbackScanMs: number;
    fallbackSegmentGetMs: number;
    fallbackWalScanMs: number;
    lexiconRunsLoaded: number;
  };
};
|
|
54
|
+
|
|
55
|
+
// Uniform error type for every fallible lexicon-index operation in this module.
// A single-variant discriminated union so it can be widened later without
// changing callers that switch on `kind`.
type LexiconIndexError = {
  kind: "invalid_lexicon_index";
  message: string;
};
|
|
59
|
+
|
|
60
|
+
function invalidLexiconIndex<T = never>(message: string): Result<T, LexiconIndexError> {
|
|
61
|
+
return Result.err({ kind: "invalid_lexicon_index", message });
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
function errorMessage(error: unknown): string {
|
|
65
|
+
return String((error as any)?.message ?? error);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function compareKeys(left: string, right: string): number {
|
|
69
|
+
return left < right ? -1 : left > right ? 1 : 0;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function nextLexiconTerm(view: RestartStringTableView, after: string | null): { ordinal: number; term: string | null } {
|
|
73
|
+
let ordinal = after == null ? 0 : view.lowerBoundOrdinal(after);
|
|
74
|
+
while (ordinal < view.count()) {
|
|
75
|
+
const term = view.termAt(ordinal);
|
|
76
|
+
if (term == null) break;
|
|
77
|
+
if (after == null || compareKeys(term, after) > 0) return { ordinal, term };
|
|
78
|
+
ordinal += 1;
|
|
79
|
+
}
|
|
80
|
+
return { ordinal: view.count(), term: null };
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
 * Builds, compacts, and serves routing-key lexicon index runs for streams.
 * Runs a periodic background tick that drains an enqueue set; per-stream
 * build/compact work is serialized through in-memory guard sets and the
 * shared async concurrency gate.
 */
export class LexiconIndexManager {
  // Number of uploaded segments covered by one L0 run (from cfg.indexL0SpanSegments).
  private readonly span: number;
  // How many same-level runs merge into one run at the next level.
  private readonly compactionFanout: number;
  // Highest level considered for compaction.
  private readonly maxLevel: number;
  // Generation window / minimum age before retired runs are GC'd — TODO confirm
  // exact semantics against gcRetiredRuns (defined elsewhere in this file).
  private readonly retireGenWindow: number;
  private readonly retireMinMs: number;
  // Optional on-disk cache for lexicon run payloads; unset when cache size is 0.
  private readonly fileCache?: LexiconFileCache;
  private readonly foregroundActivity?: ForegroundActivityTracker;
  // Streams awaiting index work on the next tick.
  private readonly queue = new Set<string>();
  // Per-stream re-entrancy guards for build and compaction respectively.
  private readonly building = new Set<string>();
  private readonly compacting = new Set<string>();
  // Interval handle (any: NodeJS.Timeout vs browser number — runtime-dependent).
  private timer: any | null = null;
  // True while a tick is in flight; prevents overlapping ticks.
  private running = false;

  /**
   * @param publishManifest optional callback to republish the stream manifest
   *   after index metadata changes; failures are swallowed and retried later.
   * @param onMetadataChanged optional notification hook fired on any index
   *   metadata mutation.
   */
  constructor(
    private readonly cfg: Config,
    private readonly db: SqliteDurableStore,
    private readonly os: ObjectStore,
    private readonly segmentCache: SegmentDiskCache | undefined,
    private readonly publishManifest: ((stream: string) => Promise<void>) | undefined,
    private readonly onMetadataChanged: ((stream: string) => void) | undefined,
    private readonly metrics: Metrics | undefined,
    private readonly registry: SchemaRegistryStore | undefined,
    private readonly asyncGate: ConcurrencyGate,
    foregroundActivity?: ForegroundActivityTracker
  ) {
    this.span = cfg.indexL0SpanSegments;
    this.compactionFanout = cfg.indexCompactionFanout;
    this.maxLevel = cfg.indexMaxLevel;
    // Clamp retire knobs to non-negative values.
    this.retireGenWindow = Math.max(0, cfg.indexRetireGenWindow);
    this.retireMinMs = Math.max(0, cfg.indexRetireMinMs);
    this.foregroundActivity = foregroundActivity;
    // File cache is opt-in via a positive byte budget.
    this.fileCache =
      cfg.lexiconIndexCacheMaxBytes > 0
        ? new LexiconFileCache(`${cfg.rootDir}/cache/lexicon`, cfg.lexiconIndexCacheMaxBytes, cfg.lexiconMappedCacheEntries)
        : undefined;
  }
|
|
120
|
+
|
|
121
|
+
private async yieldBackgroundWork(): Promise<void> {
|
|
122
|
+
if (this.foregroundActivity) {
|
|
123
|
+
await this.foregroundActivity.yieldBackgroundWork();
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
await yieldToEventLoop();
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
start(): void {
|
|
130
|
+
if (this.span <= 0 || this.timer) return;
|
|
131
|
+
this.timer = setInterval(() => {
|
|
132
|
+
void this.tick();
|
|
133
|
+
}, this.cfg.indexCheckIntervalMs);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
stop(): void {
|
|
137
|
+
if (this.timer) clearInterval(this.timer);
|
|
138
|
+
this.timer = null;
|
|
139
|
+
this.fileCache?.clearMapped();
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
enqueue(stream: string): void {
|
|
143
|
+
if (this.span <= 0) return;
|
|
144
|
+
this.queue.add(stream);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
getLocalCacheBytes(stream: string): number {
|
|
148
|
+
return this.fileCache?.bytesForObjectKeyPrefix(`streams/${streamHash16Hex(stream)}/lexicon/`) ?? 0;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
getMemoryStats(): {
|
|
152
|
+
fileCacheBytes: number;
|
|
153
|
+
fileCacheEntries: number;
|
|
154
|
+
mappedFileBytes: number;
|
|
155
|
+
mappedFileEntries: number;
|
|
156
|
+
pinnedFileEntries: number;
|
|
157
|
+
} {
|
|
158
|
+
const stats = this.fileCache?.stats();
|
|
159
|
+
return {
|
|
160
|
+
fileCacheBytes: stats?.usedBytes ?? 0,
|
|
161
|
+
fileCacheEntries: stats?.entryCount ?? 0,
|
|
162
|
+
mappedFileBytes: stats?.mappedBytes ?? 0,
|
|
163
|
+
mappedFileEntries: stats?.mappedEntryCount ?? 0,
|
|
164
|
+
pinnedFileEntries: stats?.pinnedEntryCount ?? 0,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
  /**
   * List one sorted page of distinct routing keys for a stream.
   * Merges keys enumerated from persisted lexicon runs with a fallback scan
   * over segments/WAL rows not yet covered by the index.
   *
   * @param after exclusive pagination cursor (null = start from the beginning)
   * @param limit requested page size; clamped to [1, 500]
   */
  async listRoutingKeysResult(stream: string, after: string | null, limit: number): Promise<Result<RoutingKeyLexiconListResult, LexiconIndexError>> {
    const safeLimit = Math.max(1, Math.min(limit, 500));
    const startedAt = Date.now();
    // Mutable accumulator threaded through helpers so each phase adds its own time.
    const timing = {
      lexiconRunGetMs: 0,
      lexiconDecodeMs: 0,
      lexiconEnumerateMs: 0,
      lexiconMergeMs: 0,
      fallbackScanMs: 0,
      fallbackSegmentGetMs: 0,
      fallbackWalScanMs: 0,
      lexiconRunsLoaded: 0,
    };
    const sourceState = this.db.getLexiconIndexState(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
    const uploadedSegmentCount = this.db.countUploadedSegments(stream);
    // indexed_through can exceed the uploaded count transiently; clamp to sane range.
    const indexedThrough = Math.max(0, Math.min(sourceState?.indexed_through ?? 0, uploadedSegmentCount));
    const fallbackScan = await this.scanFallbackKeysResult(stream, indexedThrough, uploadedSegmentCount, after, timing);
    if (Result.isError(fallbackScan)) return fallbackScan;

    const indexedRuns = this.db.listLexiconIndexRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
    // Fetch one extra key so we can tell whether another page exists.
    const indexedPage = await this.listKeysFromRunsResult(indexedRuns, after, safeLimit + 1, timing);
    if (Result.isError(indexedPage)) return indexedPage;

    const mergeStartedAt = Date.now();
    // mergeSortedUnique is defined elsewhere in this file; presumably a
    // duplicate-free merge of two sorted lists capped at the given size.
    const merged = mergeSortedUnique(indexedPage.value, fallbackScan.value.keys, safeLimit + 1);
    timing.lexiconMergeMs += Date.now() - mergeStartedAt;
    const keys = merged.length > safeLimit ? merged.slice(0, safeLimit) : merged;
    const complete =
      fallbackScan.value.possibleMissingUploadedSegments === 0 && fallbackScan.value.possibleMissingLocalSegments === 0;
    // Keep paginating when a surplus key proved more data exists, or when
    // coverage was incomplete (caller may retry to pick up missed segments).
    const nextAfter = keys.length === 0 ? null : merged.length > safeLimit || !complete ? keys[keys.length - 1] ?? null : null;
    return Result.ok({
      keys,
      nextAfter,
      tookMs: Date.now() - startedAt,
      coverage: {
        complete,
        indexedSegments: indexedThrough,
        scannedUploadedSegments: fallbackScan.value.scannedUploadedSegments,
        scannedLocalSegments: fallbackScan.value.scannedLocalSegments,
        scannedWalRows: fallbackScan.value.scannedWalRows,
        possibleMissingUploadedSegments: fallbackScan.value.possibleMissingUploadedSegments,
        possibleMissingLocalSegments: fallbackScan.value.possibleMissingLocalSegments,
      },
      timing,
    });
  }
|
|
214
|
+
|
|
215
|
+
  /**
   * One background pass: drain the enqueue set and, per stream, either tear
   * down index state (when the routing lexicon is no longer configured) or
   * attempt a build followed by a compaction. Failed streams are re-queued.
   * Guarded by `running` so overlapping interval fires never run concurrently.
   */
  private async tick(): Promise<void> {
    if (this.running) return;
    this.running = true;
    try {
      // Snapshot and clear up front; work below may re-add streams for the next tick.
      const streams = Array.from(this.queue);
      this.queue.clear();
      for (const stream of streams) {
        // isRoutingLexiconConfigured is defined elsewhere in this file.
        if (!this.isRoutingLexiconConfigured(stream)) {
          const hadState =
            this.db.getLexiconIndexState(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME) != null ||
            this.db.listLexiconIndexRunsAll(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME).length > 0;
          if (hadState) {
            // Lexicon disabled after state existed: drop it and republish.
            this.db.deleteLexiconIndexSource(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
            this.onMetadataChanged?.(stream);
            if (this.publishManifest) {
              try {
                await this.publishManifest(stream);
              } catch {
                // retry on next enqueue
              }
            }
          }
          continue;
        }
        const buildRes = await this.maybeBuildRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
        if (Result.isError(buildRes)) {
          // eslint-disable-next-line no-console
          console.error("lexicon build failed", stream, buildRes.error.message);
          this.queue.add(stream);
          continue;
        }
        const compactRes = await this.maybeCompactRuns(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME);
        if (Result.isError(compactRes)) {
          // eslint-disable-next-line no-console
          console.error("lexicon compaction failed", stream, compactRes.error.message);
          this.queue.add(stream);
          continue;
        }
      }
    } finally {
      this.running = false;
    }
  }
|
|
258
|
+
|
|
259
|
+
  /**
   * Build the next L0 lexicon run for a stream, if a full span of uploaded
   * segments is available beyond the indexed watermark. Persists the run to
   * the object store, records it in the DB, advances the watermark, and
   * republishes the manifest. Re-entrancy per stream is prevented via
   * `building`; heavy work runs inside the shared async gate.
   */
  private async maybeBuildRuns(
    stream: string,
    sourceKind: string,
    sourceName: string
  ): Promise<Result<void, LexiconIndexError>> {
    if (this.building.has(stream)) return Result.ok(undefined);
    this.building.add(stream);
    try {
      return await this.asyncGate.run(async () => {
        // Lazily create the per-source state row with a zero watermark.
        let state = this.db.getLexiconIndexState(stream, sourceKind, sourceName);
        if (!state) {
          this.db.upsertLexiconIndexState(stream, sourceKind, sourceName, 0);
          state = this.db.getLexiconIndexState(stream, sourceKind, sourceName);
        }
        if (!state) return Result.ok(undefined);
        const uploadedCount = this.db.countUploadedSegments(stream);
        // Not enough new uploaded segments yet to fill one L0 span.
        if (uploadedCount < state.indexed_through + this.span) return Result.ok(undefined);
        const startSegment = state.indexed_through;
        const endSegment = startSegment + this.span - 1;
        const segments: SegmentRow[] = [];
        for (let segmentIndex = startSegment; segmentIndex <= endSegment; segmentIndex += 1) {
          const segment = this.db.getSegmentByIndex(stream, segmentIndex);
          // Every segment in the span must exist and be uploaded (has an etag);
          // otherwise bail quietly and try again later.
          if (!segment || !segment.r2_etag) return Result.ok(undefined);
          segments.push(segment);
        }
        const runRes = await this.buildL0RunResult(stream, sourceKind, sourceName, startSegment, segments);
        if (Result.isError(runRes)) return runRes;
        // Upload the payload first, then record the run row.
        const persistRes = await this.persistRunResult(runRes.value, stream);
        if (Result.isError(persistRes)) return persistRes;
        this.db.insertLexiconIndexRun({
          run_id: runRes.value.meta.runId,
          stream,
          source_kind: sourceKind,
          source_name: sourceName,
          level: runRes.value.meta.level,
          start_segment: runRes.value.meta.startSegment,
          end_segment: runRes.value.meta.endSegment,
          object_key: runRes.value.meta.objectKey,
          size_bytes: persistRes.value,
          record_count: runRes.value.meta.recordCount,
        });
        this.db.updateLexiconIndexedThrough(stream, sourceKind, sourceName, endSegment + 1);
        this.onMetadataChanged?.(stream);
        if (this.publishManifest) {
          try {
            await this.publishManifest(stream);
          } catch {
            // retry on next publish
          }
        }
        // Another full span is already available: schedule the next build.
        if (this.db.countUploadedSegments(stream) >= endSegment + 1 + this.span) {
          this.queue.add(stream);
        }
        return Result.ok(undefined);
      });
    } catch (error) {
      return invalidLexiconIndex(errorMessage(error));
    } finally {
      this.building.delete(stream);
    }
  }
|
|
320
|
+
|
|
321
|
+
  /**
   * Merge one eligible group of same-level runs into a single run at the next
   * level, then retire the inputs and GC previously retired runs. Skipped when
   * compaction is disabled (fanout <= 1), already in flight for this stream,
   * or foreground traffic was active within the last 2s (work is deferred by
   * re-queueing).
   */
  private async maybeCompactRuns(
    stream: string,
    sourceKind: string,
    sourceName: string
  ): Promise<Result<void, LexiconIndexError>> {
    if (this.compactionFanout <= 1) return Result.ok(undefined);
    if (this.compacting.has(stream)) return Result.ok(undefined);
    if (this.foregroundActivity?.wasActiveWithin(2000)) {
      // Defer compaction while foreground requests are hot.
      this.queue.add(stream);
      return Result.ok(undefined);
    }
    this.compacting.add(stream);
    try {
      return await this.asyncGate.run(async () => {
        const group = this.findCompactionGroup(stream, sourceKind, sourceName);
        if (!group) {
          // Nothing to compact; still attempt GC of already-retired runs.
          await this.gcRetiredRuns(stream, sourceKind, sourceName);
          return Result.ok(undefined);
        }
        const runRes = await this.buildCompactedRunResult(stream, sourceKind, sourceName, group.level + 1, group.runs);
        if (Result.isError(runRes)) return runRes;
        const persistRes = await this.persistRunResult(runRes.value, stream);
        if (Result.isError(persistRes)) return persistRes;
        this.db.insertLexiconIndexRun({
          run_id: runRes.value.meta.runId,
          stream,
          source_kind: sourceKind,
          source_name: sourceName,
          level: runRes.value.meta.level,
          start_segment: runRes.value.meta.startSegment,
          end_segment: runRes.value.meta.endSegment,
          object_key: runRes.value.meta.objectKey,
          size_bytes: persistRes.value,
          record_count: runRes.value.meta.recordCount,
        });
        // A compacted run may extend coverage past the current watermark.
        const state = this.db.getLexiconIndexState(stream, sourceKind, sourceName);
        if (state && runRes.value.meta.endSegment + 1 > state.indexed_through) {
          this.db.updateLexiconIndexedThrough(stream, sourceKind, sourceName, runRes.value.meta.endSegment + 1);
        }
        // Retire inputs tagged with the next manifest generation so readers on
        // the current generation can still resolve them until GC.
        const manifestRow = this.db.getManifestRow(stream);
        this.db.retireLexiconIndexRuns(group.runs.map((run) => run.run_id), manifestRow.generation + 1, this.db.nowMs());
        this.onMetadataChanged?.(stream);
        if (this.publishManifest) {
          try {
            await this.publishManifest(stream);
          } catch {
            // retry on next publish
          }
        }
        await this.gcRetiredRuns(stream, sourceKind, sourceName);
        // More compaction may now be possible at the next level.
        this.queue.add(stream);
        return Result.ok(undefined);
      });
    } catch (error) {
      return invalidLexiconIndex(errorMessage(error));
    } finally {
      this.compacting.delete(stream);
    }
  }
|
|
380
|
+
|
|
381
|
+
  /**
   * Find the lowest level holding `compactionFanout` runs that form one exact
   * contiguous window (each run covering precisely `levelSpan(level)` segments
   * back-to-back). Returns that group, or null when no level qualifies.
   * Relies on listLexiconIndexRuns returning runs ordered by start_segment —
   * TODO confirm against the db layer.
   */
  private findCompactionGroup(stream: string, sourceKind: string, sourceName: string): { level: number; runs: LexiconIndexRunRow[] } | null {
    const runs = this.db.listLexiconIndexRuns(stream, sourceKind, sourceName);
    if (runs.length < this.compactionFanout) return null;
    // Bucket runs by level.
    const byLevel = new Map<number, LexiconIndexRunRow[]>();
    for (const run of runs) {
      const entries = byLevel.get(run.level) ?? [];
      entries.push(run);
      byLevel.set(run.level, entries);
    }
    for (let level = 0; level <= this.maxLevel; level += 1) {
      const levelRuns = byLevel.get(level);
      if (!levelRuns || levelRuns.length < this.compactionFanout) continue;
      const span = this.levelSpan(level);
      // Slide a fanout-sized window and accept the first perfectly contiguous one.
      for (let offset = 0; offset + this.compactionFanout <= levelRuns.length; offset += 1) {
        const baseStart = levelRuns[offset]!.start_segment;
        let matches = true;
        for (let i = 0; i < this.compactionFanout; i += 1) {
          const run = levelRuns[offset + i]!;
          const expectedStart = baseStart + i * span;
          // Every run must sit at its expected offset and span exactly `span` segments.
          if (run.level !== level || run.start_segment !== expectedStart || run.end_segment !== expectedStart + span - 1) {
            matches = false;
            break;
          }
        }
        if (matches) return { level, runs: levelRuns.slice(offset, offset + this.compactionFanout) };
      }
    }
    return null;
  }
|
|
410
|
+
|
|
411
|
+
private levelSpan(level: number): number {
|
|
412
|
+
let span = this.span;
|
|
413
|
+
for (let i = 0; i < level; i += 1) span *= this.compactionFanout;
|
|
414
|
+
return span;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
  /**
   * Build a level-0 lexicon run by decoding every record in the given
   * segments and collecting the set of distinct non-empty routing keys.
   * Yields periodically (every 256 records and after each segment) so
   * foreground work is not starved.
   */
  private async buildL0RunResult(
    stream: string,
    sourceKind: string,
    sourceName: string,
    startSegment: number,
    segments: SegmentRow[]
  ): Promise<Result<LexiconRun, LexiconIndexError>> {
    const keys = new Set<string>();
    for (const segment of segments) {
      // loadSegmentBytesResult is defined elsewhere in this file.
      const segmentBytesRes = await this.loadSegmentBytesResult(segment);
      if (Result.isError(segmentBytesRes)) return segmentBytesRes;
      let processedRecords = 0;
      for (const recordRes of iterateBlockRecordsResult(segmentBytesRes.value)) {
        if (Result.isError(recordRes)) return invalidLexiconIndex(recordRes.error.message);
        // Records without a routing key do not contribute to the lexicon.
        if (recordRes.value.routingKey.byteLength === 0) continue;
        keys.add(TEXT_DECODER.decode(recordRes.value.routingKey));
        processedRecords += 1;
        if (processedRecords % 256 === 0) {
          await this.yieldBackgroundWork();
        }
      }
      await this.yieldBackgroundWork();
    }
    // Level 0; run covers exactly one span of segments; keys sorted ascending.
    return Result.ok(this.createRun(stream, sourceKind, sourceName, 0, startSegment, startSegment + this.span - 1, Array.from(keys).sort(compareKeys)));
  }
|
|
442
|
+
|
|
443
|
+
  /**
   * Build a compacted run at `level` by merging ALL keys of the input runs
   * (no pagination: after=null, effectively unbounded limit). The inputs are
   * assumed contiguous and ordered, so the new run spans from the first run's
   * start to the last run's end.
   */
  private async buildCompactedRunResult(
    stream: string,
    sourceKind: string,
    sourceName: string,
    level: number,
    runs: LexiconIndexRunRow[]
  ): Promise<Result<LexiconRun, LexiconIndexError>> {
    // Throwaway timing accumulator — compaction does not report phase timing.
    const merged = await this.listKeysFromRunsResult(runs, null, Number.MAX_SAFE_INTEGER, {
      lexiconRunGetMs: 0,
      lexiconDecodeMs: 0,
      lexiconEnumerateMs: 0,
      lexiconMergeMs: 0,
      fallbackScanMs: 0,
      fallbackSegmentGetMs: 0,
      fallbackWalScanMs: 0,
      lexiconRunsLoaded: 0,
    });
    if (Result.isError(merged)) return merged;
    return Result.ok(
      this.createRun(
        stream,
        sourceKind,
        sourceName,
        level,
        runs[0]!.start_segment,
        runs[runs.length - 1]!.end_segment,
        merged.value
      )
    );
  }
|
|
473
|
+
|
|
474
|
+
  /**
   * Assemble an in-memory LexiconRun: a unique run id (source, level,
   * zero-padded segment range, and creation timestamp for uniqueness), its
   * object-store key, the encoded payload, and a term view over the payload.
   *
   * @param keys distinct routing keys, already sorted ascending
   */
  private createRun(
    stream: string,
    sourceKind: string,
    sourceName: string,
    level: number,
    startSegment: number,
    endSegment: number,
    keys: string[]
  ): LexiconRun {
    const streamHash = streamHash16Hex(stream);
    // Zero-padded to 16 digits so lexicographic order matches numeric order.
    const runId = `${sourceKind}-${sourceName || "default"}-l${level}-${startSegment.toString().padStart(16, "0")}-${endSegment
      .toString()
      .padStart(16, "0")}-${Date.now()}`;
    const objectKey = lexiconRunObjectKey(streamHash, sourceKind, sourceName, runId);
    const payloadBytes = buildLexiconRunPayload(keys);
    return {
      meta: {
        runId,
        level,
        startSegment,
        endSegment,
        objectKey,
        recordCount: keys.length,
      },
      payloadBytes,
      // View shares the payload buffer; no copy is made.
      terms: new RestartStringTableView(payloadBytes),
    };
  }
|
|
502
|
+
|
|
503
|
+
  /**
   * Encode a run and upload it to the object store with retry; on success,
   * warm the local file cache, record the bytes-written metric, and return
   * the payload size (stored as the run row's size_bytes).
   */
  private async persistRunResult(run: LexiconRun, stream: string): Promise<Result<number, LexiconIndexError>> {
    const payloadRes = encodeLexiconRunResult(run);
    if (Result.isError(payloadRes)) return invalidLexiconIndex(payloadRes.error.message);
    const payload = payloadRes.value;
    try {
      await retry(
        () => this.os.put(run.meta.objectKey, payload, { contentLength: payload.byteLength }),
        {
          retries: this.cfg.objectStoreRetries,
          baseDelayMs: this.cfg.objectStoreBaseDelayMs,
          maxDelayMs: this.cfg.objectStoreMaxDelayMs,
          timeoutMs: this.cfg.objectStoreTimeoutMs,
        }
      );
      // Best-effort cache warm; result deliberately ignored.
      this.fileCache?.storeBytesResult(run.meta.objectKey, payload);
      this.metrics?.record("tieredstore.lexicon.bytes.written", payload.byteLength, "bytes", { source: ROUTING_KEY_SOURCE_KIND }, stream);
      return Result.ok(payload.byteLength);
    } catch (error) {
      return invalidLexiconIndex(errorMessage(error));
    }
  }
|
|
524
|
+
|
|
525
|
+
  /**
   * K-way merge of sorted, deduplicated terms across the given runs, starting
   * strictly after `after`, up to `limit` results. Loads each run (cache or
   * object store) and advances per-run cursors in lockstep; yields every 256
   * emitted keys to keep the event loop responsive.
   */
  private async listKeysFromRunsResult(
    runs: LexiconIndexRunRow[],
    after: string | null,
    limit: number,
    timing: RoutingKeyLexiconListResult["timing"]
  ): Promise<Result<string[], LexiconIndexError>> {
    const enumerateStartedAt = Date.now();
    // One cursor per run, pre-positioned at the first term after `after`.
    const cursors: Array<{ run: LexiconRun; ordinal: number; current: string | null }> = [];
    for (const meta of runs) {
      const runRes = await this.loadRunResult(meta, timing);
      if (Result.isError(runRes)) return runRes;
      if (!runRes.value) continue;
      const next = nextLexiconTerm(runRes.value.terms, after);
      cursors.push({ run: runRes.value, ordinal: next.ordinal, current: next.term });
    }
    const results: string[] = [];
    let lastValue: string | null = null;
    let emittedSinceYield = 0;
    while (results.length < limit) {
      // Pick the smallest current term across all cursors.
      let smallest: string | null = null;
      for (const cursor of cursors) {
        if (cursor.current == null) continue;
        if (smallest == null || compareKeys(cursor.current, smallest) < 0) smallest = cursor.current;
      }
      if (smallest == null) break;
      // Dedupe: identical consecutive winners are emitted once.
      if (smallest !== lastValue) {
        results.push(smallest);
        lastValue = smallest;
      }
      // Advance every cursor past the emitted term (runs may repeat it).
      for (const cursor of cursors) {
        while (cursor.current != null && cursor.current === smallest) {
          cursor.ordinal += 1;
          cursor.current = cursor.ordinal < cursor.run.terms.count() ? cursor.run.terms.termAt(cursor.ordinal) : null;
        }
      }
      emittedSinceYield += 1;
      if (emittedSinceYield >= 256) {
        emittedSinceYield = 0;
        await this.yieldBackgroundWork();
      }
    }
    timing.lexiconEnumerateMs += Date.now() - enumerateStartedAt;
    return Result.ok(results);
  }
|
|
569
|
+
|
|
570
|
+
private async loadRunResult(
|
|
571
|
+
meta: LexiconIndexRunRow,
|
|
572
|
+
timing: RoutingKeyLexiconListResult["timing"]
|
|
573
|
+
): Promise<Result<LexiconRun | null, LexiconIndexError>> {
|
|
574
|
+
try {
|
|
575
|
+
let bytes: Uint8Array;
|
|
576
|
+
const runGetStartedAt = Date.now();
|
|
577
|
+
if (this.fileCache) {
|
|
578
|
+
const mappedRes = await this.fileCache.loadMappedFileResult({
|
|
579
|
+
objectKey: meta.object_key,
|
|
580
|
+
expectedSize: meta.size_bytes,
|
|
581
|
+
loadBytes: () =>
|
|
582
|
+
retry(
|
|
583
|
+
async () => {
|
|
584
|
+
const data = await this.os.get(meta.object_key);
|
|
585
|
+
if (!data) throw dsError(`missing lexicon run ${meta.object_key}`);
|
|
586
|
+
return data;
|
|
587
|
+
},
|
|
588
|
+
{
|
|
589
|
+
retries: this.cfg.objectStoreRetries,
|
|
590
|
+
baseDelayMs: this.cfg.objectStoreBaseDelayMs,
|
|
591
|
+
maxDelayMs: this.cfg.objectStoreMaxDelayMs,
|
|
592
|
+
timeoutMs: this.cfg.objectStoreTimeoutMs,
|
|
593
|
+
}
|
|
594
|
+
),
|
|
595
|
+
});
|
|
596
|
+
if (Result.isError(mappedRes)) return invalidLexiconIndex(mappedRes.error.message);
|
|
597
|
+
bytes = mappedRes.value.bytes;
|
|
598
|
+
} else {
|
|
599
|
+
bytes = await retry(
|
|
600
|
+
async () => {
|
|
601
|
+
const data = await this.os.get(meta.object_key);
|
|
602
|
+
if (!data) throw dsError(`missing lexicon run ${meta.object_key}`);
|
|
603
|
+
return data;
|
|
604
|
+
},
|
|
605
|
+
{
|
|
606
|
+
retries: this.cfg.objectStoreRetries,
|
|
607
|
+
baseDelayMs: this.cfg.objectStoreBaseDelayMs,
|
|
608
|
+
maxDelayMs: this.cfg.objectStoreMaxDelayMs,
|
|
609
|
+
timeoutMs: this.cfg.objectStoreTimeoutMs,
|
|
610
|
+
}
|
|
611
|
+
);
|
|
612
|
+
}
|
|
613
|
+
timing.lexiconRunGetMs += Date.now() - runGetStartedAt;
|
|
614
|
+
const decodeStartedAt = Date.now();
|
|
615
|
+
const runRes = decodeLexiconRunResult(bytes);
|
|
616
|
+
if (Result.isError(runRes)) return invalidLexiconIndex(runRes.error.message);
|
|
617
|
+
timing.lexiconDecodeMs += Date.now() - decodeStartedAt;
|
|
618
|
+
timing.lexiconRunsLoaded += 1;
|
|
619
|
+
const run = runRes.value;
|
|
620
|
+
run.meta.runId = meta.run_id;
|
|
621
|
+
run.meta.level = meta.level;
|
|
622
|
+
run.meta.startSegment = meta.start_segment;
|
|
623
|
+
run.meta.endSegment = meta.end_segment;
|
|
624
|
+
run.meta.objectKey = meta.object_key;
|
|
625
|
+
run.meta.recordCount = meta.record_count;
|
|
626
|
+
this.metrics?.record("tieredstore.lexicon.bytes.read", bytes.byteLength, "bytes", { source: ROUTING_KEY_SOURCE_KIND }, meta.stream);
|
|
627
|
+
return Result.ok(run);
|
|
628
|
+
} catch (error) {
|
|
629
|
+
return invalidLexiconIndex(errorMessage(error));
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
private async scanFallbackKeysResult(
|
|
634
|
+
stream: string,
|
|
635
|
+
indexedThrough: number,
|
|
636
|
+
uploadedSegmentCount: number,
|
|
637
|
+
after: string | null,
|
|
638
|
+
timing: RoutingKeyLexiconListResult["timing"]
|
|
639
|
+
): Promise<
|
|
640
|
+
Result<
|
|
641
|
+
{
|
|
642
|
+
keys: string[];
|
|
643
|
+
scannedUploadedSegments: number;
|
|
644
|
+
scannedLocalSegments: number;
|
|
645
|
+
scannedWalRows: number;
|
|
646
|
+
possibleMissingUploadedSegments: number;
|
|
647
|
+
possibleMissingLocalSegments: number;
|
|
648
|
+
},
|
|
649
|
+
LexiconIndexError
|
|
650
|
+
>
|
|
651
|
+
> {
|
|
652
|
+
const startedAt = Date.now();
|
|
653
|
+
const streamRow = this.db.getStream(stream);
|
|
654
|
+
if (!streamRow) return invalidLexiconIndex(`missing stream ${stream}`);
|
|
655
|
+
const segmentCount = this.db.countSegmentsForStream(stream);
|
|
656
|
+
const fallbackKeys = new Set<string>();
|
|
657
|
+
let scannedUploadedSegments = 0;
|
|
658
|
+
let scannedLocalSegments = 0;
|
|
659
|
+
const shouldScanUploadedSegments = indexedThrough === 0;
|
|
660
|
+
const segmentScanLimit = 1;
|
|
661
|
+
let scannedSegments = 0;
|
|
662
|
+
const fallbackStartSegment = shouldScanUploadedSegments ? indexedThrough : uploadedSegmentCount;
|
|
663
|
+
for (let segmentIndex = fallbackStartSegment; segmentIndex < segmentCount; segmentIndex += 1) {
|
|
664
|
+
if (scannedSegments >= segmentScanLimit) break;
|
|
665
|
+
const segment = this.db.getSegmentByIndex(stream, segmentIndex);
|
|
666
|
+
if (!segment) continue;
|
|
667
|
+
const segmentGetStartedAt = Date.now();
|
|
668
|
+
const bytesRes = await this.loadSegmentBytesResult(segment);
|
|
669
|
+
if (Result.isError(bytesRes)) return bytesRes;
|
|
670
|
+
timing.fallbackSegmentGetMs += Date.now() - segmentGetStartedAt;
|
|
671
|
+
for (const recordRes of iterateBlockRecordsResult(bytesRes.value)) {
|
|
672
|
+
if (Result.isError(recordRes)) return invalidLexiconIndex(recordRes.error.message);
|
|
673
|
+
if (recordRes.value.routingKey.byteLength === 0) continue;
|
|
674
|
+
const key = TEXT_DECODER.decode(recordRes.value.routingKey);
|
|
675
|
+
if (after != null && compareKeys(key, after) <= 0) continue;
|
|
676
|
+
fallbackKeys.add(key);
|
|
677
|
+
}
|
|
678
|
+
if (segmentIndex < uploadedSegmentCount) scannedUploadedSegments += 1;
|
|
679
|
+
else scannedLocalSegments += 1;
|
|
680
|
+
scannedSegments += 1;
|
|
681
|
+
await this.yieldBackgroundWork();
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
let scannedWalRows = 0;
|
|
685
|
+
const walStart = streamRow.sealed_through + 1n;
|
|
686
|
+
const walEnd = streamRow.next_offset - 1n;
|
|
687
|
+
if (walStart <= walEnd) {
|
|
688
|
+
const walStartedAt = Date.now();
|
|
689
|
+
for (const row of this.db.iterWalRange(stream, walStart, walEnd)) {
|
|
690
|
+
scannedWalRows += 1;
|
|
691
|
+
const routingKey = row.routing_key == null ? null : row.routing_key instanceof Uint8Array ? row.routing_key : new Uint8Array(row.routing_key);
|
|
692
|
+
if (!routingKey || routingKey.byteLength === 0) continue;
|
|
693
|
+
const key = TEXT_DECODER.decode(routingKey);
|
|
694
|
+
if (after != null && compareKeys(key, after) <= 0) continue;
|
|
695
|
+
fallbackKeys.add(key);
|
|
696
|
+
}
|
|
697
|
+
timing.fallbackWalScanMs += Date.now() - walStartedAt;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
const totalUncoveredUploadedSegments = Math.max(0, uploadedSegmentCount - indexedThrough);
|
|
701
|
+
const totalUncoveredLocalSegments = Math.max(0, segmentCount - uploadedSegmentCount);
|
|
702
|
+
timing.fallbackScanMs += Date.now() - startedAt;
|
|
703
|
+
|
|
704
|
+
return Result.ok({
|
|
705
|
+
keys: Array.from(fallbackKeys).sort(compareKeys),
|
|
706
|
+
scannedUploadedSegments,
|
|
707
|
+
scannedLocalSegments,
|
|
708
|
+
scannedWalRows,
|
|
709
|
+
possibleMissingUploadedSegments: Math.max(0, totalUncoveredUploadedSegments - scannedUploadedSegments),
|
|
710
|
+
possibleMissingLocalSegments: Math.max(0, totalUncoveredLocalSegments - scannedLocalSegments),
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
private async gcRetiredRuns(stream: string, sourceKind: string, sourceName: string): Promise<void> {
|
|
715
|
+
const retiredRuns = this.db.listRetiredLexiconIndexRuns(stream, sourceKind, sourceName);
|
|
716
|
+
if (retiredRuns.length === 0) return;
|
|
717
|
+
const manifest = this.db.getManifestRow(stream);
|
|
718
|
+
const nowMs = this.db.nowMs();
|
|
719
|
+
const cutoffGen =
|
|
720
|
+
this.retireGenWindow > 0 && manifest.generation > this.retireGenWindow ? manifest.generation - this.retireGenWindow : 0;
|
|
721
|
+
const deletions = retiredRuns.filter((run) => {
|
|
722
|
+
const expiredByGen = run.retired_gen != null && run.retired_gen > 0 && run.retired_gen <= cutoffGen;
|
|
723
|
+
const expiredByTtl = run.retired_at_ms != null && run.retired_at_ms + BigInt(this.retireMinMs) <= nowMs;
|
|
724
|
+
return expiredByGen || expiredByTtl;
|
|
725
|
+
});
|
|
726
|
+
if (deletions.length === 0) return;
|
|
727
|
+
for (const run of deletions) {
|
|
728
|
+
try {
|
|
729
|
+
await this.os.delete(run.object_key);
|
|
730
|
+
} catch {
|
|
731
|
+
// best effort
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
this.db.deleteLexiconIndexRuns(deletions.map((run) => run.run_id));
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
/**
 * Reports whether the stream's registry entry declares a routing key, i.e.
 * whether routing-key lexicon indexing applies to this stream at all.
 * Returns false when no registry is wired up or the lookup fails.
 */
private isRoutingLexiconConfigured(stream: string): boolean {
  const registry = this.registry;
  if (!registry) return false;
  const res = registry.getRegistryResult(stream);
  return Result.isError(res) ? false : res.value.routingKey != null;
}
|
|
743
|
+
|
|
744
|
+
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, LexiconIndexError>> {
|
|
745
|
+
try {
|
|
746
|
+
const bytes = await loadSegmentBytesCached(
|
|
747
|
+
this.os,
|
|
748
|
+
seg,
|
|
749
|
+
this.segmentCache,
|
|
750
|
+
{
|
|
751
|
+
retries: this.cfg.objectStoreRetries,
|
|
752
|
+
baseDelayMs: this.cfg.objectStoreBaseDelayMs,
|
|
753
|
+
maxDelayMs: this.cfg.objectStoreMaxDelayMs,
|
|
754
|
+
timeoutMs: this.cfg.objectStoreTimeoutMs,
|
|
755
|
+
}
|
|
756
|
+
);
|
|
757
|
+
return Result.ok(bytes);
|
|
758
|
+
} catch (error) {
|
|
759
|
+
return invalidLexiconIndex(errorMessage(error));
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
function mergeSortedUnique(left: string[], right: string[], limit: number): string[] {
|
|
765
|
+
const merged: string[] = [];
|
|
766
|
+
let li = 0;
|
|
767
|
+
let ri = 0;
|
|
768
|
+
let last: string | null = null;
|
|
769
|
+
while (merged.length < limit && (li < left.length || ri < right.length)) {
|
|
770
|
+
let next: string;
|
|
771
|
+
if (li >= left.length) {
|
|
772
|
+
next = right[ri++]!;
|
|
773
|
+
} else if (ri >= right.length) {
|
|
774
|
+
next = left[li++]!;
|
|
775
|
+
} else {
|
|
776
|
+
const cmp = compareKeys(left[li]!, right[ri]!);
|
|
777
|
+
if (cmp <= 0) {
|
|
778
|
+
next = left[li++]!;
|
|
779
|
+
if (cmp === 0) ri += 1;
|
|
780
|
+
} else {
|
|
781
|
+
next = right[ri++]!;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
if (next === last) continue;
|
|
785
|
+
merged.push(next);
|
|
786
|
+
last = next;
|
|
787
|
+
}
|
|
788
|
+
return merged;
|
|
789
|
+
}
|