@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
package/src/index/indexer.ts
CHANGED
|
@@ -5,7 +5,8 @@ import type { Config } from "../config";
|
|
|
5
5
|
import type { IndexRunRow, SegmentRow, SqliteDurableStore } from "../db/db";
|
|
6
6
|
import type { ObjectStore } from "../objectstore/interface";
|
|
7
7
|
import { SegmentDiskCache } from "../segment/cache";
|
|
8
|
-
import {
|
|
8
|
+
import { loadSegmentBytesCached } from "../segment/cached_segment";
|
|
9
|
+
import { iterateBlockRecordsResult } from "../segment/format";
|
|
9
10
|
import { siphash24 } from "../util/siphash";
|
|
10
11
|
import { retry } from "../util/retry";
|
|
11
12
|
import { indexRunObjectKey, segmentObjectKey, streamHash16Hex } from "../util/stream_paths";
|
|
@@ -14,9 +15,46 @@ import { decodeIndexRunResult, encodeIndexRunResult, RUN_TYPE_MASK16, RUN_TYPE_P
|
|
|
14
15
|
import { IndexRunCache } from "./run_cache";
|
|
15
16
|
import type { Metrics } from "../metrics";
|
|
16
17
|
import { dsError } from "../util/ds_error.ts";
|
|
18
|
+
import { yieldToEventLoop } from "../util/yield";
|
|
19
|
+
import { RuntimeMemorySampler } from "../runtime_memory_sampler";
|
|
20
|
+
import { ConcurrencyGate } from "../concurrency_gate";
|
|
21
|
+
import type { ForegroundActivityTracker } from "../foreground_activity";
|
|
22
|
+
import type { AggSectionView } from "../search/agg_format";
|
|
23
|
+
import type { ColSectionView } from "../search/col_format";
|
|
24
|
+
import type { FtsSectionView } from "../search/fts_format";
|
|
25
|
+
import type { MetricsBlockSectionView } from "../profiles/metrics/block_format";
|
|
26
|
+
import type { SchemaRegistryStore } from "../schema/registry";
|
|
27
|
+
import type { RoutingKeyLexiconListResult } from "./lexicon_indexer";
|
|
17
28
|
|
|
18
29
|
export type IndexCandidate = { segments: Set<number>; indexedThrough: number };
|
|
19
30
|
type IndexBuildError = { kind: "invalid_index_build"; message: string };
|
|
31
|
+
export type CompanionSectionLookupStats = {
|
|
32
|
+
sectionGetMs: number;
|
|
33
|
+
decodeMs: number;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
export type StreamIndexLookup = {
|
|
37
|
+
start(): void;
|
|
38
|
+
stop(): void;
|
|
39
|
+
enqueue(stream: string): void;
|
|
40
|
+
candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise<IndexCandidate | null>;
|
|
41
|
+
candidateSegmentsForSecondaryIndex(stream: string, indexName: string, keyBytes: Uint8Array): Promise<IndexCandidate | null>;
|
|
42
|
+
getAggSegmentCompanion(stream: string, segmentIndex: number): Promise<AggSectionView | null>;
|
|
43
|
+
getColSegmentCompanion(stream: string, segmentIndex: number): Promise<ColSectionView | null>;
|
|
44
|
+
getFtsSegmentCompanion(stream: string, segmentIndex: number): Promise<FtsSectionView | null>;
|
|
45
|
+
getFtsSegmentCompanionWithStats?(
|
|
46
|
+
stream: string,
|
|
47
|
+
segmentIndex: number
|
|
48
|
+
): Promise<{ companion: FtsSectionView | null; stats: CompanionSectionLookupStats }>;
|
|
49
|
+
getMetricsBlockSegmentCompanion(stream: string, segmentIndex: number): Promise<MetricsBlockSectionView | null>;
|
|
50
|
+
listRoutingKeysResult?(stream: string, after: string | null, limit: number): Promise<Result<RoutingKeyLexiconListResult, { kind: string; message: string }>>;
|
|
51
|
+
getLocalStorageUsage?(stream: string): {
|
|
52
|
+
routing_index_cache_bytes: number;
|
|
53
|
+
exact_index_cache_bytes: number;
|
|
54
|
+
companion_cache_bytes: number;
|
|
55
|
+
lexicon_index_cache_bytes: number;
|
|
56
|
+
};
|
|
57
|
+
};
|
|
20
58
|
|
|
21
59
|
function invalidIndexBuild<T = never>(message: string): Result<T, IndexBuildError> {
|
|
22
60
|
return Result.err({ kind: "invalid_index_build", message });
|
|
@@ -54,6 +92,11 @@ export class IndexManager {
|
|
|
54
92
|
private timer: any | null = null;
|
|
55
93
|
private running = false;
|
|
56
94
|
private readonly publishManifest?: (stream: string) => Promise<void>;
|
|
95
|
+
private readonly onMetadataChanged?: (stream: string) => void;
|
|
96
|
+
private readonly memorySampler?: RuntimeMemorySampler;
|
|
97
|
+
private readonly registry?: SchemaRegistryStore;
|
|
98
|
+
private readonly asyncGate: ConcurrencyGate;
|
|
99
|
+
private readonly foregroundActivity?: ForegroundActivityTracker;
|
|
57
100
|
|
|
58
101
|
constructor(
|
|
59
102
|
cfg: Config,
|
|
@@ -61,7 +104,12 @@ export class IndexManager {
|
|
|
61
104
|
os: ObjectStore,
|
|
62
105
|
segmentCache: SegmentDiskCache | undefined,
|
|
63
106
|
publishManifest?: (stream: string) => Promise<void>,
|
|
64
|
-
metrics?: Metrics
|
|
107
|
+
metrics?: Metrics,
|
|
108
|
+
onMetadataChanged?: (stream: string) => void,
|
|
109
|
+
memorySampler?: RuntimeMemorySampler,
|
|
110
|
+
registry?: SchemaRegistryStore,
|
|
111
|
+
asyncGate?: ConcurrencyGate,
|
|
112
|
+
foregroundActivity?: ForegroundActivityTracker
|
|
65
113
|
) {
|
|
66
114
|
this.cfg = cfg;
|
|
67
115
|
this.db = db;
|
|
@@ -76,10 +124,23 @@ export class IndexManager {
|
|
|
76
124
|
this.retireGenWindow = Math.max(0, cfg.indexRetireGenWindow);
|
|
77
125
|
this.retireMinMs = Math.max(0, cfg.indexRetireMinMs);
|
|
78
126
|
this.metrics = metrics;
|
|
127
|
+
this.onMetadataChanged = onMetadataChanged;
|
|
128
|
+
this.memorySampler = memorySampler;
|
|
129
|
+
this.registry = registry;
|
|
130
|
+
this.asyncGate = asyncGate ?? new ConcurrencyGate(1);
|
|
131
|
+
this.foregroundActivity = foregroundActivity;
|
|
79
132
|
this.runCache = new IndexRunCache(cfg.indexRunMemoryCacheBytes);
|
|
80
133
|
this.runDiskCache = cfg.indexRunCacheMaxBytes > 0 ? new SegmentDiskCache(`${cfg.rootDir}/cache/index`, cfg.indexRunCacheMaxBytes) : undefined;
|
|
81
134
|
}
|
|
82
135
|
|
|
136
|
+
private async yieldBackgroundWork(): Promise<void> {
|
|
137
|
+
if (this.foregroundActivity) {
|
|
138
|
+
await this.foregroundActivity.yieldBackgroundWork();
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
await yieldToEventLoop();
|
|
142
|
+
}
|
|
143
|
+
|
|
83
144
|
start(): void {
|
|
84
145
|
if (this.span <= 0) return;
|
|
85
146
|
if (this.timer) return;
|
|
@@ -98,8 +159,9 @@ export class IndexManager {
|
|
|
98
159
|
this.queue.add(stream);
|
|
99
160
|
}
|
|
100
161
|
|
|
101
|
-
async
|
|
162
|
+
async candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise<IndexCandidate | null> {
|
|
102
163
|
if (this.span <= 0) return null;
|
|
164
|
+
if (!this.isRoutingConfigured(stream)) return null;
|
|
103
165
|
const state = this.db.getIndexState(stream);
|
|
104
166
|
if (!state) return null;
|
|
105
167
|
const runs = this.db.listIndexRuns(stream);
|
|
@@ -131,6 +193,53 @@ export class IndexManager {
|
|
|
131
193
|
return { segments, indexedThrough: state.indexed_through };
|
|
132
194
|
}
|
|
133
195
|
|
|
196
|
+
async candidateSegmentsForSecondaryIndex(_stream: string, _indexName: string, _keyBytes: Uint8Array): Promise<IndexCandidate | null> {
|
|
197
|
+
return null;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
async getColSegmentCompanion(_stream: string, _segmentIndex: number): Promise<ColSectionView | null> {
|
|
201
|
+
return null;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
async getAggSegmentCompanion(_stream: string, _segmentIndex: number): Promise<AggSectionView | null> {
|
|
205
|
+
return null;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
async getFtsSegmentCompanion(_stream: string, _segmentIndex: number): Promise<FtsSectionView | null> {
|
|
209
|
+
return null;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
async getMetricsBlockSegmentCompanion(_stream: string, _segmentIndex: number): Promise<MetricsBlockSectionView | null> {
|
|
213
|
+
return null;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
getLocalCacheBytes(stream: string): number {
|
|
217
|
+
if (!this.runDiskCache) return 0;
|
|
218
|
+
return this.runDiskCache.bytesForObjectKeyPrefix(`streams/${streamHash16Hex(stream)}/index/`);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
getMemoryStats(): {
|
|
222
|
+
runCacheBytes: number;
|
|
223
|
+
runCacheEntries: number;
|
|
224
|
+
runDiskCacheBytes: number;
|
|
225
|
+
runDiskCacheEntries: number;
|
|
226
|
+
runDiskMappedBytes: number;
|
|
227
|
+
runDiskMappedEntries: number;
|
|
228
|
+
runDiskPinnedEntries: number;
|
|
229
|
+
} {
|
|
230
|
+
const mem = this.runCache.stats();
|
|
231
|
+
const disk = this.runDiskCache?.stats();
|
|
232
|
+
return {
|
|
233
|
+
runCacheBytes: mem.usedBytes,
|
|
234
|
+
runCacheEntries: mem.entries,
|
|
235
|
+
runDiskCacheBytes: disk?.usedBytes ?? 0,
|
|
236
|
+
runDiskCacheEntries: disk?.entryCount ?? 0,
|
|
237
|
+
runDiskMappedBytes: disk?.mappedBytes ?? 0,
|
|
238
|
+
runDiskMappedEntries: disk?.mappedEntryCount ?? 0,
|
|
239
|
+
runDiskPinnedEntries: disk?.pinnedEntryCount ?? 0,
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
|
|
134
243
|
private async tick(): Promise<void> {
|
|
135
244
|
if (this.running) return;
|
|
136
245
|
this.running = true;
|
|
@@ -142,6 +251,21 @@ export class IndexManager {
|
|
|
142
251
|
const streams = Array.from(this.queue);
|
|
143
252
|
this.queue.clear();
|
|
144
253
|
for (const stream of streams) {
|
|
254
|
+
if (!this.isRoutingConfigured(stream)) {
|
|
255
|
+
const hadRoutingState = !!this.db.getIndexState(stream) || this.db.listIndexRunsAll(stream).length > 0;
|
|
256
|
+
if (hadRoutingState) {
|
|
257
|
+
this.db.deleteIndex(stream);
|
|
258
|
+
this.onMetadataChanged?.(stream);
|
|
259
|
+
if (this.publishManifest) {
|
|
260
|
+
try {
|
|
261
|
+
await this.publishManifest(stream);
|
|
262
|
+
} catch {
|
|
263
|
+
// ignore and retry on next enqueue
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
145
269
|
try {
|
|
146
270
|
const buildRes = await this.maybeBuildRuns(stream);
|
|
147
271
|
if (Result.isError(buildRes)) {
|
|
@@ -179,41 +303,43 @@ export class IndexManager {
|
|
|
179
303
|
if (this.building.has(stream)) return Result.ok(undefined);
|
|
180
304
|
this.building.add(stream);
|
|
181
305
|
try {
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
306
|
+
return await this.asyncGate.run(async () => {
|
|
307
|
+
let state = this.db.getIndexState(stream);
|
|
308
|
+
if (!state) {
|
|
309
|
+
const secret = randomBytes(16);
|
|
310
|
+
this.db.upsertIndexState(stream, secret, 0);
|
|
311
|
+
state = this.db.getIndexState(stream);
|
|
312
|
+
}
|
|
313
|
+
if (!state) return Result.ok(undefined);
|
|
314
|
+
if (this.metrics) {
|
|
315
|
+
const lag = Math.max(0, this.db.countUploadedSegments(stream) - state.indexed_through);
|
|
316
|
+
this.metrics.record("tieredstore.index.lag.segments", lag, "count", undefined, stream);
|
|
317
|
+
}
|
|
318
|
+
const indexedThrough = state.indexed_through;
|
|
195
319
|
const uploadedCount = this.db.countUploadedSegments(stream);
|
|
196
320
|
if (uploadedCount < indexedThrough + this.span) return Result.ok(undefined);
|
|
197
321
|
const start = indexedThrough;
|
|
198
322
|
const end = start + this.span - 1;
|
|
199
323
|
const segments: SegmentRow[] = [];
|
|
200
|
-
let ok = true;
|
|
201
324
|
for (let i = start; i <= end; i++) {
|
|
202
325
|
const seg = this.db.getSegmentByIndex(stream, i);
|
|
203
|
-
if (!seg || !seg.r2_etag)
|
|
204
|
-
ok = false;
|
|
205
|
-
break;
|
|
206
|
-
}
|
|
326
|
+
if (!seg || !seg.r2_etag) return Result.ok(undefined);
|
|
207
327
|
segments.push(seg);
|
|
208
328
|
}
|
|
209
|
-
if (!ok) return Result.ok(undefined);
|
|
210
329
|
const t0 = Date.now();
|
|
211
|
-
const runRes =
|
|
330
|
+
const runRes = this.memorySampler
|
|
331
|
+
? await this.memorySampler.track(
|
|
332
|
+
"routing_l0",
|
|
333
|
+
{ stream, start_segment: start, end_segment: end },
|
|
334
|
+
() => this.buildL0RunResult(stream, start, segments, state.index_secret)
|
|
335
|
+
)
|
|
336
|
+
: await this.buildL0RunResult(stream, start, segments, state.index_secret);
|
|
212
337
|
if (Result.isError(runRes)) return runRes;
|
|
213
338
|
const run = runRes.value;
|
|
214
339
|
const elapsedNs = BigInt(Date.now() - t0) * 1_000_000n;
|
|
215
340
|
const persistRes = await this.persistRunResult(run, stream);
|
|
216
341
|
if (Result.isError(persistRes)) return persistRes;
|
|
342
|
+
const sizeBytes = persistRes.value;
|
|
217
343
|
this.db.insertIndexRun({
|
|
218
344
|
run_id: run.meta.runId,
|
|
219
345
|
stream,
|
|
@@ -221,6 +347,7 @@ export class IndexManager {
|
|
|
221
347
|
start_segment: run.meta.startSegment,
|
|
222
348
|
end_segment: run.meta.endSegment,
|
|
223
349
|
object_key: run.meta.objectKey,
|
|
350
|
+
size_bytes: sizeBytes,
|
|
224
351
|
filter_len: run.meta.filterLen,
|
|
225
352
|
record_count: run.meta.recordCount,
|
|
226
353
|
});
|
|
@@ -229,9 +356,10 @@ export class IndexManager {
|
|
|
229
356
|
this.metrics.record("tieredstore.index.runs.built", 1, "count", { level: String(run.meta.level) }, stream);
|
|
230
357
|
this.recordActiveRuns(stream);
|
|
231
358
|
}
|
|
232
|
-
|
|
233
|
-
this.db.updateIndexedThrough(stream,
|
|
234
|
-
state.indexed_through =
|
|
359
|
+
const nextIndexedThrough = end + 1;
|
|
360
|
+
this.db.updateIndexedThrough(stream, nextIndexedThrough);
|
|
361
|
+
state.indexed_through = nextIndexedThrough;
|
|
362
|
+
this.onMetadataChanged?.(stream);
|
|
235
363
|
if (this.publishManifest) {
|
|
236
364
|
try {
|
|
237
365
|
await this.publishManifest(stream);
|
|
@@ -239,7 +367,9 @@ export class IndexManager {
|
|
|
239
367
|
// ignore manifest publish errors; will be retried by uploader/indexer
|
|
240
368
|
}
|
|
241
369
|
}
|
|
242
|
-
|
|
370
|
+
if (this.db.countUploadedSegments(stream) >= nextIndexedThrough + this.span) this.queue.add(stream);
|
|
371
|
+
return Result.ok(undefined);
|
|
372
|
+
});
|
|
243
373
|
} finally {
|
|
244
374
|
this.building.delete(stream);
|
|
245
375
|
}
|
|
@@ -249,9 +379,13 @@ export class IndexManager {
|
|
|
249
379
|
if (this.span <= 0) return Result.ok(undefined);
|
|
250
380
|
if (this.compactionFanout <= 1) return Result.ok(undefined);
|
|
251
381
|
if (this.compacting.has(stream)) return Result.ok(undefined);
|
|
382
|
+
if (this.foregroundActivity?.wasActiveWithin(2000)) {
|
|
383
|
+
this.queue.add(stream);
|
|
384
|
+
return Result.ok(undefined);
|
|
385
|
+
}
|
|
252
386
|
this.compacting.add(stream);
|
|
253
387
|
try {
|
|
254
|
-
|
|
388
|
+
return await this.asyncGate.run(async () => {
|
|
255
389
|
const group = this.findCompactionGroup(stream);
|
|
256
390
|
if (!group) {
|
|
257
391
|
await this.gcRetiredRuns(stream);
|
|
@@ -265,6 +399,7 @@ export class IndexManager {
|
|
|
265
399
|
const elapsedNs = BigInt(Date.now() - t0) * 1_000_000n;
|
|
266
400
|
const persistRes = await this.persistRunResult(run, stream);
|
|
267
401
|
if (Result.isError(persistRes)) return persistRes;
|
|
402
|
+
const sizeBytes = persistRes.value;
|
|
268
403
|
this.db.insertIndexRun({
|
|
269
404
|
run_id: run.meta.runId,
|
|
270
405
|
stream,
|
|
@@ -272,6 +407,7 @@ export class IndexManager {
|
|
|
272
407
|
start_segment: run.meta.startSegment,
|
|
273
408
|
end_segment: run.meta.endSegment,
|
|
274
409
|
object_key: run.meta.objectKey,
|
|
410
|
+
size_bytes: sizeBytes,
|
|
275
411
|
filter_len: run.meta.filterLen,
|
|
276
412
|
record_count: run.meta.recordCount,
|
|
277
413
|
});
|
|
@@ -288,6 +424,7 @@ export class IndexManager {
|
|
|
288
424
|
retiredGen,
|
|
289
425
|
nowMs
|
|
290
426
|
);
|
|
427
|
+
this.onMetadataChanged?.(stream);
|
|
291
428
|
if (this.metrics) {
|
|
292
429
|
this.metrics.record("tieredstore.index.compact.latency", Number(elapsedNs), "ns", { level: String(run.meta.level) }, stream);
|
|
293
430
|
this.metrics.record("tieredstore.index.runs.compacted", 1, "count", { level: String(run.meta.level) }, stream);
|
|
@@ -305,7 +442,9 @@ export class IndexManager {
|
|
|
305
442
|
}
|
|
306
443
|
}
|
|
307
444
|
await this.gcRetiredRuns(stream);
|
|
308
|
-
|
|
445
|
+
this.queue.add(stream);
|
|
446
|
+
return Result.ok(undefined);
|
|
447
|
+
});
|
|
309
448
|
} finally {
|
|
310
449
|
this.compacting.delete(stream);
|
|
311
450
|
}
|
|
@@ -353,18 +492,39 @@ export class IndexManager {
|
|
|
353
492
|
inputs: IndexRunRow[]
|
|
354
493
|
): Promise<Result<IndexRun, IndexBuildError>> {
|
|
355
494
|
if (inputs.length === 0) return invalidIndexBuild("compact: missing inputs");
|
|
356
|
-
const segments = new Map<bigint,
|
|
495
|
+
const segments = new Map<bigint, number[]>();
|
|
357
496
|
const addSegment = (fp: bigint, seg: number) => {
|
|
358
|
-
let
|
|
359
|
-
if (!
|
|
360
|
-
|
|
361
|
-
segments.set(fp,
|
|
497
|
+
let list = segments.get(fp);
|
|
498
|
+
if (!list) {
|
|
499
|
+
list = [];
|
|
500
|
+
segments.set(fp, list);
|
|
362
501
|
}
|
|
363
|
-
|
|
502
|
+
list.push(seg);
|
|
503
|
+
};
|
|
504
|
+
const mergeRun = (meta: IndexRunRow, run: IndexRun): void => {
|
|
505
|
+
if (run.runType === RUN_TYPE_MASK16 && run.masks) {
|
|
506
|
+
for (let i = 0; i < run.fingerprints.length; i++) {
|
|
507
|
+
const fp = run.fingerprints[i];
|
|
508
|
+
const mask = run.masks[i];
|
|
509
|
+
for (let bit = 0; bit < 16; bit++) {
|
|
510
|
+
if ((mask & (1 << bit)) === 0) continue;
|
|
511
|
+
addSegment(fp, meta.start_segment + bit);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
|
|
517
|
+
for (let i = 0; i < run.fingerprints.length; i++) {
|
|
518
|
+
const fp = run.fingerprints[i];
|
|
519
|
+
const postings = run.postings[i];
|
|
520
|
+
for (const rel of postings) addSegment(fp, meta.start_segment + rel);
|
|
521
|
+
}
|
|
522
|
+
return;
|
|
523
|
+
}
|
|
524
|
+
throw dsError(`unknown run type ${run.runType}`);
|
|
364
525
|
};
|
|
365
526
|
|
|
366
527
|
const pending = inputs.slice();
|
|
367
|
-
const results: Array<{ meta: IndexRunRow; run: IndexRun }> = [];
|
|
368
528
|
const workers = Math.min(this.compactionConcurrency, pending.length);
|
|
369
529
|
let buildError: string | null = null;
|
|
370
530
|
const workerTasks: Promise<void>[] = [];
|
|
@@ -385,7 +545,13 @@ export class IndexManager {
|
|
|
385
545
|
buildError = `missing run ${meta.run_id}`;
|
|
386
546
|
return;
|
|
387
547
|
}
|
|
388
|
-
|
|
548
|
+
try {
|
|
549
|
+
mergeRun(meta, run);
|
|
550
|
+
} catch (e: unknown) {
|
|
551
|
+
buildError = errorMessage(e);
|
|
552
|
+
return;
|
|
553
|
+
}
|
|
554
|
+
await this.yieldBackgroundWork();
|
|
389
555
|
}
|
|
390
556
|
})()
|
|
391
557
|
);
|
|
@@ -393,45 +559,22 @@ export class IndexManager {
|
|
|
393
559
|
await Promise.all(workerTasks);
|
|
394
560
|
if (buildError) return invalidIndexBuild(buildError);
|
|
395
561
|
|
|
396
|
-
for (const res of results) {
|
|
397
|
-
const run = res.run;
|
|
398
|
-
const meta = res.meta;
|
|
399
|
-
if (run.runType === RUN_TYPE_MASK16 && run.masks) {
|
|
400
|
-
for (let i = 0; i < run.fingerprints.length; i++) {
|
|
401
|
-
const fp = run.fingerprints[i];
|
|
402
|
-
const mask = run.masks[i];
|
|
403
|
-
for (let bit = 0; bit < 16; bit++) {
|
|
404
|
-
if ((mask & (1 << bit)) === 0) continue;
|
|
405
|
-
addSegment(fp, meta.start_segment + bit);
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
} else if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
|
|
409
|
-
for (let i = 0; i < run.fingerprints.length; i++) {
|
|
410
|
-
const fp = run.fingerprints[i];
|
|
411
|
-
const postings = run.postings[i];
|
|
412
|
-
for (const rel of postings) addSegment(fp, meta.start_segment + rel);
|
|
413
|
-
}
|
|
414
|
-
} else {
|
|
415
|
-
return invalidIndexBuild(`unknown run type ${run.runType}`);
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
|
|
419
562
|
const startSegment = inputs[0].start_segment;
|
|
420
563
|
const endSegment = inputs[inputs.length - 1].end_segment;
|
|
421
|
-
const
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
postings
|
|
564
|
+
const fingerprints = Array.from(segments.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
|
|
565
|
+
const postings: number[][] = new Array(fingerprints.length);
|
|
566
|
+
for (let i = 0; i < fingerprints.length; i++) {
|
|
567
|
+
const fp = fingerprints[i]!;
|
|
568
|
+
const list = segments.get(fp) ?? [];
|
|
569
|
+
list.sort((a, b) => a - b);
|
|
570
|
+
const rel: number[] = [];
|
|
571
|
+
let lastSeg = Number.NaN;
|
|
572
|
+
for (const seg of list) {
|
|
573
|
+
if (seg === lastSeg) continue;
|
|
574
|
+
rel.push(seg - startSegment);
|
|
575
|
+
lastSeg = seg;
|
|
576
|
+
}
|
|
577
|
+
postings[i] = rel;
|
|
435
578
|
}
|
|
436
579
|
|
|
437
580
|
const fuseRes = buildBinaryFuseResult(fingerprints);
|
|
@@ -492,7 +635,6 @@ export class IndexManager {
|
|
|
492
635
|
const maskByFp = new Map<bigint, number>();
|
|
493
636
|
const pending = segments.slice();
|
|
494
637
|
const concurrency = Math.max(1, Math.min(this.buildConcurrency, pending.length));
|
|
495
|
-
const results: Array<Map<bigint, number>> = [];
|
|
496
638
|
let buildError: string | null = null;
|
|
497
639
|
const workers: Promise<void>[] = [];
|
|
498
640
|
for (let i = 0; i < concurrency; i++) {
|
|
@@ -511,35 +653,35 @@ export class IndexManager {
|
|
|
511
653
|
const bit = seg.segment_index - startSegment;
|
|
512
654
|
const maskBit = 1 << bit;
|
|
513
655
|
const local = new Map<bigint, number>();
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
656
|
+
let processedRecords = 0;
|
|
657
|
+
for (const recRes of iterateBlockRecordsResult(segBytes)) {
|
|
658
|
+
if (Result.isError(recRes)) {
|
|
659
|
+
buildError = recRes.error.message;
|
|
517
660
|
return;
|
|
518
661
|
}
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
662
|
+
if (recRes.value.routingKey.byteLength === 0) continue;
|
|
663
|
+
const fp = siphash24(secret, recRes.value.routingKey);
|
|
664
|
+
const prev = local.get(fp) ?? 0;
|
|
665
|
+
local.set(fp, prev | maskBit);
|
|
666
|
+
processedRecords += 1;
|
|
667
|
+
if (processedRecords % 256 === 0) {
|
|
668
|
+
await this.yieldBackgroundWork();
|
|
525
669
|
}
|
|
526
670
|
}
|
|
527
|
-
|
|
671
|
+
for (const [fp, mask] of local.entries()) {
|
|
672
|
+
const prev = maskByFp.get(fp) ?? 0;
|
|
673
|
+
maskByFp.set(fp, prev | mask);
|
|
674
|
+
}
|
|
675
|
+
local.clear();
|
|
676
|
+
await this.yieldBackgroundWork();
|
|
528
677
|
}
|
|
529
678
|
})()
|
|
530
679
|
);
|
|
531
680
|
}
|
|
532
681
|
await Promise.all(workers);
|
|
533
682
|
if (buildError) return invalidIndexBuild(buildError);
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
const prev = maskByFp.get(fp) ?? 0;
|
|
537
|
-
maskByFp.set(fp, prev | mask);
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
const entries = Array.from(maskByFp.entries()).sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0));
|
|
541
|
-
const fingerprints = entries.map(([fp]) => fp);
|
|
542
|
-
const masks = entries.map(([, mask]) => mask);
|
|
683
|
+
const fingerprints = Array.from(maskByFp.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
|
|
684
|
+
const masks = fingerprints.map((fp) => maskByFp.get(fp) ?? 0);
|
|
543
685
|
const fuseRes = buildBinaryFuseResult(fingerprints);
|
|
544
686
|
if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
|
|
545
687
|
const { filter, bytes } = fuseRes.value;
|
|
@@ -566,7 +708,17 @@ export class IndexManager {
|
|
|
566
708
|
return Result.ok(run);
|
|
567
709
|
}
|
|
568
710
|
|
|
569
|
-
private
|
|
711
|
+
private isRoutingConfigured(stream: string): boolean {
|
|
712
|
+
const streamRow = this.db.getStream(stream);
|
|
713
|
+
const contentType = streamRow?.content_type.split(";")[0]?.trim().toLowerCase() ?? null;
|
|
714
|
+
if (contentType != null && contentType !== "application/json") return true;
|
|
715
|
+
if (!this.registry) return false;
|
|
716
|
+
const regRes = this.registry.getRegistryResult(stream);
|
|
717
|
+
if (Result.isError(regRes)) return false;
|
|
718
|
+
return !!regRes.value.routingKey;
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
private async persistRunResult(run: IndexRun, stream?: string): Promise<Result<number, IndexBuildError>> {
|
|
570
722
|
const payloadRes = encodeIndexRunResult(run);
|
|
571
723
|
if (Result.isError(payloadRes)) return invalidIndexBuild(payloadRes.error.message);
|
|
572
724
|
const payload = payloadRes.value;
|
|
@@ -587,8 +739,8 @@ export class IndexManager {
|
|
|
587
739
|
return invalidIndexBuild(String(e?.message ?? e));
|
|
588
740
|
}
|
|
589
741
|
this.runDiskCache?.put(run.meta.objectKey, payload);
|
|
590
|
-
this.runCache.put(run.meta.objectKey, run);
|
|
591
|
-
return Result.ok(
|
|
742
|
+
this.runCache.put(run.meta.objectKey, run, payload.byteLength);
|
|
743
|
+
return Result.ok(payload.byteLength);
|
|
592
744
|
}
|
|
593
745
|
|
|
594
746
|
private async loadRunResult(meta: IndexRunRow): Promise<Result<IndexRun | null, IndexBuildError>> {
|
|
@@ -638,38 +790,16 @@ export class IndexManager {
|
|
|
638
790
|
run.meta.endSegment = meta.end_segment;
|
|
639
791
|
run.meta.filterLen = meta.filter_len;
|
|
640
792
|
run.meta.recordCount = meta.record_count;
|
|
641
|
-
this.runCache.put(meta.object_key, run);
|
|
793
|
+
this.runCache.put(meta.object_key, run, meta.size_bytes);
|
|
642
794
|
return Result.ok(run);
|
|
643
795
|
}
|
|
644
796
|
|
|
645
797
|
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, IndexBuildError>> {
|
|
646
|
-
if (seg.local_path && seg.local_path.length > 0) {
|
|
647
|
-
try {
|
|
648
|
-
return Result.ok(new Uint8Array(readFileSync(seg.local_path)));
|
|
649
|
-
} catch {
|
|
650
|
-
// fall through
|
|
651
|
-
}
|
|
652
|
-
}
|
|
653
|
-
const diskCache = this.segmentCache;
|
|
654
|
-
const key = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);
|
|
655
|
-
if (diskCache && diskCache.has(key)) {
|
|
656
|
-
diskCache.recordHit();
|
|
657
|
-
diskCache.touch(key);
|
|
658
|
-
try {
|
|
659
|
-
return Result.ok(new Uint8Array(readFileSync(diskCache.getPath(key))));
|
|
660
|
-
} catch {
|
|
661
|
-
diskCache.remove(key);
|
|
662
|
-
}
|
|
663
|
-
}
|
|
664
|
-
if (diskCache) diskCache.recordMiss();
|
|
665
798
|
try {
|
|
666
|
-
const data = await
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
if (diskCache) diskCache.put(key, objectBytes);
|
|
671
|
-
return objectBytes;
|
|
672
|
-
},
|
|
799
|
+
const data = await loadSegmentBytesCached(
|
|
800
|
+
this.os,
|
|
801
|
+
seg,
|
|
802
|
+
this.segmentCache,
|
|
673
803
|
{
|
|
674
804
|
retries: this.cfg.objectStoreRetries,
|
|
675
805
|
baseDelayMs: this.cfg.objectStoreBaseDelayMs,
|