@tungthedev/streams-server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +76 -0
- package/LICENSE +201 -0
- package/README.md +58 -0
- package/SECURITY.md +42 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +46 -0
- package/src/app.ts +583 -0
- package/src/app_core.ts +3144 -0
- package/src/app_local.ts +206 -0
- package/src/auth.ts +124 -0
- package/src/auto_tune.ts +69 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +613 -0
- package/src/compute/demo_entry.ts +415 -0
- package/src/compute/demo_site.ts +1242 -0
- package/src/compute/entry.ts +19 -0
- package/src/compute/package_entry.ts +4 -0
- package/src/compute/virtual-modules.d.ts +15 -0
- package/src/compute/worker_module_url.ts +9 -0
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +402 -0
- package/src/db/bootstrap_store.ts +9 -0
- package/src/db/db.ts +2424 -0
- package/src/db/schema.ts +925 -0
- package/src/db/sqlite_manifest_snapshot.ts +81 -0
- package/src/db/sqlite_touch_store.ts +491 -0
- package/src/db/sqlite_wal_store.ts +472 -0
- package/src/details/full_mode_details.ts +568 -0
- package/src/expiry_sweeper.ts +47 -0
- package/src/foreground_activity.ts +55 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +947 -0
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +863 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/index/schedule.ts +28 -0
- package/src/index/secondary_indexer.ts +901 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +309 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +249 -0
- package/src/memory.ts +334 -0
- package/src/metrics.ts +147 -0
- package/src/metrics_emitter.ts +83 -0
- package/src/notifier.ts +180 -0
- package/src/objectstore/accounting.ts +151 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +318 -0
- package/src/observe/pairing.ts +61 -0
- package/src/observe/request.ts +772 -0
- package/src/offset.ts +70 -0
- package/src/postgres/bootstrap.ts +269 -0
- package/src/postgres/companions.ts +197 -0
- package/src/postgres/control_restore.ts +109 -0
- package/src/postgres/details.ts +189 -0
- package/src/postgres/lexicon_index.ts +260 -0
- package/src/postgres/routing_index.ts +189 -0
- package/src/postgres/rows.ts +132 -0
- package/src/postgres/schema.ts +355 -0
- package/src/postgres/secondary_index.ts +238 -0
- package/src/postgres/segments.ts +900 -0
- package/src/postgres/stats.ts +103 -0
- package/src/postgres/store.ts +947 -0
- package/src/postgres/touch.ts +591 -0
- package/src/postgres/types.ts +32 -0
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +473 -0
- package/src/profiles/generic.ts +51 -0
- package/src/profiles/index.ts +237 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +83 -0
- package/src/profiles/otelTraces/normalize.ts +955 -0
- package/src/profiles/otelTraces/otlp.ts +1002 -0
- package/src/profiles/otelTraces/schema.ts +408 -0
- package/src/profiles/otelTraces.ts +390 -0
- package/src/profiles/profile.ts +284 -0
- package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
- package/src/profiles/stateProtocol/changes.ts +24 -0
- package/src/profiles/stateProtocol/ingest.ts +115 -0
- package/src/profiles/stateProtocol/routes.ts +511 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +107 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2986 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +237 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/read_json.ts +51 -0
- package/src/schema/registry.ts +966 -0
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +409 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +327 -0
- package/src/search/companion_manager.ts +1305 -0
- package/src/search/companion_plan.ts +229 -0
- package/src/search/exact_format.ts +281 -0
- package/src/search/exact_runtime.ts +55 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +270 -0
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +403 -0
- package/src/segment/segmenter.ts +412 -0
- package/src/segment/segmenter_worker.ts +72 -0
- package/src/segment/segmenter_workers.ts +130 -0
- package/src/server.ts +264 -0
- package/src/server_auto_tune.ts +158 -0
- package/src/sqlite/adapter.ts +335 -0
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +205 -0
- package/src/store/append.ts +50 -0
- package/src/store/bootstrap_restore_store.ts +71 -0
- package/src/store/capabilities.ts +86 -0
- package/src/store/full_mode_details_store.ts +71 -0
- package/src/store/index_store.ts +104 -0
- package/src/store/profile_touch_store.ts +1 -0
- package/src/store/rows.ts +144 -0
- package/src/store/schema_profile_store.ts +73 -0
- package/src/store/schema_publication.ts +6 -0
- package/src/store/segment_manifest_store.ts +129 -0
- package/src/store/segment_read_store.ts +22 -0
- package/src/store/stats_accounting_store.ts +83 -0
- package/src/store/touch_store.ts +98 -0
- package/src/store/wal_store.ts +21 -0
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_keys.ts +158 -0
- package/src/touch/live_metrics.ts +841 -0
- package/src/touch/live_templates.ts +449 -0
- package/src/touch/manager.ts +1292 -0
- package/src/touch/process_batch.ts +576 -0
- package/src/touch/processor_worker.ts +85 -0
- package/src/touch/spec.ts +459 -0
- package/src/touch/touch_journal.ts +771 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +191 -0
- package/src/touch/worker_protocol.ts +57 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +358 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/byte_lru.ts +73 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +53 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +50 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/src/util/zstd.ts +24 -0
|
@@ -0,0 +1,901 @@
|
|
|
1
|
+
import { randomBytes } from "node:crypto";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import { Result } from "better-result";
|
|
4
|
+
import type { Config } from "../config";
|
|
5
|
+
import type { SecondaryIndexRunRow, SegmentRow } from "../store/rows";
|
|
6
|
+
import type { CompanionProgressStore, SecondaryIndexStore } from "../store/index_store";
|
|
7
|
+
import type { ObjectStore } from "../objectstore/interface";
|
|
8
|
+
import { SchemaRegistryStore } from "../schema/registry";
|
|
9
|
+
import { SegmentDiskCache } from "../segment/cache";
|
|
10
|
+
import { loadSegmentBytesCached } from "../segment/cached_segment";
|
|
11
|
+
import { iterateBlockRecordsResult } from "../segment/format";
|
|
12
|
+
import { retry } from "../util/retry";
|
|
13
|
+
import { dsError } from "../util/ds_error.ts";
|
|
14
|
+
import { secondaryIndexRunObjectKey, streamHash16Hex } from "../util/stream_paths";
|
|
15
|
+
import { siphash24 } from "../util/siphash";
|
|
16
|
+
import { yieldToEventLoop } from "../util/yield";
|
|
17
|
+
import { RuntimeMemorySampler } from "../runtime_memory_sampler";
|
|
18
|
+
import { ConcurrencyGate } from "../concurrency_gate";
|
|
19
|
+
import type { ForegroundActivityTracker } from "../foreground_activity";
|
|
20
|
+
import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "./schedule";
|
|
21
|
+
import { binaryFuseContains, buildBinaryFuseResult } from "./binary_fuse";
|
|
22
|
+
import { IndexRunCache } from "./run_cache";
|
|
23
|
+
import {
|
|
24
|
+
decodeIndexRunResult,
|
|
25
|
+
encodeIndexRunResult,
|
|
26
|
+
RUN_TYPE_MASK16,
|
|
27
|
+
RUN_TYPE_POSTINGS,
|
|
28
|
+
type IndexRun,
|
|
29
|
+
} from "./run_format";
|
|
30
|
+
import {
|
|
31
|
+
extractSecondaryIndexValuesForFieldResult,
|
|
32
|
+
extractSecondaryIndexValuesResult,
|
|
33
|
+
getConfiguredSecondaryIndexes,
|
|
34
|
+
hashSecondaryIndexField,
|
|
35
|
+
type SecondaryIndexField,
|
|
36
|
+
} from "./secondary_schema";
|
|
37
|
+
|
|
38
|
+
/** Error payload carried by the `Result` values that the build and compaction methods in this file return. */
type SecondaryIndexBuildError = { kind: "invalid_index_build"; message: string };
|
|
39
|
+
|
|
40
|
+
/**
 * Builds a failed `Result` whose error is an `invalid_index_build` payload
 * carrying the given message.
 *
 * @param message Human-readable description of why the build is invalid.
 * @returns An error `Result`; the success type `T` is chosen by the caller.
 */
function invalidIndexBuild<T = never>(message: string): Result<T, SecondaryIndexBuildError> {
  const failure: SecondaryIndexBuildError = { kind: "invalid_index_build", message };
  return Result.err(failure);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Looks up `needle` in `values` by repeatedly halving a closed index window.
 *
 * @param values Array to search; the halving logic only finds elements reliably
 *   when the array is sorted in ascending order.
 * @param needle Value to locate.
 * @returns The index of a matching element, or -1 when none is found.
 */
function binarySearch(values: bigint[], needle: bigint): number {
  let low = 0;
  let high = values.length - 1;
  while (low <= high) {
    // Unsigned shift keeps the midpoint a non-negative integer.
    const probe = (low + high) >>> 1;
    const candidate = values[probe];
    if (candidate === needle) {
      return probe;
    }
    if (candidate < needle) {
      low = probe + 1;
    } else {
      high = probe - 1;
    }
  }
  return -1;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Turns an arbitrary thrown value into a string.
 *
 * Uses the value's `message` property when it is present and not nullish;
 * otherwise stringifies the value itself.
 */
function errorMessage(e: unknown): string {
  const message = e == null ? undefined : (e as { message?: unknown }).message;
  return String(message ?? e);
}
|
|
60
|
+
|
|
61
|
+
// Module-level TextDecoder built with the default encoding (UTF-8).
// NOTE(review): presumably reused to decode record payload bytes — confirm at the use sites.
const PAYLOAD_DECODER = new TextDecoder();
|
|
62
|
+
// Module-level TextEncoder (always UTF-8).
// NOTE(review): presumably reused to encode index terms into bytes — confirm at the use sites.
const TERM_ENCODER = new TextEncoder();
|
|
63
|
+
export class SecondaryIndexManager {
|
|
64
|
+
private readonly cfg: Config;
|
|
65
|
+
private readonly db: SecondaryIndexStore;
|
|
66
|
+
private readonly os: ObjectStore;
|
|
67
|
+
private readonly registry: SchemaRegistryStore;
|
|
68
|
+
private readonly segmentCache?: SegmentDiskCache;
|
|
69
|
+
private readonly runDiskCache?: SegmentDiskCache;
|
|
70
|
+
private readonly runCache: IndexRunCache;
|
|
71
|
+
private readonly span: number;
|
|
72
|
+
private readonly buildConcurrency: number;
|
|
73
|
+
private readonly compactionFanout: number;
|
|
74
|
+
private readonly maxLevel: number;
|
|
75
|
+
private readonly compactionConcurrency: number;
|
|
76
|
+
private readonly retireGenWindow: number;
|
|
77
|
+
private readonly retireMinMs: number;
|
|
78
|
+
private readonly queue = new Set<string>();
|
|
79
|
+
private readonly building = new Set<string>();
|
|
80
|
+
private readonly compacting = new Set<string>();
|
|
81
|
+
private readonly streamIdleTicks = new Map<string, { logicalSizeBytes: bigint; nextOffset: bigint; flatTicks: number }>();
|
|
82
|
+
private timer: any | null = null;
|
|
83
|
+
private wakeTimer: any | null = null;
|
|
84
|
+
private running = false;
|
|
85
|
+
private stopped = false;
|
|
86
|
+
private tickPromise: Promise<void> | null = null;
|
|
87
|
+
private readonly publishManifest?: (stream: string) => Promise<void>;
|
|
88
|
+
private readonly onMetadataChanged?: (stream: string) => void;
|
|
89
|
+
private readonly memorySampler?: RuntimeMemorySampler;
|
|
90
|
+
private readonly asyncGate: ConcurrencyGate;
|
|
91
|
+
private readonly foregroundActivity?: ForegroundActivityTracker;
|
|
92
|
+
private firstQueuedAtMs: number | null = null;
|
|
93
|
+
|
|
94
|
+
/**
 * Wires the manager to its collaborators and derives tuning values from `cfg`.
 *
 * Concurrency settings are clamped to at least 1 and retirement settings to at
 * least 0. When no `asyncGate` is supplied, a private gate with a limit of 1 is
 * created. The on-disk run cache is only created when
 * `cfg.indexRunCacheMaxBytes` is positive.
 */
constructor(
  cfg: Config,
  db: SecondaryIndexStore,
  private readonly companionProgress: CompanionProgressStore,
  os: ObjectStore,
  registry: SchemaRegistryStore,
  segmentCache?: SegmentDiskCache,
  publishManifest?: (stream: string) => Promise<void>,
  onMetadataChanged?: (stream: string) => void,
  memorySampler?: RuntimeMemorySampler,
  asyncGate?: ConcurrencyGate,
  foregroundActivity?: ForegroundActivityTracker
) {
  // Collaborators.
  this.cfg = cfg;
  this.db = db;
  this.os = os;
  this.registry = registry;
  this.segmentCache = segmentCache;

  // Optional hooks and helpers.
  this.publishManifest = publishManifest;
  this.onMetadataChanged = onMetadataChanged;
  this.memorySampler = memorySampler;
  this.foregroundActivity = foregroundActivity;
  this.asyncGate = asyncGate ?? new ConcurrencyGate(1);

  // Tuning values taken from the configuration.
  this.span = cfg.indexL0SpanSegments;
  this.compactionFanout = cfg.indexCompactionFanout;
  this.maxLevel = cfg.indexMaxLevel;
  this.buildConcurrency = Math.max(1, cfg.indexBuildConcurrency);
  this.compactionConcurrency = Math.max(1, cfg.indexCompactionConcurrency);
  this.retireGenWindow = Math.max(0, cfg.indexRetireGenWindow);
  this.retireMinMs = Math.max(0, cfg.indexRetireMinMs);

  // Caches: the in-memory cache always exists, the disk cache is opt-in.
  this.runCache = new IndexRunCache(cfg.indexRunMemoryCacheBytes);
  const diskCacheLimit = cfg.indexRunCacheMaxBytes;
  let diskCache: SegmentDiskCache | undefined;
  if (diskCacheLimit > 0) {
    diskCache = new SegmentDiskCache(`${cfg.rootDir}/cache/secondary-index`, diskCacheLimit);
  }
  this.runDiskCache = diskCache;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Gives other work a chance to run: delegates to the foreground-activity
 * tracker when one was supplied, otherwise yields to the event loop.
 */
private async yieldBackgroundWork(): Promise<void> {
  const tracker = this.foregroundActivity;
  if (!tracker) {
    await yieldToEventLoop();
    return;
  }
  await tracker.yieldBackgroundWork();
}
|
|
138
|
+
|
|
139
|
+
/**
 * Starts the periodic tick timer. Does nothing when indexing is disabled
 * (`span <= 0`) or a timer is already installed.
 */
start(): void {
  const alreadyStarted = Boolean(this.timer);
  if (this.span <= 0 || alreadyStarted) return;
  this.stopped = false;
  const onInterval = (): void => {
    if (this.stopped) return;
    this.runTick();
  };
  this.timer = setInterval(onInterval, this.cfg.indexCheckIntervalMs);
}
|
|
147
|
+
|
|
148
|
+
/**
 * Stops the manager: marks it stopped, cancels both timers, waits for any
 * in-flight tick to settle, then clears idle-tick bookkeeping and the
 * first-queued timestamp.
 */
async stop(): Promise<void> {
  this.stopped = true;

  const { timer, wakeTimer } = this;
  if (timer) clearInterval(timer);
  if (wakeTimer) clearTimeout(wakeTimer);
  this.timer = null;
  this.wakeTimer = null;

  // Re-read the field after every await in case another tick promise was installed.
  for (let inFlight = this.tickPromise; inFlight; inFlight = this.tickPromise) {
    await inFlight;
  }

  this.streamIdleTicks.clear();
  this.firstQueuedAtMs = null;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Queues `stream` for index work and requests a wake-up tick.
 *
 * Ignored when indexing is disabled or the manager is stopped. The wake-up is
 * delayed by `LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS` when
 * `shouldDeferEnqueuedIndexWork` says so, otherwise it is immediate.
 */
enqueue(stream: string): void {
  if (this.stopped || this.span <= 0) return;
  // Remember when the current batch of queued work began.
  this.firstQueuedAtMs = this.firstQueuedAtMs ?? Date.now();
  this.queue.add(stream);
  const delayMs = shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0;
  this.scheduleTick(delayMs);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Arms a one-shot wake-up timer that runs a tick after `delayMs`.
 *
 * No-op when the manager is stopped, not started, or a wake-up is already
 * pending. When the timer fires it re-arms itself instead of ticking if
 * `shouldWaitForLowMemoryIndexQuiet` asks to wait or a tick is still running.
 *
 * @param delayMs Delay before the wake-up fires; defaults to 0.
 */
private scheduleTick(delayMs = 0): void {
  if (this.stopped || !this.timer || this.wakeTimer) return;

  const onWake = (): void => {
    this.wakeTimer = null;
    if (this.stopped) return;

    const mustStayQuiet = shouldWaitForLowMemoryIndexQuiet(
      this.cfg,
      this.firstQueuedAtMs,
      this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false
    );
    if (mustStayQuiet) {
      this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS);
      return;
    }

    if (this.running) {
      // A tick is still in progress; check again shortly.
      this.scheduleTick(250);
      return;
    }

    this.runTick();
  };

  this.wakeTimer = setTimeout(onWake, delayMs);
  // Call unref() when the timer handle offers it.
  (this.wakeTimer as { unref?: () => void }).unref?.();
}
|
|
193
|
+
|
|
194
|
+
/**
 * Starts a tick unless one is already in flight and tracks its promise in
 * `tickPromise`.
 *
 * Failures are logged, except when the manager is stopped and the message
 * matches one of the known shutdown error texts.
 */
private runTick(): void {
  if (this.tickPromise) return;

  const shutdownMarkers = ["database has closed", "closed database", "statement has finalized", "disk i/o error"];
  const reportFailure = (e: unknown): void => {
    const text = errorMessage(e).toLowerCase();
    const looksLikeShutdown = shutdownMarkers.some((marker) => text.includes(marker));
    if (this.stopped && looksLikeShutdown) return;
    // eslint-disable-next-line no-console
    console.error("secondary index tick failed", e);
  };

  const inFlight: Promise<void> = this.tick()
    .catch(reportFailure)
    .finally(() => {
      // Only clear the slot if it still refers to this tick.
      if (this.tickPromise === inFlight) this.tickPromise = null;
    });
  this.tickPromise = inFlight;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Collects the segment numbers that the index runs of `indexName` associate
 * with the given key.
 *
 * @param stream Stream whose index is consulted.
 * @param indexName Name of the configured secondary index.
 * @param keyBytes Key bytes; hashed with the index secret into a fingerprint.
 * @returns The candidate segment set plus the state's `indexed_through`, or
 *   `null` when the index cannot be used: indexing disabled, registry lookup
 *   failed, index not configured, no stored state, config hash mismatch, or
 *   nothing indexed yet.
 */
async candidateSegmentsForSecondaryIndex(
  stream: string,
  indexName: string,
  keyBytes: Uint8Array
): Promise<{ segments: Set<number>; indexedThrough: number } | null> {
  if (this.span <= 0) return null;

  const registryResult = await this.registry.getRegistryResult(stream);
  if (Result.isError(registryResult)) return null;

  const field = getConfiguredSecondaryIndexes(registryResult.value).find((entry) => entry.name === indexName);
  if (!field) return null;

  const indexState = await this.db.getSecondaryIndexState(stream, indexName);
  if (!indexState) return null;
  // State built for a different field definition must not be used for lookups.
  if (indexState.config_hash !== hashSecondaryIndexField(field)) return null;

  const runMetas = await this.db.listSecondaryIndexRuns(stream, indexName);
  if (runMetas.length === 0 && indexState.indexed_through === 0) return null;

  const fingerprint = siphash24(indexState.index_secret, keyBytes);
  const candidates = new Set<number>();

  for (const runMeta of runMetas) {
    // NOTE(review): runs that fail to load are skipped silently, so their
    // segments are missing from the result — confirm callers tolerate that.
    const loaded = await this.loadRunResult(runMeta);
    if (Result.isError(loaded)) continue;
    const run = loaded.value;
    if (!run) continue;

    // Skip runs whose filter rules the fingerprint out.
    if (run.filter && !binaryFuseContains(run.filter, fingerprint)) continue;

    const masks = run.runType === RUN_TYPE_MASK16 ? run.masks : undefined;
    if (masks) {
      const hit = binarySearch(run.fingerprints, fingerprint);
      if (hit < 0) continue;
      const mask = masks[hit];
      // Bit N of the mask stands for segment startSegment + N.
      for (let bit = 0; bit < 16; bit++) {
        if ((mask & (1 << bit)) !== 0) candidates.add(run.meta.startSegment + bit);
      }
    } else if (run.postings) {
      const hit = binarySearch(run.fingerprints, fingerprint);
      if (hit < 0) continue;
      // NOTE(review): posting values are used as absolute segment numbers here,
      // while buildCompactedRunResult adds meta.start_segment to them — confirm
      // which convention the decoded run actually uses.
      for (const segment of run.postings[hit]) candidates.add(segment);
    }
  }

  return { segments: candidates, indexedThrough: indexState.indexed_through };
}
|
|
256
|
+
|
|
257
|
+
/**
 * Reports how many bytes the on-disk run cache holds under this stream's
 * secondary-index key prefix; 0 when no disk cache is configured.
 */
getLocalCacheBytes(stream: string): number {
  const diskCache = this.runDiskCache;
  if (!diskCache) return 0;
  const keyPrefix = `streams/${streamHash16Hex(stream)}/secondary-index/`;
  return diskCache.bytesForObjectKeyPrefix(keyPrefix);
}
|
|
261
|
+
|
|
262
|
+
/**
 * Snapshot of cache usage: in-memory run cache, on-disk run cache (all zeros
 * when no disk cache is configured), and the number of streams tracked in the
 * idle-tick map.
 */
getMemoryStats(): {
  runCacheBytes: number;
  runCacheEntries: number;
  runDiskCacheBytes: number;
  runDiskCacheEntries: number;
  runDiskMappedBytes: number;
  runDiskMappedEntries: number;
  runDiskPinnedEntries: number;
  streamIdleTickEntries: number;
} {
  const { usedBytes: runCacheBytes, entries: runCacheEntries } = this.runCache.stats();
  const diskStats = this.runDiskCache ? this.runDiskCache.stats() : undefined;
  const streamIdleTickEntries = this.streamIdleTicks.size;
  return {
    runCacheBytes,
    runCacheEntries,
    runDiskCacheBytes: diskStats?.usedBytes ?? 0,
    runDiskCacheEntries: diskStats?.entryCount ?? 0,
    runDiskMappedBytes: diskStats?.mappedBytes ?? 0,
    runDiskMappedEntries: diskStats?.mappedEntryCount ?? 0,
    runDiskPinnedEntries: diskStats?.pinnedEntryCount ?? 0,
    streamIdleTickEntries,
  };
}
|
|
285
|
+
|
|
286
|
+
/**
 * Processes every stream currently in the queue once.
 *
 * For each stream: skips it if its registry cannot be read, re-queues it if
 * background work should pause, deletes stored indexes that are no longer
 * configured, then tries to build and compact runs for each configured index.
 * Streams whose build or compaction fails are put back on the queue.
 *
 * The queue is drained up front, so if an unexpected exception escapes the
 * loop, the streams that were not attempted yet are put back on the queue
 * before the exception propagates (unless the manager is stopped). Without
 * that they would be lost until someone enqueues them again.
 */
private async tick(): Promise<void> {
  if (this.running || this.stopped) return;
  this.running = true;
  let drained: string[] = [];
  // Number of drained streams the loop has started working on.
  let attempted = 0;
  try {
    drained = Array.from(this.queue);
    this.queue.clear();
    for (const stream of drained) {
      if (this.stopped) break;
      attempted += 1;
      const regRes = await this.registry.getRegistryResult(stream);
      if (Result.isError(regRes)) continue;
      if (await this.shouldPauseExactBackgroundWork(stream)) {
        this.queue.add(stream);
        continue;
      }
      const configured = getConfiguredSecondaryIndexes(regRes.value);
      const configuredNames = new Set(configured.map((entry) => entry.name));

      // Drop stored indexes that are no longer part of the configuration.
      const existing = await this.db.listSecondaryIndexStates(stream);
      let removedAny = false;
      for (const state of existing) {
        if (configuredNames.has(state.index_name)) continue;
        await this.db.deleteSecondaryIndex(stream, state.index_name);
        removedAny = true;
      }
      if (removedAny) {
        this.onMetadataChanged?.(stream);
        if (this.publishManifest) {
          try {
            await this.publishManifest(stream);
          } catch {
            // ignore and retry on next enqueue
          }
        }
      }

      for (const index of configured) {
        try {
          const buildRes = await this.maybeBuildRuns(stream, index);
          if (Result.isError(buildRes)) {
            this.queue.add(stream);
            continue;
          }
          const compactRes = await this.maybeCompactRuns(stream, index.name);
          if (Result.isError(compactRes)) {
            this.queue.add(stream);
          }
        } catch (e) {
          // Closed-database errors are not logged; the stream is still re-queued.
          const msg = errorMessage(e).toLowerCase();
          if (!msg.includes("database has closed") && !msg.includes("closed database") && !msg.includes("statement has finalized")) {
            // eslint-disable-next-line no-console
            console.error("secondary index build failed", stream, index.name, e);
          }
          this.queue.add(stream);
        }
      }
    }
  } finally {
    this.running = false;
    if (!this.stopped) {
      // Only non-empty when an exception cut the loop short.
      for (const stream of drained.slice(attempted)) this.queue.add(stream);
    }
    if (!this.stopped && this.queue.size > 0) {
      if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now();
      this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0);
    } else {
      this.firstQueuedAtMs = null;
    }
  }
}
|
|
351
|
+
|
|
352
|
+
/**
 * Builds at most one level-0 run for `index` on `stream`, provided a full span
 * of uploaded segments is available past the current `indexed_through`.
 *
 * Creates the index state on first use and resets it (delete + recreate with a
 * fresh secret) when the stored config hash no longer matches the field. After
 * a successful build the run is persisted and recorded, `indexed_through` is
 * advanced, the manifest is published on a best-effort basis, and the stream
 * is re-queued if another full span is already uploaded.
 *
 * @returns `ok` when there was nothing to do or the build succeeded; the build
 *   or persist error otherwise.
 */
private async maybeBuildRuns(stream: string, index: SecondaryIndexField): Promise<Result<void, SecondaryIndexBuildError>> {
  if (this.span <= 0) return Result.ok(undefined);
  const buildKey = `${stream}:${index.name}`;
  // One build at a time per stream/index pair.
  if (this.building.has(buildKey)) return Result.ok(undefined);
  this.building.add(buildKey);

  const work = async (): Promise<Result<void, SecondaryIndexBuildError>> => {
    const wantedHash = hashSecondaryIndexField(index);
    let state = await this.db.getSecondaryIndexState(stream, index.name);
    if (state && state.config_hash !== wantedHash) {
      // Field definition changed: throw the old index away and start over.
      await this.db.deleteSecondaryIndex(stream, index.name);
      await this.db.upsertSecondaryIndexState(stream, index.name, randomBytes(16), wantedHash, 0);
      state = await this.db.getSecondaryIndexState(stream, index.name);
      this.onMetadataChanged?.(stream);
      if (this.publishManifest) {
        try {
          await this.publishManifest(stream);
        } catch {
          // ignore and retry later
        }
      }
    } else if (!state) {
      await this.db.upsertSecondaryIndexState(stream, index.name, randomBytes(16), wantedHash, 0);
      state = await this.db.getSecondaryIndexState(stream, index.name);
    }
    if (!state) return Result.ok(undefined);

    if (await this.shouldPauseExactBackgroundWork(stream)) {
      this.queue.add(stream);
      return Result.ok(undefined);
    }

    const firstSegment = state.indexed_through;
    const uploaded = await this.db.countUploadedSegments(stream);
    if (uploaded < firstSegment + this.span) return Result.ok(undefined);

    const lastSegment = firstSegment + this.span - 1;
    const segments: SegmentRow[] = [];
    for (let segmentIndex = firstSegment; segmentIndex <= lastSegment; segmentIndex++) {
      const row = await this.db.getSegmentByIndex(stream, segmentIndex);
      // Every segment in the span must exist and carry an r2_etag.
      if (!row?.r2_etag) return Result.ok(undefined);
      segments.push(row);
    }

    const secret = state.index_secret;
    const buildRun = () => this.buildL0RunResult(stream, index, firstSegment, segments, secret);
    const built = this.memorySampler
      ? await this.memorySampler.track(
          "exact_l0",
          { stream, index_name: index.name, start_segment: firstSegment, end_segment: lastSegment },
          buildRun
        )
      : await buildRun();
    if (Result.isError(built)) return built;

    const run = built.value;
    const persisted = await this.persistRunResult(run);
    if (Result.isError(persisted)) return persisted;

    const { meta } = run;
    await this.db.insertSecondaryIndexRun({
      run_id: meta.runId,
      stream,
      index_name: index.name,
      level: meta.level,
      start_segment: meta.startSegment,
      end_segment: meta.endSegment,
      object_key: meta.objectKey,
      size_bytes: persisted.value,
      filter_len: meta.filterLen,
      record_count: meta.recordCount,
    });

    const nextIndexedThrough = lastSegment + 1;
    await this.db.updateSecondaryIndexedThrough(stream, index.name, nextIndexedThrough);
    state.indexed_through = nextIndexedThrough;

    this.onMetadataChanged?.(stream);
    if (this.publishManifest) {
      try {
        await this.publishManifest(stream);
      } catch {
        // ignore and retry later
      }
    }

    // Another full span is already waiting: come back for it on a later tick.
    const uploadedNow = await this.db.countUploadedSegments(stream);
    if (uploadedNow >= nextIndexedThrough + this.span) this.queue.add(stream);
    return Result.ok(undefined);
  };

  try {
    return await this.asyncGate.run(work);
  } finally {
    this.building.delete(buildKey);
  }
}
|
|
436
|
+
|
|
437
|
+
/**
 * Merges one group of same-level runs into a single run one level up, if such
 * a group exists.
 *
 * Skipped when indexing is disabled, the fanout is 1 or less, a compaction for
 * this stream/index is already in progress, or foreground activity was seen in
 * the last 2000 ms (the stream is re-queued in that last case). When no group
 * is found, only garbage collection of retired runs is performed. After a
 * merge, the input runs are retired, the manifest is published on a
 * best-effort basis, and the stream is re-queued.
 *
 * @returns `ok` when there was nothing to do or the compaction succeeded; the
 *   build or persist error otherwise.
 */
private async maybeCompactRuns(stream: string, indexName: string): Promise<Result<void, SecondaryIndexBuildError>> {
  const compactionDisabled = this.span <= 0 || this.compactionFanout <= 1;
  if (compactionDisabled) return Result.ok(undefined);

  const compactKey = `${stream}:${indexName}`;
  if (this.compacting.has(compactKey)) return Result.ok(undefined);

  if (this.foregroundActivity?.wasActiveWithin(2000)) {
    this.queue.add(stream);
    return Result.ok(undefined);
  }

  this.compacting.add(compactKey);

  const work = async (): Promise<Result<void, SecondaryIndexBuildError>> => {
    const paused = await this.shouldPauseExactBackgroundWork(stream);
    if (paused) {
      this.queue.add(stream);
      return Result.ok(undefined);
    }

    const group = await this.findCompactionGroup(stream, indexName);
    if (!group) {
      await this.gcRetiredRuns(stream, indexName);
      return Result.ok(undefined);
    }

    const merged = await this.buildCompactedRunResult(stream, indexName, group.level + 1, group.runs);
    if (Result.isError(merged)) return merged;
    const run = merged.value;

    const persisted = await this.persistRunResult(run);
    if (Result.isError(persisted)) return persisted;

    const { meta } = run;
    await this.db.insertSecondaryIndexRun({
      run_id: meta.runId,
      stream,
      index_name: indexName,
      level: meta.level,
      start_segment: meta.startSegment,
      end_segment: meta.endSegment,
      object_key: meta.objectKey,
      size_bytes: persisted.value,
      filter_len: meta.filterLen,
      record_count: meta.recordCount,
    });

    // Only ever move indexed_through forward.
    const coveredThrough = meta.endSegment + 1;
    const state = await this.db.getSecondaryIndexState(stream, indexName);
    if (state && coveredThrough > state.indexed_through) {
      await this.db.updateSecondaryIndexedThrough(stream, indexName, coveredThrough);
    }

    // Retire the inputs, tagged with the generation after the current manifest's.
    const manifestRow = await this.db.getManifestRow(stream);
    await this.db.retireSecondaryIndexRuns(
      group.runs.map((input) => input.run_id),
      manifestRow.generation + 1,
      this.db.nowMs()
    );

    this.onMetadataChanged?.(stream);
    if (this.publishManifest) {
      try {
        await this.publishManifest(stream);
      } catch {
        // ignore and retry later
      }
    }

    await this.gcRetiredRuns(stream, indexName);
    this.queue.add(stream);
    return Result.ok(undefined);
  };

  try {
    return await this.asyncGate.run(work);
  } finally {
    this.compacting.delete(compactKey);
  }
}
|
|
503
|
+
|
|
504
|
+
/**
 * Finds the first group of `compactionFanout` runs that sit at the same level,
 * follow each other in the listing, and each cover exactly one level-span of
 * segments back to back.
 *
 * Levels are examined from 0 up to and including `maxLevel`.
 * NOTE(review): the window scan presumes `listSecondaryIndexRuns` returns runs
 * ordered by `start_segment` within a level — confirm against the store.
 *
 * @returns The level and the matching runs, or `null` when no group qualifies.
 */
private async findCompactionGroup(stream: string, indexName: string): Promise<{ level: number; runs: SecondaryIndexRunRow[] } | null> {
  const fanout = this.compactionFanout;
  const allRuns = await this.db.listSecondaryIndexRuns(stream, indexName);
  if (allRuns.length < fanout) return null;

  // Bucket the runs by level, keeping listing order inside each bucket.
  const runsByLevel = new Map<number, SecondaryIndexRunRow[]>();
  for (const run of allRuns) {
    const bucket = runsByLevel.get(run.level);
    if (bucket) {
      bucket.push(run);
    } else {
      runsByLevel.set(run.level, [run]);
    }
  }

  for (let level = 0; level <= this.maxLevel; level++) {
    const levelRuns = runsByLevel.get(level);
    if (!levelRuns || levelRuns.length < fanout) continue;
    const width = this.levelSpan(level);

    // True when the `fanout` runs starting at `offset` tile a contiguous range.
    const isContiguousAt = (offset: number): boolean => {
      const base = levelRuns[offset].start_segment;
      for (let j = 0; j < fanout; j++) {
        const candidate = levelRuns[offset + j];
        const expectedStart = base + j * width;
        if (candidate.start_segment !== expectedStart) return false;
        if (candidate.end_segment !== expectedStart + width - 1) return false;
      }
      return true;
    };

    for (let offset = 0; offset + fanout <= levelRuns.length; offset++) {
      if (isContiguousAt(offset)) {
        return { level, runs: levelRuns.slice(offset, offset + fanout) };
      }
    }
  }
  return null;
}
|
|
533
|
+
|
|
534
|
+
/**
 * Computes how many segments a run at `level` covers: the base `span` multiplied by
 * `compactionFanout` once for each level above 0.
 *
 * @param level Run level; 0 yields the base span unchanged.
 * @returns The segment span for that level.
 */
private levelSpan(level: number): number {
  let width = this.span;
  for (let remaining = level; remaining > 0; remaining--) {
    width *= this.compactionFanout;
  }
  return width;
}
|
|
539
|
+
|
|
540
|
+
/**
 * Merges several existing index runs into one postings run for `level`.
 *
 * Each input is loaded through `loadRunResult` and expanded to absolute segment numbers:
 * for mask runs every set bit `b` (0..15) becomes `meta.start_segment + b`; for postings
 * runs every relative entry is offset by `meta.start_segment`. The merged run stores, per
 * fingerprint, the de-duplicated ascending segment numbers relative to the first input's
 * `start_segment`, together with a binary fuse filter over the sorted fingerprints.
 *
 * @param stream Stream the runs belong to (hashed into the object key).
 * @param indexName Secondary index name (part of the run id and object key).
 * @param level Level recorded on the resulting run.
 * @param inputs Runs to merge. The result spans `inputs[0].start_segment` through the last
 *   input's `end_segment`, so this assumes the inputs are ordered by segment range.
 * @returns The merged run, or an invalid-index-build error when `inputs` is empty, an input
 *   cannot be loaded, an input has an unknown run type, or the filter cannot be built.
 */
private async buildCompactedRunResult(
  stream: string,
  indexName: string,
  level: number,
  inputs: SecondaryIndexRunRow[]
): Promise<Result<IndexRun, SecondaryIndexBuildError>> {
  if (inputs.length === 0) return invalidIndexBuild("compact: missing inputs");
  // fingerprint -> absolute segment numbers gathered from all inputs (may contain duplicates)
  const segments = new Map<bigint, number[]>();
  const addSegment = (fp: bigint, seg: number) => {
    let list = segments.get(fp);
    if (!list) {
      list = [];
      segments.set(fp, list);
    }
    list.push(seg);
  };
  const mergeRun = (meta: SecondaryIndexRunRow, run: IndexRun): void => {
    if (run.runType === RUN_TYPE_MASK16 && run.masks) {
      for (let i = 0; i < run.fingerprints.length; i++) {
        const fp = run.fingerprints[i];
        const mask = run.masks[i];
        for (let bit = 0; bit < 16; bit++) {
          if ((mask & (1 << bit)) !== 0) addSegment(fp, meta.start_segment + bit);
        }
      }
      return;
    }
    if (run.runType === RUN_TYPE_POSTINGS && run.postings) {
      for (let i = 0; i < run.fingerprints.length; i++) {
        const fp = run.fingerprints[i];
        for (const rel of run.postings[i]) addSegment(fp, meta.start_segment + rel);
      }
      return;
    }
    throw dsError(`unknown run type ${run.runType}`);
  };

  // Workers drain this shared queue; `inputs` itself is left untouched.
  const pending = inputs.slice();
  // Always start at least one worker. With a non-positive concurrency setting no input
  // would be read and an empty run would be returned as if the merge had succeeded.
  // This matches the clamp used by buildL0RunResult.
  const workers = Math.max(1, Math.min(this.compactionConcurrency, pending.length));
  let buildError: string | null = null;
  const workerTasks: Promise<void>[] = [];
  for (let w = 0; w < workers; w++) {
    workerTasks.push(
      (async () => {
        for (;;) {
          // Stop picking up new inputs once any worker has recorded a failure.
          if (buildError) return;
          const meta = pending.shift();
          if (!meta) return;
          const runRes = await this.loadRunResult(meta);
          if (Result.isError(runRes)) {
            buildError = runRes.error.message;
            return;
          }
          const run = runRes.value;
          if (!run) {
            buildError = `missing run ${meta.run_id}`;
            return;
          }
          try {
            mergeRun(meta, run);
          } catch (e: unknown) {
            buildError = String((e as any)?.message ?? e);
            return;
          }
          await this.yieldBackgroundWork();
        }
      })()
    );
  }
  await Promise.all(workerTasks);
  if (buildError) return invalidIndexBuild(buildError);

  const startSegment = inputs[0].start_segment;
  const endSegment = inputs[inputs.length - 1].end_segment;
  // bigint keys need an explicit comparator; subtraction would not yield a number.
  const fingerprints = Array.from(segments.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  const postings: number[][] = new Array(fingerprints.length);
  for (let i = 0; i < fingerprints.length; i++) {
    const fp = fingerprints[i]!;
    const list = segments.get(fp) ?? [];
    list.sort((a, b) => a - b);
    const rel: number[] = [];
    // NaN never equals a segment number, so the first entry is always kept.
    let lastSeg = Number.NaN;
    for (const seg of list) {
      if (seg === lastSeg) continue;
      rel.push(seg - startSegment);
      lastSeg = seg;
    }
    postings[i] = rel;
  }
  const fuseRes = buildBinaryFuseResult(fingerprints);
  if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
  const shash = streamHash16Hex(stream);
  const runId = `${indexName}-l${level}-${startSegment.toString().padStart(16, "0")}-${endSegment.toString().padStart(16, "0")}-${Date.now()}`;
  return Result.ok({
    meta: {
      runId,
      level,
      startSegment,
      endSegment,
      objectKey: secondaryIndexRunObjectKey(shash, indexName, runId),
      filterLen: fuseRes.value.bytes.byteLength,
      recordCount: fingerprints.length,
    },
    runType: RUN_TYPE_POSTINGS,
    filterBytes: fuseRes.value.bytes,
    filter: fuseRes.value.filter,
    fingerprints,
    postings,
  });
}
|
|
650
|
+
|
|
651
|
+
/**
 * Builds a level-0 mask run for one secondary index field over a window of segments.
 *
 * Every segment is loaded, its records are decoded as JSON, and the index values extracted
 * from each record are fingerprinted with `siphash24(secret, …)`. Each fingerprint gets a
 * bitmask in which bit `segment_index - startSegment` marks the segments it occurs in.
 * Records whose payload is not valid JSON, or whose value extraction fails, contribute no
 * terms but still advance the record offset.
 *
 * @param stream Stream being indexed (used for the registry lookup and the object key).
 * @param index Field definition whose values are indexed.
 * @param startSegment First segment index of the run window; the run is recorded as
 *   covering `startSegment .. startSegment + span - 1`.
 * @param segments Segments to scan; each must lie inside the run window.
 * @param secret Key passed to `siphash24` when fingerprinting values.
 * @returns The mask run, or an invalid-index-build error when the registry, a segment, or a
 *   record cannot be read, a segment lies outside the window, or the filter cannot be built.
 */
private async buildL0RunResult(
  stream: string,
  index: SecondaryIndexField,
  startSegment: number,
  segments: SegmentRow[],
  secret: Uint8Array
): Promise<Result<IndexRun, SecondaryIndexBuildError>> {
  const regRes = await this.registry.getRegistryResult(stream);
  if (Result.isError(regRes)) return invalidIndexBuild(regRes.error.message);
  const registry = regRes.value;
  // Mask runs are read back 16 bits per fingerprint (see the mask loop in
  // buildCompactedRunResult), so no segment may map to a higher bit.
  const maskWidth = 16;
  const maskByFp = new Map<bigint, number>();
  const pending = segments.slice();
  const concurrency = Math.max(1, Math.min(this.buildConcurrency, pending.length));
  let buildError: string | null = null;
  const workers: Promise<void>[] = [];
  for (let i = 0; i < concurrency; i++) {
    workers.push(
      (async () => {
        for (;;) {
          // Stop picking up new segments once any worker has recorded a failure.
          if (buildError) return;
          const seg = pending.shift();
          if (!seg) return;
          const bit = seg.segment_index - startSegment;
          // `1 << bit` takes the shift count modulo 32, so an out-of-window segment would
          // silently set a wrong or unreadable bit; fail the build instead.
          if (bit < 0 || bit >= this.span || bit >= maskWidth) {
            buildError = `segment ${seg.segment_index} is outside the L0 run window starting at ${startSegment}`;
            return;
          }
          const segBytesRes = await this.loadSegmentBytesResult(seg);
          if (Result.isError(segBytesRes)) {
            buildError = segBytesRes.error.message;
            return;
          }
          const segBytes = segBytesRes.value;
          const maskBit = 1 << bit;
          // Per-segment accumulator, folded into the shared map once the segment is done.
          const local = new Map<bigint, number>();
          let offset = seg.start_offset;
          let processedRecords = 0;
          for (const recRes of iterateBlockRecordsResult(segBytes)) {
            if (Result.isError(recRes)) {
              buildError = recRes.error.message;
              return;
            }
            let parsed: unknown;
            let parsedOk = true;
            try {
              parsed = JSON.parse(PAYLOAD_DECODER.decode(recRes.value.payload));
            } catch {
              // Not JSON: the record yields no terms but still occupies an offset.
              parsedOk = false;
            }
            if (parsedOk) {
              const valuesRes = extractSecondaryIndexValuesForFieldResult(registry, offset, parsed, index);
              if (!Result.isError(valuesRes)) {
                for (const value of valuesRes.value) {
                  const fp = siphash24(secret, TERM_ENCODER.encode(value));
                  const prev = local.get(fp) ?? 0;
                  local.set(fp, prev | maskBit);
                }
              }
            }
            offset += 1n;
            // Count every record, parseable or not, so the periodic yield below also
            // fires on long stretches of invalid payloads.
            processedRecords += 1;
            if (processedRecords % 64 === 0) {
              await this.yieldBackgroundWork();
            }
          }
          for (const [fp, mask] of local.entries()) {
            const prev = maskByFp.get(fp) ?? 0;
            maskByFp.set(fp, prev | mask);
          }
          local.clear();
          await this.yieldBackgroundWork();
        }
      })()
    );
  }
  await Promise.all(workers);
  if (buildError) return invalidIndexBuild(buildError);
  // bigint keys need an explicit comparator; subtraction would not yield a number.
  const fingerprints = Array.from(maskByFp.keys()).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  const masks = fingerprints.map((fp) => maskByFp.get(fp) ?? 0);
  const fuseRes = buildBinaryFuseResult(fingerprints);
  if (Result.isError(fuseRes)) return invalidIndexBuild(fuseRes.error.message);
  const shash = streamHash16Hex(stream);
  const endSegment = startSegment + this.span - 1;
  const runId = `${index.name}-l0-${startSegment.toString().padStart(16, "0")}-${endSegment.toString().padStart(16, "0")}-${Date.now()}`;
  return Result.ok({
    meta: {
      runId,
      level: 0,
      startSegment,
      endSegment,
      objectKey: secondaryIndexRunObjectKey(shash, index.name, runId),
      filterLen: fuseRes.value.bytes.byteLength,
      recordCount: fingerprints.length,
    },
    runType: RUN_TYPE_MASK16,
    filterBytes: fuseRes.value.bytes,
    filter: fuseRes.value.filter,
    fingerprints,
    masks,
  });
}
|
|
746
|
+
|
|
747
|
+
/**
 * Removes retired runs of one secondary index once they have aged out.
 *
 * A retired run is removed when either its `retired_gen` is at or below the manifest
 * generation minus `retireGenWindow`, or `retireMinMs` has elapsed since `retired_at_ms`.
 * For each such run the stored object is deleted (failures are ignored), both run caches
 * drop the entry, and finally the database rows are deleted in one call.
 *
 * @param stream Stream whose retired runs are examined.
 * @param indexName Secondary index name.
 */
private async gcRetiredRuns(stream: string, indexName: string): Promise<void> {
  const retiredRuns = await this.db.listRetiredSecondaryIndexRuns(stream, indexName);
  if (retiredRuns.length === 0) return;

  const manifest = await this.db.getManifestRow(stream);
  const now = this.db.nowMs();

  // A cutoff of 0 disables generation-based expiry, since retired_gen must be > 0 to count.
  let generationCutoff = 0;
  if (this.retireGenWindow > 0 && manifest.generation > this.retireGenWindow) {
    generationCutoff = manifest.generation - this.retireGenWindow;
  }

  const expired: SecondaryIndexRunRow[] = retiredRuns.filter((row) => {
    const agedOutByGeneration = row.retired_gen != null && row.retired_gen > 0 && row.retired_gen <= generationCutoff;
    const agedOutByTime = row.retired_at_ms != null && row.retired_at_ms + BigInt(this.retireMinMs) <= now;
    return agedOutByGeneration || agedOutByTime;
  });
  if (expired.length === 0) return;

  for (const { object_key: objectKey } of expired) {
    try {
      await this.os.delete(objectKey);
    } catch {
      // ignore deletion errors
    }
    this.runCache.remove(objectKey);
    this.runDiskCache?.remove(objectKey);
  }
  await this.db.deleteSecondaryIndexRuns(expired.map((row) => row.run_id));
}
|
|
774
|
+
|
|
775
|
+
/**
 * Reports whether any uploaded segment still lacks a search companion for the current plan.
 *
 * @param stream Stream to check.
 * @returns `false` when the stream has no companion plan, or when every segment index below
 *   the uploaded-segment count has a companion row whose `plan_generation` equals the plan's
 *   generation; `true` otherwise.
 */
private async hasCompanionBacklog(stream: string): Promise<boolean> {
  const plan = await this.companionProgress.getSearchCompanionPlan(stream);
  if (!plan) return false;

  const uploadedCount = await this.companionProgress.countUploadedSegments(stream);
  const companions = await this.companionProgress.listSearchSegmentCompanions(stream);
  const companionBySegment = new Map(companions.map((companion) => [companion.segment_index, companion]));

  let segmentIndex = 0;
  while (segmentIndex < uploadedCount) {
    const companion = companionBySegment.get(segmentIndex);
    const upToDate = companion && companion.plan_generation === plan.generation;
    // First missing or stale companion settles the answer.
    if (!upToDate) return true;
    segmentIndex += 1;
  }
  return false;
}
|
|
787
|
+
|
|
788
|
+
/**
 * Decides whether background index work for `stream` should be held back for now.
 *
 * Work is paused, and the stream's idle tracking is discarded, while there is a companion
 * backlog, a segment in progress, pending bytes, or more segments than uploaded segments.
 * Otherwise the stream's `logical_size_bytes` and `next_offset` are compared with the values
 * seen on the previous call: work stays paused until they have been unchanged for
 * `max(3, ceil(60000 / indexCheckIntervalMs))` consecutive calls.
 *
 * @param stream Stream to evaluate.
 * @returns `true` to pause, `false` to proceed (also `false` when the stream row is missing).
 */
private async shouldPauseExactBackgroundWork(stream: string): Promise<boolean> {
  // Any sign of activity resets the idle counter and pauses.
  const pauseAndForgetIdle = (): boolean => {
    this.streamIdleTicks.delete(stream);
    return true;
  };

  if (await this.hasCompanionBacklog(stream)) return pauseAndForgetIdle();

  const streamRow = await this.db.getStream(stream);
  if (!streamRow) return false;
  if (streamRow.segment_in_progress !== 0 || streamRow.pending_bytes > 0n) return pauseAndForgetIdle();

  const totalSegments = await this.db.countSegmentsForStream(stream);
  const uploadedSegments = await this.db.countUploadedSegments(stream);
  if (totalSegments > uploadedSegments) return pauseAndForgetIdle();

  const requiredFlatTicks = Math.max(3, Math.ceil(60_000 / this.cfg.indexCheckIntervalMs));
  // The -1n placeholders cannot match a real row, so a fresh tracker starts at zero ticks.
  const tracker = this.streamIdleTicks.get(stream) ?? {
    logicalSizeBytes: -1n,
    nextOffset: -1n,
    flatTicks: 0,
  };
  const unchanged =
    tracker.logicalSizeBytes === streamRow.logical_size_bytes && tracker.nextOffset === streamRow.next_offset;
  if (unchanged) {
    tracker.flatTicks += 1;
  } else {
    tracker.logicalSizeBytes = streamRow.logical_size_bytes;
    tracker.nextOffset = streamRow.next_offset;
    tracker.flatTicks = 0;
  }
  this.streamIdleTicks.set(stream, tracker);
  return tracker.flatTicks < requiredFlatTicks;
}
|
|
824
|
+
|
|
825
|
+
/**
 * Encodes a run, uploads it to the object store with retries, and seeds both run caches.
 *
 * @param run Run to persist; it is stored under `run.meta.objectKey`.
 * @returns The encoded payload size in bytes, or an invalid-index-build error when encoding
 *   or the upload fails.
 */
private async persistRunResult(run: IndexRun): Promise<Result<number, SecondaryIndexBuildError>> {
  const encoded = encodeIndexRunResult(run);
  if (Result.isError(encoded)) return invalidIndexBuild(encoded.error.message);

  const payload = encoded.value;
  const sizeBytes = payload.byteLength;
  const objectKey = run.meta.objectKey;
  const retryPolicy = {
    retries: this.cfg.objectStoreRetries,
    baseDelayMs: this.cfg.objectStoreBaseDelayMs,
    maxDelayMs: this.cfg.objectStoreMaxDelayMs,
    timeoutMs: this.cfg.objectStoreTimeoutMs,
  };

  try {
    await retry(() => this.os.put(objectKey, payload, { contentLength: sizeBytes }), retryPolicy);
  } catch (err: unknown) {
    const reason = String((err as any)?.message ?? err);
    return invalidIndexBuild(reason);
  }

  // Only cache after the upload has succeeded.
  this.runDiskCache?.put(objectKey, payload);
  this.runCache.put(objectKey, run, sizeBytes);
  return Result.ok(sizeBytes);
}
|
|
845
|
+
|
|
846
|
+
/**
 * Loads and decodes an index run, consulting the caches before the object store.
 *
 * Lookup order: the in-memory run cache, then the disk cache (if configured), then the
 * object store with retries. Bytes fetched from the object store are written to the disk
 * cache, and a successfully decoded run is placed in the in-memory cache.
 *
 * @param meta Database row of the run; `object_key` locates the payload and `size_bytes` is
 *   the size recorded for the in-memory cache entry.
 * @returns The decoded run, or an invalid-index-build error when the payload is missing,
 *   cannot be fetched, or cannot be decoded.
 */
private async loadRunResult(meta: SecondaryIndexRunRow): Promise<Result<IndexRun | null, SecondaryIndexBuildError>> {
  const cached = this.runCache.get(meta.object_key);
  if (cached) return Result.ok(cached);
  let bytes: Uint8Array | null = null;
  if (this.runDiskCache) {
    try {
      bytes = this.runDiskCache.get(meta.object_key);
    } catch {
      // An unreadable disk-cache entry is dropped; the object store is used instead.
      this.runDiskCache.remove(meta.object_key);
    }
  }
  if (!bytes) {
    try {
      bytes = await retry(
        async () => {
          const data = await this.os.get(meta.object_key);
          // Throwing makes a missing object go through the retry policy like any failure.
          if (!data) throw dsError(`missing secondary index run ${meta.object_key}`);
          return data;
        },
        {
          retries: this.cfg.objectStoreRetries,
          baseDelayMs: this.cfg.objectStoreBaseDelayMs,
          maxDelayMs: this.cfg.objectStoreMaxDelayMs,
          timeoutMs: this.cfg.objectStoreTimeoutMs,
        }
      );
    } catch (e: unknown) {
      return invalidIndexBuild(String((e as any)?.message ?? e));
    }
    this.runDiskCache?.put(meta.object_key, bytes);
  }
  const decodeRes = decodeIndexRunResult(bytes);
  if (Result.isError(decodeRes)) {
    // Do not keep undecodable bytes in the disk cache: otherwise every later load would
    // be served the same bad payload instead of refetching from the object store.
    this.runDiskCache?.remove(meta.object_key);
    return invalidIndexBuild(decodeRes.error.message);
  }
  this.runCache.put(meta.object_key, decodeRes.value, meta.size_bytes);
  return Result.ok(decodeRes.value);
}
|
|
882
|
+
|
|
883
|
+
/**
 * Fetches a segment's bytes through `loadSegmentBytesCached`, converting any thrown error
 * into an invalid-index-build result.
 *
 * @param seg Segment row to load.
 * @returns The segment bytes, or an error carrying the failure message.
 */
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, SecondaryIndexBuildError>> {
  try {
    const { cfg } = this;
    const retryPolicy = {
      retries: cfg.objectStoreRetries,
      baseDelayMs: cfg.objectStoreBaseDelayMs,
      maxDelayMs: cfg.objectStoreMaxDelayMs,
      timeoutMs: cfg.objectStoreTimeoutMs,
    };
    const bytes = await loadSegmentBytesCached(this.os, seg, this.segmentCache, retryPolicy);
    return Result.ok(bytes);
  } catch (err: unknown) {
    const reason = String((err as any)?.message ?? err);
    return invalidIndexBuild(reason);
  }
}
|
|
901
|
+
}
|