@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
package/src/app_local.ts
CHANGED
|
@@ -3,9 +3,15 @@ import { createAppCore, type App } from "./app_core";
|
|
|
3
3
|
import type { ObjectStore } from "./objectstore/interface";
|
|
4
4
|
import { NullObjectStore } from "./objectstore/null";
|
|
5
5
|
import { StreamReader } from "./reader";
|
|
6
|
+
import type { StreamIndexLookup } from "./index/indexer";
|
|
7
|
+
import type { RoutingKeyLexiconListResult } from "./index/lexicon_indexer";
|
|
6
8
|
import type { StatsCollector } from "./stats";
|
|
7
|
-
import type { UploaderController } from "./uploader";
|
|
9
|
+
import type { UploaderController, UploaderHooks } from "./uploader";
|
|
8
10
|
import type { SegmenterController } from "./segment/segmenter_workers";
|
|
11
|
+
import { readSqliteRuntimeMemoryStats } from "./sqlite/runtime_stats";
|
|
12
|
+
import { Result } from "better-result";
|
|
13
|
+
|
|
14
|
+
const TEXT_DECODER = new TextDecoder();
|
|
9
15
|
|
|
10
16
|
class NoopUploader implements UploaderController {
|
|
11
17
|
start(): void {}
|
|
@@ -13,7 +19,7 @@ class NoopUploader implements UploaderController {
|
|
|
13
19
|
countSegmentsWaiting(): number {
|
|
14
20
|
return 0;
|
|
15
21
|
}
|
|
16
|
-
setHooks(_hooks:
|
|
22
|
+
setHooks(_hooks: UploaderHooks | undefined): void {}
|
|
17
23
|
async publishManifest(_stream: string): Promise<void> {}
|
|
18
24
|
}
|
|
19
25
|
|
|
@@ -22,6 +28,97 @@ const noopSegmenter: SegmenterController = {
|
|
|
22
28
|
stop(_hard?: boolean): void {},
|
|
23
29
|
};
|
|
24
30
|
|
|
31
|
+
/**
 * Index lookup used by the local app runtime.
 *
 * Nothing is indexed here: the lifecycle hooks are no-ops and every
 * candidate-segment / companion lookup resolves to `null`. The only method
 * that does real work is `listRoutingKeysResult`, which enumerates routing
 * keys by scanning the stream's WAL rows through the database handle.
 */
class LocalIndexLookup implements StreamIndexLookup {
  constructor(private readonly db: App["deps"]["db"]) {}

  /** No-op. */
  start(): void {}

  /** No-op. */
  stop(): void {}

  /** No-op; the stream name is ignored. */
  enqueue(_stream: string): void {}

  /** Always resolves to `null`. */
  async candidateSegmentsForRoutingKey(_stream: string, _keyBytes: Uint8Array): Promise<null> {
    return null;
  }

  /** Always resolves to `null`. */
  async candidateSegmentsForSecondaryIndex(_stream: string, _indexName: string, _keyBytes: Uint8Array): Promise<null> {
    return null;
  }

  /** Always resolves to `null`. */
  async getAggSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
    return null;
  }

  /** Always resolves to `null`. */
  async getColSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
    return null;
  }

  /** Always resolves to `null`. */
  async getFtsSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
    return null;
  }

  /** Always resolves to `null`. */
  async getMetricsBlockSegmentCompanion(_stream: string, _segmentIndex: number): Promise<null> {
    return null;
  }

  /**
   * Lists the distinct, non-empty routing keys found in the stream's WAL,
   * sorted with the default string ordering and paginated.
   *
   * @param stream Stream name to look up.
   * @param after Exclusive cursor: only keys that compare greater than it are returned; `null` starts from the beginning.
   * @param limit Requested page size; floored and clamped to the range 1..500. `NaN` is treated as 1.
   * @returns `Result.err` with kind `"invalid_lexicon_index"` when the stream is missing or deleted.
   *   Otherwise `Result.ok` with the page, a `nextAfter` cursor (the last key of the page when more
   *   keys remain, else `null`), and coverage/timing counters that are all zero except `scannedWalRows`.
   */
  async listRoutingKeysResult(
    stream: string,
    after: string | null,
    limit: number
  ): Promise<Result<RoutingKeyLexiconListResult, { kind: string; message: string }>> {
    const srow = this.db.getStream(stream);
    if (!srow || this.db.isDeleted(srow)) {
      return Result.err({ kind: "invalid_lexicon_index", message: "stream not found" });
    }
    // Math.max(1, Math.min(NaN, 500)) is NaN, and slice(0, NaN) yields an empty
    // page with no cursor. Fall back to the minimum page size instead.
    const requested = Number.isNaN(limit) ? 1 : Math.floor(limit);
    const safeLimit = Math.max(1, Math.min(requested, 500));
    const keys = new Set<string>();
    let scannedWalRows = 0;
    for (const rec of this.db.iterWalRange(stream, 0n, srow.next_offset - 1n)) {
      scannedWalRows += 1;
      const rawKey = rec.routing_key == null ? null : rec.routing_key instanceof Uint8Array ? rec.routing_key : new Uint8Array(rec.routing_key);
      if (!rawKey || rawKey.byteLength === 0) continue;
      const key = TEXT_DECODER.decode(rawKey);
      // Apply the cursor while scanning so keys at or before it are never
      // buffered or sorted; the resulting page is the same.
      if (after != null && !(key > after)) continue;
      keys.add(key);
    }
    const remaining = Array.from(keys).sort();
    const page = remaining.slice(0, safeLimit);
    const nextAfter = remaining.length > safeLimit ? page[page.length - 1] ?? null : null;
    return Result.ok({
      keys: page,
      nextAfter,
      tookMs: 0,
      coverage: {
        complete: true,
        indexedSegments: 0,
        scannedUploadedSegments: 0,
        scannedLocalSegments: 0,
        scannedWalRows,
        possibleMissingUploadedSegments: 0,
        possibleMissingLocalSegments: 0,
      },
      timing: {
        lexiconRunGetMs: 0,
        lexiconDecodeMs: 0,
        lexiconEnumerateMs: 0,
        lexiconMergeMs: 0,
        fallbackScanMs: 0,
        fallbackSegmentGetMs: 0,
        fallbackWalScanMs: 0,
        lexiconRunsLoaded: 0,
      },
    });
  }

  /** Reports zero bytes for every cache category; the stream name is ignored. */
  getLocalStorageUsage(_stream: string) {
    return {
      routing_index_cache_bytes: 0,
      exact_index_cache_bytes: 0,
      companion_cache_bytes: 0,
      lexicon_index_cache_bytes: 0,
    };
  }
}
|
|
121
|
+
|
|
25
122
|
/**
 * Optional settings for `createLocalApp`.
 *
 * `stats` is passed straight through to `createAppCore`.
 */
export type CreateLocalAppOptions = { stats?: StatsCollector };
|
|
@@ -29,16 +126,59 @@ export type CreateLocalAppOptions = {
|
|
|
29
126
|
export function createLocalApp(cfg: Config, os?: ObjectStore, opts: CreateLocalAppOptions = {}): App {
|
|
30
127
|
return createAppCore(cfg, {
|
|
31
128
|
stats: opts.stats,
|
|
32
|
-
createRuntime: ({ config, db }) => {
|
|
129
|
+
createRuntime: ({ config, db, registry, memorySampler, memory }) => {
|
|
33
130
|
const store = os ?? new NullObjectStore();
|
|
34
|
-
const
|
|
131
|
+
const indexer = new LocalIndexLookup(db);
|
|
132
|
+
const reader = new StreamReader(config, db, store, registry, undefined, indexer, memorySampler, memory);
|
|
35
133
|
|
|
36
134
|
return {
|
|
37
135
|
store,
|
|
38
136
|
reader,
|
|
39
137
|
segmenter: noopSegmenter,
|
|
40
138
|
uploader: new NoopUploader(),
|
|
139
|
+
indexer,
|
|
41
140
|
uploadSchemaRegistry: async (): Promise<void> => {},
|
|
141
|
+
getRuntimeMemorySnapshot: () => {
|
|
142
|
+
const sqliteRuntime = readSqliteRuntimeMemoryStats();
|
|
143
|
+
return {
|
|
144
|
+
subsystems: {
|
|
145
|
+
heap_estimates: {
|
|
146
|
+
ingest_queue_payload_bytes: 0,
|
|
147
|
+
},
|
|
148
|
+
mapped_files: {},
|
|
149
|
+
disk_caches: {},
|
|
150
|
+
configured_budgets: {
|
|
151
|
+
sqlite_cache_budget_bytes: config.sqliteCacheBytes,
|
|
152
|
+
worker_sqlite_cache_budget_bytes: config.workerSqliteCacheBytes,
|
|
153
|
+
},
|
|
154
|
+
pipeline_buffers: {},
|
|
155
|
+
sqlite_runtime: {
|
|
156
|
+
sqlite_memory_used_bytes: sqliteRuntime.memory_used_bytes,
|
|
157
|
+
sqlite_memory_highwater_bytes: sqliteRuntime.memory_highwater_bytes,
|
|
158
|
+
sqlite_pagecache_overflow_bytes: sqliteRuntime.pagecache_overflow_bytes,
|
|
159
|
+
sqlite_pagecache_overflow_highwater_bytes: sqliteRuntime.pagecache_overflow_highwater_bytes,
|
|
160
|
+
},
|
|
161
|
+
counts: {
|
|
162
|
+
ingest_queue_requests: 0,
|
|
163
|
+
pending_upload_segments: 0,
|
|
164
|
+
sqlite_pagecache_used_slots: sqliteRuntime.pagecache_used_slots,
|
|
165
|
+
sqlite_pagecache_used_slots_highwater: sqliteRuntime.pagecache_used_slots_highwater,
|
|
166
|
+
sqlite_malloc_count: sqliteRuntime.malloc_count,
|
|
167
|
+
sqlite_malloc_count_highwater: sqliteRuntime.malloc_count_highwater,
|
|
168
|
+
sqlite_open_connections: sqliteRuntime.open_connections,
|
|
169
|
+
sqlite_prepared_statements: sqliteRuntime.prepared_statements,
|
|
170
|
+
},
|
|
171
|
+
},
|
|
172
|
+
totals: {
|
|
173
|
+
heap_estimate_bytes: 0,
|
|
174
|
+
mapped_file_bytes: 0,
|
|
175
|
+
disk_cache_bytes: 0,
|
|
176
|
+
configured_budget_bytes: config.sqliteCacheBytes + config.workerSqliteCacheBytes,
|
|
177
|
+
pipeline_buffer_bytes: 0,
|
|
178
|
+
sqlite_runtime_bytes: sqliteRuntime.memory_used_bytes + sqliteRuntime.pagecache_overflow_bytes,
|
|
179
|
+
},
|
|
180
|
+
};
|
|
181
|
+
},
|
|
42
182
|
start: (): void => {},
|
|
43
183
|
};
|
|
44
184
|
},
|
package/src/auto_tune.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
 * The set of knobs computed by `tuneForPreset`: segment seal thresholds,
 * cache and ingest budgets, and per-subsystem concurrency limits.
 */
export type AutoTuneConfig = {
  // Segment seal thresholds.
  segmentMaxMiB: number;
  segmentTargetRows: number;
  // Cache and memory budgets.
  sqliteCacheMb: number;
  workerSqliteCacheMb: number;
  indexMemMb: number;
  lexiconIndexCacheMb: number;
  searchCompanionTocCacheMb: number;
  searchCompanionSectionCacheMb: number;
  // Ingest sizing.
  ingestBatchMb: number;
  ingestQueueMb: number;
  // Concurrency limits.
  ingestConcurrency: number;
  readConcurrency: number;
  searchConcurrency: number;
  asyncIndexConcurrency: number;
  indexBuildConcurrency: number;
  indexCompactConcurrency: number;
  segmenterWorkers: number;
  uploadConcurrency: number;
  // Search companion build pacing.
  searchCompanionBatchSegments: number;
  searchCompanionYieldBlocks: number;
};

/** Supported preset sizes, smallest first. */
export const AUTO_TUNE_PRESETS = [256, 512, 1024, 2048, 4096, 8192] as const;

/**
 * Memory limit associated with a preset: 300 for the 256 preset, otherwise
 * the preset value itself.
 */
export function memoryLimitForPreset(preset: number): number {
  if (preset === 256) return 300;
  return preset;
}

/**
 * Derives the full tuning configuration for preset `p`.
 *
 * Most knobs step up at fixed preset thresholds; each ladder below lists its
 * `[threshold, value]` rungs from largest to smallest, with a fallback used
 * when `p` reaches none of them.
 */
export function tuneForPreset(p: number): AutoTuneConfig {
  // Returns the value of the first rung whose threshold `p` reaches.
  const pick = (rungs: ReadonlyArray<readonly [number, number]>, fallback: number): number => {
    for (const [threshold, value] of rungs) {
      if (p >= threshold) return value;
    }
    return fallback;
  };

  // Background work stays single-lane on the <=2 GiB presets: those hosts lack
  // the headroom for append, segment cut, upload, and companion work to
  // overlap aggressively under the GH Archive "all" workload.
  const backgroundLanes = pick([[8192, 4], [4096, 2]], 1);
  const requestLanes = pick([[8192, 8], [4096, 4], [1024, 2]], 1);
  const queueScaleMb = pick([[8192, 128], [4096, 64], [2048, 32], [1024, 16]], 8);

  return {
    // Segment geometry does not vary by preset. Small hosts scale concurrency
    // and caches down but keep the 16 MiB / 100k-row seal thresholds, so
    // uploads are not dominated by many tiny compressed segment objects.
    segmentMaxMiB: 16,
    segmentTargetRows: 100_000,
    sqliteCacheMb: Math.max(8, Math.floor(p / 16)),
    workerSqliteCacheMb: Math.max(8, Math.min(32, Math.floor(p / 128))),
    indexMemMb: Math.max(4, Math.floor(p / 64)),
    lexiconIndexCacheMb: pick([[8192, 256], [4096, 128], [2048, 64], [1024, 32], [512, 16]], 8),
    searchCompanionTocCacheMb: pick([[8192, 4], [4096, 2]], 1),
    searchCompanionSectionCacheMb: queueScaleMb,
    // Append working sets are kept tighter on the <=2 GiB presets because the
    // request path still holds several copies of a JSON batch while
    // normalizing and queuing it.
    ingestBatchMb: pick([[8192, 64], [4096, 16], [2048, 8], [1024, 4]], 2),
    ingestQueueMb: queueScaleMb,
    ingestConcurrency: requestLanes,
    readConcurrency: pick([[8192, 16], [4096, 8], [1024, 4]], 2),
    searchConcurrency: requestLanes,
    asyncIndexConcurrency: backgroundLanes,
    indexBuildConcurrency: backgroundLanes,
    indexCompactConcurrency: backgroundLanes,
    segmenterWorkers: backgroundLanes,
    uploadConcurrency: requestLanes,
    searchCompanionBatchSegments: backgroundLanes,
    searchCompanionYieldBlocks: backgroundLanes,
  };
}
|
package/src/bootstrap.ts
CHANGED
|
@@ -59,8 +59,11 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
59
59
|
const epoch = typeof manifest.epoch === "number" ? manifest.epoch : 0;
|
|
60
60
|
const nextOffsetNum = typeof manifest.next_offset === "number" ? manifest.next_offset : 0;
|
|
61
61
|
const nextOffset = BigInt(nextOffsetNum);
|
|
62
|
+
const logicalSizeBytes = parseManifestBigInt(manifest.logical_size_bytes) ?? 0n;
|
|
62
63
|
|
|
63
64
|
const contentType = typeof manifest.content_type === "string" ? manifest.content_type : "application/octet-stream";
|
|
65
|
+
const profile = typeof manifest.profile === "string" && manifest.profile !== "" ? manifest.profile : "generic";
|
|
66
|
+
const profileJson = manifest.profile_json && typeof manifest.profile_json === "object" ? manifest.profile_json : null;
|
|
64
67
|
const streamSeq = typeof manifest.stream_seq === "string" ? manifest.stream_seq : null;
|
|
65
68
|
const closed = typeof manifest.closed === "number" ? manifest.closed : 0;
|
|
66
69
|
const closedProducerId = typeof manifest.closed_producer_id === "string" ? manifest.closed_producer_id : null;
|
|
@@ -92,6 +95,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
92
95
|
created_at_ms: createdAtMs,
|
|
93
96
|
updated_at_ms: nowMs,
|
|
94
97
|
content_type: contentType,
|
|
98
|
+
profile,
|
|
95
99
|
stream_seq: streamSeq,
|
|
96
100
|
closed,
|
|
97
101
|
closed_producer_id: closedProducerId,
|
|
@@ -105,6 +109,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
105
109
|
uploaded_segment_count: uploadedPrefix,
|
|
106
110
|
pending_rows: 0n,
|
|
107
111
|
pending_bytes: 0n,
|
|
112
|
+
logical_size_bytes: logicalSizeBytes,
|
|
108
113
|
wal_rows: 0n,
|
|
109
114
|
wal_bytes: 0n,
|
|
110
115
|
last_append_ms: lastAppendMs,
|
|
@@ -113,6 +118,11 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
113
118
|
expires_at_ms: expiresAtMs,
|
|
114
119
|
stream_flags: streamFlags,
|
|
115
120
|
});
|
|
121
|
+
if (profileJson) {
|
|
122
|
+
db.upsertStreamProfile(stream, JSON.stringify(profileJson));
|
|
123
|
+
} else {
|
|
124
|
+
db.deleteStreamProfile(stream);
|
|
125
|
+
}
|
|
116
126
|
|
|
117
127
|
db.upsertSegmentMeta(stream, segmentCount, segmentOffsetsBytes, segmentBlocksBytes, segmentLastTsBytes);
|
|
118
128
|
|
|
@@ -121,7 +131,14 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
121
131
|
if (!head) throw dsError(`missing manifest head ${mkey}`);
|
|
122
132
|
return head;
|
|
123
133
|
}, retryOpts);
|
|
124
|
-
db.upsertManifestRow(
|
|
134
|
+
db.upsertManifestRow(
|
|
135
|
+
stream,
|
|
136
|
+
Number(manifest.generation ?? 0),
|
|
137
|
+
Number(manifest.generation ?? 0),
|
|
138
|
+
nowMs,
|
|
139
|
+
manifestHead?.etag ?? null,
|
|
140
|
+
manifestHead?.size ?? null
|
|
141
|
+
);
|
|
125
142
|
|
|
126
143
|
for (let i = 0; i < segmentCount; i++) {
|
|
127
144
|
const startOffset = i === 0 ? 0n : segmentOffsets[i - 1];
|
|
@@ -145,6 +162,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
145
162
|
endOffset,
|
|
146
163
|
blockCount: segmentBlocks[i],
|
|
147
164
|
lastAppendMs: lastTsMs,
|
|
165
|
+
payloadBytes: 0n,
|
|
148
166
|
sizeBytes: head.size,
|
|
149
167
|
localPath,
|
|
150
168
|
});
|
|
@@ -168,6 +186,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
168
186
|
start_segment: Number(r.start_segment),
|
|
169
187
|
end_segment: Number(r.end_segment),
|
|
170
188
|
object_key: String(r.object_key),
|
|
189
|
+
size_bytes: Number(r.size_bytes ?? 0),
|
|
171
190
|
filter_len: Number(r.filter_len ?? 0),
|
|
172
191
|
record_count: Number(r.record_count ?? 0),
|
|
173
192
|
});
|
|
@@ -181,6 +200,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
181
200
|
start_segment: Number(r.start_segment),
|
|
182
201
|
end_segment: Number(r.end_segment),
|
|
183
202
|
object_key: String(r.object_key),
|
|
203
|
+
size_bytes: Number(r.size_bytes ?? 0),
|
|
184
204
|
filter_len: Number(r.filter_len ?? 0),
|
|
185
205
|
record_count: Number(r.record_count ?? 0),
|
|
186
206
|
});
|
|
@@ -189,6 +209,136 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
189
209
|
db.retireIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
|
|
190
210
|
}
|
|
191
211
|
|
|
212
|
+
const secondaryIndexes = manifest.secondary_indexes && typeof manifest.secondary_indexes === "object" ? manifest.secondary_indexes : {};
|
|
213
|
+
for (const [indexName, rawState] of Object.entries(secondaryIndexes)) {
|
|
214
|
+
if (!rawState || typeof rawState !== "object") continue;
|
|
215
|
+
const indexSecretB64 = typeof (rawState as any).index_secret === "string" ? (rawState as any).index_secret : "";
|
|
216
|
+
if (!indexSecretB64) continue;
|
|
217
|
+
const secret = new Uint8Array(Buffer.from(indexSecretB64, "base64"));
|
|
218
|
+
const configHash = typeof (rawState as any).config_hash === "string" ? (rawState as any).config_hash : "";
|
|
219
|
+
const indexedThrough =
|
|
220
|
+
typeof (rawState as any).indexed_through === "number" ? Number((rawState as any).indexed_through) : 0;
|
|
221
|
+
db.upsertSecondaryIndexState(stream, indexName, secret, configHash, indexedThrough);
|
|
222
|
+
|
|
223
|
+
const activeSecondaryRuns = Array.isArray((rawState as any).active_runs) ? (rawState as any).active_runs : [];
|
|
224
|
+
const retiredSecondaryRuns = Array.isArray((rawState as any).retired_runs) ? (rawState as any).retired_runs : [];
|
|
225
|
+
for (const run of activeSecondaryRuns) {
|
|
226
|
+
db.insertSecondaryIndexRun({
|
|
227
|
+
run_id: String(run.run_id),
|
|
228
|
+
stream,
|
|
229
|
+
index_name: indexName,
|
|
230
|
+
level: Number(run.level),
|
|
231
|
+
start_segment: Number(run.start_segment),
|
|
232
|
+
end_segment: Number(run.end_segment),
|
|
233
|
+
object_key: String(run.object_key),
|
|
234
|
+
size_bytes: Number(run.size_bytes ?? 0),
|
|
235
|
+
filter_len: Number(run.filter_len ?? 0),
|
|
236
|
+
record_count: Number(run.record_count ?? 0),
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
for (const run of retiredSecondaryRuns) {
|
|
240
|
+
const runId = String(run.run_id);
|
|
241
|
+
db.insertSecondaryIndexRun({
|
|
242
|
+
run_id: runId,
|
|
243
|
+
stream,
|
|
244
|
+
index_name: indexName,
|
|
245
|
+
level: Number(run.level),
|
|
246
|
+
start_segment: Number(run.start_segment),
|
|
247
|
+
end_segment: Number(run.end_segment),
|
|
248
|
+
object_key: String(run.object_key),
|
|
249
|
+
size_bytes: Number(run.size_bytes ?? 0),
|
|
250
|
+
filter_len: Number(run.filter_len ?? 0),
|
|
251
|
+
record_count: Number(run.record_count ?? 0),
|
|
252
|
+
});
|
|
253
|
+
const retiredGen = typeof run.retired_gen === "number" ? run.retired_gen : Number(manifest.generation ?? 0);
|
|
254
|
+
const retiredAtUnix = typeof run.retired_at_unix === "number" ? run.retired_at_unix : Math.floor(Number(nowMs) / 1000);
|
|
255
|
+
db.retireSecondaryIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
const lexiconIndexes = Array.isArray(manifest.lexicon_indexes) ? manifest.lexicon_indexes : [];
|
|
260
|
+
for (const rawState of lexiconIndexes) {
|
|
261
|
+
if (!rawState || typeof rawState !== "object") continue;
|
|
262
|
+
const sourceKind = typeof (rawState as any).source_kind === "string" ? (rawState as any).source_kind : "";
|
|
263
|
+
if (sourceKind === "") continue;
|
|
264
|
+
const sourceName = typeof (rawState as any).source_name === "string" ? (rawState as any).source_name : "";
|
|
265
|
+
const indexedThrough =
|
|
266
|
+
typeof (rawState as any).indexed_through === "number" ? Number((rawState as any).indexed_through) : 0;
|
|
267
|
+
db.upsertLexiconIndexState(stream, sourceKind, sourceName, indexedThrough);
|
|
268
|
+
|
|
269
|
+
const activeLexiconRuns = Array.isArray((rawState as any).active_runs) ? (rawState as any).active_runs : [];
|
|
270
|
+
const retiredLexiconRuns = Array.isArray((rawState as any).retired_runs) ? (rawState as any).retired_runs : [];
|
|
271
|
+
for (const run of activeLexiconRuns) {
|
|
272
|
+
db.insertLexiconIndexRun({
|
|
273
|
+
run_id: String(run.run_id),
|
|
274
|
+
stream,
|
|
275
|
+
source_kind: sourceKind,
|
|
276
|
+
source_name: sourceName,
|
|
277
|
+
level: Number(run.level),
|
|
278
|
+
start_segment: Number(run.start_segment),
|
|
279
|
+
end_segment: Number(run.end_segment),
|
|
280
|
+
object_key: String(run.object_key),
|
|
281
|
+
size_bytes: Number(run.size_bytes ?? 0),
|
|
282
|
+
record_count: Number(run.record_count ?? 0),
|
|
283
|
+
});
|
|
284
|
+
}
|
|
285
|
+
for (const run of retiredLexiconRuns) {
|
|
286
|
+
const runId = String(run.run_id);
|
|
287
|
+
db.insertLexiconIndexRun({
|
|
288
|
+
run_id: runId,
|
|
289
|
+
stream,
|
|
290
|
+
source_kind: sourceKind,
|
|
291
|
+
source_name: sourceName,
|
|
292
|
+
level: Number(run.level),
|
|
293
|
+
start_segment: Number(run.start_segment),
|
|
294
|
+
end_segment: Number(run.end_segment),
|
|
295
|
+
object_key: String(run.object_key),
|
|
296
|
+
size_bytes: Number(run.size_bytes ?? 0),
|
|
297
|
+
record_count: Number(run.record_count ?? 0),
|
|
298
|
+
});
|
|
299
|
+
const retiredGen = typeof run.retired_gen === "number" ? run.retired_gen : Number(manifest.generation ?? 0);
|
|
300
|
+
const retiredAtUnix = typeof run.retired_at_unix === "number" ? run.retired_at_unix : Math.floor(Number(nowMs) / 1000);
|
|
301
|
+
db.retireLexiconIndexRuns([runId], retiredGen, BigInt(retiredAtUnix) * 1000n);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
const searchCompanions =
|
|
306
|
+
manifest.search_companions && typeof manifest.search_companions === "object" ? manifest.search_companions : null;
|
|
307
|
+
if (searchCompanions) {
|
|
308
|
+
const generation = typeof searchCompanions.generation === "number" ? searchCompanions.generation : 0;
|
|
309
|
+
const planHash = typeof searchCompanions.plan_hash === "string" ? searchCompanions.plan_hash : "";
|
|
310
|
+
const planJson =
|
|
311
|
+
searchCompanions.plan_json && typeof searchCompanions.plan_json === "object"
|
|
312
|
+
? JSON.stringify(searchCompanions.plan_json)
|
|
313
|
+
: JSON.stringify({ families: {}, summary: {} });
|
|
314
|
+
if (generation > 0 && planHash) {
|
|
315
|
+
db.upsertSearchCompanionPlan(stream, generation, planHash, planJson);
|
|
316
|
+
}
|
|
317
|
+
const segments = Array.isArray(searchCompanions.segments) ? searchCompanions.segments : [];
|
|
318
|
+
for (const segment of segments) {
|
|
319
|
+
if (!segment || typeof segment !== "object") continue;
|
|
320
|
+
if (
|
|
321
|
+
typeof (segment as any).segment_index !== "number" ||
|
|
322
|
+
typeof (segment as any).object_key !== "string" ||
|
|
323
|
+
typeof (segment as any).plan_generation !== "number"
|
|
324
|
+
) {
|
|
325
|
+
continue;
|
|
326
|
+
}
|
|
327
|
+
const sections = Array.isArray((segment as any).sections) ? (segment as any).sections : [];
|
|
328
|
+
db.upsertSearchSegmentCompanion(
|
|
329
|
+
stream,
|
|
330
|
+
Number((segment as any).segment_index),
|
|
331
|
+
String((segment as any).object_key),
|
|
332
|
+
Number((segment as any).plan_generation),
|
|
333
|
+
JSON.stringify(sections),
|
|
334
|
+
JSON.stringify((segment as any).section_sizes ?? {}),
|
|
335
|
+
Number((segment as any).size_bytes ?? 0),
|
|
336
|
+
parseManifestBigInt((segment as any).primary_timestamp_min_ms),
|
|
337
|
+
parseManifestBigInt((segment as any).primary_timestamp_max_ms)
|
|
338
|
+
);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
192
342
|
const schemaKey = schemaObjectKey(shash);
|
|
193
343
|
const schemaBytes = await retry(async () => {
|
|
194
344
|
const data = await store.get(schemaKey);
|
|
@@ -197,6 +347,7 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
197
347
|
}, retryOpts);
|
|
198
348
|
if (schemaBytes) {
|
|
199
349
|
db.upsertSchemaRegistry(stream, new TextDecoder().decode(schemaBytes));
|
|
350
|
+
db.setSchemaUploadedSizeBytes(stream, schemaBytes.byteLength);
|
|
200
351
|
}
|
|
201
352
|
}
|
|
202
353
|
} finally {
|
|
@@ -204,6 +355,13 @@ export async function bootstrapFromR2(cfg: Config, store: ObjectStore, opts: { c
|
|
|
204
355
|
}
|
|
205
356
|
}
|
|
206
357
|
|
|
358
|
+
/**
 * Converts a manifest field to a `bigint`, or returns `null` when it cannot.
 *
 * Accepted inputs: a `bigint` (returned unchanged), a finite `number`
 * (truncated toward zero), or a string made up only of decimal digits with an
 * optional leading minus sign. Everything else yields `null`.
 */
function parseManifestBigInt(value: unknown): bigint | null {
  switch (typeof value) {
    case "bigint":
      return value;
    case "number":
      // NaN and ±Infinity have no integer representation.
      return Number.isFinite(value) ? BigInt(Math.trunc(value)) : null;
    case "string":
      return /^-?[0-9]+$/.test(value) ? BigInt(value) : null;
    default:
      return null;
  }
}
|
|
364
|
+
|
|
207
365
|
function decodeZstdBase64(value: string): Uint8Array {
|
|
208
366
|
if (!value) return new Uint8Array(0);
|
|
209
367
|
const raw = Buffer.from(value, "base64");
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * Function handed out by a successful acquire. Calling it frees the slot;
 * calls after the first are ignored.
 */
export type GateRelease = () => void;

/** A pending `acquire()` call waiting in the queue for a free slot. */
type Waiter = {
  resolve: (release: GateRelease) => void;
  reject: (error: unknown) => void;
  signal: AbortSignal | null;
  // Abort listener registered on `signal`, or null once detached / never attached.
  onAbort: (() => void) | null;
};

/** Builds the error used to reject aborted acquisitions (`name` is "AbortError"). */
function abortError(): Error {
  const err = new Error("operation aborted");
  err.name = "AbortError";
  return err;
}

/**
 * Coerces a requested limit to an integer of at least 1.
 *
 * NaN is handled explicitly: `Math.max(1, NaN)` is NaN, and because
 * `active < NaN` is always false a NaN limit would make every `acquire()`
 * queue forever.
 */
function normalizeLimit(limit: number): number {
  if (Number.isNaN(limit)) return 1;
  return Math.max(1, Math.floor(limit));
}

/**
 * Bounds how many operations run at once. Callers beyond the limit wait in
 * FIFO order and are admitted as slots are released or the limit is raised.
 */
export class ConcurrencyGate {
  private limit: number;
  private active = 0;
  private readonly waiters: Waiter[] = [];

  /** @param limit Maximum concurrent holders; floored, minimum 1 (NaN is treated as 1). */
  constructor(limit: number) {
    this.limit = normalizeLimit(limit);
  }

  /** Current concurrency limit. */
  getLimit(): number {
    return this.limit;
  }

  /** Number of slots currently held. */
  getActive(): number {
    return this.active;
  }

  /** Number of callers waiting for a slot. */
  getQueued(): number {
    return this.waiters.length;
  }

  /**
   * Changes the limit (floored, minimum 1, NaN treated as 1) and immediately
   * admits queued waiters if the new limit leaves room. Lowering the limit does
   * not revoke slots that are already held.
   */
  setLimit(nextLimit: number): void {
    this.limit = normalizeLimit(nextLimit);
    this.drain();
  }

  /**
   * Obtains a slot, waiting in the queue when the gate is full.
   *
   * @param signal Optional abort signal. If already aborted the call rejects
   *   immediately; if it aborts while queued, the waiter is removed and rejected.
   * @returns The release function for the acquired slot.
   * @throws An Error named "AbortError" when the signal is or becomes aborted.
   */
  async acquire(signal?: AbortSignal | null): Promise<GateRelease> {
    if (signal?.aborted) throw abortError();
    if (this.active < this.limit) {
      this.active += 1;
      return this.releaseFactory();
    }
    return await new Promise<GateRelease>((resolve, reject) => {
      const waiter: Waiter = {
        resolve,
        reject,
        signal: signal ?? null,
        onAbort: null,
      };
      if (signal) {
        waiter.onAbort = () => {
          this.removeWaiter(waiter);
          reject(abortError());
        };
        signal.addEventListener("abort", waiter.onAbort, { once: true });
      }
      this.waiters.push(waiter);
    });
  }

  /**
   * Runs `fn` while holding a slot; the slot is released whether `fn`
   * resolves or rejects.
   */
  async run<T>(fn: () => Promise<T>, signal?: AbortSignal | null): Promise<T> {
    const release = await this.acquire(signal);
    try {
      return await fn();
    } finally {
      release();
    }
  }

  /** Creates a one-shot release callback for a slot that has just been counted in `active`. */
  private releaseFactory(): GateRelease {
    let released = false;
    return () => {
      if (released) return;
      released = true;
      this.active = Math.max(0, this.active - 1);
      this.drain();
    };
  }

  /** Removes the waiter's abort listener, if one is still attached. */
  private detachAbortListener(waiter: Waiter): void {
    if (waiter.signal && waiter.onAbort) {
      waiter.signal.removeEventListener("abort", waiter.onAbort);
      waiter.onAbort = null;
    }
  }

  /** Drops a waiter from the queue (if still present) and detaches its abort listener. */
  private removeWaiter(waiter: Waiter): void {
    const idx = this.waiters.indexOf(waiter);
    if (idx >= 0) this.waiters.splice(idx, 1);
    this.detachAbortListener(waiter);
  }

  /** Admits queued waiters in FIFO order while slots are free, rejecting any whose signal has aborted. */
  private drain(): void {
    while (this.active < this.limit && this.waiters.length > 0) {
      const waiter = this.waiters.shift()!;
      this.detachAbortListener(waiter);
      if (waiter.signal?.aborted) {
        waiter.reject(abortError());
        continue;
      }
      this.active += 1;
      waiter.resolve(this.releaseFactory());
    }
  }
}
|