@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
package/src/segment/segmenter.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { encodeBlock, encodeFooter, type BlockIndexEntry, type SegmentRecord } f
|
|
|
6
6
|
import { readU32BE } from "../util/endian";
|
|
7
7
|
import { localSegmentPath, streamHash16Hex } from "../util/stream_paths";
|
|
8
8
|
import { LruCache } from "../util/lru";
|
|
9
|
+
import { RuntimeMemorySampler } from "../runtime_memory_sampler";
|
|
9
10
|
import { yieldToEventLoop } from "../util/yield";
|
|
10
11
|
|
|
11
12
|
export type SegmenterOptions = {
|
|
@@ -17,20 +18,42 @@ export type SegmenterOptions = {
|
|
|
17
18
|
};
|
|
18
19
|
|
|
19
20
|
export type SegmenterHooks = {
|
|
20
|
-
onSegmentSealed?: (payloadBytes: number, segmentBytes: number) => void;
|
|
21
|
+
onSegmentSealed?: (stream: string, payloadBytes: number, segmentBytes: number) => void;
|
|
21
22
|
};
|
|
22
23
|
|
|
24
|
+
export type SegmenterMemoryStats = {
|
|
25
|
+
active_builds: number;
|
|
26
|
+
active_streams: number;
|
|
27
|
+
active_payload_bytes: number;
|
|
28
|
+
active_segment_bytes_estimate: number;
|
|
29
|
+
active_rows: number;
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
const SEGMENT_COMPRESSION_WINDOW = 8;
|
|
33
|
+
const MIN_COMPRESSED_FILL_RATIO = 0.5;
|
|
34
|
+
|
|
23
35
|
export class Segmenter {
|
|
24
36
|
private readonly config: Config;
|
|
25
37
|
private readonly db: SqliteDurableStore;
|
|
26
38
|
private readonly opts: Required<SegmenterOptions>;
|
|
27
39
|
private readonly hooks?: SegmenterHooks;
|
|
40
|
+
private readonly memorySampler?: RuntimeMemorySampler;
|
|
28
41
|
private timer: any | null = null;
|
|
29
42
|
private running = false;
|
|
30
43
|
private stopping = false;
|
|
31
44
|
private readonly failures = new FailureTracker(1024);
|
|
32
|
-
|
|
33
|
-
|
|
45
|
+
private activeBuildStream: string | null = null;
|
|
46
|
+
private activePayloadBytes = 0;
|
|
47
|
+
private activeSegmentBytesEstimate = 0;
|
|
48
|
+
private activeRows = 0;
|
|
49
|
+
|
|
50
|
+
constructor(
|
|
51
|
+
config: Config,
|
|
52
|
+
db: SqliteDurableStore,
|
|
53
|
+
opts: SegmenterOptions = {},
|
|
54
|
+
hooks?: SegmenterHooks,
|
|
55
|
+
memorySampler?: RuntimeMemorySampler
|
|
56
|
+
) {
|
|
34
57
|
this.config = config;
|
|
35
58
|
this.db = db;
|
|
36
59
|
this.opts = {
|
|
@@ -41,6 +64,7 @@ export class Segmenter {
|
|
|
41
64
|
maxRowsPerSegment: opts.maxRowsPerSegment ?? 250_000,
|
|
42
65
|
};
|
|
43
66
|
this.hooks = hooks;
|
|
67
|
+
this.memorySampler = memorySampler;
|
|
44
68
|
}
|
|
45
69
|
|
|
46
70
|
start(): void {
|
|
@@ -58,6 +82,16 @@ export class Segmenter {
|
|
|
58
82
|
this.timer = null;
|
|
59
83
|
}
|
|
60
84
|
|
|
85
|
+
getMemoryStats(): SegmenterMemoryStats {
|
|
86
|
+
return {
|
|
87
|
+
active_builds: this.activeBuildStream ? 1 : 0,
|
|
88
|
+
active_streams: this.activeBuildStream ? 1 : 0,
|
|
89
|
+
active_payload_bytes: this.activePayloadBytes,
|
|
90
|
+
active_segment_bytes_estimate: this.activeSegmentBytesEstimate,
|
|
91
|
+
active_rows: this.activeRows,
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
|
|
61
95
|
private async tick(): Promise<void> {
|
|
62
96
|
if (this.stopping) return;
|
|
63
97
|
if (this.running) return;
|
|
@@ -129,11 +163,31 @@ export class Segmenter {
|
|
|
129
163
|
}
|
|
130
164
|
}
|
|
131
165
|
|
|
166
|
+
private resolvePayloadSealTargetBytes(stream: string): bigint {
|
|
167
|
+
const baseTarget = BigInt(this.config.segmentMaxBytes);
|
|
168
|
+
const ratio = this.db.recentSegmentCompressionRatio(stream, SEGMENT_COMPRESSION_WINDOW);
|
|
169
|
+
if (ratio == null || !Number.isFinite(ratio) || ratio <= 0 || ratio >= MIN_COMPRESSED_FILL_RATIO) {
|
|
170
|
+
return baseTarget;
|
|
171
|
+
}
|
|
172
|
+
const desiredCompressedBytes = Math.ceil(this.config.segmentMaxBytes * MIN_COMPRESSED_FILL_RATIO);
|
|
173
|
+
const boosted = BigInt(Math.ceil(desiredCompressedBytes / ratio));
|
|
174
|
+
return boosted > baseTarget ? boosted : baseTarget;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
private shouldSealStream(row: { stream: string; pending_bytes: bigint; pending_rows: bigint; last_segment_cut_ms: bigint }): boolean {
|
|
178
|
+
const payloadSealTargetBytes = this.resolvePayloadSealTargetBytes(row.stream);
|
|
179
|
+
if (row.pending_bytes >= payloadSealTargetBytes) return true;
|
|
180
|
+
if (row.pending_rows >= BigInt(this.opts.minCandidateRows)) return true;
|
|
181
|
+
if (this.opts.maxIntervalMs > 0 && BigInt(Date.now()) - row.last_segment_cut_ms >= BigInt(this.opts.maxIntervalMs)) return true;
|
|
182
|
+
return false;
|
|
183
|
+
}
|
|
184
|
+
|
|
132
185
|
private async buildOne(stream: string): Promise<void> {
|
|
133
186
|
if (this.stopping) return;
|
|
134
187
|
const row = this.db.getStream(stream);
|
|
135
188
|
if (!row || this.db.isDeleted(row)) return;
|
|
136
189
|
if (row.segment_in_progress) return;
|
|
190
|
+
if (!this.shouldSealStream(row)) return;
|
|
137
191
|
|
|
138
192
|
const startOffset = row.sealed_through + 1n;
|
|
139
193
|
const maxOffset = row.next_offset - 1n;
|
|
@@ -143,10 +197,18 @@ export class Segmenter {
|
|
|
143
197
|
if (!this.db.tryClaimSegment(stream)) return;
|
|
144
198
|
|
|
145
199
|
try {
|
|
200
|
+
this.activeBuildStream = stream;
|
|
201
|
+
this.activePayloadBytes = 0;
|
|
202
|
+
this.activeSegmentBytesEstimate = 0;
|
|
203
|
+
this.activeRows = 0;
|
|
146
204
|
const segmentIndex = this.db.nextSegmentIndexForStream(stream);
|
|
147
205
|
const shash = streamHash16Hex(stream);
|
|
148
206
|
const localPath = localSegmentPath(this.config.rootDir, shash, segmentIndex);
|
|
149
207
|
const tmpPath = `${localPath}.tmp`;
|
|
208
|
+
const leaveCutPhase = this.memorySampler?.enter("cut", {
|
|
209
|
+
stream,
|
|
210
|
+
segment_index: segmentIndex,
|
|
211
|
+
});
|
|
150
212
|
mkdirSync(dirname(localPath), { recursive: true });
|
|
151
213
|
|
|
152
214
|
// Build blocks and stream-write to temp file.
|
|
@@ -161,6 +223,8 @@ export class Segmenter {
|
|
|
161
223
|
|
|
162
224
|
// Decide endOffset by scanning WAL rows until threshold.
|
|
163
225
|
// IMPORTANT: pending_bytes tracks WAL payload bytes only (not record/block overhead).
|
|
226
|
+
const payloadSealTargetBytes = this.resolvePayloadSealTargetBytes(stream);
|
|
227
|
+
const rowSealTarget = BigInt(this.opts.minCandidateRows);
|
|
164
228
|
let payloadBytes = 0n;
|
|
165
229
|
let rowsSealed = 0n;
|
|
166
230
|
let endOffset = startOffset - 1n;
|
|
@@ -210,6 +274,9 @@ export class Segmenter {
|
|
|
210
274
|
payloadBytes += BigInt(payload.byteLength);
|
|
211
275
|
rowsSealed += 1n;
|
|
212
276
|
endOffset = offset;
|
|
277
|
+
this.activePayloadBytes = Number(payloadBytes);
|
|
278
|
+
this.activeRows = Number(rowsSealed);
|
|
279
|
+
this.activeSegmentBytesEstimate = fileBytes + blockBytesApprox;
|
|
213
280
|
|
|
214
281
|
recordsSinceYield += 1;
|
|
215
282
|
if (recordsSinceYield >= 512 || Date.now() - lastYieldMs >= 10) {
|
|
@@ -218,7 +285,8 @@ export class Segmenter {
|
|
|
218
285
|
recordsSinceYield = 0;
|
|
219
286
|
}
|
|
220
287
|
|
|
221
|
-
if (payloadBytes >=
|
|
288
|
+
if (payloadBytes >= payloadSealTargetBytes) break;
|
|
289
|
+
if (rowsSealed >= rowSealTarget) break;
|
|
222
290
|
if (rowsSealed >= BigInt(this.opts.maxRowsPerSegment)) break;
|
|
223
291
|
}
|
|
224
292
|
|
|
@@ -244,6 +312,7 @@ export class Segmenter {
|
|
|
244
312
|
const footer = encodeFooter(blockIndex);
|
|
245
313
|
writeSync(fd, footer);
|
|
246
314
|
fileBytes += footer.byteLength;
|
|
315
|
+
this.activeSegmentBytesEstimate = fileBytes;
|
|
247
316
|
|
|
248
317
|
fsyncSync(fd);
|
|
249
318
|
|
|
@@ -267,7 +336,7 @@ export class Segmenter {
|
|
|
267
336
|
rowsSealed,
|
|
268
337
|
});
|
|
269
338
|
});
|
|
270
|
-
if (this.hooks?.onSegmentSealed) this.hooks.onSegmentSealed(Number(payloadBytes), fileBytes);
|
|
339
|
+
if (this.hooks?.onSegmentSealed) this.hooks.onSegmentSealed(stream, Number(payloadBytes), fileBytes);
|
|
271
340
|
} catch (e) {
|
|
272
341
|
try {
|
|
273
342
|
if (existsSync(localPath)) unlinkSync(localPath);
|
|
@@ -280,8 +349,13 @@ export class Segmenter {
|
|
|
280
349
|
} finally {
|
|
281
350
|
closeSync(fd);
|
|
282
351
|
this.cleanupTmp(tmpPath);
|
|
352
|
+
leaveCutPhase?.();
|
|
283
353
|
}
|
|
284
354
|
} finally {
|
|
355
|
+
this.activeBuildStream = null;
|
|
356
|
+
this.activePayloadBytes = 0;
|
|
357
|
+
this.activeSegmentBytesEstimate = 0;
|
|
358
|
+
this.activeRows = 0;
|
|
285
359
|
// Release claim.
|
|
286
360
|
if (!this.stopping) {
|
|
287
361
|
try {
|
|
@@ -1,29 +1,53 @@
|
|
|
1
|
-
import { parentPort, workerData } from "node:worker_threads";
|
|
1
|
+
import { parentPort, workerData, threadId } from "node:worker_threads";
|
|
2
2
|
import type { Config } from "../config.ts";
|
|
3
3
|
import { SqliteDurableStore } from "../db/db.ts";
|
|
4
|
+
import type { HostRuntime } from "../runtime/host_runtime.ts";
|
|
5
|
+
import { RuntimeMemorySampler } from "../runtime_memory_sampler.ts";
|
|
6
|
+
import { setSqliteRuntimeOverride } from "../sqlite/adapter.ts";
|
|
4
7
|
import { Segmenter, type SegmenterHooks, type SegmenterOptions } from "./segmenter.ts";
|
|
5
8
|
import { initConsoleLogging } from "../util/log.ts";
|
|
6
9
|
|
|
7
10
|
initConsoleLogging();
|
|
8
11
|
|
|
9
|
-
const data = workerData as { config: Config; opts?: SegmenterOptions };
|
|
12
|
+
const data = workerData as { config: Config; hostRuntime?: HostRuntime; opts?: SegmenterOptions };
|
|
10
13
|
const cfg = data.config;
|
|
14
|
+
setSqliteRuntimeOverride(data.hostRuntime ?? null);
|
|
11
15
|
// The main server process initializes/migrates schema; workers should avoid
|
|
12
16
|
// concurrent migrations on the same sqlite file.
|
|
13
|
-
const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.
|
|
17
|
+
const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.workerSqliteCacheBytes, skipMigrations: true });
|
|
18
|
+
const memorySampler =
|
|
19
|
+
cfg.memorySamplerPath != null
|
|
20
|
+
? new RuntimeMemorySampler(cfg.memorySamplerPath, {
|
|
21
|
+
intervalMs: cfg.memorySamplerIntervalMs,
|
|
22
|
+
scope: `segmenter-worker-${threadId}`,
|
|
23
|
+
})
|
|
24
|
+
: undefined;
|
|
25
|
+
memorySampler?.start();
|
|
14
26
|
|
|
15
27
|
const hooks: SegmenterHooks = {
|
|
16
|
-
onSegmentSealed: (payloadBytes, segmentBytes) => {
|
|
17
|
-
parentPort?.postMessage({ type: "sealed", payloadBytes, segmentBytes });
|
|
28
|
+
onSegmentSealed: (stream, payloadBytes, segmentBytes) => {
|
|
29
|
+
parentPort?.postMessage({ type: "sealed", stream, payloadBytes, segmentBytes });
|
|
18
30
|
},
|
|
19
31
|
};
|
|
20
32
|
|
|
21
|
-
const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks);
|
|
33
|
+
const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks, memorySampler);
|
|
22
34
|
segmenter.start();
|
|
35
|
+
const memoryTimer = setInterval(() => {
|
|
36
|
+
try {
|
|
37
|
+
parentPort?.postMessage({ type: "memory", workerId: threadId, stats: segmenter.getMemoryStats() });
|
|
38
|
+
} catch {
|
|
39
|
+
// ignore
|
|
40
|
+
}
|
|
41
|
+
}, 1_000);
|
|
23
42
|
|
|
24
43
|
parentPort?.on("message", (msg: any) => {
|
|
25
44
|
if (!msg || typeof msg !== "object") return;
|
|
26
45
|
if (msg.type === "stop") {
|
|
46
|
+
try {
|
|
47
|
+
clearInterval(memoryTimer);
|
|
48
|
+
} catch {
|
|
49
|
+
// ignore
|
|
50
|
+
}
|
|
27
51
|
try {
|
|
28
52
|
segmenter.stop();
|
|
29
53
|
} catch {
|
|
@@ -34,6 +58,11 @@ parentPort?.on("message", (msg: any) => {
|
|
|
34
58
|
} catch {
|
|
35
59
|
// ignore
|
|
36
60
|
}
|
|
61
|
+
try {
|
|
62
|
+
memorySampler?.stop();
|
|
63
|
+
} catch {
|
|
64
|
+
// ignore
|
|
65
|
+
}
|
|
37
66
|
try {
|
|
38
67
|
parentPort?.postMessage({ type: "stopped" });
|
|
39
68
|
} catch {
|
|
@@ -1,17 +1,18 @@
|
|
|
1
|
-
import { existsSync } from "node:fs";
|
|
2
|
-
import { resolve } from "node:path";
|
|
3
1
|
import { fileURLToPath } from "node:url";
|
|
4
2
|
import { Worker } from "node:worker_threads";
|
|
5
3
|
import type { Config } from "../config";
|
|
6
|
-
import
|
|
4
|
+
import { detectHostRuntime } from "../runtime/host_runtime.ts";
|
|
5
|
+
import type { SegmenterHooks, SegmenterMemoryStats, SegmenterOptions } from "./segmenter";
|
|
7
6
|
|
|
8
7
|
export type SegmenterController = {
|
|
9
8
|
start: () => void;
|
|
10
9
|
stop: (hard?: boolean) => void;
|
|
10
|
+
getMemoryStats?: () => SegmenterMemoryStats;
|
|
11
11
|
};
|
|
12
12
|
|
|
13
13
|
type WorkerMessage =
|
|
14
|
-
| { type: "sealed"; payloadBytes: number; segmentBytes: number }
|
|
14
|
+
| { type: "sealed"; stream: string; payloadBytes: number; segmentBytes: number }
|
|
15
|
+
| { type: "memory"; workerId: number; stats: SegmenterMemoryStats }
|
|
15
16
|
| { type: "stopped" };
|
|
16
17
|
|
|
17
18
|
export class SegmenterWorkerPool implements SegmenterController {
|
|
@@ -20,6 +21,7 @@ export class SegmenterWorkerPool implements SegmenterController {
|
|
|
20
21
|
private readonly opts: SegmenterOptions;
|
|
21
22
|
private readonly hooks?: SegmenterHooks;
|
|
22
23
|
private readonly workers: Worker[] = [];
|
|
24
|
+
private readonly workerMemory = new Map<number, { stats: SegmenterMemoryStats; reportedAtMs: number }>();
|
|
23
25
|
private started = false;
|
|
24
26
|
|
|
25
27
|
constructor(config: Config, workerCount: number, opts: SegmenterOptions = {}, hooks?: SegmenterHooks) {
|
|
@@ -49,20 +51,42 @@ export class SegmenterWorkerPool implements SegmenterController {
|
|
|
49
51
|
void w.terminate();
|
|
50
52
|
}
|
|
51
53
|
this.workers.length = 0;
|
|
54
|
+
this.workerMemory.clear();
|
|
52
55
|
}
|
|
53
56
|
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
let
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
getMemoryStats(): SegmenterMemoryStats {
|
|
58
|
+
const now = Date.now();
|
|
59
|
+
let activeBuilds = 0;
|
|
60
|
+
let activeStreams = 0;
|
|
61
|
+
let activePayloadBytes = 0;
|
|
62
|
+
let activeSegmentBytesEstimate = 0;
|
|
63
|
+
let activeRows = 0;
|
|
64
|
+
for (const [workerId, entry] of this.workerMemory) {
|
|
65
|
+
if (now - entry.reportedAtMs > 5_000) {
|
|
66
|
+
this.workerMemory.delete(workerId);
|
|
67
|
+
continue;
|
|
61
68
|
}
|
|
69
|
+
activeBuilds += Math.max(0, entry.stats.active_builds);
|
|
70
|
+
activeStreams += Math.max(0, entry.stats.active_streams);
|
|
71
|
+
activePayloadBytes += Math.max(0, entry.stats.active_payload_bytes);
|
|
72
|
+
activeSegmentBytesEstimate += Math.max(0, entry.stats.active_segment_bytes_estimate);
|
|
73
|
+
activeRows += Math.max(0, entry.stats.active_rows);
|
|
62
74
|
}
|
|
75
|
+
return {
|
|
76
|
+
active_builds: activeBuilds,
|
|
77
|
+
active_streams: activeStreams,
|
|
78
|
+
active_payload_bytes: activePayloadBytes,
|
|
79
|
+
active_segment_bytes_estimate: activeSegmentBytesEstimate,
|
|
80
|
+
active_rows: activeRows,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
private spawnWorker(idx: number): void {
|
|
85
|
+
const workerSpec = fileURLToPath(new URL("./segmenter_worker.ts", import.meta.url));
|
|
63
86
|
const worker = new Worker(workerSpec, {
|
|
64
87
|
workerData: {
|
|
65
88
|
config: this.config,
|
|
89
|
+
hostRuntime: detectHostRuntime(),
|
|
66
90
|
opts: this.opts,
|
|
67
91
|
},
|
|
68
92
|
type: "module",
|
|
@@ -71,7 +95,12 @@ export class SegmenterWorkerPool implements SegmenterController {
|
|
|
71
95
|
|
|
72
96
|
worker.on("message", (msg: WorkerMessage) => {
|
|
73
97
|
if (msg?.type === "sealed") {
|
|
74
|
-
this.hooks?.onSegmentSealed?.(msg.payloadBytes, msg.segmentBytes);
|
|
98
|
+
this.hooks?.onSegmentSealed?.(msg.stream, msg.payloadBytes, msg.segmentBytes);
|
|
99
|
+
} else if (msg?.type === "memory") {
|
|
100
|
+
this.workerMemory.set(msg.workerId, {
|
|
101
|
+
stats: msg.stats,
|
|
102
|
+
reportedAtMs: Date.now(),
|
|
103
|
+
});
|
|
75
104
|
}
|
|
76
105
|
});
|
|
77
106
|
|
|
@@ -81,6 +110,7 @@ export class SegmenterWorkerPool implements SegmenterController {
|
|
|
81
110
|
});
|
|
82
111
|
|
|
83
112
|
worker.on("exit", (code) => {
|
|
113
|
+
this.workerMemory.delete(worker.threadId);
|
|
84
114
|
if (!this.started) return;
|
|
85
115
|
if (code !== 0) {
|
|
86
116
|
// eslint-disable-next-line no-console
|
package/src/server.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { MockR2Store } from "./objectstore/mock_r2";
|
|
|
6
6
|
import { R2ObjectStore } from "./objectstore/r2";
|
|
7
7
|
import { bootstrapFromR2 } from "./bootstrap";
|
|
8
8
|
import { initConsoleLogging } from "./util/log";
|
|
9
|
+
import { AUTO_TUNE_PRESETS, memoryLimitForPreset, tuneForPreset, type AutoTuneConfig } from "./auto_tune";
|
|
9
10
|
|
|
10
11
|
initConsoleLogging();
|
|
11
12
|
|
|
@@ -36,21 +37,6 @@ function formatPresetList<T>(presets: number[], selected: number, map: (preset:
|
|
|
36
37
|
.join(", ");
|
|
37
38
|
}
|
|
38
39
|
|
|
39
|
-
type AutoTuneConfig = {
|
|
40
|
-
sqliteCacheMb: number;
|
|
41
|
-
indexMemMb: number;
|
|
42
|
-
ingestBatchMb: number;
|
|
43
|
-
ingestQueueMb: number;
|
|
44
|
-
indexBuildConcurrency: number;
|
|
45
|
-
indexCompactConcurrency: number;
|
|
46
|
-
segmenterWorkers: number;
|
|
47
|
-
uploadConcurrency: number;
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
function memoryLimitForPreset(preset: number): number {
|
|
51
|
-
return preset === 256 ? 300 : preset;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
40
|
function applyAutoTune(overrideMb: number | null): void {
|
|
55
41
|
const envMemRaw = process.env.DS_MEMORY_LIMIT_MB;
|
|
56
42
|
if (overrideMb != null) {
|
|
@@ -74,11 +60,24 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
74
60
|
}
|
|
75
61
|
|
|
76
62
|
const conflictVars = [
|
|
63
|
+
"DS_SEGMENT_MAX_BYTES",
|
|
64
|
+
"DS_SEGMENT_TARGET_ROWS",
|
|
77
65
|
"DS_SQLITE_CACHE_MB",
|
|
78
66
|
"DS_SQLITE_CACHE_BYTES",
|
|
67
|
+
"DS_WORKER_SQLITE_CACHE_MB",
|
|
68
|
+
"DS_WORKER_SQLITE_CACHE_BYTES",
|
|
79
69
|
"DS_INDEX_RUN_MEM_CACHE_BYTES",
|
|
70
|
+
"DS_LEXICON_INDEX_CACHE_MAX_BYTES",
|
|
80
71
|
"DS_INGEST_MAX_BATCH_BYTES",
|
|
81
72
|
"DS_INGEST_MAX_QUEUE_BYTES",
|
|
73
|
+
"DS_INGEST_CONCURRENCY",
|
|
74
|
+
"DS_READ_CONCURRENCY",
|
|
75
|
+
"DS_SEARCH_CONCURRENCY",
|
|
76
|
+
"DS_ASYNC_INDEX_CONCURRENCY",
|
|
77
|
+
"DS_SEARCH_COMPANION_TOC_CACHE_BYTES",
|
|
78
|
+
"DS_SEARCH_COMPANION_SECTION_CACHE_BYTES",
|
|
79
|
+
"DS_SEARCH_COMPANION_BATCH_SEGMENTS",
|
|
80
|
+
"DS_SEARCH_COMPANION_YIELD_BLOCKS",
|
|
82
81
|
];
|
|
83
82
|
const conflicts = conflictVars.filter((v) => process.env[v] != null);
|
|
84
83
|
if (conflicts.length > 0) {
|
|
@@ -86,35 +85,39 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
86
85
|
process.exit(1);
|
|
87
86
|
}
|
|
88
87
|
|
|
89
|
-
const presets = [
|
|
88
|
+
const presets = [...AUTO_TUNE_PRESETS];
|
|
90
89
|
const preset = [...presets].reverse().find((v) => v <= memMb);
|
|
91
90
|
if (!preset) {
|
|
92
91
|
console.error(`DS_MEMORY_LIMIT_MB=${memMb} is below the minimum preset (256)`);
|
|
93
92
|
process.exit(1);
|
|
94
93
|
}
|
|
95
|
-
|
|
96
|
-
const tuneFor = (p: number): AutoTuneConfig => ({
|
|
97
|
-
sqliteCacheMb: Math.max(8, Math.floor(p / 16)),
|
|
98
|
-
indexMemMb: Math.max(4, Math.floor(p / 64)),
|
|
99
|
-
ingestBatchMb: Math.max(2, Math.floor(p / 128)),
|
|
100
|
-
ingestQueueMb: Math.max(8, Math.floor(p / 32)),
|
|
101
|
-
indexBuildConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
|
|
102
|
-
indexCompactConcurrency: p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
|
|
103
|
-
segmenterWorkers: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
|
|
104
|
-
uploadConcurrency: p >= 8192 ? 16 : p >= 4096 ? 8 : p >= 1024 ? 4 : 2,
|
|
105
|
-
});
|
|
106
|
-
const tune = tuneFor(preset);
|
|
94
|
+
const tune: AutoTuneConfig = tuneForPreset(preset);
|
|
107
95
|
|
|
108
96
|
const memoryLimitMb = memoryLimitForPreset(preset);
|
|
97
|
+
process.env.DS_AUTO_TUNE_REQUESTED_MB = String(memMb);
|
|
98
|
+
process.env.DS_AUTO_TUNE_PRESET_MB = String(preset);
|
|
99
|
+
process.env.DS_AUTO_TUNE_EFFECTIVE_MEMORY_LIMIT_MB = String(memoryLimitMb);
|
|
109
100
|
process.env.DS_MEMORY_LIMIT_MB = String(memoryLimitMb);
|
|
101
|
+
process.env.DS_SEGMENT_MAX_BYTES = String(tune.segmentMaxMiB * 1024 * 1024);
|
|
102
|
+
process.env.DS_SEGMENT_TARGET_ROWS = String(tune.segmentTargetRows);
|
|
110
103
|
process.env.DS_SQLITE_CACHE_MB = String(tune.sqliteCacheMb);
|
|
104
|
+
process.env.DS_WORKER_SQLITE_CACHE_MB = String(tune.workerSqliteCacheMb);
|
|
111
105
|
process.env.DS_INDEX_RUN_MEM_CACHE_BYTES = String(tune.indexMemMb * 1024 * 1024);
|
|
106
|
+
process.env.DS_LEXICON_INDEX_CACHE_MAX_BYTES = String(tune.lexiconIndexCacheMb * 1024 * 1024);
|
|
107
|
+
process.env.DS_SEARCH_COMPANION_TOC_CACHE_BYTES = String(tune.searchCompanionTocCacheMb * 1024 * 1024);
|
|
108
|
+
process.env.DS_SEARCH_COMPANION_SECTION_CACHE_BYTES = String(tune.searchCompanionSectionCacheMb * 1024 * 1024);
|
|
112
109
|
process.env.DS_INGEST_MAX_BATCH_BYTES = String(tune.ingestBatchMb * 1024 * 1024);
|
|
113
110
|
process.env.DS_INGEST_MAX_QUEUE_BYTES = String(tune.ingestQueueMb * 1024 * 1024);
|
|
111
|
+
process.env.DS_INGEST_CONCURRENCY = String(tune.ingestConcurrency);
|
|
112
|
+
process.env.DS_READ_CONCURRENCY = String(tune.readConcurrency);
|
|
113
|
+
process.env.DS_SEARCH_CONCURRENCY = String(tune.searchConcurrency);
|
|
114
|
+
process.env.DS_ASYNC_INDEX_CONCURRENCY = String(tune.asyncIndexConcurrency);
|
|
114
115
|
process.env.DS_INDEX_BUILD_CONCURRENCY = String(tune.indexBuildConcurrency);
|
|
115
116
|
process.env.DS_INDEX_COMPACT_CONCURRENCY = String(tune.indexCompactConcurrency);
|
|
116
117
|
process.env.DS_SEGMENTER_WORKERS = String(tune.segmenterWorkers);
|
|
117
118
|
process.env.DS_UPLOAD_CONCURRENCY = String(tune.uploadConcurrency);
|
|
119
|
+
process.env.DS_SEARCH_COMPANION_BATCH_SEGMENTS = String(tune.searchCompanionBatchSegments);
|
|
120
|
+
process.env.DS_SEARCH_COMPANION_YIELD_BLOCKS = String(tune.searchCompanionYieldBlocks);
|
|
118
121
|
|
|
119
122
|
const presetLine = formatPresetList(presets, preset, (v) => v, (v) => String(v));
|
|
120
123
|
console.log(`Auto-tuning for memory preset ${presetLine}`);
|
|
@@ -122,22 +125,117 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
122
125
|
`DS_MEMORY_LIMIT_MB presets: ${formatPresetList(presets, preset, (p) => memoryLimitForPreset(p), (v) => String(v))}`
|
|
123
126
|
);
|
|
124
127
|
console.log(
|
|
125
|
-
`
|
|
128
|
+
`DS_SEGMENT_MAX_MIB presets: ${formatPresetList(
|
|
129
|
+
presets,
|
|
130
|
+
preset,
|
|
131
|
+
(p) => tuneForPreset(p).segmentMaxMiB,
|
|
132
|
+
(v) => String(v)
|
|
133
|
+
)}`
|
|
134
|
+
);
|
|
135
|
+
console.log(
|
|
136
|
+
`DS_SEGMENT_TARGET_ROWS presets: ${formatPresetList(
|
|
137
|
+
presets,
|
|
138
|
+
preset,
|
|
139
|
+
(p) => tuneForPreset(p).segmentTargetRows,
|
|
140
|
+
(v) => String(v)
|
|
141
|
+
)}`
|
|
142
|
+
);
|
|
143
|
+
console.log(
|
|
144
|
+
`DS_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).sqliteCacheMb, (v) => String(v))}`
|
|
145
|
+
);
|
|
146
|
+
console.log(
|
|
147
|
+
`DS_WORKER_SQLITE_CACHE_MB presets: ${formatPresetList(
|
|
148
|
+
presets,
|
|
149
|
+
preset,
|
|
150
|
+
(p) => tuneForPreset(p).workerSqliteCacheMb,
|
|
151
|
+
(v) => String(v)
|
|
152
|
+
)}`
|
|
153
|
+
);
|
|
154
|
+
console.log(
|
|
155
|
+
`DS_INDEX_RUN_MEM_CACHE_MB presets: ${formatPresetList(
|
|
156
|
+
presets,
|
|
157
|
+
preset,
|
|
158
|
+
(p) => tuneForPreset(p).indexMemMb,
|
|
159
|
+
(v) => String(v)
|
|
160
|
+
)}`
|
|
161
|
+
);
|
|
162
|
+
console.log(
|
|
163
|
+
`DS_LEXICON_INDEX_CACHE_MB presets: ${formatPresetList(
|
|
164
|
+
presets,
|
|
165
|
+
preset,
|
|
166
|
+
(p) => tuneForPreset(p).lexiconIndexCacheMb,
|
|
167
|
+
(v) => String(v)
|
|
168
|
+
)}`
|
|
169
|
+
);
|
|
170
|
+
console.log(
|
|
171
|
+
`DS_SEARCH_COMPANION_TOC_CACHE_MB presets: ${formatPresetList(
|
|
172
|
+
presets,
|
|
173
|
+
preset,
|
|
174
|
+
(p) => tuneForPreset(p).searchCompanionTocCacheMb,
|
|
175
|
+
(v) => String(v)
|
|
176
|
+
)}`
|
|
177
|
+
);
|
|
178
|
+
console.log(
|
|
179
|
+
`DS_SEARCH_COMPANION_SECTION_CACHE_MB presets: ${formatPresetList(
|
|
180
|
+
presets,
|
|
181
|
+
preset,
|
|
182
|
+
(p) => tuneForPreset(p).searchCompanionSectionCacheMb,
|
|
183
|
+
(v) => String(v)
|
|
184
|
+
)}`
|
|
185
|
+
);
|
|
186
|
+
console.log(
|
|
187
|
+
`DS_INGEST_MAX_BATCH_MB presets: ${formatPresetList(
|
|
188
|
+
presets,
|
|
189
|
+
preset,
|
|
190
|
+
(p) => tuneForPreset(p).ingestBatchMb,
|
|
191
|
+
(v) => String(v)
|
|
192
|
+
)}`
|
|
126
193
|
);
|
|
127
194
|
console.log(
|
|
128
|
-
`
|
|
195
|
+
`DS_INGEST_MAX_QUEUE_MB presets: ${formatPresetList(
|
|
196
|
+
presets,
|
|
197
|
+
preset,
|
|
198
|
+
(p) => tuneForPreset(p).ingestQueueMb,
|
|
199
|
+
(v) => String(v)
|
|
200
|
+
)}`
|
|
129
201
|
);
|
|
130
202
|
console.log(
|
|
131
|
-
`
|
|
203
|
+
`DS_INGEST_CONCURRENCY presets: ${formatPresetList(
|
|
204
|
+
presets,
|
|
205
|
+
preset,
|
|
206
|
+
(p) => tuneForPreset(p).ingestConcurrency,
|
|
207
|
+
(v) => String(v)
|
|
208
|
+
)}`
|
|
132
209
|
);
|
|
133
210
|
console.log(
|
|
134
|
-
`
|
|
211
|
+
`DS_READ_CONCURRENCY presets: ${formatPresetList(
|
|
212
|
+
presets,
|
|
213
|
+
preset,
|
|
214
|
+
(p) => tuneForPreset(p).readConcurrency,
|
|
215
|
+
(v) => String(v)
|
|
216
|
+
)}`
|
|
217
|
+
);
|
|
218
|
+
console.log(
|
|
219
|
+
`DS_SEARCH_CONCURRENCY presets: ${formatPresetList(
|
|
220
|
+
presets,
|
|
221
|
+
preset,
|
|
222
|
+
(p) => tuneForPreset(p).searchConcurrency,
|
|
223
|
+
(v) => String(v)
|
|
224
|
+
)}`
|
|
225
|
+
);
|
|
226
|
+
console.log(
|
|
227
|
+
`DS_ASYNC_INDEX_CONCURRENCY presets: ${formatPresetList(
|
|
228
|
+
presets,
|
|
229
|
+
preset,
|
|
230
|
+
(p) => tuneForPreset(p).asyncIndexConcurrency,
|
|
231
|
+
(v) => String(v)
|
|
232
|
+
)}`
|
|
135
233
|
);
|
|
136
234
|
console.log(
|
|
137
235
|
`DS_INDEX_BUILD_CONCURRENCY presets: ${formatPresetList(
|
|
138
236
|
presets,
|
|
139
237
|
preset,
|
|
140
|
-
(p) =>
|
|
238
|
+
(p) => tuneForPreset(p).indexBuildConcurrency,
|
|
141
239
|
(v) => String(v)
|
|
142
240
|
)}`
|
|
143
241
|
);
|
|
@@ -145,7 +243,7 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
145
243
|
`DS_INDEX_COMPACT_CONCURRENCY presets: ${formatPresetList(
|
|
146
244
|
presets,
|
|
147
245
|
preset,
|
|
148
|
-
(p) =>
|
|
246
|
+
(p) => tuneForPreset(p).indexCompactConcurrency,
|
|
149
247
|
(v) => String(v)
|
|
150
248
|
)}`
|
|
151
249
|
);
|
|
@@ -153,7 +251,7 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
153
251
|
`DS_SEGMENTER_WORKERS presets: ${formatPresetList(
|
|
154
252
|
presets,
|
|
155
253
|
preset,
|
|
156
|
-
(p) =>
|
|
254
|
+
(p) => tuneForPreset(p).segmenterWorkers,
|
|
157
255
|
(v) => String(v)
|
|
158
256
|
)}`
|
|
159
257
|
);
|
|
@@ -161,7 +259,23 @@ function applyAutoTune(overrideMb: number | null): void {
|
|
|
161
259
|
`DS_UPLOAD_CONCURRENCY presets: ${formatPresetList(
|
|
162
260
|
presets,
|
|
163
261
|
preset,
|
|
164
|
-
(p) =>
|
|
262
|
+
(p) => tuneForPreset(p).uploadConcurrency,
|
|
263
|
+
(v) => String(v)
|
|
264
|
+
)}`
|
|
265
|
+
);
|
|
266
|
+
console.log(
|
|
267
|
+
`DS_SEARCH_COMPANION_BATCH_SEGMENTS presets: ${formatPresetList(
|
|
268
|
+
presets,
|
|
269
|
+
preset,
|
|
270
|
+
(p) => tuneForPreset(p).searchCompanionBatchSegments,
|
|
271
|
+
(v) => String(v)
|
|
272
|
+
)}`
|
|
273
|
+
);
|
|
274
|
+
console.log(
|
|
275
|
+
`DS_SEARCH_COMPANION_YIELD_BLOCKS presets: ${formatPresetList(
|
|
276
|
+
presets,
|
|
277
|
+
preset,
|
|
278
|
+
(p) => tuneForPreset(p).searchCompanionYieldBlocks,
|
|
165
279
|
(v) => String(v)
|
|
166
280
|
)}`
|
|
167
281
|
);
|