@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
@@ -6,6 +6,7 @@ import { encodeBlock, encodeFooter, type BlockIndexEntry, type SegmentRecord } f
6
6
  import { readU32BE } from "../util/endian";
7
7
  import { localSegmentPath, streamHash16Hex } from "../util/stream_paths";
8
8
  import { LruCache } from "../util/lru";
9
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler";
9
10
  import { yieldToEventLoop } from "../util/yield";
10
11
 
11
12
  export type SegmenterOptions = {
@@ -17,20 +18,42 @@ export type SegmenterOptions = {
17
18
  };
18
19
 
19
20
  export type SegmenterHooks = {
20
- onSegmentSealed?: (payloadBytes: number, segmentBytes: number) => void;
21
+ onSegmentSealed?: (stream: string, payloadBytes: number, segmentBytes: number) => void;
21
22
  };
22
23
 
24
+ export type SegmenterMemoryStats = {
25
+ active_builds: number;
26
+ active_streams: number;
27
+ active_payload_bytes: number;
28
+ active_segment_bytes_estimate: number;
29
+ active_rows: number;
30
+ };
31
+
32
+ const SEGMENT_COMPRESSION_WINDOW = 8;
33
+ const MIN_COMPRESSED_FILL_RATIO = 0.5;
34
+
23
35
  export class Segmenter {
24
36
  private readonly config: Config;
25
37
  private readonly db: SqliteDurableStore;
26
38
  private readonly opts: Required<SegmenterOptions>;
27
39
  private readonly hooks?: SegmenterHooks;
40
+ private readonly memorySampler?: RuntimeMemorySampler;
28
41
  private timer: any | null = null;
29
42
  private running = false;
30
43
  private stopping = false;
31
44
  private readonly failures = new FailureTracker(1024);
32
-
33
- constructor(config: Config, db: SqliteDurableStore, opts: SegmenterOptions = {}, hooks?: SegmenterHooks) {
45
+ private activeBuildStream: string | null = null;
46
+ private activePayloadBytes = 0;
47
+ private activeSegmentBytesEstimate = 0;
48
+ private activeRows = 0;
49
+
50
+ constructor(
51
+ config: Config,
52
+ db: SqliteDurableStore,
53
+ opts: SegmenterOptions = {},
54
+ hooks?: SegmenterHooks,
55
+ memorySampler?: RuntimeMemorySampler
56
+ ) {
34
57
  this.config = config;
35
58
  this.db = db;
36
59
  this.opts = {
@@ -41,6 +64,7 @@ export class Segmenter {
41
64
  maxRowsPerSegment: opts.maxRowsPerSegment ?? 250_000,
42
65
  };
43
66
  this.hooks = hooks;
67
+ this.memorySampler = memorySampler;
44
68
  }
45
69
 
46
70
  start(): void {
@@ -58,6 +82,16 @@ export class Segmenter {
58
82
  this.timer = null;
59
83
  }
60
84
 
85
+ getMemoryStats(): SegmenterMemoryStats {
86
+ return {
87
+ active_builds: this.activeBuildStream ? 1 : 0,
88
+ active_streams: this.activeBuildStream ? 1 : 0,
89
+ active_payload_bytes: this.activePayloadBytes,
90
+ active_segment_bytes_estimate: this.activeSegmentBytesEstimate,
91
+ active_rows: this.activeRows,
92
+ };
93
+ }
94
+
61
95
  private async tick(): Promise<void> {
62
96
  if (this.stopping) return;
63
97
  if (this.running) return;
@@ -129,11 +163,31 @@ export class Segmenter {
129
163
  }
130
164
  }
131
165
 
166
+ private resolvePayloadSealTargetBytes(stream: string): bigint {
167
+ const baseTarget = BigInt(this.config.segmentMaxBytes);
168
+ const ratio = this.db.recentSegmentCompressionRatio(stream, SEGMENT_COMPRESSION_WINDOW);
169
+ if (ratio == null || !Number.isFinite(ratio) || ratio <= 0 || ratio >= MIN_COMPRESSED_FILL_RATIO) {
170
+ return baseTarget;
171
+ }
172
+ const desiredCompressedBytes = Math.ceil(this.config.segmentMaxBytes * MIN_COMPRESSED_FILL_RATIO);
173
+ const boosted = BigInt(Math.ceil(desiredCompressedBytes / ratio));
174
+ return boosted > baseTarget ? boosted : baseTarget;
175
+ }
176
+
177
+ private shouldSealStream(row: { stream: string; pending_bytes: bigint; pending_rows: bigint; last_segment_cut_ms: bigint }): boolean {
178
+ const payloadSealTargetBytes = this.resolvePayloadSealTargetBytes(row.stream);
179
+ if (row.pending_bytes >= payloadSealTargetBytes) return true;
180
+ if (row.pending_rows >= BigInt(this.opts.minCandidateRows)) return true;
181
+ if (this.opts.maxIntervalMs > 0 && BigInt(Date.now()) - row.last_segment_cut_ms >= BigInt(this.opts.maxIntervalMs)) return true;
182
+ return false;
183
+ }
184
+
132
185
  private async buildOne(stream: string): Promise<void> {
133
186
  if (this.stopping) return;
134
187
  const row = this.db.getStream(stream);
135
188
  if (!row || this.db.isDeleted(row)) return;
136
189
  if (row.segment_in_progress) return;
190
+ if (!this.shouldSealStream(row)) return;
137
191
 
138
192
  const startOffset = row.sealed_through + 1n;
139
193
  const maxOffset = row.next_offset - 1n;
@@ -143,10 +197,18 @@ export class Segmenter {
143
197
  if (!this.db.tryClaimSegment(stream)) return;
144
198
 
145
199
  try {
200
+ this.activeBuildStream = stream;
201
+ this.activePayloadBytes = 0;
202
+ this.activeSegmentBytesEstimate = 0;
203
+ this.activeRows = 0;
146
204
  const segmentIndex = this.db.nextSegmentIndexForStream(stream);
147
205
  const shash = streamHash16Hex(stream);
148
206
  const localPath = localSegmentPath(this.config.rootDir, shash, segmentIndex);
149
207
  const tmpPath = `${localPath}.tmp`;
208
+ const leaveCutPhase = this.memorySampler?.enter("cut", {
209
+ stream,
210
+ segment_index: segmentIndex,
211
+ });
150
212
  mkdirSync(dirname(localPath), { recursive: true });
151
213
 
152
214
  // Build blocks and stream-write to temp file.
@@ -161,6 +223,8 @@ export class Segmenter {
161
223
 
162
224
  // Decide endOffset by scanning WAL rows until threshold.
163
225
  // IMPORTANT: pending_bytes tracks WAL payload bytes only (not record/block overhead).
226
+ const payloadSealTargetBytes = this.resolvePayloadSealTargetBytes(stream);
227
+ const rowSealTarget = BigInt(this.opts.minCandidateRows);
164
228
  let payloadBytes = 0n;
165
229
  let rowsSealed = 0n;
166
230
  let endOffset = startOffset - 1n;
@@ -210,6 +274,9 @@ export class Segmenter {
210
274
  payloadBytes += BigInt(payload.byteLength);
211
275
  rowsSealed += 1n;
212
276
  endOffset = offset;
277
+ this.activePayloadBytes = Number(payloadBytes);
278
+ this.activeRows = Number(rowsSealed);
279
+ this.activeSegmentBytesEstimate = fileBytes + blockBytesApprox;
213
280
 
214
281
  recordsSinceYield += 1;
215
282
  if (recordsSinceYield >= 512 || Date.now() - lastYieldMs >= 10) {
@@ -218,7 +285,8 @@ export class Segmenter {
218
285
  recordsSinceYield = 0;
219
286
  }
220
287
 
221
- if (payloadBytes >= BigInt(this.config.segmentMaxBytes)) break;
288
+ if (payloadBytes >= payloadSealTargetBytes) break;
289
+ if (rowsSealed >= rowSealTarget) break;
222
290
  if (rowsSealed >= BigInt(this.opts.maxRowsPerSegment)) break;
223
291
  }
224
292
 
@@ -244,6 +312,7 @@ export class Segmenter {
244
312
  const footer = encodeFooter(blockIndex);
245
313
  writeSync(fd, footer);
246
314
  fileBytes += footer.byteLength;
315
+ this.activeSegmentBytesEstimate = fileBytes;
247
316
 
248
317
  fsyncSync(fd);
249
318
 
@@ -267,7 +336,7 @@ export class Segmenter {
267
336
  rowsSealed,
268
337
  });
269
338
  });
270
- if (this.hooks?.onSegmentSealed) this.hooks.onSegmentSealed(Number(payloadBytes), fileBytes);
339
+ if (this.hooks?.onSegmentSealed) this.hooks.onSegmentSealed(stream, Number(payloadBytes), fileBytes);
271
340
  } catch (e) {
272
341
  try {
273
342
  if (existsSync(localPath)) unlinkSync(localPath);
@@ -280,8 +349,13 @@ export class Segmenter {
280
349
  } finally {
281
350
  closeSync(fd);
282
351
  this.cleanupTmp(tmpPath);
352
+ leaveCutPhase?.();
283
353
  }
284
354
  } finally {
355
+ this.activeBuildStream = null;
356
+ this.activePayloadBytes = 0;
357
+ this.activeSegmentBytesEstimate = 0;
358
+ this.activeRows = 0;
285
359
  // Release claim.
286
360
  if (!this.stopping) {
287
361
  try {
@@ -1,29 +1,53 @@
1
- import { parentPort, workerData } from "node:worker_threads";
1
+ import { parentPort, workerData, threadId } from "node:worker_threads";
2
2
  import type { Config } from "../config.ts";
3
3
  import { SqliteDurableStore } from "../db/db.ts";
4
+ import type { HostRuntime } from "../runtime/host_runtime.ts";
5
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler.ts";
6
+ import { setSqliteRuntimeOverride } from "../sqlite/adapter.ts";
4
7
  import { Segmenter, type SegmenterHooks, type SegmenterOptions } from "./segmenter.ts";
5
8
  import { initConsoleLogging } from "../util/log.ts";
6
9
 
7
10
  initConsoleLogging();
8
11
 
9
- const data = workerData as { config: Config; opts?: SegmenterOptions };
12
+ const data = workerData as { config: Config; hostRuntime?: HostRuntime; opts?: SegmenterOptions };
10
13
  const cfg = data.config;
14
+ setSqliteRuntimeOverride(data.hostRuntime ?? null);
11
15
  // The main server process initializes/migrates schema; workers should avoid
12
16
  // concurrent migrations on the same sqlite file.
13
- const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.sqliteCacheBytes, skipMigrations: true });
17
+ const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.workerSqliteCacheBytes, skipMigrations: true });
18
+ const memorySampler =
19
+ cfg.memorySamplerPath != null
20
+ ? new RuntimeMemorySampler(cfg.memorySamplerPath, {
21
+ intervalMs: cfg.memorySamplerIntervalMs,
22
+ scope: `segmenter-worker-${threadId}`,
23
+ })
24
+ : undefined;
25
+ memorySampler?.start();
14
26
 
15
27
  const hooks: SegmenterHooks = {
16
- onSegmentSealed: (payloadBytes, segmentBytes) => {
17
- parentPort?.postMessage({ type: "sealed", payloadBytes, segmentBytes });
28
+ onSegmentSealed: (stream, payloadBytes, segmentBytes) => {
29
+ parentPort?.postMessage({ type: "sealed", stream, payloadBytes, segmentBytes });
18
30
  },
19
31
  };
20
32
 
21
- const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks);
33
+ const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks, memorySampler);
22
34
  segmenter.start();
35
+ const memoryTimer = setInterval(() => {
36
+ try {
37
+ parentPort?.postMessage({ type: "memory", workerId: threadId, stats: segmenter.getMemoryStats() });
38
+ } catch {
39
+ // ignore
40
+ }
41
+ }, 1_000);
23
42
 
24
43
  parentPort?.on("message", (msg: any) => {
25
44
  if (!msg || typeof msg !== "object") return;
26
45
  if (msg.type === "stop") {
46
+ try {
47
+ clearInterval(memoryTimer);
48
+ } catch {
49
+ // ignore
50
+ }
27
51
  try {
28
52
  segmenter.stop();
29
53
  } catch {
@@ -34,6 +58,11 @@ parentPort?.on("message", (msg: any) => {
34
58
  } catch {
35
59
  // ignore
36
60
  }
61
+ try {
62
+ memorySampler?.stop();
63
+ } catch {
64
+ // ignore
65
+ }
37
66
  try {
38
67
  parentPort?.postMessage({ type: "stopped" });
39
68
  } catch {
@@ -1,17 +1,18 @@
1
- import { existsSync } from "node:fs";
2
- import { resolve } from "node:path";
3
1
  import { fileURLToPath } from "node:url";
4
2
  import { Worker } from "node:worker_threads";
5
3
  import type { Config } from "../config";
6
- import type { SegmenterHooks, SegmenterOptions } from "./segmenter";
4
+ import { detectHostRuntime } from "../runtime/host_runtime.ts";
5
+ import type { SegmenterHooks, SegmenterMemoryStats, SegmenterOptions } from "./segmenter";
7
6
 
8
7
  export type SegmenterController = {
9
8
  start: () => void;
10
9
  stop: (hard?: boolean) => void;
10
+ getMemoryStats?: () => SegmenterMemoryStats;
11
11
  };
12
12
 
13
13
  type WorkerMessage =
14
- | { type: "sealed"; payloadBytes: number; segmentBytes: number }
14
+ | { type: "sealed"; stream: string; payloadBytes: number; segmentBytes: number }
15
+ | { type: "memory"; workerId: number; stats: SegmenterMemoryStats }
15
16
  | { type: "stopped" };
16
17
 
17
18
  export class SegmenterWorkerPool implements SegmenterController {
@@ -20,6 +21,7 @@ export class SegmenterWorkerPool implements SegmenterController {
20
21
  private readonly opts: SegmenterOptions;
21
22
  private readonly hooks?: SegmenterHooks;
22
23
  private readonly workers: Worker[] = [];
24
+ private readonly workerMemory = new Map<number, { stats: SegmenterMemoryStats; reportedAtMs: number }>();
23
25
  private started = false;
24
26
 
25
27
  constructor(config: Config, workerCount: number, opts: SegmenterOptions = {}, hooks?: SegmenterHooks) {
@@ -49,20 +51,42 @@ export class SegmenterWorkerPool implements SegmenterController {
49
51
  void w.terminate();
50
52
  }
51
53
  this.workers.length = 0;
54
+ this.workerMemory.clear();
52
55
  }
53
56
 
54
- private spawnWorker(idx: number): void {
55
- const workerUrl = new URL("./segmenter_worker.ts", import.meta.url);
56
- let workerSpec = fileURLToPath(workerUrl);
57
- if (!existsSync(workerSpec)) {
58
- const fallback = resolve(process.cwd(), "src/segment/segmenter_worker.ts");
59
- if (existsSync(fallback)) {
60
- workerSpec = fallback;
57
+ getMemoryStats(): SegmenterMemoryStats {
58
+ const now = Date.now();
59
+ let activeBuilds = 0;
60
+ let activeStreams = 0;
61
+ let activePayloadBytes = 0;
62
+ let activeSegmentBytesEstimate = 0;
63
+ let activeRows = 0;
64
+ for (const [workerId, entry] of this.workerMemory) {
65
+ if (now - entry.reportedAtMs > 5_000) {
66
+ this.workerMemory.delete(workerId);
67
+ continue;
61
68
  }
69
+ activeBuilds += Math.max(0, entry.stats.active_builds);
70
+ activeStreams += Math.max(0, entry.stats.active_streams);
71
+ activePayloadBytes += Math.max(0, entry.stats.active_payload_bytes);
72
+ activeSegmentBytesEstimate += Math.max(0, entry.stats.active_segment_bytes_estimate);
73
+ activeRows += Math.max(0, entry.stats.active_rows);
62
74
  }
75
+ return {
76
+ active_builds: activeBuilds,
77
+ active_streams: activeStreams,
78
+ active_payload_bytes: activePayloadBytes,
79
+ active_segment_bytes_estimate: activeSegmentBytesEstimate,
80
+ active_rows: activeRows,
81
+ };
82
+ }
83
+
84
+ private spawnWorker(idx: number): void {
85
+ const workerSpec = fileURLToPath(new URL("./segmenter_worker.ts", import.meta.url));
63
86
  const worker = new Worker(workerSpec, {
64
87
  workerData: {
65
88
  config: this.config,
89
+ hostRuntime: detectHostRuntime(),
66
90
  opts: this.opts,
67
91
  },
68
92
  type: "module",
@@ -71,7 +95,12 @@ export class SegmenterWorkerPool implements SegmenterController {
71
95
 
72
96
  worker.on("message", (msg: WorkerMessage) => {
73
97
  if (msg?.type === "sealed") {
74
- this.hooks?.onSegmentSealed?.(msg.payloadBytes, msg.segmentBytes);
98
+ this.hooks?.onSegmentSealed?.(msg.stream, msg.payloadBytes, msg.segmentBytes);
99
+ } else if (msg?.type === "memory") {
100
+ this.workerMemory.set(msg.workerId, {
101
+ stats: msg.stats,
102
+ reportedAtMs: Date.now(),
103
+ });
75
104
  }
76
105
  });
77
106
 
@@ -81,6 +110,7 @@ export class SegmenterWorkerPool implements SegmenterController {
81
110
  });
82
111
 
83
112
  worker.on("exit", (code) => {
113
+ this.workerMemory.delete(worker.threadId);
84
114
  if (!this.started) return;
85
115
  if (code !== 0) {
86
116
  // eslint-disable-next-line no-console
package/src/server.ts CHANGED
@@ -6,6 +6,7 @@ import { MockR2Store } from "./objectstore/mock_r2";
6
6
  import { R2ObjectStore } from "./objectstore/r2";
7
7
  import { bootstrapFromR2 } from "./bootstrap";
8
8
  import { initConsoleLogging } from "./util/log";
9
+ import { AUTO_TUNE_PRESETS, memoryLimitForPreset, tuneForPreset, type AutoTuneConfig } from "./auto_tune";
9
10
 
10
11
  initConsoleLogging();
11
12
 
@@ -36,21 +37,6 @@ function formatPresetList<T>(presets: number[], selected: number, map: (preset:
36
37
  .join(", ");
37
38
  }
38
39
 
39
- type AutoTuneConfig = {
40
- sqliteCacheMb: number;
41
- indexMemMb: number;
42
- ingestBatchMb: number;
43
- ingestQueueMb: number;
44
- indexBuildConcurrency: number;
45
- indexCompactConcurrency: number;
46
- segmenterWorkers: number;
47
- uploadConcurrency: number;
48
- };
49
-
50
- function memoryLimitForPreset(preset: number): number {
51
- return preset === 256 ? 300 : preset;
52
- }
53
-
54
40
  function applyAutoTune(overrideMb: number | null): void {
55
41
  const envMemRaw = process.env.DS_MEMORY_LIMIT_MB;
56
42
  if (overrideMb != null) {
@@ -74,11 +60,24 @@ function applyAutoTune(overrideMb: number | null): void {
74
60
  }
75
61
 
76
62
  const conflictVars = [
63
+ "DS_SEGMENT_MAX_BYTES",
64
+ "DS_SEGMENT_TARGET_ROWS",
77
65
  "DS_SQLITE_CACHE_MB",
78
66
  "DS_SQLITE_CACHE_BYTES",
67
+ "DS_WORKER_SQLITE_CACHE_MB",
68
+ "DS_WORKER_SQLITE_CACHE_BYTES",
79
69
  "DS_INDEX_RUN_MEM_CACHE_BYTES",
70
+ "DS_LEXICON_INDEX_CACHE_MAX_BYTES",
80
71
  "DS_INGEST_MAX_BATCH_BYTES",
81
72
  "DS_INGEST_MAX_QUEUE_BYTES",
73
+ "DS_INGEST_CONCURRENCY",
74
+ "DS_READ_CONCURRENCY",
75
+ "DS_SEARCH_CONCURRENCY",
76
+ "DS_ASYNC_INDEX_CONCURRENCY",
77
+ "DS_SEARCH_COMPANION_TOC_CACHE_BYTES",
78
+ "DS_SEARCH_COMPANION_SECTION_CACHE_BYTES",
79
+ "DS_SEARCH_COMPANION_BATCH_SEGMENTS",
80
+ "DS_SEARCH_COMPANION_YIELD_BLOCKS",
82
81
  ];
83
82
  const conflicts = conflictVars.filter((v) => process.env[v] != null);
84
83
  if (conflicts.length > 0) {
@@ -86,35 +85,39 @@ function applyAutoTune(overrideMb: number | null): void {
86
85
  process.exit(1);
87
86
  }
88
87
 
89
- const presets = [256, 512, 1024, 2048, 4096, 8192];
88
+ const presets = [...AUTO_TUNE_PRESETS];
90
89
  const preset = [...presets].reverse().find((v) => v <= memMb);
91
90
  if (!preset) {
92
91
  console.error(`DS_MEMORY_LIMIT_MB=${memMb} is below the minimum preset (256)`);
93
92
  process.exit(1);
94
93
  }
95
-
96
- const tuneFor = (p: number): AutoTuneConfig => ({
97
- sqliteCacheMb: Math.max(8, Math.floor(p / 16)),
98
- indexMemMb: Math.max(4, Math.floor(p / 64)),
99
- ingestBatchMb: Math.max(2, Math.floor(p / 128)),
100
- ingestQueueMb: Math.max(8, Math.floor(p / 32)),
101
- indexBuildConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
102
- indexCompactConcurrency: p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
103
- segmenterWorkers: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1,
104
- uploadConcurrency: p >= 8192 ? 16 : p >= 4096 ? 8 : p >= 1024 ? 4 : 2,
105
- });
106
- const tune = tuneFor(preset);
94
+ const tune: AutoTuneConfig = tuneForPreset(preset);
107
95
 
108
96
  const memoryLimitMb = memoryLimitForPreset(preset);
97
+ process.env.DS_AUTO_TUNE_REQUESTED_MB = String(memMb);
98
+ process.env.DS_AUTO_TUNE_PRESET_MB = String(preset);
99
+ process.env.DS_AUTO_TUNE_EFFECTIVE_MEMORY_LIMIT_MB = String(memoryLimitMb);
109
100
  process.env.DS_MEMORY_LIMIT_MB = String(memoryLimitMb);
101
+ process.env.DS_SEGMENT_MAX_BYTES = String(tune.segmentMaxMiB * 1024 * 1024);
102
+ process.env.DS_SEGMENT_TARGET_ROWS = String(tune.segmentTargetRows);
110
103
  process.env.DS_SQLITE_CACHE_MB = String(tune.sqliteCacheMb);
104
+ process.env.DS_WORKER_SQLITE_CACHE_MB = String(tune.workerSqliteCacheMb);
111
105
  process.env.DS_INDEX_RUN_MEM_CACHE_BYTES = String(tune.indexMemMb * 1024 * 1024);
106
+ process.env.DS_LEXICON_INDEX_CACHE_MAX_BYTES = String(tune.lexiconIndexCacheMb * 1024 * 1024);
107
+ process.env.DS_SEARCH_COMPANION_TOC_CACHE_BYTES = String(tune.searchCompanionTocCacheMb * 1024 * 1024);
108
+ process.env.DS_SEARCH_COMPANION_SECTION_CACHE_BYTES = String(tune.searchCompanionSectionCacheMb * 1024 * 1024);
112
109
  process.env.DS_INGEST_MAX_BATCH_BYTES = String(tune.ingestBatchMb * 1024 * 1024);
113
110
  process.env.DS_INGEST_MAX_QUEUE_BYTES = String(tune.ingestQueueMb * 1024 * 1024);
111
+ process.env.DS_INGEST_CONCURRENCY = String(tune.ingestConcurrency);
112
+ process.env.DS_READ_CONCURRENCY = String(tune.readConcurrency);
113
+ process.env.DS_SEARCH_CONCURRENCY = String(tune.searchConcurrency);
114
+ process.env.DS_ASYNC_INDEX_CONCURRENCY = String(tune.asyncIndexConcurrency);
114
115
  process.env.DS_INDEX_BUILD_CONCURRENCY = String(tune.indexBuildConcurrency);
115
116
  process.env.DS_INDEX_COMPACT_CONCURRENCY = String(tune.indexCompactConcurrency);
116
117
  process.env.DS_SEGMENTER_WORKERS = String(tune.segmenterWorkers);
117
118
  process.env.DS_UPLOAD_CONCURRENCY = String(tune.uploadConcurrency);
119
+ process.env.DS_SEARCH_COMPANION_BATCH_SEGMENTS = String(tune.searchCompanionBatchSegments);
120
+ process.env.DS_SEARCH_COMPANION_YIELD_BLOCKS = String(tune.searchCompanionYieldBlocks);
118
121
 
119
122
  const presetLine = formatPresetList(presets, preset, (v) => v, (v) => String(v));
120
123
  console.log(`Auto-tuning for memory preset ${presetLine}`);
@@ -122,22 +125,117 @@ function applyAutoTune(overrideMb: number | null): void {
122
125
  `DS_MEMORY_LIMIT_MB presets: ${formatPresetList(presets, preset, (p) => memoryLimitForPreset(p), (v) => String(v))}`
123
126
  );
124
127
  console.log(
125
- `DS_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneFor(p).sqliteCacheMb, (v) => String(v))}`
128
+ `DS_SEGMENT_MAX_MIB presets: ${formatPresetList(
129
+ presets,
130
+ preset,
131
+ (p) => tuneForPreset(p).segmentMaxMiB,
132
+ (v) => String(v)
133
+ )}`
134
+ );
135
+ console.log(
136
+ `DS_SEGMENT_TARGET_ROWS presets: ${formatPresetList(
137
+ presets,
138
+ preset,
139
+ (p) => tuneForPreset(p).segmentTargetRows,
140
+ (v) => String(v)
141
+ )}`
142
+ );
143
+ console.log(
144
+ `DS_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).sqliteCacheMb, (v) => String(v))}`
145
+ );
146
+ console.log(
147
+ `DS_WORKER_SQLITE_CACHE_MB presets: ${formatPresetList(
148
+ presets,
149
+ preset,
150
+ (p) => tuneForPreset(p).workerSqliteCacheMb,
151
+ (v) => String(v)
152
+ )}`
153
+ );
154
+ console.log(
155
+ `DS_INDEX_RUN_MEM_CACHE_MB presets: ${formatPresetList(
156
+ presets,
157
+ preset,
158
+ (p) => tuneForPreset(p).indexMemMb,
159
+ (v) => String(v)
160
+ )}`
161
+ );
162
+ console.log(
163
+ `DS_LEXICON_INDEX_CACHE_MB presets: ${formatPresetList(
164
+ presets,
165
+ preset,
166
+ (p) => tuneForPreset(p).lexiconIndexCacheMb,
167
+ (v) => String(v)
168
+ )}`
169
+ );
170
+ console.log(
171
+ `DS_SEARCH_COMPANION_TOC_CACHE_MB presets: ${formatPresetList(
172
+ presets,
173
+ preset,
174
+ (p) => tuneForPreset(p).searchCompanionTocCacheMb,
175
+ (v) => String(v)
176
+ )}`
177
+ );
178
+ console.log(
179
+ `DS_SEARCH_COMPANION_SECTION_CACHE_MB presets: ${formatPresetList(
180
+ presets,
181
+ preset,
182
+ (p) => tuneForPreset(p).searchCompanionSectionCacheMb,
183
+ (v) => String(v)
184
+ )}`
185
+ );
186
+ console.log(
187
+ `DS_INGEST_MAX_BATCH_MB presets: ${formatPresetList(
188
+ presets,
189
+ preset,
190
+ (p) => tuneForPreset(p).ingestBatchMb,
191
+ (v) => String(v)
192
+ )}`
126
193
  );
127
194
  console.log(
128
- `DS_INDEX_RUN_MEM_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneFor(p).indexMemMb, (v) => String(v))}`
195
+ `DS_INGEST_MAX_QUEUE_MB presets: ${formatPresetList(
196
+ presets,
197
+ preset,
198
+ (p) => tuneForPreset(p).ingestQueueMb,
199
+ (v) => String(v)
200
+ )}`
129
201
  );
130
202
  console.log(
131
- `DS_INGEST_MAX_BATCH_MB presets: ${formatPresetList(presets, preset, (p) => tuneFor(p).ingestBatchMb, (v) => String(v))}`
203
+ `DS_INGEST_CONCURRENCY presets: ${formatPresetList(
204
+ presets,
205
+ preset,
206
+ (p) => tuneForPreset(p).ingestConcurrency,
207
+ (v) => String(v)
208
+ )}`
132
209
  );
133
210
  console.log(
134
- `DS_INGEST_MAX_QUEUE_MB presets: ${formatPresetList(presets, preset, (p) => tuneFor(p).ingestQueueMb, (v) => String(v))}`
211
+ `DS_READ_CONCURRENCY presets: ${formatPresetList(
212
+ presets,
213
+ preset,
214
+ (p) => tuneForPreset(p).readConcurrency,
215
+ (v) => String(v)
216
+ )}`
217
+ );
218
+ console.log(
219
+ `DS_SEARCH_CONCURRENCY presets: ${formatPresetList(
220
+ presets,
221
+ preset,
222
+ (p) => tuneForPreset(p).searchConcurrency,
223
+ (v) => String(v)
224
+ )}`
225
+ );
226
+ console.log(
227
+ `DS_ASYNC_INDEX_CONCURRENCY presets: ${formatPresetList(
228
+ presets,
229
+ preset,
230
+ (p) => tuneForPreset(p).asyncIndexConcurrency,
231
+ (v) => String(v)
232
+ )}`
135
233
  );
136
234
  console.log(
137
235
  `DS_INDEX_BUILD_CONCURRENCY presets: ${formatPresetList(
138
236
  presets,
139
237
  preset,
140
- (p) => tuneFor(p).indexBuildConcurrency,
238
+ (p) => tuneForPreset(p).indexBuildConcurrency,
141
239
  (v) => String(v)
142
240
  )}`
143
241
  );
@@ -145,7 +243,7 @@ function applyAutoTune(overrideMb: number | null): void {
145
243
  `DS_INDEX_COMPACT_CONCURRENCY presets: ${formatPresetList(
146
244
  presets,
147
245
  preset,
148
- (p) => tuneFor(p).indexCompactConcurrency,
246
+ (p) => tuneForPreset(p).indexCompactConcurrency,
149
247
  (v) => String(v)
150
248
  )}`
151
249
  );
@@ -153,7 +251,7 @@ function applyAutoTune(overrideMb: number | null): void {
153
251
  `DS_SEGMENTER_WORKERS presets: ${formatPresetList(
154
252
  presets,
155
253
  preset,
156
- (p) => tuneFor(p).segmenterWorkers,
254
+ (p) => tuneForPreset(p).segmenterWorkers,
157
255
  (v) => String(v)
158
256
  )}`
159
257
  );
@@ -161,7 +259,23 @@ function applyAutoTune(overrideMb: number | null): void {
161
259
  `DS_UPLOAD_CONCURRENCY presets: ${formatPresetList(
162
260
  presets,
163
261
  preset,
164
- (p) => tuneFor(p).uploadConcurrency,
262
+ (p) => tuneForPreset(p).uploadConcurrency,
263
+ (v) => String(v)
264
+ )}`
265
+ );
266
+ console.log(
267
+ `DS_SEARCH_COMPANION_BATCH_SEGMENTS presets: ${formatPresetList(
268
+ presets,
269
+ preset,
270
+ (p) => tuneForPreset(p).searchCompanionBatchSegments,
271
+ (v) => String(v)
272
+ )}`
273
+ );
274
+ console.log(
275
+ `DS_SEARCH_COMPANION_YIELD_BLOCKS presets: ${formatPresetList(
276
+ presets,
277
+ preset,
278
+ (p) => tuneForPreset(p).searchCompanionYieldBlocks,
165
279
  (v) => String(v)
166
280
  )}`
167
281
  );