@tungthedev/streams-server 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/CODE_OF_CONDUCT.md +45 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/LICENSE +201 -0
  4. package/README.md +58 -0
  5. package/SECURITY.md +42 -0
  6. package/bin/prisma-streams-server +2 -0
  7. package/package.json +46 -0
  8. package/src/app.ts +583 -0
  9. package/src/app_core.ts +3144 -0
  10. package/src/app_local.ts +206 -0
  11. package/src/auth.ts +124 -0
  12. package/src/auto_tune.ts +69 -0
  13. package/src/backpressure.ts +66 -0
  14. package/src/bootstrap.ts +613 -0
  15. package/src/compute/demo_entry.ts +415 -0
  16. package/src/compute/demo_site.ts +1242 -0
  17. package/src/compute/entry.ts +19 -0
  18. package/src/compute/package_entry.ts +4 -0
  19. package/src/compute/virtual-modules.d.ts +15 -0
  20. package/src/compute/worker_module_url.ts +9 -0
  21. package/src/concurrency_gate.ts +108 -0
  22. package/src/config.ts +402 -0
  23. package/src/db/bootstrap_store.ts +9 -0
  24. package/src/db/db.ts +2424 -0
  25. package/src/db/schema.ts +925 -0
  26. package/src/db/sqlite_manifest_snapshot.ts +81 -0
  27. package/src/db/sqlite_touch_store.ts +491 -0
  28. package/src/db/sqlite_wal_store.ts +472 -0
  29. package/src/details/full_mode_details.ts +568 -0
  30. package/src/expiry_sweeper.ts +47 -0
  31. package/src/foreground_activity.ts +55 -0
  32. package/src/hist.ts +169 -0
  33. package/src/index/binary_fuse.ts +379 -0
  34. package/src/index/indexer.ts +947 -0
  35. package/src/index/lexicon_file_cache.ts +261 -0
  36. package/src/index/lexicon_format.ts +93 -0
  37. package/src/index/lexicon_indexer.ts +863 -0
  38. package/src/index/run_cache.ts +84 -0
  39. package/src/index/run_format.ts +213 -0
  40. package/src/index/schedule.ts +28 -0
  41. package/src/index/secondary_indexer.ts +901 -0
  42. package/src/index/secondary_schema.ts +105 -0
  43. package/src/ingest.ts +309 -0
  44. package/src/lens/lens.ts +501 -0
  45. package/src/manifest.ts +249 -0
  46. package/src/memory.ts +334 -0
  47. package/src/metrics.ts +147 -0
  48. package/src/metrics_emitter.ts +83 -0
  49. package/src/notifier.ts +180 -0
  50. package/src/objectstore/accounting.ts +151 -0
  51. package/src/objectstore/interface.ts +13 -0
  52. package/src/objectstore/mock_r2.ts +269 -0
  53. package/src/objectstore/null.ts +32 -0
  54. package/src/objectstore/r2.ts +318 -0
  55. package/src/observe/pairing.ts +61 -0
  56. package/src/observe/request.ts +772 -0
  57. package/src/offset.ts +70 -0
  58. package/src/postgres/bootstrap.ts +269 -0
  59. package/src/postgres/companions.ts +197 -0
  60. package/src/postgres/control_restore.ts +109 -0
  61. package/src/postgres/details.ts +189 -0
  62. package/src/postgres/lexicon_index.ts +260 -0
  63. package/src/postgres/routing_index.ts +189 -0
  64. package/src/postgres/rows.ts +132 -0
  65. package/src/postgres/schema.ts +355 -0
  66. package/src/postgres/secondary_index.ts +238 -0
  67. package/src/postgres/segments.ts +900 -0
  68. package/src/postgres/stats.ts +103 -0
  69. package/src/postgres/store.ts +947 -0
  70. package/src/postgres/touch.ts +591 -0
  71. package/src/postgres/types.ts +32 -0
  72. package/src/profiles/evlog/schema.ts +234 -0
  73. package/src/profiles/evlog.ts +473 -0
  74. package/src/profiles/generic.ts +51 -0
  75. package/src/profiles/index.ts +237 -0
  76. package/src/profiles/metrics/block_format.ts +109 -0
  77. package/src/profiles/metrics/normalize.ts +366 -0
  78. package/src/profiles/metrics/schema.ts +319 -0
  79. package/src/profiles/metrics.ts +83 -0
  80. package/src/profiles/otelTraces/normalize.ts +955 -0
  81. package/src/profiles/otelTraces/otlp.ts +1002 -0
  82. package/src/profiles/otelTraces/schema.ts +408 -0
  83. package/src/profiles/otelTraces.ts +390 -0
  84. package/src/profiles/profile.ts +284 -0
  85. package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
  86. package/src/profiles/stateProtocol/changes.ts +24 -0
  87. package/src/profiles/stateProtocol/ingest.ts +115 -0
  88. package/src/profiles/stateProtocol/routes.ts +511 -0
  89. package/src/profiles/stateProtocol/types.ts +6 -0
  90. package/src/profiles/stateProtocol/validation.ts +51 -0
  91. package/src/profiles/stateProtocol.ts +107 -0
  92. package/src/read_filter.ts +468 -0
  93. package/src/reader.ts +2986 -0
  94. package/src/runtime/hash.ts +156 -0
  95. package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
  96. package/src/runtime/hash_vendor/NOTICE.md +8 -0
  97. package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
  98. package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
  99. package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
  100. package/src/runtime/host_runtime.ts +5 -0
  101. package/src/runtime_memory.ts +200 -0
  102. package/src/runtime_memory_sampler.ts +237 -0
  103. package/src/schema/lens_schema.ts +290 -0
  104. package/src/schema/proof.ts +547 -0
  105. package/src/schema/read_json.ts +51 -0
  106. package/src/schema/registry.ts +966 -0
  107. package/src/search/agg_format.ts +638 -0
  108. package/src/search/aggregate.ts +409 -0
  109. package/src/search/binary/codec.ts +162 -0
  110. package/src/search/binary/docset.ts +67 -0
  111. package/src/search/binary/restart_strings.ts +181 -0
  112. package/src/search/binary/varint.ts +34 -0
  113. package/src/search/bitset.ts +19 -0
  114. package/src/search/col_format.ts +382 -0
  115. package/src/search/col_runtime.ts +59 -0
  116. package/src/search/column_encoding.ts +43 -0
  117. package/src/search/companion_file_cache.ts +319 -0
  118. package/src/search/companion_format.ts +327 -0
  119. package/src/search/companion_manager.ts +1305 -0
  120. package/src/search/companion_plan.ts +229 -0
  121. package/src/search/exact_format.ts +281 -0
  122. package/src/search/exact_runtime.ts +55 -0
  123. package/src/search/fts_format.ts +423 -0
  124. package/src/search/fts_runtime.ts +333 -0
  125. package/src/search/query.ts +875 -0
  126. package/src/search/schema.ts +245 -0
  127. package/src/segment/cache.ts +270 -0
  128. package/src/segment/cached_segment.ts +89 -0
  129. package/src/segment/format.ts +403 -0
  130. package/src/segment/segmenter.ts +412 -0
  131. package/src/segment/segmenter_worker.ts +72 -0
  132. package/src/segment/segmenter_workers.ts +130 -0
  133. package/src/server.ts +264 -0
  134. package/src/server_auto_tune.ts +158 -0
  135. package/src/sqlite/adapter.ts +335 -0
  136. package/src/sqlite/runtime_stats.ts +163 -0
  137. package/src/stats.ts +205 -0
  138. package/src/store/append.ts +50 -0
  139. package/src/store/bootstrap_restore_store.ts +71 -0
  140. package/src/store/capabilities.ts +86 -0
  141. package/src/store/full_mode_details_store.ts +71 -0
  142. package/src/store/index_store.ts +104 -0
  143. package/src/store/profile_touch_store.ts +1 -0
  144. package/src/store/rows.ts +144 -0
  145. package/src/store/schema_profile_store.ts +73 -0
  146. package/src/store/schema_publication.ts +6 -0
  147. package/src/store/segment_manifest_store.ts +129 -0
  148. package/src/store/segment_read_store.ts +22 -0
  149. package/src/store/stats_accounting_store.ts +83 -0
  150. package/src/store/touch_store.ts +98 -0
  151. package/src/store/wal_store.ts +21 -0
  152. package/src/stream_size_reconciler.ts +100 -0
  153. package/src/touch/canonical_change.ts +7 -0
  154. package/src/touch/live_keys.ts +158 -0
  155. package/src/touch/live_metrics.ts +841 -0
  156. package/src/touch/live_templates.ts +449 -0
  157. package/src/touch/manager.ts +1292 -0
  158. package/src/touch/process_batch.ts +576 -0
  159. package/src/touch/processor_worker.ts +85 -0
  160. package/src/touch/spec.ts +459 -0
  161. package/src/touch/touch_journal.ts +771 -0
  162. package/src/touch/touch_key_id.ts +20 -0
  163. package/src/touch/worker_pool.ts +191 -0
  164. package/src/touch/worker_protocol.ts +57 -0
  165. package/src/types/proper-lockfile.d.ts +1 -0
  166. package/src/uploader.ts +358 -0
  167. package/src/util/base32_crockford.ts +81 -0
  168. package/src/util/bloom256.ts +67 -0
  169. package/src/util/byte_lru.ts +73 -0
  170. package/src/util/cleanup.ts +22 -0
  171. package/src/util/crc32c.ts +29 -0
  172. package/src/util/ds_error.ts +15 -0
  173. package/src/util/duration.ts +17 -0
  174. package/src/util/endian.ts +53 -0
  175. package/src/util/json_pointer.ts +148 -0
  176. package/src/util/log.ts +25 -0
  177. package/src/util/lru.ts +53 -0
  178. package/src/util/retry.ts +35 -0
  179. package/src/util/siphash.ts +71 -0
  180. package/src/util/stream_paths.ts +50 -0
  181. package/src/util/time.ts +14 -0
  182. package/src/util/yield.ts +3 -0
  183. package/src/util/zstd.ts +24 -0
@@ -0,0 +1,412 @@
1
+ import { mkdirSync, openSync, closeSync, writeSync, fsyncSync, renameSync, existsSync, unlinkSync } from "node:fs";
2
+ import { dirname } from "node:path";
3
+ import type { Config } from "../config";
4
+ import type { SegmentStore } from "../store/segment_manifest_store";
5
+ import { encodeBlock, encodeFooter, type BlockIndexEntry, type SegmentRecord } from "./format";
6
+ import { readU32BE } from "../util/endian";
7
+ import { localSegmentPath, streamHash16Hex } from "../util/stream_paths";
8
+ import { LruCache } from "../util/lru";
9
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler";
10
+ import { yieldToEventLoop } from "../util/yield";
11
+
12
/**
 * Optional tuning overrides for the segmenter. Any field left unset falls
 * back to the default noted on it.
 */
export type SegmenterOptions = {
  /** Pending-bytes threshold passed to the candidate query. Default: `config.segmentMaxBytes`. */
  minCandidateBytes?: number;
  /** Pending-rows threshold; also the row count at which a build stops. Default: `config.segmentTargetRows`. */
  minCandidateRows?: number;
  /** Maximum time since the last segment cut before sealing anyway. Default: `config.segmentMaxIntervalMs`. */
  maxIntervalMs?: number;
  /** Number of candidate streams requested per tick. Default: 8. */
  candidatesPerTick?: number;
  /** Upper bound on rows written into one segment. Default: 250_000. */
  maxRowsPerSegment?: number;
};
19
+
20
/** Optional callbacks the segmenter invokes as it makes progress. */
export type SegmenterHooks = {
  /**
   * Invoked after a segment has been committed.
   *
   * @param stream - Name of the stream the segment belongs to.
   * @param payloadBytes - Sum of the payload sizes of the sealed rows.
   * @param segmentBytes - Size of the written segment file, footer included.
   */
  onSegmentSealed?: (stream: string, payloadBytes: number, segmentBytes: number) => void;
};
23
+
24
/** Snapshot of the work a segmenter (or a pool of them) currently has in flight. */
export type SegmenterMemoryStats = {
  /** Number of segment builds currently running. */
  active_builds: number;
  /** Number of streams currently being built. */
  active_streams: number;
  /** Payload bytes read so far by the running build(s). */
  active_payload_bytes: number;
  /** Bytes already written plus the approximate size of the block being assembled. */
  active_segment_bytes_estimate: number;
  /** Rows read so far by the running build(s). */
  active_rows: number;
};
31
+
32
// Second argument to recentSegmentCompressionRatio() when sizing the next seal target.
const SEGMENT_COMPRESSION_WINDOW = 8;

// Compression ratios below this value trigger a boosted payload seal target;
// it is also the fraction of segmentMaxBytes the boosted target aims for.
const MIN_COMPRESSED_FILL_RATIO = 0.5;

// The boosted payload seal target never exceeds segmentMaxBytes times this factor.
const MAX_COMPRESSION_BOOST_MULTIPLIER = 5;
35
+
36
/**
 * Seals pending WAL rows of candidate streams into local segment files.
 *
 * Each tick asks the store for candidate streams and builds at most one
 * segment per candidate: the stream is claimed, its WAL rows are encoded into
 * blocks that are written to a temporary file, the file is fsynced and
 * renamed into place, and the sealed segment is committed to the store.
 * Streams whose build throws are skipped by the failure tracker until their
 * back-off window has elapsed.
 */
export class Segmenter {
  /** Lower-cased message fragments of errors that are expected while the database is shutting down. */
  private static readonly SHUTDOWN_ERROR_MARKERS: readonly string[] = [
    "database has closed",
    "closed database",
    "statement has finalized",
  ];

  private readonly config: Config;
  private readonly db: SegmentStore;
  private readonly opts: Required<SegmenterOptions>;
  private readonly hooks?: SegmenterHooks;
  private readonly memorySampler?: RuntimeMemorySampler;
  private timer: ReturnType<typeof setInterval> | null = null;
  private running = false;
  private stopping = false;
  private readonly failures = new FailureTracker(1024);
  private activeBuildStream: string | null = null;
  private activePayloadBytes = 0;
  private activeSegmentBytesEstimate = 0;
  private activeRows = 0;

  /**
   * @param config - Server configuration (segment sizes, intervals, root directory).
   * @param db - Store used to find candidates, read WAL rows and commit segments.
   * @param opts - Optional tuning overrides; unset fields fall back to config values or built-in defaults.
   * @param hooks - Optional progress callbacks.
   * @param memorySampler - Optional sampler that is told when a segment cut starts and ends.
   */
  constructor(
    config: Config,
    db: SegmentStore,
    opts: SegmenterOptions = {},
    hooks?: SegmenterHooks,
    memorySampler?: RuntimeMemorySampler
  ) {
    this.config = config;
    this.db = db;
    this.opts = {
      minCandidateBytes: opts.minCandidateBytes ?? config.segmentMaxBytes,
      minCandidateRows: opts.minCandidateRows ?? config.segmentTargetRows,
      maxIntervalMs: opts.maxIntervalMs ?? config.segmentMaxIntervalMs,
      candidatesPerTick: opts.candidatesPerTick ?? 8,
      maxRowsPerSegment: opts.maxRowsPerSegment ?? 250_000,
    };
    this.hooks = hooks;
    this.memorySampler = memorySampler;
  }

  /**
   * Starts the periodic tick. Does nothing when a timer is already running or
   * when `segmentCheckIntervalMs` is not positive. Always clears a previous
   * hard-stop flag.
   */
  start(): void {
    this.stopping = false;
    if (this.timer) return;
    if (this.config.segmentCheckIntervalMs <= 0) return;
    this.timer = setInterval(() => {
      void this.tick();
    }, this.config.segmentCheckIntervalMs);
  }

  /**
   * Cancels the periodic tick and resolves once no tick is running.
   *
   * @param hard - When true, the stopping flag is raised: new ticks return
   *   immediately, errors are not logged, the commit step of an in-flight
   *   build is skipped and its claim is not released.
   */
  async stop(hard = false): Promise<void> {
    this.stopping = hard;
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
    while (this.running) {
      await new Promise((resolve) => setTimeout(resolve, 5));
    }
  }

  /** Returns counters describing the build that is currently in flight, if any. */
  getMemoryStats(): SegmenterMemoryStats {
    const building = this.activeBuildStream ? 1 : 0;
    return {
      active_builds: building,
      active_streams: building,
      active_payload_bytes: this.activePayloadBytes,
      active_segment_bytes_estimate: this.activeSegmentBytesEstimate,
      active_rows: this.activeRows,
    };
  }

  /**
   * Runs one pass over the current candidate streams. Re-entrant calls and
   * calls made after a hard stop return immediately. Errors never escape:
   * they are recorded for back-off and logged unless they look like a
   * shutdown artefact.
   */
  async tick(): Promise<void> {
    if (this.stopping || this.running) return;
    this.running = true;
    try {
      const candidates = await this.db.candidates(
        BigInt(this.opts.minCandidateBytes),
        BigInt(this.opts.minCandidateRows),
        BigInt(this.opts.maxIntervalMs),
        this.opts.candidatesPerTick
      );
      for (const c of candidates) {
        if (this.failures.shouldSkip(c.stream)) continue;
        try {
          await this.buildOne(c.stream);
          this.failures.recordSuccess(c.stream);
        } catch (e) {
          this.failures.recordFailure(c.stream);
          if (this.shouldLogError(e)) {
            // eslint-disable-next-line no-console
            console.error("segment build failed", c.stream, e);
          }
        }
      }
    } catch (e) {
      if (this.shouldLogError(e)) {
        // eslint-disable-next-line no-console
        console.error("segmenter tick error", e);
      }
    } finally {
      this.running = false;
    }
  }

  /**
   * Decides whether an error deserves a log line: nothing is logged during a
   * hard stop, nor for errors whose message matches a known shutdown marker.
   */
  private shouldLogError(err: unknown): boolean {
    if (this.stopping) return false;
    const message = String((err as { message?: unknown } | null | undefined)?.message ?? err).toLowerCase();
    return !Segmenter.SHUTDOWN_ERROR_MARKERS.some((marker) => message.includes(marker));
  }

  /** True when the error carries a SQLite "busy" code (`SQLITE_BUSY*`) or errno (5 / 517). */
  private isSqliteBusy(err: unknown): boolean {
    const info = err as { code?: unknown; errno?: unknown } | null | undefined;
    const code = String(info?.code ?? "");
    const errno = Number(info?.errno ?? -1);
    return code === "SQLITE_BUSY" || code === "SQLITE_BUSY_SNAPSHOT" || errno === 5 || errno === 517;
  }

  /**
   * Runs `fn`, retrying with capped exponential delay (5 ms doubling up to
   * 200 ms) while it fails with a SQLite busy error and less than
   * `ingestBusyTimeoutMs` has elapsed. Any other error is rethrown at once.
   */
  private async runWithBusyRetry<T>(fn: () => T | Promise<T>): Promise<T> {
    const maxBusyMs = Math.max(0, this.config.ingestBusyTimeoutMs);
    if (maxBusyMs <= 0) return await fn();
    const startMs = Date.now();
    let attempt = 0;
    for (;;) {
      try {
        return await fn();
      } catch (e) {
        if (!this.isSqliteBusy(e)) throw e;
        const elapsed = Date.now() - startMs;
        if (elapsed >= maxBusyMs) throw e;
        const delay = Math.min(200, 5 * 2 ** attempt);
        attempt += 1;
        await new Promise((res) => setTimeout(res, delay));
      }
    }
  }

  /** Best-effort removal of a temporary segment file; failures are ignored. */
  private cleanupTmp(tmpPath: string): void {
    try {
      if (existsSync(tmpPath)) unlinkSync(tmpPath);
    } catch {
      // ignore
    }
  }

  /**
   * Computes the payload-byte threshold at which a segment is sealed.
   *
   * The base target is `segmentMaxBytes`. When the stream's recent
   * compression ratio is a finite value in (0, MIN_COMPRESSED_FILL_RATIO),
   * the target is raised to `segmentMaxBytes * MIN_COMPRESSED_FILL_RATIO / ratio`,
   * capped at MAX_COMPRESSION_BOOST_MULTIPLIER times the base target.
   */
  private async resolvePayloadSealTargetBytes(stream: string): Promise<bigint> {
    const baseTarget = BigInt(this.config.segmentMaxBytes);
    const ratio = await this.db.recentSegmentCompressionRatio(stream, SEGMENT_COMPRESSION_WINDOW);
    if (ratio == null || !Number.isFinite(ratio) || ratio <= 0 || ratio >= MIN_COMPRESSED_FILL_RATIO) {
      return baseTarget;
    }
    const desiredCompressedBytes = Math.ceil(this.config.segmentMaxBytes * MIN_COMPRESSED_FILL_RATIO);
    const boosted = BigInt(Math.ceil(desiredCompressedBytes / ratio));
    const maxBoosted = baseTarget * BigInt(MAX_COMPRESSION_BOOST_MULTIPLIER);
    if (boosted > maxBoosted) return maxBoosted;
    return boosted > baseTarget ? boosted : baseTarget;
  }

  /**
   * True when the stream has enough pending bytes or rows, or when the
   * configured maximum interval since its last segment cut has elapsed.
   */
  private async shouldSealStream(row: { stream: string; pending_bytes: bigint; pending_rows: bigint; last_segment_cut_ms: bigint }): Promise<boolean> {
    const payloadSealTargetBytes = await this.resolvePayloadSealTargetBytes(row.stream);
    if (row.pending_bytes >= payloadSealTargetBytes) return true;
    if (row.pending_rows >= BigInt(this.opts.minCandidateRows)) return true;
    if (this.opts.maxIntervalMs > 0 && BigInt(Date.now()) - row.last_segment_cut_ms >= BigInt(this.opts.maxIntervalMs)) return true;
    return false;
  }

  /**
   * Builds and commits at most one segment for `stream`.
   *
   * Returns without doing anything when the stream is missing, deleted, below
   * the seal thresholds, already claimed, or has no unsealed rows. Errors
   * propagate to the caller; the temporary file is always removed and the
   * claim is released unless a hard stop is in progress.
   */
  private async buildOne(stream: string): Promise<void> {
    if (this.stopping) return;
    let row = await this.db.getSegmentStreamState(stream);
    if (!row || this.db.isDeleted(row)) return;
    if (!(await this.shouldSealStream(row))) return;

    // Claim.
    const claim = await this.db.tryClaimSegment(stream);
    if (!claim) return;

    try {
      // Re-read under the claim: the state may have changed since the first check.
      const claimedRow = await this.db.getSegmentStreamState(stream);
      if (!claimedRow || this.db.isDeleted(claimedRow)) return;
      row = claimedRow;
      if (!(await this.shouldSealStream(row))) return;
      const startOffset = row.sealed_through + 1n;
      const maxOffset = row.next_offset - 1n;
      if (startOffset > maxOffset) return;

      this.activeBuildStream = stream;
      this.activePayloadBytes = 0;
      this.activeSegmentBytesEstimate = 0;
      this.activeRows = 0;
      const segmentIndex = await this.db.nextSegmentIndexForStream(stream);
      const shash = streamHash16Hex(stream);
      const localPath = localSegmentPath(this.config.rootDir, shash, segmentIndex);
      const tmpPath = `${localPath}.tmp`;
      const leaveCutPhase = this.memorySampler?.enter("cut", {
        stream,
        segment_index: segmentIndex,
      });

      // Everything after entering the "cut" phase runs inside this try so the
      // phase is always left, even when creating the directory or opening the
      // temporary file fails.
      let fd: number | null = null;
      try {
        mkdirSync(dirname(localPath), { recursive: true });

        // Build blocks and stream-write to temp file.
        const openFd = openSync(tmpPath, "w");
        fd = openFd;

        let blockRecords: SegmentRecord[] = [];
        let blockBytesApprox = 0;
        let fileBytes = 0;
        let blockCount = 0;
        let blockFirstOffset = startOffset;
        const blockIndex: BlockIndexEntry[] = [];

        // Encodes the buffered records as one block, appends it to the file
        // and records its index entry. The caller resets the record buffer.
        const flushBlock = (): void => {
          const blockOffset = fileBytes;
          const block = encodeBlock(blockRecords);
          const compressedLen = readU32BE(block, 8);
          blockIndex.push({
            blockOffset,
            firstOffset: blockFirstOffset,
            recordCount: blockRecords.length,
            compressedLen,
            firstAppendNs: blockRecords[0].appendNs,
            lastAppendNs: blockRecords[blockRecords.length - 1].appendNs,
          });
          writeSync(openFd, block);
          fileBytes += block.byteLength;
          blockCount += 1;
        };

        // Decide endOffset by scanning WAL rows until threshold.
        // IMPORTANT: pending_bytes tracks WAL payload bytes only (not record/block overhead).
        const payloadSealTargetBytes = await this.resolvePayloadSealTargetBytes(stream);
        const rowSealTarget = BigInt(this.opts.minCandidateRows);
        const maxRows = BigInt(this.opts.maxRowsPerSegment);
        let payloadBytes = 0n;
        let rowsSealed = 0n;
        let endOffset = startOffset - 1n;
        let lastAppendMs = 0n;

        let lastYieldMs = Date.now();
        let recordsSinceYield = 0;
        for await (const rec of this.db.readWalRange(stream, startOffset, maxOffset)) {
          const offset = BigInt(rec.offset);
          const payload: Uint8Array = rec.payload;
          const routingKey: Uint8Array | null = rec.routingKey ?? null;
          const appendMs = BigInt(rec.tsMs);
          lastAppendMs = appendMs;

          const keyBytes = routingKey ?? new Uint8Array(0);
          const segRec: SegmentRecord = {
            appendNs: appendMs * 1_000_000n,
            routingKey: keyBytes,
            payload,
          };
          // Approximate encoded size: 8 + 4 + key bytes + 4 + payload bytes.
          const recSize = 8 + 4 + keyBytes.byteLength + 4 + payload.byteLength;

          // Close the current block before it would exceed blockMaxBytes.
          if (blockRecords.length > 0 && blockBytesApprox + recSize > this.config.blockMaxBytes) {
            flushBlock();
            blockRecords = [];
            blockBytesApprox = 0;
            await yieldToEventLoop();
          }

          if (blockRecords.length === 0) blockFirstOffset = offset;
          blockRecords.push(segRec);
          blockBytesApprox += recSize;

          payloadBytes += BigInt(payload.byteLength);
          rowsSealed += 1n;
          endOffset = offset;
          this.activePayloadBytes = Number(payloadBytes);
          this.activeRows = Number(rowsSealed);
          this.activeSegmentBytesEstimate = fileBytes + blockBytesApprox;

          // Yield every 512 records or 10 ms so other work can interleave.
          recordsSinceYield += 1;
          if (recordsSinceYield >= 512 || Date.now() - lastYieldMs >= 10) {
            await yieldToEventLoop();
            lastYieldMs = Date.now();
            recordsSinceYield = 0;
          }

          if (payloadBytes >= payloadSealTargetBytes) break;
          if (rowsSealed >= rowSealTarget) break;
          if (rowsSealed >= maxRows) break;
        }

        if (rowsSealed === 0n) return;

        if (blockRecords.length > 0) flushBlock();

        const footer = encodeFooter(blockIndex);
        writeSync(openFd, footer);
        fileBytes += footer.byteLength;
        this.activeSegmentBytesEstimate = fileBytes;

        fsyncSync(openFd);

        const segmentId = `${shash}-${segmentIndex}-${startOffset.toString()}-${endOffset.toString()}`;
        renameSync(tmpPath, localPath);

        if (!this.stopping) {
          try {
            await this.runWithBusyRetry(async () => {
              await this.db.commitSealedSegment({
                segmentId,
                stream,
                segmentIndex,
                startOffset,
                endOffset,
                blockCount,
                lastAppendMs,
                sizeBytes: fileBytes,
                localPath,
                payloadBytes,
                rowsSealed,
                claimToken: claim.token,
              });
            });
          } catch (e) {
            // The commit failed, so the renamed file is not referenced by the store.
            try {
              if (existsSync(localPath)) unlinkSync(localPath);
            } catch {
              // ignore
            }
            throw e;
          }
          // Notify only after the commit succeeded, and outside the commit's
          // error handler: a throwing hook must not delete a committed file.
          this.hooks?.onSegmentSealed?.(stream, Number(payloadBytes), fileBytes);
        }
      } finally {
        try {
          if (fd !== null) closeSync(fd);
        } finally {
          this.cleanupTmp(tmpPath);
          leaveCutPhase?.();
        }
      }
    } finally {
      this.activeBuildStream = null;
      this.activePayloadBytes = 0;
      this.activeSegmentBytesEstimate = 0;
      this.activeRows = 0;
      // Release claim.
      if (!this.stopping) {
        try {
          await this.db.setSegmentInProgress(stream, 0, claim);
        } catch {
          // ignore
        }
      }
    }
  }
}
382
+
383
/**
 * Tracks per-stream build failures and applies exponential back-off
 * (500 ms doubling per consecutive failure, capped at 60 s).
 *
 * The failure count survives the end of a back-off window, so consecutive
 * failures really do lengthen the delay. It is cleared by `recordSuccess`,
 * or when the next failure arrives long after the previous window ended.
 */
class FailureTracker {
  private static readonly BASE_BACKOFF_MS = 500;
  private static readonly MAX_BACKOFF_MS = 60_000;

  private readonly cache: LruCache<string, { attempts: number; untilMs: number }>;

  /** @param maxEntries - Capacity handed to the underlying LRU cache. */
  constructor(maxEntries: number) {
    this.cache = new LruCache(maxEntries);
  }

  /**
   * True while the stream is inside its back-off window.
   *
   * An expired entry is deliberately kept: the caller checks `shouldSkip`
   * before every retry, so dropping the entry here would reset the failure
   * count and pin the back-off at its first step.
   */
  shouldSkip(stream: string): boolean {
    const item = this.cache.get(stream);
    if (!item) return false;
    return Date.now() < item.untilMs;
  }

  /** Registers one more failure for the stream and starts a new back-off window. */
  recordFailure(stream: string): void {
    const now = Date.now();
    const previous = this.cache.get(stream);
    // A failure that arrives long after the last window ended is unrelated to
    // the old streak, so the count starts over.
    const isStale = previous !== undefined && now - previous.untilMs > FailureTracker.MAX_BACKOFF_MS;
    const attempts = (previous && !isStale ? previous.attempts : 0) + 1;
    const backoff = Math.min(FailureTracker.MAX_BACKOFF_MS, FailureTracker.BASE_BACKOFF_MS * 2 ** (attempts - 1));
    this.cache.set(stream, { attempts, untilMs: now + backoff });
  }

  /** Forgets any failure history for the stream. */
  recordSuccess(stream: string): void {
    this.cache.delete(stream);
  }
}
@@ -0,0 +1,72 @@
1
+ import { parentPort, workerData, threadId } from "node:worker_threads";
2
+ import type { Config } from "../config.ts";
3
+ import { SqliteDurableStore } from "../db/db.ts";
4
+ import type { HostRuntime } from "../runtime/host_runtime.ts";
5
+ import { RuntimeMemorySampler } from "../runtime_memory_sampler.ts";
6
+ import { setSqliteRuntimeOverride } from "../sqlite/adapter.ts";
7
+ import { Segmenter, type SegmenterHooks, type SegmenterOptions } from "./segmenter.ts";
8
+ import { initConsoleLogging } from "../util/log.ts";
9
+
10
// Worker-thread entry point: runs one Segmenter against its own SQLite
// connection and reports sealed segments and memory stats to the parent.
initConsoleLogging();

const data = workerData as { config: Config; hostRuntime?: HostRuntime; opts?: SegmenterOptions };
const cfg = data.config;
setSqliteRuntimeOverride(data.hostRuntime ?? null);
// The main server process initializes/migrates schema; workers should avoid
// concurrent migrations on the same sqlite file.
const db = new SqliteDurableStore(cfg.dbPath, { cacheBytes: cfg.workerSqliteCacheBytes, skipMigrations: true });
const memorySampler =
  cfg.memorySamplerPath != null
    ? new RuntimeMemorySampler(cfg.memorySamplerPath, {
        intervalMs: cfg.memorySamplerIntervalMs,
        scope: `segmenter-worker-${threadId}`,
      })
    : undefined;
memorySampler?.start();

// Forward every sealed segment to the parent thread.
const hooks: SegmenterHooks = {
  onSegmentSealed: (stream, payloadBytes, segmentBytes) => {
    parentPort?.postMessage({ type: "sealed", stream, payloadBytes, segmentBytes });
  },
};

const segmenter = new Segmenter(cfg, db, data.opts ?? {}, hooks, memorySampler);
segmenter.start();

// Report in-flight build counters to the parent once per second.
const memoryTimer = setInterval(() => {
  try {
    parentPort?.postMessage({ type: "memory", workerId: threadId, stats: segmenter.getMemoryStats() });
  } catch {
    // ignore
  }
}, 1_000);

/**
 * Tears the worker down step by step; every step is best-effort so one
 * failure never prevents the next. The segmenter is awaited before the
 * database is closed so an in-flight build does not lose its connection.
 */
const shutdown = async (): Promise<void> => {
  clearInterval(memoryTimer);
  try {
    await segmenter.stop();
  } catch {
    // ignore
  }
  try {
    db.close();
  } catch {
    // ignore
  }
  try {
    memorySampler?.stop();
  } catch {
    // ignore
  }
  try {
    parentPort?.postMessage({ type: "stopped" });
  } catch {
    // ignore
  }
};

parentPort?.on("message", (msg: unknown) => {
  if (!msg || typeof msg !== "object") return;
  if ((msg as { type?: unknown }).type !== "stop") return;
  void shutdown();
});
@@ -0,0 +1,130 @@
1
+ import { Worker } from "node:worker_threads";
2
+ import type { Config } from "../config";
3
+ import { detectHostRuntime } from "../runtime/host_runtime.ts";
4
+ import { resolveWorkerModuleUrl } from "../compute/worker_module_url";
5
+ import type { SegmenterHooks, SegmenterMemoryStats, SegmenterOptions } from "./segmenter";
6
+
7
/** Lifecycle surface of a segmenter that is controlled from outside. */
export type SegmenterController = {
  /** Begins segmenting. */
  start: () => void;
  /** Stops segmenting; may complete asynchronously. `hard` requests a hard stop. */
  stop: (hard?: boolean) => void | Promise<void>;
  /** Optional snapshot of in-flight build counters. */
  getMemoryStats?: () => SegmenterMemoryStats;
};
12
+
13
/** Messages a segmenter worker thread posts to the pool, discriminated by `type`. */
type WorkerMessage =
  // A segment was sealed and committed by the worker.
  | { type: "sealed"; stream: string; payloadBytes: number; segmentBytes: number }
  // Periodic report of the worker's in-flight build counters.
  | { type: "memory"; workerId: number; stats: SegmenterMemoryStats }
  // The worker finished handling a stop request.
  | { type: "stopped" };
17
+
18
/**
 * Runs segmenters in worker threads and aggregates what they report.
 *
 * Workers that exit with a non-zero code while the pool is started are
 * respawned. Sealed-segment notifications are forwarded to the hooks, and
 * the latest memory report of every live worker is summed on request.
 */
export class SegmenterWorkerPool implements SegmenterController {
  private readonly config: Config;
  private readonly workerCount: number;
  private readonly opts: SegmenterOptions;
  private readonly hooks?: SegmenterHooks;
  private readonly workers: Worker[] = [];
  private readonly workerMemory = new Map<number, { stats: SegmenterMemoryStats; reportedAtMs: number }>();
  private started = false;

  /**
   * @param config - Server configuration handed to every worker.
   * @param workerCount - Number of workers to run; floored and clamped to be non-negative.
   * @param opts - Segmenter tuning overrides handed to every worker.
   * @param hooks - Optional callbacks invoked in this thread for worker events.
   */
  constructor(config: Config, workerCount: number, opts: SegmenterOptions = {}, hooks?: SegmenterHooks) {
    this.config = config;
    this.workerCount = Math.max(0, Math.floor(workerCount));
    this.opts = opts;
    this.hooks = hooks;
  }

  /** Spawns the configured number of workers; a second call while started does nothing. */
  start(): void {
    if (this.started) return;
    this.started = true;
    for (let i = 0; i < this.workerCount; i++) {
      this.spawnWorker(i);
    }
  }

  /**
   * Asks every worker to stop and terminates it, resolving once all
   * terminations have completed. The `_hard` argument is accepted for
   * interface compatibility and is not used.
   */
  async stop(_hard?: boolean): Promise<void> {
    if (!this.started) return;
    this.started = false;
    const workers = this.workers.slice();
    this.workers.length = 0;
    this.workerMemory.clear();
    // Await termination so the worker threads are gone before stop() resolves;
    // see the note in TouchProcessorWorkerPool.stop -- a lingering worker thread
    // racing the host process's WASM teardown can abort the process on Linux.
    await Promise.all(
      workers.map((w) => {
        try {
          w.postMessage({ type: "stop" });
        } catch {
          // ignore
        }
        return w.terminate();
      }),
    );
  }

  /**
   * Sums the most recent memory report of every worker. Reports older than
   * five seconds are dropped, and negative counters are treated as zero.
   */
  getMemoryStats(): SegmenterMemoryStats {
    const now = Date.now();
    let activeBuilds = 0;
    let activeStreams = 0;
    let activePayloadBytes = 0;
    let activeSegmentBytesEstimate = 0;
    let activeRows = 0;
    for (const [workerId, entry] of this.workerMemory) {
      if (now - entry.reportedAtMs > 5_000) {
        this.workerMemory.delete(workerId);
        continue;
      }
      activeBuilds += Math.max(0, entry.stats.active_builds);
      activeStreams += Math.max(0, entry.stats.active_streams);
      activePayloadBytes += Math.max(0, entry.stats.active_payload_bytes);
      activeSegmentBytesEstimate += Math.max(0, entry.stats.active_segment_bytes_estimate);
      activeRows += Math.max(0, entry.stats.active_rows);
    }
    return {
      active_builds: activeBuilds,
      active_streams: activeStreams,
      active_payload_bytes: activePayloadBytes,
      active_segment_bytes_estimate: activeSegmentBytesEstimate,
      active_rows: activeRows,
    };
  }

  /**
   * Starts one worker thread and wires up its message, error and exit handlers.
   *
   * @param idx - Pool slot of the worker; used in log messages and on respawn.
   */
  private spawnWorker(idx: number): void {
    const workerSpec = resolveWorkerModuleUrl(import.meta.url, "./segmenter_worker.ts", "../segment/segmenter_worker.js");
    const worker = new Worker(workerSpec, {
      workerData: {
        config: this.config,
        hostRuntime: detectHostRuntime(),
        opts: this.opts,
      },
      type: "module",
      smol: true,
    } as any);

    // Keys under which this worker's memory reports are stored. The thread id
    // is captured now because it may no longer be readable from the handle
    // once the worker has exited; ids the worker reports itself are added too.
    const memoryKeys = new Set<number>([worker.threadId]);

    worker.on("message", (msg: WorkerMessage) => {
      if (msg?.type === "sealed") {
        this.hooks?.onSegmentSealed?.(msg.stream, msg.payloadBytes, msg.segmentBytes);
      } else if (msg?.type === "memory") {
        memoryKeys.add(msg.workerId);
        this.workerMemory.set(msg.workerId, {
          stats: msg.stats,
          reportedAtMs: Date.now(),
        });
      }
    });

    worker.on("error", (err) => {
      // eslint-disable-next-line no-console
      console.error(`segmenter worker ${idx} error`, err);
    });

    worker.on("exit", (code) => {
      for (const key of memoryKeys) this.workerMemory.delete(key);
      // Drop the dead handle so respawns do not pile up stale workers.
      const position = this.workers.indexOf(worker);
      if (position !== -1) this.workers.splice(position, 1);
      if (!this.started) return;
      if (code !== 0) {
        // eslint-disable-next-line no-console
        console.error(`segmenter worker ${idx} exited with code ${code}, respawning`);
        this.spawnWorker(idx);
      }
    });

    this.workers.push(worker);
  }
}