lakesync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +74 -0
  2. package/dist/adapter.d.ts +369 -0
  3. package/dist/adapter.js +39 -0
  4. package/dist/adapter.js.map +1 -0
  5. package/dist/analyst.d.ts +268 -0
  6. package/dist/analyst.js +495 -0
  7. package/dist/analyst.js.map +1 -0
  8. package/dist/auth-CAVutXzx.d.ts +30 -0
  9. package/dist/base-poller-Qo_SmCZs.d.ts +82 -0
  10. package/dist/catalogue.d.ts +65 -0
  11. package/dist/catalogue.js +17 -0
  12. package/dist/catalogue.js.map +1 -0
  13. package/dist/chunk-4ARO6KTJ.js +257 -0
  14. package/dist/chunk-4ARO6KTJ.js.map +1 -0
  15. package/dist/chunk-5YOFCJQ7.js +1115 -0
  16. package/dist/chunk-5YOFCJQ7.js.map +1 -0
  17. package/dist/chunk-7D4SUZUM.js +38 -0
  18. package/dist/chunk-7D4SUZUM.js.map +1 -0
  19. package/dist/chunk-BNJOGBYK.js +335 -0
  20. package/dist/chunk-BNJOGBYK.js.map +1 -0
  21. package/dist/chunk-ICNT7I3K.js +1180 -0
  22. package/dist/chunk-ICNT7I3K.js.map +1 -0
  23. package/dist/chunk-P5DRFKIT.js +413 -0
  24. package/dist/chunk-P5DRFKIT.js.map +1 -0
  25. package/dist/chunk-X3RO5SYJ.js +880 -0
  26. package/dist/chunk-X3RO5SYJ.js.map +1 -0
  27. package/dist/client.d.ts +428 -0
  28. package/dist/client.js +2048 -0
  29. package/dist/client.js.map +1 -0
  30. package/dist/compactor.d.ts +342 -0
  31. package/dist/compactor.js +793 -0
  32. package/dist/compactor.js.map +1 -0
  33. package/dist/coordinator-CxckTzYW.d.ts +396 -0
  34. package/dist/db-types-BR6Kt4uf.d.ts +29 -0
  35. package/dist/gateway-D5SaaMvT.d.ts +337 -0
  36. package/dist/gateway-server.d.ts +306 -0
  37. package/dist/gateway-server.js +4663 -0
  38. package/dist/gateway-server.js.map +1 -0
  39. package/dist/gateway.d.ts +196 -0
  40. package/dist/gateway.js +79 -0
  41. package/dist/gateway.js.map +1 -0
  42. package/dist/hlc-DiD8QNG3.d.ts +70 -0
  43. package/dist/index.d.ts +245 -0
  44. package/dist/index.js +102 -0
  45. package/dist/index.js.map +1 -0
  46. package/dist/json-dYtqiL0F.d.ts +18 -0
  47. package/dist/nessie-client-DrNikVXy.d.ts +160 -0
  48. package/dist/parquet.d.ts +78 -0
  49. package/dist/parquet.js +15 -0
  50. package/dist/parquet.js.map +1 -0
  51. package/dist/proto.d.ts +434 -0
  52. package/dist/proto.js +67 -0
  53. package/dist/proto.js.map +1 -0
  54. package/dist/react.d.ts +147 -0
  55. package/dist/react.js +224 -0
  56. package/dist/react.js.map +1 -0
  57. package/dist/resolver-C3Wphi6O.d.ts +10 -0
  58. package/dist/result-CojzlFE2.d.ts +64 -0
  59. package/dist/src-QU2YLPZY.js +383 -0
  60. package/dist/src-QU2YLPZY.js.map +1 -0
  61. package/dist/src-WYBF5LOI.js +102 -0
  62. package/dist/src-WYBF5LOI.js.map +1 -0
  63. package/dist/src-WZNPHANQ.js +426 -0
  64. package/dist/src-WZNPHANQ.js.map +1 -0
  65. package/dist/types-Bs-QyOe-.d.ts +143 -0
  66. package/dist/types-DAQL_vU_.d.ts +118 -0
  67. package/dist/types-DSC_EiwR.d.ts +45 -0
  68. package/dist/types-V_jVu2sA.d.ts +73 -0
  69. package/package.json +119 -0
@@ -0,0 +1,793 @@
1
+ import {
2
+ encodeSyncResponse
3
+ } from "./chunk-BNJOGBYK.js";
4
+ import {
5
+ readParquetToDeltas,
6
+ writeDeltasToParquet
7
+ } from "./chunk-4ARO6KTJ.js";
8
+ import {
9
+ Err,
10
+ FlushError,
11
+ HLC,
12
+ LakeSyncError,
13
+ Ok,
14
+ rowKey
15
+ } from "./chunk-ICNT7I3K.js";
16
+ import "./chunk-7D4SUZUM.js";
17
+
18
+ // ../compactor/src/checkpoint-generator.ts
19
+ var DEFAULT_CHECKPOINT_CONFIG = {
20
+ chunkBytes: 16 * 1024 * 1024
21
+ };
22
+ var ESTIMATED_BASE_BYTES = 200;
23
+ var ESTIMATED_BYTES_PER_COLUMN = 50;
24
+ var CheckpointGenerator = class {
25
+ adapter;
26
+ gatewayId;
27
+ config;
28
+ constructor(adapter, _schema, gatewayId, config) {
29
+ this.adapter = adapter;
30
+ this.gatewayId = gatewayId;
31
+ this.config = config ?? DEFAULT_CHECKPOINT_CONFIG;
32
+ }
33
+ /**
34
+ * Generate checkpoint chunks from base Parquet files.
35
+ *
36
+ * Reads each base file sequentially, accumulates deltas, and flushes
37
+ * chunks when the estimated byte size exceeds the configured threshold.
38
+ *
39
+ * @param baseFileKeys - Storage keys of the base Parquet files
40
+ * @param snapshotHlc - The HLC timestamp representing this snapshot point
41
+ * @returns A Result containing the CheckpointResult, or a LakeSyncError on failure
42
+ */
43
+ async generate(baseFileKeys, snapshotHlc) {
44
+ if (baseFileKeys.length === 0) {
45
+ return Ok({ chunksWritten: 0, bytesWritten: 0, snapshotHlc });
46
+ }
47
+ const prefix = `checkpoints/${this.gatewayId}`;
48
+ const chunkNames = [];
49
+ let totalBytesWritten = 0;
50
+ let totalDeltas = 0;
51
+ let accumulator = [];
52
+ let accumulatedBytes = 0;
53
+ for (const key of baseFileKeys) {
54
+ const getResult = await this.adapter.getObject(key);
55
+ if (!getResult.ok) {
56
+ return Err(
57
+ new LakeSyncError(
58
+ `Failed to read base file: ${key}`,
59
+ "CHECKPOINT_READ_ERROR",
60
+ getResult.error
61
+ )
62
+ );
63
+ }
64
+ const parseResult = await readParquetToDeltas(getResult.value);
65
+ if (!parseResult.ok) {
66
+ return Err(
67
+ new LakeSyncError(
68
+ `Failed to parse base file: ${key}`,
69
+ "CHECKPOINT_PARSE_ERROR",
70
+ parseResult.error
71
+ )
72
+ );
73
+ }
74
+ for (const delta of parseResult.value) {
75
+ accumulator.push(delta);
76
+ accumulatedBytes += ESTIMATED_BASE_BYTES + delta.columns.length * ESTIMATED_BYTES_PER_COLUMN;
77
+ if (accumulatedBytes >= this.config.chunkBytes) {
78
+ const flushResult = await this.flushChunk(
79
+ prefix,
80
+ chunkNames.length,
81
+ accumulator,
82
+ snapshotHlc
83
+ );
84
+ if (!flushResult.ok) return flushResult;
85
+ totalBytesWritten += flushResult.value;
86
+ totalDeltas += accumulator.length;
87
+ chunkNames.push(this.chunkFileName(chunkNames.length));
88
+ accumulator = [];
89
+ accumulatedBytes = 0;
90
+ }
91
+ }
92
+ }
93
+ if (accumulator.length > 0) {
94
+ const flushResult = await this.flushChunk(
95
+ prefix,
96
+ chunkNames.length,
97
+ accumulator,
98
+ snapshotHlc
99
+ );
100
+ if (!flushResult.ok) return flushResult;
101
+ totalBytesWritten += flushResult.value;
102
+ totalDeltas += accumulator.length;
103
+ chunkNames.push(this.chunkFileName(chunkNames.length));
104
+ }
105
+ const manifest = {
106
+ snapshotHlc: snapshotHlc.toString(),
107
+ generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
108
+ chunkCount: chunkNames.length,
109
+ totalDeltas,
110
+ chunks: chunkNames
111
+ };
112
+ const manifestBytes = new TextEncoder().encode(JSON.stringify(manifest));
113
+ const manifestResult = await this.adapter.putObject(
114
+ `${prefix}/manifest.json`,
115
+ manifestBytes,
116
+ "application/json"
117
+ );
118
+ if (!manifestResult.ok) {
119
+ return Err(
120
+ new LakeSyncError(
121
+ "Failed to write checkpoint manifest",
122
+ "CHECKPOINT_WRITE_ERROR",
123
+ manifestResult.error
124
+ )
125
+ );
126
+ }
127
+ totalBytesWritten += manifestBytes.byteLength;
128
+ return Ok({
129
+ chunksWritten: chunkNames.length,
130
+ bytesWritten: totalBytesWritten,
131
+ snapshotHlc
132
+ });
133
+ }
134
+ /**
135
+ * Get all storage keys produced by a checkpoint generation.
136
+ * Useful for adding to activeKeys in maintenance to prevent orphan removal.
137
+ */
138
+ getCheckpointKeys(chunkCount) {
139
+ const prefix = `checkpoints/${this.gatewayId}`;
140
+ const keys = [`${prefix}/manifest.json`];
141
+ for (let i = 0; i < chunkCount; i++) {
142
+ keys.push(`${prefix}/${this.chunkFileName(i)}`);
143
+ }
144
+ return keys;
145
+ }
146
+ chunkFileName(index) {
147
+ return `chunk-${String(index).padStart(3, "0")}.bin`;
148
+ }
149
+ async flushChunk(prefix, index, deltas, snapshotHlc) {
150
+ const encodeResult = encodeSyncResponse({
151
+ deltas,
152
+ serverHlc: snapshotHlc,
153
+ hasMore: false
154
+ });
155
+ if (!encodeResult.ok) {
156
+ return Err(
157
+ new LakeSyncError(
158
+ `Failed to encode checkpoint chunk ${index}`,
159
+ "CHECKPOINT_ENCODE_ERROR",
160
+ encodeResult.error
161
+ )
162
+ );
163
+ }
164
+ const data = encodeResult.value;
165
+ const chunkKey = `${prefix}/${this.chunkFileName(index)}`;
166
+ const putResult = await this.adapter.putObject(chunkKey, data, "application/octet-stream");
167
+ if (!putResult.ok) {
168
+ return Err(
169
+ new LakeSyncError(
170
+ `Failed to write checkpoint chunk: ${chunkKey}`,
171
+ "CHECKPOINT_WRITE_ERROR",
172
+ putResult.error
173
+ )
174
+ );
175
+ }
176
+ return Ok(data.byteLength);
177
+ }
178
+ };
179
+
180
// ../compactor/src/equality-delete.ts

// Schema for tombstone-only Parquet files: no user columns, just row identity.
var EQUALITY_DELETE_SCHEMA = {
  table: "_equality_delete",
  columns: []
};
// Zero-valued HLC marking synthetic, compactor-authored deltas.
var SENTINEL_HLC = HLC.encode(0, 0);

/**
 * Encode a list of deleted row identities as a Parquet equality-delete file.
 *
 * Each (table, rowId) pair becomes a synthetic DELETE delta attributed to
 * the "_compactor" client with a sentinel HLC.
 *
 * @param deletedRows - Row identities to tombstone
 * @param _schema - Unused; present for signature parity with other writers
 * @returns Ok(Uint8Array) with the Parquet bytes (empty array for no rows),
 *          or Err(FlushError) if serialization throws
 */
async function writeEqualityDeletes(deletedRows, _schema) {
  // No tombstones: emit a zero-byte payload rather than an empty Parquet file.
  if (deletedRows.length === 0) {
    return Ok(new Uint8Array(0));
  }
  try {
    const syntheticDeltas = [];
    for (const [index, row] of deletedRows.entries()) {
      syntheticDeltas.push({
        op: "DELETE",
        table: row.table,
        rowId: row.rowId,
        clientId: "_compactor",
        columns: [],
        hlc: SENTINEL_HLC,
        deltaId: `eq-delete-${index}`
      });
    }
    return await writeDeltasToParquet(syntheticDeltas, EQUALITY_DELETE_SCHEMA);
  } catch (err) {
    const cause = err instanceof Error ? err : new Error(String(err));
    return Err(new FlushError(`Failed to write equality deletes: ${cause.message}`, cause));
  }
}
206
/**
 * Decode a Parquet equality-delete file back into row identities.
 *
 * @param data - Parquet bytes produced by writeEqualityDeletes
 * @returns Ok(array of { table, rowId }), or Err(FlushError) on parse failure
 */
async function readEqualityDeletes(data) {
  // A zero-byte payload is the "no tombstones" encoding used by the writer.
  if (data.byteLength === 0) {
    return Ok([]);
  }
  const readResult = await readParquetToDeltas(data);
  if (!readResult.ok) {
    return Err(
      new FlushError(
        `Failed to read equality deletes: ${readResult.error.message}`,
        readResult.error
      )
    );
  }
  // Only row identity survives the round trip; column payloads are dropped.
  const rows = [];
  for (const delta of readResult.value) {
    rows.push({ table: delta.table, rowId: delta.rowId });
  }
  return Ok(rows);
}
225
+
226
// ../compactor/src/compactor.ts

/**
 * Merges many small delta Parquet files into a consolidated base file (live
 * rows) plus an equality-delete file (tombstones), resolving conflicts with
 * per-column last-writer-wins keyed on HLC timestamps.
 */
var Compactor = class {
  adapter; // lake adapter for object-storage reads/writes
  config;  // compaction thresholds (minDeltaFiles, maxDeltaFiles, ...)
  schema;  // user table schema; its column list drives base-file output
  /**
   * Create a new Compactor instance.
   *
   * @param adapter - The lake adapter for reading/writing Parquet files
   * @param config - Compaction configuration (thresholds and limits)
   * @param schema - The table schema describing user-defined columns
   */
  constructor(adapter, config, schema) {
    this.adapter = adapter;
    this.config = config;
    this.schema = schema;
  }
  /**
   * Compact delta files into base data files.
   *
   * Reads delta files from storage, resolves all deltas per row using LWW,
   * and writes consolidated base files + equality delete files.
   *
   * @param deltaFileKeys - Storage keys of the delta Parquet files to compact
   * @param outputPrefix - Prefix for the output base/delete file keys
   * @returns A Result containing the CompactionResult, or a LakeSyncError on failure
   */
  async compact(deltaFileKeys, outputPrefix) {
    // Below the minimum batch size compaction is not worthwhile; report a
    // successful no-op so callers need no special casing.
    if (deltaFileKeys.length < this.config.minDeltaFiles) {
      return Ok({
        baseFilesWritten: 0,
        deleteFilesWritten: 0,
        deltaFilesCompacted: 0,
        bytesRead: 0,
        bytesWritten: 0
      });
    }
    // Cap the batch so a single run cannot read unbounded input; the
    // remainder is picked up by a later run.
    const keysToCompact = deltaFileKeys.slice(0, this.config.maxDeltaFiles);
    const resolveResult = await this.readAndResolveIncrementally(keysToCompact);
    if (!resolveResult.ok) return resolveResult;
    const { liveRows, deletedRows, bytesRead } = resolveResult.value;
    const writeResult = await this.writeOutputFiles(liveRows, deletedRows, outputPrefix);
    if (!writeResult.ok) return writeResult;
    return Ok({
      ...writeResult.value,
      deltaFilesCompacted: keysToCompact.length,
      bytesRead
    });
  }
  /**
   * Read delta files one at a time and incrementally resolve to final row state.
   *
   * Memory usage is O(unique rows x columns) rather than O(total deltas),
   * since each file's deltas are processed and discarded before reading the next.
   *
   * Resolution rules (all comparisons via HLC.compare):
   * - per column: the write with the highest HLC wins;
   * - per row: the highest DELETE HLC is tracked separately (deleteHlc);
   * - a row is ultimately deleted iff a DELETE exists and no column write
   *   is newer than it; otherwise only columns newer than the delete survive.
   */
  async readAndResolveIncrementally(keysToCompact) {
    // rowKey(table, rowId) -> accumulated row state across all files.
    const rowStates = /* @__PURE__ */ new Map();
    let bytesRead = 0;
    for (const key of keysToCompact) {
      const getResult = await this.adapter.getObject(key);
      if (!getResult.ok) {
        return Err(
          new LakeSyncError(
            `Failed to read delta file: ${key}`,
            "COMPACTION_READ_ERROR",
            getResult.error
          )
        );
      }
      const data = getResult.value;
      bytesRead += data.byteLength;
      const parseResult = await readParquetToDeltas(data);
      if (!parseResult.ok) {
        return Err(
          new LakeSyncError(
            `Failed to parse delta file: ${key}`,
            "COMPACTION_PARSE_ERROR",
            parseResult.error
          )
        );
      }
      for (const delta of parseResult.value) {
        const k = rowKey(delta.table, delta.rowId);
        let state = rowStates.get(k);
        if (!state) {
          // 0n sentinels mean "no write / no delete seen yet" for the
          // bigint-encoded HLC fields.
          state = {
            table: delta.table,
            rowId: delta.rowId,
            clientId: delta.clientId,
            columns: /* @__PURE__ */ new Map(),
            latestHlc: 0n,
            latestDeltaId: delta.deltaId,
            deleteHlc: 0n
          };
          rowStates.set(k, state);
        }
        // Track the newest delta overall; its id/client label the output row.
        if (HLC.compare(delta.hlc, state.latestHlc) > 0) {
          state.latestHlc = delta.hlc;
          state.latestDeltaId = delta.deltaId;
          state.clientId = delta.clientId;
        }
        if (delta.op === "DELETE") {
          // Keep only the newest tombstone per row.
          if (HLC.compare(delta.hlc, state.deleteHlc) > 0) {
            state.deleteHlc = delta.hlc;
          }
        } else {
          // Per-column LWW: a column value is replaced only by a newer write.
          for (const col of delta.columns) {
            const existing = state.columns.get(col.column);
            if (!existing || HLC.compare(delta.hlc, existing.hlc) > 0) {
              state.columns.set(col.column, {
                value: col.value,
                hlc: delta.hlc
              });
            }
          }
        }
      }
    }
    const liveRows = [];
    const deletedRows = [];
    for (const [, state] of rowStates) {
      // A DELETE was seen, but any single column write newer than it
      // resurrects the row.
      let isDeleted = state.deleteHlc > 0n;
      if (isDeleted) {
        for (const col of state.columns.values()) {
          if (HLC.compare(state.deleteHlc, col.hlc) < 0) {
            isDeleted = false;
            break;
          }
        }
      }
      // Rows with no surviving column values are also emitted as tombstones.
      if (isDeleted || state.columns.size === 0) {
        deletedRows.push({ table: state.table, rowId: state.rowId });
      } else {
        // Emit columns in schema order, keeping only values that postdate
        // the delete (if any).
        const columns = [];
        for (const col of this.schema.columns) {
          const colState = state.columns.get(col.name);
          if (colState && (state.deleteHlc === 0n || HLC.compare(colState.hlc, state.deleteHlc) > 0)) {
            columns.push({ column: col.name, value: colState.value });
          }
        }
        liveRows.push({
          op: "INSERT",
          table: state.table,
          rowId: state.rowId,
          clientId: state.clientId,
          columns,
          hlc: state.latestHlc,
          deltaId: state.latestDeltaId
        });
      }
    }
    return Ok({ liveRows, deletedRows, bytesRead });
  }
  /** Write base Parquet file(s) for live rows and equality delete file(s) for deleted rows. */
  async writeOutputFiles(liveRows, deletedRows, outputPrefix) {
    let bytesWritten = 0;
    let baseFilesWritten = 0;
    let deleteFilesWritten = 0;
    if (liveRows.length > 0) {
      const writeResult = await writeDeltasToParquet(liveRows, this.schema);
      if (!writeResult.ok) {
        return Err(
          new LakeSyncError(
            "Failed to write base file",
            "COMPACTION_WRITE_ERROR",
            writeResult.error
          )
        );
      }
      const baseData = writeResult.value;
      // Timestamp + random suffix keeps output keys unique across runs.
      const timestamp = this.generateTimestamp();
      const basePath = `${outputPrefix}/base-${timestamp}.parquet`;
      const putResult = await this.adapter.putObject(
        basePath,
        baseData,
        "application/octet-stream"
      );
      if (!putResult.ok) {
        return Err(
          new LakeSyncError(
            `Failed to store base file: ${basePath}`,
            "COMPACTION_STORE_ERROR",
            putResult.error
          )
        );
      }
      bytesWritten += baseData.byteLength;
      baseFilesWritten = 1;
    }
    if (deletedRows.length > 0) {
      const writeResult = await writeEqualityDeletes(deletedRows, this.schema);
      if (!writeResult.ok) {
        return Err(
          new LakeSyncError(
            "Failed to write equality delete file",
            "COMPACTION_WRITE_ERROR",
            writeResult.error
          )
        );
      }
      const deleteData = writeResult.value;
      const timestamp = this.generateTimestamp();
      const deletePath = `${outputPrefix}/delete-${timestamp}.parquet`;
      const putResult = await this.adapter.putObject(
        deletePath,
        deleteData,
        "application/octet-stream"
      );
      if (!putResult.ok) {
        return Err(
          new LakeSyncError(
            `Failed to store delete file: ${deletePath}`,
            "COMPACTION_STORE_ERROR",
            putResult.error
          )
        );
      }
      bytesWritten += deleteData.byteLength;
      deleteFilesWritten = 1;
    }
    return Ok({ baseFilesWritten, deleteFilesWritten, bytesWritten });
  }
  /**
   * Generate a timestamp string for output file naming.
   * Uses the current wall clock time with a random suffix for uniqueness.
   */
  generateTimestamp() {
    const now = Date.now();
    const suffix = Math.random().toString(36).slice(2, 8);
    return `${now}-${suffix}`;
  }
};
458
+
459
// ../compactor/src/maintenance.ts

var DEFAULT_MAINTENANCE_CONFIG = {
  // NOTE(review): retainSnapshots is defined here but not read anywhere in
  // this module (run() always reports snapshotsExpired: 0) — confirm whether
  // snapshot expiry is implemented elsewhere or still pending.
  retainSnapshots: 5,
  orphanAgeMs: 60 * 60 * 1e3
  // 1 hour
};

/**
 * Orchestrates one maintenance cycle: compaction via a Compactor, optional
 * checkpoint generation, and age-guarded orphan file cleanup.
 */
var MaintenanceRunner = class {
  compactor;           // Compactor used to merge delta files
  adapter;             // lake adapter for list/delete operations
  config;              // { retainSnapshots, orphanAgeMs }
  checkpointGenerator; // optional CheckpointGenerator (null when disabled)
  /**
   * Create a new MaintenanceRunner instance.
   *
   * @param compactor - The compactor instance for merging delta files
   * @param adapter - The lake adapter for storage operations
   * @param config - Maintenance configuration (retention and age thresholds)
   * @param checkpointGenerator - Optional checkpoint generator; when provided,
   *   checkpoints are generated after successful compaction
   */
  constructor(compactor, adapter, config, checkpointGenerator) {
    this.compactor = compactor;
    this.adapter = adapter;
    this.config = config;
    this.checkpointGenerator = checkpointGenerator ?? null;
  }
  /**
   * Run the full maintenance cycle: compact, expire, and clean.
   *
   * Compacts delta files into base/delete files, then removes orphaned
   * storage objects that are no longer referenced by any active data.
   * Files younger than `orphanAgeMs` are never deleted to avoid races
   * with in-progress flush operations.
   *
   * @param deltaFileKeys - Storage keys of the delta Parquet files to compact
   * @param outputPrefix - Prefix for the output base/delete file keys
   * @param storagePrefix - Prefix under which all related storage files live
   * @returns A Result containing the MaintenanceReport, or a LakeSyncError on failure
   */
  async run(deltaFileKeys, outputPrefix, storagePrefix) {
    const compactionResult = await this.compactor.compact(deltaFileKeys, outputPrefix);
    if (!compactionResult.ok) {
      return Err(
        new LakeSyncError(
          `Maintenance compaction failed: ${compactionResult.error.message}`,
          "MAINTENANCE_COMPACTION_ERROR",
          compactionResult.error
        )
      );
    }
    const compaction = compactionResult.value;
    // activeKeys collects every key that must survive orphan cleanup.
    const activeKeys = /* @__PURE__ */ new Set();
    // Delta files beyond the compacted batch were NOT merged; they are still
    // live input for the next run and must be kept.
    const compactedCount = compaction.deltaFilesCompacted;
    for (let i = compactedCount; i < deltaFileKeys.length; i++) {
      activeKeys.add(deltaFileKeys[i]);
    }
    const listOutputResult = await this.adapter.listObjects(outputPrefix);
    if (!listOutputResult.ok) {
      return Err(
        new LakeSyncError(
          `Failed to list output files: ${listOutputResult.error.message}`,
          "MAINTENANCE_LIST_ERROR",
          listOutputResult.error
        )
      );
    }
    // Everything under the output prefix (base/delete files from this and
    // prior runs) is considered active.
    for (const obj of listOutputResult.value) {
      activeKeys.add(obj.key);
    }
    let checkpoint;
    if (this.checkpointGenerator && compaction.baseFilesWritten > 0) {
      // Only "base-*.parquet" files feed the checkpoint, not delete files.
      const baseFileKeys = listOutputResult.value.filter((obj) => obj.key.endsWith(".parquet") && obj.key.includes("/base-")).map((obj) => obj.key);
      if (baseFileKeys.length > 0) {
        const snapshotHlc = HLC.encode(Date.now(), 0);
        const checkpointResult = await this.checkpointGenerator.generate(baseFileKeys, snapshotHlc);
        // NOTE(review): a failed checkpoint generation is silently skipped —
        // the report simply has no `checkpoint` field. Presumably deliberate
        // best-effort behavior (cleanup should still proceed); confirm that
        // the error should not at least be surfaced to a logger.
        if (checkpointResult.ok) {
          checkpoint = checkpointResult.value;
          // Protect the freshly written chunk/manifest keys from cleanup.
          const checkpointKeys = this.checkpointGenerator.getCheckpointKeys(
            checkpoint.chunksWritten
          );
          for (const key of checkpointKeys) {
            activeKeys.add(key);
          }
        }
      }
    }
    const orphanResult = await this.removeOrphans(storagePrefix, activeKeys);
    if (!orphanResult.ok) {
      return Err(
        new LakeSyncError(
          `Maintenance orphan removal failed: ${orphanResult.error.message}`,
          "MAINTENANCE_ORPHAN_ERROR",
          orphanResult.error
        )
      );
    }
    // snapshotsExpired is always 0: snapshot expiry is not implemented here.
    return Ok({
      compaction,
      snapshotsExpired: 0,
      orphansRemoved: orphanResult.value,
      checkpoint
    });
  }
  /**
   * Delete orphaned files not referenced by any active data.
   *
   * Lists all files under the given storage prefix, compares each
   * against the set of active keys, and deletes files that are both
   * unreferenced and older than `orphanAgeMs`. This age guard
   * prevents deletion of files created by in-progress flush operations.
   *
   * @param storagePrefix - The storage prefix to scan for orphaned files
   * @param activeKeys - Set of storage keys that must be retained
   * @returns A Result containing the count of deleted files, or a LakeSyncError on failure
   */
  async removeOrphans(storagePrefix, activeKeys) {
    const listResult = await this.adapter.listObjects(storagePrefix);
    if (!listResult.ok) {
      return Err(
        new LakeSyncError(
          `Failed to list objects for orphan removal: ${listResult.error.message}`,
          "MAINTENANCE_LIST_ERROR",
          listResult.error
        )
      );
    }
    const now = Date.now();
    const orphanKeys = this.findOrphans(listResult.value, activeKeys, now);
    if (orphanKeys.length === 0) {
      return Ok(0);
    }
    const deleteResult = await this.adapter.deleteObjects(orphanKeys);
    if (!deleteResult.ok) {
      return Err(
        new LakeSyncError(
          `Failed to delete orphaned files: ${deleteResult.error.message}`,
          "MAINTENANCE_DELETE_ERROR",
          deleteResult.error
        )
      );
    }
    return Ok(orphanKeys.length);
  }
  /**
   * Identify orphaned file keys from a list of storage objects.
   *
   * A file is considered an orphan if it is not in the active keys set
   * and its last modification time is older than the configured orphan age.
   *
   * @param objects - Listed storage objects ({ key, lastModified })
   * @param activeKeys - Keys that must never be reported as orphans
   * @param now - Current wall-clock milliseconds (passed in for testability)
   */
  findOrphans(objects, activeKeys, now) {
    const orphans = [];
    for (const obj of objects) {
      if (activeKeys.has(obj.key)) {
        continue;
      }
      // Age guard: recent files may belong to an in-flight flush.
      const age = now - obj.lastModified.getTime();
      if (age >= this.config.orphanAgeMs) {
        orphans.push(obj.key);
      }
    }
    return orphans;
  }
};
622
+
623
// ../compactor/src/scheduler.ts

// Default: run maintenance once a minute, enabled.
var DEFAULT_SCHEDULER_CONFIG = {
  intervalMs: 6e4,
  enabled: true
};

/**
 * Drives a MaintenanceRunner on a fixed interval, guaranteeing that at most
 * one maintenance cycle is in flight at any time.
 */
var CompactionScheduler = class {
  runner;       // MaintenanceRunner executed on each cycle
  taskProvider; // async () => task | null; supplies per-run parameters
  config;       // { intervalMs, enabled }
  timer = null; // interval handle while the scheduler is running
  running = false;
  // Promise of the in-flight maintenance cycle, or null when idle. It is
  // assigned SYNCHRONOUSLY at the start of a cycle (before any await) so
  // that concurrent triggers are reliably rejected.
  inFlightPromise = null;
  /**
   * Create a new CompactionScheduler instance.
   *
   * @param runner - The maintenance runner to execute on each tick
   * @param taskProvider - Function that provides maintenance task parameters for each run
   * @param config - Scheduler configuration (interval and enabled flag)
   */
  constructor(runner, taskProvider, config = {}) {
    this.runner = runner;
    this.taskProvider = taskProvider;
    this.config = { ...DEFAULT_SCHEDULER_CONFIG, ...config };
  }
  /**
   * Whether the scheduler is currently active (timer is ticking).
   */
  get isRunning() {
    return this.running;
  }
  /**
   * Start the scheduler interval timer.
   *
   * Begins executing maintenance runs at the configured interval.
   * If the scheduler is already running or disabled, returns an error.
   *
   * @returns A Result indicating success or a descriptive error
   */
  start() {
    if (!this.config.enabled) {
      return Err(new LakeSyncError("Scheduler is disabled", "SCHEDULER_DISABLED"));
    }
    if (this.running) {
      return Err(new LakeSyncError("Scheduler is already running", "SCHEDULER_ALREADY_RUNNING"));
    }
    this.running = true;
    this.timer = setInterval(() => {
      // Fire-and-forget: tick() handles its own errors via Result values.
      void this.tick();
    }, this.config.intervalMs);
    return Ok(void 0);
  }
  /**
   * Stop the scheduler and wait for any in-progress run to finish.
   *
   * Clears the interval timer and, if a maintenance run is currently
   * executing, awaits its completion before returning.
   *
   * @returns A Result indicating success or a descriptive error
   */
  async stop() {
    if (!this.running) {
      return Err(new LakeSyncError("Scheduler is not running", "SCHEDULER_NOT_RUNNING"));
    }
    if (this.timer !== null) {
      clearInterval(this.timer);
      this.timer = null;
    }
    this.running = false;
    if (this.inFlightPromise !== null) {
      await this.inFlightPromise;
      this.inFlightPromise = null;
    }
    return Ok(void 0);
  }
  /**
   * Manually trigger a single maintenance run.
   *
   * Useful for testing or administrative purposes. If a run is already
   * in progress, skips and returns an error.
   *
   * @returns A Result containing the MaintenanceReport, or a LakeSyncError on failure
   */
  async runOnce() {
    if (this.inFlightPromise !== null) {
      return Err(new LakeSyncError("A maintenance run is already in progress", "SCHEDULER_BUSY"));
    }
    return this.executeMaintenance();
  }
  /**
   * Internal tick handler called by the interval timer.
   * Skips if a previous run is still in progress.
   */
  async tick() {
    if (this.inFlightPromise !== null) {
      return;
    }
    await this.executeMaintenance();
  }
  /**
   * Execute a single maintenance cycle.
   *
   * BUG FIX: previously `inFlightPromise` was only assigned AFTER awaiting
   * the (possibly async) task provider, so a timer tick or runOnce() that
   * arrived during that await saw `inFlightPromise === null` and started a
   * second concurrent maintenance run — exactly what this class exists to
   * prevent. The whole cycle is now wrapped in one promise registered
   * synchronously, before any await, closing that window.
   *
   * @returns A Result containing the MaintenanceReport, or a LakeSyncError on failure
   */
  async executeMaintenance() {
    const promise = this.performMaintenance();
    this.inFlightPromise = promise;
    try {
      return await promise;
    } finally {
      this.inFlightPromise = null;
    }
  }
  /**
   * Resolve the task and run maintenance once.
   * A null task yields an empty (no-op) MaintenanceReport.
   */
  async performMaintenance() {
    const taskResult = await this.resolveTask();
    if (!taskResult.ok) {
      return taskResult;
    }
    const task = taskResult.value;
    if (task === null) {
      return Ok({
        compaction: {
          baseFilesWritten: 0,
          deleteFilesWritten: 0,
          deltaFilesCompacted: 0,
          bytesRead: 0,
          bytesWritten: 0
        },
        snapshotsExpired: 0,
        orphansRemoved: 0
      });
    }
    return this.runner.run(task.deltaFileKeys, task.outputPrefix, task.storagePrefix);
  }
  /**
   * Resolve the maintenance task from the provider, wrapping any thrown
   * exceptions into a Result error.
   */
  async resolveTask() {
    try {
      const task = await this.taskProvider();
      return Ok(task);
    } catch (error) {
      return Err(
        new LakeSyncError(
          `Task provider failed: ${error instanceof Error ? error.message : String(error)}`,
          "SCHEDULER_TASK_PROVIDER_ERROR"
        )
      );
    }
  }
};
773
+
774
// ../compactor/src/types.ts

// Default compaction thresholds: Compactor.compact() no-ops below
// minDeltaFiles and caps each batch at maxDeltaFiles.
// NOTE(review): targetFileSizeBytes is not referenced by the Compactor in
// this bundle — confirm whether output-size targeting is implemented
// elsewhere or still pending.
var DEFAULT_COMPACTION_CONFIG = {
  minDeltaFiles: 10,
  maxDeltaFiles: 20,
  targetFileSizeBytes: 128 * 1024 * 1024
  // 128 MB
};
781
+ export {
782
+ CheckpointGenerator,
783
+ CompactionScheduler,
784
+ Compactor,
785
+ DEFAULT_CHECKPOINT_CONFIG,
786
+ DEFAULT_COMPACTION_CONFIG,
787
+ DEFAULT_MAINTENANCE_CONFIG,
788
+ DEFAULT_SCHEDULER_CONFIG,
789
+ MaintenanceRunner,
790
+ readEqualityDeletes,
791
+ writeEqualityDeletes
792
+ };
793
+ //# sourceMappingURL=compactor.js.map