lakesync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +74 -0
  2. package/dist/adapter.d.ts +369 -0
  3. package/dist/adapter.js +39 -0
  4. package/dist/adapter.js.map +1 -0
  5. package/dist/analyst.d.ts +268 -0
  6. package/dist/analyst.js +495 -0
  7. package/dist/analyst.js.map +1 -0
  8. package/dist/auth-CAVutXzx.d.ts +30 -0
  9. package/dist/base-poller-Qo_SmCZs.d.ts +82 -0
  10. package/dist/catalogue.d.ts +65 -0
  11. package/dist/catalogue.js +17 -0
  12. package/dist/catalogue.js.map +1 -0
  13. package/dist/chunk-4ARO6KTJ.js +257 -0
  14. package/dist/chunk-4ARO6KTJ.js.map +1 -0
  15. package/dist/chunk-5YOFCJQ7.js +1115 -0
  16. package/dist/chunk-5YOFCJQ7.js.map +1 -0
  17. package/dist/chunk-7D4SUZUM.js +38 -0
  18. package/dist/chunk-7D4SUZUM.js.map +1 -0
  19. package/dist/chunk-BNJOGBYK.js +335 -0
  20. package/dist/chunk-BNJOGBYK.js.map +1 -0
  21. package/dist/chunk-ICNT7I3K.js +1180 -0
  22. package/dist/chunk-ICNT7I3K.js.map +1 -0
  23. package/dist/chunk-P5DRFKIT.js +413 -0
  24. package/dist/chunk-P5DRFKIT.js.map +1 -0
  25. package/dist/chunk-X3RO5SYJ.js +880 -0
  26. package/dist/chunk-X3RO5SYJ.js.map +1 -0
  27. package/dist/client.d.ts +428 -0
  28. package/dist/client.js +2048 -0
  29. package/dist/client.js.map +1 -0
  30. package/dist/compactor.d.ts +342 -0
  31. package/dist/compactor.js +793 -0
  32. package/dist/compactor.js.map +1 -0
  33. package/dist/coordinator-CxckTzYW.d.ts +396 -0
  34. package/dist/db-types-BR6Kt4uf.d.ts +29 -0
  35. package/dist/gateway-D5SaaMvT.d.ts +337 -0
  36. package/dist/gateway-server.d.ts +306 -0
  37. package/dist/gateway-server.js +4663 -0
  38. package/dist/gateway-server.js.map +1 -0
  39. package/dist/gateway.d.ts +196 -0
  40. package/dist/gateway.js +79 -0
  41. package/dist/gateway.js.map +1 -0
  42. package/dist/hlc-DiD8QNG3.d.ts +70 -0
  43. package/dist/index.d.ts +245 -0
  44. package/dist/index.js +102 -0
  45. package/dist/index.js.map +1 -0
  46. package/dist/json-dYtqiL0F.d.ts +18 -0
  47. package/dist/nessie-client-DrNikVXy.d.ts +160 -0
  48. package/dist/parquet.d.ts +78 -0
  49. package/dist/parquet.js +15 -0
  50. package/dist/parquet.js.map +1 -0
  51. package/dist/proto.d.ts +434 -0
  52. package/dist/proto.js +67 -0
  53. package/dist/proto.js.map +1 -0
  54. package/dist/react.d.ts +147 -0
  55. package/dist/react.js +224 -0
  56. package/dist/react.js.map +1 -0
  57. package/dist/resolver-C3Wphi6O.d.ts +10 -0
  58. package/dist/result-CojzlFE2.d.ts +64 -0
  59. package/dist/src-QU2YLPZY.js +383 -0
  60. package/dist/src-QU2YLPZY.js.map +1 -0
  61. package/dist/src-WYBF5LOI.js +102 -0
  62. package/dist/src-WYBF5LOI.js.map +1 -0
  63. package/dist/src-WZNPHANQ.js +426 -0
  64. package/dist/src-WZNPHANQ.js.map +1 -0
  65. package/dist/types-Bs-QyOe-.d.ts +143 -0
  66. package/dist/types-DAQL_vU_.d.ts +118 -0
  67. package/dist/types-DSC_EiwR.d.ts +45 -0
  68. package/dist/types-V_jVu2sA.d.ts +73 -0
  69. package/package.json +119 -0
package/dist/analyst.d.ts
@@ -0,0 +1,268 @@
+ import { R as Result, L as LakeSyncError, H as HLCTimestamp } from './result-CojzlFE2.js';
+
+ /**
+  * Configuration options for the DuckDB-Wasm client.
+  */
+ interface DuckDBClientConfig {
+     /** Whether to enable console logging from DuckDB. Defaults to false. */
+     logger?: boolean;
+     /** Maximum number of threads for DuckDB. Defaults to 1. */
+     threads?: number;
+ }
+ /**
+  * Wrapper around DuckDB-Wasm that provides a simplified, Result-based API
+  * for executing SQL queries and registering Parquet data sources.
+  *
+  * Works in both Node.js/Bun (using the blocking bindings) and browser
+  * environments (using the async worker-based bindings).
+  *
+  * @example
+  * ```ts
+  * const client = new DuckDBClient({ logger: false });
+  * const initResult = await client.init();
+  * if (!initResult.ok) { console.error(initResult.error); return; }
+  *
+  * const result = await client.query<{ answer: number }>("SELECT 42 AS answer");
+  * if (result.ok) console.log(result.value); // [{ answer: 42 }]
+  *
+  * await client.close();
+  * ```
+  */
+ declare class DuckDBClient {
+     private readonly _config;
+     private _db;
+     private _conn;
+     private _closed;
+     constructor(config?: DuckDBClientConfig);
+     /**
+      * Initialise the DuckDB-Wasm instance and open a connection.
+      *
+      * Uses the blocking Node.js bindings when running in Node/Bun,
+      * which avoids the need for Worker threads.
+      *
+      * @returns A Result indicating success or failure with a LakeSyncError
+      */
+     init(): Promise<Result<void, LakeSyncError>>;
+     /**
+      * Execute a SQL query and return the results as an array of objects.
+      *
+      * @param sql - The SQL statement to execute
+      * @param _params - Reserved for future use (parameterised queries)
+      * @returns A Result containing the query results or a LakeSyncError
+      */
+     query<T>(sql: string, _params?: unknown[]): Promise<Result<T[], LakeSyncError>>;
+     /**
+      * Register an in-memory Parquet file as a named table that can be
+      * queried using `SELECT * FROM '<name>'`.
+      *
+      * @param name - The virtual file name (e.g. "deltas.parquet")
+      * @param data - The Parquet file contents as a Uint8Array
+      * @returns A Result indicating success or failure
+      */
+     registerParquetBuffer(name: string, data: Uint8Array): Promise<Result<void, LakeSyncError>>;
+     /**
+      * Register a remote Parquet file by URL so it can be queried using
+      * `SELECT * FROM '<name>'`.
+      *
+      * @param name - The virtual file name (e.g. "remote.parquet")
+      * @param url - The URL pointing to the Parquet file
+      * @returns A Result indicating success or failure
+      */
+     registerParquetUrl(name: string, url: string): Promise<Result<void, LakeSyncError>>;
+     /**
+      * Tear down the DuckDB connection and database instance.
+      *
+      * After calling close(), any subsequent query or registration calls
+      * will return an error Result.
+      */
+     close(): Promise<void>;
+ }
+
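The declarations above suggest a register-then-query flow that the class-level @example doesn't cover. A minimal sketch, assuming the package exposes these symbols from a `lakesync/analyst` subpath (the real path depends on the package's `exports` map) and that `parquetBytes` holds a Parquet file obtained elsewhere:

```ts
// Hypothetical import path; check package.json "exports" for the real subpath.
import { DuckDBClient } from "lakesync/analyst";

// Assumed input: a Parquet file loaded elsewhere (e.g. fetched from object storage).
declare const parquetBytes: Uint8Array;

const client = new DuckDBClient({ logger: false, threads: 1 });

const init = await client.init();
if (!init.ok) throw init.error;

// Register the buffer under a virtual file name, then query it by that name.
const reg = await client.registerParquetBuffer("deltas.parquet", parquetBytes);
if (!reg.ok) throw reg.error;

const rows = await client.query<{ rowId: string; hlc: bigint }>(
    "SELECT rowId, hlc FROM 'deltas.parquet' ORDER BY hlc DESC LIMIT 10",
);
if (rows.ok) console.table(rows.value);

await client.close();
```

Note that every fallible call returns a `Result` value rather than throwing; the sketch converts errors to exceptions only for brevity.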
+ /**
+  * Configuration for the TimeTraveller.
+  */
+ interface TimeTravelConfig {
+     /** The DuckDB client used for executing time-travel queries. */
+     duckdb: DuckDBClient;
+ }
+ /**
+  * Provides time-travel query capabilities over delta Parquet files.
+  *
+  * Allows querying the materialised state of data as it existed at a specific
+  * HLC timestamp, or inspecting raw deltas within a time range. Uses DuckDB
+  * SQL with window functions to perform column-level LWW materialisation
+  * entirely in-engine.
+  *
+  * Delta Parquet files contain flattened rows with system columns (`op`, `table`,
+  * `rowId`, `clientId`, `hlc`, `deltaId`) and user-defined columns (e.g. `title`,
+  * `completed`). The materialisation reconstructs per-row state by selecting the
+  * latest value for each column based on HLC ordering, then excluding deleted rows.
+  *
+  * @example
+  * ```ts
+  * const traveller = new TimeTraveller({ duckdb: client });
+  * await traveller.registerDeltas([{ name: "batch-1.parquet", data: bytes }]);
+  *
+  * const result = await traveller.queryAsOf(hlcTimestamp, "SELECT * FROM _state WHERE completed = true");
+  * if (result.ok) console.log(result.value);
+  * ```
+  */
+ declare class TimeTraveller {
+     private readonly _config;
+     private readonly _sources;
+     constructor(config: TimeTravelConfig);
+     /**
+      * Register one or more Parquet buffers containing delta data.
+      *
+      * Each buffer is registered with DuckDB and can subsequently be
+      * queried via the time-travel methods.
+      *
+      * @param parquetBuffers - Array of named Parquet file buffers to register
+      * @returns A Result indicating success or a LakeSyncError on failure
+      */
+     registerDeltas(parquetBuffers: Array<{
+         name: string;
+         data: Uint8Array;
+     }>): Promise<Result<void, LakeSyncError>>;
+     /**
+      * Query the materialised state as of the given HLC timestamp.
+      *
+      * Filters all deltas where `hlc <= asOfHlc`, then materialises the latest
+      * state per (table, rowId) using column-level LWW (highest HLC wins per
+      * column). The user's SQL is applied on top of the materialised view,
+      * which is exposed as the CTE `_state`.
+      *
+      * Deleted rows (where the latest operation is DELETE) are excluded from
+      * the materialised view.
+      *
+      * @param asOfHlc - The HLC timestamp representing the point in time to query
+      * @param sql - SQL to apply on the materialised view (use `_state` as the table name)
+      * @returns A Result containing the query results or a LakeSyncError
+      */
+     queryAsOf(asOfHlc: HLCTimestamp, sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+     /**
+      * Query raw deltas within a time range.
+      *
+      * Filters deltas where `fromHlc < hlc <= toHlc` and returns them as raw
+      * (unmaterialised) rows. Useful for audit trails and changelog views.
+      *
+      * The user's SQL is applied on top of the filtered deltas, which are
+      * exposed as the CTE `_deltas`.
+      *
+      * @param fromHlc - The exclusive lower bound HLC timestamp
+      * @param toHlc - The inclusive upper bound HLC timestamp
+      * @param sql - SQL to apply on the filtered deltas (use `_deltas` as the table name)
+      * @returns A Result containing the query results or a LakeSyncError
+      */
+     queryBetween(fromHlc: HLCTimestamp, toHlc: HLCTimestamp, sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+     /**
+      * Materialise the full state at a point in time, returning all rows.
+      *
+      * Equivalent to `queryAsOf(asOfHlc, "SELECT * FROM _state")` but provided
+      * as a convenience method.
+      *
+      * @param asOfHlc - The HLC timestamp representing the point in time to materialise
+      * @returns A Result containing all materialised rows or a LakeSyncError
+      */
+     materialiseAsOf(asOfHlc: HLCTimestamp): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+     /**
+      * Build a UNION ALL SQL expression covering all registered Parquet sources.
+      */
+     private _buildUnionSql;
+     /**
+      * Discover user-defined column names from the registered Parquet data.
+      *
+      * Reads the column names from the first registered source and filters
+      * out system columns to identify user-defined columns.
+      */
+     private _discoverUserColumns;
+     /**
+      * Build the materialisation SQL that reconstructs per-row state using
+      * column-level LWW semantics.
+      *
+      * Strategy:
+      * 1. Filter deltas by HLC <= asOfHlc
+      * 2. For each (table, rowId), determine the latest operation (by max HLC)
+      * 3. For each user column in each row, pick the value from the delta with
+      *    the highest HLC (where that column is not null)
+      * 4. Exclude rows where the latest operation is DELETE
+      *
+      * @param userColumns - Names of user-defined columns
+      * @param asOfHlc - The HLC timestamp cutoff as a bigint
+      * @returns SQL string producing the materialised view
+      */
+     private _buildMaterialiseSql;
+ }
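The class-level @example covers `queryAsOf`; the other two public methods compose the same way. A sketch with hypothetical inputs (an initialised client, one delta batch, and HLC bounds from the sync layer), assuming `DuckDBClient`, `TimeTraveller`, and `HLCTimestamp` are imported from the same hypothetical subpath as before:

```ts
// All inputs below are assumptions for illustration.
declare const client: DuckDBClient;
declare const batchBytes: Uint8Array;
declare const yesterday: HLCTimestamp;
declare const now: HLCTimestamp;

const traveller = new TimeTraveller({ duckdb: client });

const reg = await traveller.registerDeltas([
    { name: "batch-1.parquet", data: batchBytes },
]);
if (!reg.ok) throw reg.error;

// Changelog view: raw deltas where yesterday < hlc <= now, exposed as `_deltas`.
const audit = await traveller.queryBetween(
    yesterday,
    now,
    "SELECT deltaId, op, clientId, hlc FROM _deltas ORDER BY hlc",
);
if (audit.ok) console.table(audit.value);

// Full snapshot at a point in time; shorthand for
// queryAsOf(yesterday, "SELECT * FROM _state").
const snapshot = await traveller.materialiseAsOf(yesterday);
if (snapshot.ok) console.table(snapshot.value);
```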
+
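The strategy documented on `_buildMaterialiseSql` maps naturally onto DuckDB window functions. The generated SQL is private, so the following is only an illustrative reconstruction of steps 1-4, with `title` and `completed` standing in for the discovered user columns:

```ts
// Illustrative only — the SQL actually emitted by the private
// _buildMaterialiseSql may differ. `unionSql` is the UNION ALL over all
// registered Parquet sources (the job of _buildUnionSql).
const materialiseAsOfSql = (unionSql: string, asOfHlc: bigint): string => `
WITH _filtered AS (
    -- Step 1: keep only deltas at or before the cutoff.
    SELECT * FROM (${unionSql}) WHERE hlc <= ${asOfHlc}
),
_resolved AS (
    -- Steps 2-3: per (table, rowId), the newest op, and per column the
    -- newest non-null value (column-level last-writer-wins by HLC).
    SELECT DISTINCT "table", rowId,
        first_value(op) OVER w AS _last_op,
        first_value(title IGNORE NULLS) OVER w AS title,
        first_value(completed IGNORE NULLS) OVER w AS completed
    FROM _filtered
    WINDOW w AS (
        PARTITION BY "table", rowId ORDER BY hlc DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )
)
-- Step 4: exclude rows whose latest operation is a DELETE.
SELECT "table", rowId, title, completed
FROM _resolved
WHERE _last_op <> 'DELETE'`;
```

`queryAsOf` would then run the caller's SQL with a view like this bound as the CTE `_state`.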
+ /**
+  * Configuration for the UnionReader.
+  */
+ interface UnionReadConfig {
+     /** The DuckDB client used to query cold (Parquet) data. */
+     duckdb: DuckDBClient;
+     /** The logical table name being queried. */
+     tableName: string;
+ }
+ /**
+  * Merges "hot" in-memory rows with "cold" Parquet data via DuckDB.
+  *
+  * Cold data is registered as Parquet file buffers in DuckDB. Hot data is
+  * serialised to JSON and loaded via `read_json_auto`. The two sources are
+  * combined with `UNION ALL` and the caller's SQL is applied on top.
+  *
+  * The union is exposed as a CTE named `_union`, so the caller's SQL should
+  * reference `_union` as the table name.
+  *
+  * @example
+  * ```ts
+  * const reader = new UnionReader({ duckdb: client, tableName: "todos" });
+  * await reader.registerColdData([{ name: "batch-1.parquet", data: parquetBytes }]);
+  *
+  * const result = await reader.query(
+  *     "SELECT * FROM _union WHERE completed = true",
+  *     [{ id: "row-3", title: "New task", completed: false }],
+  * );
+  * ```
+  */
+ declare class UnionReader {
+     private readonly _config;
+     private readonly _coldSources;
+     private _hotCounter;
+     constructor(config: UnionReadConfig);
+     /**
+      * Register one or more Parquet buffers as cold data sources.
+      *
+      * Each buffer is registered with DuckDB and can subsequently be
+      * queried alongside hot data via {@link query}.
+      *
+      * @param parquetBuffers - Array of named Parquet file buffers to register
+      * @returns A Result indicating success or a LakeSyncError on failure
+      */
+     registerColdData(parquetBuffers: Array<{
+         name: string;
+         data: Uint8Array;
+     }>): Promise<Result<void, LakeSyncError>>;
+     /**
+      * Execute a SQL query that unions hot in-memory rows with cold Parquet data.
+      *
+      * The caller's SQL is wrapped around a UNION ALL of cold and hot sources.
+      * The unioned data is available as `_union` in the SQL statement.
+      *
+      * If `hotRows` is empty or not provided, only cold data is queried.
+      * If no cold sources are registered and `hotRows` is provided, only hot data is queried.
+      *
+      * @param sql - SQL to apply on top of the unioned data (use `_union` as the table name)
+      * @param hotRows - Optional array of in-memory row objects to include in the union
+      * @returns A Result containing the query results or a LakeSyncError
+      */
+     query(sql: string, hotRows?: Record<string, unknown>[]): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+     /**
+      * Query only cold (Parquet) data without any hot rows.
+      *
+      * @param sql - SQL to execute against cold data (use `_union` as the table name)
+      * @returns A Result containing the query results or a LakeSyncError
+      */
+     queryColdOnly(sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+ }
+
+ export { DuckDBClient, type DuckDBClientConfig, type TimeTravelConfig, TimeTraveller, type UnionReadConfig, UnionReader };
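To see how the hot/cold split behaves end to end, here is a sketch in the same spirit as the earlier ones (again with a hypothetical import subpath, and `coldBytes` standing in for a previously flushed Parquet batch):

```ts
import { DuckDBClient, UnionReader } from "lakesync/analyst"; // hypothetical subpath

// Assumed input: a Parquet batch flushed earlier in the pipeline.
declare const coldBytes: Uint8Array;

const client = new DuckDBClient();
const init = await client.init();
if (!init.ok) throw init.error;

const reader = new UnionReader({ duckdb: client, tableName: "todos" });

const reg = await reader.registerColdData([
    { name: "todos-batch-1.parquet", data: coldBytes },
]);
if (!reg.ok) throw reg.error;

// Hot rows not yet flushed; their shape should match the cold columns.
const hotRows = [{ id: "row-3", title: "New task", completed: false }];

// The caller's SQL sees the UNION ALL of both sources as `_union`.
const pending = await reader.query(
    "SELECT id, title FROM _union WHERE completed = false ORDER BY id",
    hotRows,
);
if (pending.ok) console.table(pending.value);

// Cold-only reads skip the JSON round-trip for hot rows entirely.
const total = await reader.queryColdOnly("SELECT count(*) AS n FROM _union");
if (total.ok) console.log(total.value);

await client.close();
```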