lakesync 0.1.0
- package/README.md +74 -0
- package/dist/adapter.d.ts +369 -0
- package/dist/adapter.js +39 -0
- package/dist/adapter.js.map +1 -0
- package/dist/analyst.d.ts +268 -0
- package/dist/analyst.js +495 -0
- package/dist/analyst.js.map +1 -0
- package/dist/auth-CAVutXzx.d.ts +30 -0
- package/dist/base-poller-Qo_SmCZs.d.ts +82 -0
- package/dist/catalogue.d.ts +65 -0
- package/dist/catalogue.js +17 -0
- package/dist/catalogue.js.map +1 -0
- package/dist/chunk-4ARO6KTJ.js +257 -0
- package/dist/chunk-4ARO6KTJ.js.map +1 -0
- package/dist/chunk-5YOFCJQ7.js +1115 -0
- package/dist/chunk-5YOFCJQ7.js.map +1 -0
- package/dist/chunk-7D4SUZUM.js +38 -0
- package/dist/chunk-7D4SUZUM.js.map +1 -0
- package/dist/chunk-BNJOGBYK.js +335 -0
- package/dist/chunk-BNJOGBYK.js.map +1 -0
- package/dist/chunk-ICNT7I3K.js +1180 -0
- package/dist/chunk-ICNT7I3K.js.map +1 -0
- package/dist/chunk-P5DRFKIT.js +413 -0
- package/dist/chunk-P5DRFKIT.js.map +1 -0
- package/dist/chunk-X3RO5SYJ.js +880 -0
- package/dist/chunk-X3RO5SYJ.js.map +1 -0
- package/dist/client.d.ts +428 -0
- package/dist/client.js +2048 -0
- package/dist/client.js.map +1 -0
- package/dist/compactor.d.ts +342 -0
- package/dist/compactor.js +793 -0
- package/dist/compactor.js.map +1 -0
- package/dist/coordinator-CxckTzYW.d.ts +396 -0
- package/dist/db-types-BR6Kt4uf.d.ts +29 -0
- package/dist/gateway-D5SaaMvT.d.ts +337 -0
- package/dist/gateway-server.d.ts +306 -0
- package/dist/gateway-server.js +4663 -0
- package/dist/gateway-server.js.map +1 -0
- package/dist/gateway.d.ts +196 -0
- package/dist/gateway.js +79 -0
- package/dist/gateway.js.map +1 -0
- package/dist/hlc-DiD8QNG3.d.ts +70 -0
- package/dist/index.d.ts +245 -0
- package/dist/index.js +102 -0
- package/dist/index.js.map +1 -0
- package/dist/json-dYtqiL0F.d.ts +18 -0
- package/dist/nessie-client-DrNikVXy.d.ts +160 -0
- package/dist/parquet.d.ts +78 -0
- package/dist/parquet.js +15 -0
- package/dist/parquet.js.map +1 -0
- package/dist/proto.d.ts +434 -0
- package/dist/proto.js +67 -0
- package/dist/proto.js.map +1 -0
- package/dist/react.d.ts +147 -0
- package/dist/react.js +224 -0
- package/dist/react.js.map +1 -0
- package/dist/resolver-C3Wphi6O.d.ts +10 -0
- package/dist/result-CojzlFE2.d.ts +64 -0
- package/dist/src-QU2YLPZY.js +383 -0
- package/dist/src-QU2YLPZY.js.map +1 -0
- package/dist/src-WYBF5LOI.js +102 -0
- package/dist/src-WYBF5LOI.js.map +1 -0
- package/dist/src-WZNPHANQ.js +426 -0
- package/dist/src-WZNPHANQ.js.map +1 -0
- package/dist/types-Bs-QyOe-.d.ts +143 -0
- package/dist/types-DAQL_vU_.d.ts +118 -0
- package/dist/types-DSC_EiwR.d.ts +45 -0
- package/dist/types-V_jVu2sA.d.ts +73 -0
- package/package.json +119 -0

package/dist/analyst.d.ts
@@ -0,0 +1,268 @@
+import { R as Result, L as LakeSyncError, H as HLCTimestamp } from './result-CojzlFE2.js';
+
+/**
+ * Configuration options for the DuckDB-Wasm client.
+ */
+interface DuckDBClientConfig {
+    /** Whether to enable console logging from DuckDB. Defaults to false. */
+    logger?: boolean;
+    /** Maximum number of threads for DuckDB. Defaults to 1. */
+    threads?: number;
+}
+/**
+ * Wrapper around DuckDB-Wasm that provides a simplified, Result-based API
+ * for executing SQL queries and registering Parquet data sources.
+ *
+ * Works in both Node.js/Bun (using the blocking bindings) and browser
+ * environments (using the async worker-based bindings).
+ *
+ * @example
+ * ```ts
+ * const client = new DuckDBClient({ logger: false });
+ * const initResult = await client.init();
+ * if (!initResult.ok) { console.error(initResult.error); return; }
+ *
+ * const result = await client.query<{ answer: number }>("SELECT 42 AS answer");
+ * if (result.ok) console.log(result.value); // [{ answer: 42 }]
+ *
+ * await client.close();
+ * ```
+ */
+declare class DuckDBClient {
+    private readonly _config;
+    private _db;
+    private _conn;
+    private _closed;
+    constructor(config?: DuckDBClientConfig);
+    /**
+     * Initialise the DuckDB-Wasm instance and open a connection.
+     *
+     * Uses the blocking Node.js bindings when running in Node/Bun,
+     * which avoids the need for Worker threads.
+     *
+     * @returns A Result indicating success or failure with a LakeSyncError
+     */
+    init(): Promise<Result<void, LakeSyncError>>;
+    /**
+     * Execute a SQL query and return the results as an array of objects.
+     *
+     * @param sql - The SQL statement to execute
+     * @param _params - Reserved for future use (parameterised queries)
+     * @returns A Result containing the query results or a LakeSyncError
+     */
+    query<T>(sql: string, _params?: unknown[]): Promise<Result<T[], LakeSyncError>>;
+    /**
+     * Register an in-memory Parquet file as a named table that can be
+     * queried using `SELECT * FROM '<name>'`.
+     *
+     * @param name - The virtual file name (e.g. "deltas.parquet")
+     * @param data - The Parquet file contents as a Uint8Array
+     * @returns A Result indicating success or failure
+     */
+    registerParquetBuffer(name: string, data: Uint8Array): Promise<Result<void, LakeSyncError>>;
+    /**
+     * Register a remote Parquet file by URL so it can be queried using
+     * `SELECT * FROM '<name>'`.
+     *
+     * @param name - The virtual file name (e.g. "remote.parquet")
+     * @param url - The URL pointing to the Parquet file
+     * @returns A Result indicating success or failure
+     */
+    registerParquetUrl(name: string, url: string): Promise<Result<void, LakeSyncError>>;
+    /**
+     * Tear down the DuckDB connection and database instance.
+     *
+     * After calling close(), any subsequent query or registration calls
+     * will return an error Result.
+     */
+    close(): Promise<void>;
+}
+
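Taken together, `DuckDBClient` gives a register-then-query flow over in-memory Parquet. A minimal sketch of that flow (assuming a `lakesync/analyst` entry point and a placeholder `parquetBytes`, neither of which is confirmed by this diff):

```ts
import { DuckDBClient } from "lakesync/analyst"; // hypothetical subpath export

declare const parquetBytes: Uint8Array; // placeholder Parquet file contents

const client = new DuckDBClient({ logger: false, threads: 1 });

// Every operation returns a Result rather than throwing.
const init = await client.init();
if (!init.ok) throw init.error;

// Register the buffer under a virtual file name, then query it by that name.
const reg = await client.registerParquetBuffer("deltas.parquet", parquetBytes);
if (!reg.ok) throw reg.error;

const rows = await client.query<Record<string, unknown>>(
  "SELECT * FROM 'deltas.parquet' LIMIT 10",
);
if (rows.ok) console.log(rows.value);

await client.close();
```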
+/**
+ * Configuration for the TimeTraveller.
+ */
+interface TimeTravelConfig {
+    /** The DuckDB client used for executing time-travel queries. */
+    duckdb: DuckDBClient;
+}
+/**
+ * Provides time-travel query capabilities over delta Parquet files.
+ *
+ * Allows querying the materialised state of data as it existed at a specific
+ * HLC timestamp, or inspecting raw deltas within a time range. Uses DuckDB
+ * SQL with window functions to perform column-level LWW materialisation
+ * entirely in-engine.
+ *
+ * Delta Parquet files contain flattened rows with system columns (`op`, `table`,
+ * `rowId`, `clientId`, `hlc`, `deltaId`) and user-defined columns (e.g. `title`,
+ * `completed`). The materialisation reconstructs per-row state by selecting the
+ * latest value for each column based on HLC ordering, then excluding deleted rows.
+ *
+ * @example
+ * ```ts
+ * const traveller = new TimeTraveller({ duckdb: client });
+ * await traveller.registerDeltas([{ name: "batch-1.parquet", data: bytes }]);
+ *
+ * const result = await traveller.queryAsOf(hlcTimestamp, "SELECT * FROM _state WHERE completed = true");
+ * if (result.ok) console.log(result.value);
+ * ```
+ */
+declare class TimeTraveller {
+    private readonly _config;
+    private readonly _sources;
+    constructor(config: TimeTravelConfig);
+    /**
+     * Register one or more Parquet buffers containing delta data.
+     *
+     * Each buffer is registered with DuckDB and can subsequently be
+     * queried via the time-travel methods.
+     *
+     * @param parquetBuffers - Array of named Parquet file buffers to register
+     * @returns A Result indicating success or a LakeSyncError on failure
+     */
+    registerDeltas(parquetBuffers: Array<{
+        name: string;
+        data: Uint8Array;
+    }>): Promise<Result<void, LakeSyncError>>;
+    /**
+     * Query the materialised state as of the given HLC timestamp.
+     *
+     * Filters all deltas where `hlc <= asOfHlc`, then materialises the latest
+     * state per (table, rowId) using column-level LWW (highest HLC wins per
+     * column). The user's SQL is applied on top of the materialised view,
+     * which is exposed as the CTE `_state`.
+     *
+     * Deleted rows (where the latest operation is DELETE) are excluded from
+     * the materialised view.
+     *
+     * @param asOfHlc - The HLC timestamp representing the point in time to query
+     * @param sql - SQL to apply on the materialised view (use `_state` as the table name)
+     * @returns A Result containing the query results or a LakeSyncError
+     */
+    queryAsOf(asOfHlc: HLCTimestamp, sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+    /**
+     * Query raw deltas within a time range.
+     *
+     * Filters deltas where `fromHlc < hlc <= toHlc` and returns them as raw
+     * (unmaterialised) rows. Useful for audit trails and changelog views.
+     *
+     * The user's SQL is applied on top of the filtered deltas, which are
+     * exposed as the CTE `_deltas`.
+     *
+     * @param fromHlc - The exclusive lower bound HLC timestamp
+     * @param toHlc - The inclusive upper bound HLC timestamp
+     * @param sql - SQL to apply on the filtered deltas (use `_deltas` as the table name)
+     * @returns A Result containing the query results or a LakeSyncError
+     */
+    queryBetween(fromHlc: HLCTimestamp, toHlc: HLCTimestamp, sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+    /**
+     * Materialise the full state at a point in time, returning all rows.
+     *
+     * Equivalent to `queryAsOf(asOfHlc, "SELECT * FROM _state")` but provided
+     * as a convenience method.
+     *
+     * @param asOfHlc - The HLC timestamp representing the point in time to materialise
+     * @returns A Result containing all materialised rows or a LakeSyncError
+     */
+    materialiseAsOf(asOfHlc: HLCTimestamp): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+    /**
+     * Build a UNION ALL SQL expression covering all registered Parquet sources.
+     */
+    private _buildUnionSql;
+    /**
+     * Discover user-defined column names from the registered Parquet data.
+     *
+     * Reads the column names from the first registered source and filters
+     * out system columns to identify user-defined columns.
+     */
+    private _discoverUserColumns;
+    /**
+     * Build the materialisation SQL that reconstructs per-row state using
+     * column-level LWW semantics.
+     *
+     * Strategy:
+     * 1. Filter deltas by HLC <= asOfHlc
+     * 2. For each (table, rowId), determine the latest operation (by max HLC)
+     * 3. For each user column in each row, pick the value from the delta with
+     *    the highest HLC (where that column is not null)
+     * 4. Exclude rows where the latest operation is DELETE
+     *
+     * @param userColumns - Names of user-defined columns
+     * @param asOfHlc - The HLC timestamp cutoff as a bigint
+     * @returns SQL string producing the materialised view
+     */
+    private _buildMaterialiseSql;
+}
+
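The JSDoc example above covers `queryAsOf`; the same surface also supports changelog-style reads. A sketch combining `queryBetween` and `materialiseAsOf`, with placeholder timestamps and delta bytes (the import paths are assumptions, not confirmed by this diff):

```ts
import { DuckDBClient, TimeTraveller } from "lakesync/analyst"; // hypothetical subpath export
import type { HLCTimestamp } from "lakesync";                   // hypothetical re-export

declare const client: DuckDBClient; // an initialised client, as above
declare const from: HLCTimestamp;   // exclusive lower bound, e.g. the last checkpoint
declare const to: HLCTimestamp;     // inclusive upper bound, e.g. "now"
declare const bytes: Uint8Array;    // placeholder delta Parquet contents

const traveller = new TimeTraveller({ duckdb: client });
await traveller.registerDeltas([{ name: "batch-1.parquet", data: bytes }]);

// Audit trail: raw (unmaterialised) deltas in (from, to], newest first.
const changes = await traveller.queryBetween(
  from,
  to,
  'SELECT hlc, op, "table", rowId FROM _deltas ORDER BY hlc DESC',
);
if (changes.ok) console.table(changes.value);

// Full LWW-materialised snapshot as of `to`, deleted rows excluded.
const snapshot = await traveller.materialiseAsOf(to);
if (snapshot.ok) console.log(snapshot.value.length, "live rows");
```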
+/**
+ * Configuration for the UnionReader.
+ */
+interface UnionReadConfig {
+    /** The DuckDB client used to query cold (Parquet) data. */
+    duckdb: DuckDBClient;
+    /** The logical table name being queried. */
+    tableName: string;
+}
+/**
+ * Merges "hot" in-memory rows with "cold" Parquet data via DuckDB.
+ *
+ * Cold data is registered as Parquet file buffers in DuckDB. Hot data is
+ * serialised to JSON and loaded via `read_json_auto`. The two sources are
+ * combined with `UNION ALL` and the caller's SQL is applied on top.
+ *
+ * The union is exposed as a CTE named `_union`, so the caller's SQL should
+ * reference `_union` as the table name.
+ *
+ * @example
+ * ```ts
+ * const reader = new UnionReader({ duckdb: client, tableName: "todos" });
+ * await reader.registerColdData([{ name: "batch-1.parquet", data: parquetBytes }]);
+ *
+ * const result = await reader.query(
+ *   "SELECT * FROM _union WHERE completed = true",
+ *   [{ id: "row-3", title: "New task", completed: false }],
+ * );
+ * ```
+ */
+declare class UnionReader {
+    private readonly _config;
+    private readonly _coldSources;
+    private _hotCounter;
+    constructor(config: UnionReadConfig);
+    /**
+     * Register one or more Parquet buffers as cold data sources.
+     *
+     * Each buffer is registered with DuckDB and can subsequently be
+     * queried alongside hot data via {@link query}.
+     *
+     * @param parquetBuffers - Array of named Parquet file buffers to register
+     * @returns A Result indicating success or a LakeSyncError on failure
+     */
+    registerColdData(parquetBuffers: Array<{
+        name: string;
+        data: Uint8Array;
+    }>): Promise<Result<void, LakeSyncError>>;
+    /**
+     * Execute a SQL query that unions hot in-memory rows with cold Parquet data.
+     *
+     * The caller's SQL is wrapped around a UNION ALL of cold and hot sources.
+     * The unioned data is available as `_union` in the SQL statement.
+     *
+     * If `hotRows` is empty or not provided, only cold data is queried.
+     * If no cold sources are registered and `hotRows` is provided, only hot data is queried.
+     *
+     * @param sql - SQL to apply on top of the unioned data (use `_union` as the table name)
+     * @param hotRows - Optional array of in-memory row objects to include in the union
+     * @returns A Result containing the query results or a LakeSyncError
+     */
+    query(sql: string, hotRows?: Record<string, unknown>[]): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+    /**
+     * Query only cold (Parquet) data without any hot rows.
+     *
+     * @param sql - SQL to execute against cold data (use `_union` as the table name)
+     * @returns A Result containing the query results or a LakeSyncError
+     */
+    queryColdOnly(sql: string): Promise<Result<Record<string, unknown>[], LakeSyncError>>;
+}
+
+export { DuckDBClient, type DuckDBClientConfig, type TimeTravelConfig, TimeTraveller, type UnionReadConfig, UnionReader };
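From the `UnionReader` JSDoc, the statement that `query` assembles can be pictured roughly as below. This is only an illustration of the documented behaviour (cold Parquet scans unioned with `read_json_auto` over serialised hot rows, exposed as the `_union` CTE); the file names and exact SQL are not taken from the package:

```ts
// Rough shape of the query UnionReader builds; the caller's SQL is applied
// on top of the `_union` CTE. File names here are hypothetical.
const illustrativeSql = `
WITH _union AS (
  SELECT * FROM 'batch-1.parquet'             -- registered cold source
  UNION ALL
  SELECT * FROM read_json_auto('hot-1.json')  -- hot rows serialised to JSON
)
SELECT * FROM _union WHERE completed = true
`;
```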