lakesync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/dist/adapter.d.ts +369 -0
- package/dist/adapter.js +39 -0
- package/dist/adapter.js.map +1 -0
- package/dist/analyst.d.ts +268 -0
- package/dist/analyst.js +495 -0
- package/dist/analyst.js.map +1 -0
- package/dist/auth-CAVutXzx.d.ts +30 -0
- package/dist/base-poller-Qo_SmCZs.d.ts +82 -0
- package/dist/catalogue.d.ts +65 -0
- package/dist/catalogue.js +17 -0
- package/dist/catalogue.js.map +1 -0
- package/dist/chunk-4ARO6KTJ.js +257 -0
- package/dist/chunk-4ARO6KTJ.js.map +1 -0
- package/dist/chunk-5YOFCJQ7.js +1115 -0
- package/dist/chunk-5YOFCJQ7.js.map +1 -0
- package/dist/chunk-7D4SUZUM.js +38 -0
- package/dist/chunk-7D4SUZUM.js.map +1 -0
- package/dist/chunk-BNJOGBYK.js +335 -0
- package/dist/chunk-BNJOGBYK.js.map +1 -0
- package/dist/chunk-ICNT7I3K.js +1180 -0
- package/dist/chunk-ICNT7I3K.js.map +1 -0
- package/dist/chunk-P5DRFKIT.js +413 -0
- package/dist/chunk-P5DRFKIT.js.map +1 -0
- package/dist/chunk-X3RO5SYJ.js +880 -0
- package/dist/chunk-X3RO5SYJ.js.map +1 -0
- package/dist/client.d.ts +428 -0
- package/dist/client.js +2048 -0
- package/dist/client.js.map +1 -0
- package/dist/compactor.d.ts +342 -0
- package/dist/compactor.js +793 -0
- package/dist/compactor.js.map +1 -0
- package/dist/coordinator-CxckTzYW.d.ts +396 -0
- package/dist/db-types-BR6Kt4uf.d.ts +29 -0
- package/dist/gateway-D5SaaMvT.d.ts +337 -0
- package/dist/gateway-server.d.ts +306 -0
- package/dist/gateway-server.js +4663 -0
- package/dist/gateway-server.js.map +1 -0
- package/dist/gateway.d.ts +196 -0
- package/dist/gateway.js +79 -0
- package/dist/gateway.js.map +1 -0
- package/dist/hlc-DiD8QNG3.d.ts +70 -0
- package/dist/index.d.ts +245 -0
- package/dist/index.js +102 -0
- package/dist/index.js.map +1 -0
- package/dist/json-dYtqiL0F.d.ts +18 -0
- package/dist/nessie-client-DrNikVXy.d.ts +160 -0
- package/dist/parquet.d.ts +78 -0
- package/dist/parquet.js +15 -0
- package/dist/parquet.js.map +1 -0
- package/dist/proto.d.ts +434 -0
- package/dist/proto.js +67 -0
- package/dist/proto.js.map +1 -0
- package/dist/react.d.ts +147 -0
- package/dist/react.js +224 -0
- package/dist/react.js.map +1 -0
- package/dist/resolver-C3Wphi6O.d.ts +10 -0
- package/dist/result-CojzlFE2.d.ts +64 -0
- package/dist/src-QU2YLPZY.js +383 -0
- package/dist/src-QU2YLPZY.js.map +1 -0
- package/dist/src-WYBF5LOI.js +102 -0
- package/dist/src-WYBF5LOI.js.map +1 -0
- package/dist/src-WZNPHANQ.js +426 -0
- package/dist/src-WZNPHANQ.js.map +1 -0
- package/dist/types-Bs-QyOe-.d.ts +143 -0
- package/dist/types-DAQL_vU_.d.ts +118 -0
- package/dist/types-DSC_EiwR.d.ts +45 -0
- package/dist/types-V_jVu2sA.d.ts +73 -0
- package/package.json +119 -0
@@ -0,0 +1 @@
+{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/json-dYtqiL0F.d.ts
ADDED
@@ -0,0 +1,18 @@
+/**
+ * BigInt-safe JSON replacer.
+ *
+ * Converts BigInt values to strings so they survive `JSON.stringify`,
+ * which otherwise throws on BigInt.
+ */
+declare function bigintReplacer(_key: string, value: unknown): unknown;
+/**
+ * BigInt-aware JSON reviver.
+ *
+ * Restores string-encoded HLC timestamps (fields ending in `Hlc` or `hlc`)
+ * back to BigInt so they match the branded `HLCTimestamp` type.
+ *
+ * Invalid numeric strings are left as-is to prevent runtime crashes.
+ */
+declare function bigintReviver(key: string, value: unknown): unknown;
+
+export { bigintReviver as a, bigintReplacer as b };
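These two declarations describe a BigInt-safe round trip through `JSON.stringify` / `JSON.parse`, which matters because LakeSync's HLC timestamps are branded bigints. A minimal sketch of that round trip follows; the import path and unmangled names are assumptions, since the file above only exposes the internal `a`/`b` aliases:

```ts
// Hypothetical import: the declaration file above only shows the internal
// `a`/`b` aliases, so this path and these names are assumptions.
import { bigintReplacer, bigintReviver } from "lakesync";

// An object containing a BigInt; plain JSON.stringify would throw on it.
const delta = { rowId: "r1", updatedHlc: 123456789n };

// On the way out, BigInt values become strings.
const wire = JSON.stringify(delta, bigintReplacer);

// On the way in, fields ending in `Hlc`/`hlc` are restored to BigInt.
const parsed = JSON.parse(wire, bigintReviver) as typeof delta;
console.log(typeof parsed.updatedHlc); // expected: "bigint"
```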
package/dist/nessie-client-DrNikVXy.d.ts
ADDED
@@ -0,0 +1,160 @@
+import { L as LakeSyncError, R as Result } from './result-CojzlFE2.js';
+
+/** Configuration for connecting to a Nessie Iceberg REST catalogue */
+interface CatalogueConfig {
+    /** Nessie Iceberg REST API base URI, e.g. "http://localhost:19120/iceberg" */
+    nessieUri: string;
+    /** Object storage warehouse URI, e.g. "s3://lakesync-warehouse" */
+    warehouseUri: string;
+    /** Nessie branch name. Defaults to "main". */
+    defaultBranch?: string;
+}
+/** Iceberg schema definition following the Iceberg REST spec */
+interface IcebergSchema {
+    type: "struct";
+    "schema-id": number;
+    fields: IcebergField[];
+}
+/** A single field within an Iceberg schema */
+interface IcebergField {
+    id: number;
+    name: string;
+    required: boolean;
+    type: string;
+}
+/** Iceberg partition specification */
+interface PartitionSpec {
+    "spec-id": number;
+    fields: Array<{
+        "source-id": number;
+        "field-id": number;
+        name: string;
+        transform: string;
+    }>;
+}
+/** A data file reference for Iceberg table commits */
+interface DataFile {
+    content: "data";
+    "file-path": string;
+    "file-format": "PARQUET";
+    "record-count": number;
+    "file-size-in-bytes": number;
+    partition?: Record<string, string>;
+}
+/** An Iceberg table snapshot */
+interface Snapshot {
+    "snapshot-id": number;
+    "timestamp-ms": number;
+    summary: Record<string, string>;
+    "manifest-list"?: string;
+}
+/** Full table metadata as returned by the Iceberg REST catalogue */
+interface TableMetadata {
+    "metadata-location"?: string;
+    metadata: {
+        "format-version": number;
+        "table-uuid": string;
+        location: string;
+        "current-schema-id": number;
+        schemas: IcebergSchema[];
+        "current-snapshot-id"?: number;
+        snapshots?: Snapshot[];
+        "partition-specs"?: PartitionSpec[];
+    };
+}
+/** Catalogue operation error */
+declare class CatalogueError extends LakeSyncError {
+    readonly statusCode: number;
+    constructor(message: string, statusCode: number, cause?: Error);
+}
+
+/**
+ * Typed client for the Nessie Iceberg REST Catalogue API v1.
+ *
+ * Wraps standard Iceberg REST endpoints exposed by the Nessie server,
+ * returning `Result<T, CatalogueError>` from every public method.
+ *
+ * On first use, the client fetches `/v1/config` from the server to discover
+ * the catalogue prefix (typically the Nessie branch name, e.g. `"main"`).
+ * All subsequent requests include this prefix in the URL path as required
+ * by the Iceberg REST specification: `/v1/{prefix}/namespaces/...`.
+ */
+declare class NessieCatalogueClient {
+    private readonly baseUri;
+    private readonly warehouseUri;
+    private prefixPromise;
+    constructor(config: CatalogueConfig);
+    /**
+     * Resolve the catalogue prefix by calling the `/v1/config` endpoint.
+     *
+     * The Iceberg REST specification requires a prefix segment in all
+     * API paths (e.g. `/v1/{prefix}/namespaces`). Nessie returns this
+     * value in the `defaults.prefix` field of the config response.
+     *
+     * The result is cached so the config endpoint is only called once
+     * per client instance.
+     *
+     * @returns The resolved prefix string (e.g. `"main"`)
+     */
+    private resolvePrefix;
+    /**
+     * Build the base API path including the resolved prefix.
+     *
+     * @returns URL prefix such as `http://host/iceberg/v1/main` or
+     * `http://host/iceberg/v1` when no prefix is configured
+     */
+    private apiBase;
+    /**
+     * Create a namespace (idempotent -- ignores 409 Conflict).
+     *
+     * @param namespace - Namespace parts, e.g. `["lakesync"]`
+     * @returns `Ok(void)` on success or if namespace already exists
+     */
+    createNamespace(namespace: string[]): Promise<Result<void, CatalogueError>>;
+    /**
+     * List all namespaces in the catalogue.
+     *
+     * @returns Array of namespace arrays, e.g. `[["lakesync"], ["other"]]`
+     */
+    listNamespaces(): Promise<Result<string[][], CatalogueError>>;
+    /**
+     * Create an Iceberg table within a namespace.
+     *
+     * @param namespace - Namespace parts, e.g. `["lakesync"]`
+     * @param name - Table name
+     * @param schema - Iceberg schema definition
+     * @param partitionSpec - Partition specification
+     */
+    createTable(namespace: string[], name: string, schema: IcebergSchema, partitionSpec: PartitionSpec): Promise<Result<void, CatalogueError>>;
+    /**
+     * Load table metadata from the catalogue.
+     *
+     * @param namespace - Namespace parts, e.g. `["lakesync"]`
+     * @param name - Table name
+     * @returns Full table metadata including schemas, snapshots, and partition specs
+     */
+    loadTable(namespace: string[], name: string): Promise<Result<TableMetadata, CatalogueError>>;
+    /**
+     * Append data files to a table, creating a new snapshot.
+     *
+     * Uses the standard Iceberg REST v1 commit-table endpoint with
+     * `add-snapshot` and `set-snapshot-ref` metadata updates.
+     * First loads the current table metadata to determine the current state,
+     * then commits a new snapshot referencing the provided data files.
+     *
+     * @param namespace - Namespace parts, e.g. `["lakesync"]`
+     * @param table - Table name
+     * @param files - Data files to append
+     */
+    appendFiles(namespace: string[], table: string, files: DataFile[]): Promise<Result<void, CatalogueError>>;
+    /**
+     * Get the current snapshot of a table, or null if no snapshots exist.
+     *
+     * @param namespace - Namespace parts, e.g. `["lakesync"]`
+     * @param table - Table name
+     * @returns The current snapshot, or `null` if the table has no snapshots
+     */
+    currentSnapshot(namespace: string[], table: string): Promise<Result<Snapshot | null, CatalogueError>>;
+}
+
+export { type CatalogueConfig as C, type DataFile as D, type IcebergSchema as I, NessieCatalogueClient as N, type PartitionSpec as P, type Snapshot as S, type TableMetadata as T, CatalogueError as a, type IcebergField as b };
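The declaration above covers the whole catalogue surface: namespaces, table creation, snapshot commits, and snapshot lookup. A short usage sketch follows; the import path is hypothetical, the URIs are taken from the doc-comment examples, the schema and file values are illustrative only, and it assumes `Result` (defined in `result-CojzlFE2.d.ts`, not shown here) is a discriminated union exposing `ok`, `value`, and `error`:

```ts
// Hypothetical entry point; only the internal chunk export is shown above.
import { NessieCatalogueClient } from "lakesync/catalogue";

const catalogue = new NessieCatalogueClient({
  nessieUri: "http://localhost:19120/iceberg",
  warehouseUri: "s3://lakesync-warehouse",
  defaultBranch: "main",
});

// Idempotent: an existing namespace (409 Conflict) is treated as success.
const ns = await catalogue.createNamespace(["lakesync"]);
if (!ns.ok) throw ns.error; // assumes Result exposes ok/error

// A minimal single-column schema and an unpartitioned spec (illustrative values).
await catalogue.createTable(
  ["lakesync"],
  "deltas",
  {
    type: "struct",
    "schema-id": 0,
    fields: [{ id: 1, name: "rowId", required: true, type: "string" }],
  },
  { "spec-id": 0, fields: [] },
);

// Register an already-written Parquet file as a new snapshot.
await catalogue.appendFiles(["lakesync"], "deltas", [
  {
    content: "data",
    "file-path": "s3://lakesync-warehouse/deltas/part-00000.parquet",
    "file-format": "PARQUET",
    "record-count": 1,
    "file-size-in-bytes": 1024,
  },
]);

// Every method returns Result<T, CatalogueError> rather than throwing.
const snapshot = await catalogue.currentSnapshot(["lakesync"], "deltas");
if (snapshot.ok && snapshot.value !== null) {
  console.log("current snapshot id:", snapshot.value["snapshot-id"]);
}
```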
package/dist/parquet.d.ts
ADDED
@@ -0,0 +1,78 @@
+import { T as TableSchema, R as RowDelta } from './types-V_jVu2sA.js';
+import * as arrow from 'apache-arrow';
+import { R as Result, F as FlushError } from './result-CojzlFE2.js';
+
+/**
+ * Builds an Apache Arrow Schema from a LakeSync TableSchema.
+ *
+ * The resulting schema always includes the following system columns:
+ * - `op` (Utf8) — the delta operation type (INSERT, UPDATE, DELETE)
+ * - `table` (Utf8) — the table name
+ * - `rowId` (Utf8) — the row identifier
+ * - `clientId` (Utf8) — the client identifier
+ * - `hlc` (Int64) — the HLC timestamp as a 64-bit integer
+ * - `deltaId` (Utf8) — the deterministic delta identifier
+ *
+ * User-defined columns from the TableSchema are appended after system columns,
+ * with types mapped as follows:
+ * - `string` → Utf8
+ * - `number` → Float64
+ * - `boolean` → Bool
+ * - `json` → Utf8 (values are JSON-serialised)
+ * - `null` → Utf8, nullable
+ *
+ * @param schema - The LakeSync TableSchema to convert
+ * @returns An Apache Arrow Schema with system and user columns
+ */
+declare function buildArrowSchema(schema: TableSchema): arrow.Schema;
+/**
+ * Converts an array of RowDelta objects into an Apache Arrow Table.
+ *
+ * System columns (op, table, rowId, clientId, hlc, deltaId) are extracted
+ * directly from each delta. User columns are looked up from each delta's
+ * `columns` array; missing columns produce `null` for that row.
+ *
+ * Type conversions:
+ * - `json` columns are serialised via `JSON.stringify`
+ * - `hlc` is passed as a bigint directly to Int64 vectors
+ * - All other types are passed through as-is
+ *
+ * @param deltas - Array of RowDelta objects to convert
+ * @param schema - The LakeSync TableSchema describing user columns
+ * @returns An Apache Arrow Table containing all deltas
+ */
+declare function deltasToArrowTable(deltas: RowDelta[], schema: TableSchema): arrow.Table;
+
+/**
+ * Deserialises Parquet bytes back into an array of RowDelta objects.
+ *
+ * Reads the Parquet data using parquet-wasm, converts to an Apache Arrow Table
+ * via IPC stream, then iterates over rows to reconstruct RowDelta objects.
+ * Int64 `hlc` values are cast back to branded HLCTimestamp bigints.
+ * Columns stored as JSON-serialised Utf8 strings (objects and arrays) are
+ * automatically parsed back to their original values.
+ * Int8 columns marked as booleans in metadata are converted back to true/false.
+ *
+ * @param data - The Parquet file bytes to deserialise
+ * @returns A Result containing the reconstructed RowDelta array, or a FlushError on failure
+ */
+declare function readParquetToDeltas(data: Uint8Array): Promise<Result<RowDelta[], FlushError>>;
+
+/**
+ * Serialises an array of RowDelta objects into Parquet bytes.
+ *
+ * Converts deltas to an Apache Arrow Table via `deltasToArrowTable`,
+ * then encodes the Arrow data as IPC stream bytes and writes them
+ * to Parquet format using Snappy compression via parquet-wasm.
+ *
+ * Boolean columns are stored as Int8 (1/0/null) to work around an
+ * Arrow JS IPC serialisation limitation. The original column types
+ * are preserved in Parquet metadata for the reader to restore.
+ *
+ * @param deltas - The row deltas to serialise
+ * @param schema - The table schema describing user-defined columns
+ * @returns A Result containing the Parquet file as a Uint8Array, or a FlushError on failure
+ */
+declare function writeDeltasToParquet(deltas: RowDelta[], schema: TableSchema): Promise<Result<Uint8Array, FlushError>>;
+
+export { buildArrowSchema, deltasToArrowTable, readParquetToDeltas, writeDeltasToParquet };
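Together these four functions describe the Parquet flush path: deltas are converted to an Arrow table, written with Snappy compression, and read back with `hlc` restored to a branded bigint and JSON/boolean columns reconstructed. A round-trip sketch follows; the import paths are hypothetical, the `RowDelta`/`TableSchema` values are left abstract because their shapes live in `types-V_jVu2sA.d.ts` (not shown), and `Result` is assumed to expose `ok`/`value`/`error`:

```ts
// Hypothetical entry point matching package/dist/parquet.js below.
import { writeDeltasToParquet, readParquetToDeltas } from "lakesync/parquet";
import type { RowDelta, TableSchema } from "lakesync"; // assumed re-export

// Placeholders: real values come from the sync pipeline.
declare const deltas: RowDelta[];
declare const schema: TableSchema;

// Serialise: deltas -> Arrow table -> Snappy-compressed Parquet bytes.
const written = await writeDeltasToParquet(deltas, schema);
if (!written.ok) throw written.error; // assumes Result exposes ok/error

// Deserialise: Parquet bytes -> RowDelta[], with `hlc` back as a branded
// bigint and JSON-serialised columns parsed to their original values.
const roundTripped = await readParquetToDeltas(written.value);
if (roundTripped.ok) {
  console.log(`recovered ${roundTripped.value.length} deltas`);
}
```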
package/dist/parquet.js
ADDED
@@ -0,0 +1,15 @@
+import {
+  buildArrowSchema,
+  deltasToArrowTable,
+  readParquetToDeltas,
+  writeDeltasToParquet
+} from "./chunk-4ARO6KTJ.js";
+import "./chunk-ICNT7I3K.js";
+import "./chunk-7D4SUZUM.js";
+export {
+  buildArrowSchema,
+  deltasToArrowTable,
+  readParquetToDeltas,
+  writeDeltasToParquet
+};
+//# sourceMappingURL=parquet.js.map
package/dist/parquet.js.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}