node-s3tables 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +206 -1
- package/dist/index.d.ts +107 -7
- package/dist/index.js +1201 -15
- package/package.json +14 -7
package/README.md
CHANGED
|
@@ -11,7 +11,14 @@ npm install node-s3tables
|
|
|
11
11
|
## Quick Start
|
|
12
12
|
|
|
13
13
|
```javascript
|
|
14
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
getMetadata,
|
|
16
|
+
addSchema,
|
|
17
|
+
addPartitionSpec,
|
|
18
|
+
addManifest,
|
|
19
|
+
addDataFiles,
|
|
20
|
+
setCurrentCommit,
|
|
21
|
+
} from 'node-s3tables';
|
|
15
22
|
|
|
16
23
|
// Get table metadata
|
|
17
24
|
const metadata = await getMetadata({
|
|
@@ -128,8 +135,134 @@ await addPartitionSpec({
|
|
|
128
135
|
});
|
|
129
136
|
```
|
|
130
137
|
|
|
138
|
+
### addManifest(params)
|
|
139
|
+
|
|
140
|
+
Creates a manifest file for data files and returns a manifest list record.
|
|
141
|
+
|
|
142
|
+
**Parameters:**
|
|
143
|
+
|
|
144
|
+
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
145
|
+
- `params.region` (string) - AWS region
|
|
146
|
+
- `params.metadata` (IcebergMetadata) - Table metadata
|
|
147
|
+
- `params.schemaId` (number) - Schema ID to use
|
|
148
|
+
- `params.specId` (number) - Partition spec ID to use
|
|
149
|
+
- `params.snapshotId` (bigint) - Snapshot ID
|
|
150
|
+
- `params.sequenceNumber` (bigint) - Sequence number
|
|
151
|
+
- `params.files` (AddFile[]) - Array of data files
|
|
152
|
+
|
|
153
|
+
**Returns:** Promise<ManifestListRecord>
|
|
154
|
+
|
|
155
|
+
```javascript
|
|
156
|
+
const manifestRecord = await addManifest({
|
|
157
|
+
region: 'us-west-2',
|
|
158
|
+
metadata: tableMetadata,
|
|
159
|
+
schemaId: 2,
|
|
160
|
+
specId: 1,
|
|
161
|
+
snapshotId: 4183020680887155442n,
|
|
162
|
+
sequenceNumber: 1n,
|
|
163
|
+
files: [
|
|
164
|
+
{
|
|
165
|
+
file: 's3://my-bucket/data/sales-2024-01-01.parquet',
|
|
166
|
+
partitions: { sale_date_day: '2024-01-01' },
|
|
167
|
+
recordCount: 1000n,
|
|
168
|
+
fileSize: 52428n,
|
|
169
|
+
},
|
|
170
|
+
],
|
|
171
|
+
});
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### addDataFiles(params)
|
|
175
|
+
|
|
176
|
+
Adds data files to an S3 table by creating a new snapshot.
|
|
177
|
+
|
|
178
|
+
**Parameters:**
|
|
179
|
+
|
|
180
|
+
- `params.tableBucketARN` (string) - The ARN of the table bucket
|
|
181
|
+
- `params.namespace` (string) - The namespace name
|
|
182
|
+
- `params.name` (string) - The table name
|
|
183
|
+
- `params.lists` (AddFileList[]) - Array of file lists to add
|
|
184
|
+
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
185
|
+
|
|
186
|
+
**Returns:** Promise<JSONObject>
|
|
187
|
+
|
|
188
|
+
```javascript
|
|
189
|
+
await addDataFiles({
|
|
190
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
191
|
+
namespace: 'sales',
|
|
192
|
+
name: 'daily_sales',
|
|
193
|
+
lists: [
|
|
194
|
+
{
|
|
195
|
+
specId: 1,
|
|
196
|
+
schemaId: 2,
|
|
197
|
+
files: [
|
|
198
|
+
{
|
|
199
|
+
file: 's3://my-bucket/data/sales-2024-01-01.parquet',
|
|
200
|
+
partitions: { sale_date_day: '2024-01-01' },
|
|
201
|
+
recordCount: 1000n,
|
|
202
|
+
fileSize: 52428n,
|
|
203
|
+
},
|
|
204
|
+
],
|
|
205
|
+
},
|
|
206
|
+
],
|
|
207
|
+
});
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### setCurrentCommit(params)
|
|
211
|
+
|
|
212
|
+
Sets the current commit/snapshot for an S3 table.
|
|
213
|
+
|
|
214
|
+
**Parameters:**
|
|
215
|
+
|
|
216
|
+
- `params.tableBucketARN` (string) - The ARN of the table bucket
|
|
217
|
+
- `params.namespace` (string) - The namespace name
|
|
218
|
+
- `params.name` (string) - The table name
|
|
219
|
+
- `params.snapshotId` (bigint) - The snapshot ID to set as current
|
|
220
|
+
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
221
|
+
|
|
222
|
+
**Returns:** Promise<JSONObject>
|
|
223
|
+
|
|
224
|
+
```javascript
|
|
225
|
+
await setCurrentCommit({
|
|
226
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
227
|
+
namespace: 'sales',
|
|
228
|
+
name: 'daily_sales',
|
|
229
|
+
snapshotId: 4183020680887155442n,
|
|
230
|
+
});
|
|
231
|
+
```
|
|
232
|
+
|
|
131
233
|
## Type Definitions
|
|
132
234
|
|
|
235
|
+
### AddFileList
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
interface AddFileList {
|
|
239
|
+
specId: number;
|
|
240
|
+
schemaId: number;
|
|
241
|
+
files: AddFile[];
|
|
242
|
+
}
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### AddFile
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
interface AddFile {
|
|
249
|
+
file: string;
|
|
250
|
+
partitions: PartitionRecord;
|
|
251
|
+
fileSize: bigint;
|
|
252
|
+
recordCount: bigint;
|
|
253
|
+
columnSizes?: Record<string, bigint> | null;
|
|
254
|
+
valueCounts?: Record<string, bigint> | null;
|
|
255
|
+
nullValueCounts?: Record<string, bigint> | null;
|
|
256
|
+
nanValueCounts?: Record<string, bigint> | null;
|
|
257
|
+
lowerBounds?: Record<string, Buffer> | null;
|
|
258
|
+
upperBounds?: Record<string, Buffer> | null;
|
|
259
|
+
keyMetadata?: Buffer | null;
|
|
260
|
+
splitOffsets?: bigint[] | null;
|
|
261
|
+
equalityIds?: number[] | null;
|
|
262
|
+
sortOrderId?: number | null;
|
|
263
|
+
}
|
|
264
|
+
```
|
|
265
|
+
|
|
133
266
|
### IcebergSchemaField
|
|
134
267
|
|
|
135
268
|
```typescript
|
|
@@ -177,6 +310,78 @@ Supported partition transforms:
|
|
|
177
310
|
- `'bucket[N]'` - Hash bucket with N buckets
|
|
178
311
|
- `'truncate[N]'` - Truncate strings to N characters
|
|
179
312
|
|
|
313
|
+
## Testing
|
|
314
|
+
|
|
315
|
+
### Prerequisites
|
|
316
|
+
|
|
317
|
+
The tests require AWS credentials and S3 Tables resources. Set up the following environment variables in a `.env` file:
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
TABLE_BUCKET_ARN=arn:aws:s3tables:us-west-2:123456789012:bucket/your-test-bucket
|
|
321
|
+
CATALOG_ID=123456789012:s3tablescatalog/your-test-bucket
|
|
322
|
+
OUTPUT_BUCKET=your-output-bucket
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### AWS Service Calls and Permissions
|
|
326
|
+
|
|
327
|
+
The tests make calls to multiple AWS services and require the following permissions:
|
|
328
|
+
|
|
329
|
+
**S3 Tables:**
|
|
330
|
+
|
|
331
|
+
- `s3tables:CreateNamespace`
|
|
332
|
+
- `s3tables:DeleteNamespace`
|
|
333
|
+
- `s3tables:CreateTable`
|
|
334
|
+
- `s3tables:DeleteTable`
|
|
335
|
+
- `s3tables:GetTableMetadata`
|
|
336
|
+
- `s3tables:UpdateTableMetadata`
|
|
337
|
+
|
|
338
|
+
**S3:**
|
|
339
|
+
|
|
340
|
+
- `s3:PutObject` (for uploading test Parquet files)
|
|
341
|
+
- `s3:GetObject` (for reading manifest files)
|
|
342
|
+
|
|
343
|
+
**Lake Formation:**
|
|
344
|
+
|
|
345
|
+
- `lakeformation:AddLFTagsToResource` (adds `AccessLevel: Public` tag to namespaces)
|
|
346
|
+
|
|
347
|
+
**Athena:**
|
|
348
|
+
|
|
349
|
+
- `athena:StartQueryExecution`
|
|
350
|
+
- `athena:GetQueryExecution`
|
|
351
|
+
- `athena:GetQueryResults`
|
|
352
|
+
|
|
353
|
+
**Lake Formation Setup:**
|
|
354
|
+
The tests expect a Lake Formation tag with key `AccessLevel` and value `Public` to already exist in your account. The tests automatically apply this tag to the namespaces they create so that Athena is permitted to query them.
|
|
355
|
+
|
|
356
|
+
### Test Dependencies
|
|
357
|
+
|
|
358
|
+
The test suite uses additional dependencies for creating test data, querying it, and loading configuration:
|
|
359
|
+
|
|
360
|
+
- `@aws-sdk/client-athena` - For running Athena queries in tests
|
|
361
|
+
- `@aws-sdk/client-lakeformation` - For Lake Formation permissions
|
|
362
|
+
- `parquetjs` - For creating test Parquet files
|
|
363
|
+
- `dotenv-cli` - For loading environment variables
|
|
364
|
+
|
|
365
|
+
### Running Tests
|
|
366
|
+
|
|
367
|
+
Run the test suite:
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
npm test
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
Run tests with coverage:
|
|
374
|
+
|
|
375
|
+
```bash
|
|
376
|
+
npm run test:cover
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
Run a single test file:
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
npm run test:single test/create.test.ts
|
|
383
|
+
```
|
|
384
|
+
|
|
180
385
|
## Configuration
|
|
181
386
|
|
|
182
387
|
The library uses the AWS SDK for authentication. Configure credentials using:
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
import { AwsCredentialIdentity } from '@aws-sdk/types';
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
type RawValue = string | number | bigint | Buffer | null;
|
|
4
|
+
type PartitionRecord = Record<string, RawValue>;
|
|
5
|
+
interface PartitionSummary {
|
|
6
|
+
contains_null: boolean;
|
|
7
|
+
contains_nan?: boolean | null;
|
|
8
|
+
lower_bound?: Buffer | null;
|
|
9
|
+
upper_bound?: Buffer | null;
|
|
10
|
+
}
|
|
11
|
+
declare enum ListContent {
|
|
12
|
+
DATA = 0,
|
|
13
|
+
DELETES = 1
|
|
14
|
+
}
|
|
15
|
+
interface ManifestListRecord {
|
|
16
|
+
manifest_path: string;
|
|
17
|
+
manifest_length: bigint;
|
|
18
|
+
partition_spec_id: number;
|
|
19
|
+
content: ListContent;
|
|
20
|
+
sequence_number: bigint;
|
|
21
|
+
min_sequence_number: bigint;
|
|
22
|
+
added_snapshot_id: bigint;
|
|
23
|
+
added_data_files_count: number;
|
|
24
|
+
existing_data_files_count: number;
|
|
25
|
+
deleted_data_files_count: number;
|
|
26
|
+
added_rows_count: bigint;
|
|
27
|
+
existing_rows_count: bigint;
|
|
28
|
+
deleted_rows_count: bigint;
|
|
29
|
+
partitions?: PartitionSummary[] | null;
|
|
30
|
+
}
|
|
3
31
|
|
|
4
32
|
type IcebergTransform = 'identity' | 'year' | 'month' | 'day' | 'hour' | `bucket[${number}]` | `truncate[${number}]`;
|
|
5
33
|
interface IcebergPartitionField {
|
|
@@ -39,15 +67,61 @@ interface IcebergPartitionSpec {
|
|
|
39
67
|
'spec-id': number;
|
|
40
68
|
fields: IcebergPartitionField[];
|
|
41
69
|
}
|
|
70
|
+
interface IcebergSnapshot {
|
|
71
|
+
'snapshot-id': bigint | number;
|
|
72
|
+
'parent-snapshot-id'?: bigint | number;
|
|
73
|
+
'sequence-number': number;
|
|
74
|
+
'timestamp-ms': number;
|
|
75
|
+
'manifest-list': string;
|
|
76
|
+
summary: Record<string, string>;
|
|
77
|
+
'schema-id'?: number;
|
|
78
|
+
}
|
|
42
79
|
interface IcebergMetadata {
|
|
43
80
|
'last-column-id': number;
|
|
44
81
|
'current-schema-id': number;
|
|
45
82
|
schemas: IcebergSchema[];
|
|
83
|
+
snapshots: IcebergSnapshot[];
|
|
46
84
|
'default-spec-id': number;
|
|
47
85
|
'partition-specs': IcebergPartitionSpec[];
|
|
48
86
|
'last-partition-id': number;
|
|
49
|
-
'current-snapshot-id': number;
|
|
87
|
+
'current-snapshot-id': bigint | number;
|
|
88
|
+
location: string;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
interface AddFile {
|
|
92
|
+
file: string;
|
|
93
|
+
partitions: PartitionRecord;
|
|
94
|
+
fileSize: bigint;
|
|
95
|
+
recordCount: bigint;
|
|
96
|
+
columnSizes?: Record<string, bigint> | null | undefined;
|
|
97
|
+
valueCounts?: Record<string, bigint> | null | undefined;
|
|
98
|
+
nullValueCounts?: Record<string, bigint> | null | undefined;
|
|
99
|
+
nanValueCounts?: Record<string, bigint> | null | undefined;
|
|
100
|
+
lowerBounds?: Record<string, Buffer> | null | undefined;
|
|
101
|
+
upperBounds?: Record<string, Buffer> | null | undefined;
|
|
102
|
+
keyMetadata?: Buffer | null | undefined;
|
|
103
|
+
splitOffsets?: bigint[] | null | undefined;
|
|
104
|
+
equalityIds?: number[] | null | undefined;
|
|
105
|
+
sortOrderId?: number | null | undefined;
|
|
50
106
|
}
|
|
107
|
+
interface AddManifestParams {
|
|
108
|
+
credentials?: AwsCredentialIdentity | undefined;
|
|
109
|
+
region: string;
|
|
110
|
+
metadata: IcebergMetadata;
|
|
111
|
+
schemaId: number;
|
|
112
|
+
specId: number;
|
|
113
|
+
snapshotId: bigint;
|
|
114
|
+
sequenceNumber: bigint;
|
|
115
|
+
files: AddFile[];
|
|
116
|
+
}
|
|
117
|
+
declare function addManifest(params: AddManifestParams): Promise<ManifestListRecord>;
|
|
118
|
+
|
|
119
|
+
type JSONPrimitive = string | number | boolean | null | bigint | undefined;
|
|
120
|
+
type JSONValue = JSONPrimitive | JSONObject | JSONArray;
|
|
121
|
+
interface JSONObject {
|
|
122
|
+
[key: string]: JSONValue;
|
|
123
|
+
}
|
|
124
|
+
type JSONArray = JSONValue[];
|
|
51
125
|
|
|
52
126
|
type TableLocation = {
|
|
53
127
|
tableArn: string;
|
|
@@ -57,7 +131,8 @@ type TableLocation = {
|
|
|
57
131
|
name: string;
|
|
58
132
|
};
|
|
59
133
|
type GetMetadataParams = TableLocation & {
|
|
60
|
-
|
|
134
|
+
region?: string;
|
|
135
|
+
credentials?: AwsCredentialIdentity;
|
|
61
136
|
};
|
|
62
137
|
declare function getMetadata(params: GetMetadataParams): Promise<IcebergMetadata>;
|
|
63
138
|
interface AddSchemaParams {
|
|
@@ -68,7 +143,7 @@ interface AddSchemaParams {
|
|
|
68
143
|
schemaId: number;
|
|
69
144
|
fields: IcebergSchemaField[];
|
|
70
145
|
}
|
|
71
|
-
declare function addSchema(params: AddSchemaParams): Promise<
|
|
146
|
+
declare function addSchema(params: AddSchemaParams): Promise<JSONObject>;
|
|
72
147
|
interface AddPartitionSpecParams {
|
|
73
148
|
credentials?: AwsCredentialIdentity;
|
|
74
149
|
tableBucketARN: string;
|
|
@@ -77,13 +152,38 @@ interface AddPartitionSpecParams {
|
|
|
77
152
|
specId: number;
|
|
78
153
|
fields: IcebergPartitionField[];
|
|
79
154
|
}
|
|
80
|
-
declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<
|
|
155
|
+
declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<JSONObject>;
|
|
156
|
+
|
|
157
|
+
interface AddFileList {
|
|
158
|
+
specId: number;
|
|
159
|
+
schemaId: number;
|
|
160
|
+
files: AddFile[];
|
|
161
|
+
}
|
|
162
|
+
interface AddDataFilesParams {
|
|
163
|
+
credentials?: AwsCredentialIdentity;
|
|
164
|
+
tableBucketARN: string;
|
|
165
|
+
namespace: string;
|
|
166
|
+
name: string;
|
|
167
|
+
lists: AddFileList[];
|
|
168
|
+
}
|
|
169
|
+
declare function addDataFiles(params: AddDataFilesParams): Promise<JSONObject>;
|
|
170
|
+
interface SetCurrentCommitParams {
|
|
171
|
+
credentials?: AwsCredentialIdentity;
|
|
172
|
+
tableBucketARN: string;
|
|
173
|
+
namespace: string;
|
|
174
|
+
name: string;
|
|
175
|
+
snapshotId: bigint;
|
|
176
|
+
}
|
|
177
|
+
declare function setCurrentCommit(params: SetCurrentCommitParams): Promise<JSONObject>;
|
|
81
178
|
|
|
82
179
|
declare const _default: {
|
|
83
180
|
getMetadata: typeof getMetadata;
|
|
84
181
|
addSchema: typeof addSchema;
|
|
85
182
|
addPartitionSpec: typeof addPartitionSpec;
|
|
183
|
+
addManifest: typeof addManifest;
|
|
184
|
+
addDataFiles: typeof addDataFiles;
|
|
185
|
+
setCurrentCommit: typeof setCurrentCommit;
|
|
86
186
|
};
|
|
87
187
|
|
|
88
|
-
export { addPartitionSpec, addSchema, _default as default, getMetadata };
|
|
89
|
-
export type { AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergTransform, IcebergType, TableLocation };
|
|
188
|
+
export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
|
|
189
|
+
export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
|