node-s3tables 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,14 @@ npm install node-s3tables
11
11
  ## Quick Start
12
12
 
13
13
  ```javascript
14
- import { getMetadata, addSchema, addPartitionSpec } from 'node-s3tables';
14
+ import {
15
+ getMetadata,
16
+ addSchema,
17
+ addPartitionSpec,
18
+ addManifest,
19
+ addDataFiles,
20
+ setCurrentCommit,
21
+ } from 'node-s3tables';
15
22
 
16
23
  // Get table metadata
17
24
  const metadata = await getMetadata({
@@ -128,8 +135,134 @@ await addPartitionSpec({
128
135
  });
129
136
  ```
130
137
 
138
+ ### addManifest(params)
139
+
140
+ Creates a manifest file for data files and returns a manifest list record.
141
+
142
+ **Parameters:**
143
+
144
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
145
+ - `params.region` (string) - AWS region
146
+ - `params.metadata` (IcebergMetadata) - Table metadata
147
+ - `params.schemaId` (number) - Schema ID to use
148
+ - `params.specId` (number) - Partition spec ID to use
149
+ - `params.snapshotId` (bigint) - Snapshot ID
150
+ - `params.sequenceNumber` (bigint) - Sequence number
151
+ - `params.files` (AddFile[]) - Array of data files
152
+
153
+ **Returns:** Promise<ManifestListRecord>
154
+
155
+ ```javascript
156
+ const manifestRecord = await addManifest({
157
+ region: 'us-west-2',
158
+ metadata: tableMetadata,
159
+ schemaId: 2,
160
+ specId: 1,
161
+ snapshotId: 4183020680887155442n,
162
+ sequenceNumber: 1n,
163
+ files: [
164
+ {
165
+ file: 's3://my-bucket/data/sales-2024-01-01.parquet',
166
+ partitions: { sale_date_day: '2024-01-01' },
167
+ recordCount: 1000n,
168
+ fileSize: 52428n,
169
+ },
170
+ ],
171
+ });
172
+ ```
173
+
174
+ ### addDataFiles(params)
175
+
176
+ Adds data files to an S3 table by creating a new snapshot.
177
+
178
+ **Parameters:**
179
+
180
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
181
+ - `params.namespace` (string) - The namespace name
182
+ - `params.name` (string) - The table name
183
+ - `params.lists` (AddFileList[]) - Array of file lists to add
184
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
185
+
186
+ **Returns:** Promise<JSONObject>
187
+
188
+ ```javascript
189
+ await addDataFiles({
190
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
191
+ namespace: 'sales',
192
+ name: 'daily_sales',
193
+ lists: [
194
+ {
195
+ specId: 1,
196
+ schemaId: 2,
197
+ files: [
198
+ {
199
+ file: 's3://my-bucket/data/sales-2024-01-01.parquet',
200
+ partitions: { sale_date_day: '2024-01-01' },
201
+ recordCount: 1000n,
202
+ fileSize: 52428n,
203
+ },
204
+ ],
205
+ },
206
+ ],
207
+ });
208
+ ```
209
+
210
+ ### setCurrentCommit(params)
211
+
212
+ Sets the current commit/snapshot for an S3 table.
213
+
214
+ **Parameters:**
215
+
216
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
217
+ - `params.namespace` (string) - The namespace name
218
+ - `params.name` (string) - The table name
219
+ - `params.snapshotId` (bigint) - The snapshot ID to set as current
220
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
221
+
222
+ **Returns:** Promise<JSONObject>
223
+
224
+ ```javascript
225
+ await setCurrentCommit({
226
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
227
+ namespace: 'sales',
228
+ name: 'daily_sales',
229
+ snapshotId: 4183020680887155442n,
230
+ });
231
+ ```
232
+
131
233
  ## Type Definitions
132
234
 
235
+ ### AddFileList
236
+
237
+ ```typescript
238
+ interface AddFileList {
239
+ specId: number;
240
+ schemaId: number;
241
+ files: AddFile[];
242
+ }
243
+ ```
244
+
245
+ ### AddFile
246
+
247
+ ```typescript
248
+ interface AddFile {
249
+ file: string;
250
+ partitions: PartitionRecord;
251
+ fileSize: bigint;
252
+ recordCount: bigint;
253
+ columnSizes?: Record<string, bigint> | null;
254
+ valueCounts?: Record<string, bigint> | null;
255
+ nullValueCounts?: Record<string, bigint> | null;
256
+ nanValueCounts?: Record<string, bigint> | null;
257
+ lowerBounds?: Record<string, Buffer> | null;
258
+ upperBounds?: Record<string, Buffer> | null;
259
+ keyMetadata?: Buffer | null;
260
+ splitOffsets?: bigint[] | null;
261
+ equalityIds?: number[] | null;
262
+ sortOrderId?: number | null;
263
+ }
264
+ ```
265
+
133
266
  ### IcebergSchemaField
134
267
 
135
268
  ```typescript
@@ -177,6 +310,78 @@ Supported partition transforms:
177
310
  - `'bucket[N]'` - Hash bucket with N buckets
178
311
  - `'truncate[N]'` - Truncate strings to N characters
179
312
 
313
+ ## Testing
314
+
315
+ ### Prerequisites
316
+
317
+ The tests require AWS credentials and S3 Tables resources. Set up the following environment variables in a `.env` file:
318
+
319
+ ```bash
320
+ TABLE_BUCKET_ARN=arn:aws:s3tables:us-west-2:123456789012:bucket/your-test-bucket
321
+ CATALOG_ID=123456789012:s3tablescatalog/your-test-bucket
322
+ OUTPUT_BUCKET=your-output-bucket
323
+ ```
324
+
325
+ ### AWS Service Calls and Permissions
326
+
327
+ The tests make calls to multiple AWS services and require the following permissions:
328
+
329
+ **S3 Tables:**
330
+
331
+ - `s3tables:CreateNamespace`
332
+ - `s3tables:DeleteNamespace`
333
+ - `s3tables:CreateTable`
334
+ - `s3tables:DeleteTable`
335
+ - `s3tables:GetTableMetadata`
336
+ - `s3tables:UpdateTableMetadata`
337
+
338
+ **S3:**
339
+
340
+ - `s3:PutObject` (for uploading test Parquet files)
341
+ - `s3:GetObject` (for reading manifest files)
342
+
343
+ **Lake Formation:**
344
+
345
+ - `lakeformation:AddLFTagsToResource` (adds `AccessLevel: Public` tag to namespaces)
346
+
347
+ **Athena:**
348
+
349
+ - `athena:StartQueryExecution`
350
+ - `athena:GetQueryExecution`
351
+ - `athena:GetQueryResults`
352
+
353
+ **Lake Formation Setup:**
354
+ The tests expect a Lake Formation tag with key `AccessLevel` and value `Public` to exist in your account. This tag is automatically applied to test namespaces to allow Athena query permissions.
355
+
356
+ ### Test Dependencies
357
+
358
+ The test suite uses additional dependencies for creating test data:
359
+
360
+ - `@aws-sdk/client-athena` - For running Athena queries in tests
361
+ - `@aws-sdk/client-lakeformation` - For Lake Formation permissions
362
+ - `parquetjs` - For creating test Parquet files
363
+ - `dotenv-cli` - For loading environment variables
364
+
365
+ ### Running Tests
366
+
367
+ Run the test suite:
368
+
369
+ ```bash
370
+ npm test
371
+ ```
372
+
373
+ Run tests with coverage:
374
+
375
+ ```bash
376
+ npm run test:cover
377
+ ```
378
+
379
+ Run a single test file:
380
+
381
+ ```bash
382
+ npm run test:single test/create.test.ts
383
+ ```
384
+
180
385
  ## Configuration
181
386
 
182
387
  The library uses the AWS SDK for authentication. Configure credentials using:
package/dist/index.d.ts CHANGED
@@ -1,5 +1,33 @@
1
1
  import { AwsCredentialIdentity } from '@aws-sdk/types';
2
- import { S3TablesClientConfig } from '@aws-sdk/client-s3tables';
2
+
3
+ type RawValue = string | number | bigint | Buffer | null;
4
+ type PartitionRecord = Record<string, RawValue>;
5
+ interface PartitionSummary {
6
+ contains_null: boolean;
7
+ contains_nan?: boolean | null;
8
+ lower_bound?: Buffer | null;
9
+ upper_bound?: Buffer | null;
10
+ }
11
+ declare enum ListContent {
12
+ DATA = 0,
13
+ DELETES = 1
14
+ }
15
+ interface ManifestListRecord {
16
+ manifest_path: string;
17
+ manifest_length: bigint;
18
+ partition_spec_id: number;
19
+ content: ListContent;
20
+ sequence_number: bigint;
21
+ min_sequence_number: bigint;
22
+ added_snapshot_id: bigint;
23
+ added_data_files_count: number;
24
+ existing_data_files_count: number;
25
+ deleted_data_files_count: number;
26
+ added_rows_count: bigint;
27
+ existing_rows_count: bigint;
28
+ deleted_rows_count: bigint;
29
+ partitions?: PartitionSummary[] | null;
30
+ }
3
31
 
4
32
  type IcebergTransform = 'identity' | 'year' | 'month' | 'day' | 'hour' | `bucket[${number}]` | `truncate[${number}]`;
5
33
  interface IcebergPartitionField {
@@ -39,15 +67,61 @@ interface IcebergPartitionSpec {
39
67
  'spec-id': number;
40
68
  fields: IcebergPartitionField[];
41
69
  }
70
+ interface IcebergSnapshot {
71
+ 'snapshot-id': bigint | number;
72
+ 'parent-snapshot-id'?: bigint | number;
73
+ 'sequence-number': number;
74
+ 'timestamp-ms': number;
75
+ 'manifest-list': string;
76
+ summary: Record<string, string>;
77
+ 'schema-id'?: number;
78
+ }
42
79
  interface IcebergMetadata {
43
80
  'last-column-id': number;
44
81
  'current-schema-id': number;
45
82
  schemas: IcebergSchema[];
83
+ snapshots: IcebergSnapshot[];
46
84
  'default-spec-id': number;
47
85
  'partition-specs': IcebergPartitionSpec[];
48
86
  'last-partition-id': number;
49
- 'current-snapshot-id': number;
87
+ 'current-snapshot-id': bigint | number;
88
+ location: string;
89
+ }
90
+
91
+ interface AddFile {
92
+ file: string;
93
+ partitions: PartitionRecord;
94
+ fileSize: bigint;
95
+ recordCount: bigint;
96
+ columnSizes?: Record<string, bigint> | null | undefined;
97
+ valueCounts?: Record<string, bigint> | null | undefined;
98
+ nullValueCounts?: Record<string, bigint> | null | undefined;
99
+ nanValueCounts?: Record<string, bigint> | null | undefined;
100
+ lowerBounds?: Record<string, Buffer> | null | undefined;
101
+ upperBounds?: Record<string, Buffer> | null | undefined;
102
+ keyMetadata?: Buffer | null | undefined;
103
+ splitOffsets?: bigint[] | null | undefined;
104
+ equalityIds?: number[] | null | undefined;
105
+ sortOrderId?: number | null | undefined;
50
106
  }
107
+ interface AddManifestParams {
108
+ credentials?: AwsCredentialIdentity | undefined;
109
+ region: string;
110
+ metadata: IcebergMetadata;
111
+ schemaId: number;
112
+ specId: number;
113
+ snapshotId: bigint;
114
+ sequenceNumber: bigint;
115
+ files: AddFile[];
116
+ }
117
+ declare function addManifest(params: AddManifestParams): Promise<ManifestListRecord>;
118
+
119
+ type JSONPrimitive = string | number | boolean | null | bigint | undefined;
120
+ type JSONValue = JSONPrimitive | JSONObject | JSONArray;
121
+ interface JSONObject {
122
+ [key: string]: JSONValue;
123
+ }
124
+ type JSONArray = JSONValue[];
51
125
 
52
126
  type TableLocation = {
53
127
  tableArn: string;
@@ -57,7 +131,8 @@ type TableLocation = {
57
131
  name: string;
58
132
  };
59
133
  type GetMetadataParams = TableLocation & {
60
- config?: S3TablesClientConfig;
134
+ region?: string;
135
+ credentials?: AwsCredentialIdentity;
61
136
  };
62
137
  declare function getMetadata(params: GetMetadataParams): Promise<IcebergMetadata>;
63
138
  interface AddSchemaParams {
@@ -68,7 +143,7 @@ interface AddSchemaParams {
68
143
  schemaId: number;
69
144
  fields: IcebergSchemaField[];
70
145
  }
71
- declare function addSchema(params: AddSchemaParams): Promise<string>;
146
+ declare function addSchema(params: AddSchemaParams): Promise<JSONObject>;
72
147
  interface AddPartitionSpecParams {
73
148
  credentials?: AwsCredentialIdentity;
74
149
  tableBucketARN: string;
@@ -77,13 +152,38 @@ interface AddPartitionSpecParams {
77
152
  specId: number;
78
153
  fields: IcebergPartitionField[];
79
154
  }
80
- declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<string>;
155
+ declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<JSONObject>;
156
+
157
+ interface AddFileList {
158
+ specId: number;
159
+ schemaId: number;
160
+ files: AddFile[];
161
+ }
162
+ interface AddDataFilesParams {
163
+ credentials?: AwsCredentialIdentity;
164
+ tableBucketARN: string;
165
+ namespace: string;
166
+ name: string;
167
+ lists: AddFileList[];
168
+ }
169
+ declare function addDataFiles(params: AddDataFilesParams): Promise<JSONObject>;
170
+ interface SetCurrentCommitParams {
171
+ credentials?: AwsCredentialIdentity;
172
+ tableBucketARN: string;
173
+ namespace: string;
174
+ name: string;
175
+ snapshotId: bigint;
176
+ }
177
+ declare function setCurrentCommit(params: SetCurrentCommitParams): Promise<JSONObject>;
81
178
 
82
179
  declare const _default: {
83
180
  getMetadata: typeof getMetadata;
84
181
  addSchema: typeof addSchema;
85
182
  addPartitionSpec: typeof addPartitionSpec;
183
+ addManifest: typeof addManifest;
184
+ addDataFiles: typeof addDataFiles;
185
+ setCurrentCommit: typeof setCurrentCommit;
86
186
  };
87
187
 
88
- export { addPartitionSpec, addSchema, _default as default, getMetadata };
89
- export type { AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergTransform, IcebergType, TableLocation };
188
+ export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
189
+ export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };