node-s3tables 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/index.d.ts +16 -5
- package/dist/index.js +84 -35
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
addManifest,
|
|
19
19
|
addDataFiles,
|
|
20
20
|
setCurrentCommit,
|
|
21
|
+
removeSnapshots,
|
|
21
22
|
} from 'node-s3tables';
|
|
22
23
|
|
|
23
24
|
// Get table metadata
|
|
@@ -184,6 +185,7 @@ Adds data files to an S3 table by creating a new snapshot.
|
|
|
184
185
|
- `params.namespace` (string) - The namespace name
|
|
185
186
|
- `params.name` (string) - The table name
|
|
186
187
|
- `params.lists` (AddFileList[]) - Array of file lists to add
|
|
188
|
+
- `params.maxSnapshots` (number, optional) - Maximum number of snapshots to retain. When set and the table already holds at least this many snapshots, the single oldest snapshot (by timestamp) is removed in the same commit that adds the new one.
|
|
187
189
|
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
188
190
|
|
|
189
191
|
**Returns:** Promise<string>
|
|
@@ -208,6 +210,28 @@ await addDataFiles({
|
|
|
208
210
|
},
|
|
209
211
|
],
|
|
210
212
|
});
|
|
213
|
+
|
|
214
|
+
// With automatic snapshot cleanup
|
|
215
|
+
await addDataFiles({
|
|
216
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
217
|
+
namespace: 'sales',
|
|
218
|
+
name: 'daily_sales',
|
|
219
|
+
maxSnapshots: 10, // Keep only the 10 most recent snapshots
|
|
220
|
+
lists: [
|
|
221
|
+
{
|
|
222
|
+
specId: 1,
|
|
223
|
+
schemaId: 2,
|
|
224
|
+
files: [
|
|
225
|
+
{
|
|
226
|
+
file: 's3://my-bucket/data/sales-2024-01-02.parquet',
|
|
227
|
+
partitions: { sale_date_day: '2024-01-02' },
|
|
228
|
+
recordCount: 1500n,
|
|
229
|
+
fileSize: 78643n,
|
|
230
|
+
},
|
|
231
|
+
],
|
|
232
|
+
},
|
|
233
|
+
],
|
|
234
|
+
});
|
|
211
235
|
```
|
|
212
236
|
|
|
213
237
|
### setCurrentCommit(params)
|
|
@@ -233,6 +257,29 @@ await setCurrentCommit({
|
|
|
233
257
|
});
|
|
234
258
|
```
|
|
235
259
|
|
|
260
|
+
### removeSnapshots(params)
|
|
261
|
+
|
|
262
|
+
Removes snapshots from an S3 table. Note: Due to Iceberg limitations, only one snapshot can be removed at a time.
|
|
263
|
+
|
|
264
|
+
**Parameters:**
|
|
265
|
+
|
|
266
|
+
- `params.tableBucketARN` (string) - The ARN of the table bucket
|
|
267
|
+
- `params.namespace` (string) - The namespace name
|
|
268
|
+
- `params.name` (string) - The table name
|
|
269
|
+
- `params.snapshotIds` (bigint[]) - Array of snapshot IDs to remove (provide exactly one snapshot ID per call)
|
|
270
|
+
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
271
|
+
|
|
272
|
+
**Returns:** Promise<IcebergUpdateResponse>
|
|
273
|
+
|
|
274
|
+
```javascript
|
|
275
|
+
await removeSnapshots({
|
|
276
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
277
|
+
namespace: 'sales',
|
|
278
|
+
name: 'daily_sales',
|
|
279
|
+
snapshotIds: [4183020680887155442n], // Only one snapshot ID
|
|
280
|
+
});
|
|
281
|
+
```
|
|
282
|
+
|
|
236
283
|
## Type Definitions
|
|
237
284
|
|
|
238
285
|
### IcebergUpdateResponse
|
package/dist/index.d.ts
CHANGED
|
@@ -20,9 +20,9 @@ interface ManifestListRecord {
|
|
|
20
20
|
sequence_number: bigint;
|
|
21
21
|
min_sequence_number: bigint;
|
|
22
22
|
added_snapshot_id: bigint;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
added_files_count: number;
|
|
24
|
+
existing_files_count: number;
|
|
25
|
+
deleted_files_count: number;
|
|
26
26
|
added_rows_count: bigint;
|
|
27
27
|
existing_rows_count: bigint;
|
|
28
28
|
deleted_rows_count: bigint;
|
|
@@ -81,6 +81,7 @@ interface IcebergSnapshot {
|
|
|
81
81
|
}
|
|
82
82
|
interface IcebergMetadata {
|
|
83
83
|
'last-column-id': number;
|
|
84
|
+
'last-sequence-number': bigint | number;
|
|
84
85
|
'current-schema-id': number;
|
|
85
86
|
schemas: IcebergSchema[];
|
|
86
87
|
snapshots: IcebergSnapshot[];
|
|
@@ -153,6 +154,14 @@ interface AddPartitionSpecParams {
|
|
|
153
154
|
fields: IcebergPartitionField[];
|
|
154
155
|
}
|
|
155
156
|
declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<IcebergUpdateResponse>;
|
|
157
|
+
interface RemoveSnapshotsParams {
|
|
158
|
+
credentials?: AwsCredentialIdentity;
|
|
159
|
+
tableBucketARN: string;
|
|
160
|
+
namespace: string;
|
|
161
|
+
name: string;
|
|
162
|
+
snapshotIds: bigint[];
|
|
163
|
+
}
|
|
164
|
+
declare function removeSnapshots(params: RemoveSnapshotsParams): Promise<IcebergUpdateResponse>;
|
|
156
165
|
|
|
157
166
|
type JSONPrimitive = string | number | boolean | null | bigint | undefined;
|
|
158
167
|
type JSONValue = JSONPrimitive | JSONObject | JSONArray;
|
|
@@ -174,6 +183,7 @@ interface AddDataFilesParams {
|
|
|
174
183
|
snapshotId?: bigint;
|
|
175
184
|
lists: AddFileList[];
|
|
176
185
|
retryCount?: number;
|
|
186
|
+
maxSnapshots?: number;
|
|
177
187
|
}
|
|
178
188
|
interface AddDataFilesResult {
|
|
179
189
|
result: JSONObject;
|
|
@@ -207,7 +217,8 @@ declare const _default: {
|
|
|
207
217
|
addManifest: typeof addManifest;
|
|
208
218
|
addDataFiles: typeof addDataFiles;
|
|
209
219
|
setCurrentCommit: typeof setCurrentCommit;
|
|
220
|
+
removeSnapshots: typeof removeSnapshots;
|
|
210
221
|
};
|
|
211
222
|
|
|
212
|
-
export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
|
|
213
|
-
export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, SetCurrentCommitParams, TableLocation };
|
|
223
|
+
export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, removeSnapshots, setCurrentCommit };
|
|
224
|
+
export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, RemoveSnapshotsParams, SetCurrentCommitParams, TableLocation };
|
package/dist/index.js
CHANGED
|
@@ -4,6 +4,7 @@ Object.defineProperty(exports, '__esModule', { value: true });
|
|
|
4
4
|
|
|
5
5
|
var node_crypto = require('node:crypto');
|
|
6
6
|
var avsc = require('avsc');
|
|
7
|
+
var zlib = require('node:zlib');
|
|
7
8
|
var clientS3 = require('@aws-sdk/client-s3');
|
|
8
9
|
var clientS3tables = require('@aws-sdk/client-s3tables');
|
|
9
10
|
var libStorage = require('@aws-sdk/lib-storage');
|
|
@@ -32,6 +33,7 @@ function _interopNamespaceDefault(e) {
|
|
|
32
33
|
}
|
|
33
34
|
|
|
34
35
|
var avsc__namespace = /*#__PURE__*/_interopNamespaceDefault(avsc);
|
|
36
|
+
var zlib__namespace = /*#__PURE__*/_interopNamespaceDefault(zlib);
|
|
35
37
|
var LosslessJson__namespace = /*#__PURE__*/_interopNamespaceDefault(LosslessJson);
|
|
36
38
|
|
|
37
39
|
function fixupMetadata(metadata) {
|
|
@@ -55,6 +57,7 @@ async function avroToBuffer(params) {
|
|
|
55
57
|
const buffers = [];
|
|
56
58
|
const opts = {
|
|
57
59
|
writeHeader: true,
|
|
60
|
+
codecs: { deflate: zlib__namespace.deflateRaw },
|
|
58
61
|
codec: 'deflate',
|
|
59
62
|
metadata,
|
|
60
63
|
};
|
|
@@ -120,7 +123,12 @@ function _icebergToAvroField(field, schema) {
|
|
|
120
123
|
}
|
|
121
124
|
throw new Error(`Unsupported transform: ${field.transform} for type`);
|
|
122
125
|
}
|
|
123
|
-
return {
|
|
126
|
+
return {
|
|
127
|
+
name: field.name,
|
|
128
|
+
type: ['null', avroType],
|
|
129
|
+
default: null,
|
|
130
|
+
'field-id': field['field-id'],
|
|
131
|
+
};
|
|
124
132
|
}
|
|
125
133
|
function _mapPrimitiveToAvro(type) {
|
|
126
134
|
switch (type) {
|
|
@@ -166,7 +174,9 @@ var ListContent;
|
|
|
166
174
|
ListContent[ListContent["DELETES"] = 1] = "DELETES";
|
|
167
175
|
})(ListContent || (ListContent = {}));
|
|
168
176
|
const BigIntType = avsc__namespace.types.LongType.__with({
|
|
169
|
-
fromBuffer
|
|
177
|
+
fromBuffer(uint_array) {
|
|
178
|
+
return Buffer.from(uint_array).readBigInt64LE();
|
|
179
|
+
},
|
|
170
180
|
toBuffer(n) {
|
|
171
181
|
const buf = Buffer.alloc(8);
|
|
172
182
|
buf.writeBigInt64LE(n);
|
|
@@ -531,19 +541,19 @@ const ManifestListType = avsc__namespace.Type.forSchema({
|
|
|
531
541
|
'field-id': 503,
|
|
532
542
|
},
|
|
533
543
|
{
|
|
534
|
-
name: '
|
|
544
|
+
name: 'added_files_count',
|
|
535
545
|
type: 'int',
|
|
536
546
|
doc: 'Added entry count',
|
|
537
547
|
'field-id': 504,
|
|
538
548
|
},
|
|
539
549
|
{
|
|
540
|
-
name: '
|
|
550
|
+
name: 'existing_files_count',
|
|
541
551
|
type: 'int',
|
|
542
552
|
doc: 'Existing entry count',
|
|
543
553
|
'field-id': 505,
|
|
544
554
|
},
|
|
545
555
|
{
|
|
546
|
-
name: '
|
|
556
|
+
name: 'deleted_files_count',
|
|
547
557
|
type: 'int',
|
|
548
558
|
doc: 'Deleted entry count',
|
|
549
559
|
'field-id': 506,
|
|
@@ -837,10 +847,12 @@ async function updateManifestList(params) {
|
|
|
837
847
|
}
|
|
838
848
|
const passthrough = new node_stream.PassThrough();
|
|
839
849
|
const decoder = new avsc__namespace.streams.BlockDecoder({
|
|
850
|
+
codecs: { deflate: zlib__namespace.inflateRaw },
|
|
840
851
|
parseHook: () => ManifestListType,
|
|
841
852
|
});
|
|
842
853
|
const encoder = new avsc__namespace.streams.BlockEncoder(ManifestListType, {
|
|
843
854
|
codec: 'deflate',
|
|
855
|
+
codecs: { deflate: zlib__namespace.deflateRaw },
|
|
844
856
|
metadata,
|
|
845
857
|
});
|
|
846
858
|
encoder.pipe(passthrough);
|
|
@@ -961,9 +973,9 @@ async function addManifest(params) {
|
|
|
961
973
|
sequence_number: params.sequenceNumber,
|
|
962
974
|
min_sequence_number: params.sequenceNumber,
|
|
963
975
|
added_snapshot_id: params.snapshotId,
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
976
|
+
added_files_count: params.files.length,
|
|
977
|
+
existing_files_count: 0,
|
|
978
|
+
deleted_files_count: 0,
|
|
967
979
|
added_rows_count,
|
|
968
980
|
existing_rows_count: 0n,
|
|
969
981
|
deleted_rows_count: 0n,
|
|
@@ -1168,6 +1180,19 @@ async function addPartitionSpec(params) {
|
|
|
1168
1180
|
},
|
|
1169
1181
|
});
|
|
1170
1182
|
}
|
|
1183
|
+
async function removeSnapshots(params) {
|
|
1184
|
+
return icebergRequest({
|
|
1185
|
+
tableBucketARN: params.tableBucketARN,
|
|
1186
|
+
method: 'POST',
|
|
1187
|
+
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1188
|
+
body: {
|
|
1189
|
+
requirements: [],
|
|
1190
|
+
updates: [
|
|
1191
|
+
{ action: 'remove-snapshots', 'snapshot-ids': params.snapshotIds },
|
|
1192
|
+
],
|
|
1193
|
+
},
|
|
1194
|
+
});
|
|
1195
|
+
}
|
|
1171
1196
|
|
|
1172
1197
|
const DEFAULT_RETRY_COUNT = 5;
|
|
1173
1198
|
async function addDataFiles(params) {
|
|
@@ -1192,7 +1217,18 @@ async function addDataFiles(params) {
|
|
|
1192
1217
|
if (snapshot && !old_list_key) {
|
|
1193
1218
|
throw new Error('last snapshot invalid');
|
|
1194
1219
|
}
|
|
1195
|
-
let sequence_number = BigInt(metadata
|
|
1220
|
+
let sequence_number = BigInt(metadata['last-sequence-number']) + 1n;
|
|
1221
|
+
let remove_snapshot_id = 0n;
|
|
1222
|
+
if (params.maxSnapshots && metadata.snapshots.length >= params.maxSnapshots) {
|
|
1223
|
+
let earliest_time = 0;
|
|
1224
|
+
for (const snap of metadata.snapshots) {
|
|
1225
|
+
const snap_time = snap['timestamp-ms'];
|
|
1226
|
+
if (earliest_time === 0 || snap_time < earliest_time) {
|
|
1227
|
+
earliest_time = snap_time;
|
|
1228
|
+
remove_snapshot_id = BigInt(snap['snapshot-id']);
|
|
1229
|
+
}
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1196
1232
|
let added_files = 0;
|
|
1197
1233
|
let added_records = 0n;
|
|
1198
1234
|
let added_size = 0n;
|
|
@@ -1252,6 +1288,37 @@ async function addDataFiles(params) {
|
|
|
1252
1288
|
});
|
|
1253
1289
|
}
|
|
1254
1290
|
try {
|
|
1291
|
+
const updates = [
|
|
1292
|
+
{
|
|
1293
|
+
action: 'add-snapshot',
|
|
1294
|
+
snapshot: {
|
|
1295
|
+
'sequence-number': sequence_number,
|
|
1296
|
+
'snapshot-id': snapshot_id,
|
|
1297
|
+
'parent-snapshot-id': parent_snapshot_id,
|
|
1298
|
+
'timestamp-ms': Date.now(),
|
|
1299
|
+
summary: {
|
|
1300
|
+
operation: 'append',
|
|
1301
|
+
'added-data-files': String(added_files),
|
|
1302
|
+
'added-records': String(added_records),
|
|
1303
|
+
'added-files-size': String(added_size),
|
|
1304
|
+
},
|
|
1305
|
+
'manifest-list': manifest_list_url,
|
|
1306
|
+
'schema-id': metadata['current-schema-id'],
|
|
1307
|
+
},
|
|
1308
|
+
},
|
|
1309
|
+
{
|
|
1310
|
+
action: 'set-snapshot-ref',
|
|
1311
|
+
'snapshot-id': snapshot_id,
|
|
1312
|
+
type: 'branch',
|
|
1313
|
+
'ref-name': 'main',
|
|
1314
|
+
},
|
|
1315
|
+
];
|
|
1316
|
+
if (remove_snapshot_id > 0n) {
|
|
1317
|
+
updates.push({
|
|
1318
|
+
action: 'remove-snapshots',
|
|
1319
|
+
'snapshot-ids': [remove_snapshot_id],
|
|
1320
|
+
});
|
|
1321
|
+
}
|
|
1255
1322
|
const result = await icebergRequest({
|
|
1256
1323
|
credentials: params.credentials,
|
|
1257
1324
|
tableBucketARN: params.tableBucketARN,
|
|
@@ -1267,31 +1334,7 @@ async function addDataFiles(params) {
|
|
|
1267
1334
|
},
|
|
1268
1335
|
]
|
|
1269
1336
|
: [],
|
|
1270
|
-
updates
|
|
1271
|
-
{
|
|
1272
|
-
action: 'add-snapshot',
|
|
1273
|
-
snapshot: {
|
|
1274
|
-
'sequence-number': sequence_number,
|
|
1275
|
-
'snapshot-id': snapshot_id,
|
|
1276
|
-
'parent-snapshot-id': parent_snapshot_id,
|
|
1277
|
-
'timestamp-ms': Date.now(),
|
|
1278
|
-
summary: {
|
|
1279
|
-
operation: 'append',
|
|
1280
|
-
'added-data-files': String(added_files),
|
|
1281
|
-
'added-records': String(added_records),
|
|
1282
|
-
'added-files-size': String(added_size),
|
|
1283
|
-
},
|
|
1284
|
-
'manifest-list': manifest_list_url,
|
|
1285
|
-
'schema-id': metadata['current-schema-id'],
|
|
1286
|
-
},
|
|
1287
|
-
},
|
|
1288
|
-
{
|
|
1289
|
-
action: 'set-snapshot-ref',
|
|
1290
|
-
'snapshot-id': snapshot_id,
|
|
1291
|
-
type: 'branch',
|
|
1292
|
-
'ref-name': 'main',
|
|
1293
|
-
},
|
|
1294
|
-
],
|
|
1337
|
+
updates,
|
|
1295
1338
|
},
|
|
1296
1339
|
});
|
|
1297
1340
|
return {
|
|
@@ -1305,7 +1348,10 @@ async function addDataFiles(params) {
|
|
|
1305
1348
|
catch (e) {
|
|
1306
1349
|
if (e instanceof IcebergHttpError &&
|
|
1307
1350
|
e.status === 409 &&
|
|
1308
|
-
try_count < retry_max)
|
|
1351
|
+
try_count < retry_max) {
|
|
1352
|
+
// retry case
|
|
1353
|
+
remove_snapshot_id = 0n;
|
|
1354
|
+
}
|
|
1309
1355
|
else {
|
|
1310
1356
|
throw e;
|
|
1311
1357
|
}
|
|
@@ -1344,6 +1390,7 @@ async function setCurrentCommit(params) {
|
|
|
1344
1390
|
method: 'POST',
|
|
1345
1391
|
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1346
1392
|
body: {
|
|
1393
|
+
requirements: [],
|
|
1347
1394
|
updates: [
|
|
1348
1395
|
{
|
|
1349
1396
|
action: 'set-snapshot-ref',
|
|
@@ -1374,6 +1421,7 @@ var index = {
|
|
|
1374
1421
|
addManifest,
|
|
1375
1422
|
addDataFiles,
|
|
1376
1423
|
setCurrentCommit,
|
|
1424
|
+
removeSnapshots,
|
|
1377
1425
|
};
|
|
1378
1426
|
|
|
1379
1427
|
exports.IcebergHttpError = IcebergHttpError;
|
|
@@ -1383,4 +1431,5 @@ exports.addPartitionSpec = addPartitionSpec;
|
|
|
1383
1431
|
exports.addSchema = addSchema;
|
|
1384
1432
|
exports.default = index;
|
|
1385
1433
|
exports.getMetadata = getMetadata;
|
|
1434
|
+
exports.removeSnapshots = removeSnapshots;
|
|
1386
1435
|
exports.setCurrentCommit = setCurrentCommit;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-s3tables",
|
|
3
|
-
"version": "0.0.9",
|
|
3
|
+
"version": "0.0.11",
|
|
4
4
|
"description": "node api for dealing with s3tables",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"@aws-sdk/client-s3": "^3.901.0",
|
|
29
29
|
"@aws-sdk/client-s3tables": "^3.901.0",
|
|
30
30
|
"@aws-sdk/lib-storage": "^3.901.0",
|
|
31
|
-
"avsc": "
|
|
31
|
+
"avsc": "https://github.com/jim-lake/avsc.git#66bc46724a6db2e55123a94532aca318a133a8ee",
|
|
32
32
|
"lossless-json": "^4.2.0"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|