node-s3tables 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/index.d.ts +13 -2
- package/dist/index.js +69 -33
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
addManifest,
|
|
19
19
|
addDataFiles,
|
|
20
20
|
setCurrentCommit,
|
|
21
|
+
removeSnapshots,
|
|
21
22
|
} from 'node-s3tables';
|
|
22
23
|
|
|
23
24
|
// Get table metadata
|
|
@@ -184,6 +185,7 @@ Adds data files to an S3 table by creating a new snapshot.
|
|
|
184
185
|
- `params.namespace` (string) - The namespace name
|
|
185
186
|
- `params.name` (string) - The table name
|
|
186
187
|
- `params.lists` (AddFileList[]) - Array of file lists to add
|
|
188
|
+
- `params.maxSnapshots` (number, optional) - Maximum number of snapshots to retain. When set, and the table already holds at least this many snapshots, the single oldest snapshot (by timestamp) is removed automatically when the new snapshot is added.
|
|
187
189
|
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
188
190
|
|
|
189
191
|
**Returns:** Promise<string>
|
|
@@ -208,6 +210,28 @@ await addDataFiles({
|
|
|
208
210
|
},
|
|
209
211
|
],
|
|
210
212
|
});
|
|
213
|
+
|
|
214
|
+
// With automatic snapshot cleanup
|
|
215
|
+
await addDataFiles({
|
|
216
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
217
|
+
namespace: 'sales',
|
|
218
|
+
name: 'daily_sales',
|
|
219
|
+
maxSnapshots: 10, // Keep only the 10 most recent snapshots
|
|
220
|
+
lists: [
|
|
221
|
+
{
|
|
222
|
+
specId: 1,
|
|
223
|
+
schemaId: 2,
|
|
224
|
+
files: [
|
|
225
|
+
{
|
|
226
|
+
file: 's3://my-bucket/data/sales-2024-01-02.parquet',
|
|
227
|
+
partitions: { sale_date_day: '2024-01-02' },
|
|
228
|
+
recordCount: 1500n,
|
|
229
|
+
fileSize: 78643n,
|
|
230
|
+
},
|
|
231
|
+
],
|
|
232
|
+
},
|
|
233
|
+
],
|
|
234
|
+
});
|
|
211
235
|
```
|
|
212
236
|
|
|
213
237
|
### setCurrentCommit(params)
|
|
@@ -233,6 +257,29 @@ await setCurrentCommit({
|
|
|
233
257
|
});
|
|
234
258
|
```
|
|
235
259
|
|
|
260
|
+
### removeSnapshots(params)
|
|
261
|
+
|
|
262
|
+
Removes snapshots from an S3 table. Note: Due to Iceberg limitations, only one snapshot can be removed at a time.
|
|
263
|
+
|
|
264
|
+
**Parameters:**
|
|
265
|
+
|
|
266
|
+
- `params.tableBucketARN` (string) - The ARN of the table bucket
|
|
267
|
+
- `params.namespace` (string) - The namespace name
|
|
268
|
+
- `params.name` (string) - The table name
|
|
269
|
+
- `params.snapshotIds` (bigint[]) - Array of snapshot IDs to remove (only provide one snapshot ID)
|
|
270
|
+
- `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
|
|
271
|
+
|
|
272
|
+
**Returns:** Promise<IcebergUpdateResponse>
|
|
273
|
+
|
|
274
|
+
```javascript
|
|
275
|
+
await removeSnapshots({
|
|
276
|
+
tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
|
|
277
|
+
namespace: 'sales',
|
|
278
|
+
name: 'daily_sales',
|
|
279
|
+
snapshotIds: [4183020680887155442n], // Only one snapshot ID
|
|
280
|
+
});
|
|
281
|
+
```
|
|
282
|
+
|
|
236
283
|
## Type Definitions
|
|
237
284
|
|
|
238
285
|
### IcebergUpdateResponse
|
package/dist/index.d.ts
CHANGED
|
@@ -81,6 +81,7 @@ interface IcebergSnapshot {
|
|
|
81
81
|
}
|
|
82
82
|
interface IcebergMetadata {
|
|
83
83
|
'last-column-id': number;
|
|
84
|
+
'last-sequence-number': bigint | number;
|
|
84
85
|
'current-schema-id': number;
|
|
85
86
|
schemas: IcebergSchema[];
|
|
86
87
|
snapshots: IcebergSnapshot[];
|
|
@@ -153,6 +154,14 @@ interface AddPartitionSpecParams {
|
|
|
153
154
|
fields: IcebergPartitionField[];
|
|
154
155
|
}
|
|
155
156
|
declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<IcebergUpdateResponse>;
|
|
157
|
+
/**
 * Arguments accepted by `removeSnapshots`.
 */
interface RemoveSnapshotsParams {
    /** ARN of the table bucket. */
    tableBucketARN: string;
    /** Namespace name. */
    namespace: string;
    /** Table name. */
    name: string;
    /**
     * IDs of the snapshots to remove. The package README advises passing a
     * single ID per call.
     */
    snapshotIds: bigint[];
    /** Optional AWS credentials. */
    credentials?: AwsCredentialIdentity;
}
/**
 * Removes snapshots from an S3 table.
 *
 * @param params - Table location, snapshot IDs and optional credentials.
 * @returns A promise for the update response.
 */
declare function removeSnapshots(params: RemoveSnapshotsParams): Promise<IcebergUpdateResponse>;
|
|
156
165
|
|
|
157
166
|
type JSONPrimitive = string | number | boolean | null | bigint | undefined;
|
|
158
167
|
type JSONValue = JSONPrimitive | JSONObject | JSONArray;
|
|
@@ -174,6 +183,7 @@ interface AddDataFilesParams {
|
|
|
174
183
|
snapshotId?: bigint;
|
|
175
184
|
lists: AddFileList[];
|
|
176
185
|
retryCount?: number;
|
|
186
|
+
maxSnapshots?: number;
|
|
177
187
|
}
|
|
178
188
|
interface AddDataFilesResult {
|
|
179
189
|
result: JSONObject;
|
|
@@ -207,7 +217,8 @@ declare const _default: {
|
|
|
207
217
|
addManifest: typeof addManifest;
|
|
208
218
|
addDataFiles: typeof addDataFiles;
|
|
209
219
|
setCurrentCommit: typeof setCurrentCommit;
|
|
220
|
+
removeSnapshots: typeof removeSnapshots;
|
|
210
221
|
};
|
|
211
222
|
|
|
212
|
-
export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
|
|
213
|
-
export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, SetCurrentCommitParams, TableLocation };
|
|
223
|
+
export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, removeSnapshots, setCurrentCommit };
|
|
224
|
+
export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, RemoveSnapshotsParams, SetCurrentCommitParams, TableLocation };
|
package/dist/index.js
CHANGED
|
@@ -745,16 +745,16 @@ function _encodeValue(raw, transform, out_type) {
|
|
|
745
745
|
}
|
|
746
746
|
}
|
|
747
747
|
const NaNValue = NaN;
|
|
748
|
-
function makeBounds(
|
|
748
|
+
function makeBounds(partitions, spec, schema) {
|
|
749
749
|
return spec.fields.map((f) => {
|
|
750
750
|
const schemaField = schema.fields.find((sf) => sf.id === f['source-id']);
|
|
751
751
|
if (!schemaField) {
|
|
752
752
|
throw new Error(`Schema field not found for source-id ${f['source-id']}`);
|
|
753
753
|
}
|
|
754
|
-
if (!(f.name in
|
|
755
|
-
throw new Error(`
|
|
754
|
+
if (!(f.name in partitions)) {
|
|
755
|
+
throw new Error(`partitions missing ${f.name}`);
|
|
756
756
|
}
|
|
757
|
-
const raw =
|
|
757
|
+
const raw = partitions[f.name];
|
|
758
758
|
if (typeof raw === 'number' && isNaN(raw)) {
|
|
759
759
|
return NaNValue;
|
|
760
760
|
}
|
|
@@ -1168,6 +1168,19 @@ async function addPartitionSpec(params) {
|
|
|
1168
1168
|
},
|
|
1169
1169
|
});
|
|
1170
1170
|
}
|
|
1171
|
+
async function removeSnapshots(params) {
|
|
1172
|
+
return icebergRequest({
|
|
1173
|
+
tableBucketARN: params.tableBucketARN,
|
|
1174
|
+
method: 'POST',
|
|
1175
|
+
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1176
|
+
body: {
|
|
1177
|
+
requirements: [],
|
|
1178
|
+
updates: [
|
|
1179
|
+
{ action: 'remove-snapshots', 'snapshot-ids': params.snapshotIds },
|
|
1180
|
+
],
|
|
1181
|
+
},
|
|
1182
|
+
});
|
|
1183
|
+
}
|
|
1171
1184
|
|
|
1172
1185
|
const DEFAULT_RETRY_COUNT = 5;
|
|
1173
1186
|
async function addDataFiles(params) {
|
|
@@ -1181,8 +1194,7 @@ async function addDataFiles(params) {
|
|
|
1181
1194
|
const metadata = await getMetadata(params);
|
|
1182
1195
|
const bucket = metadata.location.split('/').slice(-1)[0];
|
|
1183
1196
|
const parent_snapshot_id = BigInt(metadata['current-snapshot-id']);
|
|
1184
|
-
const snapshot = metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id) ??
|
|
1185
|
-
null;
|
|
1197
|
+
const snapshot = metadata.snapshots.find((s) => BigInt(s['snapshot-id']) === parent_snapshot_id) ?? null;
|
|
1186
1198
|
if (!bucket) {
|
|
1187
1199
|
throw new Error('bad manifest location');
|
|
1188
1200
|
}
|
|
@@ -1193,7 +1205,18 @@ async function addDataFiles(params) {
|
|
|
1193
1205
|
if (snapshot && !old_list_key) {
|
|
1194
1206
|
throw new Error('last snapshot invalid');
|
|
1195
1207
|
}
|
|
1196
|
-
let sequence_number = BigInt(metadata
|
|
1208
|
+
let sequence_number = BigInt(metadata['last-sequence-number']) + 1n;
|
|
1209
|
+
let remove_snapshot_id = 0n;
|
|
1210
|
+
if (params.maxSnapshots && metadata.snapshots.length >= params.maxSnapshots) {
|
|
1211
|
+
let earliest_time = 0;
|
|
1212
|
+
for (const snap of metadata.snapshots) {
|
|
1213
|
+
const snap_time = snap['timestamp-ms'];
|
|
1214
|
+
if (earliest_time === 0 || snap_time < earliest_time) {
|
|
1215
|
+
earliest_time = snap_time;
|
|
1216
|
+
remove_snapshot_id = BigInt(snap['snapshot-id']);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1197
1220
|
let added_files = 0;
|
|
1198
1221
|
let added_records = 0n;
|
|
1199
1222
|
let added_size = 0n;
|
|
@@ -1253,6 +1276,37 @@ async function addDataFiles(params) {
|
|
|
1253
1276
|
});
|
|
1254
1277
|
}
|
|
1255
1278
|
try {
|
|
1279
|
+
const updates = [
|
|
1280
|
+
{
|
|
1281
|
+
action: 'add-snapshot',
|
|
1282
|
+
snapshot: {
|
|
1283
|
+
'sequence-number': sequence_number,
|
|
1284
|
+
'snapshot-id': snapshot_id,
|
|
1285
|
+
'parent-snapshot-id': parent_snapshot_id,
|
|
1286
|
+
'timestamp-ms': Date.now(),
|
|
1287
|
+
summary: {
|
|
1288
|
+
operation: 'append',
|
|
1289
|
+
'added-data-files': String(added_files),
|
|
1290
|
+
'added-records': String(added_records),
|
|
1291
|
+
'added-files-size': String(added_size),
|
|
1292
|
+
},
|
|
1293
|
+
'manifest-list': manifest_list_url,
|
|
1294
|
+
'schema-id': metadata['current-schema-id'],
|
|
1295
|
+
},
|
|
1296
|
+
},
|
|
1297
|
+
{
|
|
1298
|
+
action: 'set-snapshot-ref',
|
|
1299
|
+
'snapshot-id': snapshot_id,
|
|
1300
|
+
type: 'branch',
|
|
1301
|
+
'ref-name': 'main',
|
|
1302
|
+
},
|
|
1303
|
+
];
|
|
1304
|
+
if (remove_snapshot_id > 0n) {
|
|
1305
|
+
updates.push({
|
|
1306
|
+
action: 'remove-snapshots',
|
|
1307
|
+
'snapshot-ids': [remove_snapshot_id],
|
|
1308
|
+
});
|
|
1309
|
+
}
|
|
1256
1310
|
const result = await icebergRequest({
|
|
1257
1311
|
credentials: params.credentials,
|
|
1258
1312
|
tableBucketARN: params.tableBucketARN,
|
|
@@ -1268,31 +1322,7 @@ async function addDataFiles(params) {
|
|
|
1268
1322
|
},
|
|
1269
1323
|
]
|
|
1270
1324
|
: [],
|
|
1271
|
-
updates
|
|
1272
|
-
{
|
|
1273
|
-
action: 'add-snapshot',
|
|
1274
|
-
snapshot: {
|
|
1275
|
-
'sequence-number': sequence_number,
|
|
1276
|
-
'snapshot-id': snapshot_id,
|
|
1277
|
-
'parent-snapshot-id': parent_snapshot_id,
|
|
1278
|
-
'timestamp-ms': Date.now(),
|
|
1279
|
-
summary: {
|
|
1280
|
-
operation: 'append',
|
|
1281
|
-
'added-data-files': String(added_files),
|
|
1282
|
-
'added-records': String(added_records),
|
|
1283
|
-
'added-files-size': String(added_size),
|
|
1284
|
-
},
|
|
1285
|
-
'manifest-list': manifest_list_url,
|
|
1286
|
-
'schema-id': metadata['current-schema-id'],
|
|
1287
|
-
},
|
|
1288
|
-
},
|
|
1289
|
-
{
|
|
1290
|
-
action: 'set-snapshot-ref',
|
|
1291
|
-
'snapshot-id': snapshot_id,
|
|
1292
|
-
type: 'branch',
|
|
1293
|
-
'ref-name': 'main',
|
|
1294
|
-
},
|
|
1295
|
-
],
|
|
1325
|
+
updates,
|
|
1296
1326
|
},
|
|
1297
1327
|
});
|
|
1298
1328
|
return {
|
|
@@ -1306,7 +1336,10 @@ async function addDataFiles(params) {
|
|
|
1306
1336
|
catch (e) {
|
|
1307
1337
|
if (e instanceof IcebergHttpError &&
|
|
1308
1338
|
e.status === 409 &&
|
|
1309
|
-
try_count < retry_max)
|
|
1339
|
+
try_count < retry_max) {
|
|
1340
|
+
// retry case
|
|
1341
|
+
remove_snapshot_id = 0n;
|
|
1342
|
+
}
|
|
1310
1343
|
else {
|
|
1311
1344
|
throw e;
|
|
1312
1345
|
}
|
|
@@ -1345,6 +1378,7 @@ async function setCurrentCommit(params) {
|
|
|
1345
1378
|
method: 'POST',
|
|
1346
1379
|
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1347
1380
|
body: {
|
|
1381
|
+
requirements: [],
|
|
1348
1382
|
updates: [
|
|
1349
1383
|
{
|
|
1350
1384
|
action: 'set-snapshot-ref',
|
|
@@ -1375,6 +1409,7 @@ var index = {
|
|
|
1375
1409
|
addManifest,
|
|
1376
1410
|
addDataFiles,
|
|
1377
1411
|
setCurrentCommit,
|
|
1412
|
+
removeSnapshots,
|
|
1378
1413
|
};
|
|
1379
1414
|
|
|
1380
1415
|
exports.IcebergHttpError = IcebergHttpError;
|
|
@@ -1384,4 +1419,5 @@ exports.addPartitionSpec = addPartitionSpec;
|
|
|
1384
1419
|
exports.addSchema = addSchema;
|
|
1385
1420
|
exports.default = index;
|
|
1386
1421
|
exports.getMetadata = getMetadata;
|
|
1422
|
+
exports.removeSnapshots = removeSnapshots;
|
|
1387
1423
|
exports.setCurrentCommit = setCurrentCommit;
|