node-s3tables 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,6 +18,7 @@ import {
18
18
  addManifest,
19
19
  addDataFiles,
20
20
  setCurrentCommit,
21
+ removeSnapshots,
21
22
  } from 'node-s3tables';
22
23
 
23
24
  // Get table metadata
@@ -184,6 +185,7 @@ Adds data files to an S3 table by creating a new snapshot.
184
185
  - `params.namespace` (string) - The namespace name
185
186
  - `params.name` (string) - The table name
186
187
  - `params.lists` (AddFileList[]) - Array of file lists to add
188
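+ - `params.maxSnapshots` (number, optional) - Maximum number of snapshots to retain. When set and the table already holds at least this many snapshots, the oldest snapshot (by `timestamp-ms`) is scheduled for removal alongside the new snapshot; at most one snapshot is removed per call.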
187
189
  - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
188
190
 
189
191
  **Returns:** Promise<string>
@@ -208,6 +210,28 @@ await addDataFiles({
208
210
  },
209
211
  ],
210
212
  });
213
+
214
+ // With automatic snapshot cleanup
215
+ await addDataFiles({
216
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
217
+ namespace: 'sales',
218
+ name: 'daily_sales',
219
+ maxSnapshots: 10, // Keep only the 10 most recent snapshots
220
+ lists: [
221
+ {
222
+ specId: 1,
223
+ schemaId: 2,
224
+ files: [
225
+ {
226
+ file: 's3://my-bucket/data/sales-2024-01-02.parquet',
227
+ partitions: { sale_date_day: '2024-01-02' },
228
+ recordCount: 1500n,
229
+ fileSize: 78643n,
230
+ },
231
+ ],
232
+ },
233
+ ],
234
+ });
211
235
  ```
212
236
 
213
237
  ### setCurrentCommit(params)
@@ -233,6 +257,29 @@ await setCurrentCommit({
233
257
  });
234
258
  ```
235
259
 
260
+ ### removeSnapshots(params)
261
+
262
+ Removes snapshots from an S3 table. Note: Due to Iceberg limitations, only one snapshot can be removed at a time.
263
+
264
+ **Parameters:**
265
+
266
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
267
+ - `params.namespace` (string) - The namespace name
268
+ - `params.name` (string) - The table name
269
+ - `params.snapshotIds` (bigint[]) - Array of snapshot IDs to remove; pass exactly one snapshot ID per call
270
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
271
+
272
+ **Returns:** Promise<IcebergUpdateResponse>
273
+
274
+ ```javascript
275
+ await removeSnapshots({
276
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
277
+ namespace: 'sales',
278
+ name: 'daily_sales',
279
+ snapshotIds: [4183020680887155442n], // Only one snapshot ID
280
+ });
281
+ ```
282
+
236
283
  ## Type Definitions
237
284
 
238
285
  ### IcebergUpdateResponse
package/dist/index.d.ts CHANGED
@@ -81,6 +81,7 @@ interface IcebergSnapshot {
81
81
  }
82
82
  interface IcebergMetadata {
83
83
  'last-column-id': number;
84
+ 'last-sequence-number': bigint | number;
84
85
  'current-schema-id': number;
85
86
  schemas: IcebergSchema[];
86
87
  snapshots: IcebergSnapshot[];
@@ -153,6 +154,14 @@ interface AddPartitionSpecParams {
153
154
  fields: IcebergPartitionField[];
154
155
  }
155
156
  declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<IcebergUpdateResponse>;
157
+ interface RemoveSnapshotsParams {
158
+ credentials?: AwsCredentialIdentity;
159
+ tableBucketARN: string;
160
+ namespace: string;
161
+ name: string;
162
+ snapshotIds: bigint[];
163
+ }
164
+ declare function removeSnapshots(params: RemoveSnapshotsParams): Promise<IcebergUpdateResponse>;
156
165
 
157
166
  type JSONPrimitive = string | number | boolean | null | bigint | undefined;
158
167
  type JSONValue = JSONPrimitive | JSONObject | JSONArray;
@@ -174,6 +183,7 @@ interface AddDataFilesParams {
174
183
  snapshotId?: bigint;
175
184
  lists: AddFileList[];
176
185
  retryCount?: number;
186
+ maxSnapshots?: number;
177
187
  }
178
188
  interface AddDataFilesResult {
179
189
  result: JSONObject;
@@ -207,7 +217,8 @@ declare const _default: {
207
217
  addManifest: typeof addManifest;
208
218
  addDataFiles: typeof addDataFiles;
209
219
  setCurrentCommit: typeof setCurrentCommit;
220
+ removeSnapshots: typeof removeSnapshots;
210
221
  };
211
222
 
212
- export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
213
- export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, SetCurrentCommitParams, TableLocation };
223
+ export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, removeSnapshots, setCurrentCommit };
224
+ export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, IcebergUpdateResponse, RemoveSnapshotsParams, SetCurrentCommitParams, TableLocation };
package/dist/index.js CHANGED
@@ -745,16 +745,16 @@ function _encodeValue(raw, transform, out_type) {
745
745
  }
746
746
  }
747
747
  const NaNValue = NaN;
748
- function makeBounds(paritions, spec, schema) {
748
+ function makeBounds(partitions, spec, schema) {
749
749
  return spec.fields.map((f) => {
750
750
  const schemaField = schema.fields.find((sf) => sf.id === f['source-id']);
751
751
  if (!schemaField) {
752
752
  throw new Error(`Schema field not found for source-id ${f['source-id']}`);
753
753
  }
754
- if (!(f.name in paritions)) {
755
- throw new Error(`paritions missing ${f.name}`);
754
+ if (!(f.name in partitions)) {
755
+ throw new Error(`partitions missing ${f.name}`);
756
756
  }
757
- const raw = paritions[f.name];
757
+ const raw = partitions[f.name];
758
758
  if (typeof raw === 'number' && isNaN(raw)) {
759
759
  return NaNValue;
760
760
  }
@@ -1168,6 +1168,19 @@ async function addPartitionSpec(params) {
1168
1168
  },
1169
1169
  });
1170
1170
  }
1171
+ async function removeSnapshots(params) {
1172
+ return icebergRequest({
1173
+ tableBucketARN: params.tableBucketARN,
1174
+ method: 'POST',
1175
+ suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
1176
+ body: {
1177
+ requirements: [],
1178
+ updates: [
1179
+ { action: 'remove-snapshots', 'snapshot-ids': params.snapshotIds },
1180
+ ],
1181
+ },
1182
+ });
1183
+ }
1171
1184
 
1172
1185
  const DEFAULT_RETRY_COUNT = 5;
1173
1186
  async function addDataFiles(params) {
@@ -1181,8 +1194,7 @@ async function addDataFiles(params) {
1181
1194
  const metadata = await getMetadata(params);
1182
1195
  const bucket = metadata.location.split('/').slice(-1)[0];
1183
1196
  const parent_snapshot_id = BigInt(metadata['current-snapshot-id']);
1184
- const snapshot = metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id) ??
1185
- null;
1197
+ const snapshot = metadata.snapshots.find((s) => BigInt(s['snapshot-id']) === parent_snapshot_id) ?? null;
1186
1198
  if (!bucket) {
1187
1199
  throw new Error('bad manifest location');
1188
1200
  }
@@ -1193,7 +1205,18 @@ async function addDataFiles(params) {
1193
1205
  if (snapshot && !old_list_key) {
1194
1206
  throw new Error('last snapshot invalid');
1195
1207
  }
1196
- let sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
1208
+ let sequence_number = BigInt(metadata['last-sequence-number']) + 1n;
1209
+ let remove_snapshot_id = 0n;
1210
+ if (params.maxSnapshots && metadata.snapshots.length >= params.maxSnapshots) {
1211
+ let earliest_time = 0;
1212
+ for (const snap of metadata.snapshots) {
1213
+ const snap_time = snap['timestamp-ms'];
1214
+ if (earliest_time === 0 || snap_time < earliest_time) {
1215
+ earliest_time = snap_time;
1216
+ remove_snapshot_id = BigInt(snap['snapshot-id']);
1217
+ }
1218
+ }
1219
+ }
1197
1220
  let added_files = 0;
1198
1221
  let added_records = 0n;
1199
1222
  let added_size = 0n;
@@ -1253,6 +1276,37 @@ async function addDataFiles(params) {
1253
1276
  });
1254
1277
  }
1255
1278
  try {
1279
+ const updates = [
1280
+ {
1281
+ action: 'add-snapshot',
1282
+ snapshot: {
1283
+ 'sequence-number': sequence_number,
1284
+ 'snapshot-id': snapshot_id,
1285
+ 'parent-snapshot-id': parent_snapshot_id,
1286
+ 'timestamp-ms': Date.now(),
1287
+ summary: {
1288
+ operation: 'append',
1289
+ 'added-data-files': String(added_files),
1290
+ 'added-records': String(added_records),
1291
+ 'added-files-size': String(added_size),
1292
+ },
1293
+ 'manifest-list': manifest_list_url,
1294
+ 'schema-id': metadata['current-schema-id'],
1295
+ },
1296
+ },
1297
+ {
1298
+ action: 'set-snapshot-ref',
1299
+ 'snapshot-id': snapshot_id,
1300
+ type: 'branch',
1301
+ 'ref-name': 'main',
1302
+ },
1303
+ ];
1304
+ if (remove_snapshot_id > 0n) {
1305
+ updates.push({
1306
+ action: 'remove-snapshots',
1307
+ 'snapshot-ids': [remove_snapshot_id],
1308
+ });
1309
+ }
1256
1310
  const result = await icebergRequest({
1257
1311
  credentials: params.credentials,
1258
1312
  tableBucketARN: params.tableBucketARN,
@@ -1268,31 +1322,7 @@ async function addDataFiles(params) {
1268
1322
  },
1269
1323
  ]
1270
1324
  : [],
1271
- updates: [
1272
- {
1273
- action: 'add-snapshot',
1274
- snapshot: {
1275
- 'sequence-number': sequence_number,
1276
- 'snapshot-id': snapshot_id,
1277
- 'parent-snapshot-id': parent_snapshot_id,
1278
- 'timestamp-ms': Date.now(),
1279
- summary: {
1280
- operation: 'append',
1281
- 'added-data-files': String(added_files),
1282
- 'added-records': String(added_records),
1283
- 'added-files-size': String(added_size),
1284
- },
1285
- 'manifest-list': manifest_list_url,
1286
- 'schema-id': metadata['current-schema-id'],
1287
- },
1288
- },
1289
- {
1290
- action: 'set-snapshot-ref',
1291
- 'snapshot-id': snapshot_id,
1292
- type: 'branch',
1293
- 'ref-name': 'main',
1294
- },
1295
- ],
1325
+ updates,
1296
1326
  },
1297
1327
  });
1298
1328
  return {
@@ -1306,7 +1336,10 @@ async function addDataFiles(params) {
1306
1336
  catch (e) {
1307
1337
  if (e instanceof IcebergHttpError &&
1308
1338
  e.status === 409 &&
1309
- try_count < retry_max) ;
1339
+ try_count < retry_max) {
1340
+ // retry case
1341
+ remove_snapshot_id = 0n;
1342
+ }
1310
1343
  else {
1311
1344
  throw e;
1312
1345
  }
@@ -1345,6 +1378,7 @@ async function setCurrentCommit(params) {
1345
1378
  method: 'POST',
1346
1379
  suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
1347
1380
  body: {
1381
+ requirements: [],
1348
1382
  updates: [
1349
1383
  {
1350
1384
  action: 'set-snapshot-ref',
@@ -1375,6 +1409,7 @@ var index = {
1375
1409
  addManifest,
1376
1410
  addDataFiles,
1377
1411
  setCurrentCommit,
1412
+ removeSnapshots,
1378
1413
  };
1379
1414
 
1380
1415
  exports.IcebergHttpError = IcebergHttpError;
@@ -1384,4 +1419,5 @@ exports.addPartitionSpec = addPartitionSpec;
1384
1419
  exports.addSchema = addSchema;
1385
1420
  exports.default = index;
1386
1421
  exports.getMetadata = getMetadata;
1422
+ exports.removeSnapshots = removeSnapshots;
1387
1423
  exports.setCurrentCommit = setCurrentCommit;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-s3tables",
3
- "version": "0.0.8",
3
+ "version": "0.0.10",
4
4
  "description": "node api for dealing with s3tables",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",