node-s3tables 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -310,6 +310,82 @@ Supported partition transforms:
  - `'bucket[N]'` - Hash bucket with N buckets
  - `'truncate[N]'` - Truncate strings to N characters

+ ## AWS API Calls and Required Permissions
+
+ The library makes calls to multiple AWS services and requires specific IAM permissions:
+
+ ### S3 Tables Service
+
+ **API Calls:**
+
+ - `GetTable` - Used by `getMetadata()` when called with `tableArn`
+ - Iceberg REST API calls via HTTPS to `s3tables.{region}.amazonaws.com`
+
+ **Required Permissions:**
+
+ - `s3tables:GetTable` - For retrieving table information
+ - `s3tables:GetTableData` - For reading table metadata and data objects (includes GetObject, HeadObject, ListParts)
+ - `s3tables:PutTableData` - For writing table metadata and data objects (includes PutObject, multipart upload operations)
+ - `s3tables:UpdateTableMetadataLocation` - For updating table root pointer during metadata operations
+
+ ### Function-Specific Permission Requirements
+
+ **`getMetadata()`:**
+
+ - When using `tableArn`: `s3tables:GetTable`, `s3tables:GetTableData`
+ - When using `tableBucketARN` + `namespace` + `name`: `s3tables:GetTableData`
+
+ **`addSchema()`:**
+
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
+
+ **`addPartitionSpec()`:**
+
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
+
+ **`addManifest()`:**
+
+ - `s3tables:PutTableData` (for writing manifest files)
+
+ **`addDataFiles()`:**
+
+ - `s3tables:GetTableData` (to get current metadata and read existing manifest lists)
+ - `s3tables:PutTableData` (to write new manifest files and lists)
+ - `s3tables:UpdateTableMetadataLocation` (to add snapshots)
+
+ **`setCurrentCommit()`:**
+
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
+
+ ### Example IAM Policy
+
+ ```json
+ {
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3tables:GetTable",
+ "s3tables:GetTableData",
+ "s3tables:PutTableData",
+ "s3tables:UpdateTableMetadataLocation"
+ ],
+ "Resource": "arn:aws:s3tables:*:*:bucket/*/table/*"
+ }
+ ]
+ }
+ ```
+
+ ## Configuration
+
+ The library uses the AWS SDK for authentication. Configure credentials using:
+
+ - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
+ - AWS credentials file (`~/.aws/credentials`)
+ - IAM roles (when running on EC2/Lambda)
+ - Or pass credentials directly to functions
+
  ## Testing

  ### Prerequisites
@@ -367,7 +443,7 @@ The test suite uses additional dependencies for creating test data:
  Run the test suite:

  ```bash
- npm test
+ npm run test
  ```

  Run tests with coverage:
@@ -382,15 +458,6 @@ Run a single test file:
  npm run test:single test/create.test.ts
  ```

- ## Configuration
-
- The library uses the AWS SDK for authentication. Configure credentials using:
-
- - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
- - AWS credentials file (`~/.aws/credentials`)
- - IAM roles (when running on EC2/Lambda)
- - Or pass credentials directly to functions
-
  ## License

  MIT
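The new Configuration section ends with "pass credentials directly to functions". As a rough illustration only (not taken from the package's docs), such a call might look like the sketch below; the bucket ARN, namespace, and table name are placeholders, and it assumes `getMetadata()` accepts the same optional `credentials` field that the other parameter interfaces in `dist/index.d.ts` declare.

```ts
import { getMetadata } from 'node-s3tables';

// Placeholder table coordinates -- substitute your own bucket ARN, namespace, and table.
const metadata = await getMetadata({
  credentials: {
    accessKeyId: process.env.AWS_ACCESS_KEY_ID ?? '',
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY ?? '',
  },
  tableBucketARN: 'arn:aws:s3tables:us-east-1:123456789012:bucket/example-bucket',
  namespace: 'example_namespace',
  name: 'example_table',
});

// 'current-snapshot-id' is now optional (see the index.d.ts change below),
// so this may print undefined for an empty table.
console.log(metadata['current-snapshot-id']);
```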
package/dist/index.d.ts CHANGED
@@ -67,13 +67,16 @@ interface IcebergPartitionSpec {
  'spec-id': number;
  fields: IcebergPartitionField[];
  }
+ interface IcebergSnapshotSummary extends Record<string, string> {
+ operation: 'append' | 'replace' | 'overwrite' | 'delete';
+ }
  interface IcebergSnapshot {
  'snapshot-id': bigint | number;
  'parent-snapshot-id'?: bigint | number;
  'sequence-number': number;
  'timestamp-ms': number;
  'manifest-list': string;
- summary: Record<string, string>;
+ summary: IcebergSnapshotSummary;
  'schema-id'?: number;
  }
  interface IcebergMetadata {
@@ -84,7 +87,7 @@ interface IcebergMetadata {
  'default-spec-id': number;
  'partition-specs': IcebergPartitionSpec[];
  'last-partition-id': number;
- 'current-snapshot-id': bigint | number;
+ 'current-snapshot-id'?: bigint | number;
  location: string;
  }

@@ -165,8 +168,16 @@ interface AddDataFilesParams {
  namespace: string;
  name: string;
  lists: AddFileList[];
+ retryCount?: number;
+ }
+ interface AddDataFilesResult {
+ result: JSONObject;
+ retriesNeeded: number;
+ parentSnapshotId: bigint;
+ snapshotId: bigint;
+ sequenceNumber: bigint;
  }
- declare function addDataFiles(params: AddDataFilesParams): Promise<JSONObject>;
+ declare function addDataFiles(params: AddDataFilesParams): Promise<AddDataFilesResult>;
  interface SetCurrentCommitParams {
  credentials?: AwsCredentialIdentity;
  tableBucketARN: string;
@@ -176,7 +187,15 @@ interface SetCurrentCommitParams {
  }
  declare function setCurrentCommit(params: SetCurrentCommitParams): Promise<JSONObject>;

+ declare class IcebergHttpError extends Error {
+ status: number;
+ text?: string;
+ body?: JSONObject;
+ constructor(status: number, body: JSONValue, message: string);
+ }
+
  declare const _default: {
+ IcebergHttpError: typeof IcebergHttpError;
  getMetadata: typeof getMetadata;
  addSchema: typeof addSchema;
  addPartitionSpec: typeof addPartitionSpec;
@@ -185,5 +204,5 @@ declare const _default: {
  setCurrentCommit: typeof setCurrentCommit;
  };

- export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
- export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
+ export { IcebergHttpError, addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
+ export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
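Since `addDataFiles()` now resolves to `AddDataFilesResult` rather than the raw commit JSON, callers that inspected the old return value need a small adjustment. A minimal sketch based only on the declarations above; the table coordinates and file list are placeholders.

```ts
import { addDataFiles } from 'node-s3tables';

const commit = await addDataFiles({
  tableBucketARN: 'arn:aws:s3tables:us-east-1:123456789012:bucket/example-bucket', // placeholder
  namespace: 'example_namespace',
  name: 'example_table',
  lists: [/* AddFileList entries describing the data files to append */],
  retryCount: 3, // optional: overrides the library's default retry budget
});

// Snapshot bookkeeping is surfaced alongside the commit response.
console.log(commit.retriesNeeded, commit.snapshotId, commit.sequenceNumber);
console.log(commit.result); // the raw Iceberg REST commit response, as before
```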
package/dist/index.js CHANGED
@@ -1022,6 +1022,21 @@ function parse(text) {
  return LosslessJson__namespace.parse(text, null, customNumberParser);
  }

+ class IcebergHttpError extends Error {
+ status;
+ text;
+ body;
+ constructor(status, body, message) {
+ super(message);
+ this.status = status;
+ if (typeof body === 'string') {
+ this.text = body;
+ }
+ else if (body && typeof body === 'object') {
+ this.body = body;
+ }
+ }
+ }
  async function icebergRequest(params) {
  const region = params.tableBucketARN.split(':')[3];
  if (!region) {
@@ -1061,9 +1076,18 @@ async function icebergRequest(params) {
  }
  const res = await fetch(url, fetch_opts);
  const text = await res.text();
+ const ret = res.headers.get('content-type') === 'application/json'
+ ? _parse(text)
+ : text;
  if (!res.ok) {
- throw new Error(`request failed: ${res.status} ${res.statusText} ${text}`);
+ if (res.status) {
+ throw new IcebergHttpError(res.status, ret, `request failed: ${res.statusText} ${text}`);
+ }
+ throw new Error(`request failed: ${res.statusText} ${text}`);
  }
+ return ret;
+ }
+ function _parse(text) {
  try {
  return parse(text);
  }
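With this change, HTTP failures from the Iceberg REST endpoint surface as `IcebergHttpError` instead of a generic `Error`, so callers can branch on the status code. A hedged sketch (not from the package) of wrapping one of the exported functions; the 409 case is the same condition the rewritten `addDataFiles` retry loop handles further down.

```ts
import { setCurrentCommit, IcebergHttpError } from 'node-s3tables';
import type { SetCurrentCommitParams } from 'node-s3tables';

async function commitOrExplain(params: SetCurrentCommitParams) {
  try {
    return await setCurrentCommit(params);
  } catch (err) {
    if (err instanceof IcebergHttpError && err.status === 409) {
      // Another writer moved the branch between our read and this commit.
      console.warn('commit conflict', err.body ?? err.text);
    }
    throw err;
  }
}
```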
@@ -1145,26 +1169,31 @@ async function addPartitionSpec(params) {
  });
  }

+ const DEFAULT_RETRY_COUNT = 5;
  async function addDataFiles(params) {
  const { credentials } = params;
+ const retry_max = params.retryCount ?? DEFAULT_RETRY_COUNT;
  const region = params.tableBucketARN.split(':')[3];
  if (!region) {
  throw new Error('bad tableBucketARN');
  }
  const snapshot_id = _randomBigInt64();
  const metadata = await getMetadata(params);
- const parent_snapshot_id = metadata['current-snapshot-id'];
  const bucket = metadata.location.split('/').slice(-1)[0];
- const snapshot = parent_snapshot_id === -1
- ? null
- : metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id);
+ const parent_snapshot_id = BigInt(metadata['current-snapshot-id'] ?? -1n);
+ const snapshot = metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id) ??
+ null;
  if (!bucket) {
  throw new Error('bad manifest location');
  }
- if (parent_snapshot_id !== -1 && !snapshot) {
+ if (parent_snapshot_id > 0n && !snapshot) {
  throw new Error('no old snapshot');
  }
- const sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
+ let old_list_key = snapshot ? parseS3Url(snapshot['manifest-list']).key : '';
+ if (snapshot && !old_list_key) {
+ throw new Error('last snapshot invalid');
+ }
+ let sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
  let added_files = 0;
  let added_records = 0n;
  let added_size = 0n;
@@ -1186,88 +1215,128 @@ async function addDataFiles(params) {
  };
  return addManifest(opts);
  }));
- const manifest_list_key = `metadata/${node_crypto.randomUUID()}.avro`;
- const manifest_list_url = `s3://${bucket}/${manifest_list_key}`;
- if (snapshot) {
- const { key: old_list_key } = parseS3Url(snapshot['manifest-list']);
- if (!old_list_key) {
- throw new Error('snapshot invalid');
+ let expected_snapshot_id = parent_snapshot_id;
+ for (let try_count = 0;; try_count++) {
+ const manifest_list_key = `metadata/${node_crypto.randomUUID()}.avro`;
+ const manifest_list_url = `s3://${bucket}/${manifest_list_key}`;
+ if (old_list_key) {
+ await updateManifestList({
+ credentials,
+ region,
+ bucket,
+ key: old_list_key,
+ outKey: manifest_list_key,
+ metadata: {
+ 'sequence-number': String(sequence_number),
+ 'snapshot-id': String(snapshot_id),
+ 'parent-snapshot-id': String(parent_snapshot_id),
+ },
+ prepend: records,
+ });
  }
- await updateManifestList({
- credentials,
- region,
- bucket,
- key: old_list_key,
- outKey: manifest_list_key,
- metadata: {
- 'sequence-number': String(sequence_number),
- 'snapshot-id': String(snapshot_id),
- 'parent-snapshot-id': String(parent_snapshot_id),
- },
- prepend: records,
- });
- }
- else {
- const manifest_list_buf = await avroToBuffer({
- type: ManifestListType,
- metadata: {
- 'sequence-number': String(sequence_number),
- 'snapshot-id': String(snapshot_id),
- 'parent-snapshot-id': String(parent_snapshot_id),
- },
- records,
- });
- await writeS3File({
- credentials,
- region,
- bucket,
- key: manifest_list_key,
- body: manifest_list_buf,
- });
- }
- const commit_result = await icebergRequest({
- credentials: params.credentials,
- tableBucketARN: params.tableBucketARN,
- method: 'POST',
- suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
- body: {
- requirements: parent_snapshot_id === -1
- ? []
- : [
- {
- type: 'assert-ref-snapshot-id',
- ref: 'main',
- 'snapshot-id': parent_snapshot_id,
- },
- ],
- updates: [
- {
- action: 'add-snapshot',
- snapshot: {
- 'sequence-number': sequence_number,
- 'snapshot-id': snapshot_id,
- 'parent-snapshot-id': parent_snapshot_id,
- 'timestamp-ms': Date.now(),
- summary: {
- operation: 'append',
- 'added-data-files': String(added_files),
- 'added-records': String(added_records),
- 'added-files-size': String(added_size),
- },
- 'manifest-list': manifest_list_url,
- 'schema-id': metadata['current-schema-id'],
- },
+ else {
+ const manifest_list_buf = await avroToBuffer({
+ type: ManifestListType,
+ metadata: {
+ 'sequence-number': String(sequence_number),
+ 'snapshot-id': String(snapshot_id),
+ 'parent-snapshot-id': 'null',
  },
- {
- action: 'set-snapshot-ref',
- 'snapshot-id': snapshot_id,
- type: 'branch',
- 'ref-name': 'main',
+ records,
+ });
+ await writeS3File({
+ credentials,
+ region,
+ bucket,
+ key: manifest_list_key,
+ body: manifest_list_buf,
+ });
+ }
+ try {
+ const result = await icebergRequest({
+ credentials: params.credentials,
+ tableBucketARN: params.tableBucketARN,
+ method: 'POST',
+ suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
+ body: {
+ requirements: expected_snapshot_id > 0n
+ ? [
+ {
+ type: 'assert-ref-snapshot-id',
+ ref: 'main',
+ 'snapshot-id': expected_snapshot_id,
+ },
+ ]
+ : [],
+ updates: [
+ {
+ action: 'add-snapshot',
+ snapshot: {
+ 'sequence-number': sequence_number,
+ 'snapshot-id': snapshot_id,
+ 'parent-snapshot-id': parent_snapshot_id,
+ 'timestamp-ms': Date.now(),
+ summary: {
+ operation: 'append',
+ 'added-data-files': String(added_files),
+ 'added-records': String(added_records),
+ 'added-files-size': String(added_size),
+ },
+ 'manifest-list': manifest_list_url,
+ 'schema-id': metadata['current-schema-id'],
+ },
+ },
+ {
+ action: 'set-snapshot-ref',
+ 'snapshot-id': snapshot_id,
+ type: 'branch',
+ 'ref-name': 'main',
+ },
+ ],
  },
- ],
- },
- });
- return commit_result;
+ });
+ return {
+ result,
+ retriesNeeded: try_count,
+ parentSnapshotId: parent_snapshot_id,
+ snapshotId: snapshot_id,
+ sequenceNumber: sequence_number,
+ };
+ }
+ catch (e) {
+ if (e instanceof IcebergHttpError &&
+ e.status === 409 &&
+ try_count < retry_max) ;
+ else {
+ throw e;
+ }
+ }
+ // we do a merge in the append-only simultaneous case
+ const conflict_metadata = await getMetadata(params);
+ const conflict_snapshot_id = BigInt(conflict_metadata['current-snapshot-id'] ?? -1n);
+ if (conflict_snapshot_id <= 0n) {
+ throw new Error('conflict');
+ }
+ const conflict_snap = conflict_metadata.snapshots.find((s) => s['snapshot-id'] === conflict_snapshot_id);
+ if (!conflict_snap) {
+ throw new Error('conflict');
+ }
+ if (conflict_snap.summary.operation === 'append' &&
+ BigInt(conflict_snap['sequence-number']) === sequence_number) {
+ old_list_key = parseS3Url(conflict_snap['manifest-list']).key;
+ if (!old_list_key) {
+ throw new Error('conflict');
+ }
+ added_files += parseInt(conflict_snap.summary['added-data-files'] ?? '0', 10);
+ added_records += BigInt(conflict_snap.summary['added-records'] ?? '0');
+ added_size += BigInt(conflict_snap.summary['added-files-size'] ?? '0');
+ expected_snapshot_id = conflict_snapshot_id;
+ sequence_number++;
+ }
+ else {
+ throw new Error('conflict');
+ }
+ }
  }
  async function setCurrentCommit(params) {
  const commit_result = await icebergRequest({
@@ -1299,6 +1368,7 @@ function _randomBigInt64() {
  }

  var index = {
+ IcebergHttpError,
  getMetadata,
  addSchema,
  addPartitionSpec,
@@ -1307,6 +1377,7 @@ var index = {
  setCurrentCommit,
  };

+ exports.IcebergHttpError = IcebergHttpError;
  exports.addDataFiles = addDataFiles;
  exports.addManifest = addManifest;
  exports.addPartitionSpec = addPartitionSpec;
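Taken together, the `addDataFiles` changes implement optimistic concurrency: each commit asserts that `main` still points at the snapshot it read, and on a 409 it refetches metadata and, when the competing commit was also a plain append at the same sequence number, folds that snapshot's manifest list and counters into a retry (up to `retryCount` attempts). A rough sketch of the concurrent-append scenario this targets; table coordinates and file lists are placeholders, and the "one retries, one doesn't" outcome is the expected behavior rather than a guarantee.

```ts
import { addDataFiles } from 'node-s3tables';

const table = {
  tableBucketARN: 'arn:aws:s3tables:us-east-1:123456789012:bucket/example-bucket', // placeholder
  namespace: 'example_namespace',
  name: 'example_table',
};

// Two appends racing against the same parent snapshot: at most one passes the
// assert-ref-snapshot-id requirement on the first try; the other should merge
// the winner's append and commit on a retry rather than throwing.
const [first, second] = await Promise.all([
  addDataFiles({ ...table, lists: [/* first batch of files */] }),
  addDataFiles({ ...table, lists: [/* second batch of files */] }),
]);

console.log(first.retriesNeeded, second.retriesNeeded); // e.g. 0 and 1
```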
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "node-s3tables",
- "version": "0.0.4",
+ "version": "0.0.6",
  "description": "node api for dealing with s3tables",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",