node-s3tables 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -310,6 +310,82 @@ Supported partition transforms:
310
310
  - `'bucket[N]'` - Hash bucket with N buckets
311
311
  - `'truncate[N]'` - Truncate strings to N characters
312
312
 
313
+ ## AWS API Calls and Required Permissions
314
+
315
+ The library makes calls to multiple AWS services and requires specific IAM permissions:
316
+
317
+ ### S3 Tables Service
318
+
319
+ **API Calls:**
320
+
321
+ - `GetTable` - Used by `getMetadata()` when called with `tableArn`
322
+ - Iceberg REST API calls via HTTPS to `s3tables.{region}.amazonaws.com`
323
+
324
+ **Required Permissions:**
325
+
326
+ - `s3tables:GetTable` - For retrieving table information
327
+ - `s3tables:GetTableData` - For reading table metadata and data objects (includes GetObject, HeadObject, ListParts)
328
+ - `s3tables:PutTableData` - For writing table metadata and data objects (includes PutObject, multipart upload operations)
329
+ - `s3tables:UpdateTableMetadataLocation` - For updating table root pointer during metadata operations
330
+
331
+ ### Function-Specific Permission Requirements
332
+
333
+ **`getMetadata()`:**
334
+
335
+ - When using `tableArn`: `s3tables:GetTable`, `s3tables:GetTableData`
336
+ - When using `tableBucketARN` + `namespace` + `name`: `s3tables:GetTableData`
337
+
338
+ **`addSchema()`:**
339
+
340
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
341
+
342
+ **`addPartitionSpec()`:**
343
+
344
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
345
+
346
+ **`addManifest()`:**
347
+
348
+ - `s3tables:PutTableData` (for writing manifest files)
349
+
350
+ **`addDataFiles()`:**
351
+
352
+ - `s3tables:GetTableData` (to get current metadata and read existing manifest lists)
353
+ - `s3tables:PutTableData` (to write new manifest files and lists)
354
+ - `s3tables:UpdateTableMetadataLocation` (to add snapshots)
355
+
356
+ **`setCurrentCommit()`:**
357
+
358
+ - `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
359
+
360
+ ### Example IAM Policy
361
+
362
+ ```json
363
+ {
364
+ "Version": "2012-10-17",
365
+ "Statement": [
366
+ {
367
+ "Effect": "Allow",
368
+ "Action": [
369
+ "s3tables:GetTable",
370
+ "s3tables:GetTableData",
371
+ "s3tables:PutTableData",
372
+ "s3tables:UpdateTableMetadataLocation"
373
+ ],
374
+ "Resource": "arn:aws:s3tables:*:*:bucket/*/table/*"
375
+ }
376
+ ]
377
+ }
378
+ ```
379
+
380
+ ## Configuration
381
+
382
+ The library uses the AWS SDK for authentication. Configure credentials using:
383
+
384
+ - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
385
+ - AWS credentials file (`~/.aws/credentials`)
386
+ - IAM roles (when running on EC2/Lambda)
387
+ - Or pass credentials directly to functions
388
+
313
389
  ## Testing
314
390
 
315
391
  ### Prerequisites
@@ -367,7 +443,7 @@ The test suite uses additional dependencies for creating test data:
367
443
  Run the test suite:
368
444
 
369
445
  ```bash
370
- npm test
446
+ npm run test
371
447
  ```
372
448
 
373
449
  Run tests with coverage:
@@ -382,15 +458,6 @@ Run a single test file:
382
458
  npm run test:single test/create.test.ts
383
459
  ```
384
460
 
385
- ## Configuration
386
-
387
- The library uses the AWS SDK for authentication. Configure credentials using:
388
-
389
- - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
390
- - AWS credentials file (`~/.aws/credentials`)
391
- - IAM roles (when running on EC2/Lambda)
392
- - Or pass credentials directly to functions
393
-
394
461
  ## License
395
462
 
396
463
  MIT
package/dist/index.d.ts CHANGED
@@ -67,13 +67,16 @@ interface IcebergPartitionSpec {
67
67
  'spec-id': number;
68
68
  fields: IcebergPartitionField[];
69
69
  }
70
+ interface IcebergSnapshotSummary extends Record<string, string> {
71
+ operation: 'append' | 'replace' | 'overwrite' | 'delete';
72
+ }
70
73
  interface IcebergSnapshot {
71
74
  'snapshot-id': bigint | number;
72
75
  'parent-snapshot-id'?: bigint | number;
73
76
  'sequence-number': number;
74
77
  'timestamp-ms': number;
75
78
  'manifest-list': string;
76
- summary: Record<string, string>;
79
+ summary: IcebergSnapshotSummary;
77
80
  'schema-id'?: number;
78
81
  }
79
82
  interface IcebergMetadata {
@@ -84,7 +87,7 @@ interface IcebergMetadata {
84
87
  'default-spec-id': number;
85
88
  'partition-specs': IcebergPartitionSpec[];
86
89
  'last-partition-id': number;
87
- 'current-snapshot-id': bigint | number;
90
+ 'current-snapshot-id'?: bigint | number;
88
91
  location: string;
89
92
  }
90
93
 
@@ -165,8 +168,16 @@ interface AddDataFilesParams {
165
168
  namespace: string;
166
169
  name: string;
167
170
  lists: AddFileList[];
171
+ retryCount?: number;
172
+ }
173
+ interface AddDataFilesResult {
174
+ result: JSONObject;
175
+ retriesNeeded: number;
176
+ parentSnapshotId: bigint;
177
+ snapshotId: bigint;
178
+ sequenceNumber: bigint;
168
179
  }
169
- declare function addDataFiles(params: AddDataFilesParams): Promise<JSONObject>;
180
+ declare function addDataFiles(params: AddDataFilesParams): Promise<AddDataFilesResult>;
170
181
  interface SetCurrentCommitParams {
171
182
  credentials?: AwsCredentialIdentity;
172
183
  tableBucketARN: string;
@@ -186,4 +197,4 @@ declare const _default: {
186
197
  };
187
198
 
188
199
  export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
189
- export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
200
+ export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
package/dist/index.js CHANGED
@@ -1022,6 +1022,13 @@ function parse(text) {
1022
1022
  return LosslessJson__namespace.parse(text, null, customNumberParser);
1023
1023
  }
1024
1024
 
1025
+ class HttpError extends Error {
1026
+ status;
1027
+ constructor(status, message) {
1028
+ super(message);
1029
+ this.status = status;
1030
+ }
1031
+ }
1025
1032
  async function icebergRequest(params) {
1026
1033
  const region = params.tableBucketARN.split(':')[3];
1027
1034
  if (!region) {
@@ -1062,8 +1069,17 @@ async function icebergRequest(params) {
1062
1069
  const res = await fetch(url, fetch_opts);
1063
1070
  const text = await res.text();
1064
1071
  if (!res.ok) {
1065
- throw new Error(`request failed: ${res.status} ${res.statusText} ${text}`);
1072
+ if (res.status) {
1073
+ throw new HttpError(res.status, `request failed: ${res.statusText} ${text}`);
1074
+ }
1075
+ throw new Error(`request failed: ${res.statusText} ${text}`);
1066
1076
  }
1077
+ const ret = res.headers.get('content-type') === 'application/json'
1078
+ ? _parse(text)
1079
+ : text;
1080
+ return ret;
1081
+ }
1082
+ function _parse(text) {
1067
1083
  try {
1068
1084
  return parse(text);
1069
1085
  }
@@ -1145,26 +1161,31 @@ async function addPartitionSpec(params) {
1145
1161
  });
1146
1162
  }
1147
1163
 
1164
+ const DEFAULT_RETRY_COUNT = 5;
1148
1165
  async function addDataFiles(params) {
1149
1166
  const { credentials } = params;
1167
+ const retry_max = params.retryCount ?? DEFAULT_RETRY_COUNT;
1150
1168
  const region = params.tableBucketARN.split(':')[3];
1151
1169
  if (!region) {
1152
1170
  throw new Error('bad tableBucketARN');
1153
1171
  }
1154
1172
  const snapshot_id = _randomBigInt64();
1155
1173
  const metadata = await getMetadata(params);
1156
- const parent_snapshot_id = metadata['current-snapshot-id'];
1157
1174
  const bucket = metadata.location.split('/').slice(-1)[0];
1158
- const snapshot = parent_snapshot_id === -1
1159
- ? null
1160
- : metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id);
1175
+ const parent_snapshot_id = BigInt(metadata['current-snapshot-id'] ?? -1n);
1176
+ const snapshot = metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id) ??
1177
+ null;
1161
1178
  if (!bucket) {
1162
1179
  throw new Error('bad manifest location');
1163
1180
  }
1164
- if (parent_snapshot_id !== -1 && !snapshot) {
1181
+ if (parent_snapshot_id > 0n && !snapshot) {
1165
1182
  throw new Error('no old snapshot');
1166
1183
  }
1167
- const sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
1184
+ let old_list_key = snapshot ? parseS3Url(snapshot['manifest-list']).key : '';
1185
+ if (snapshot && !old_list_key) {
1186
+ throw new Error('last snapshot invalid');
1187
+ }
1188
+ let sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
1168
1189
  let added_files = 0;
1169
1190
  let added_records = 0n;
1170
1191
  let added_size = 0n;
@@ -1186,88 +1207,126 @@ async function addDataFiles(params) {
1186
1207
  };
1187
1208
  return addManifest(opts);
1188
1209
  }));
1189
- const manifest_list_key = `metadata/${node_crypto.randomUUID()}.avro`;
1190
- const manifest_list_url = `s3://${bucket}/${manifest_list_key}`;
1191
- if (snapshot) {
1192
- const { key: old_list_key } = parseS3Url(snapshot['manifest-list']);
1193
- if (!old_list_key) {
1194
- throw new Error('snapshot invalid');
1210
+ let expected_snapshot_id = parent_snapshot_id;
1211
+ for (let try_count = 0;; try_count++) {
1212
+ const manifest_list_key = `metadata/${node_crypto.randomUUID()}.avro`;
1213
+ const manifest_list_url = `s3://${bucket}/${manifest_list_key}`;
1214
+ if (old_list_key) {
1215
+ await updateManifestList({
1216
+ credentials,
1217
+ region,
1218
+ bucket,
1219
+ key: old_list_key,
1220
+ outKey: manifest_list_key,
1221
+ metadata: {
1222
+ 'sequence-number': String(sequence_number),
1223
+ 'snapshot-id': String(snapshot_id),
1224
+ 'parent-snapshot-id': String(parent_snapshot_id),
1225
+ },
1226
+ prepend: records,
1227
+ });
1195
1228
  }
1196
- await updateManifestList({
1197
- credentials,
1198
- region,
1199
- bucket,
1200
- key: old_list_key,
1201
- outKey: manifest_list_key,
1202
- metadata: {
1203
- 'sequence-number': String(sequence_number),
1204
- 'snapshot-id': String(snapshot_id),
1205
- 'parent-snapshot-id': String(parent_snapshot_id),
1206
- },
1207
- prepend: records,
1208
- });
1209
- }
1210
- else {
1211
- const manifest_list_buf = await avroToBuffer({
1212
- type: ManifestListType,
1213
- metadata: {
1214
- 'sequence-number': String(sequence_number),
1215
- 'snapshot-id': String(snapshot_id),
1216
- 'parent-snapshot-id': String(parent_snapshot_id),
1217
- },
1218
- records,
1219
- });
1220
- await writeS3File({
1221
- credentials,
1222
- region,
1223
- bucket,
1224
- key: manifest_list_key,
1225
- body: manifest_list_buf,
1226
- });
1227
- }
1228
- const commit_result = await icebergRequest({
1229
- credentials: params.credentials,
1230
- tableBucketARN: params.tableBucketARN,
1231
- method: 'POST',
1232
- suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
1233
- body: {
1234
- requirements: parent_snapshot_id === -1
1235
- ? []
1236
- : [
1237
- {
1238
- type: 'assert-ref-snapshot-id',
1239
- ref: 'main',
1240
- 'snapshot-id': parent_snapshot_id,
1241
- },
1242
- ],
1243
- updates: [
1244
- {
1245
- action: 'add-snapshot',
1246
- snapshot: {
1247
- 'sequence-number': sequence_number,
1248
- 'snapshot-id': snapshot_id,
1249
- 'parent-snapshot-id': parent_snapshot_id,
1250
- 'timestamp-ms': Date.now(),
1251
- summary: {
1252
- operation: 'append',
1253
- 'added-data-files': String(added_files),
1254
- 'added-records': String(added_records),
1255
- 'added-files-size': String(added_size),
1256
- },
1257
- 'manifest-list': manifest_list_url,
1258
- 'schema-id': metadata['current-schema-id'],
1259
- },
1229
+ else {
1230
+ const manifest_list_buf = await avroToBuffer({
1231
+ type: ManifestListType,
1232
+ metadata: {
1233
+ 'sequence-number': String(sequence_number),
1234
+ 'snapshot-id': String(snapshot_id),
1235
+ 'parent-snapshot-id': 'null',
1260
1236
  },
1261
- {
1262
- action: 'set-snapshot-ref',
1263
- 'snapshot-id': snapshot_id,
1264
- type: 'branch',
1265
- 'ref-name': 'main',
1237
+ records,
1238
+ });
1239
+ await writeS3File({
1240
+ credentials,
1241
+ region,
1242
+ bucket,
1243
+ key: manifest_list_key,
1244
+ body: manifest_list_buf,
1245
+ });
1246
+ }
1247
+ try {
1248
+ const result = await icebergRequest({
1249
+ credentials: params.credentials,
1250
+ tableBucketARN: params.tableBucketARN,
1251
+ method: 'POST',
1252
+ suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
1253
+ body: {
1254
+ requirements: expected_snapshot_id > 0n
1255
+ ? [
1256
+ {
1257
+ type: 'assert-ref-snapshot-id',
1258
+ ref: 'main',
1259
+ 'snapshot-id': expected_snapshot_id,
1260
+ },
1261
+ ]
1262
+ : [],
1263
+ updates: [
1264
+ {
1265
+ action: 'add-snapshot',
1266
+ snapshot: {
1267
+ 'sequence-number': sequence_number,
1268
+ 'snapshot-id': snapshot_id,
1269
+ 'parent-snapshot-id': parent_snapshot_id,
1270
+ 'timestamp-ms': Date.now(),
1271
+ summary: {
1272
+ operation: 'append',
1273
+ 'added-data-files': String(added_files),
1274
+ 'added-records': String(added_records),
1275
+ 'added-files-size': String(added_size),
1276
+ },
1277
+ 'manifest-list': manifest_list_url,
1278
+ 'schema-id': metadata['current-schema-id'],
1279
+ },
1280
+ },
1281
+ {
1282
+ action: 'set-snapshot-ref',
1283
+ 'snapshot-id': snapshot_id,
1284
+ type: 'branch',
1285
+ 'ref-name': 'main',
1286
+ },
1287
+ ],
1266
1288
  },
1267
- ],
1268
- },
1269
- });
1270
- return commit_result;
1289
+ });
1290
+ return {
1291
+ result,
1292
+ retriesNeeded: try_count,
1293
+ parentSnapshotId: parent_snapshot_id,
1294
+ snapshotId: snapshot_id,
1295
+ sequenceNumber: sequence_number,
1296
+ };
1297
+ }
1298
+ catch (e) {
1299
+ if (e instanceof HttpError && e.status === 409 && try_count < retry_max) ;
1300
+ else {
1301
+ throw e;
1302
+ }
1303
+ }
1304
+ // we do a merge in the append-only simultaneous case
1305
+ const conflict_metadata = await getMetadata(params);
1306
+ const conflict_snapshot_id = BigInt(conflict_metadata['current-snapshot-id'] ?? -1n);
1307
+ if (conflict_snapshot_id <= 0n) {
1308
+ throw new Error('conflict');
1309
+ }
1310
+ const conflict_snap = conflict_metadata.snapshots.find((s) => s['snapshot-id'] === conflict_snapshot_id);
1311
+ if (!conflict_snap) {
1312
+ throw new Error('conflict');
1313
+ }
1314
+ if (conflict_snap.summary.operation === 'append' &&
1315
+ BigInt(conflict_snap['sequence-number']) === sequence_number) {
1316
+ old_list_key = parseS3Url(conflict_snap['manifest-list']).key;
1317
+ if (!old_list_key) {
1318
+ throw new Error('conflict');
1319
+ }
1320
+ added_files += parseInt(conflict_snap.summary['added-data-files'] ?? '0', 10);
1321
+ added_records += BigInt(conflict_snap.summary['added-records'] ?? '0');
1322
+ added_size += BigInt(conflict_snap.summary['added-files-size'] ?? '0');
1323
+ expected_snapshot_id = conflict_snapshot_id;
1324
+ sequence_number++;
1325
+ }
1326
+ else {
1327
+ throw new Error('conflict');
1328
+ }
1329
+ }
1271
1330
  }
1272
1331
  async function setCurrentCommit(params) {
1273
1332
  const commit_result = await icebergRequest({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-s3tables",
3
- "version": "0.0.4",
3
+ "version": "0.0.5",
4
4
  "description": "node api for dealing with s3tables",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",