node-s3tables 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -10
- package/dist/index.d.ts +15 -4
- package/dist/index.js +145 -86
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -310,6 +310,82 @@ Supported partition transforms:
|
|
|
310
310
|
- `'bucket[N]'` - Hash bucket with N buckets
|
|
311
311
|
- `'truncate[N]'` - Truncate strings to N characters
|
|
312
312
|
|
|
313
|
+
## AWS API Calls and Required Permissions
|
|
314
|
+
|
|
315
|
+
The library makes calls to multiple AWS services and requires specific IAM permissions:
|
|
316
|
+
|
|
317
|
+
### S3 Tables Service
|
|
318
|
+
|
|
319
|
+
**API Calls:**
|
|
320
|
+
|
|
321
|
+
- `GetTable` - Used by `getMetadata()` when called with `tableArn`
|
|
322
|
+
- Iceberg REST API calls via HTTPS to `s3tables.{region}.amazonaws.com`
|
|
323
|
+
|
|
324
|
+
**Required Permissions:**
|
|
325
|
+
|
|
326
|
+
- `s3tables:GetTable` - For retrieving table information
|
|
327
|
+
- `s3tables:GetTableData` - For reading table metadata and data objects (includes GetObject, HeadObject, ListParts)
|
|
328
|
+
- `s3tables:PutTableData` - For writing table metadata and data objects (includes PutObject, multipart upload operations)
|
|
329
|
+
- `s3tables:UpdateTableMetadataLocation` - For updating table root pointer during metadata operations
|
|
330
|
+
|
|
331
|
+
### Function-Specific Permission Requirements
|
|
332
|
+
|
|
333
|
+
**`getMetadata()`:**
|
|
334
|
+
|
|
335
|
+
- When using `tableArn`: `s3tables:GetTable`, `s3tables:GetTableData`
|
|
336
|
+
- When using `tableBucketARN` + `namespace` + `name`: `s3tables:GetTableData`
|
|
337
|
+
|
|
338
|
+
**`addSchema()`:**
|
|
339
|
+
|
|
340
|
+
- `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
|
|
341
|
+
|
|
342
|
+
**`addPartitionSpec()`:**
|
|
343
|
+
|
|
344
|
+
- `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
|
|
345
|
+
|
|
346
|
+
**`addManifest()`:**
|
|
347
|
+
|
|
348
|
+
- `s3tables:PutTableData` (for writing manifest files)
|
|
349
|
+
|
|
350
|
+
**`addDataFiles()`:**
|
|
351
|
+
|
|
352
|
+
- `s3tables:GetTableData` (to get current metadata and read existing manifest lists)
|
|
353
|
+
- `s3tables:PutTableData` (to write new manifest files and lists)
|
|
354
|
+
- `s3tables:UpdateTableMetadataLocation` (to add snapshots)
|
|
355
|
+
|
|
356
|
+
**`setCurrentCommit()`:**
|
|
357
|
+
|
|
358
|
+
- `s3tables:PutTableData`, `s3tables:UpdateTableMetadataLocation`
|
|
359
|
+
|
|
360
|
+
### Example IAM Policy
|
|
361
|
+
|
|
362
|
+
```json
|
|
363
|
+
{
|
|
364
|
+
"Version": "2012-10-17",
|
|
365
|
+
"Statement": [
|
|
366
|
+
{
|
|
367
|
+
"Effect": "Allow",
|
|
368
|
+
"Action": [
|
|
369
|
+
"s3tables:GetTable",
|
|
370
|
+
"s3tables:GetTableData",
|
|
371
|
+
"s3tables:PutTableData",
|
|
372
|
+
"s3tables:UpdateTableMetadataLocation"
|
|
373
|
+
],
|
|
374
|
+
"Resource": "arn:aws:s3tables:*:*:bucket/*/table/*"
|
|
375
|
+
}
|
|
376
|
+
]
|
|
377
|
+
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Configuration
|
|
381
|
+
|
|
382
|
+
The library uses the AWS SDK for authentication. Configure credentials using:
|
|
383
|
+
|
|
384
|
+
- Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
|
|
385
|
+
- AWS credentials file (`~/.aws/credentials`)
|
|
386
|
+
- IAM roles (when running on EC2/Lambda)
|
|
387
|
+
- Or pass credentials directly to functions
|
|
388
|
+
|
|
313
389
|
## Testing
|
|
314
390
|
|
|
315
391
|
### Prerequisites
|
|
@@ -367,7 +443,7 @@ The test suite uses additional dependencies for creating test data:
|
|
|
367
443
|
Run the test suite:
|
|
368
444
|
|
|
369
445
|
```bash
|
|
370
|
-
npm test
|
|
446
|
+
npm run test
|
|
371
447
|
```
|
|
372
448
|
|
|
373
449
|
Run tests with coverage:
|
|
@@ -382,15 +458,6 @@ Run a single test file:
|
|
|
382
458
|
npm run test:single test/create.test.ts
|
|
383
459
|
```
|
|
384
460
|
|
|
385
|
-
## Configuration
|
|
386
|
-
|
|
387
|
-
The library uses the AWS SDK for authentication. Configure credentials using:
|
|
388
|
-
|
|
389
|
-
- Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
|
|
390
|
-
- AWS credentials file (`~/.aws/credentials`)
|
|
391
|
-
- IAM roles (when running on EC2/Lambda)
|
|
392
|
-
- Or pass credentials directly to functions
|
|
393
|
-
|
|
394
461
|
## License
|
|
395
462
|
|
|
396
463
|
MIT
|
package/dist/index.d.ts
CHANGED
|
@@ -67,13 +67,16 @@ interface IcebergPartitionSpec {
|
|
|
67
67
|
'spec-id': number;
|
|
68
68
|
fields: IcebergPartitionField[];
|
|
69
69
|
}
|
|
70
|
+
interface IcebergSnapshotSummary extends Record<string, string> {
|
|
71
|
+
operation: 'append' | 'replace' | 'overwrite' | 'delete';
|
|
72
|
+
}
|
|
70
73
|
interface IcebergSnapshot {
|
|
71
74
|
'snapshot-id': bigint | number;
|
|
72
75
|
'parent-snapshot-id'?: bigint | number;
|
|
73
76
|
'sequence-number': number;
|
|
74
77
|
'timestamp-ms': number;
|
|
75
78
|
'manifest-list': string;
|
|
76
|
-
summary:
|
|
79
|
+
summary: IcebergSnapshotSummary;
|
|
77
80
|
'schema-id'?: number;
|
|
78
81
|
}
|
|
79
82
|
interface IcebergMetadata {
|
|
@@ -84,7 +87,7 @@ interface IcebergMetadata {
|
|
|
84
87
|
'default-spec-id': number;
|
|
85
88
|
'partition-specs': IcebergPartitionSpec[];
|
|
86
89
|
'last-partition-id': number;
|
|
87
|
-
'current-snapshot-id'
|
|
90
|
+
'current-snapshot-id'?: bigint | number;
|
|
88
91
|
location: string;
|
|
89
92
|
}
|
|
90
93
|
|
|
@@ -165,8 +168,16 @@ interface AddDataFilesParams {
|
|
|
165
168
|
namespace: string;
|
|
166
169
|
name: string;
|
|
167
170
|
lists: AddFileList[];
|
|
171
|
+
retryCount?: number;
|
|
172
|
+
}
|
|
173
|
+
interface AddDataFilesResult {
|
|
174
|
+
result: JSONObject;
|
|
175
|
+
retriesNeeded: number;
|
|
176
|
+
parentSnapshotId: bigint;
|
|
177
|
+
snapshotId: bigint;
|
|
178
|
+
sequenceNumber: bigint;
|
|
168
179
|
}
|
|
169
|
-
declare function addDataFiles(params: AddDataFilesParams): Promise<
|
|
180
|
+
declare function addDataFiles(params: AddDataFilesParams): Promise<AddDataFilesResult>;
|
|
170
181
|
interface SetCurrentCommitParams {
|
|
171
182
|
credentials?: AwsCredentialIdentity;
|
|
172
183
|
tableBucketARN: string;
|
|
@@ -186,4 +197,4 @@ declare const _default: {
|
|
|
186
197
|
};
|
|
187
198
|
|
|
188
199
|
export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
|
|
189
|
-
export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
|
|
200
|
+
export type { AddDataFilesParams, AddDataFilesResult, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergSnapshotSummary, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };
|
package/dist/index.js
CHANGED
|
@@ -1022,6 +1022,13 @@ function parse(text) {
|
|
|
1022
1022
|
return LosslessJson__namespace.parse(text, null, customNumberParser);
|
|
1023
1023
|
}
|
|
1024
1024
|
|
|
1025
|
+
class HttpError extends Error {
|
|
1026
|
+
status;
|
|
1027
|
+
constructor(status, message) {
|
|
1028
|
+
super(message);
|
|
1029
|
+
this.status = status;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1025
1032
|
async function icebergRequest(params) {
|
|
1026
1033
|
const region = params.tableBucketARN.split(':')[3];
|
|
1027
1034
|
if (!region) {
|
|
@@ -1062,8 +1069,17 @@ async function icebergRequest(params) {
|
|
|
1062
1069
|
const res = await fetch(url, fetch_opts);
|
|
1063
1070
|
const text = await res.text();
|
|
1064
1071
|
if (!res.ok) {
|
|
1065
|
-
|
|
1072
|
+
if (res.status) {
|
|
1073
|
+
throw new HttpError(res.status, `request failed: ${res.statusText} ${text}`);
|
|
1074
|
+
}
|
|
1075
|
+
throw new Error(`request failed: ${res.statusText} ${text}`);
|
|
1066
1076
|
}
|
|
1077
|
+
const ret = res.headers.get('content-type') === 'application/json'
|
|
1078
|
+
? _parse(text)
|
|
1079
|
+
: text;
|
|
1080
|
+
return ret;
|
|
1081
|
+
}
|
|
1082
|
+
function _parse(text) {
|
|
1067
1083
|
try {
|
|
1068
1084
|
return parse(text);
|
|
1069
1085
|
}
|
|
@@ -1145,26 +1161,31 @@ async function addPartitionSpec(params) {
|
|
|
1145
1161
|
});
|
|
1146
1162
|
}
|
|
1147
1163
|
|
|
1164
|
+
const DEFAULT_RETRY_COUNT = 5;
|
|
1148
1165
|
async function addDataFiles(params) {
|
|
1149
1166
|
const { credentials } = params;
|
|
1167
|
+
const retry_max = params.retryCount ?? DEFAULT_RETRY_COUNT;
|
|
1150
1168
|
const region = params.tableBucketARN.split(':')[3];
|
|
1151
1169
|
if (!region) {
|
|
1152
1170
|
throw new Error('bad tableBucketARN');
|
|
1153
1171
|
}
|
|
1154
1172
|
const snapshot_id = _randomBigInt64();
|
|
1155
1173
|
const metadata = await getMetadata(params);
|
|
1156
|
-
const parent_snapshot_id = metadata['current-snapshot-id'];
|
|
1157
1174
|
const bucket = metadata.location.split('/').slice(-1)[0];
|
|
1158
|
-
const
|
|
1159
|
-
|
|
1160
|
-
|
|
1175
|
+
const parent_snapshot_id = BigInt(metadata['current-snapshot-id'] ?? -1n);
|
|
1176
|
+
const snapshot = metadata.snapshots.find((s) => s['snapshot-id'] === parent_snapshot_id) ??
|
|
1177
|
+
null;
|
|
1161
1178
|
if (!bucket) {
|
|
1162
1179
|
throw new Error('bad manifest location');
|
|
1163
1180
|
}
|
|
1164
|
-
if (parent_snapshot_id
|
|
1181
|
+
if (parent_snapshot_id > 0n && !snapshot) {
|
|
1165
1182
|
throw new Error('no old snapshot');
|
|
1166
1183
|
}
|
|
1167
|
-
|
|
1184
|
+
let old_list_key = snapshot ? parseS3Url(snapshot['manifest-list']).key : '';
|
|
1185
|
+
if (snapshot && !old_list_key) {
|
|
1186
|
+
throw new Error('last snapshot invalid');
|
|
1187
|
+
}
|
|
1188
|
+
let sequence_number = BigInt(metadata.snapshots.reduce((memo, s) => s['sequence-number'] > memo ? s['sequence-number'] : memo, 0)) + 1n;
|
|
1168
1189
|
let added_files = 0;
|
|
1169
1190
|
let added_records = 0n;
|
|
1170
1191
|
let added_size = 0n;
|
|
@@ -1186,88 +1207,126 @@ async function addDataFiles(params) {
|
|
|
1186
1207
|
};
|
|
1187
1208
|
return addManifest(opts);
|
|
1188
1209
|
}));
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
const
|
|
1193
|
-
if (
|
|
1194
|
-
|
|
1210
|
+
let expected_snapshot_id = parent_snapshot_id;
|
|
1211
|
+
for (let try_count = 0;; try_count++) {
|
|
1212
|
+
const manifest_list_key = `metadata/${node_crypto.randomUUID()}.avro`;
|
|
1213
|
+
const manifest_list_url = `s3://${bucket}/${manifest_list_key}`;
|
|
1214
|
+
if (old_list_key) {
|
|
1215
|
+
await updateManifestList({
|
|
1216
|
+
credentials,
|
|
1217
|
+
region,
|
|
1218
|
+
bucket,
|
|
1219
|
+
key: old_list_key,
|
|
1220
|
+
outKey: manifest_list_key,
|
|
1221
|
+
metadata: {
|
|
1222
|
+
'sequence-number': String(sequence_number),
|
|
1223
|
+
'snapshot-id': String(snapshot_id),
|
|
1224
|
+
'parent-snapshot-id': String(parent_snapshot_id),
|
|
1225
|
+
},
|
|
1226
|
+
prepend: records,
|
|
1227
|
+
});
|
|
1195
1228
|
}
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
'sequence-number': String(sequence_number),
|
|
1204
|
-
'snapshot-id': String(snapshot_id),
|
|
1205
|
-
'parent-snapshot-id': String(parent_snapshot_id),
|
|
1206
|
-
},
|
|
1207
|
-
prepend: records,
|
|
1208
|
-
});
|
|
1209
|
-
}
|
|
1210
|
-
else {
|
|
1211
|
-
const manifest_list_buf = await avroToBuffer({
|
|
1212
|
-
type: ManifestListType,
|
|
1213
|
-
metadata: {
|
|
1214
|
-
'sequence-number': String(sequence_number),
|
|
1215
|
-
'snapshot-id': String(snapshot_id),
|
|
1216
|
-
'parent-snapshot-id': String(parent_snapshot_id),
|
|
1217
|
-
},
|
|
1218
|
-
records,
|
|
1219
|
-
});
|
|
1220
|
-
await writeS3File({
|
|
1221
|
-
credentials,
|
|
1222
|
-
region,
|
|
1223
|
-
bucket,
|
|
1224
|
-
key: manifest_list_key,
|
|
1225
|
-
body: manifest_list_buf,
|
|
1226
|
-
});
|
|
1227
|
-
}
|
|
1228
|
-
const commit_result = await icebergRequest({
|
|
1229
|
-
credentials: params.credentials,
|
|
1230
|
-
tableBucketARN: params.tableBucketARN,
|
|
1231
|
-
method: 'POST',
|
|
1232
|
-
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1233
|
-
body: {
|
|
1234
|
-
requirements: parent_snapshot_id === -1
|
|
1235
|
-
? []
|
|
1236
|
-
: [
|
|
1237
|
-
{
|
|
1238
|
-
type: 'assert-ref-snapshot-id',
|
|
1239
|
-
ref: 'main',
|
|
1240
|
-
'snapshot-id': parent_snapshot_id,
|
|
1241
|
-
},
|
|
1242
|
-
],
|
|
1243
|
-
updates: [
|
|
1244
|
-
{
|
|
1245
|
-
action: 'add-snapshot',
|
|
1246
|
-
snapshot: {
|
|
1247
|
-
'sequence-number': sequence_number,
|
|
1248
|
-
'snapshot-id': snapshot_id,
|
|
1249
|
-
'parent-snapshot-id': parent_snapshot_id,
|
|
1250
|
-
'timestamp-ms': Date.now(),
|
|
1251
|
-
summary: {
|
|
1252
|
-
operation: 'append',
|
|
1253
|
-
'added-data-files': String(added_files),
|
|
1254
|
-
'added-records': String(added_records),
|
|
1255
|
-
'added-files-size': String(added_size),
|
|
1256
|
-
},
|
|
1257
|
-
'manifest-list': manifest_list_url,
|
|
1258
|
-
'schema-id': metadata['current-schema-id'],
|
|
1259
|
-
},
|
|
1229
|
+
else {
|
|
1230
|
+
const manifest_list_buf = await avroToBuffer({
|
|
1231
|
+
type: ManifestListType,
|
|
1232
|
+
metadata: {
|
|
1233
|
+
'sequence-number': String(sequence_number),
|
|
1234
|
+
'snapshot-id': String(snapshot_id),
|
|
1235
|
+
'parent-snapshot-id': 'null',
|
|
1260
1236
|
},
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1237
|
+
records,
|
|
1238
|
+
});
|
|
1239
|
+
await writeS3File({
|
|
1240
|
+
credentials,
|
|
1241
|
+
region,
|
|
1242
|
+
bucket,
|
|
1243
|
+
key: manifest_list_key,
|
|
1244
|
+
body: manifest_list_buf,
|
|
1245
|
+
});
|
|
1246
|
+
}
|
|
1247
|
+
try {
|
|
1248
|
+
const result = await icebergRequest({
|
|
1249
|
+
credentials: params.credentials,
|
|
1250
|
+
tableBucketARN: params.tableBucketARN,
|
|
1251
|
+
method: 'POST',
|
|
1252
|
+
suffix: `/namespaces/${params.namespace}/tables/${params.name}`,
|
|
1253
|
+
body: {
|
|
1254
|
+
requirements: expected_snapshot_id > 0n
|
|
1255
|
+
? [
|
|
1256
|
+
{
|
|
1257
|
+
type: 'assert-ref-snapshot-id',
|
|
1258
|
+
ref: 'main',
|
|
1259
|
+
'snapshot-id': expected_snapshot_id,
|
|
1260
|
+
},
|
|
1261
|
+
]
|
|
1262
|
+
: [],
|
|
1263
|
+
updates: [
|
|
1264
|
+
{
|
|
1265
|
+
action: 'add-snapshot',
|
|
1266
|
+
snapshot: {
|
|
1267
|
+
'sequence-number': sequence_number,
|
|
1268
|
+
'snapshot-id': snapshot_id,
|
|
1269
|
+
'parent-snapshot-id': parent_snapshot_id,
|
|
1270
|
+
'timestamp-ms': Date.now(),
|
|
1271
|
+
summary: {
|
|
1272
|
+
operation: 'append',
|
|
1273
|
+
'added-data-files': String(added_files),
|
|
1274
|
+
'added-records': String(added_records),
|
|
1275
|
+
'added-files-size': String(added_size),
|
|
1276
|
+
},
|
|
1277
|
+
'manifest-list': manifest_list_url,
|
|
1278
|
+
'schema-id': metadata['current-schema-id'],
|
|
1279
|
+
},
|
|
1280
|
+
},
|
|
1281
|
+
{
|
|
1282
|
+
action: 'set-snapshot-ref',
|
|
1283
|
+
'snapshot-id': snapshot_id,
|
|
1284
|
+
type: 'branch',
|
|
1285
|
+
'ref-name': 'main',
|
|
1286
|
+
},
|
|
1287
|
+
],
|
|
1266
1288
|
},
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1289
|
+
});
|
|
1290
|
+
return {
|
|
1291
|
+
result,
|
|
1292
|
+
retriesNeeded: try_count,
|
|
1293
|
+
parentSnapshotId: parent_snapshot_id,
|
|
1294
|
+
snapshotId: snapshot_id,
|
|
1295
|
+
sequenceNumber: sequence_number,
|
|
1296
|
+
};
|
|
1297
|
+
}
|
|
1298
|
+
catch (e) {
|
|
1299
|
+
if (e instanceof HttpError && e.status === 409 && try_count < retry_max) ;
|
|
1300
|
+
else {
|
|
1301
|
+
throw e;
|
|
1302
|
+
}
|
|
1303
|
+
}
|
|
1304
|
+
// we do a merge in the append-only simultaneous case
|
|
1305
|
+
const conflict_metadata = await getMetadata(params);
|
|
1306
|
+
const conflict_snapshot_id = BigInt(conflict_metadata['current-snapshot-id'] ?? -1n);
|
|
1307
|
+
if (conflict_snapshot_id <= 0n) {
|
|
1308
|
+
throw new Error('conflict');
|
|
1309
|
+
}
|
|
1310
|
+
const conflict_snap = conflict_metadata.snapshots.find((s) => s['snapshot-id'] === conflict_snapshot_id);
|
|
1311
|
+
if (!conflict_snap) {
|
|
1312
|
+
throw new Error('conflict');
|
|
1313
|
+
}
|
|
1314
|
+
if (conflict_snap.summary.operation === 'append' &&
|
|
1315
|
+
BigInt(conflict_snap['sequence-number']) === sequence_number) {
|
|
1316
|
+
old_list_key = parseS3Url(conflict_snap['manifest-list']).key;
|
|
1317
|
+
if (!old_list_key) {
|
|
1318
|
+
throw new Error('conflict');
|
|
1319
|
+
}
|
|
1320
|
+
added_files += parseInt(conflict_snap.summary['added-data-files'] ?? '0', 10);
|
|
1321
|
+
added_records += BigInt(conflict_snap.summary['added-records'] ?? '0');
|
|
1322
|
+
added_size += BigInt(conflict_snap.summary['added-files-size'] ?? '0');
|
|
1323
|
+
expected_snapshot_id = conflict_snapshot_id;
|
|
1324
|
+
sequence_number++;
|
|
1325
|
+
}
|
|
1326
|
+
else {
|
|
1327
|
+
throw new Error('conflict');
|
|
1328
|
+
}
|
|
1329
|
+
}
|
|
1271
1330
|
}
|
|
1272
1331
|
async function setCurrentCommit(params) {
|
|
1273
1332
|
const commit_result = await icebergRequest({
|