node-s3tables 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1 +1,396 @@
1
1
  # node-s3tables
2
+
3
+ A Node.js library for interacting with AWS S3 Tables using the Iceberg REST API.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install node-s3tables
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```javascript
14
+ import {
15
+ getMetadata,
16
+ addSchema,
17
+ addPartitionSpec,
18
+ addManifest,
19
+ addDataFiles,
20
+ setCurrentCommit,
21
+ } from 'node-s3tables';
22
+
23
+ // Get table metadata
24
+ const metadata = await getMetadata({
25
+ tableArn:
26
+ 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket/table/my-table-id',
27
+ });
28
+
29
+ // Add a new schema
30
+ await addSchema({
31
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
32
+ namespace: 'my_namespace',
33
+ name: 'my_table',
34
+ schemaId: 2,
35
+ fields: [
36
+ { id: 1, name: 'id', required: true, type: 'long' },
37
+ { id: 2, name: 'name', required: false, type: 'string' },
38
+ ],
39
+ });
40
+ ```
41
+
42
+ ## API Reference
43
+
44
+ ### getMetadata(params)
45
+
46
+ Retrieves Iceberg metadata for an S3 table.
47
+
48
+ **Parameters:**
49
+
50
+ - `params.tableArn` (string) - The ARN of the table
51
+ - OR `params.tableBucketARN` (string) + `params.namespace` (string) + `params.name` (string)
52
+ - `params.region` (string, optional) and `params.credentials` (AwsCredentialIdentity, optional) - AWS region and credentials
53
+
54
+ **Returns:** `Promise<IcebergMetadata>`
55
+
56
+ ```javascript
57
+ // Using table ARN
58
+ const metadata = await getMetadata({
59
+ tableArn:
60
+ 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket/table/my-table-id',
61
+ });
62
+
63
+ // Using bucket ARN + namespace + name
64
+ const metadata = await getMetadata({
65
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
66
+ namespace: 'my_namespace',
67
+ name: 'my_table',
68
+ });
69
+ ```
70
+
71
+ ### addSchema(params)
72
+
73
+ Adds a new schema to an S3 table and sets it as current.
74
+
75
+ **Parameters:**
76
+
77
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
78
+ - `params.namespace` (string) - The namespace name
79
+ - `params.name` (string) - The table name
80
+ - `params.schemaId` (number) - The new schema ID
81
+ - `params.fields` (IcebergSchemaField[]) - Array of schema fields
82
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
83
+
84
+ **Returns:** `Promise<JSONObject>`
85
+
86
+ ```javascript
87
+ await addSchema({
88
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
89
+ namespace: 'sales',
90
+ name: 'daily_sales',
91
+ schemaId: 2,
92
+ fields: [
93
+ { id: 1, name: 'sale_date', required: false, type: 'date' },
94
+ { id: 2, name: 'product_category', required: false, type: 'string' },
95
+ { id: 3, name: 'sales_amount', required: false, type: 'double' },
96
+ ],
97
+ });
98
+ ```
99
+
100
+ ### addPartitionSpec(params)
101
+
102
+ Adds a new partition specification to an S3 table and sets it as default.
103
+
104
+ **Parameters:**
105
+
106
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
107
+ - `params.namespace` (string) - The namespace name
108
+ - `params.name` (string) - The table name
109
+ - `params.specId` (number) - The new partition spec ID
110
+ - `params.fields` (IcebergPartitionField[]) - Array of partition fields
111
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
112
+
113
+ **Returns:** `Promise<JSONObject>`
114
+
115
+ ```javascript
116
+ await addPartitionSpec({
117
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
118
+ namespace: 'sales',
119
+ name: 'daily_sales',
120
+ specId: 1,
121
+ fields: [
122
+ {
123
+ 'field-id': 1000,
124
+ name: 'sale_date_day',
125
+ 'source-id': 1,
126
+ transform: 'day',
127
+ },
128
+ {
129
+ 'field-id': 1001,
130
+ name: 'product_category',
131
+ 'source-id': 2,
132
+ transform: 'identity',
133
+ },
134
+ ],
135
+ });
136
+ ```
137
+
138
+ ### addManifest(params)
139
+
140
+ Creates a manifest file for data files and returns a manifest list record.
141
+
142
+ **Parameters:**
143
+
144
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
145
+ - `params.region` (string) - AWS region
146
+ - `params.metadata` (IcebergMetadata) - Table metadata
147
+ - `params.schemaId` (number) - Schema ID to use
148
+ - `params.specId` (number) - Partition spec ID to use
149
+ - `params.snapshotId` (bigint) - Snapshot ID
150
+ - `params.sequenceNumber` (bigint) - Sequence number
151
+ - `params.files` (AddFile[]) - Array of data files
152
+
153
+ **Returns:** `Promise<ManifestListRecord>`
154
+
155
+ ```javascript
156
+ const manifestRecord = await addManifest({
157
+ region: 'us-west-2',
158
+ metadata: tableMetadata,
159
+ schemaId: 2,
160
+ specId: 1,
161
+ snapshotId: 4183020680887155442n,
162
+ sequenceNumber: 1n,
163
+ files: [
164
+ {
165
+ file: 's3://my-bucket/data/sales-2024-01-01.parquet',
166
+ partitions: { sale_date_day: '2024-01-01' },
167
+ recordCount: 1000n,
168
+ fileSize: 52428n,
169
+ },
170
+ ],
171
+ });
172
+ ```
173
+
174
+ ### addDataFiles(params)
175
+
176
+ Adds data files to an S3 table by creating a new snapshot.
177
+
178
+ **Parameters:**
179
+
180
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
181
+ - `params.namespace` (string) - The namespace name
182
+ - `params.name` (string) - The table name
183
+ - `params.lists` (AddFileList[]) - Array of file lists to add
184
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
185
+
186
+ **Returns:** `Promise<JSONObject>`
187
+
188
+ ```javascript
189
+ await addDataFiles({
190
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
191
+ namespace: 'sales',
192
+ name: 'daily_sales',
193
+ lists: [
194
+ {
195
+ specId: 1,
196
+ schemaId: 2,
197
+ files: [
198
+ {
199
+ file: 's3://my-bucket/data/sales-2024-01-01.parquet',
200
+ partitions: { sale_date_day: '2024-01-01' },
201
+ recordCount: 1000n,
202
+ fileSize: 52428n,
203
+ },
204
+ ],
205
+ },
206
+ ],
207
+ });
208
+ ```
209
+
210
+ ### setCurrentCommit(params)
211
+
212
+ Sets the current commit/snapshot for an S3 table.
213
+
214
+ **Parameters:**
215
+
216
+ - `params.tableBucketARN` (string) - The ARN of the table bucket
217
+ - `params.namespace` (string) - The namespace name
218
+ - `params.name` (string) - The table name
219
+ - `params.snapshotId` (bigint) - The snapshot ID to set as current
220
+ - `params.credentials` (AwsCredentialIdentity, optional) - AWS credentials
221
+
222
+ **Returns:** `Promise<JSONObject>`
223
+
224
+ ```javascript
225
+ await setCurrentCommit({
226
+ tableBucketARN: 'arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
227
+ namespace: 'sales',
228
+ name: 'daily_sales',
229
+ snapshotId: 4183020680887155442n,
230
+ });
231
+ ```
232
+
233
+ ## Type Definitions
234
+
235
+ ### AddFileList
236
+
237
+ ```typescript
238
+ interface AddFileList {
239
+ specId: number;
240
+ schemaId: number;
241
+ files: AddFile[];
242
+ }
243
+ ```
244
+
245
+ ### AddFile
246
+
247
+ ```typescript
248
+ interface AddFile {
249
+ file: string;
250
+ partitions: PartitionRecord;
251
+ fileSize: bigint;
252
+ recordCount: bigint;
253
+ columnSizes?: Record<string, bigint> | null;
254
+ valueCounts?: Record<string, bigint> | null;
255
+ nullValueCounts?: Record<string, bigint> | null;
256
+ nanValueCounts?: Record<string, bigint> | null;
257
+ lowerBounds?: Record<string, Buffer> | null;
258
+ upperBounds?: Record<string, Buffer> | null;
259
+ keyMetadata?: Buffer | null;
260
+ splitOffsets?: bigint[] | null;
261
+ equalityIds?: number[] | null;
262
+ sortOrderId?: number | null;
263
+ }
264
+ ```
265
+
266
+ ### IcebergSchemaField
267
+
268
+ ```typescript
269
+ interface IcebergSchemaField {
270
+ id: number;
271
+ name: string;
272
+ type: IcebergType;
273
+ required: boolean;
274
+ doc?: string;
275
+ }
276
+ ```
277
+
278
+ ### IcebergPartitionField
279
+
280
+ ```typescript
281
+ interface IcebergPartitionField {
282
+ 'field-id': number;
283
+ name: string;
284
+ 'source-id': number;
285
+ transform: IcebergTransform;
286
+ }
287
+ ```
288
+
289
+ ### IcebergType
290
+
291
+ Supported primitive types:
292
+
293
+ - `'boolean'`, `'int'`, `'long'`, `'float'`, `'double'`
294
+ - `'date'`, `'time'`, `'timestamp'`, `'timestamptz'`
295
+ - `'string'`, `'uuid'`, `'binary'`
296
+ - `'decimal(precision,scale)'`, `'fixed[length]'`
297
+
298
+ Complex types:
299
+
300
+ - List: `{ type: 'list', element: IcebergType, 'element-required': boolean }`
301
+ - Map: `{ type: 'map', key: IcebergType, value: IcebergType, 'value-required': boolean }`
302
+ - Struct: `{ type: 'struct', fields: IcebergSchemaField[] }`
303
+
304
+ ### IcebergTransform
305
+
306
+ Supported partition transforms:
307
+
308
+ - `'identity'` - Use the field value as-is
309
+ - `'year'`, `'month'`, `'day'`, `'hour'` - Date/time transforms
310
+ - `'bucket[N]'` - Hash bucket with N buckets
311
+ - `'truncate[N]'` - Truncate strings to N characters
312
+
313
+ ## Testing
314
+
315
+ ### Prerequisites
316
+
317
+ The tests require AWS credentials and S3 Tables resources. Set up the following environment variables in a `.env` file:
318
+
319
+ ```bash
320
+ TABLE_BUCKET_ARN=arn:aws:s3tables:us-west-2:123456789012:bucket/your-test-bucket
321
+ CATALOG_ID=123456789012:s3tablescatalog/your-test-bucket
322
+ OUTPUT_BUCKET=your-output-bucket
323
+ ```
324
+
325
+ ### AWS Service Calls and Permissions
326
+
327
+ The tests make calls to multiple AWS services and require the following permissions:
328
+
329
+ **S3 Tables:**
330
+
331
+ - `s3tables:CreateNamespace`
332
+ - `s3tables:DeleteNamespace`
333
+ - `s3tables:CreateTable`
334
+ - `s3tables:DeleteTable`
335
+ - `s3tables:GetTableMetadata`
336
+ - `s3tables:UpdateTableMetadata`
337
+
338
+ **S3:**
339
+
340
+ - `s3:PutObject` (for uploading test Parquet files)
341
+ - `s3:GetObject` (for reading manifest files)
342
+
343
+ **Lake Formation:**
344
+
345
+ - `lakeformation:AddLFTagsToResource` (adds `AccessLevel: Public` tag to namespaces)
346
+
347
+ **Athena:**
348
+
349
+ - `athena:StartQueryExecution`
350
+ - `athena:GetQueryExecution`
351
+ - `athena:GetQueryResults`
352
+
353
+ **Lake Formation Setup:**
354
+ The tests expect a Lake Formation tag with key `AccessLevel` and value `Public` to exist in your account. This tag is automatically applied to test namespaces to allow Athena query permissions.
355
+
356
+ ### Test Dependencies
357
+
358
+ The test suite uses additional dependencies for creating test data:
359
+
360
+ - `@aws-sdk/client-athena` - For running Athena queries in tests
361
+ - `@aws-sdk/client-lakeformation` - For Lake Formation permissions
362
+ - `parquetjs` - For creating test Parquet files
363
+ - `dotenv-cli` - For loading environment variables
364
+
365
+ ### Running Tests
366
+
367
+ Run the test suite:
368
+
369
+ ```bash
370
+ npm test
371
+ ```
372
+
373
+ Run tests with coverage:
374
+
375
+ ```bash
376
+ npm run test:cover
377
+ ```
378
+
379
+ Run a single test file:
380
+
381
+ ```bash
382
+ npm run test:single test/create.test.ts
383
+ ```
384
+
385
+ ## Configuration
386
+
387
+ The library uses the AWS SDK for authentication. Configure credentials using:
388
+
389
+ - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
390
+ - AWS credentials file (`~/.aws/credentials`)
391
+ - IAM roles (when running on EC2/Lambda)
392
+ - Or pass credentials directly to functions
393
+
394
+ ## License
395
+
396
+ MIT
package/dist/index.d.ts CHANGED
@@ -1,5 +1,33 @@
1
1
  import { AwsCredentialIdentity } from '@aws-sdk/types';
2
- import { S3TablesClientConfig } from '@aws-sdk/client-s3tables';
2
+
3
+ type RawValue = string | number | bigint | Buffer | null;
4
+ type PartitionRecord = Record<string, RawValue>;
5
+ interface PartitionSummary {
6
+ contains_null: boolean;
7
+ contains_nan?: boolean | null;
8
+ lower_bound?: Buffer | null;
9
+ upper_bound?: Buffer | null;
10
+ }
11
+ declare enum ListContent {
12
+ DATA = 0,
13
+ DELETES = 1
14
+ }
15
+ interface ManifestListRecord {
16
+ manifest_path: string;
17
+ manifest_length: bigint;
18
+ partition_spec_id: number;
19
+ content: ListContent;
20
+ sequence_number: bigint;
21
+ min_sequence_number: bigint;
22
+ added_snapshot_id: bigint;
23
+ added_data_files_count: number;
24
+ existing_data_files_count: number;
25
+ deleted_data_files_count: number;
26
+ added_rows_count: bigint;
27
+ existing_rows_count: bigint;
28
+ deleted_rows_count: bigint;
29
+ partitions?: PartitionSummary[] | null;
30
+ }
3
31
 
4
32
  type IcebergTransform = 'identity' | 'year' | 'month' | 'day' | 'hour' | `bucket[${number}]` | `truncate[${number}]`;
5
33
  interface IcebergPartitionField {
@@ -39,15 +67,61 @@ interface IcebergPartitionSpec {
39
67
  'spec-id': number;
40
68
  fields: IcebergPartitionField[];
41
69
  }
70
+ interface IcebergSnapshot {
71
+ 'snapshot-id': bigint | number;
72
+ 'parent-snapshot-id'?: bigint | number;
73
+ 'sequence-number': number;
74
+ 'timestamp-ms': number;
75
+ 'manifest-list': string;
76
+ summary: Record<string, string>;
77
+ 'schema-id'?: number;
78
+ }
42
79
  interface IcebergMetadata {
43
80
  'last-column-id': number;
44
81
  'current-schema-id': number;
45
82
  schemas: IcebergSchema[];
83
+ snapshots: IcebergSnapshot[];
46
84
  'default-spec-id': number;
47
85
  'partition-specs': IcebergPartitionSpec[];
48
86
  'last-partition-id': number;
49
- 'current-snapshot-id': number;
87
+ 'current-snapshot-id': bigint | number;
88
+ location: string;
89
+ }
90
+
91
+ interface AddFile {
92
+ file: string;
93
+ partitions: PartitionRecord;
94
+ fileSize: bigint;
95
+ recordCount: bigint;
96
+ columnSizes?: Record<string, bigint> | null | undefined;
97
+ valueCounts?: Record<string, bigint> | null | undefined;
98
+ nullValueCounts?: Record<string, bigint> | null | undefined;
99
+ nanValueCounts?: Record<string, bigint> | null | undefined;
100
+ lowerBounds?: Record<string, Buffer> | null | undefined;
101
+ upperBounds?: Record<string, Buffer> | null | undefined;
102
+ keyMetadata?: Buffer | null | undefined;
103
+ splitOffsets?: bigint[] | null | undefined;
104
+ equalityIds?: number[] | null | undefined;
105
+ sortOrderId?: number | null | undefined;
50
106
  }
107
+ interface AddManifestParams {
108
+ credentials?: AwsCredentialIdentity | undefined;
109
+ region: string;
110
+ metadata: IcebergMetadata;
111
+ schemaId: number;
112
+ specId: number;
113
+ snapshotId: bigint;
114
+ sequenceNumber: bigint;
115
+ files: AddFile[];
116
+ }
117
+ declare function addManifest(params: AddManifestParams): Promise<ManifestListRecord>;
118
+
119
+ type JSONPrimitive = string | number | boolean | null | bigint | undefined;
120
+ type JSONValue = JSONPrimitive | JSONObject | JSONArray;
121
+ interface JSONObject {
122
+ [key: string]: JSONValue;
123
+ }
124
+ type JSONArray = JSONValue[];
51
125
 
52
126
  type TableLocation = {
53
127
  tableArn: string;
@@ -57,7 +131,8 @@ type TableLocation = {
57
131
  name: string;
58
132
  };
59
133
  type GetMetadataParams = TableLocation & {
60
- config?: S3TablesClientConfig;
134
+ region?: string;
135
+ credentials?: AwsCredentialIdentity;
61
136
  };
62
137
  declare function getMetadata(params: GetMetadataParams): Promise<IcebergMetadata>;
63
138
  interface AddSchemaParams {
@@ -68,7 +143,7 @@ interface AddSchemaParams {
68
143
  schemaId: number;
69
144
  fields: IcebergSchemaField[];
70
145
  }
71
- declare function addSchema(params: AddSchemaParams): Promise<string>;
146
+ declare function addSchema(params: AddSchemaParams): Promise<JSONObject>;
72
147
  interface AddPartitionSpecParams {
73
148
  credentials?: AwsCredentialIdentity;
74
149
  tableBucketARN: string;
@@ -77,13 +152,38 @@ interface AddPartitionSpecParams {
77
152
  specId: number;
78
153
  fields: IcebergPartitionField[];
79
154
  }
80
- declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<string>;
155
+ declare function addPartitionSpec(params: AddPartitionSpecParams): Promise<JSONObject>;
156
+
157
+ interface AddFileList {
158
+ specId: number;
159
+ schemaId: number;
160
+ files: AddFile[];
161
+ }
162
+ interface AddDataFilesParams {
163
+ credentials?: AwsCredentialIdentity;
164
+ tableBucketARN: string;
165
+ namespace: string;
166
+ name: string;
167
+ lists: AddFileList[];
168
+ }
169
+ declare function addDataFiles(params: AddDataFilesParams): Promise<JSONObject>;
170
+ interface SetCurrentCommitParams {
171
+ credentials?: AwsCredentialIdentity;
172
+ tableBucketARN: string;
173
+ namespace: string;
174
+ name: string;
175
+ snapshotId: bigint;
176
+ }
177
+ declare function setCurrentCommit(params: SetCurrentCommitParams): Promise<JSONObject>;
81
178
 
82
179
  declare const _default: {
83
180
  getMetadata: typeof getMetadata;
84
181
  addSchema: typeof addSchema;
85
182
  addPartitionSpec: typeof addPartitionSpec;
183
+ addManifest: typeof addManifest;
184
+ addDataFiles: typeof addDataFiles;
185
+ setCurrentCommit: typeof setCurrentCommit;
86
186
  };
87
187
 
88
- export { addPartitionSpec, addSchema, _default as default, getMetadata };
89
- export type { AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergTransform, IcebergType, TableLocation };
188
+ export { addDataFiles, addManifest, addPartitionSpec, addSchema, _default as default, getMetadata, setCurrentCommit };
189
+ export type { AddDataFilesParams, AddFile, AddFileList, AddManifestParams, AddPartitionSpecParams, AddSchemaParams, GetMetadataParams, IcebergComplexType, IcebergMetadata, IcebergPartitionField, IcebergPartitionSpec, IcebergPrimitiveType, IcebergSchema, IcebergSchemaField, IcebergSnapshot, IcebergTransform, IcebergType, SetCurrentCommitParams, TableLocation };