awslabs.s3-tables-mcp-server 0.0.4.tar.gz → 0.0.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/PKG-INFO +1 -1
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/csv.py +50 -0
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/parquet.py +44 -0
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/utils.py +157 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/server.py +12 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/pyproject.toml +1 -1
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_csv.py +70 -0
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_file_processor_utils.py +645 -0
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_parquet.py +70 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_server.py +51 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/uv.lock +1 -1
- awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/csv.py +0 -123
- awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/parquet.py +0 -116
- awslabs_s3_tables_mcp_server-0.0.4/tests/test_csv.py +0 -235
- awslabs_s3_tables_mcp_server-0.0.4/tests/test_parquet.py +0 -241
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/.gitignore +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/.python-version +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/CHANGELOG.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/CONTEXT.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/Dockerfile +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/LICENSE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/NOTICE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/README.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/constants.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/file_processor/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/models.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/docker-healthcheck.sh +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_init.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_main.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_pyiceberg.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/uv-requirements.txt +0 -0
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.4
+Version: 0.0.5
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
```
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/csv.py

```diff
@@ -0,0 +1,50 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV file handling and import capabilities.
+"""
+
+import pyarrow.csv as pc
+from .utils import import_file_to_table
+
+
+async def import_csv_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+):
+    """Import a CSV file into an S3 table using PyArrow."""
+    return await import_file_to_table(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        create_pyarrow_table=pc.read_csv,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
+    )
```
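For context, a minimal caller sketch for the new `import_csv_to_table` entry point is shown below. The warehouse ARN, REST endpoint URI, bucket, namespace, and table names are illustrative placeholders, not values taken from this package.

```python
import asyncio

from awslabs.s3_tables_mcp_server.file_processor.csv import import_csv_to_table


async def main():
    # All identifiers below are placeholders; substitute real S3 Tables resources.
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-west-2:111122223333:bucket/my-table-bucket',
        region='us-west-2',
        namespace='sales',
        table_name='orders',
        s3_url='s3://my-staging-bucket/orders.csv',
        uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
        preserve_case=False,  # CSV headers are snake_cased by default
    )
    print(result['status'], result.get('message', result.get('error')))


asyncio.run(main())
```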
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/parquet.py

```diff
@@ -0,0 +1,44 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pyarrow.parquet as pq
+from .utils import import_file_to_table
+
+
+async def import_parquet_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+):
+    """Import a Parquet file into an S3 table using PyArrow."""
+    return await import_file_to_table(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        create_pyarrow_table=pq.read_table,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
+    )
```
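The only difference from the CSV module is the reader passed as `create_pyarrow_table`: both `pc.read_csv` and `pq.read_table` accept a binary file-like object and return a `pyarrow.Table`, which is what lets the shared `import_file_to_table` stay format-agnostic. A quick self-contained check of that seam (sample data made up for illustration):

```python
import io

import pyarrow as pa
import pyarrow.csv as pc
import pyarrow.parquet as pq

original = pa.table({'id': [1, 2], 'name': ['a', 'b']})

# Parquet round-trip through an in-memory buffer, as the importer does with S3 bytes.
buf = io.BytesIO()
pq.write_table(original, buf)
buf.seek(0)
assert pq.read_table(buf).equals(original)

# pc.read_csv consumes the same kind of file-like object.
csv_table = pc.read_csv(io.BytesIO(b'id,name\n1,a\n2,b\n'))
print(csv_table.schema)  # id: int64, name: string
```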
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/utils.py

```diff
@@ -0,0 +1,157 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing utilities.
+
+This module provides utility functions for file processing operations,
+particularly focusing on column name conversion and schema transformation.
+"""
+
+import os
+import pyarrow as pa
+from ..utils import get_s3_client, pyiceberg_load_catalog
+from io import BytesIO
+from pydantic.alias_generators import to_snake
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Any, Callable, Dict
+from urllib.parse import urlparse
+
+
+def convert_column_names_to_snake_case(schema: pa.Schema) -> pa.Schema:
+    """Convert column names in PyArrow schema to snake_case.
+
+    Args:
+        schema: PyArrow schema with original column names
+
+    Returns:
+        PyArrow schema with converted column names
+
+    Raises:
+        ValueError: If duplicate column names exist after conversion
+    """
+    # Extract original column names
+    original_names = schema.names
+
+    # Convert each column name to snake_case
+    converted_names = [to_snake(name) for name in original_names]
+
+    # Check for duplicates after conversion using set and len
+    if len(set(converted_names)) != len(converted_names):
+        raise ValueError(
+            f'Duplicate column names after case conversion. '
+            f'Original names: {original_names}. Converted names: {converted_names}'
+        )
+
+    # Create new schema with converted column names
+    new_fields = []
+    for i, field in enumerate(schema):
+        new_field = pa.field(
+            converted_names[i], field.type, nullable=field.nullable, metadata=field.metadata
+        )
+        new_fields.append(new_field)
+
+    return pa.schema(new_fields, metadata=schema.metadata)
+
+
+async def import_file_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    create_pyarrow_table: Callable[[Any], pa.Table],
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+) -> Dict:
+    """Import data from a file (CSV, Parquet, etc.) into an S3 table using a provided PyArrow table creation function."""
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the file
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        file_bytes = response['Body'].read()
+
+        # Create PyArrow Table and Schema (file-like interface)
+        file_like = BytesIO(file_bytes)
+        pyarrow_table = create_pyarrow_table(file_like)
+        pyarrow_schema = pyarrow_table.schema
+
+        # Convert column names to snake_case unless preserve_case is True
+        columns_converted = False
+        if not preserve_case:
+            try:
+                pyarrow_schema = convert_column_names_to_snake_case(pyarrow_schema)
+                pyarrow_table = pyarrow_table.rename_columns(pyarrow_schema.names)
+                columns_converted = True
+            except Exception as conv_err:
+                return {
+                    'status': 'error',
+                    'error': f'Column name conversion failed: {str(conv_err)}',
+                }
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=pyarrow_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(pyarrow_table)
+
+        # Build message with warnings if applicable
+        message = f'Successfully imported {pyarrow_table.num_rows} rows{" and created new table" if table_created else ""}'
+        if columns_converted:
+            message += '. WARNING: Column names were converted to snake_case format. To preserve the original case, set preserve_case to True.'
+
+        return {
+            'status': 'success',
+            'message': message,
+            'rows_processed': pyarrow_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+            'columns': pyarrow_schema.names,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
```
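The conversion relies on pydantic's `to_snake`. The sketch below illustrates the expected behavior of `convert_column_names_to_snake_case`, including the duplicate-name guard; the column names are made up for illustration.

```python
import pyarrow as pa

from awslabs.s3_tables_mcp_server.file_processor.utils import (
    convert_column_names_to_snake_case,
)

schema = pa.schema([('UserID', pa.int64()), ('FirstName', pa.string())])
print(convert_column_names_to_snake_case(schema).names)
# expected: ['user_id', 'first_name']

# Two source columns that collapse to the same snake_case name raise ValueError.
clashing = pa.schema([('UserID', pa.int64()), ('user_id', pa.int64())])
try:
    convert_column_names_to_snake_case(clashing)
except ValueError as err:
    print(f'rejected: {err}')
```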
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/server.py

```diff
@@ -589,6 +589,9 @@ async def import_csv_to_table(
         str, Field('s3tables', description='REST signing name')
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
+    preserve_case: Annotated[
+        bool, Field(..., description='Preserve case of column names')
+    ] = False,
 ) -> dict:
     """Import data from a CSV file into an S3 table.
 
@@ -596,6 +599,7 @@ async def import_csv_to_table(
     If the table doesn't exist, it will be created with a schema inferred from the CSV file.
     If the table exists, the CSV file schema must be compatible with the table's schema.
     The tool will validate the schema before attempting to import the data.
+    If preserve_case is True, the column names will not be converted to snake_case. Otherwise, the column names will be converted to snake_case.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
@@ -615,6 +619,7 @@ async def import_csv_to_table(
         catalog_name: 's3tablescatalog'
         rest_signing_name: 's3tables'
         rest_sigv4_enabled: 'true'
+        preserve_case: False
 
     Permissions:
     You must have:
@@ -634,6 +639,7 @@ async def import_csv_to_table(
         catalog_name=catalog_name,
         rest_signing_name=rest_signing_name,
         rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
     )
 
 
@@ -656,6 +662,9 @@ async def import_parquet_to_table(
         str, Field('s3tables', description='REST signing name')
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
+    preserve_case: Annotated[
+        bool, Field(..., description='Preserve case of column names')
+    ] = False,
 ) -> dict:
     """Import data from a Parquet file into an S3 table.
 
@@ -663,6 +672,7 @@ async def import_parquet_to_table(
     If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
     If the table exists, the Parquet file schema must be compatible with the table's schema.
     The tool will validate the schema before attempting to import the data.
+    If preserve_case is True, the column names will not be converted to snake_case. Otherwise, the column names will be converted to snake_case.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
@@ -688,6 +698,7 @@ async def import_parquet_to_table(
         catalog_name: 's3tablescatalog'
         rest_signing_name: 's3tables'
         rest_sigv4_enabled: 'true'
+        preserve_case: False
 
     Permissions:
     You must have:
@@ -708,6 +719,7 @@ async def import_parquet_to_table(
         catalog_name=catalog_name,
         rest_signing_name=rest_signing_name,
         rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
     )
 
 
```
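Both tools declare the new flag with the same `Annotated[..., Field(...)]` pattern used for their other parameters. As a rough, self-contained sketch of that declaration style — assuming the MCP Python SDK's FastMCP (an assumption here; the server's real wiring is not shown in this diff) and using a stub body:

```python
from typing import Annotated

from mcp.server.fastmcp import FastMCP
from pydantic import Field

mcp = FastMCP('demo')


@mcp.tool()
async def import_csv_to_table(
    s3_url: Annotated[str, Field(description='S3 URL of the CSV file')],
    preserve_case: Annotated[
        bool, Field(description='Preserve case of column names')
    ] = False,
) -> dict:
    """Stub tool showing the parameter declaration only, not the real import logic."""
    return {'s3_url': s3_url, 'preserve_case': preserve_case}
```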
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/pyproject.toml

```diff
@@ -2,7 +2,7 @@
 name = "awslabs.s3-tables-mcp-server"
 
 # NOTE: "Patch"=9223372036854775807 bumps next release to zero.
-version = "0.0.4"
+version = "0.0.5"
 
 description = "An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server"
 readme = "README.md"
```
awslabs_s3_tables_mcp_server-0.0.5/tests/test_csv.py

```diff
@@ -0,0 +1,70 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for CSV file processor (import_csv_to_table)."""
+
+import pytest
+from awslabs.s3_tables_mcp_server.file_processor import csv
+from unittest.mock import AsyncMock, patch
+
+
+@pytest.mark.asyncio
+async def test_import_csv_to_table_success():
+    """Test successful import_csv_to_table."""
+    # Arrange
+    warehouse = 'test-warehouse'
+    region = 'us-west-2'
+    namespace = 'testns'
+    table_name = 'testtable'
+    s3_url = 's3://bucket/test.csv'
+    uri = 'http://localhost:8181'
+    catalog_name = 's3tablescatalog'
+    rest_signing_name = 's3tables'
+    rest_sigv4_enabled = 'true'
+    preserve_case = False
+
+    # Patch import_file_to_table to simulate a successful import
+    success_result = {
+        'status': 'success',
+        'message': 'Successfully imported 2 rows',
+        'rows_processed': 2,
+        'file_processed': 'test.csv',
+        'table_created': True,
+        'table_uuid': 'fake-uuid',
+        'columns': ['col1', 'col2'],
+    }
+    with patch(
+        'awslabs.s3_tables_mcp_server.file_processor.csv.import_file_to_table',
+        new=AsyncMock(return_value=success_result),
+    ):
+        # Act
+        result = await csv.import_csv_to_table(
+            warehouse=warehouse,
+            region=region,
+            namespace=namespace,
+            table_name=table_name,
+            s3_url=s3_url,
+            uri=uri,
+            catalog_name=catalog_name,
+            rest_signing_name=rest_signing_name,
+            rest_sigv4_enabled=rest_sigv4_enabled,
+            preserve_case=preserve_case,
+        )
+
+        # Assert
+        assert result['status'] == 'success'
+        assert result['rows_processed'] == 2
+        assert result['file_processed'] == 'test.csv'
+        assert result['table_created'] is True
+        assert result['columns'] == ['col1', 'col2']
```