awslabs.s3-tables-mcp-server 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
@@ -15,4 +15,4 @@
15
15
  # This file is part of the awslabs namespace.
16
16
  # It is intentionally minimal to support PEP 420 namespace packages.
17
17
 
18
- __version__ = '0.0.0'
18
+ __version__ = '0.0.3'
@@ -14,32 +14,14 @@
14
14
 
15
15
  """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
16
16
 
17
+ import io
18
+ import json
17
19
  import pyarrow as pa
20
+ import pyarrow.json as pj
18
21
  from ..utils import pyiceberg_load_catalog
19
22
  from daft import Catalog as DaftCatalog
20
23
  from daft.session import Session
21
- from datetime import date, datetime, time
22
- from decimal import Decimal
23
24
  from pydantic import BaseModel
24
- from pyiceberg.types import (
25
- BinaryType,
26
- BooleanType,
27
- DateType,
28
- DecimalType,
29
- DoubleType,
30
- FixedType,
31
- FloatType,
32
- IntegerType,
33
- ListType,
34
- LongType,
35
- MapType,
36
- StringType,
37
- StructType,
38
- TimestampType,
39
- TimestamptzType,
40
- TimeType,
41
- UUIDType,
42
- )
43
25
 
44
26
  # pyiceberg and daft imports
45
27
  from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
57
39
  rest_sigv4_enabled: str = 'true'
58
40
 
59
41
 
60
- def convert_value_for_append(value, iceberg_type):
61
- """Convert a value to the appropriate type for appending to an Iceberg table column.
62
-
63
- Args:
64
- value: The value to convert. Can be of various types (str, int, float, etc.).
65
- iceberg_type: The Iceberg type to convert the value to.
66
-
67
- Returns:
68
- The value converted to the appropriate type for the Iceberg column, or None if value is None.
69
-
70
- Raises:
71
- NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
72
- ValueError: If the conversion is unsupported or fails.
73
- """
74
- if value is None:
75
- return None
76
- # Already correct type
77
- if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
78
- return value
79
- if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
80
- return value
81
- if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
82
- return value
83
- if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
84
- return value
85
- if isinstance(iceberg_type, DateType) and isinstance(value, date):
86
- return value
87
- if isinstance(iceberg_type, TimeType) and isinstance(value, time):
88
- return value
89
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
90
- return value
91
- if isinstance(iceberg_type, StringType) and isinstance(value, str):
92
- return value
93
- # Convert from string
94
- if isinstance(value, str):
95
- if isinstance(iceberg_type, BooleanType):
96
- return value.lower() in ('true', '1', 'yes')
97
- if isinstance(iceberg_type, (IntegerType, LongType)):
98
- return int(value)
99
- if isinstance(iceberg_type, (FloatType, DoubleType)):
100
- return float(value)
101
- if isinstance(iceberg_type, DecimalType):
102
- return Decimal(value)
103
- if isinstance(iceberg_type, DateType):
104
- return date.fromisoformat(value)
105
- if isinstance(iceberg_type, TimeType):
106
- return time.fromisoformat(value)
107
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
108
- return datetime.fromisoformat(value)
109
- if isinstance(iceberg_type, StringType):
110
- return value
111
- if isinstance(iceberg_type, UUIDType):
112
- import uuid
113
-
114
- return uuid.UUID(value)
115
- if isinstance(iceberg_type, (BinaryType, FixedType)):
116
- return bytes.fromhex(value)
117
- # Convert from number
118
- if isinstance(value, (int, float)):
119
- if isinstance(iceberg_type, (IntegerType, LongType)):
120
- return int(value)
121
- if isinstance(iceberg_type, (FloatType, DoubleType)):
122
- return float(value)
123
- if isinstance(iceberg_type, DecimalType):
124
- return Decimal(str(value))
125
- if isinstance(iceberg_type, StringType):
126
- return str(value)
127
- if isinstance(iceberg_type, (ListType, MapType, StructType)):
128
- raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
129
- raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
130
-
131
-
132
42
  class PyIcebergEngine:
133
43
  """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
134
44
 
@@ -197,7 +107,7 @@ class PyIcebergEngine:
197
107
  return False
198
108
 
199
109
  def append_rows(self, table_name: str, rows: list[dict]) -> None:
200
- """Append rows to an Iceberg table using pyiceberg.
110
+ """Append rows to an Iceberg table using pyiceberg with JSON encoding.
201
111
 
202
112
  Args:
203
113
  table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@ class PyIcebergEngine:
214
124
  full_table_name = f'{self.config.namespace}.{table_name}'
215
125
  else:
216
126
  full_table_name = table_name
127
+
128
+ # Load the Iceberg table
217
129
  table = self._catalog.load_table(full_table_name)
218
- iceberg_schema = table.schema()
219
- converted_rows = []
130
+ # Encode rows as JSON (line-delimited format)
131
+ json_lines = []
220
132
  for row in rows:
221
- converted_row = {}
222
- for field in iceberg_schema.fields:
223
- field_name = field.name
224
- field_type = field.field_type
225
- value = row.get(field_name)
226
- if field.required and value is None:
227
- raise ValueError(f'Required field {field_name} is missing or None')
228
- try:
229
- converted_row[field_name] = convert_value_for_append(value, field_type)
230
- except (ValueError, TypeError) as e:
231
- raise ValueError(
232
- f'Error converting value for field {field_name}: {str(e)}'
233
- )
234
- converted_rows.append(converted_row)
235
- schema = iceberg_schema.as_arrow()
236
- pa_table = pa.Table.from_pylist(converted_rows, schema=schema)
237
- table.append(pa_table)
133
+ json_lines.append(json.dumps(row))
134
+ json_data = '\n'.join(json_lines)
135
+
136
+ # Create a file-like object from the JSON data
137
+ json_buffer = io.BytesIO(json_data.encode('utf-8'))
138
+
139
+ # Read JSON data into PyArrow Table using pyarrow.json.read_json
140
+ # The Iceberg schema is enforced when the resulting table is appended below
141
+ try:
142
+ new_data_table = pj.read_json(
143
+ json_buffer, read_options=pj.ReadOptions(use_threads=True)
144
+ )
145
+ except pa.ArrowInvalid as e:
146
+ raise ValueError(
147
+ f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
148
+ )
149
+
150
+ # Append the new data to the Iceberg table
151
+ table.append(new_data_table)
152
+
238
153
  except Exception as e:
239
154
  raise Exception(f'Error appending rows: {str(e)}')
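The new append_rows shown above replaces per-field type coercion with a JSON round-trip: rows are serialized as newline-delimited JSON, parsed back into an Arrow table by pyarrow.json, and handed to pyiceberg, which checks the result against the Iceberg table schema on append. A minimal sketch of that round-trip, using a hypothetical standalone helper name (rows_to_arrow_table) rather than the engine class:

```python
import io
import json

import pyarrow as pa
import pyarrow.json as pj


def rows_to_arrow_table(rows: list[dict]) -> pa.Table:
    """Illustrative helper mirroring the JSON encoding used by append_rows."""
    # One JSON object per line (newline-delimited JSON).
    json_data = '\n'.join(json.dumps(row) for row in rows)
    json_buffer = io.BytesIO(json_data.encode('utf-8'))
    try:
        # pyarrow infers the Arrow schema from the JSON values.
        return pj.read_json(json_buffer, read_options=pj.ReadOptions(use_threads=True))
    except pa.ArrowInvalid as e:
        raise ValueError(f'Rows could not be parsed into an Arrow table: {e}')


# table.append(rows_to_arrow_table(rows)) then lets pyiceberg validate the
# inferred Arrow schema against the Iceberg table schema.
```

This trades the explicit per-type conversions of the removed convert_value_for_append helper for pyarrow's JSON type inference, with pyiceberg's append acting as the final schema check.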
@@ -0,0 +1,24 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV and Parquet file handling and import capabilities.
19
+ """
20
+
21
+ from .csv import import_csv_to_table
22
+ from .parquet import import_parquet_to_table
23
+
24
+ __all__ = ['import_csv_to_table', 'import_parquet_to_table']
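For reference, downstream code consumes these re-exports from the package rather than from the csv/parquet submodules; the same import appears in the server.py hunk later in this diff:

```python
from awslabs.s3_tables_mcp_server.file_processor import (
    import_csv_to_table,
    import_parquet_to_table,
)
```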
@@ -0,0 +1,123 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV file handling and import capabilities.
19
+ """
20
+
21
+ import io
22
+ import os
23
+ import pyarrow.csv as pc
24
+ from ..utils import get_s3_client, pyiceberg_load_catalog
25
+ from pyiceberg.exceptions import NoSuchTableError
26
+ from typing import Dict
27
+ from urllib.parse import urlparse
28
+
29
+
30
+ async def import_csv_to_table(
31
+ warehouse: str,
32
+ region: str,
33
+ namespace: str,
34
+ table_name: str,
35
+ s3_url: str,
36
+ uri: str,
37
+ catalog_name: str = 's3tablescatalog',
38
+ rest_signing_name: str = 's3tables',
39
+ rest_sigv4_enabled: str = 'true',
40
+ ) -> Dict:
41
+ """Import data from a CSV file into an S3 table.
42
+
43
+ This function reads data from a CSV file stored in S3 and imports it into an S3 table.
44
+ If the table doesn't exist, it will be created using the schema inferred from the CSV file.
45
+
46
+ Args:
47
+ warehouse: Warehouse string for Iceberg catalog
48
+ region: AWS region for S3Tables/Iceberg REST endpoint
49
+ namespace: The namespace containing the table
50
+ table_name: The name of the table to import data into
51
+ s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
52
+ uri: REST URI for Iceberg catalog
53
+ catalog_name: Catalog name
54
+ rest_signing_name: REST signing name
55
+ rest_sigv4_enabled: Enable SigV4 signing
56
+
57
+ Returns:
58
+ A dictionary containing:
59
+ - status: 'success' or 'error'
60
+ - message: Success message or error details
61
+ - rows_processed: Number of rows processed (on success)
62
+ - file_processed: Name of the processed file
63
+ - table_created: Boolean indicating if a new table was created (on success)
64
+ """
65
+ # Parse S3 URL
66
+ parsed = urlparse(s3_url)
67
+ bucket = parsed.netloc
68
+ key = parsed.path.lstrip('/')
69
+
70
+ try:
71
+ # Load Iceberg catalog
72
+ catalog = pyiceberg_load_catalog(
73
+ catalog_name,
74
+ warehouse,
75
+ uri,
76
+ region,
77
+ rest_signing_name,
78
+ rest_sigv4_enabled,
79
+ )
80
+
81
+ # Get S3 client and read the CSV file to infer schema
82
+ s3_client = get_s3_client()
83
+ response = s3_client.get_object(Bucket=bucket, Key=key)
84
+ csv_data = response['Body'].read()
85
+
86
+ # Read CSV file into PyArrow Table to infer schema
87
+ # Convert bytes to file-like object for PyArrow
88
+ csv_buffer = io.BytesIO(csv_data)
89
+ csv_table = pc.read_csv(csv_buffer)
90
+ csv_schema = csv_table.schema
91
+
92
+ table_created = False
93
+ try:
94
+ # Try to load existing table
95
+ table = catalog.load_table(f'{namespace}.{table_name}')
96
+ except NoSuchTableError:
97
+ # Table doesn't exist, create it using the CSV schema
98
+ try:
99
+ table = catalog.create_table(
100
+ identifier=f'{namespace}.{table_name}',
101
+ schema=csv_schema,
102
+ )
103
+ table_created = True
104
+ except Exception as create_error:
105
+ return {
106
+ 'status': 'error',
107
+ 'error': f'Failed to create table: {str(create_error)}',
108
+ }
109
+
110
+ # Append data to Iceberg table
111
+ table.append(csv_table)
112
+
113
+ return {
114
+ 'status': 'success',
115
+ 'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
116
+ 'rows_processed': csv_table.num_rows,
117
+ 'file_processed': os.path.basename(key),
118
+ 'table_created': table_created,
119
+ 'table_uuid': table.metadata.table_uuid,
120
+ }
121
+
122
+ except Exception as e:
123
+ return {'status': 'error', 'error': str(e)}
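import_csv_to_table is an async coroutine, so callers outside the MCP server need an event loop. A usage sketch with placeholder values (the account ID, bucket, and object key below are examples only, not taken from this diff):

```python
import asyncio

from awslabs.s3_tables_mcp_server.file_processor import import_csv_to_table


async def main() -> None:
    # Placeholder ARN, bucket, and key; substitute real values.
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-west-2:111122223333:bucket/example-bucket',
        region='us-west-2',
        namespace='retail_data',
        table_name='customers',
        s3_url='s3://example-bucket/imports/customers.csv',
        uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
    )
    if result['status'] == 'error':
        raise RuntimeError(result['error'])
    print(result['message'], result['rows_processed'])


if __name__ == '__main__':
    asyncio.run(main())
```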
@@ -0,0 +1,116 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pyarrow.parquet as pq
16
+ from awslabs.s3_tables_mcp_server.utils import get_s3_client, pyiceberg_load_catalog
17
+ from io import BytesIO
18
+ from pyiceberg.exceptions import NoSuchTableError
19
+ from typing import Dict
20
+
21
+
22
+ async def import_parquet_to_table(
23
+ warehouse: str,
24
+ region: str,
25
+ namespace: str,
26
+ table_name: str,
27
+ s3_url: str,
28
+ uri: str,
29
+ catalog_name: str = 's3tablescatalog',
30
+ rest_signing_name: str = 's3tables',
31
+ rest_sigv4_enabled: str = 'true',
32
+ ) -> Dict:
33
+ """Import data from a Parquet file into an S3 table.
34
+
35
+ This function reads data from a Parquet file stored in S3 and imports it into an existing Iceberg table.
36
+ If the table doesn't exist, it will be created using the schema from the Parquet file.
37
+
38
+ Args:
39
+ warehouse: Warehouse string for Iceberg catalog
40
+ region: AWS region for S3Tables/Iceberg REST endpoint
41
+ namespace: The namespace containing the table
42
+ table_name: The name of the table to import data into
43
+ s3_url: The S3 URL of the Parquet file
44
+ uri: REST URI for Iceberg catalog
45
+ catalog_name: Catalog name
46
+ rest_signing_name: REST signing name
47
+ rest_sigv4_enabled: Enable SigV4 signing
48
+
49
+ Returns:
50
+ A dictionary containing:
51
+ - status: 'success' or 'error'
52
+ - message: Success message or error details
53
+ - rows_processed: Number of rows processed (on success)
54
+ - file_processed: Name of the processed file
55
+ - table_created: Boolean indicating if a new table was created (on success)
56
+ """
57
+ import os
58
+ from urllib.parse import urlparse
59
+
60
+ # Parse S3 URL
61
+ parsed = urlparse(s3_url)
62
+ bucket = parsed.netloc
63
+ key = parsed.path.lstrip('/')
64
+
65
+ try:
66
+ # Load Iceberg catalog
67
+ catalog = pyiceberg_load_catalog(
68
+ catalog_name,
69
+ warehouse,
70
+ uri,
71
+ region,
72
+ rest_signing_name,
73
+ rest_sigv4_enabled,
74
+ )
75
+
76
+ # Get S3 client and read the Parquet file first to get the schema
77
+ s3_client = get_s3_client()
78
+ response = s3_client.get_object(Bucket=bucket, Key=key)
79
+ parquet_data = BytesIO(response['Body'].read())
80
+
81
+ # Read Parquet file into PyArrow Table
82
+ parquet_table = pq.read_table(parquet_data)
83
+ parquet_schema = parquet_table.schema
84
+
85
+ table_created = False
86
+ try:
87
+ # Try to load existing table
88
+ table = catalog.load_table(f'{namespace}.{table_name}')
89
+ except NoSuchTableError:
90
+ # Table doesn't exist, create it using the Parquet schema
91
+ try:
92
+ table = catalog.create_table(
93
+ identifier=f'{namespace}.{table_name}',
94
+ schema=parquet_schema,
95
+ )
96
+ table_created = True
97
+ except Exception as create_error:
98
+ return {
99
+ 'status': 'error',
100
+ 'error': f'Failed to create table: {str(create_error)}',
101
+ }
102
+
103
+ # Append data to Iceberg table
104
+ table.append(parquet_table)
105
+
106
+ return {
107
+ 'status': 'success',
108
+ 'message': f'Successfully imported {parquet_table.num_rows} rows{" and created new table" if table_created else ""}',
109
+ 'rows_processed': parquet_table.num_rows,
110
+ 'file_processed': os.path.basename(key),
111
+ 'table_created': table_created,
112
+ 'table_uuid': table.metadata.table_uuid,
113
+ }
114
+
115
+ except Exception as e:
116
+ return {'status': 'error', 'error': str(e)}
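The Parquet importer mirrors the CSV path; the main difference is that the schema comes from the Parquet file metadata rather than from CSV inference. A standalone sketch of just the read step, using boto3 directly where the module above goes through get_s3_client() (bucket and key are placeholders):

```python
from io import BytesIO

import boto3
import pyarrow.parquet as pq

# Placeholder bucket and key.
s3_client = boto3.client('s3')
response = s3_client.get_object(Bucket='example-bucket', Key='imports/customers.parquet')
parquet_table = pq.read_table(BytesIO(response['Body'].read()))

print(parquet_table.schema)    # taken from the Parquet footer, no inference needed
print(parquet_table.num_rows)
```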
@@ -32,7 +32,6 @@ from .utils import set_user_agent_mode
32
32
  from awslabs.s3_tables_mcp_server import (
33
33
  __version__,
34
34
  database,
35
- file_processor,
36
35
  namespaces,
37
36
  resources,
38
37
  s3_operations,
@@ -48,6 +47,12 @@ from awslabs.s3_tables_mcp_server.constants import (
48
47
  TABLE_BUCKET_NAME_PATTERN,
49
48
  TABLE_NAME_FIELD,
50
49
  )
50
+ from awslabs.s3_tables_mcp_server.file_processor import (
51
+ import_csv_to_table as import_csv_to_table_func,
52
+ )
53
+ from awslabs.s3_tables_mcp_server.file_processor import (
54
+ import_parquet_to_table as import_parquet_to_table_func,
55
+ )
51
56
  from datetime import datetime, timezone
52
57
  from mcp.server.fastmcp import FastMCP
53
58
  from pydantic import Field
@@ -567,32 +572,75 @@ async def query_database(
567
572
 
568
573
  @app.tool()
569
574
  @log_tool_call_with_response
570
- async def preview_csv_file(
575
+ @write_operation
576
+ async def import_csv_to_table(
577
+ warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
578
+ region: Annotated[
579
+ str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
580
+ ],
581
+ namespace: Annotated[str, NAMESPACE_NAME_FIELD],
582
+ table_name: Annotated[str, TABLE_NAME_FIELD],
571
583
  s3_url: Annotated[str, S3_URL_FIELD],
584
+ uri: Annotated[str, Field(..., description='REST URI for Iceberg catalog')],
585
+ catalog_name: Annotated[
586
+ str, Field('s3tablescatalog', description='Catalog name')
587
+ ] = 's3tablescatalog',
588
+ rest_signing_name: Annotated[
589
+ str, Field('s3tables', description='REST signing name')
590
+ ] = 's3tables',
591
+ rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
572
592
  ) -> dict:
573
- """Preview the structure of a CSV file stored in S3.
593
+ """Import data from a CSV file into an S3 table.
574
594
 
575
- This tool provides a quick preview of a CSV file's structure by reading
576
- only the headers and first row of data from an S3 location. It's useful for
577
- understanding the schema and data format without downloading the entire file.
578
- It can be used before creating an s3 table from a csv file to get the schema and data format.
595
+ This tool reads data from a CSV file stored in S3 and imports it into an S3 table.
596
+ If the table doesn't exist, it will be created with a schema inferred from the CSV file.
597
+ If the table exists, the CSV file schema must be compatible with the table's schema.
598
+ The tool will validate the schema before attempting to import the data.
579
599
 
580
600
  Returns error dictionary with status and error message if:
581
601
  - URL is not a valid S3 URL
582
602
  - File is not a CSV file
583
603
  - File cannot be accessed
604
+ - Table creation fails (when the table does not already exist)
605
+ - CSV schema is incompatible with the existing table schema
584
606
  - Any other error occurs
585
607
 
608
+ Example input values:
609
+ warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
610
+ region: 'us-west-2'
611
+ namespace: 'retail_data'
612
+ table_name: 'customers'
613
+ s3_url: 's3://bucket-name/path/to/file.csv'
614
+ uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
615
+ catalog_name: 's3tablescatalog'
616
+ rest_signing_name: 's3tables'
617
+ rest_sigv4_enabled: 'true'
618
+
586
619
  Permissions:
587
- You must have the s3:GetObject permission for the S3 bucket and key.
620
+ You must have:
621
+ - s3:GetObject permission for the CSV file
622
+ - s3tables:GetTable and s3tables:GetTables permissions to access table information
623
+ - s3tables:PutTableData permission to write to the table
588
624
  """
589
- return file_processor.preview_csv_structure(s3_url)
625
+ if uri is None:
626
+ uri = _default_uri_for_region(region)
627
+ return await import_csv_to_table_func(
628
+ warehouse=warehouse,
629
+ region=region,
630
+ namespace=namespace,
631
+ table_name=table_name,
632
+ s3_url=s3_url,
633
+ uri=uri,
634
+ catalog_name=catalog_name,
635
+ rest_signing_name=rest_signing_name,
636
+ rest_sigv4_enabled=rest_sigv4_enabled,
637
+ )
590
638
 
591
639
 
592
640
  @app.tool()
593
641
  @log_tool_call_with_response
594
642
  @write_operation
595
- async def import_csv_to_table(
643
+ async def import_parquet_to_table(
596
644
  warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
597
645
  region: Annotated[
598
646
  str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
@@ -609,29 +657,33 @@ async def import_csv_to_table(
609
657
  ] = 's3tables',
610
658
  rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
611
659
  ) -> dict:
612
- """Import data from a CSV file into an S3 table.
660
+ """Import data from a Parquet file into an S3 table.
613
661
 
614
- This tool reads data from a CSV file stored in S3 and imports it into an existing S3 table.
615
- The CSV file must have headers that match the table's schema. The tool will validate the CSV structure
616
- before attempting to import the data.
617
-
618
- To create a table, first use the preview_csv_file tool to get the schema and data format.
619
- Then use the create_table tool to create the table.
662
+ This tool reads data from a Parquet file stored in S3 and imports it into an S3 table.
663
+ If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
664
+ If the table exists, the Parquet file schema must be compatible with the table's schema.
665
+ The tool will validate the schema before attempting to import the data.
620
666
 
621
667
  Returns error dictionary with status and error message if:
622
668
  - URL is not a valid S3 URL
623
- - File is not a CSV file
669
+ - File is not a Parquet file
624
670
  - File cannot be accessed
625
- - Table does not exist
626
- - CSV headers don't match table schema
671
+ - Parquet schema is incompatible with existing table schema
627
672
  - Any other error occurs
628
673
 
674
+ Returns success dictionary with:
675
+ - status: 'success'
676
+ - message: Success message with row count
677
+ - rows_processed: Number of rows imported
678
+ - file_processed: Name of the processed file
679
+ - table_created: True if a new table was created
680
+
629
681
  Example input values:
630
682
  warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
631
683
  region: 'us-west-2'
632
684
  namespace: 'retail_data'
633
685
  table_name: 'customers'
634
- s3_url: 's3://bucket-name/path/to/file.csv'
686
+ s3_url: 's3://bucket-name/path/to/file.parquet'
635
687
  uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
636
688
  catalog_name: 's3tablescatalog'
637
689
  rest_signing_name: 's3tables'
@@ -639,14 +691,14 @@ async def import_csv_to_table(
639
691
 
640
692
  Permissions:
641
693
  You must have:
642
- - s3:GetObject permission for the CSV file
643
- - s3tables:GetDatabase and s3tables:GetDatabases permissions to access database information
694
+ - s3:GetObject permission for the Parquet file
644
695
  - s3tables:GetTable and s3tables:GetTables permissions to access table information
645
696
  - s3tables:PutTableData permission to write to the table
697
+ - s3tables:CreateTable permission (if table doesn't exist)
646
698
  """
647
699
  if uri is None:
648
700
  uri = _default_uri_for_region(region)
649
- return await file_processor.import_csv_to_table(
701
+ return await import_parquet_to_table_func(
650
702
  warehouse=warehouse,
651
703
  region=region,
652
704
  namespace=namespace,
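Both new tool wrappers fall back to _default_uri_for_region(region) when no uri is supplied. That helper's body is not part of this diff; judging from the endpoint shown in the docstring examples, it plausibly looks like the sketch below, which is an assumption rather than the published implementation:

```python
def _default_uri_for_region(region: str) -> str:
    # Assumed shape, matching the docstring example
    # 'https://s3tables.us-west-2.amazonaws.com/iceberg'.
    return f'https://s3tables.{region}.amazonaws.com/iceberg'
```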
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: awslabs.s3-tables-mcp-server
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
5
5
  Project-URL: homepage, https://awslabs.github.io/mcp/
6
6
  Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
@@ -1,21 +1,23 @@
1
1
  awslabs/__init__.py,sha256=BHp8_uaBohApyLlmVWvYVe5bSrH59FvLJ5cNTigMV_8,644
2
- awslabs/s3_tables_mcp_server/__init__.py,sha256=tAFjxXc05WBoVKOK38ijpQ0TqVAFS0h4gfiiPe8TeHo,754
2
+ awslabs/s3_tables_mcp_server/__init__.py,sha256=T0dJ6usOanvF7UeP7NAt_YhLunvmWkSXvnjOvbL467Y,754
3
3
  awslabs/s3_tables_mcp_server/constants.py,sha256=qCWY9A9PAQXdVz-anO26zbQ72Dp79nGM7xeLR062a_o,4971
4
4
  awslabs/s3_tables_mcp_server/database.py,sha256=YorxcSx-9typfQ5W_LzwNPZkP47u__QSLJlp0fBsZLg,3851
5
- awslabs/s3_tables_mcp_server/file_processor.py,sha256=BZR-yMFoB4NKJb1hzD3pYT0ziLS4QiEB5iLWSfDju1U,17031
6
5
  awslabs/s3_tables_mcp_server/models.py,sha256=zWTFJLBhIZRLEgOCTyNcGvbItxqYbFJKH6se1EzXDjY,8097
7
6
  awslabs/s3_tables_mcp_server/namespaces.py,sha256=KZqxJiEnlpxkqvbfygezbr0szwyDP2O0J6osyiPUzwg,2071
8
7
  awslabs/s3_tables_mcp_server/resources.py,sha256=PXZo0sTVn34tXJ4mlw_OS90p12SNoLZs4Re0gV815wk,8281
9
8
  awslabs/s3_tables_mcp_server/s3_operations.py,sha256=Zq3oe-uHuKbW87b_WQyM-6HZ0_ikbgiagb2SVesltdg,1656
10
- awslabs/s3_tables_mcp_server/server.py,sha256=kfVz0oHhS3S4_LApxVPpospIkqfin8TlGMf9J7nMVmo,29842
9
+ awslabs/s3_tables_mcp_server/server.py,sha256=cvXDTZuK1sGpYfjLbF6iLGe49BSA0yx4rSp73UEBcvE,32008
11
10
  awslabs/s3_tables_mcp_server/table_buckets.py,sha256=JHmpB_P9h0Hz5Uis25_GPTD1G-mIODVwjaswwIGyCS4,4471
12
11
  awslabs/s3_tables_mcp_server/tables.py,sha256=ITnRDHHrtRWLsRhff4TP4B7gGT_jRXy994oxK3x10a4,10143
13
12
  awslabs/s3_tables_mcp_server/utils.py,sha256=SReyS3KsdikI9ycL5RsvtVI7MiRnA1W9bTiXGKf1lHc,4517
14
13
  awslabs/s3_tables_mcp_server/engines/__init__.py,sha256=O4wlFva3THWmjfaXfJAwi29mxJSKIhM0jcebVfd3S5U,615
15
- awslabs/s3_tables_mcp_server/engines/pyiceberg.py,sha256=9D9xN1BMOpdNCBNZ2TnuR88kGodURHG6HOweM-oP918,9299
16
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/METADATA,sha256=XU8CVGfDUURiNbT99jV6gAiT33B6TDGUVUj6tYKusrc,11511
17
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/entry_points.txt,sha256=WRA45Bi2dVY5hskxkka_e7BAGRqG1KiW3ImTBnHSyLs,90
19
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
20
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/licenses/NOTICE,sha256=jm-1A_8i-wl7KYs2Ynj2A29vXWJCMKLHmGfy4P_B51Y,96
21
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/RECORD,,
14
+ awslabs/s3_tables_mcp_server/engines/pyiceberg.py,sha256=wzkySQZgx7L8Fn2Oqk8Yz4V-hQZDUempd8q0IwvCY_4,5784
15
+ awslabs/s3_tables_mcp_server/file_processor/__init__.py,sha256=8PeggFRY3ZKBdxcFPEqSSHkSJBZ57eOs-z0fqkMHn9E,978
16
+ awslabs/s3_tables_mcp_server/file_processor/csv.py,sha256=Sngc5mfJDLxQaINBUJLBn5OLc842rv9FqqcJ1upK6iw,4406
17
+ awslabs/s3_tables_mcp_server/file_processor/parquet.py,sha256=Lr7mtqsK9jqlWokQv74dgdEgYmNKlCJ869yNNMrm69o,4189
18
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/METADATA,sha256=DYOFGTR6IgR7l2ZciRbZWF4yjQ5FSTL8ilpBdmKMHFY,11511
19
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/entry_points.txt,sha256=WRA45Bi2dVY5hskxkka_e7BAGRqG1KiW3ImTBnHSyLs,90
21
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
22
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/licenses/NOTICE,sha256=jm-1A_8i-wl7KYs2Ynj2A29vXWJCMKLHmGfy4P_B51Y,96
23
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/RECORD,,
@@ -1,485 +0,0 @@
1
- # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- """AWS S3 Tables MCP Server file processing module.
16
-
17
- This module provides functionality for processing and analyzing uploaded files,
18
- particularly focusing on CSV file handling and preview capabilities.
19
- """
20
-
21
- import csv
22
- import os
23
- import pyarrow as pa
24
- import re
25
- import uuid
26
- from .utils import get_s3_client, pyiceberg_load_catalog
27
- from datetime import date, datetime, time
28
- from decimal import Decimal
29
- from io import StringIO
30
- from pyiceberg.types import (
31
- BinaryType,
32
- BooleanType,
33
- DateType,
34
- DecimalType,
35
- DoubleType,
36
- FixedType,
37
- FloatType,
38
- IntegerType,
39
- ListType,
40
- LongType,
41
- MapType,
42
- StringType,
43
- StructType,
44
- TimestampType,
45
- TimestamptzType,
46
- TimeType,
47
- UUIDType,
48
- )
49
- from typing import Dict, List, Optional
50
- from urllib.parse import urlparse
51
-
52
-
53
- def validate_s3_url(s3_url: str) -> tuple[bool, Optional[str], Optional[str], Optional[str]]:
54
- """Validate an S3 URL and extract its components.
55
-
56
- Args:
57
- s3_url: The S3 URL to validate (format: s3://bucket-name/key)
58
-
59
- Returns:
60
- Tuple containing:
61
- - bool: Whether the URL is valid
62
- - str: Error message if invalid, None if valid
63
- - str: Bucket name if valid, None if invalid
64
- - str: Object key if valid, None if invalid
65
- """
66
- try:
67
- parsed = urlparse(s3_url)
68
- if parsed.scheme != 's3':
69
- return False, f"Invalid URL scheme: {parsed.scheme}. Must be 's3://'", None, None
70
-
71
- if not parsed.netloc:
72
- return False, 'Missing bucket name in S3 URL', None, None
73
-
74
- bucket = parsed.netloc
75
- key = parsed.path.lstrip('/')
76
-
77
- if not key:
78
- return False, 'Missing object key in S3 URL', None, None
79
-
80
- return True, None, bucket, key
81
- except Exception as e:
82
- return False, f'Error parsing S3 URL: {str(e)}', None, None
83
-
84
-
85
- def preview_csv_structure(s3_url: str) -> Dict:
86
- """Preview the structure of a CSV file stored in S3 by reading its headers and first row.
87
-
88
- This function provides a quick preview of a CSV file's structure by reading
89
- only the headers and first row of data from an S3 location. It's useful for
90
- understanding the schema and data format without downloading the entire file.
91
-
92
- Args:
93
- s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
94
-
95
- Returns:
96
- A dictionary containing:
97
- - headers: List of column names from the first row
98
- - first_row: Dictionary mapping column names to their values from the first data row (empty if no data)
99
- - total_columns: Number of columns in the CSV
100
- - file_name: Name of the CSV file
101
-
102
- Returns error dictionary with status and error message if:
103
- - URL is not a valid S3 URL
104
- - File is not a CSV file
105
- - File cannot be accessed
106
- - Any other error occurs
107
- """
108
- try:
109
- # Validate S3 URL
110
- is_valid, error_msg, bucket, key = validate_s3_url(s3_url)
111
- if not is_valid:
112
- return {'status': 'error', 'error': error_msg}
113
-
114
- # At this point, bucket and key are guaranteed to be non-None strings
115
- if bucket is None or key is None:
116
- return {'status': 'error', 'error': 'Invalid S3 URL: bucket or key is None'}
117
-
118
- # Check if file has .csv extension
119
- if not key.lower().endswith('.csv'):
120
- return {
121
- 'status': 'error',
122
- 'error': f'File {key} is not a CSV file. Only .csv files are supported.',
123
- }
124
-
125
- # Get S3 client
126
- s3_client = get_s3_client()
127
-
128
- # Get the object from S3, only downloading first 8KB (should be enough for headers and first row)
129
- response = s3_client.get_object(
130
- Bucket=bucket,
131
- Key=key,
132
- Range='bytes=0-32768', # First 32KB
133
- )
134
-
135
- # Read the CSV content
136
- csv_content = response['Body'].read().decode('utf-8')
137
-
138
- # Split content into lines
139
- lines = csv_content.splitlines()
140
- if not lines:
141
- return {'status': 'error', 'error': 'File is empty'}
142
-
143
- # Parse the headers
144
- headers = next(csv.reader([lines[0]]), [])
145
-
146
- # Try to get first row if it exists
147
- first_row = next(csv.reader([lines[1]]), []) if len(lines) > 1 else []
148
-
149
- # Create a dictionary mapping headers to first row values
150
- first_row_dict = dict(zip(headers, first_row)) if headers and first_row else {}
151
-
152
- return {
153
- 'headers': headers,
154
- 'first_row': first_row_dict,
155
- 'total_columns': len(headers),
156
- 'file_name': os.path.basename(key),
157
- }
158
- except Exception as e:
159
- return {'status': 'error', 'error': str(e)}
160
-
161
-
162
- def convert_value(value: Optional[str], iceberg_type):
163
- """Convert a string value to the appropriate type based on Iceberg schema type.
164
-
165
- Args:
166
- value: The string value to convert (can be None)
167
- iceberg_type: The Iceberg type to convert to
168
-
169
- Returns:
170
- The converted value of the appropriate type
171
-
172
- Raises:
173
- ValueError: If the value cannot be converted to the target type
174
- NotImplementedError: For unsupported complex types
175
- """
176
- if value is None or value == '':
177
- return None
178
-
179
- if isinstance(iceberg_type, BooleanType):
180
- return value.lower() in ('true', '1', 'yes')
181
-
182
- elif isinstance(iceberg_type, IntegerType):
183
- return int(value)
184
-
185
- elif isinstance(iceberg_type, LongType):
186
- return int(value)
187
-
188
- elif isinstance(iceberg_type, FloatType):
189
- return float(value)
190
-
191
- elif isinstance(iceberg_type, DoubleType):
192
- return float(value)
193
-
194
- elif isinstance(iceberg_type, DecimalType):
195
- return Decimal(value)
196
-
197
- elif isinstance(iceberg_type, DateType):
198
- return date.fromisoformat(value)
199
-
200
- elif isinstance(iceberg_type, TimeType):
201
- return time.fromisoformat(value)
202
-
203
- elif isinstance(iceberg_type, TimestampType):
204
- return datetime.fromisoformat(value)
205
-
206
- elif isinstance(iceberg_type, TimestamptzType):
207
- return datetime.fromisoformat(value) # Ensure it's tz-aware if needed
208
-
209
- elif isinstance(iceberg_type, StringType):
210
- return str(value)
211
-
212
- elif isinstance(iceberg_type, UUIDType):
213
- return uuid.UUID(value)
214
-
215
- elif isinstance(iceberg_type, BinaryType) or isinstance(iceberg_type, FixedType):
216
- return bytes.fromhex(value)
217
-
218
- elif isinstance(iceberg_type, ListType):
219
- # naive split for example; you'd want better parsing logic
220
- return [convert_value(v.strip(), iceberg_type.element_type) for v in value.split(',')]
221
-
222
- elif isinstance(iceberg_type, MapType):
223
- # naive: "key1:value1,key2:value2"
224
- return {
225
- k.strip(): convert_value(v.strip(), iceberg_type.value_type)
226
- for k, v in (item.split(':') for item in value.split(','))
227
- }
228
-
229
- elif isinstance(iceberg_type, StructType):
230
- raise NotImplementedError('Nested structs need structured input like JSON or dict.')
231
-
232
- else:
233
- raise ValueError(f'Unsupported Iceberg type: {iceberg_type}')
234
-
235
-
236
- def create_pyarrow_schema_from_iceberg(schema) -> pa.Schema:
237
- """Create a PyArrow schema from an Iceberg schema, supporting basic types and decimals."""
238
-
239
- def convert_iceberg_type_to_pyarrow(iceberg_type_str: str):
240
- """Convert an Iceberg type string to a PyArrow type."""
241
- iceberg_type_str = iceberg_type_str.lower()
242
-
243
- if iceberg_type_str == 'boolean':
244
- return pa.bool_()
245
- elif iceberg_type_str == 'int':
246
- return pa.int32()
247
- elif iceberg_type_str == 'long':
248
- return pa.int64()
249
- elif iceberg_type_str == 'float':
250
- return pa.float32()
251
- elif iceberg_type_str == 'double':
252
- return pa.float64()
253
- elif iceberg_type_str == 'date':
254
- return pa.date32()
255
- elif iceberg_type_str == 'time':
256
- return pa.time64('us')
257
- elif iceberg_type_str == 'timestamp':
258
- return pa.timestamp('us')
259
- elif iceberg_type_str == 'timestamptz':
260
- return pa.timestamp('us', tz='UTC')
261
- elif iceberg_type_str == 'string':
262
- return pa.string()
263
- elif iceberg_type_str == 'uuid':
264
- return pa.string()
265
- elif iceberg_type_str == 'binary':
266
- return pa.binary()
267
- elif iceberg_type_str.startswith('fixed'):
268
- size_match = re.match(r'fixed\((\d+)\)', iceberg_type_str)
269
- return pa.binary(int(size_match.group(1))) if size_match else pa.binary()
270
- elif iceberg_type_str.startswith('decimal'):
271
- decimal_match = re.match(r'decimal\((\d+),\s*(\d+)\)', iceberg_type_str)
272
- if decimal_match:
273
- precision = int(decimal_match.group(1))
274
- scale = int(decimal_match.group(2))
275
- if precision <= 18:
276
- return pa.decimal128(
277
- precision, scale
278
- ) # Will use INT64 encoding for small precision
279
- else:
280
- return pa.decimal256(precision, scale) # For large precision decimals
281
- else:
282
- raise ValueError(f'Invalid decimal type format: {iceberg_type_str}')
283
- else:
284
- raise ValueError(f'Unsupported Iceberg type: {iceberg_type_str}')
285
-
286
- # Build PyArrow schema
287
- pa_fields = []
288
- for field in schema.fields:
289
- name = field.name
290
- iceberg_type_str = str(field.field_type)
291
- try:
292
- pa_type = convert_iceberg_type_to_pyarrow(iceberg_type_str)
293
- except ValueError as e:
294
- raise ValueError(f"Error in field '{name}': {e}")
295
-
296
- pa_fields.append(pa.field(name, pa_type, nullable=not field.required))
297
-
298
- return pa.schema(pa_fields)
299
-
300
-
301
- def process_chunk(chunk: List[Dict], table, chunk_name: str = 'Chunk') -> Dict:
302
- """Process a chunk of data by converting it to a PyArrow table and appending to the table.
303
-
304
- Args:
305
- chunk: List of dictionaries representing the data rows
306
- table: The Iceberg table to append data to
307
- chunk_name: Name identifier for the chunk (for logging purposes)
308
-
309
- Returns:
310
- Dictionary with status and message
311
- """
312
- try:
313
- # Get the Iceberg schema and create PyArrow schema
314
- schema = table.schema()
315
- pyarrow_schema = create_pyarrow_schema_from_iceberg(schema)
316
-
317
- # Convert list of dictionaries to PyArrow table with proper schema
318
- table_data = pa.Table.from_pylist(chunk, schema=pyarrow_schema)
319
-
320
- table.append(table_data)
321
-
322
- return {
323
- 'status': 'success',
324
- 'message': f'Successfully processed {len(chunk)} rows in {chunk_name.lower()}',
325
- }
326
-
327
- except Exception as e:
328
- return {'status': 'error', 'error': f'Error inserting {chunk_name.lower()}: {str(e)}'}
329
-
330
-
331
- async def import_csv_to_table(
332
- warehouse: str,
333
- region: str,
334
- namespace: str,
335
- table_name: str,
336
- s3_url: str,
337
- uri: str = 'https://s3tables.us-west-2.amazonaws.com/iceberg',
338
- catalog_name: str = 's3tablescatalog',
339
- rest_signing_name: str = 's3tables',
340
- rest_sigv4_enabled: str = 'true',
341
- ) -> Dict:
342
- """Import data from a CSV file into an S3 table.
343
-
344
- This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
345
- The CSV file must have headers that match the table's schema. The function will validate the CSV structure
346
- before attempting to import the data.
347
-
348
- Args:
349
- warehouse: Warehouse string for Iceberg catalog
350
- region: AWS region for S3Tables/Iceberg REST endpoint
351
- namespace: The namespace containing the table
352
- table_name: The name of the table to import data into
353
- s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
354
- uri: REST URI for Iceberg catalog
355
- catalog_name: Catalog name
356
- rest_signing_name: REST signing name
357
- rest_sigv4_enabled: Enable SigV4 signing
358
-
359
- Returns:
360
- A dictionary containing:
361
- - status: 'success' or 'error'
362
- - message: Success message or error details
363
- - rows_processed: Number of rows processed (on success)
364
- - file_processed: Name of the processed file
365
- - csv_headers: List of CSV headers
366
-
367
- Returns error dictionary with status and error message if:
368
- - URL is not a valid S3 URL
369
- - File is not a CSV file
370
- - File cannot be accessed
371
- - Table does not exist
372
- - CSV headers don't match table schema
373
- - Any other error occurs
374
- """
375
- # Validate S3 URL
376
- is_valid, error_msg, bucket, key = validate_s3_url(s3_url)
377
- if not is_valid:
378
- return {'status': 'error', 'error': error_msg}
379
-
380
- if bucket is None or key is None:
381
- return {'status': 'error', 'error': 'Invalid S3 URL: bucket or key is None'}
382
-
383
- if not key.lower().endswith('.csv'):
384
- return {
385
- 'status': 'error',
386
- 'error': f'File {key} is not a CSV file. Only .csv files are supported.',
387
- }
388
-
389
- try:
390
- # Load catalog using provided parameters (see pyiceberg.py style)
391
- catalog = pyiceberg_load_catalog(
392
- catalog_name,
393
- warehouse,
394
- uri,
395
- region,
396
- rest_signing_name,
397
- rest_sigv4_enabled,
398
- )
399
-
400
- # Load existing table
401
- table = catalog.load_table(f'{namespace}.{table_name}')
402
-
403
- # Get schema information
404
- schema = table.schema()
405
-
406
- # Get S3 client
407
- s3_client = get_s3_client()
408
-
409
- # Get the CSV file from S3
410
- response = s3_client.get_object(Bucket=bucket, Key=key)
411
- csv_content = response['Body'].read().decode('utf-8')
412
-
413
- # Read CSV content
414
- csv_reader = csv.DictReader(StringIO(csv_content))
415
-
416
- # Validate headers against schema
417
- csv_headers = csv_reader.fieldnames
418
- schema_field_names = {field.name for field in schema.fields}
419
-
420
- if not csv_headers:
421
- return {'status': 'error', 'error': 'CSV file has no headers'}
422
-
423
- missing_columns = schema_field_names - set(csv_headers)
424
- if missing_columns:
425
- return {
426
- 'status': 'error',
427
- 'error': f'CSV is missing required columns: {", ".join(missing_columns)}',
428
- }
429
-
430
- # Process rows in chunks
431
- chunk_size = 5000
432
- rows_processed = 0
433
- current_chunk = []
434
-
435
- for row in csv_reader:
436
- # Transform row data according to schema types
437
- transformed_row = {}
438
- for field in schema.fields:
439
- value = row.get(field.name)
440
-
441
- # Handle required fields
442
- if field.required and (value is None or value == ''):
443
- return {
444
- 'status': 'error',
445
- 'error': f'Required field {field.name} is missing or empty in row {rows_processed + 1}',
446
- }
447
-
448
- # Transform value based on field type
449
- try:
450
- if value is None or value == '':
451
- transformed_row[field.name] = None
452
- else:
453
- transformed_row[field.name] = convert_value(value, field.field_type)
454
- except (ValueError, TypeError) as e:
455
- return {
456
- 'status': 'error',
457
- 'error': f'Error converting value for field {field.name} in row {rows_processed + 1}: {str(e)}',
458
- }
459
-
460
- current_chunk.append(transformed_row)
461
- rows_processed += 1
462
-
463
- # Process chunk when it reaches the chunk size
464
- if len(current_chunk) >= chunk_size:
465
- result = process_chunk(current_chunk, table, 'Chunk')
466
- if result['status'] == 'error':
467
- return result
468
- current_chunk = []
469
-
470
- # Process any remaining rows
471
- if current_chunk:
472
- result = process_chunk(current_chunk, table, 'Final Chunk')
473
- if result['status'] == 'error':
474
- return result
475
-
476
- return {
477
- 'status': 'success',
478
- 'message': f'Successfully processed {rows_processed} rows',
479
- 'rows_processed': rows_processed,
480
- 'file_processed': os.path.basename(key),
481
- 'csv_headers': csv_headers,
482
- }
483
-
484
- except Exception as e:
485
- return {'status': 'error', 'error': str(e)}