awslabs.s3-tables-mcp-server 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/Dockerfile +5 -5
  2. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/PKG-INFO +1 -1
  3. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
  4. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +28 -113
  5. awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/__init__.py +24 -0
  6. awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/csv.py +123 -0
  7. awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/parquet.py +116 -0
  8. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/server.py +76 -24
  9. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/pyproject.toml +1 -1
  10. awslabs_s3_tables_mcp_server-0.0.4/tests/test_csv.py +235 -0
  11. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_init.py +0 -1
  12. awslabs_s3_tables_mcp_server-0.0.4/tests/test_parquet.py +241 -0
  13. awslabs_s3_tables_mcp_server-0.0.4/tests/test_pyiceberg.py +579 -0
  14. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_server.py +110 -25
  15. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/uv.lock +763 -763
  16. awslabs_s3_tables_mcp_server-0.0.2/awslabs/s3_tables_mcp_server/file_processor.py +0 -485
  17. awslabs_s3_tables_mcp_server-0.0.2/tests/test_file_processor.py +0 -607
  18. awslabs_s3_tables_mcp_server-0.0.2/tests/test_pyiceberg.py +0 -437
  19. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/.gitignore +0 -0
  20. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/.python-version +0 -0
  21. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/CHANGELOG.md +0 -0
  22. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/CONTEXT.md +0 -0
  23. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/LICENSE +0 -0
  24. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/NOTICE +0 -0
  25. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/README.md +0 -0
  26. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/__init__.py +0 -0
  27. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/constants.py +0 -0
  28. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/database.py +0 -0
  29. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
  30. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/models.py +0 -0
  31. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
  32. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/resources.py +0 -0
  33. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
  34. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
  35. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/tables.py +0 -0
  36. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/awslabs/s3_tables_mcp_server/utils.py +0 -0
  37. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/docker-healthcheck.sh +0 -0
  38. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_database.py +0 -0
  39. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_main.py +0 -0
  40. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_namespaces.py +0 -0
  41. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_resources.py +0 -0
  42. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_s3_operations.py +0 -0
  43. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_table_buckets.py +0 -0
  44. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_tables.py +0 -0
  45. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/tests/test_utils.py +0 -0
  46. {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.4}/uv-requirements.txt +0 -0
Dockerfile
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # dependabot should continue to update this to the latest hash.
-FROM public.ecr.aws/sam/build-python3.13@sha256:0c274ddd44e1d80e4dab3a70c25fe29508f612a045cba7d27840461c12eee86d AS uv
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283 AS uv
 
 # Install the project into `/app`
 WORKDIR /app
@@ -48,10 +48,10 @@ COPY . /app
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --python 3.13 --frozen --no-dev --no-editable
 
-# Make the directory just in case it doesn't exist
-RUN mkdir -p /root/.local
+# # Make the directory just in case it doesn't exist
+# RUN mkdir -p /root/.local
 
-FROM public.ecr.aws/sam/build-python3.13@sha256:0c274ddd44e1d80e4dab3a70c25fe29508f612a045cba7d27840461c12eee86d
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283
 
 # Place executables in the environment at the front of the path and include other binaries
 ENV PATH="/app/.venv/bin:$PATH:/usr/sbin" \
@@ -63,7 +63,7 @@ RUN groupadd --force --system app && \
     chmod o+x /root
 
 # Copy application artifacts from build stage
-COPY --from=uv --chown=app:app /root/.local /root/.local
+# COPY --from=uv --chown=app:app /root/.local /root/.local
 COPY --from=uv --chown=app:app /app/.venv /app/.venv
 
 # Get healthcheck script

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.2
+Version: 0.0.4
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/

awslabs/s3_tables_mcp_server/__init__.py
@@ -15,4 +15,4 @@
 # This file is part of the awslabs namespace.
 # It is intentionally minimal to support PEP 420 namespace packages.
 
-__version__ = '0.0.0'
+__version__ = '0.0.4'

awslabs/s3_tables_mcp_server/engines/pyiceberg.py
@@ -14,32 +14,14 @@
 
 """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
 
+import io
+import json
 import pyarrow as pa
+import pyarrow.json as pj
 from ..utils import pyiceberg_load_catalog
 from daft import Catalog as DaftCatalog
 from daft.session import Session
-from datetime import date, datetime, time
-from decimal import Decimal
 from pydantic import BaseModel
-from pyiceberg.types import (
-    BinaryType,
-    BooleanType,
-    DateType,
-    DecimalType,
-    DoubleType,
-    FixedType,
-    FloatType,
-    IntegerType,
-    ListType,
-    LongType,
-    MapType,
-    StringType,
-    StructType,
-    TimestampType,
-    TimestamptzType,
-    TimeType,
-    UUIDType,
-)
 
 # pyiceberg and daft imports
 from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
     rest_sigv4_enabled: str = 'true'
 
 
-def convert_value_for_append(value, iceberg_type):
-    """Convert a value to the appropriate type for appending to an Iceberg table column.
-
-    Args:
-        value: The value to convert. Can be of various types (str, int, float, etc.).
-        iceberg_type: The Iceberg type to convert the value to.
-
-    Returns:
-        The value converted to the appropriate type for the Iceberg column, or None if value is None.
-
-    Raises:
-        NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
-        ValueError: If the conversion is unsupported or fails.
-    """
-    if value is None:
-        return None
-    # Already correct type
-    if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
-        return value
-    if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
-        return value
-    if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
-        return value
-    if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
-        return value
-    if isinstance(iceberg_type, DateType) and isinstance(value, date):
-        return value
-    if isinstance(iceberg_type, TimeType) and isinstance(value, time):
-        return value
-    if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
-        return value
-    if isinstance(iceberg_type, StringType) and isinstance(value, str):
-        return value
-    # Convert from string
-    if isinstance(value, str):
-        if isinstance(iceberg_type, BooleanType):
-            return value.lower() in ('true', '1', 'yes')
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(value)
-        if isinstance(iceberg_type, DateType):
-            return date.fromisoformat(value)
-        if isinstance(iceberg_type, TimeType):
-            return time.fromisoformat(value)
-        if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
-            return datetime.fromisoformat(value)
-        if isinstance(iceberg_type, StringType):
-            return value
-        if isinstance(iceberg_type, UUIDType):
-            import uuid
-
-            return uuid.UUID(value)
-        if isinstance(iceberg_type, (BinaryType, FixedType)):
-            return bytes.fromhex(value)
-    # Convert from number
-    if isinstance(value, (int, float)):
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(str(value))
-        if isinstance(iceberg_type, StringType):
-            return str(value)
-    if isinstance(iceberg_type, (ListType, MapType, StructType)):
-        raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
-    raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
-
-
 class PyIcebergEngine:
     """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
 
@@ -197,7 +107,7 @@ class PyIcebergEngine:
             return False
 
     def append_rows(self, table_name: str, rows: list[dict]) -> None:
-        """Append rows to an Iceberg table using pyiceberg.
+        """Append rows to an Iceberg table using pyiceberg with JSON encoding.
 
         Args:
            table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@
                 full_table_name = f'{self.config.namespace}.{table_name}'
             else:
                 full_table_name = table_name
+
+            # Load the Iceberg table
             table = self._catalog.load_table(full_table_name)
-            iceberg_schema = table.schema()
-            converted_rows = []
+            # Encode rows as JSON (line-delimited format)
+            json_lines = []
             for row in rows:
-                converted_row = {}
-                for field in iceberg_schema.fields:
-                    field_name = field.name
-                    field_type = field.field_type
-                    value = row.get(field_name)
-                    if field.required and value is None:
-                        raise ValueError(f'Required field {field_name} is missing or None')
-                    try:
-                        converted_row[field_name] = convert_value_for_append(value, field_type)
-                    except (ValueError, TypeError) as e:
-                        raise ValueError(
-                            f'Error converting value for field {field_name}: {str(e)}'
-                        )
-                converted_rows.append(converted_row)
-            schema = iceberg_schema.as_arrow()
-            pa_table = pa.Table.from_pylist(converted_rows, schema=schema)
-            table.append(pa_table)
+                json_lines.append(json.dumps(row))
+            json_data = '\n'.join(json_lines)
+
+            # Create a file-like object from the JSON data
+            json_buffer = io.BytesIO(json_data.encode('utf-8'))
+
+            # Read JSON data into PyArrow Table using pyarrow.json.read_json
+            # This enforces the Iceberg schema and validates the data
+            try:
+                new_data_table = pj.read_json(
+                    json_buffer, read_options=pj.ReadOptions(use_threads=True)
+                )
+            except pa.ArrowInvalid as e:
+                raise ValueError(
+                    f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
+                )
+
+            # Append the new data to the Iceberg table
+            table.append(new_data_table)
+
         except Exception as e:
            raise Exception(f'Error appending rows: {str(e)}')
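
The rewritten append_rows no longer converts values field by field against the Iceberg schema; it serializes the incoming rows as newline-delimited JSON and lets PyArrow infer an Arrow table from that, with schema compatibility then checked when the Arrow table is appended to the Iceberg table. A minimal, standalone sketch of that round-trip (the rows and column names below are illustrative, not part of the package):

```python
# Standalone illustration of the JSON round-trip used by the new append_rows:
# rows -> newline-delimited JSON -> pyarrow.json.read_json -> Arrow table.
import io
import json

import pyarrow.json as pj

rows = [
    {'id': 1, 'name': 'alice', 'score': 9.5},
    {'id': 2, 'name': 'bob', 'score': 7.25},
]

# Encode each row as one JSON object per line, as append_rows does.
json_data = '\n'.join(json.dumps(row) for row in rows)
buffer = io.BytesIO(json_data.encode('utf-8'))

# read_json infers the column types (int64, string, double) from the JSON values.
table = pj.read_json(buffer, read_options=pj.ReadOptions(use_threads=True))
print(table.schema)
print(table.num_rows)  # 2
```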

awslabs/s3_tables_mcp_server/file_processor/__init__.py (new file)
@@ -0,0 +1,24 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV and Parquet file handling and import capabilities.
+"""
+
+from .csv import import_csv_to_table
+from .parquet import import_parquet_to_table
+
+__all__ = ['import_csv_to_table', 'import_parquet_to_table']

awslabs/s3_tables_mcp_server/file_processor/csv.py (new file)
@@ -0,0 +1,123 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV file handling and import capabilities.
+"""
+
+import io
+import os
+import pyarrow.csv as pc
+from ..utils import get_s3_client, pyiceberg_load_catalog
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Dict
+from urllib.parse import urlparse
+
+
+async def import_csv_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+) -> Dict:
+    """Import data from a CSV file into an S3 table.
+
+    This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
+    If the table doesn't exist, it will be created using the schema inferred from the CSV file.
+
+    Args:
+        warehouse: Warehouse string for Iceberg catalog
+        region: AWS region for S3Tables/Iceberg REST endpoint
+        namespace: The namespace containing the table
+        table_name: The name of the table to import data into
+        s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
+        uri: REST URI for Iceberg catalog
+        catalog_name: Catalog name
+        rest_signing_name: REST signing name
+        rest_sigv4_enabled: Enable SigV4 signing
+
+    Returns:
+        A dictionary containing:
+        - status: 'success' or 'error'
+        - message: Success message or error details
+        - rows_processed: Number of rows processed (on success)
+        - file_processed: Name of the processed file
+        - table_created: Boolean indicating if a new table was created (on success)
+    """
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the CSV file to infer schema
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        csv_data = response['Body'].read()
+
+        # Read CSV file into PyArrow Table to infer schema
+        # Convert bytes to file-like object for PyArrow
+        csv_buffer = io.BytesIO(csv_data)
+        csv_table = pc.read_csv(csv_buffer)
+        csv_schema = csv_table.schema
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the CSV schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=csv_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(csv_table)
+
+        return {
+            'status': 'success',
+            'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
+            'rows_processed': csv_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
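
For reference, a hypothetical call to the new CSV importer might look like the sketch below; the warehouse ARN, region, namespace, table, and S3 URL are placeholders, and the call only succeeds with the package installed, valid AWS credentials, and the permissions listed in the tool docstring.

```python
# Hypothetical usage of import_csv_to_table; all resource names are placeholders.
import asyncio

from awslabs.s3_tables_mcp_server.file_processor import import_csv_to_table


async def main():
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-west-2:123456789012:bucket/my-table-bucket',
        region='us-west-2',
        namespace='retail_data',
        table_name='customers',
        s3_url='s3://my-staging-bucket/exports/customers.csv',
        uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
    )
    # Either {'status': 'success', 'rows_processed': ..., ...} or {'status': 'error', ...}
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
```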

awslabs/s3_tables_mcp_server/file_processor/parquet.py (new file)
@@ -0,0 +1,116 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pyarrow.parquet as pq
+from awslabs.s3_tables_mcp_server.utils import get_s3_client, pyiceberg_load_catalog
+from io import BytesIO
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Dict
+
+
+async def import_parquet_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+) -> Dict:
+    """Import data from a Parquet file into an S3 table.
+
+    This function reads data from a Parquet file stored in S3 and imports it into an existing Iceberg table.
+    If the table doesn't exist, it will be created using the schema from the Parquet file.
+
+    Args:
+        warehouse: Warehouse string for Iceberg catalog
+        region: AWS region for S3Tables/Iceberg REST endpoint
+        namespace: The namespace containing the table
+        table_name: The name of the table to import data into
+        s3_url: The S3 URL of the Parquet file
+        uri: REST URI for Iceberg catalog
+        catalog_name: Catalog name
+        rest_signing_name: REST signing name
+        rest_sigv4_enabled: Enable SigV4 signing
+
+    Returns:
+        A dictionary containing:
+        - status: 'success' or 'error'
+        - message: Success message or error details
+        - rows_processed: Number of rows processed (on success)
+        - file_processed: Name of the processed file
+        - table_created: Boolean indicating if a new table was created (on success)
+    """
+    import os
+    from urllib.parse import urlparse
+
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the Parquet file first to get the schema
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        parquet_data = BytesIO(response['Body'].read())
+
+        # Read Parquet file into PyArrow Table
+        parquet_table = pq.read_table(parquet_data)
+        parquet_schema = parquet_table.schema
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the Parquet schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=parquet_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(parquet_table)
+
+        return {
+            'status': 'success',
+            'message': f'Successfully imported {parquet_table.num_rows} rows{" and created new table" if table_created else ""}',
+            'rows_processed': parquet_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
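
The Parquet path mirrors the CSV path but reads the object with pyarrow.parquet. A self-contained sketch of the PyArrow calls it relies on, with the Parquet bytes generated locally instead of fetched from S3 (column names are illustrative only):

```python
# Local stand-in for the importer's read path: Parquet bytes -> pq.read_table ->
# .schema (used to create the Iceberg table if missing) and .num_rows (rows_processed).
from io import BytesIO

import pyarrow as pa
import pyarrow.parquet as pq

# Build a tiny table and serialize it to Parquet bytes, standing in for the S3 object body.
source = pa.table({'customer_id': [1, 2, 3], 'country': ['DE', 'US', 'JP']})
buffer = BytesIO()
pq.write_table(source, buffer)
buffer.seek(0)

# This mirrors what import_parquet_to_table does with response['Body'].read().
parquet_table = pq.read_table(buffer)
print(parquet_table.schema)    # schema the importer would hand to catalog.create_table
print(parquet_table.num_rows)  # reported back as rows_processed
```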

awslabs/s3_tables_mcp_server/server.py
@@ -32,7 +32,6 @@ from .utils import set_user_agent_mode
 from awslabs.s3_tables_mcp_server import (
     __version__,
     database,
-    file_processor,
     namespaces,
     resources,
     s3_operations,
@@ -48,6 +47,12 @@ from awslabs.s3_tables_mcp_server.constants import (
     TABLE_BUCKET_NAME_PATTERN,
     TABLE_NAME_FIELD,
 )
+from awslabs.s3_tables_mcp_server.file_processor import (
+    import_csv_to_table as import_csv_to_table_func,
+)
+from awslabs.s3_tables_mcp_server.file_processor import (
+    import_parquet_to_table as import_parquet_to_table_func,
+)
 from datetime import datetime, timezone
 from mcp.server.fastmcp import FastMCP
 from pydantic import Field
@@ -567,32 +572,75 @@ async def query_database(
 
 @app.tool()
 @log_tool_call_with_response
-async def preview_csv_file(
+@write_operation
+async def import_csv_to_table(
+    warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
+    region: Annotated[
+        str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
+    ],
+    namespace: Annotated[str, NAMESPACE_NAME_FIELD],
+    table_name: Annotated[str, TABLE_NAME_FIELD],
     s3_url: Annotated[str, S3_URL_FIELD],
+    uri: Annotated[str, Field(..., description='REST URI for Iceberg catalog')],
+    catalog_name: Annotated[
+        str, Field('s3tablescatalog', description='Catalog name')
+    ] = 's3tablescatalog',
+    rest_signing_name: Annotated[
+        str, Field('s3tables', description='REST signing name')
+    ] = 's3tables',
+    rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
 ) -> dict:
-    """Preview the structure of a CSV file stored in S3.
+    """Import data from a CSV file into an S3 table.
 
-    This tool provides a quick preview of a CSV file's structure by reading
-    only the headers and first row of data from an S3 location. It's useful for
-    understanding the schema and data format without downloading the entire file.
-    It can be used before creating an s3 table from a csv file to get the schema and data format.
+    This tool reads data from a CSV file stored in S3 and imports it into an S3 table.
+    If the table doesn't exist, it will be created with a schema inferred from the CSV file.
+    If the table exists, the CSV file schema must be compatible with the table's schema.
+    The tool will validate the schema before attempting to import the data.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
     - File is not a CSV file
     - File cannot be accessed
+    - Table does not exist
+    - CSV headers don't match table schema
     - Any other error occurs
 
+    Example input values:
+        warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
+        region: 'us-west-2'
+        namespace: 'retail_data'
+        table_name: 'customers'
+        s3_url: 's3://bucket-name/path/to/file.csv'
+        uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
+        catalog_name: 's3tablescatalog'
+        rest_signing_name: 's3tables'
+        rest_sigv4_enabled: 'true'
+
     Permissions:
-    You must have the s3:GetObject permission for the S3 bucket and key.
+    You must have:
+    - s3:GetObject permission for the CSV file
+    - s3tables:GetTable and s3tables:GetTables permissions to access table information
+    - s3tables:PutTableData permission to write to the table
     """
-    return file_processor.preview_csv_structure(s3_url)
+    if uri is None:
+        uri = _default_uri_for_region(region)
+    return await import_csv_to_table_func(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+    )
 
 
 @app.tool()
 @log_tool_call_with_response
 @write_operation
-async def import_csv_to_table(
+async def import_parquet_to_table(
     warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
     region: Annotated[
         str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
@@ -609,29 +657,33 @@ async def import_csv_to_table(
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
 ) -> dict:
-    """Import data from a CSV file into an S3 table.
+    """Import data from a Parquet file into an S3 table.
 
-    This tool reads data from a CSV file stored in S3 and imports it into an existing S3 table.
-    The CSV file must have headers that match the table's schema. The tool will validate the CSV structure
-    before attempting to import the data.
-
-    To create a table, first use the preview_csv_file tool to get the schema and data format.
-    Then use the create_table tool to create the table.
+    This tool reads data from a Parquet file stored in S3 and imports it into an S3 table.
+    If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
+    If the table exists, the Parquet file schema must be compatible with the table's schema.
+    The tool will validate the schema before attempting to import the data.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
-    - File is not a CSV file
+    - File is not a Parquet file
     - File cannot be accessed
-    - Table does not exist
-    - CSV headers don't match table schema
+    - Parquet schema is incompatible with existing table schema
    - Any other error occurs
 
+    Returns success dictionary with:
+    - status: 'success'
+    - message: Success message with row count
+    - rows_processed: Number of rows imported
+    - file_processed: Name of the processed file
+    - table_created: True if a new table was created
+
     Example input values:
         warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
        region: 'us-west-2'
        namespace: 'retail_data'
        table_name: 'customers'
-        s3_url: 's3://bucket-name/path/to/file.csv'
+        s3_url: 's3://bucket-name/path/to/file.parquet'
        uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
        catalog_name: 's3tablescatalog'
        rest_signing_name: 's3tables'
@@ -639,14 +691,14 @@
 
     Permissions:
    You must have:
-    - s3:GetObject permission for the CSV file
-    - s3tables:GetDatabase and s3tables:GetDatabases permissions to access database information
+    - s3:GetObject permission for the Parquet file
     - s3tables:GetTable and s3tables:GetTables permissions to access table information
     - s3tables:PutTableData permission to write to the table
+    - s3tables:CreateTable permission (if table doesn't exist)
     """
    if uri is None:
        uri = _default_uri_for_region(region)
-    return await file_processor.import_csv_to_table(
+    return await import_parquet_to_table_func(
        warehouse=warehouse,
        region=region,
        namespace=namespace,
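
Both tool bodies fall back to _default_uri_for_region(region) when no REST URI is supplied. That helper is not shown in this diff; a plausible sketch, assuming the endpoint follows the pattern in the docstring examples ('https://s3tables.us-west-2.amazonaws.com/iceberg'), would be:

```python
# Hypothetical reconstruction of the fallback helper referenced by the tools;
# the actual implementation lives elsewhere in server.py and is not part of this diff.
def _default_uri_for_region(region: str) -> str:
    """Build the default S3 Tables Iceberg REST endpoint for a region."""
    return f'https://s3tables.{region}.amazonaws.com/iceberg'


print(_default_uri_for_region('us-west-2'))
# https://s3tables.us-west-2.amazonaws.com/iceberg
```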

pyproject.toml
@@ -2,7 +2,7 @@
 name = "awslabs.s3-tables-mcp-server"
 
 # NOTE: "Patch"=9223372036854775807 bumps next release to zero.
-version = "0.0.2"
+version = "0.0.4"
 
 description = "An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server"
 readme = "README.md"