awslabs.s3-tables-mcp-server 0.0.13__tar.gz → 0.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/Dockerfile +2 -2
  2. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/PKG-INFO +3 -3
  3. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
  4. awslabs_s3_tables_mcp_server-0.0.15/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +248 -0
  5. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/file_processor/csv.py +1 -1
  6. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/file_processor/utils.py +54 -15
  7. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/server.py +84 -17
  8. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/pyproject.toml +3 -3
  9. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_file_processor_utils.py +334 -17
  10. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_pyiceberg.py +462 -24
  11. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/uv.lock +139 -74
  12. awslabs_s3_tables_mcp_server-0.0.13/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +0 -154
  13. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/.gitignore +0 -0
  14. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/.python-version +0 -0
  15. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/CHANGELOG.md +0 -0
  16. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/CONTEXT.md +0 -0
  17. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/LICENSE +0 -0
  18. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/NOTICE +0 -0
  19. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/README.md +0 -0
  20. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/__init__.py +0 -0
  21. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/constants.py +0 -0
  22. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/database.py +0 -0
  23. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
  24. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/file_processor/__init__.py +0 -0
  25. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/file_processor/parquet.py +0 -0
  26. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/models.py +0 -0
  27. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
  28. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/resources.py +0 -0
  29. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
  30. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
  31. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/tables.py +0 -0
  32. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/awslabs/s3_tables_mcp_server/utils.py +0 -0
  33. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/docker-healthcheck.sh +0 -0
  34. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_csv.py +0 -0
  35. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_database.py +0 -0
  36. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_init.py +0 -0
  37. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_main.py +0 -0
  38. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_namespaces.py +0 -0
  39. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_parquet.py +0 -0
  40. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_resources.py +0 -0
  41. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_s3_operations.py +0 -0
  42. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_server.py +0 -0
  43. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_table_buckets.py +0 -0
  44. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_tables.py +0 -0
  45. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/tests/test_utils.py +0 -0
  46. {awslabs_s3_tables_mcp_server-0.0.13 → awslabs_s3_tables_mcp_server-0.0.15}/uv-requirements.txt +0 -0
Dockerfile
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # dependabot should continue to update this to the latest hash.
-FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:61169c2bdb8e6bb44a8dfad33f569d324d52f079fded9a204b322a6fb9c9f799 AS uv
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:e66df2153a7cc47b4438848efb65e2d9442db4330db9befaee5107fc75464959 AS uv
 
 # Install the project into `/app`
 WORKDIR /app
@@ -51,7 +51,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # # Make the directory just in case it doesn't exist
 # RUN mkdir -p /root/.local
 
-FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:61169c2bdb8e6bb44a8dfad33f569d324d52f079fded9a204b322a6fb9c9f799
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:e66df2153a7cc47b4438848efb65e2d9442db4330db9befaee5107fc75464959
 
 # Place executables in the environment at the front of the path and include other binaries
 ENV PATH="/app/.venv/bin:$PATH:/usr/sbin" \
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.13
+Version: 0.0.15
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
@@ -25,9 +25,9 @@ Requires-Dist: boto3==1.40.8
 Requires-Dist: daft==0.5.8
 Requires-Dist: loguru==0.7.3
 Requires-Dist: mcp[cli]==1.11.0
-Requires-Dist: pyarrow==20.0.0
+Requires-Dist: pyarrow==22.0.0
 Requires-Dist: pydantic==2.9.2
-Requires-Dist: pyiceberg==0.9.1
+Requires-Dist: pyiceberg==0.10.0
 Requires-Dist: sqlparse==0.5.3
 Description-Content-Type: text/markdown
 
awslabs/s3_tables_mcp_server/__init__.py
@@ -15,4 +15,4 @@
 # This file is part of the awslabs namespace.
 # It is intentionally minimal to support PEP 420 namespace packages.
 
-__version__ = '0.0.13'
+__version__ = '0.0.15'
awslabs/s3_tables_mcp_server/engines/pyiceberg.py (new in 0.0.15)
@@ -0,0 +1,248 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
+
+import pyarrow as pa
+from ..utils import pyiceberg_load_catalog
+from daft import Catalog as DaftCatalog
+from daft.session import Session
+from datetime import datetime
+from pydantic import BaseModel
+
+# pyiceberg and daft imports
+from typing import Any, Dict, Optional
+
+
+def convert_temporal_fields(rows: list[dict], arrow_schema: pa.Schema) -> list[dict]:
+    """Convert string temporal fields to appropriate datetime objects based on Arrow schema.
+
+    Args:
+        rows: List of row dictionaries with string temporal values
+        arrow_schema: PyArrow schema defining field types
+
+    Returns:
+        List of row dictionaries with converted temporal values
+    """
+    converted_rows = []
+
+    for row in rows:
+        converted_row = {}
+        for field_name, value in row.items():
+            # Early skip for non-string values
+            if not isinstance(value, str):
+                converted_row[field_name] = value
+                continue
+
+            # Get the field type from schema
+            field = arrow_schema.field(field_name)
+            field_type = field.type
+
+            # Date32 or Date64 - calendar date without timezone or time
+            if pa.types.is_date(field_type):
+                # Format: "2025-03-14"
+                converted_row[field_name] = datetime.strptime(value, '%Y-%m-%d').date()
+
+            # Time64 - time of day, microsecond precision, without date or timezone
+            elif pa.types.is_time(field_type):
+                # Format: "17:10:34.123456" or "17:10:34"
+                fmt = '%H:%M:%S.%f' if '.' in value else '%H:%M:%S'
+                converted_row[field_name] = datetime.strptime(value, fmt).time()
+
+            # Timestamp without timezone
+            elif pa.types.is_timestamp(field_type) and field_type.tz is None:
+                # Format: "2025-03-14 17:10:34.123456" or "2025-03-14T17:10:34.123456"
+                value_normalized = value.replace('T', ' ')
+                if '.' in value_normalized:
+                    # Truncate nanoseconds to microseconds if needed
+                    parts = value_normalized.split('.')
+                    if len(parts[1]) > 6:
+                        value_normalized = f'{parts[0]}.{parts[1][:6]}'
+                    fmt = '%Y-%m-%d %H:%M:%S.%f'
+                else:
+                    fmt = '%Y-%m-%d %H:%M:%S'
+                converted_row[field_name] = datetime.strptime(value_normalized, fmt)
+
+            # Timestamp with timezone (stored in UTC)
+            elif pa.types.is_timestamp(field_type) and field_type.tz is not None:
+                # Format: "2025-03-14 17:10:34.123456-07" or "2025-03-14T17:10:34.123456+00:00"
+                value_normalized = value.replace('T', ' ')
+                from datetime import timezone
+
+                # Truncate nanoseconds to microseconds if present
+                if '.' in value_normalized:
+                    # Split on timezone indicator (+ or -)
+                    # Find the last occurrence of + or - which should be the timezone
+                    tz_idx = max(value_normalized.rfind('+'), value_normalized.rfind('-'))
+                    if tz_idx > 10:  # Make sure it's not the date separator
+                        timestamp_part = value_normalized[:tz_idx]
+                        tz_part = value_normalized[tz_idx:]
+
+                        # Truncate fractional seconds to 6 digits
+                        if '.' in timestamp_part:
+                            parts = timestamp_part.split('.')
+                            if len(parts[1]) > 6:
+                                timestamp_part = f'{parts[0]}.{parts[1][:6]}'
+
+                        value_normalized = timestamp_part + tz_part
+
+                # Try different timezone formats
+                for fmt in [
+                    '%Y-%m-%d %H:%M:%S.%f%z',
+                    '%Y-%m-%d %H:%M:%S%z',
+                    '%Y-%m-%d %H:%M:%S.%f',
+                    '%Y-%m-%d %H:%M:%S',
+                ]:
+                    try:
+                        dt = datetime.strptime(value_normalized, fmt)
+                        if dt.tzinfo is None:
+                            dt = dt.replace(tzinfo=timezone.utc)
+                        converted_row[field_name] = dt.astimezone(timezone.utc)
+                        break
+                    except ValueError:
+                        continue
+                else:
+                    raise ValueError(
+                        f'Could not parse timestamp with timezone: {value} for field {field_name}'
+                    )
+
+            else:
+                # Not a temporal field, keep as is
+                converted_row[field_name] = value
+
+        converted_rows.append(converted_row)
+
+    return converted_rows
+
+
+class PyIcebergConfig(BaseModel):
+    """Configuration for PyIceberg/Daft connection."""
+
+    warehouse: str  # e.g. 'arn:aws:s3tables:us-west-2:484907528679:bucket/customer-data-bucket'
+    uri: str  # e.g. 'https://s3tables.us-west-2.amazonaws.com/iceberg'
+    region: str  # e.g. 'us-west-2'
+    namespace: str  # e.g. 'retail_data'
+    catalog_name: str = 's3tablescatalog'  # default
+    rest_signing_name: str = 's3tables'
+    rest_sigv4_enabled: str = 'true'
+
+
+class PyIcebergEngine:
+    """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
+
+    def __init__(self, config: PyIcebergConfig):
+        """Initialize the PyIcebergEngine with the given configuration.
+
+        Args:
+            config: PyIcebergConfig object containing connection parameters.
+        """
+        self.config = config
+        self._catalog: Optional[Any] = None
+        self._session: Optional[Session] = None
+        self._initialize_connection()
+
+    def _initialize_connection(self):
+        try:
+            self._catalog = pyiceberg_load_catalog(
+                self.config.catalog_name,
+                self.config.warehouse,
+                self.config.uri,
+                self.config.region,
+                self.config.rest_signing_name,
+                self.config.rest_sigv4_enabled,
+            )
+            self._session = Session()
+            self._session.attach(DaftCatalog.from_iceberg(self._catalog))
+            self._session.set_namespace(self.config.namespace)
+        except Exception as e:
+            raise ConnectionError(f'Failed to initialize PyIceberg connection: {str(e)}')
+
+    def execute_query(self, query: str) -> Dict[str, Any]:
+        """Execute a SQL query against the Iceberg catalog using Daft.
+
+        Args:
+            query: SQL query to execute
+
+        Returns:
+            Dict containing:
+                - columns: List of column names
+                - rows: List of rows, where each row is a list of values
+        """
+        if not self._session:
+            raise ConnectionError('No active session for PyIceberg/Daft')
+        try:
+            result = self._session.sql(query)
+            if result is None:
+                raise Exception('Query execution returned None result')
+            df = result.collect()
+            columns = df.column_names
+            rows = df.to_pylist()
+            return {
+                'columns': columns,
+                'rows': [list(row.values()) for row in rows],
+            }
+        except Exception as e:
+            raise Exception(f'Error executing query: {str(e)}')
+
+    def test_connection(self) -> bool:
+        """Test the connection by listing namespaces."""
+        if not self._session:
+            return False
+        try:
+            _ = self._session.list_namespaces()
+            return True
+        except Exception:
+            return False
+
+    def append_rows(self, table_name: str, rows: list[dict]) -> None:
+        """Append rows to an Iceberg table using pyiceberg.
+
+        Args:
+            table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
+            rows: List of dictionaries, each representing a row to append
+
+        Raises:
+            Exception: If appending fails
+        """
+        if not self._catalog:
+            raise ConnectionError('No active catalog for PyIceberg')
+        try:
+            # If table_name does not contain a dot, prepend the namespace
+            if '.' not in table_name:
+                full_table_name = f'{self.config.namespace}.{table_name}'
+            else:
+                full_table_name = table_name
+
+            # Load the Iceberg table
+            table = self._catalog.load_table(full_table_name)
+
+            # Convert Iceberg schema to Arrow schema to ensure types/order match
+            arrow_schema = table.schema().as_arrow()
+
+            # Convert temporal fields from strings to datetime objects
+            converted_rows = convert_temporal_fields(rows, arrow_schema)
+
+            # Create PyArrow table directly from pylist with schema validation
+            try:
+                pa_table = pa.Table.from_pylist(converted_rows, schema=arrow_schema)
+            except pa.ArrowInvalid as e:
+                raise ValueError(
+                    f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
+                )
+
+            # Append the PyArrow table to the Iceberg table
+            table.append(pa_table)
+
+        except Exception as e:
+            raise Exception(f'Error appending rows: {str(e)}')
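For orientation, here is a minimal usage sketch of the new engine, based only on the code above. The warehouse ARN, namespace, table name, and row values are placeholders, and the import path assumes the installed package layout shown in the file list.

from awslabs.s3_tables_mcp_server.engines.pyiceberg import PyIcebergConfig, PyIcebergEngine

# Placeholder connection details; real values come from your S3 Tables bucket.
config = PyIcebergConfig(
    warehouse='arn:aws:s3tables:us-west-2:111122223333:bucket/example-bucket',
    uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
    region='us-west-2',
    namespace='retail_data',
)
engine = PyIcebergEngine(config)

# Read path: Daft SQL over the attached Iceberg catalog.
result = engine.execute_query('SELECT * FROM orders LIMIT 10')
print(result['columns'], result['rows'])

# Write path: temporal values may be passed as strings; convert_temporal_fields()
# maps them onto the table's Arrow schema before the append.
engine.append_rows(
    'orders',
    [{'id': 1, 'order_date': '2025-03-14', 'created_at': '2025-03-14T17:10:34+00:00'}],
)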
awslabs/s3_tables_mcp_server/file_processor/csv.py
@@ -34,7 +34,7 @@ async def import_csv_to_table(
     rest_sigv4_enabled: str = 'true',
     preserve_case: bool = False,
 ):
-    """Import a CSV file into an S3 table using PyArrow."""
+    """Import a CSV file into an existing S3 table using PyArrow."""
     return await import_file_to_table(
         warehouse=warehouse,
         region=region,
awslabs/s3_tables_mcp_server/file_processor/utils.py
@@ -20,6 +20,7 @@ particularly focusing on column name conversion and schema transformation.
 
 import os
 import pyarrow as pa
+import pyarrow.compute as pc
 from ..utils import get_s3_client, pyiceberg_load_catalog
 from io import BytesIO
 from pydantic.alias_generators import to_snake
@@ -64,6 +65,44 @@ def convert_column_names_to_snake_case(schema: pa.Schema) -> pa.Schema:
     return pa.schema(new_fields, metadata=schema.metadata)
 
 
+def convert_temporal_fields_in_table(
+    pyarrow_table: pa.Table, target_schema: pa.Schema
+) -> pa.Table:
+    """Convert string temporal fields in PyArrow table to appropriate temporal types.
+
+    Args:
+        pyarrow_table: PyArrow table with string temporal values
+        target_schema: Target schema with temporal field types
+
+    Returns:
+        PyArrow table with converted temporal columns
+    """
+    # Use PyArrow's cast which can handle ISO 8601 formatted strings
+    # This is simpler and more robust than strptime for mixed formats
+    try:
+        # Try direct cast - PyArrow can parse ISO 8601 strings automatically
+        converted_table = pyarrow_table.cast(target_schema, safe=False)
+        return converted_table
+    except pa.ArrowInvalid:
+        # If direct cast fails, fall back to column-by-column conversion
+        arrays = []
+        for i, field in enumerate(target_schema):
+            col_name = field.name
+            col_data = pyarrow_table.column(col_name)
+            field_type = field.type
+
+            # Try to cast the column to the target type
+            try:
+                col_data = pc.cast(col_data, field_type, safe=False)
+            except pa.ArrowInvalid:
+                # If cast fails, keep original data
+                pass
+
+            arrays.append(col_data)
+
+        return pa.Table.from_arrays(arrays, schema=target_schema)
+
+
 async def import_file_to_table(
     warehouse: str,
     region: str,
@@ -117,29 +156,30 @@ async def import_file_to_table(
                 'error': f'Column name conversion failed: {str(conv_err)}',
             }
 
-    table_created = False
     try:
         # Try to load existing table
         table = catalog.load_table(f'{namespace}.{table_name}')
+        # Convert temporal fields to match existing table schema
+        target_schema = table.schema().as_arrow()
+        pyarrow_table = convert_temporal_fields_in_table(pyarrow_table, target_schema)
     except NoSuchTableError:
-        # Table doesn't exist, create it using the schema
-        try:
-            table = catalog.create_table(
-                identifier=f'{namespace}.{table_name}',
-                schema=pyarrow_schema,
-            )
-            table_created = True
-        except Exception as create_error:
-            return {
-                'status': 'error',
-                'error': f'Failed to create table: {str(create_error)}',
-            }
+        # Table doesn't exist - return error with schema information
+        # Build column information from the source file schema
+        columns_info = []
+        for field in pyarrow_schema:
+            columns_info.append({'name': field.name, 'type': str(field.type)})
+
+        return {
+            'status': 'error',
+            'error': f'Table {namespace}.{table_name} does not exist. Please create the table first before importing data.',
+            'columns': columns_info,
+        }
 
     # Append data to Iceberg table
     table.append(pyarrow_table)
 
     # Build message with warnings if applicable
-    message = f'Successfully imported {pyarrow_table.num_rows} rows{" and created new table" if table_created else ""}'
+    message = f'Successfully imported {pyarrow_table.num_rows} rows'
     if columns_converted:
         message += '. WARNING: Column names were converted to snake_case format. To preserve the original case, set preserve_case to True.'
 
@@ -148,7 +188,6 @@
         'message': message,
         'rows_processed': pyarrow_table.num_rows,
        'file_processed': os.path.basename(key),
-        'table_created': table_created,
         'table_uuid': table.metadata.table_uuid,
         'columns': pyarrow_schema.names,
     }
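The new helper leans on PyArrow's ability to parse ISO 8601 strings during a cast. As a standalone illustration of the same two code paths (the column name and values here are hypothetical, not taken from the package):

import pyarrow as pa
import pyarrow.compute as pc

source = pa.table({'created_at': ['2025-03-14T17:10:34', '2025-03-15 08:00:00']})
target_schema = pa.schema([pa.field('created_at', pa.timestamp('us'))])

# Table-level cast: the first path tried by convert_temporal_fields_in_table.
converted = source.cast(target_schema, safe=False)

# Column-by-column fallback using pyarrow.compute.cast.
column = pc.cast(source.column('created_at'), pa.timestamp('us'), safe=False)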
awslabs/s3_tables_mcp_server/server.py
@@ -307,45 +307,115 @@ async def create_table(
     """Create a new S3 table in an S3 table bucket.
 
     Creates a new S3 table associated with the given S3 namespace in an S3 table bucket.
-    The S3 table can be configured with specific format and metadata settings. Metadata contains the schema of the table. Use double type for decimals.
+    The S3 table can be configured with specific format and metadata settings. Metadata contains the schema of the table.
     Do not use the metadata parameter if the schema is unclear.
 
+    Supported Iceberg Primitive Types:
+    - boolean: True or false
+    - int: 32-bit signed integers (can promote to long)
+    - long: 64-bit signed integers
+    - float: 32-bit IEEE 754 floating point (can promote to double)
+    - double: 64-bit IEEE 754 floating point
+    - decimal(P,S): Fixed-point decimal with precision P and scale S (precision must be 38 or less)
+    - date: Calendar date without timezone or time
+    - time: Time of day, microsecond precision, without date or timezone
+    - timestamp: Timestamp, microsecond precision, without timezone (represents date and time regardless of zone)
+    - timestamptz: Timestamp, microsecond precision, with timezone (stored as UTC)
+    - string: Arbitrary-length character sequences (UTF-8 encoded)
+
+    Note: Binary field types (binary, fixed, uuid) are not supported.
+
     Example of S3 table metadata:
     {
        "metadata": {
            "iceberg": {
                "schema": {
                    "type": "struct",
-                    "fields": [{
+                    "fields": [
+                        {
                            "id": 1,
-                            "name": "customer_id",
+                            "name": "id",
                            "type": "long",
                            "required": true
                        },
                        {
                            "id": 2,
-                            "name": "customer_name",
-                            "type": "string",
-                            "required": true
+                            "name": "bool_field",
+                            "type": "boolean",
+                            "required": false
                        },
                        {
                            "id": 3,
-                            "name": "customer_balance",
+                            "name": "int_field",
+                            "type": "int",
+                            "required": false
+                        },
+                        {
+                            "id": 4,
+                            "name": "long_field",
+                            "type": "long",
+                            "required": false
+                        },
+                        {
+                            "id": 5,
+                            "name": "float_field",
+                            "type": "float",
+                            "required": false
+                        },
+                        {
+                            "id": 6,
+                            "name": "double_field",
                            "type": "double",
                            "required": false
+                        },
+                        {
+                            "id": 7,
+                            "name": "decimal_field",
+                            "type": "decimal(10,2)",
+                            "required": false
+                        },
+                        {
+                            "id": 8,
+                            "name": "date_field",
+                            "type": "date",
+                            "required": false
+                        },
+                        {
+                            "id": 9,
+                            "name": "time_field",
+                            "type": "time",
+                            "required": false
+                        },
+                        {
+                            "id": 10,
+                            "name": "timestamp_field",
+                            "type": "timestamp",
+                            "required": false
+                        },
+                        {
+                            "id": 11,
+                            "name": "timestamptz_field",
+                            "type": "timestamptz",
+                            "required": false
+                        },
+                        {
+                            "id": 12,
+                            "name": "string_field",
+                            "type": "string",
+                            "required": false
                        }
                    ]
                },
                "partition-spec": [
                    {
-                        "source-id": 1,
+                        "source-id": 8,
                        "field-id": 1000,
                        "transform": "month",
-                        "name": "sale_date_month"
+                        "name": "date_field_month"
                    }
                ],
                "table-properties": {
-                    "description": "Customer information table with customer_id for joining with transactions"
+                    "description": "Example table demonstrating supported Iceberg primitive types"
                }
            }
        }
@@ -353,7 +423,6 @@
 
     Permissions:
     You must have the s3tables:CreateTable permission to use this operation.
-    If using metadata parameter, you must have the s3tables:PutTableData permission.
     """
     from awslabs.s3_tables_mcp_server.models import OpenTableFormat, TableMetadata
 
@@ -665,11 +734,10 @@
         bool, Field(..., description='Preserve case of column names')
     ] = False,
 ) -> dict:
-    """Import data from a Parquet file into an S3 table.
+    """Import data from a Parquet file into an existing S3 table.
 
-    This tool reads data from a Parquet file stored in S3 and imports it into an S3 table.
-    If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
-    If the table exists, the Parquet file schema must be compatible with the table's schema.
+    This tool reads data from a Parquet file stored in S3 and imports it into an existing S3 table.
+    The table must already exist. The Parquet file schema must be compatible with the table's schema.
     The tool will validate the schema before attempting to import the data.
     If preserve_case is True, the column names will not be converted to snake_case. Otherwise, the column names will be converted to snake_case.
 
@@ -677,6 +745,7 @@
     - URL is not a valid S3 URL
     - File is not a Parquet file
     - File cannot be accessed
+    - Table does not exist
     - Parquet schema is incompatible with existing table schema
     - Any other error occurs
 
@@ -685,7 +754,6 @@
     - message: Success message with row count
     - rows_processed: Number of rows imported
     - file_processed: Name of the processed file
-    - table_created: True if a new table was created
 
     Example input values:
     warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
@@ -704,7 +772,6 @@
     - s3:GetObject permission for the Parquet file
     - s3tables:GetTable and s3tables:GetTables permissions to access table information
     - s3tables:PutTableData permission to write to the table
-    - s3tables:CreateTable permission (if table doesn't exist)
     """
     if uri is None:
         uri = _default_uri_for_region(region)
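For quick reference, a condensed version of the metadata payload documented in the create_table docstring above, one required long id plus a date column partitioned by month, might look like the following. The field names are placeholders; only the structure mirrors the docstring example.

# Condensed sketch of the documented metadata structure; names are placeholders.
table_metadata = {
    'metadata': {
        'iceberg': {
            'schema': {
                'type': 'struct',
                'fields': [
                    {'id': 1, 'name': 'id', 'type': 'long', 'required': True},
                    {'id': 2, 'name': 'event_date', 'type': 'date', 'required': False},
                ],
            },
            # Partition by month of event_date (source-id 2).
            'partition-spec': [
                {'source-id': 2, 'field-id': 1000, 'transform': 'month', 'name': 'event_date_month'}
            ],
            'table-properties': {'description': 'Minimal example table'},
        }
    }
}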
pyproject.toml
@@ -2,7 +2,7 @@
 name = "awslabs.s3-tables-mcp-server"
 
 # NOTE: "Patch"=9223372036854775807 bumps next release to zero.
-version = "0.0.13"
+version = "0.0.15"
 
 description = "An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server"
 readme = "README.md"
@@ -12,8 +12,8 @@ dependencies = [
     "mcp[cli]==1.11.0",
     "pydantic==2.9.2",
     "boto3==1.40.8",
-    "pyiceberg==0.9.1",
-    "pyarrow==20.0.0",
+    "pyiceberg==0.10.0",
+    "pyarrow==22.0.0",
     "sqlparse==0.5.3",
     "daft==0.5.8",
 ]