awslabs.s3-tables-mcp-server 0.0.2__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/Dockerfile +5 -5
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/PKG-INFO +1 -1
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +28 -113
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/__init__.py +24 -0
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/csv.py +123 -0
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/parquet.py +116 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/server.py +76 -24
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/pyproject.toml +1 -1
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_csv.py +235 -0
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_parquet.py +241 -0
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_pyiceberg.py +579 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_server.py +110 -25
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/uv.lock +763 -763
- awslabs_s3_tables_mcp_server-0.0.2/awslabs/s3_tables_mcp_server/file_processor.py +0 -485
- awslabs_s3_tables_mcp_server-0.0.2/tests/test_file_processor.py +0 -607
- awslabs_s3_tables_mcp_server-0.0.2/tests/test_pyiceberg.py +0 -437
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/.gitignore +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/.python-version +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/CHANGELOG.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/CONTEXT.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/LICENSE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/NOTICE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/README.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/constants.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/models.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/docker-healthcheck.sh +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_init.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_main.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/uv-requirements.txt +0 -0
{awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/Dockerfile

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # dependabot should continue to update this to the latest hash.
-FROM public.ecr.aws/
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283 AS uv
 
 # Install the project into `/app`
 WORKDIR /app
@@ -48,10 +48,10 @@ COPY . /app
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --python 3.13 --frozen --no-dev --no-editable
 
-# Make the directory just in case it doesn't exist
-RUN mkdir -p /root/.local
+# # Make the directory just in case it doesn't exist
+# RUN mkdir -p /root/.local
 
-FROM public.ecr.aws/
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283
 
 # Place executables in the environment at the front of the path and include other binaries
 ENV PATH="/app/.venv/bin:$PATH:/usr/sbin" \
@@ -63,7 +63,7 @@ RUN groupadd --force --system app && \
     chmod o+x /root
 
 # Copy application artifacts from build stage
-COPY --from=uv --chown=app:app /root/.local /root/.local
+# COPY --from=uv --chown=app:app /root/.local /root/.local
 COPY --from=uv --chown=app:app /app/.venv /app/.venv
 
 # Get healthcheck script
{awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.
+Version: 0.0.3
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
{awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py

@@ -14,32 +14,14 @@
 
 """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
 
+import io
+import json
 import pyarrow as pa
+import pyarrow.json as pj
 from ..utils import pyiceberg_load_catalog
 from daft import Catalog as DaftCatalog
 from daft.session import Session
-from datetime import date, datetime, time
-from decimal import Decimal
 from pydantic import BaseModel
-from pyiceberg.types import (
-    BinaryType,
-    BooleanType,
-    DateType,
-    DecimalType,
-    DoubleType,
-    FixedType,
-    FloatType,
-    IntegerType,
-    ListType,
-    LongType,
-    MapType,
-    StringType,
-    StructType,
-    TimestampType,
-    TimestamptzType,
-    TimeType,
-    UUIDType,
-)
 
 # pyiceberg and daft imports
 from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
     rest_sigv4_enabled: str = 'true'
 
 
-def convert_value_for_append(value, iceberg_type):
-    """Convert a value to the appropriate type for appending to an Iceberg table column.
-
-    Args:
-        value: The value to convert. Can be of various types (str, int, float, etc.).
-        iceberg_type: The Iceberg type to convert the value to.
-
-    Returns:
-        The value converted to the appropriate type for the Iceberg column, or None if value is None.
-
-    Raises:
-        NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
-        ValueError: If the conversion is unsupported or fails.
-    """
-    if value is None:
-        return None
-    # Already correct type
-    if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
-        return value
-    if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
-        return value
-    if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
-        return value
-    if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
-        return value
-    if isinstance(iceberg_type, DateType) and isinstance(value, date):
-        return value
-    if isinstance(iceberg_type, TimeType) and isinstance(value, time):
-        return value
-    if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
-        return value
-    if isinstance(iceberg_type, StringType) and isinstance(value, str):
-        return value
-    # Convert from string
-    if isinstance(value, str):
-        if isinstance(iceberg_type, BooleanType):
-            return value.lower() in ('true', '1', 'yes')
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(value)
-        if isinstance(iceberg_type, DateType):
-            return date.fromisoformat(value)
-        if isinstance(iceberg_type, TimeType):
-            return time.fromisoformat(value)
-        if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
-            return datetime.fromisoformat(value)
-        if isinstance(iceberg_type, StringType):
-            return value
-        if isinstance(iceberg_type, UUIDType):
-            import uuid
-
-            return uuid.UUID(value)
-        if isinstance(iceberg_type, (BinaryType, FixedType)):
-            return bytes.fromhex(value)
-    # Convert from number
-    if isinstance(value, (int, float)):
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(str(value))
-        if isinstance(iceberg_type, StringType):
-            return str(value)
-    if isinstance(iceberg_type, (ListType, MapType, StructType)):
-        raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
-    raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
-
-
 class PyIcebergEngine:
     """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
 
@@ -197,7 +107,7 @@ class PyIcebergEngine:
             return False
 
     def append_rows(self, table_name: str, rows: list[dict]) -> None:
-        """Append rows to an Iceberg table using pyiceberg.
+        """Append rows to an Iceberg table using pyiceberg with JSON encoding.
 
         Args:
             table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@ class PyIcebergEngine:
                 full_table_name = f'{self.config.namespace}.{table_name}'
             else:
                 full_table_name = table_name
+
+            # Load the Iceberg table
             table = self._catalog.load_table(full_table_name)
-
-
+            # Encode rows as JSON (line-delimited format)
+            json_lines = []
             for row in rows:
- [old lines 221-237 removed; their content is not rendered in the source diff]
+                json_lines.append(json.dumps(row))
+            json_data = '\n'.join(json_lines)
+
+            # Create a file-like object from the JSON data
+            json_buffer = io.BytesIO(json_data.encode('utf-8'))
+
+            # Read JSON data into PyArrow Table using pyarrow.json.read_json
+            # This enforces the Iceberg schema and validates the data
+            try:
+                new_data_table = pj.read_json(
+                    json_buffer, read_options=pj.ReadOptions(use_threads=True)
+                )
+            except pa.ArrowInvalid as e:
+                raise ValueError(
+                    f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
+                )
+
+            # Append the new data to the Iceberg table
+            table.append(new_data_table)
+
         except Exception as e:
             raise Exception(f'Error appending rows: {str(e)}')
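For context on the change above: the rewritten append_rows serializes the incoming rows as line-delimited JSON and lets pyarrow.json infer and validate the Arrow schema before the data is handed to pyiceberg. A minimal standalone sketch of that round trip (the example rows and column names are made up, and the final table.append step from the diff is omitted):

import io
import json

import pyarrow.json as pj

# Hypothetical example rows; in the server these arrive from the MCP tool call.
rows = [
    {'id': 1, 'name': 'alpha'},
    {'id': 2, 'name': 'beta'},
]

# Encode as line-delimited JSON, the format pyarrow.json.read_json expects.
json_data = '\n'.join(json.dumps(row) for row in rows)
buffer = io.BytesIO(json_data.encode('utf-8'))

# read_json infers an Arrow schema from the JSON values; a pyarrow ArrowInvalid
# error here is what the engine surfaces as a schema-mismatch ValueError.
arrow_table = pj.read_json(buffer, read_options=pj.ReadOptions(use_threads=True))
print(arrow_table.schema)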
awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/__init__.py (new file)

@@ -0,0 +1,24 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV and Parquet file handling and import capabilities.
+"""
+
+from .csv import import_csv_to_table
+from .parquet import import_parquet_to_table
+
+__all__ = ['import_csv_to_table', 'import_parquet_to_table']
awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/csv.py (new file)

@@ -0,0 +1,123 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV file handling and import capabilities.
+"""
+
+import io
+import os
+import pyarrow.csv as pc
+from ..utils import get_s3_client, pyiceberg_load_catalog
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Dict
+from urllib.parse import urlparse
+
+
+async def import_csv_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+) -> Dict:
+    """Import data from a CSV file into an S3 table.
+
+    This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
+    If the table doesn't exist, it will be created using the schema inferred from the CSV file.
+
+    Args:
+        warehouse: Warehouse string for Iceberg catalog
+        region: AWS region for S3Tables/Iceberg REST endpoint
+        namespace: The namespace containing the table
+        table_name: The name of the table to import data into
+        s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
+        uri: REST URI for Iceberg catalog
+        catalog_name: Catalog name
+        rest_signing_name: REST signing name
+        rest_sigv4_enabled: Enable SigV4 signing
+
+    Returns:
+        A dictionary containing:
+        - status: 'success' or 'error'
+        - message: Success message or error details
+        - rows_processed: Number of rows processed (on success)
+        - file_processed: Name of the processed file
+        - table_created: Boolean indicating if a new table was created (on success)
+    """
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the CSV file to infer schema
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        csv_data = response['Body'].read()
+
+        # Read CSV file into PyArrow Table to infer schema
+        # Convert bytes to file-like object for PyArrow
+        csv_buffer = io.BytesIO(csv_data)
+        csv_table = pc.read_csv(csv_buffer)
+        csv_schema = csv_table.schema
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the CSV schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=csv_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(csv_table)
+
+        return {
+            'status': 'success',
+            'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
+            'rows_processed': csv_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/parquet.py (new file)

@@ -0,0 +1,116 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pyarrow.parquet as pq
+from awslabs.s3_tables_mcp_server.utils import get_s3_client, pyiceberg_load_catalog
+from io import BytesIO
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Dict
+
+
+async def import_parquet_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+) -> Dict:
+    """Import data from a Parquet file into an S3 table.
+
+    This function reads data from a Parquet file stored in S3 and imports it into an existing Iceberg table.
+    If the table doesn't exist, it will be created using the schema from the Parquet file.
+
+    Args:
+        warehouse: Warehouse string for Iceberg catalog
+        region: AWS region for S3Tables/Iceberg REST endpoint
+        namespace: The namespace containing the table
+        table_name: The name of the table to import data into
+        s3_url: The S3 URL of the Parquet file
+        uri: REST URI for Iceberg catalog
+        catalog_name: Catalog name
+        rest_signing_name: REST signing name
+        rest_sigv4_enabled: Enable SigV4 signing
+
+    Returns:
+        A dictionary containing:
+        - status: 'success' or 'error'
+        - message: Success message or error details
+        - rows_processed: Number of rows processed (on success)
+        - file_processed: Name of the processed file
+        - table_created: Boolean indicating if a new table was created (on success)
+    """
+    import os
+    from urllib.parse import urlparse
+
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the Parquet file first to get the schema
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        parquet_data = BytesIO(response['Body'].read())
+
+        # Read Parquet file into PyArrow Table
+        parquet_table = pq.read_table(parquet_data)
+        parquet_schema = parquet_table.schema
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the Parquet schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=parquet_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(parquet_table)
+
+        return {
+            'status': 'success',
+            'message': f'Successfully imported {parquet_table.num_rows} rows{" and created new table" if table_created else ""}',
+            'rows_processed': parquet_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
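The Parquet importer mirrors the CSV path but reads the object with pyarrow.parquet instead of pyarrow.csv. Since it expects the file to already be in S3, one way to stage data for it is sketched below (a hedged example; the bucket and key names are placeholders and configured boto3 credentials are assumed):

import boto3
import pyarrow as pa
import pyarrow.parquet as pq

# Build a small Arrow table locally and write it out as a Parquet file.
table = pa.table({'id': [1, 2, 3], 'name': ['a', 'b', 'c']})
pq.write_table(table, 'customers.parquet')

# Upload to the general-purpose S3 bucket the importer will read from.
boto3.client('s3').upload_file('customers.parquet', 'example-bucket', 'imports/customers.parquet')

# The resulting object can then be passed to import_parquet_to_table as
# s3_url='s3://example-bucket/imports/customers.parquet'.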
{awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/server.py

@@ -32,7 +32,6 @@ from .utils import set_user_agent_mode
 from awslabs.s3_tables_mcp_server import (
     __version__,
     database,
-    file_processor,
     namespaces,
     resources,
     s3_operations,
@@ -48,6 +47,12 @@ from awslabs.s3_tables_mcp_server.constants import (
     TABLE_BUCKET_NAME_PATTERN,
     TABLE_NAME_FIELD,
 )
+from awslabs.s3_tables_mcp_server.file_processor import (
+    import_csv_to_table as import_csv_to_table_func,
+)
+from awslabs.s3_tables_mcp_server.file_processor import (
+    import_parquet_to_table as import_parquet_to_table_func,
+)
 from datetime import datetime, timezone
 from mcp.server.fastmcp import FastMCP
 from pydantic import Field
@@ -567,32 +572,75 @@ async def query_database(
 
 @app.tool()
 @log_tool_call_with_response
-
+@write_operation
+async def import_csv_to_table(
+    warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
+    region: Annotated[
+        str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
+    ],
+    namespace: Annotated[str, NAMESPACE_NAME_FIELD],
+    table_name: Annotated[str, TABLE_NAME_FIELD],
     s3_url: Annotated[str, S3_URL_FIELD],
+    uri: Annotated[str, Field(..., description='REST URI for Iceberg catalog')],
+    catalog_name: Annotated[
+        str, Field('s3tablescatalog', description='Catalog name')
+    ] = 's3tablescatalog',
+    rest_signing_name: Annotated[
+        str, Field('s3tables', description='REST signing name')
+    ] = 's3tables',
+    rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
 ) -> dict:
-    """
+    """Import data from a CSV file into an S3 table.
 
-    This tool
-
-
-
+    This tool reads data from a CSV file stored in S3 and imports it into an S3 table.
+    If the table doesn't exist, it will be created with a schema inferred from the CSV file.
+    If the table exists, the CSV file schema must be compatible with the table's schema.
+    The tool will validate the schema before attempting to import the data.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
     - File is not a CSV file
     - File cannot be accessed
+    - Table does not exist
+    - CSV headers don't match table schema
     - Any other error occurs
 
+    Example input values:
+        warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
+        region: 'us-west-2'
+        namespace: 'retail_data'
+        table_name: 'customers'
+        s3_url: 's3://bucket-name/path/to/file.csv'
+        uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
+        catalog_name: 's3tablescatalog'
+        rest_signing_name: 's3tables'
+        rest_sigv4_enabled: 'true'
+
     Permissions:
-    You must have
+    You must have:
+    - s3:GetObject permission for the CSV file
+    - s3tables:GetTable and s3tables:GetTables permissions to access table information
+    - s3tables:PutTableData permission to write to the table
     """
-
+    if uri is None:
+        uri = _default_uri_for_region(region)
+    return await import_csv_to_table_func(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+    )
 
 
 @app.tool()
 @log_tool_call_with_response
 @write_operation
-async def
+async def import_parquet_to_table(
     warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
     region: Annotated[
         str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
@@ -609,29 +657,33 @@ async def import_csv_to_table(
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
 ) -> dict:
-    """Import data from a
+    """Import data from a Parquet file into an S3 table.
 
-    This tool reads data from a
-
-
-
-    To create a table, first use the preview_csv_file tool to get the schema and data format.
-    Then use the create_table tool to create the table.
+    This tool reads data from a Parquet file stored in S3 and imports it into an S3 table.
+    If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
+    If the table exists, the Parquet file schema must be compatible with the table's schema.
+    The tool will validate the schema before attempting to import the data.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
-    - File is not a
+    - File is not a Parquet file
     - File cannot be accessed
-    -
-    - CSV headers don't match table schema
+    - Parquet schema is incompatible with existing table schema
    - Any other error occurs
 
+    Returns success dictionary with:
+    - status: 'success'
+    - message: Success message with row count
+    - rows_processed: Number of rows imported
+    - file_processed: Name of the processed file
+    - table_created: True if a new table was created
+
     Example input values:
         warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
         region: 'us-west-2'
         namespace: 'retail_data'
         table_name: 'customers'
-        s3_url: 's3://bucket-name/path/to/file.
+        s3_url: 's3://bucket-name/path/to/file.parquet'
         uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
         catalog_name: 's3tablescatalog'
         rest_signing_name: 's3tables'
@@ -639,14 +691,14 @@ async def import_csv_to_table(
 
     Permissions:
     You must have:
-    - s3:GetObject permission for the
-    - s3tables:GetDatabase and s3tables:GetDatabases permissions to access database information
+    - s3:GetObject permission for the Parquet file
     - s3tables:GetTable and s3tables:GetTables permissions to access table information
     - s3tables:PutTableData permission to write to the table
+    - s3tables:CreateTable permission (if table doesn't exist)
     """
     if uri is None:
         uri = _default_uri_for_region(region)
-    return await
+    return await import_parquet_to_table_func(
         warehouse=warehouse,
         region=region,
         namespace=namespace,
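The two new @app.tool() registrations expose these importers over MCP, so a client invokes them by name with a JSON arguments object that maps onto the Annotated parameters above. A hedged sketch of such a payload, reusing placeholder values rather than real resources:

# Hypothetical arguments for the import_parquet_to_table tool; optional
# parameters fall back to the defaults declared in the tool signature.
arguments = {
    'warehouse': 'arn:aws:s3tables:us-west-2:123456789012:bucket/example-bucket',
    'region': 'us-west-2',
    'namespace': 'retail_data',
    'table_name': 'customers',
    's3_url': 's3://example-bucket/imports/customers.parquet',
    'uri': 'https://s3tables.us-west-2.amazonaws.com/iceberg',
}

# With an already-initialized MCP Python SDK ClientSession named `session`, the
# call would look roughly like:
#     result = await session.call_tool('import_parquet_to_table', arguments)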
{awslabs_s3_tables_mcp_server-0.0.2 → awslabs_s3_tables_mcp_server-0.0.3}/pyproject.toml

@@ -2,7 +2,7 @@
 name = "awslabs.s3-tables-mcp-server"
 
 # NOTE: "Patch"=9223372036854775807 bumps next release to zero.
-version = "0.0.
+version = "0.0.3"
 
 description = "An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server"
 readme = "README.md"