awslabs.s3-tables-mcp-server 0.0.4.tar.gz → 0.0.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/PKG-INFO +1 -1
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/csv.py +50 -0
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/parquet.py +44 -0
- awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/utils.py +157 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/server.py +12 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/pyproject.toml +1 -1
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_csv.py +70 -0
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_file_processor_utils.py +645 -0
- awslabs_s3_tables_mcp_server-0.0.5/tests/test_parquet.py +70 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_server.py +51 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/uv.lock +1 -1
- awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/csv.py +0 -123
- awslabs_s3_tables_mcp_server-0.0.4/awslabs/s3_tables_mcp_server/file_processor/parquet.py +0 -116
- awslabs_s3_tables_mcp_server-0.0.4/tests/test_csv.py +0 -235
- awslabs_s3_tables_mcp_server-0.0.4/tests/test_parquet.py +0 -241
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/.gitignore +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/.python-version +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/CHANGELOG.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/CONTEXT.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/Dockerfile +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/LICENSE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/NOTICE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/README.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/constants.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/file_processor/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/models.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/docker-healthcheck.sh +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_init.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_main.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_pyiceberg.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/tests/test_utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/uv-requirements.txt +0 -0
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.4
+Version: 0.0.5
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
```
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/csv.py

```diff
@@ -0,0 +1,50 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV file handling and import capabilities.
+"""
+
+import pyarrow.csv as pc
+from .utils import import_file_to_table
+
+
+async def import_csv_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+):
+    """Import a CSV file into an S3 table using PyArrow."""
+    return await import_file_to_table(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        create_pyarrow_table=pc.read_csv,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
+    )
```
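For context, a minimal caller sketch for the new `import_csv_to_table` entry point is shown below. The warehouse ARN, REST endpoint URI, bucket, namespace, and table names are illustrative placeholders, not values taken from this package.

```python
import asyncio

from awslabs.s3_tables_mcp_server.file_processor.csv import import_csv_to_table


async def main():
    # All identifiers below are placeholders; substitute real S3 Tables resources.
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-west-2:111122223333:bucket/my-table-bucket',
        region='us-west-2',
        namespace='sales',
        table_name='orders',
        s3_url='s3://my-staging-bucket/orders.csv',
        uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
        preserve_case=False,  # CSV headers are snake_cased by default
    )
    print(result['status'], result.get('message', result.get('error')))


asyncio.run(main())
```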
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/parquet.py

```diff
@@ -0,0 +1,44 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pyarrow.parquet as pq
+from .utils import import_file_to_table
+
+
+async def import_parquet_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+):
+    """Import a Parquet file into an S3 table using PyArrow."""
+    return await import_file_to_table(
+        warehouse=warehouse,
+        region=region,
+        namespace=namespace,
+        table_name=table_name,
+        s3_url=s3_url,
+        uri=uri,
+        create_pyarrow_table=pq.read_table,
+        catalog_name=catalog_name,
+        rest_signing_name=rest_signing_name,
+        rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
+    )
```
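The only difference from the CSV module is the reader passed as `create_pyarrow_table`: both `pc.read_csv` and `pq.read_table` accept a binary file-like object and return a `pyarrow.Table`, which is what lets the shared `import_file_to_table` stay format-agnostic. A quick self-contained check of that seam (sample data made up for illustration):

```python
import io

import pyarrow as pa
import pyarrow.csv as pc
import pyarrow.parquet as pq

original = pa.table({'id': [1, 2], 'name': ['a', 'b']})

# Parquet round-trip through an in-memory buffer, as the importer does with S3 bytes.
buf = io.BytesIO()
pq.write_table(original, buf)
buf.seek(0)
assert pq.read_table(buf).equals(original)

# pc.read_csv consumes the same kind of file-like object.
csv_table = pc.read_csv(io.BytesIO(b'id,name\n1,a\n2,b\n'))
print(csv_table.schema)  # id: int64, name: string
```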
awslabs_s3_tables_mcp_server-0.0.5/awslabs/s3_tables_mcp_server/file_processor/utils.py

```diff
@@ -0,0 +1,157 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing utilities.
+
+This module provides utility functions for file processing operations,
+particularly focusing on column name conversion and schema transformation.
+"""
+
+import os
+import pyarrow as pa
+from ..utils import get_s3_client, pyiceberg_load_catalog
+from io import BytesIO
+from pydantic.alias_generators import to_snake
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Any, Callable, Dict
+from urllib.parse import urlparse
+
+
+def convert_column_names_to_snake_case(schema: pa.Schema) -> pa.Schema:
+    """Convert column names in PyArrow schema to snake_case.
+
+    Args:
+        schema: PyArrow schema with original column names
+
+    Returns:
+        PyArrow schema with converted column names
+
+    Raises:
+        ValueError: If duplicate column names exist after conversion
+    """
+    # Extract original column names
+    original_names = schema.names
+
+    # Convert each column name to snake_case
+    converted_names = [to_snake(name) for name in original_names]
+
+    # Check for duplicates after conversion using set and len
+    if len(set(converted_names)) != len(converted_names):
+        raise ValueError(
+            f'Duplicate column names after case conversion. '
+            f'Original names: {original_names}. Converted names: {converted_names}'
+        )
+
+    # Create new schema with converted column names
+    new_fields = []
+    for i, field in enumerate(schema):
+        new_field = pa.field(
+            converted_names[i], field.type, nullable=field.nullable, metadata=field.metadata
+        )
+        new_fields.append(new_field)
+
+    return pa.schema(new_fields, metadata=schema.metadata)
+
+
+async def import_file_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    create_pyarrow_table: Callable[[Any], pa.Table],
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+    preserve_case: bool = False,
+) -> Dict:
+    """Import data from a file (CSV, Parquet, etc.) into an S3 table using a provided PyArrow table creation function."""
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the file
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        file_bytes = response['Body'].read()
+
+        # Create PyArrow Table and Schema (file-like interface)
+        file_like = BytesIO(file_bytes)
+        pyarrow_table = create_pyarrow_table(file_like)
+        pyarrow_schema = pyarrow_table.schema
+
+        # Convert column names to snake_case unless preserve_case is True
+        columns_converted = False
+        if not preserve_case:
+            try:
+                pyarrow_schema = convert_column_names_to_snake_case(pyarrow_schema)
+                pyarrow_table = pyarrow_table.rename_columns(pyarrow_schema.names)
+                columns_converted = True
+            except Exception as conv_err:
+                return {
+                    'status': 'error',
+                    'error': f'Column name conversion failed: {str(conv_err)}',
+                }
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=pyarrow_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(pyarrow_table)
+
+        # Build message with warnings if applicable
+        message = f'Successfully imported {pyarrow_table.num_rows} rows{" and created new table" if table_created else ""}'
+        if columns_converted:
+            message += '. WARNING: Column names were converted to snake_case format. To preserve the original case, set preserve_case to True.'
+
+        return {
+            'status': 'success',
+            'message': message,
+            'rows_processed': pyarrow_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+            'columns': pyarrow_schema.names,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}
```
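The conversion relies on pydantic's `to_snake`. The sketch below illustrates the expected behavior of `convert_column_names_to_snake_case`, including the duplicate-name guard; the column names are made up for illustration.

```python
import pyarrow as pa

from awslabs.s3_tables_mcp_server.file_processor.utils import (
    convert_column_names_to_snake_case,
)

schema = pa.schema([('UserID', pa.int64()), ('FirstName', pa.string())])
print(convert_column_names_to_snake_case(schema).names)
# expected: ['user_id', 'first_name']

# Two source columns that collapse to the same snake_case name raise ValueError.
clashing = pa.schema([('UserID', pa.int64()), ('user_id', pa.int64())])
try:
    convert_column_names_to_snake_case(clashing)
except ValueError as err:
    print(f'rejected: {err}')
```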
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/awslabs/s3_tables_mcp_server/server.py

```diff
@@ -589,6 +589,9 @@ async def import_csv_to_table(
         str, Field('s3tables', description='REST signing name')
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
+    preserve_case: Annotated[
+        bool, Field(..., description='Preserve case of column names')
+    ] = False,
 ) -> dict:
     """Import data from a CSV file into an S3 table.
 
@@ -596,6 +599,7 @@ async def import_csv_to_table(
     If the table doesn't exist, it will be created with a schema inferred from the CSV file.
     If the table exists, the CSV file schema must be compatible with the table's schema.
     The tool will validate the schema before attempting to import the data.
+    If preserve_case is True, the column names will not be converted to snake_case. Otherwise, the column names will be converted to snake_case.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
@@ -615,6 +619,7 @@ async def import_csv_to_table(
         catalog_name: 's3tablescatalog'
         rest_signing_name: 's3tables'
         rest_sigv4_enabled: 'true'
+        preserve_case: False
 
     Permissions:
     You must have:
@@ -634,6 +639,7 @@ async def import_csv_to_table(
         catalog_name=catalog_name,
         rest_signing_name=rest_signing_name,
         rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
     )
 
 
@@ -656,6 +662,9 @@ async def import_parquet_to_table(
         str, Field('s3tables', description='REST signing name')
     ] = 's3tables',
     rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
+    preserve_case: Annotated[
+        bool, Field(..., description='Preserve case of column names')
+    ] = False,
 ) -> dict:
     """Import data from a Parquet file into an S3 table.
 
@@ -663,6 +672,7 @@ async def import_parquet_to_table(
     If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
     If the table exists, the Parquet file schema must be compatible with the table's schema.
     The tool will validate the schema before attempting to import the data.
+    If preserve_case is True, the column names will not be converted to snake_case. Otherwise, the column names will be converted to snake_case.
 
     Returns error dictionary with status and error message if:
     - URL is not a valid S3 URL
@@ -688,6 +698,7 @@ async def import_parquet_to_table(
         catalog_name: 's3tablescatalog'
         rest_signing_name: 's3tables'
         rest_sigv4_enabled: 'true'
+        preserve_case: False
 
     Permissions:
     You must have:
@@ -708,6 +719,7 @@ async def import_parquet_to_table(
         catalog_name=catalog_name,
         rest_signing_name=rest_signing_name,
         rest_sigv4_enabled=rest_sigv4_enabled,
+        preserve_case=preserve_case,
     )
 
 
```
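Both tools declare the new flag with the same `Annotated[..., Field(...)]` pattern used for their other parameters. As a rough, self-contained sketch of that declaration style — assuming the MCP Python SDK's FastMCP (an assumption here; the server's real wiring is not shown in this diff) and using a stub body:

```python
from typing import Annotated

from mcp.server.fastmcp import FastMCP
from pydantic import Field

mcp = FastMCP('demo')


@mcp.tool()
async def import_csv_to_table(
    s3_url: Annotated[str, Field(description='S3 URL of the CSV file')],
    preserve_case: Annotated[
        bool, Field(description='Preserve case of column names')
    ] = False,
) -> dict:
    """Stub tool showing the parameter declaration only, not the real import logic."""
    return {'s3_url': s3_url, 'preserve_case': preserve_case}
```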
{awslabs_s3_tables_mcp_server-0.0.4 → awslabs_s3_tables_mcp_server-0.0.5}/pyproject.toml

```diff
@@ -2,7 +2,7 @@
 name = "awslabs.s3-tables-mcp-server"
 
 # NOTE: "Patch"=9223372036854775807 bumps next release to zero.
-version = "0.0.4"
+version = "0.0.5"
 
 description = "An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server"
 readme = "README.md"
```
awslabs_s3_tables_mcp_server-0.0.5/tests/test_csv.py

```diff
@@ -0,0 +1,70 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for CSV file processor (import_csv_to_table)."""
+
+import pytest
+from awslabs.s3_tables_mcp_server.file_processor import csv
+from unittest.mock import AsyncMock, patch
+
+
+@pytest.mark.asyncio
+async def test_import_csv_to_table_success():
+    """Test successful import_csv_to_table."""
+    # Arrange
+    warehouse = 'test-warehouse'
+    region = 'us-west-2'
+    namespace = 'testns'
+    table_name = 'testtable'
+    s3_url = 's3://bucket/test.csv'
+    uri = 'http://localhost:8181'
+    catalog_name = 's3tablescatalog'
+    rest_signing_name = 's3tables'
+    rest_sigv4_enabled = 'true'
+    preserve_case = False
+
+    # Patch import_file_to_table to simulate a successful import
+    success_result = {
+        'status': 'success',
+        'message': 'Successfully imported 2 rows',
+        'rows_processed': 2,
+        'file_processed': 'test.csv',
+        'table_created': True,
+        'table_uuid': 'fake-uuid',
+        'columns': ['col1', 'col2'],
+    }
+    with patch(
+        'awslabs.s3_tables_mcp_server.file_processor.csv.import_file_to_table',
+        new=AsyncMock(return_value=success_result),
+    ):
+        # Act
+        result = await csv.import_csv_to_table(
+            warehouse=warehouse,
+            region=region,
+            namespace=namespace,
+            table_name=table_name,
+            s3_url=s3_url,
+            uri=uri,
+            catalog_name=catalog_name,
+            rest_signing_name=rest_signing_name,
+            rest_sigv4_enabled=rest_sigv4_enabled,
+            preserve_case=preserve_case,
+        )
+
+        # Assert
+        assert result['status'] == 'success'
+        assert result['rows_processed'] == 2
+        assert result['file_processed'] == 'test.csv'
+        assert result['table_created'] is True
+        assert result['columns'] == ['col1', 'col2']
```