awslabs.s3-tables-mcp-server 0.0.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
awslabs/__init__.py ADDED
@@ -0,0 +1,15 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """AWS Labs MCP package."""
@@ -0,0 +1,18 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # This file is part of the awslabs namespace.
+ # It is intentionally minimal to support PEP 420 namespace packages.
+
+ __version__ = '0.0.0'
@@ -0,0 +1,167 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Constants used throughout the S3 Tables MCP Server.
+
+ This module contains all the constant values used across the S3 Tables MCP Server,
+ including regex patterns for validation and field definitions
+ for Pydantic models.
+ """
+
+ from pydantic import Field
+
+
+ # Patterns
+ TABLE_BUCKET_NAME_PATTERN = r'[a-z0-9][a-z0-9-]{1,61}[a-z0-9]'
+ """
+ Regex pattern for validating S3 bucket names.
+ Valid bucket names must:
+ - Be between 3 and 63 characters long
+ - Start and end with a letter or number
+ - Contain only lowercase letters, numbers, and hyphens
+ - Not contain consecutive hyphens (not enforced by this pattern)
+ """
+
+ TABLE_BUCKET_ARN_PATTERN = (
+     r'arn:aws[-a-z0-9]*:[a-z0-9]+:[-a-z0-9]*:[0-9]{12}:bucket/[a-z0-9_-]{3,63}'
+ )
+ """
+ Regex pattern for validating S3 table bucket ARNs.
+ Format: arn:aws[-a-z0-9]*:[a-z0-9]+:[-a-z0-9]*:[0-9]{12}:bucket/[bucket-name]
+ Example: arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket
+ """
+
+ TABLE_NAME_PATTERN = r'[0-9a-z_]*'
+ """
+ Regex pattern for validating table names.
+ Valid table names must:
+ - Contain only lowercase letters, numbers, and underscores
+ - Have a maximum length of 255 characters (enforced by the Field's max_length, not by this pattern)
+ """
+
+ TABLE_ARN_PATTERN = (
+     r'arn:aws[-a-z0-9]*:[a-z0-9]+:[-a-z0-9]*:[0-9]{12}:bucket/[a-z0-9_-]{3,63}/table/[0-9a-f-]{36}'
+ )
+ """
+ Regex pattern for validating table ARNs.
+ Format: arn:aws[-a-z0-9]*:[a-z0-9]+:[-a-z0-9]*:[0-9]{12}:bucket/[bucket-name]/table/[uuid]
+ Example: arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket/table/123e4567-e89b-12d3-a456-426614174000
+ """
+
+ # Field Definitions
+ TABLE_BUCKET_ARN_FIELD = Field(
+     ...,
+     description='Table bucket ARN',
+     pattern=TABLE_BUCKET_ARN_PATTERN,
+     min_length=1,
+     max_length=2048,
+ )
+ """
+ Pydantic field for table bucket ARN validation.
+ Required field that must match the TABLE_BUCKET_ARN_PATTERN.
+ """
+
+ TABLE_ARN_FIELD = Field(..., description='Table ARN', pattern=TABLE_ARN_PATTERN)
+ """
+ Pydantic field for table ARN validation.
+ Required field that must match the TABLE_ARN_PATTERN.
+ """
+
+ NAMESPACE_NAME_FIELD = Field(
+     ...,
+     description='The name of the namespace. Must be 1-255 characters long and contain only alphanumeric characters, underscores, and hyphens.',
+     min_length=1,
+     max_length=255,
+     pattern=r'^[a-zA-Z0-9_-]+$',
+ )
+ """
+ Pydantic field for namespace name validation.
+ Required field that must:
+ - Be 1-255 characters long
+ - Contain only alphanumeric characters, underscores, and hyphens
+ """
+
+ TABLE_NAME_FIELD = Field(
+     ...,
+     description='The name of the table. Must be 1-255 characters long and contain only lowercase letters, numbers, and underscores.',
+     min_length=1,
+     max_length=255,
+     pattern=TABLE_NAME_PATTERN,
+ )
+ """
+ Pydantic field for table name validation.
+ Required field that must:
+ - Be 1-255 characters long
+ - Contain only lowercase letters, numbers, and underscores
+ - Match the TABLE_NAME_PATTERN
+ """
+
+ REGION_NAME_FIELD = Field(
+     None,
+     description='The AWS region name where the operation should be performed.',
+     min_length=1,
+     max_length=64,
+ )
+ """
+ Pydantic field for AWS region name.
+ Optional field that can be used to specify the AWS region for operations.
+ Example values: 'us-east-1', 'eu-west-1', 'ap-southeast-2'
+ """
+
+ # Query-specific fields
+ QUERY_FIELD = Field(
+     default=None,
+     description='Optional SQL query. If not provided, will execute SELECT * FROM table. Must be a read operation.',
+     min_length=1,
+     max_length=10000,
+ )
+ """
+ Pydantic field for SQL query validation.
+ Optional field that must be a valid read operation.
+ """
+
+ OUTPUT_LOCATION_FIELD = Field(
+     default=None,
+     description='Optional S3 location for query results. If not provided, will use default Athena results bucket.',
+     pattern=r'^s3://[a-z0-9-]+/[a-z0-9-./]*$',
+     min_length=1,
+     max_length=2048,
+ )
+ """
+ Pydantic field for output location validation.
+ Optional field that must be a valid S3 URI.
+ """
+
+ WORKGROUP_FIELD = Field(
+     default='primary',
+     description='Athena workgroup to use for query execution.',
+     pattern=r'^[a-zA-Z0-9_-]+$',
+     min_length=1,
+     max_length=128,
+ )
+ """
+ Pydantic field for workgroup validation.
+ Optional field that must contain only letters, numbers, hyphens, and underscores.
+ Defaults to 'primary'.
+ """
+
+ S3_URL_FIELD = Field(
+     ...,
+     description='The S3 URL of the file to preview (format: s3://bucket-name/key)',
+     min_length=1,
+ )
+ """
+ Pydantic field for S3 URL validation.
+ Required field that must be a valid S3 URI.
+ """
@@ -0,0 +1,140 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Database query operations for S3 Tables MCP Server.
+
+ This module provides functions for executing queries against S3 Tables using PyIceberg and Daft.
+ It handles query execution, result retrieval, and proper formatting of responses.
+ """
+
+ import sqlparse
+ from .engines.pyiceberg import PyIcebergConfig, PyIcebergEngine
+ from typing import Any, Dict
+
+
+ WRITE_OPERATIONS = {
+     'ADD',
+     'ALTER',
+     'ANALYZE',
+     'BEGIN',
+     'COMMIT',
+     'COPY',
+     'CREATE',
+     'DELETE',
+     'DROP',
+     'EXPORT',
+     'GRANT',
+     'IMPORT',
+     'INSERT',
+     'LOAD',
+     'LOCK',
+     'MERGE',
+     'MSCK',
+     'REDUCE',
+     'REFRESH',
+     'REPLACE',
+     'RESET',
+     'REVOKE',
+     'ROLLBACK',
+     'SET',
+     'START',
+     'TRUNCATE',
+     'UNCACHE',
+     'UNLOCK',
+     'UPDATE',
+     'UPSERT',
+     'VACUUM',
+     'VALUES',
+     'WRITE',
+ }
+
+ READ_OPERATIONS = {
+     'DESC',
+     'DESCRIBE',
+     'EXPLAIN',
+     'LIST',
+     'SELECT',
+     'SHOW',
+     'USE',
+ }
+
+ # Disallowed destructive operations for write
+ DESTRUCTIVE_OPERATIONS = {'DELETE', 'DROP', 'MERGE', 'REPLACE', 'TRUNCATE', 'VACUUM'}
+
+
+ def _get_query_operations(query: str) -> set:
+     """Extract all top-level SQL operations from the query as a set."""
+     parsed = sqlparse.parse(query)
+     operations = set()
+     for stmt in parsed:
+         tokens = [token.value.upper() for token in stmt.tokens if not token.is_whitespace]
+         for token in tokens:
+             if token.isalpha():
+                 operations.add(token)
+     return operations
+
+
+ async def query_database_resource(
+     warehouse: str,
+     region: str,
+     namespace: str,
+     query: str,
+     uri: str = 'https://s3tables.us-west-2.amazonaws.com/iceberg',
+     catalog_name: str = 's3tablescatalog',
+     rest_signing_name: str = 's3tables',
+     rest_sigv4_enabled: str = 'true',
+ ) -> Dict[str, Any]:
+     """Execute a read-only query against a database using PyIceberg."""
+     operations = _get_query_operations(query)
+     disallowed = operations & WRITE_OPERATIONS
+     if disallowed:
+         raise ValueError(f'Write operations are not allowed in read-only queries: {disallowed}')
+     config = PyIcebergConfig(
+         warehouse=warehouse,
+         uri=uri,
+         region=region,
+         namespace=namespace,
+         catalog_name=catalog_name,
+         rest_signing_name=rest_signing_name,
+         rest_sigv4_enabled=rest_sigv4_enabled,
+     )
+     engine = PyIcebergEngine(config)
+     result = engine.execute_query(query)
+     return result
+
+
+ async def append_rows_to_table_resource(
+     warehouse: str,
+     region: str,
+     namespace: str,
+     table_name: str,
+     rows: list,
+     uri: str = 'https://s3tables.us-west-2.amazonaws.com/iceberg',
+     catalog_name: str = 's3tablescatalog',
+     rest_signing_name: str = 's3tables',
+     rest_sigv4_enabled: str = 'true',
+ ) -> Dict[str, Any]:
+     """Append rows to an Iceberg table using PyIceberg."""
+     config = PyIcebergConfig(
+         warehouse=warehouse,
+         uri=uri,
+         region=region,
+         namespace=namespace,
+         catalog_name=catalog_name,
+         rest_signing_name=rest_signing_name,
+         rest_sigv4_enabled=rest_sigv4_enabled,
+     )
+     engine = PyIcebergEngine(config)
+     engine.append_rows(table_name, rows)
+     return {'status': 'success', 'rows_appended': len(rows)}
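
As a usage sketch: the keyword guard in query_database_resource runs before any catalog connection is made, so a write statement fails fast with a ValueError. The import path, ARN, and namespace below are illustrative assumptions:

    import asyncio

    from awslabs.s3_tables_mcp_server.database import query_database_resource  # assumed path


    async def main():
        try:
            await query_database_resource(
                warehouse='arn:aws:s3tables:us-west-2:123456789012:bucket/my-bucket',
                region='us-west-2',
                namespace='retail_data',
                query='DROP TABLE orders',
            )
        except ValueError as e:
            # sqlparse tokenizes the statement, 'DROP' lands in the extracted
            # operations set, and its intersection with WRITE_OPERATIONS is non-empty.
            print(e)


    asyncio.run(main())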
@@ -0,0 +1,13 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,239 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Engine for interacting with Iceberg tables using pyiceberg and daft."""
+
+ import pyarrow as pa
+ from ..utils import pyiceberg_load_catalog
+ from daft import Catalog as DaftCatalog
+ from daft.session import Session
+ from datetime import date, datetime, time
+ from decimal import Decimal
+ from pydantic import BaseModel
+ from pyiceberg.types import (
+     BinaryType,
+     BooleanType,
+     DateType,
+     DecimalType,
+     DoubleType,
+     FixedType,
+     FloatType,
+     IntegerType,
+     ListType,
+     LongType,
+     MapType,
+     StringType,
+     StructType,
+     TimestampType,
+     TimestamptzType,
+     TimeType,
+     UUIDType,
+ )
+
+ # Typing imports
+ from typing import Any, Dict, Optional
+
+
+ class PyIcebergConfig(BaseModel):
+     """Configuration for PyIceberg/Daft connection."""
+
+     warehouse: str  # e.g. 'arn:aws:s3tables:us-west-2:484907528679:bucket/customer-data-bucket'
+     uri: str  # e.g. 'https://s3tables.us-west-2.amazonaws.com/iceberg'
+     region: str  # e.g. 'us-west-2'
+     namespace: str  # e.g. 'retail_data'
+     catalog_name: str = 's3tablescatalog'  # default
+     rest_signing_name: str = 's3tables'
+     rest_sigv4_enabled: str = 'true'
+
+
+ def convert_value_for_append(value, iceberg_type):
+     """Convert a value to the appropriate type for appending to an Iceberg table column.
+
+     Args:
+         value: The value to convert. Can be of various types (str, int, float, etc.).
+         iceberg_type: The Iceberg type to convert the value to.
+
+     Returns:
+         The value converted to the appropriate type for the Iceberg column, or None if value is None.
+
+     Raises:
+         NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
+         ValueError: If the conversion is unsupported or fails.
+     """
+     if value is None:
+         return None
+     # Already correct type
+     if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
+         return value
+     if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
+         return value
+     if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
+         return value
+     if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
+         return value
+     if isinstance(iceberg_type, DateType) and isinstance(value, date):
+         return value
+     if isinstance(iceberg_type, TimeType) and isinstance(value, time):
+         return value
+     if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
+         return value
+     if isinstance(iceberg_type, StringType) and isinstance(value, str):
+         return value
+     # Convert from string
+     if isinstance(value, str):
+         if isinstance(iceberg_type, BooleanType):
+             return value.lower() in ('true', '1', 'yes')
+         if isinstance(iceberg_type, (IntegerType, LongType)):
+             return int(value)
+         if isinstance(iceberg_type, (FloatType, DoubleType)):
+             return float(value)
+         if isinstance(iceberg_type, DecimalType):
+             return Decimal(value)
+         if isinstance(iceberg_type, DateType):
+             return date.fromisoformat(value)
+         if isinstance(iceberg_type, TimeType):
+             return time.fromisoformat(value)
+         if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
+             return datetime.fromisoformat(value)
+         if isinstance(iceberg_type, StringType):
+             return value
+         if isinstance(iceberg_type, UUIDType):
+             import uuid
+
+             return uuid.UUID(value)
+         if isinstance(iceberg_type, (BinaryType, FixedType)):
+             return bytes.fromhex(value)
+     # Convert from number
+     if isinstance(value, (int, float)):
+         if isinstance(iceberg_type, (IntegerType, LongType)):
+             return int(value)
+         if isinstance(iceberg_type, (FloatType, DoubleType)):
+             return float(value)
+         if isinstance(iceberg_type, DecimalType):
+             return Decimal(str(value))
+         if isinstance(iceberg_type, StringType):
+             return str(value)
+     if isinstance(iceberg_type, (ListType, MapType, StructType)):
+         raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
+     raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
+
+
+ class PyIcebergEngine:
+     """Engine for querying and appending to Iceberg tables using pyiceberg and daft."""
+
+     def __init__(self, config: PyIcebergConfig):
+         """Initialize the PyIcebergEngine with the given configuration.
+
+         Args:
+             config: PyIcebergConfig object containing connection parameters.
+         """
+         self.config = config
+         self._catalog: Optional[Any] = None
+         self._session: Optional[Session] = None
+         self._initialize_connection()
+
+     def _initialize_connection(self):
+         try:
+             self._catalog = pyiceberg_load_catalog(
+                 self.config.catalog_name,
+                 self.config.warehouse,
+                 self.config.uri,
+                 self.config.region,
+                 self.config.rest_signing_name,
+                 self.config.rest_sigv4_enabled,
+             )
+             self._session = Session()
+             self._session.attach(DaftCatalog.from_iceberg(self._catalog))
+             self._session.set_namespace(self.config.namespace)
+         except Exception as e:
+             raise ConnectionError(f'Failed to initialize PyIceberg connection: {str(e)}')
+
+     def execute_query(self, query: str) -> Dict[str, Any]:
+         """Execute a SQL query against the Iceberg catalog using Daft.
+
+         Args:
+             query: SQL query to execute
+
+         Returns:
+             Dict containing:
+                 - columns: List of column names
+                 - rows: List of rows, where each row is a list of values
+         """
+         if not self._session:
+             raise ConnectionError('No active session for PyIceberg/Daft')
+         try:
+             result = self._session.sql(query)
+             if result is None:
+                 raise Exception('Query execution returned None result')
+             df = result.collect()
+             columns = df.column_names
+             rows = df.to_pylist()
+             return {
+                 'columns': columns,
+                 'rows': [list(row.values()) for row in rows],
+             }
+         except Exception as e:
+             raise Exception(f'Error executing query: {str(e)}')
+
+     def test_connection(self) -> bool:
+         """Test the connection by listing namespaces."""
+         if not self._session:
+             return False
+         try:
+             _ = self._session.list_namespaces()
+             return True
+         except Exception:
+             return False
+
+     def append_rows(self, table_name: str, rows: list[dict]) -> None:
+         """Append rows to an Iceberg table using pyiceberg.
+
+         Args:
+             table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
+             rows: List of dictionaries, each representing a row to append
+
+         Raises:
+             Exception: If appending fails
+         """
+         if not self._catalog:
+             raise ConnectionError('No active catalog for PyIceberg')
+         try:
+             # If table_name does not contain a dot, prepend the namespace
+             if '.' not in table_name:
+                 full_table_name = f'{self.config.namespace}.{table_name}'
+             else:
+                 full_table_name = table_name
+             table = self._catalog.load_table(full_table_name)
+             iceberg_schema = table.schema()
+             converted_rows = []
+             for row in rows:
+                 converted_row = {}
+                 for field in iceberg_schema.fields:
+                     field_name = field.name
+                     field_type = field.field_type
+                     value = row.get(field_name)
+                     if field.required and value is None:
+                         raise ValueError(f'Required field {field_name} is missing or None')
+                     try:
+                         converted_row[field_name] = convert_value_for_append(value, field_type)
+                     except (ValueError, TypeError) as e:
+                         raise ValueError(
+                             f'Error converting value for field {field_name}: {str(e)}'
+                         )
+                 converted_rows.append(converted_row)
+             schema = iceberg_schema.as_arrow()
+             pa_table = pa.Table.from_pylist(converted_rows, schema=schema)
+             table.append(pa_table)
+         except Exception as e:
+             raise Exception(f'Error appending rows: {str(e)}')
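
To make the conversion rules concrete, here is a small sketch exercising convert_value_for_append directly; the import path is an assumption based on the package layout:

    from datetime import date
    from decimal import Decimal

    from awslabs.s3_tables_mcp_server.engines.pyiceberg import convert_value_for_append  # assumed path
    from pyiceberg.types import DateType, DecimalType, LongType

    # Strings are parsed into the column's logical type...
    assert convert_value_for_append('42', LongType()) == 42
    assert convert_value_for_append('2024-01-31', DateType()) == date(2024, 1, 31)
    # ...and floats destined for a decimal column round-trip through str, so the
    # Decimal carries the printed value rather than binary-float artifacts.
    assert convert_value_for_append(19.99, DecimalType(10, 2)) == Decimal('19.99')

This per-field coercion is what lets append_rows accept loosely typed row dicts (for example, values arriving as JSON strings) while still producing a pyarrow table that matches the Iceberg schema.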