awslabs.s3-tables-mcp-server 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs_s3_tables_mcp_server-0.0.3/CONTEXT.md +110 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/Dockerfile +18 -19
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/PKG-INFO +30 -2
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/README.md +28 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +28 -113
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/__init__.py +24 -0
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/csv.py +123 -0
- awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/parquet.py +116 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/server.py +76 -24
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/docker-healthcheck.sh +7 -8
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/pyproject.toml +2 -2
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_csv.py +235 -0
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_parquet.py +241 -0
- awslabs_s3_tables_mcp_server-0.0.3/tests/test_pyiceberg.py +579 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_server.py +110 -25
- awslabs_s3_tables_mcp_server-0.0.3/uv-requirements.txt +26 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/uv.lock +205 -5
- awslabs_s3_tables_mcp_server-0.0.1/awslabs/s3_tables_mcp_server/file_processor.py +0 -485
- awslabs_s3_tables_mcp_server-0.0.1/tests/test_file_processor.py +0 -607
- awslabs_s3_tables_mcp_server-0.0.1/tests/test_pyiceberg.py +0 -437
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/.gitignore +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/.python-version +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/CHANGELOG.md +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/LICENSE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/NOTICE +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/constants.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/models.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/utils.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_database.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_init.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_main.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_namespaces.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_resources.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_s3_operations.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_table_buckets.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_tables.py +0 -0
- {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_utils.py +0 -0
awslabs_s3_tables_mcp_server-0.0.3/CONTEXT.md
@@ -0,0 +1,110 @@
+# AWS S3 Tables MCP (Model Context Protocol) Server
+
+## Overview
+
+The AWS S3 Tables MCP Server enables AI assistants and users to manage, query, and interact with S3-based tabular data at scale. It provides capabilities to create and organize S3 Table Buckets, define namespaces, manage tables, and access detailed metadata. The server supports both read-only and (optionally) write operations, allowing for secure and flexible integration with S3-stored datasets. Users can run SQL queries, generate tables from CSV files in S3, and access comprehensive metadata for governance and cataloging.
+
+**Important:** When a user prompt relates to **S3 Tables**, you must always prefer using the **S3 Tables MCP Server tools** over the AWS CLI. Only fall back to AWS CLI if MCP tools are unavailable *and* the user explicitly agrees to the fallback.
+
+---
+
+## Instructions
+
+You are assisting with management and queries related to Amazon S3 Tables. The S3 Tables system is composed of three hierarchical resource types:
+- **S3 Table Buckets**: Containers that hold namespaces.
+- **Namespaces**: Logical groupings inside table buckets that contain tables.
+- **Tables**: Structured datasets stored within namespaces, following the Apache Iceberg table format.
+
+Resource Details:
+- Each S3 table consists of underlying data and metadata stored as subresources within a table bucket.
+- Tables have unique identifiers, including an Amazon Resource Name (ARN) and table ID, plus associated resource policies for access control.
+- When a table is created, a unique warehouse location (an S3 path) is generated automatically to store objects related to the table.
+Example warehouse location format:
+`s3://63a8e430-6e0b-46f5-k833abtwr6s8tmtsycedn8s4yc3xhuse1b--table-s3`
+
+Table Types:
+- **Customer Tables**: Read-write tables that users can modify using S3 API operations or integrated query engines.
+- **AWS Tables**: Read-only tables managed by AWS services (e.g., S3 Metadata tables). These cannot be modified by users outside AWS S3.
+
+Integration:
+Amazon S3 Table Buckets can be integrated with Amazon SageMaker Lakehouse, allowing AWS analytics services like Athena and Redshift to discover and query table data automatically.
+
+---
+
+## Maintenance
+
+Amazon S3 performs automatic maintenance at two levels:
+
+1. **Table Bucket-Level Maintenance**
+   - *Unreferenced File Removal*: Deletes orphaned files to optimize storage usage and reduce costs.
+
+2. **Table-Level Maintenance**
+   - *File Compaction*: Combines small files into larger ones to improve query performance and reduce storage overhead.
+   - *Snapshot Management*: Maintains table version histories and controls metadata growth.
+
+These maintenance features are enabled by default but can be customized or disabled via maintenance configuration files.
+
+---
+
+## Quota
+
+- Each table bucket can hold up to **10,000 tables** by default.
+- To increase the quota, users must contact **AWS Support**.
+
+---
+
+## Operational Guidelines for LLM
+
+### 1. Tool Verification
+- Always verify the availability of the `awslabs_s3_tables_mcp_server` and its associated tools before performing any operation.
+- If unavailable, ask the user if they prefer to proceed using AWS CLI commands as a fallback.
+- **Do not use AWS CLI by default for S3 Tables. Always prefer MCP tools when the prompt is about S3 Tables.**
+
+### 2. Request Clarification
+- If critical context (e.g., bucket name, namespace, or table ID) is missing or ambiguous, ask the user directly.
+- Do not make assumptions about default values or context.
+
+### 3. Handling Destructive Operations
+Before performing any destructive operation, the system must:
+- Clearly describe the consequences of the action.
+- Request explicit confirmation.
+- Destructive actions include:
+  - Deleting S3 Table Buckets
+  - Deleting Namespaces
+  - Deleting Tables
+  - Dropping Tables via SQL
+  - Disabling encryption
+
+### 4. Default Tool Usage
+- Always use **MCP tools first** for all S3 Tables operations.
+- Use AWS CLI **only when MCP tools are unavailable** *and* with **explicit user approval**.
+
+### 5. Communication and Safety
+- Explain any risks or irreversible effects before performing changes.
+- Respect the user's decision to abort or proceed.
+- Present instructions and confirmations clearly and concisely.
+
+### 6. Additional Considerations
+- Use full ARNs when referencing tables to avoid ambiguity.
+- Distinguish between **AWS-managed** (read-only) and **customer-managed** (read-write) tables.
+- If needed, guide users in adjusting maintenance configurations.
+
+---
+
+## Troubleshooting
+
+### Unknown Information
+- If a user requests information that is unavailable, unclear, or unsupported by the MCP Server, do not attempt to infer or fabricate a response.
+- Refer them to the official Amazon S3 Tables documentation for further details and the most up-to-date guidance:
+  https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables.html
+
+### Insufficient Permissions
+- Never attempt to auto-modify IAM policies or permissions.
+- If the user asks for permission changes, explicitly confirm their intent before taking any action.
+
+### Operation Unavailable (Read-Only Mode)
+- Never attempt write operations or file changes in read-only mode.
+- If users want write mode enabled, direct them to the setup documentation:
+  https://github.com/awslabs/mcp/blob/main/src/s3-tables-mcp-server/README.md
+
+---
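The bucket → namespace → table hierarchy described in the new CONTEXT.md is what this server reaches through an Iceberg REST catalog. The sketch below is illustrative only: the endpoint, region, account ID, and table bucket ARN are placeholders, and the catalog properties follow the usual S3 Tables REST integration pattern rather than anything defined in this diff.

```python
# Illustrative sketch with placeholder values; verify the endpoint and ARN for your account.
from pyiceberg.catalog import load_catalog

catalog = load_catalog(
    's3tablescatalog',
    **{
        'type': 'rest',
        'uri': 'https://s3tables.us-east-1.amazonaws.com/iceberg',  # placeholder REST endpoint
        'warehouse': 'arn:aws:s3tables:us-east-1:111122223333:bucket/my-table-bucket',  # placeholder ARN
        'rest.sigv4-enabled': 'true',
        'rest.signing-name': 's3tables',
        'rest.signing-region': 'us-east-1',
    },
)

# Namespaces live inside the table bucket; tables live inside namespaces.
for namespace in catalog.list_namespaces():
    for table_identifier in catalog.list_tables(namespace):
        print(namespace, table_identifier)
```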
{awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/Dockerfile
@@ -13,7 +13,7 @@
 # limitations under the License.

 # dependabot should continue to update this to the latest hash.
-FROM public.ecr.aws/
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283 AS uv

 # Install the project into `/app`
 WORKDIR /app
@@ -31,40 +31,39 @@ ENV UV_PYTHON_PREFERENCE=only-system
 ENV UV_FROZEN=true

 # Copy the required files first
-COPY pyproject.toml uv.lock ./
+COPY pyproject.toml uv.lock uv-requirements.txt ./
+
+# Python optimization and uv configuration
+ENV PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1

 # Install the project's dependencies using the lockfile and settings
 RUN --mount=type=cache,target=/root/.cache/uv \
-    pip install uv && \
-    uv sync --frozen --no-install-project --no-dev --no-editable
+    pip install --require-hashes --requirement uv-requirements.txt --no-cache-dir && \
+    uv sync --python 3.13 --frozen --no-install-project --no-dev --no-editable

 # Then, add the rest of the project source code and install it
 # Installing separately from its dependencies allows optimal layer caching
 COPY . /app
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --no-dev --no-editable
+    uv sync --python 3.13 --frozen --no-dev --no-editable

-# Make the directory just in case it doesn't exist
-RUN mkdir -p /root/.local
+# # Make the directory just in case it doesn't exist
+# RUN mkdir -p /root/.local

-FROM public.ecr.aws/
+FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283

 # Place executables in the environment at the front of the path and include other binaries
-ENV PATH="/app/.venv/bin:$PATH:/usr/sbin"
+ENV PATH="/app/.venv/bin:$PATH:/usr/sbin" \
+    PYTHONUNBUFFERED=1

-# Install lsof for the healthcheck
-# Install other tools as needed for the MCP server
 # Add non-root user and ability to change directory into /root
-RUN
-    yum install -y lsof && \
-    yum clean all -y && \
-    rm -rf /var/cache/yum && \
-    groupadd --force --system app && \
+RUN groupadd --force --system app && \
     useradd app -g app -d /app && \
     chmod o+x /root

-#
-COPY --from=uv --chown=app:app /root/.local /root/.local
+# Copy application artifacts from build stage
+# COPY --from=uv --chown=app:app /root/.local /root/.local
 COPY --from=uv --chown=app:app /app/.venv /app/.venv

 # Get healthcheck script
@@ -74,5 +73,5 @@ COPY ./docker-healthcheck.sh /usr/local/bin/docker-healthcheck.sh
 USER app

 # When running the container, add --db-path and a bind mount to the host's db file
-HEALTHCHECK --interval=
+HEALTHCHECK --interval=60s --timeout=10s --start-period=10s --retries=3 CMD ["docker-healthcheck.sh"]
 ENTRYPOINT ["awslabs.s3-tables-mcp-server"]
{awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: awslabs.s3-tables-mcp-server
-Version: 0.0.1
+Version: 0.0.3
 Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
 Project-URL: homepage, https://awslabs.github.io/mcp/
 Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
@@ -24,7 +24,7 @@ Requires-Python: >=3.10
 Requires-Dist: boto3>=1.34.0
 Requires-Dist: daft>=0.5.8
 Requires-Dist: loguru>=0.7.0
-Requires-Dist: mcp[cli]>=1.
+Requires-Dist: mcp[cli]>=1.11.0
 Requires-Dist: pyarrow>=20.0.0
 Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pyiceberg>=0.9.1
@@ -76,6 +76,10 @@ The S3 Tables MCP Server simplifies the management of S3-based tables by providi

 ### Installation

+| Cursor | VS Code |
+|:------:|:-------:|
+| [](https://cursor.com/install-mcp?name=awslabs.s3-tables-mcp-server&config=eyJjb21tYW5kIjoidXZ4IGF3c2xhYnMuczMtdGFibGVzLW1jcC1zZXJ2ZXJAbGF0ZXN0IiwiZW52Ijp7IkFXU19QUk9GSUxFIjoieW91ci1hd3MtcHJvZmlsZSIsIkFXU19SRUdJT04iOiJ1cy1lYXN0LTEifX0%3D) | [](https://insiders.vscode.dev/redirect/mcp/install?name=S3%20Tables%20MCP%20Server&config=%7B%22command%22%3A%22uvx%22%2C%22args%22%3A%5B%22awslabs.s3-tables-mcp-server%40latest%22%5D%2C%22env%22%3A%7B%22AWS_PROFILE%22%3A%22your-aws-profile%22%2C%22AWS_REGION%22%3A%22us-east-1%22%7D%7D) |
+
 Configure the MCP server in your MCP client configuration (e.g., for Amazon Q Developer CLI, edit `~/.aws/amazonq/mcp.json`):

 ```json
@@ -200,6 +204,30 @@ You can override the default by providing the `--log-dir` flag with a custom pat
 | `Show the schema for customer_data table` | Retrieves the table structure and column definitions to understand the data format and types |
 | `Run a query to find monthly revenue trends` | Performs data analysis using **read-only** SQL queries to extract business insights from stored table data. For write operations, only appending new data (inserts) is supported; updates and deletes are not available via SQL. |

+## Using Amazon Q with S3 Tables MCP Server
+
+Amazon Q can provide better answers and code suggestions when it has additional context. To enhance Amazon Q's understanding of S3 Tables, you can add the provided context file to your Q environment.
+
+### How to Add Context to Amazon Q
+
+1. **Download the CONTEXT.md file**
+   - Download the `CONTEXT.md` file from the GitHub repository for this project.
+
+2. **Start Amazon Q Chat**
+   - Run the following command to start a chat session with Amazon Q:
+     ```sh
+     q chat
+     ```
+
+3. **Add the Context File**
+   - In the Q chat, run:
+     ```sh
+     /context add <path>/CONTEXT.md
+     ```
+   - Replace `<path>` with the actual path to where you downloaded `CONTEXT.md`.
+
+Now, Amazon Q will have improved context about S3 Tables and can provide more relevant answers.
+
 ## Security Considerations

 When using this MCP server, consider:
{awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/README.md
@@ -43,6 +43,10 @@ The S3 Tables MCP Server simplifies the management of S3-based tables by providi

 ### Installation

+| Cursor | VS Code |
+|:------:|:-------:|
+| [](https://cursor.com/install-mcp?name=awslabs.s3-tables-mcp-server&config=eyJjb21tYW5kIjoidXZ4IGF3c2xhYnMuczMtdGFibGVzLW1jcC1zZXJ2ZXJAbGF0ZXN0IiwiZW52Ijp7IkFXU19QUk9GSUxFIjoieW91ci1hd3MtcHJvZmlsZSIsIkFXU19SRUdJT04iOiJ1cy1lYXN0LTEifX0%3D) | [](https://insiders.vscode.dev/redirect/mcp/install?name=S3%20Tables%20MCP%20Server&config=%7B%22command%22%3A%22uvx%22%2C%22args%22%3A%5B%22awslabs.s3-tables-mcp-server%40latest%22%5D%2C%22env%22%3A%7B%22AWS_PROFILE%22%3A%22your-aws-profile%22%2C%22AWS_REGION%22%3A%22us-east-1%22%7D%7D) |
+
 Configure the MCP server in your MCP client configuration (e.g., for Amazon Q Developer CLI, edit `~/.aws/amazonq/mcp.json`):

 ```json
@@ -167,6 +171,30 @@ You can override the default by providing the `--log-dir` flag with a custom pat
 | `Show the schema for customer_data table` | Retrieves the table structure and column definitions to understand the data format and types |
 | `Run a query to find monthly revenue trends` | Performs data analysis using **read-only** SQL queries to extract business insights from stored table data. For write operations, only appending new data (inserts) is supported; updates and deletes are not available via SQL. |

+## Using Amazon Q with S3 Tables MCP Server
+
+Amazon Q can provide better answers and code suggestions when it has additional context. To enhance Amazon Q's understanding of S3 Tables, you can add the provided context file to your Q environment.
+
+### How to Add Context to Amazon Q
+
+1. **Download the CONTEXT.md file**
+   - Download the `CONTEXT.md` file from the GitHub repository for this project.
+
+2. **Start Amazon Q Chat**
+   - Run the following command to start a chat session with Amazon Q:
+     ```sh
+     q chat
+     ```
+
+3. **Add the Context File**
+   - In the Q chat, run:
+     ```sh
+     /context add <path>/CONTEXT.md
+     ```
+   - Replace `<path>` with the actual path to where you downloaded `CONTEXT.md`.
+
+Now, Amazon Q will have improved context about S3 Tables and can provide more relevant answers.
+
 ## Security Considerations

 When using this MCP server, consider:
{awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py
@@ -14,32 +14,14 @@

 """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""

+import io
+import json
 import pyarrow as pa
+import pyarrow.json as pj
 from ..utils import pyiceberg_load_catalog
 from daft import Catalog as DaftCatalog
 from daft.session import Session
-from datetime import date, datetime, time
-from decimal import Decimal
 from pydantic import BaseModel
-from pyiceberg.types import (
-    BinaryType,
-    BooleanType,
-    DateType,
-    DecimalType,
-    DoubleType,
-    FixedType,
-    FloatType,
-    IntegerType,
-    ListType,
-    LongType,
-    MapType,
-    StringType,
-    StructType,
-    TimestampType,
-    TimestamptzType,
-    TimeType,
-    UUIDType,
-)

 # pyiceberg and daft imports
 from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
     rest_sigv4_enabled: str = 'true'


-def convert_value_for_append(value, iceberg_type):
-    """Convert a value to the appropriate type for appending to an Iceberg table column.
-
-    Args:
-        value: The value to convert. Can be of various types (str, int, float, etc.).
-        iceberg_type: The Iceberg type to convert the value to.
-
-    Returns:
-        The value converted to the appropriate type for the Iceberg column, or None if value is None.
-
-    Raises:
-        NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
-        ValueError: If the conversion is unsupported or fails.
-    """
-    if value is None:
-        return None
-    # Already correct type
-    if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
-        return value
-    if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
-        return value
-    if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
-        return value
-    if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
-        return value
-    if isinstance(iceberg_type, DateType) and isinstance(value, date):
-        return value
-    if isinstance(iceberg_type, TimeType) and isinstance(value, time):
-        return value
-    if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
-        return value
-    if isinstance(iceberg_type, StringType) and isinstance(value, str):
-        return value
-    # Convert from string
-    if isinstance(value, str):
-        if isinstance(iceberg_type, BooleanType):
-            return value.lower() in ('true', '1', 'yes')
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(value)
-        if isinstance(iceberg_type, DateType):
-            return date.fromisoformat(value)
-        if isinstance(iceberg_type, TimeType):
-            return time.fromisoformat(value)
-        if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
-            return datetime.fromisoformat(value)
-        if isinstance(iceberg_type, StringType):
-            return value
-        if isinstance(iceberg_type, UUIDType):
-            import uuid
-
-            return uuid.UUID(value)
-        if isinstance(iceberg_type, (BinaryType, FixedType)):
-            return bytes.fromhex(value)
-    # Convert from number
-    if isinstance(value, (int, float)):
-        if isinstance(iceberg_type, (IntegerType, LongType)):
-            return int(value)
-        if isinstance(iceberg_type, (FloatType, DoubleType)):
-            return float(value)
-        if isinstance(iceberg_type, DecimalType):
-            return Decimal(str(value))
-        if isinstance(iceberg_type, StringType):
-            return str(value)
-    if isinstance(iceberg_type, (ListType, MapType, StructType)):
-        raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
-    raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
-
-
 class PyIcebergEngine:
     """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""

@@ -197,7 +107,7 @@
             return False

     def append_rows(self, table_name: str, rows: list[dict]) -> None:
-        """Append rows to an Iceberg table using pyiceberg.
+        """Append rows to an Iceberg table using pyiceberg with JSON encoding.

         Args:
             table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@
                 full_table_name = f'{self.config.namespace}.{table_name}'
             else:
                 full_table_name = table_name
+
+            # Load the Iceberg table
             table = self._catalog.load_table(full_table_name)
-
-
+            # Encode rows as JSON (line-delimited format)
+            json_lines = []
             for row in rows:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                json_lines.append(json.dumps(row))
+            json_data = '\n'.join(json_lines)
+
+            # Create a file-like object from the JSON data
+            json_buffer = io.BytesIO(json_data.encode('utf-8'))
+
+            # Read JSON data into PyArrow Table using pyarrow.json.read_json
+            # This enforces the Iceberg schema and validates the data
+            try:
+                new_data_table = pj.read_json(
+                    json_buffer, read_options=pj.ReadOptions(use_threads=True)
+                )
+            except pa.ArrowInvalid as e:
+                raise ValueError(
+                    f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
+                )
+
+            # Append the new data to the Iceberg table
+            table.append(new_data_table)
+
         except Exception as e:
             raise Exception(f'Error appending rows: {str(e)}')
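The rewritten `append_rows` above replaces the per-value `convert_value_for_append` logic with a single pass through line-delimited JSON and `pyarrow.json`. A minimal standalone sketch of that conversion path, using made-up sample rows, looks like this:

```python
# Sketch of the JSON -> Arrow conversion used by the new append_rows; sample rows are hypothetical.
import io
import json

import pyarrow as pa
import pyarrow.json as pj

rows = [
    {'id': 1, 'name': 'alice', 'score': 9.5},
    {'id': 2, 'name': 'bob', 'score': 7.25},
]

# Encode the rows as newline-delimited JSON, as append_rows now does.
ndjson = '\n'.join(json.dumps(row) for row in rows).encode('utf-8')

try:
    # pyarrow parses the JSON lines and infers a consistent schema for the batch.
    arrow_table = pj.read_json(io.BytesIO(ndjson), read_options=pj.ReadOptions(use_threads=True))
except pa.ArrowInvalid as exc:
    raise ValueError(f'Rows could not be parsed into a consistent schema: {exc}')

print(arrow_table.schema)  # this Arrow table is what append_rows hands to the Iceberg table's append()
```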
awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/__init__.py
@@ -0,0 +1,24 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV and Parquet file handling and import capabilities.
+"""
+
+from .csv import import_csv_to_table
+from .parquet import import_parquet_to_table
+
+__all__ = ['import_csv_to_table', 'import_parquet_to_table']
awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/csv.py
@@ -0,0 +1,123 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AWS S3 Tables MCP Server file processing module.
+
+This module provides functionality for processing and analyzing uploaded files,
+particularly focusing on CSV file handling and import capabilities.
+"""
+
+import io
+import os
+import pyarrow.csv as pc
+from ..utils import get_s3_client, pyiceberg_load_catalog
+from pyiceberg.exceptions import NoSuchTableError
+from typing import Dict
+from urllib.parse import urlparse
+
+
+async def import_csv_to_table(
+    warehouse: str,
+    region: str,
+    namespace: str,
+    table_name: str,
+    s3_url: str,
+    uri: str,
+    catalog_name: str = 's3tablescatalog',
+    rest_signing_name: str = 's3tables',
+    rest_sigv4_enabled: str = 'true',
+) -> Dict:
+    """Import data from a CSV file into an S3 table.
+
+    This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
+    If the table doesn't exist, it will be created using the schema inferred from the CSV file.
+
+    Args:
+        warehouse: Warehouse string for Iceberg catalog
+        region: AWS region for S3Tables/Iceberg REST endpoint
+        namespace: The namespace containing the table
+        table_name: The name of the table to import data into
+        s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
+        uri: REST URI for Iceberg catalog
+        catalog_name: Catalog name
+        rest_signing_name: REST signing name
+        rest_sigv4_enabled: Enable SigV4 signing
+
+    Returns:
+        A dictionary containing:
+        - status: 'success' or 'error'
+        - message: Success message or error details
+        - rows_processed: Number of rows processed (on success)
+        - file_processed: Name of the processed file
+        - table_created: Boolean indicating if a new table was created (on success)
+    """
+    # Parse S3 URL
+    parsed = urlparse(s3_url)
+    bucket = parsed.netloc
+    key = parsed.path.lstrip('/')
+
+    try:
+        # Load Iceberg catalog
+        catalog = pyiceberg_load_catalog(
+            catalog_name,
+            warehouse,
+            uri,
+            region,
+            rest_signing_name,
+            rest_sigv4_enabled,
+        )
+
+        # Get S3 client and read the CSV file to infer schema
+        s3_client = get_s3_client()
+        response = s3_client.get_object(Bucket=bucket, Key=key)
+        csv_data = response['Body'].read()
+
+        # Read CSV file into PyArrow Table to infer schema
+        # Convert bytes to file-like object for PyArrow
+        csv_buffer = io.BytesIO(csv_data)
+        csv_table = pc.read_csv(csv_buffer)
+        csv_schema = csv_table.schema
+
+        table_created = False
+        try:
+            # Try to load existing table
+            table = catalog.load_table(f'{namespace}.{table_name}')
+        except NoSuchTableError:
+            # Table doesn't exist, create it using the CSV schema
+            try:
+                table = catalog.create_table(
+                    identifier=f'{namespace}.{table_name}',
+                    schema=csv_schema,
+                )
+                table_created = True
+            except Exception as create_error:
+                return {
+                    'status': 'error',
+                    'error': f'Failed to create table: {str(create_error)}',
+                }
+
+        # Append data to Iceberg table
+        table.append(csv_table)
+
+        return {
+            'status': 'success',
+            'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
+            'rows_processed': csv_table.num_rows,
+            'file_processed': os.path.basename(key),
+            'table_created': table_created,
+            'table_uuid': table.metadata.table_uuid,
+        }
+
+    except Exception as e:
+        return {'status': 'error', 'error': str(e)}