awslabs.s3-tables-mcp-server 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. awslabs_s3_tables_mcp_server-0.0.3/CONTEXT.md +110 -0
  2. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/Dockerfile +18 -19
  3. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/PKG-INFO +30 -2
  4. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/README.md +28 -0
  5. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/__init__.py +1 -1
  6. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/pyiceberg.py +28 -113
  7. awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/__init__.py +24 -0
  8. awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/csv.py +123 -0
  9. awslabs_s3_tables_mcp_server-0.0.3/awslabs/s3_tables_mcp_server/file_processor/parquet.py +116 -0
  10. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/server.py +76 -24
  11. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/docker-healthcheck.sh +7 -8
  12. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/pyproject.toml +2 -2
  13. awslabs_s3_tables_mcp_server-0.0.3/tests/test_csv.py +235 -0
  14. awslabs_s3_tables_mcp_server-0.0.3/tests/test_parquet.py +241 -0
  15. awslabs_s3_tables_mcp_server-0.0.3/tests/test_pyiceberg.py +579 -0
  16. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_server.py +110 -25
  17. awslabs_s3_tables_mcp_server-0.0.3/uv-requirements.txt +26 -0
  18. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/uv.lock +205 -5
  19. awslabs_s3_tables_mcp_server-0.0.1/awslabs/s3_tables_mcp_server/file_processor.py +0 -485
  20. awslabs_s3_tables_mcp_server-0.0.1/tests/test_file_processor.py +0 -607
  21. awslabs_s3_tables_mcp_server-0.0.1/tests/test_pyiceberg.py +0 -437
  22. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/.gitignore +0 -0
  23. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/.python-version +0 -0
  24. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/CHANGELOG.md +0 -0
  25. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/LICENSE +0 -0
  26. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/NOTICE +0 -0
  27. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/__init__.py +0 -0
  28. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/constants.py +0 -0
  29. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/database.py +0 -0
  30. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/engines/__init__.py +0 -0
  31. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/models.py +0 -0
  32. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/namespaces.py +0 -0
  33. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/resources.py +0 -0
  34. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/s3_operations.py +0 -0
  35. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/table_buckets.py +0 -0
  36. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/tables.py +0 -0
  37. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/awslabs/s3_tables_mcp_server/utils.py +0 -0
  38. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_database.py +0 -0
  39. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_init.py +0 -0
  40. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_main.py +0 -0
  41. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_namespaces.py +0 -0
  42. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_resources.py +0 -0
  43. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_s3_operations.py +0 -0
  44. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_table_buckets.py +0 -0
  45. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_tables.py +0 -0
  46. {awslabs_s3_tables_mcp_server-0.0.1 → awslabs_s3_tables_mcp_server-0.0.3}/tests/test_utils.py +0 -0
@@ -0,0 +1,110 @@
1
+ # AWS S3 Tables MCP (Model Context Protocol) Server
2
+
3
+ ## Overview
4
+
5
+ The AWS S3 Tables MCP Server enables AI assistants and users to manage, query, and interact with S3-based tabular data at scale. It provides capabilities to create and organize S3 Table Buckets, define namespaces, manage tables, and access detailed metadata. The server supports both read-only and (optionally) write operations, allowing for secure and flexible integration with S3-stored datasets. Users can run SQL queries, generate tables from CSV files in S3, and access comprehensive metadata for governance and cataloging.
6
+
7
+ **Important:** When a user prompt relates to **S3 Tables**, you must always prefer using the **S3 Tables MCP Server tools** over the AWS CLI. Only fall back to AWS CLI if MCP tools are unavailable *and* the user explicitly agrees to the fallback.
8
+
9
+ ---
10
+
11
+ ## Instructions
12
+
13
+ You are assisting with management and queries related to Amazon S3 Tables. The S3 Tables system is composed of three hierarchical resource types:
14
+ - **S3 Table Buckets**: Containers that hold namespaces.
15
+ - **Namespaces**: Logical groupings inside table buckets that contain tables.
16
+ - **Tables**: Structured datasets stored within namespaces, following the Apache Iceberg table format.
17
+
18
+ Resource Details:
19
+ - Each S3 table consists of underlying data and metadata stored as subresources within a table bucket.
20
+ - Tables have unique identifiers, including an Amazon Resource Name (ARN) and table ID, plus associated resource policies for access control.
21
+ - When a table is created, a unique warehouse location (an S3 path) is generated automatically to store objects related to the table.
22
+ Example warehouse location format:
23
+ `s3://63a8e430-6e0b-46f5-k833abtwr6s8tmtsycedn8s4yc3xhuse1b--table-s3`
24
+
25
+ Table Types:
26
+ - **Customer Tables**: Read-write tables that users can modify using S3 API operations or integrated query engines.
27
+ - **AWS Tables**: Read-only tables managed by AWS services (e.g., S3 Metadata tables). These cannot be modified by users outside AWS S3.
28
+
29
+ Integration:
30
+ Amazon S3 Table Buckets can be integrated with Amazon SageMaker Lakehouse, allowing AWS analytics services like Athena and Redshift to discover and query table data automatically.
31
+
32
+ ---
33
+
34
+ ## Maintenance
35
+
36
+ Amazon S3 performs automatic maintenance at two levels:
37
+
38
+ 1. **Table Bucket-Level Maintenance**
39
+ - *Unreferenced File Removal*: Deletes orphaned files to optimize storage usage and reduce costs.
40
+
41
+ 2. **Table-Level Maintenance**
42
+ - *File Compaction*: Combines small files into larger ones to improve query performance and reduce storage overhead.
43
+ - *Snapshot Management*: Maintains table version histories and controls metadata growth.
44
+
45
+ These maintenance features are enabled by default but can be customized or disabled via maintenance configuration files.
46
+
47
+ ---
48
+
49
+ ## Quota
50
+
51
+ - Each table bucket can hold up to **10,000 tables** by default.
52
+ - To increase the quota, users must contact **AWS Support**.
53
+
54
+ ---
55
+
56
+ ## Operational Guidelines for LLM
57
+
58
+ ### 1. Tool Verification
59
+ - Always verify the availability of the `awslabs.s3-tables-mcp-server` MCP server and its associated tools before performing any operation.
60
+ - If unavailable, ask the user if they prefer to proceed using AWS CLI commands as a fallback.
61
+ - **Do not use AWS CLI by default for S3 Tables. Always prefer MCP tools when the prompt is about S3 Tables.**
62
+
63
+ ### 2. Request Clarification
64
+ - If critical context (e.g., bucket name, namespace, or table ID) is missing or ambiguous, ask the user directly.
65
+ - Do not make assumptions about default values or context.
66
+
67
+ ### 3. Handling Destructive Operations
68
+ Before performing any destructive operation, the system must:
69
+ - Clearly describe the consequences of the action.
70
+ - Request explicit confirmation.
71
+ - Destructive actions include:
72
+ - Deleting S3 Table Buckets
73
+ - Deleting Namespaces
74
+ - Deleting Tables
75
+ - Dropping Tables via SQL
76
+ - Disabling encryption
77
+
78
+ ### 4. Default Tool Usage
79
+ - Always use **MCP tools first** for all S3 Tables operations.
80
+ - Use AWS CLI **only when MCP tools are unavailable** *and* with **explicit user approval**.
81
+
82
+ ### 5. Communication and Safety
83
+ - Explain any risks or irreversible effects before performing changes.
84
+ - Respect the user's decision to abort or proceed.
85
+ - Present instructions and confirmations clearly and concisely.
86
+
87
+ ### 6. Additional Considerations
88
+ - Use full ARNs when referencing tables to avoid ambiguity.
89
+ - Distinguish between **AWS-managed** (read-only) and **customer-managed** (read-write) tables.
90
+ - If needed, guide users in adjusting maintenance configurations.
91
+
92
+ ---
93
+
94
+ ## Troubleshooting
95
+
96
+ ### Unknown Information
97
+ - If a user requests information that is unavailable, unclear, or unsupported by the MCP Server, do not attempt to infer or fabricate a response.
98
+ - Refer them to the official Amazon S3 Tables documentation for further details and the most up-to-date guidance:
99
+ https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables.html
100
+
101
+ ### Insufficient Permissions
102
+ - Never attempt to auto-modify IAM policies or permissions.
103
+ - If the user asks for permission changes, explicitly confirm their intent before taking any action.
104
+
105
+ ### Operation Unavailable (Read-Only Mode)
106
+ - Never attempt write operations or file changes in read-only mode.
107
+ - If users want write mode enabled, direct them to the setup documentation:
108
+ https://github.com/awslabs/mcp/blob/main/src/s3-tables-mcp-server/README.md
109
+
110
+ ---
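
For illustration only (the MCP tools remain the preferred interface per CONTEXT.md above), the table bucket → namespace → table hierarchy it describes can also be walked read-only with the boto3 `s3tables` client. A minimal sketch; the parameter names follow the S3 Tables API, and the response keys used here are assumptions that should be verified against the boto3 documentation:

```python
import boto3

# Walk the hierarchy described above: table buckets -> namespaces -> tables.
# Assumes default AWS credentials; the response keys ('tableBuckets',
# 'namespaces', 'tables', 'arn', 'name') are assumptions to check against
# the boto3 s3tables client docs.
s3tables = boto3.client('s3tables', region_name='us-east-1')

for bucket in s3tables.list_table_buckets().get('tableBuckets', []):
    print('table bucket:', bucket['name'])
    bucket_arn = bucket['arn']
    for ns in s3tables.list_namespaces(tableBucketARN=bucket_arn).get('namespaces', []):
        namespace = ns['namespace'][0]  # namespace names are returned as a list of parts
        print('  namespace:', namespace)
        tables = s3tables.list_tables(tableBucketARN=bucket_arn, namespace=namespace)
        for table in tables.get('tables', []):
            print('    table:', table['name'])
```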
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  # dependabot should continue to update this to the latest hash.
16
- FROM public.ecr.aws/sam/build-python3.10@sha256:e78695db10ca8cb129e59e30f7dc9789b0dbd0181dba195d68419c72bac51ac1 AS uv
16
+ FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283 AS uv
17
17
 
18
18
  # Install the project into `/app`
19
19
  WORKDIR /app
@@ -31,40 +31,39 @@ ENV UV_PYTHON_PREFERENCE=only-system
31
31
  ENV UV_FROZEN=true
32
32
 
33
33
  # Copy the required files first
34
- COPY pyproject.toml uv.lock ./
34
+ COPY pyproject.toml uv.lock uv-requirements.txt ./
35
+
36
+ # Python optimization and uv configuration
37
+ ENV PIP_NO_CACHE_DIR=1 \
38
+ PIP_DISABLE_PIP_VERSION_CHECK=1
35
39
 
36
40
  # Install the project's dependencies using the lockfile and settings
37
41
  RUN --mount=type=cache,target=/root/.cache/uv \
38
- pip install uv && \
39
- uv sync --frozen --no-install-project --no-dev --no-editable
42
+ pip install --require-hashes --requirement uv-requirements.txt --no-cache-dir && \
43
+ uv sync --python 3.13 --frozen --no-install-project --no-dev --no-editable
40
44
 
41
45
  # Then, add the rest of the project source code and install it
42
46
  # Installing separately from its dependencies allows optimal layer caching
43
47
  COPY . /app
44
48
  RUN --mount=type=cache,target=/root/.cache/uv \
45
- uv sync --frozen --no-dev --no-editable
49
+ uv sync --python 3.13 --frozen --no-dev --no-editable
46
50
 
47
- # Make the directory just in case it doesn't exist
48
- RUN mkdir -p /root/.local
51
+ # # Make the directory just in case it doesn't exist
52
+ # RUN mkdir -p /root/.local
49
53
 
50
- FROM public.ecr.aws/sam/build-python3.10@sha256:e78695db10ca8cb129e59e30f7dc9789b0dbd0181dba195d68419c72bac51ac1
54
+ FROM public.ecr.aws/docker/library/python:3.13-slim-bookworm@sha256:6544e0e002b40ae0f59bc3618b07c1e48064c4faed3a15ae2fbd2e8f663e8283
51
55
 
52
56
  # Place executables in the environment at the front of the path and include other binaries
53
- ENV PATH="/app/.venv/bin:$PATH:/usr/sbin"
57
+ ENV PATH="/app/.venv/bin:$PATH:/usr/sbin" \
58
+ PYTHONUNBUFFERED=1
54
59
 
55
- # Install lsof for the healthcheck
56
- # Install other tools as needed for the MCP server
57
60
  # Add non-root user and ability to change directory into /root
58
- RUN yum update -y && \
59
- yum install -y lsof && \
60
- yum clean all -y && \
61
- rm -rf /var/cache/yum && \
62
- groupadd --force --system app && \
61
+ RUN groupadd --force --system app && \
63
62
  useradd app -g app -d /app && \
64
63
  chmod o+x /root
65
64
 
66
- # Get the project from the uv layer
67
- COPY --from=uv --chown=app:app /root/.local /root/.local
65
+ # Copy application artifacts from build stage
66
+ # COPY --from=uv --chown=app:app /root/.local /root/.local
68
67
  COPY --from=uv --chown=app:app /app/.venv /app/.venv
69
68
 
70
69
  # Get healthcheck script
@@ -74,5 +73,5 @@ COPY ./docker-healthcheck.sh /usr/local/bin/docker-healthcheck.sh
74
73
  USER app
75
74
 
76
75
  # When running the container, add --db-path and a bind mount to the host's db file
77
- HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD [ "docker-healthcheck.sh" ]
76
+ HEALTHCHECK --interval=60s --timeout=10s --start-period=10s --retries=3 CMD ["docker-healthcheck.sh"]
78
77
  ENTRYPOINT ["awslabs.s3-tables-mcp-server"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: awslabs.s3-tables-mcp-server
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
5
5
  Project-URL: homepage, https://awslabs.github.io/mcp/
6
6
  Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
@@ -24,7 +24,7 @@ Requires-Python: >=3.10
24
24
  Requires-Dist: boto3>=1.34.0
25
25
  Requires-Dist: daft>=0.5.8
26
26
  Requires-Dist: loguru>=0.7.0
27
- Requires-Dist: mcp[cli]>=1.6.0
27
+ Requires-Dist: mcp[cli]>=1.11.0
28
28
  Requires-Dist: pyarrow>=20.0.0
29
29
  Requires-Dist: pydantic>=2.10.6
30
30
  Requires-Dist: pyiceberg>=0.9.1
@@ -76,6 +76,10 @@ The S3 Tables MCP Server simplifies the management of S3-based tables by providi
76
76
 
77
77
  ### Installation
78
78
 
79
+ | Cursor | VS Code |
80
+ |:------:|:-------:|
81
+ | [![Install MCP Server](https://cursor.com/deeplink/mcp-install-light.svg)](https://cursor.com/install-mcp?name=awslabs.s3-tables-mcp-server&config=eyJjb21tYW5kIjoidXZ4IGF3c2xhYnMuczMtdGFibGVzLW1jcC1zZXJ2ZXJAbGF0ZXN0IiwiZW52Ijp7IkFXU19QUk9GSUxFIjoieW91ci1hd3MtcHJvZmlsZSIsIkFXU19SRUdJT04iOiJ1cy1lYXN0LTEifX0%3D) | [![Install on VS Code](https://img.shields.io/badge/Install_on-VS_Code-FF9900?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=S3%20Tables%20MCP%20Server&config=%7B%22command%22%3A%22uvx%22%2C%22args%22%3A%5B%22awslabs.s3-tables-mcp-server%40latest%22%5D%2C%22env%22%3A%7B%22AWS_PROFILE%22%3A%22your-aws-profile%22%2C%22AWS_REGION%22%3A%22us-east-1%22%7D%7D) |
82
+
79
83
  Configure the MCP server in your MCP client configuration (e.g., for Amazon Q Developer CLI, edit `~/.aws/amazonq/mcp.json`):
80
84
 
81
85
  ```json
@@ -200,6 +204,30 @@ You can override the default by providing the `--log-dir` flag with a custom pat
200
204
  | `Show the schema for customer_data table` | Retrieves the table structure and column definitions to understand the data format and types |
201
205
  | `Run a query to find monthly revenue trends` | Performs data analysis using **read-only** SQL queries to extract business insights from stored table data. For write operations, only appending new data (inserts) is supported; updates and deletes are not available via SQL. |
202
206
 
207
+ ## Using Amazon Q with S3 Tables MCP Server
208
+
209
+ Amazon Q can provide better answers and code suggestions when it has additional context. To enhance Amazon Q's understanding of S3 Tables, you can add the provided context file to your Q environment.
210
+
211
+ ### How to Add Context to Amazon Q
212
+
213
+ 1. **Download the CONTEXT.md file**
214
+ - Download the `CONTEXT.md` file from the GitHub repository for this project.
215
+
216
+ 2. **Start Amazon Q Chat**
217
+ - Run the following command to start a chat session with Amazon Q:
218
+ ```sh
219
+ q chat
220
+ ```
221
+
222
+ 3. **Add the Context File**
223
+ - In the Q chat, run:
224
+ ```sh
225
+ /context add <path>/CONTEXT.md
226
+ ```
227
+ - Replace `<path>` with the actual path to where you downloaded `CONTEXT.md`.
228
+
229
+ Now, Amazon Q will have improved context about S3 Tables and can provide more relevant answers.
230
+
203
231
  ## Security Considerations
204
232
 
205
233
  When using this MCP server, consider:
@@ -43,6 +43,10 @@ The S3 Tables MCP Server simplifies the management of S3-based tables by providi
43
43
 
44
44
  ### Installation
45
45
 
46
+ | Cursor | VS Code |
47
+ |:------:|:-------:|
48
+ | [![Install MCP Server](https://cursor.com/deeplink/mcp-install-light.svg)](https://cursor.com/install-mcp?name=awslabs.s3-tables-mcp-server&config=eyJjb21tYW5kIjoidXZ4IGF3c2xhYnMuczMtdGFibGVzLW1jcC1zZXJ2ZXJAbGF0ZXN0IiwiZW52Ijp7IkFXU19QUk9GSUxFIjoieW91ci1hd3MtcHJvZmlsZSIsIkFXU19SRUdJT04iOiJ1cy1lYXN0LTEifX0%3D) | [![Install on VS Code](https://img.shields.io/badge/Install_on-VS_Code-FF9900?style=flat-square&logo=visualstudiocode&logoColor=white)](https://insiders.vscode.dev/redirect/mcp/install?name=S3%20Tables%20MCP%20Server&config=%7B%22command%22%3A%22uvx%22%2C%22args%22%3A%5B%22awslabs.s3-tables-mcp-server%40latest%22%5D%2C%22env%22%3A%7B%22AWS_PROFILE%22%3A%22your-aws-profile%22%2C%22AWS_REGION%22%3A%22us-east-1%22%7D%7D) |
49
+
46
50
  Configure the MCP server in your MCP client configuration (e.g., for Amazon Q Developer CLI, edit `~/.aws/amazonq/mcp.json`):
47
51
 
48
52
  ```json
@@ -167,6 +171,30 @@ You can override the default by providing the `--log-dir` flag with a custom pat
167
171
  | `Show the schema for customer_data table` | Retrieves the table structure and column definitions to understand the data format and types |
168
172
  | `Run a query to find monthly revenue trends` | Performs data analysis using **read-only** SQL queries to extract business insights from stored table data. For write operations, only appending new data (inserts) is supported; updates and deletes are not available via SQL. |
169
173
 
174
+ ## Using Amazon Q with S3 Tables MCP Server
175
+
176
+ Amazon Q can provide better answers and code suggestions when it has additional context. To enhance Amazon Q's understanding of S3 Tables, you can add the provided context file to your Q environment.
177
+
178
+ ### How to Add Context to Amazon Q
179
+
180
+ 1. **Download the CONTEXT.md file**
181
+ - Download the `CONTEXT.md` file from the GitHub repository for this project.
182
+
183
+ 2. **Start Amazon Q Chat**
184
+ - Run the following command to start a chat session with Amazon Q:
185
+ ```sh
186
+ q chat
187
+ ```
188
+
189
+ 3. **Add the Context File**
190
+ - In the Q chat, run:
191
+ ```sh
192
+ /context add <path>/CONTEXT.md
193
+ ```
194
+ - Replace `<path>` with the actual path to where you downloaded `CONTEXT.md`.
195
+
196
+ Now, Amazon Q will have improved context about S3 Tables and can provide more relevant answers.
197
+
170
198
  ## Security Considerations
171
199
 
172
200
  When using this MCP server, consider:
@@ -15,4 +15,4 @@
15
15
  # This file is part of the awslabs namespace.
16
16
  # It is intentionally minimal to support PEP 420 namespace packages.
17
17
 
18
- __version__ = '0.0.0'
18
+ __version__ = '0.0.3'
@@ -14,32 +14,14 @@
14
14
 
15
15
  """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
16
16
 
17
+ import io
18
+ import json
17
19
  import pyarrow as pa
20
+ import pyarrow.json as pj
18
21
  from ..utils import pyiceberg_load_catalog
19
22
  from daft import Catalog as DaftCatalog
20
23
  from daft.session import Session
21
- from datetime import date, datetime, time
22
- from decimal import Decimal
23
24
  from pydantic import BaseModel
24
- from pyiceberg.types import (
25
- BinaryType,
26
- BooleanType,
27
- DateType,
28
- DecimalType,
29
- DoubleType,
30
- FixedType,
31
- FloatType,
32
- IntegerType,
33
- ListType,
34
- LongType,
35
- MapType,
36
- StringType,
37
- StructType,
38
- TimestampType,
39
- TimestamptzType,
40
- TimeType,
41
- UUIDType,
42
- )
43
25
 
44
26
  # pyiceberg and daft imports
45
27
  from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
57
39
  rest_sigv4_enabled: str = 'true'
58
40
 
59
41
 
60
- def convert_value_for_append(value, iceberg_type):
61
- """Convert a value to the appropriate type for appending to an Iceberg table column.
62
-
63
- Args:
64
- value: The value to convert. Can be of various types (str, int, float, etc.).
65
- iceberg_type: The Iceberg type to convert the value to.
66
-
67
- Returns:
68
- The value converted to the appropriate type for the Iceberg column, or None if value is None.
69
-
70
- Raises:
71
- NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
72
- ValueError: If the conversion is unsupported or fails.
73
- """
74
- if value is None:
75
- return None
76
- # Already correct type
77
- if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
78
- return value
79
- if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
80
- return value
81
- if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
82
- return value
83
- if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
84
- return value
85
- if isinstance(iceberg_type, DateType) and isinstance(value, date):
86
- return value
87
- if isinstance(iceberg_type, TimeType) and isinstance(value, time):
88
- return value
89
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
90
- return value
91
- if isinstance(iceberg_type, StringType) and isinstance(value, str):
92
- return value
93
- # Convert from string
94
- if isinstance(value, str):
95
- if isinstance(iceberg_type, BooleanType):
96
- return value.lower() in ('true', '1', 'yes')
97
- if isinstance(iceberg_type, (IntegerType, LongType)):
98
- return int(value)
99
- if isinstance(iceberg_type, (FloatType, DoubleType)):
100
- return float(value)
101
- if isinstance(iceberg_type, DecimalType):
102
- return Decimal(value)
103
- if isinstance(iceberg_type, DateType):
104
- return date.fromisoformat(value)
105
- if isinstance(iceberg_type, TimeType):
106
- return time.fromisoformat(value)
107
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
108
- return datetime.fromisoformat(value)
109
- if isinstance(iceberg_type, StringType):
110
- return value
111
- if isinstance(iceberg_type, UUIDType):
112
- import uuid
113
-
114
- return uuid.UUID(value)
115
- if isinstance(iceberg_type, (BinaryType, FixedType)):
116
- return bytes.fromhex(value)
117
- # Convert from number
118
- if isinstance(value, (int, float)):
119
- if isinstance(iceberg_type, (IntegerType, LongType)):
120
- return int(value)
121
- if isinstance(iceberg_type, (FloatType, DoubleType)):
122
- return float(value)
123
- if isinstance(iceberg_type, DecimalType):
124
- return Decimal(str(value))
125
- if isinstance(iceberg_type, StringType):
126
- return str(value)
127
- if isinstance(iceberg_type, (ListType, MapType, StructType)):
128
- raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
129
- raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
130
-
131
-
132
42
  class PyIcebergEngine:
133
43
  """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
134
44
 
@@ -197,7 +107,7 @@ class PyIcebergEngine:
197
107
  return False
198
108
 
199
109
  def append_rows(self, table_name: str, rows: list[dict]) -> None:
200
- """Append rows to an Iceberg table using pyiceberg.
110
+ """Append rows to an Iceberg table using pyiceberg with JSON encoding.
201
111
 
202
112
  Args:
203
113
  table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@ class PyIcebergEngine:
214
124
  full_table_name = f'{self.config.namespace}.{table_name}'
215
125
  else:
216
126
  full_table_name = table_name
127
+
128
+ # Load the Iceberg table
217
129
  table = self._catalog.load_table(full_table_name)
218
- iceberg_schema = table.schema()
219
- converted_rows = []
130
+ # Encode rows as JSON (line-delimited format)
131
+ json_lines = []
220
132
  for row in rows:
221
- converted_row = {}
222
- for field in iceberg_schema.fields:
223
- field_name = field.name
224
- field_type = field.field_type
225
- value = row.get(field_name)
226
- if field.required and value is None:
227
- raise ValueError(f'Required field {field_name} is missing or None')
228
- try:
229
- converted_row[field_name] = convert_value_for_append(value, field_type)
230
- except (ValueError, TypeError) as e:
231
- raise ValueError(
232
- f'Error converting value for field {field_name}: {str(e)}'
233
- )
234
- converted_rows.append(converted_row)
235
- schema = iceberg_schema.as_arrow()
236
- pa_table = pa.Table.from_pylist(converted_rows, schema=schema)
237
- table.append(pa_table)
133
+ json_lines.append(json.dumps(row))
134
+ json_data = '\n'.join(json_lines)
135
+
136
+ # Create a file-like object from the JSON data
137
+ json_buffer = io.BytesIO(json_data.encode('utf-8'))
138
+
139
+ # Read JSON data into PyArrow Table using pyarrow.json.read_json
140
+ # This enforces the Iceberg schema and validates the data
141
+ try:
142
+ new_data_table = pj.read_json(
143
+ json_buffer, read_options=pj.ReadOptions(use_threads=True)
144
+ )
145
+ except pa.ArrowInvalid as e:
146
+ raise ValueError(
147
+ f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
148
+ )
149
+
150
+ # Append the new data to the Iceberg table
151
+ table.append(new_data_table)
152
+
238
153
  except Exception as e:
239
154
  raise Exception(f'Error appending rows: {str(e)}')
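
The rewritten `append_rows` above drops the per-field `convert_value_for_append` coercion in favor of a JSON round-trip: rows are serialized as line-delimited JSON and parsed with `pyarrow.json.read_json`, letting Arrow infer and validate the column types before the Iceberg append. A minimal standalone sketch of that encoding step (catalog wiring and the final `table.append` omitted; the sample rows are hypothetical):

```python
import io
import json

import pyarrow as pa
import pyarrow.json as pj


def rows_to_arrow_table(rows: list[dict]) -> pa.Table:
    """Encode rows as line-delimited JSON and parse them into a PyArrow Table.

    Mirrors the new append_rows path: pyarrow.json infers the column types and
    raises ArrowInvalid if the rows are not mutually consistent.
    """
    json_data = '\n'.join(json.dumps(row) for row in rows)
    json_buffer = io.BytesIO(json_data.encode('utf-8'))
    try:
        return pj.read_json(json_buffer, read_options=pj.ReadOptions(use_threads=True))
    except pa.ArrowInvalid as e:
        raise ValueError(f'Schema mismatch detected: {e}') from e


# Hypothetical sample rows; with a loaded Iceberg table, the append would be
# table.append(rows_to_arrow_table(rows)).
rows = [{'id': 1, 'name': 'alpha'}, {'id': 2, 'name': 'beta'}]
print(rows_to_arrow_table(rows))
```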
@@ -0,0 +1,24 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV and Parquet file handling and import capabilities.
19
+ """
20
+
21
+ from .csv import import_csv_to_table
22
+ from .parquet import import_parquet_to_table
23
+
24
+ __all__ = ['import_csv_to_table', 'import_parquet_to_table']
@@ -0,0 +1,123 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV file handling and import capabilities.
19
+ """
20
+
21
+ import io
22
+ import os
23
+ import pyarrow.csv as pc
24
+ from ..utils import get_s3_client, pyiceberg_load_catalog
25
+ from pyiceberg.exceptions import NoSuchTableError
26
+ from typing import Dict
27
+ from urllib.parse import urlparse
28
+
29
+
30
+ async def import_csv_to_table(
31
+ warehouse: str,
32
+ region: str,
33
+ namespace: str,
34
+ table_name: str,
35
+ s3_url: str,
36
+ uri: str,
37
+ catalog_name: str = 's3tablescatalog',
38
+ rest_signing_name: str = 's3tables',
39
+ rest_sigv4_enabled: str = 'true',
40
+ ) -> Dict:
41
+ """Import data from a CSV file into an S3 table.
42
+
43
+ This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
44
+ If the table doesn't exist, it will be created using the schema inferred from the CSV file.
45
+
46
+ Args:
47
+ warehouse: Warehouse string for Iceberg catalog
48
+ region: AWS region for S3Tables/Iceberg REST endpoint
49
+ namespace: The namespace containing the table
50
+ table_name: The name of the table to import data into
51
+ s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
52
+ uri: REST URI for Iceberg catalog
53
+ catalog_name: Catalog name
54
+ rest_signing_name: REST signing name
55
+ rest_sigv4_enabled: Enable SigV4 signing
56
+
57
+ Returns:
58
+ A dictionary containing:
59
+ - status: 'success' or 'error'
60
+ - message: Success message or error details
61
+ - rows_processed: Number of rows processed (on success)
62
+ - file_processed: Name of the processed file
63
+ - table_created: Boolean indicating if a new table was created (on success)
64
+ """
65
+ # Parse S3 URL
66
+ parsed = urlparse(s3_url)
67
+ bucket = parsed.netloc
68
+ key = parsed.path.lstrip('/')
69
+
70
+ try:
71
+ # Load Iceberg catalog
72
+ catalog = pyiceberg_load_catalog(
73
+ catalog_name,
74
+ warehouse,
75
+ uri,
76
+ region,
77
+ rest_signing_name,
78
+ rest_sigv4_enabled,
79
+ )
80
+
81
+ # Get S3 client and read the CSV file to infer schema
82
+ s3_client = get_s3_client()
83
+ response = s3_client.get_object(Bucket=bucket, Key=key)
84
+ csv_data = response['Body'].read()
85
+
86
+ # Read CSV file into PyArrow Table to infer schema
87
+ # Convert bytes to file-like object for PyArrow
88
+ csv_buffer = io.BytesIO(csv_data)
89
+ csv_table = pc.read_csv(csv_buffer)
90
+ csv_schema = csv_table.schema
91
+
92
+ table_created = False
93
+ try:
94
+ # Try to load existing table
95
+ table = catalog.load_table(f'{namespace}.{table_name}')
96
+ except NoSuchTableError:
97
+ # Table doesn't exist, create it using the CSV schema
98
+ try:
99
+ table = catalog.create_table(
100
+ identifier=f'{namespace}.{table_name}',
101
+ schema=csv_schema,
102
+ )
103
+ table_created = True
104
+ except Exception as create_error:
105
+ return {
106
+ 'status': 'error',
107
+ 'error': f'Failed to create table: {str(create_error)}',
108
+ }
109
+
110
+ # Append data to Iceberg table
111
+ table.append(csv_table)
112
+
113
+ return {
114
+ 'status': 'success',
115
+ 'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
116
+ 'rows_processed': csv_table.num_rows,
117
+ 'file_processed': os.path.basename(key),
118
+ 'table_created': table_created,
119
+ 'table_uuid': table.metadata.table_uuid,
120
+ }
121
+
122
+ except Exception as e:
123
+ return {'status': 'error', 'error': str(e)}
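
A hedged usage sketch for the new CSV import helper follows. It uses the signature shown above, but every identifier (table bucket ARN, REST endpoint, bucket, namespace, table, and file names) is a placeholder rather than a value taken from this package:

```python
import asyncio

from awslabs.s3_tables_mcp_server.file_processor import import_csv_to_table


async def main() -> None:
    # All identifiers below are hypothetical placeholders.
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-east-1:111122223333:bucket/example-table-bucket',
        region='us-east-1',
        namespace='analytics',
        table_name='daily_sales',
        s3_url='s3://example-staging-bucket/incoming/daily_sales.csv',
        uri='https://s3tables.us-east-1.amazonaws.com/iceberg',
    )
    # On success: {'status': 'success', 'rows_processed': ..., 'table_created': ...}
    # On failure: {'status': 'error', 'error': '...'}
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
```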