awslabs.s3-tables-mcp-server 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
@@ -15,4 +15,4 @@
15
15
  # This file is part of the awslabs namespace.
16
16
  # It is intentionally minimal to support PEP 420 namespace packages.
17
17
 
18
- __version__ = '0.0.0'
18
+ __version__ = '0.0.3'
@@ -14,32 +14,14 @@
14
14
 
15
15
  """Engine for interacting with Iceberg tables using pyiceberg and daft (read-only)."""
16
16
 
17
+ import io
18
+ import json
17
19
  import pyarrow as pa
20
+ import pyarrow.json as pj
18
21
  from ..utils import pyiceberg_load_catalog
19
22
  from daft import Catalog as DaftCatalog
20
23
  from daft.session import Session
21
- from datetime import date, datetime, time
22
- from decimal import Decimal
23
24
  from pydantic import BaseModel
24
- from pyiceberg.types import (
25
- BinaryType,
26
- BooleanType,
27
- DateType,
28
- DecimalType,
29
- DoubleType,
30
- FixedType,
31
- FloatType,
32
- IntegerType,
33
- ListType,
34
- LongType,
35
- MapType,
36
- StringType,
37
- StructType,
38
- TimestampType,
39
- TimestamptzType,
40
- TimeType,
41
- UUIDType,
42
- )
43
25
 
44
26
  # pyiceberg and daft imports
45
27
  from typing import Any, Dict, Optional
@@ -57,78 +39,6 @@ class PyIcebergConfig(BaseModel):
57
39
  rest_sigv4_enabled: str = 'true'
58
40
 
59
41
 
60
- def convert_value_for_append(value, iceberg_type):
61
- """Convert a value to the appropriate type for appending to an Iceberg table column.
62
-
63
- Args:
64
- value: The value to convert. Can be of various types (str, int, float, etc.).
65
- iceberg_type: The Iceberg type to convert the value to.
66
-
67
- Returns:
68
- The value converted to the appropriate type for the Iceberg column, or None if value is None.
69
-
70
- Raises:
71
- NotImplementedError: If the iceberg_type is a complex type (ListType, MapType, StructType).
72
- ValueError: If the conversion is unsupported or fails.
73
- """
74
- if value is None:
75
- return None
76
- # Already correct type
77
- if isinstance(iceberg_type, BooleanType) and isinstance(value, bool):
78
- return value
79
- if isinstance(iceberg_type, (IntegerType, LongType)) and isinstance(value, int):
80
- return value
81
- if isinstance(iceberg_type, (FloatType, DoubleType)) and isinstance(value, float):
82
- return value
83
- if isinstance(iceberg_type, DecimalType) and isinstance(value, Decimal):
84
- return value
85
- if isinstance(iceberg_type, DateType) and isinstance(value, date):
86
- return value
87
- if isinstance(iceberg_type, TimeType) and isinstance(value, time):
88
- return value
89
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)) and isinstance(value, datetime):
90
- return value
91
- if isinstance(iceberg_type, StringType) and isinstance(value, str):
92
- return value
93
- # Convert from string
94
- if isinstance(value, str):
95
- if isinstance(iceberg_type, BooleanType):
96
- return value.lower() in ('true', '1', 'yes')
97
- if isinstance(iceberg_type, (IntegerType, LongType)):
98
- return int(value)
99
- if isinstance(iceberg_type, (FloatType, DoubleType)):
100
- return float(value)
101
- if isinstance(iceberg_type, DecimalType):
102
- return Decimal(value)
103
- if isinstance(iceberg_type, DateType):
104
- return date.fromisoformat(value)
105
- if isinstance(iceberg_type, TimeType):
106
- return time.fromisoformat(value)
107
- if isinstance(iceberg_type, (TimestampType, TimestamptzType)):
108
- return datetime.fromisoformat(value)
109
- if isinstance(iceberg_type, StringType):
110
- return value
111
- if isinstance(iceberg_type, UUIDType):
112
- import uuid
113
-
114
- return uuid.UUID(value)
115
- if isinstance(iceberg_type, (BinaryType, FixedType)):
116
- return bytes.fromhex(value)
117
- # Convert from number
118
- if isinstance(value, (int, float)):
119
- if isinstance(iceberg_type, (IntegerType, LongType)):
120
- return int(value)
121
- if isinstance(iceberg_type, (FloatType, DoubleType)):
122
- return float(value)
123
- if isinstance(iceberg_type, DecimalType):
124
- return Decimal(str(value))
125
- if isinstance(iceberg_type, StringType):
126
- return str(value)
127
- if isinstance(iceberg_type, (ListType, MapType, StructType)):
128
- raise NotImplementedError(f'Complex type {iceberg_type} not supported in append_rows')
129
- raise ValueError(f'Unsupported conversion from {type(value)} to {iceberg_type}')
130
-
131
-
132
42
  class PyIcebergEngine:
133
43
  """Engine for read-only queries on Iceberg tables using pyiceberg and daft."""
134
44
 
@@ -197,7 +107,7 @@ class PyIcebergEngine:
197
107
  return False
198
108
 
199
109
  def append_rows(self, table_name: str, rows: list[dict]) -> None:
200
- """Append rows to an Iceberg table using pyiceberg.
110
+ """Append rows to an Iceberg table using pyiceberg with JSON encoding.
201
111
 
202
112
  Args:
203
113
  table_name: The name of the table (e.g., 'namespace.tablename' or just 'tablename' if namespace is set)
@@ -214,26 +124,31 @@ class PyIcebergEngine:
214
124
  full_table_name = f'{self.config.namespace}.{table_name}'
215
125
  else:
216
126
  full_table_name = table_name
127
+
128
+ # Load the Iceberg table
217
129
  table = self._catalog.load_table(full_table_name)
218
- iceberg_schema = table.schema()
219
- converted_rows = []
130
+ # Encode rows as JSON (line-delimited format)
131
+ json_lines = []
220
132
  for row in rows:
221
- converted_row = {}
222
- for field in iceberg_schema.fields:
223
- field_name = field.name
224
- field_type = field.field_type
225
- value = row.get(field_name)
226
- if field.required and value is None:
227
- raise ValueError(f'Required field {field_name} is missing or None')
228
- try:
229
- converted_row[field_name] = convert_value_for_append(value, field_type)
230
- except (ValueError, TypeError) as e:
231
- raise ValueError(
232
- f'Error converting value for field {field_name}: {str(e)}'
233
- )
234
- converted_rows.append(converted_row)
235
- schema = iceberg_schema.as_arrow()
236
- pa_table = pa.Table.from_pylist(converted_rows, schema=schema)
237
- table.append(pa_table)
133
+ json_lines.append(json.dumps(row))
134
+ json_data = '\n'.join(json_lines)
135
+
136
+ # Create a file-like object from the JSON data
137
+ json_buffer = io.BytesIO(json_data.encode('utf-8'))
138
+
139
+ # Read JSON data into PyArrow Table using pyarrow.json.read_json
140
+ # The Iceberg schema is enforced when the resulting table is appended below
141
+ try:
142
+ new_data_table = pj.read_json(
143
+ json_buffer, read_options=pj.ReadOptions(use_threads=True)
144
+ )
145
+ except pa.ArrowInvalid as e:
146
+ raise ValueError(
147
+ f'Schema mismatch detected: {e}. Please ensure your data matches the table schema.'
148
+ )
149
+
150
+ # Append the new data to the Iceberg table
151
+ table.append(new_data_table)
152
+
238
153
  except Exception as e:
239
154
  raise Exception(f'Error appending rows: {str(e)}')
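The new append_rows shown above replaces per-field type coercion with a JSON round-trip: rows are serialized as newline-delimited JSON, parsed back into an Arrow table by pyarrow.json, and handed to pyiceberg, which checks the result against the Iceberg table schema on append. A minimal sketch of that round-trip, using a hypothetical standalone helper name (rows_to_arrow_table) rather than the engine class:

```python
import io
import json

import pyarrow as pa
import pyarrow.json as pj


def rows_to_arrow_table(rows: list[dict]) -> pa.Table:
    """Illustrative helper mirroring the JSON encoding used by append_rows."""
    # One JSON object per line (newline-delimited JSON).
    json_data = '\n'.join(json.dumps(row) for row in rows)
    json_buffer = io.BytesIO(json_data.encode('utf-8'))
    try:
        # pyarrow infers the Arrow schema from the JSON values.
        return pj.read_json(json_buffer, read_options=pj.ReadOptions(use_threads=True))
    except pa.ArrowInvalid as e:
        raise ValueError(f'Rows could not be parsed into an Arrow table: {e}')


# table.append(rows_to_arrow_table(rows)) then lets pyiceberg validate the
# inferred Arrow schema against the Iceberg table schema.
```

This trades the explicit per-type conversions of the removed convert_value_for_append helper for pyarrow's JSON type inference, with pyiceberg's append acting as the final schema check.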
@@ -0,0 +1,24 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV and Parquet file handling and import capabilities.
19
+ """
20
+
21
+ from .csv import import_csv_to_table
22
+ from .parquet import import_parquet_to_table
23
+
24
+ __all__ = ['import_csv_to_table', 'import_parquet_to_table']
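For reference, downstream code consumes these re-exports from the package rather than from the csv/parquet submodules; the same import appears in the server.py hunk later in this diff:

```python
from awslabs.s3_tables_mcp_server.file_processor import (
    import_csv_to_table,
    import_parquet_to_table,
)
```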
@@ -0,0 +1,123 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """AWS S3 Tables MCP Server file processing module.
16
+
17
+ This module provides functionality for processing and analyzing uploaded files,
18
+ particularly focusing on CSV file handling and import capabilities.
19
+ """
20
+
21
+ import io
22
+ import os
23
+ import pyarrow.csv as pc
24
+ from ..utils import get_s3_client, pyiceberg_load_catalog
25
+ from pyiceberg.exceptions import NoSuchTableError
26
+ from typing import Dict
27
+ from urllib.parse import urlparse
28
+
29
+
30
+ async def import_csv_to_table(
31
+ warehouse: str,
32
+ region: str,
33
+ namespace: str,
34
+ table_name: str,
35
+ s3_url: str,
36
+ uri: str,
37
+ catalog_name: str = 's3tablescatalog',
38
+ rest_signing_name: str = 's3tables',
39
+ rest_sigv4_enabled: str = 'true',
40
+ ) -> Dict:
41
+ """Import data from a CSV file into an S3 table.
42
+
43
+ This function reads data from a CSV file stored in S3 and imports it into an S3 table.
44
+ If the table doesn't exist, it will be created using the schema inferred from the CSV file.
45
+
46
+ Args:
47
+ warehouse: Warehouse string for Iceberg catalog
48
+ region: AWS region for S3Tables/Iceberg REST endpoint
49
+ namespace: The namespace containing the table
50
+ table_name: The name of the table to import data into
51
+ s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
52
+ uri: REST URI for Iceberg catalog
53
+ catalog_name: Catalog name
54
+ rest_signing_name: REST signing name
55
+ rest_sigv4_enabled: Enable SigV4 signing
56
+
57
+ Returns:
58
+ A dictionary containing:
59
+ - status: 'success' or 'error'
60
+ - message: Success message or error details
61
+ - rows_processed: Number of rows processed (on success)
62
+ - file_processed: Name of the processed file
63
+ - table_created: Boolean indicating if a new table was created (on success)
64
+ """
65
+ # Parse S3 URL
66
+ parsed = urlparse(s3_url)
67
+ bucket = parsed.netloc
68
+ key = parsed.path.lstrip('/')
69
+
70
+ try:
71
+ # Load Iceberg catalog
72
+ catalog = pyiceberg_load_catalog(
73
+ catalog_name,
74
+ warehouse,
75
+ uri,
76
+ region,
77
+ rest_signing_name,
78
+ rest_sigv4_enabled,
79
+ )
80
+
81
+ # Get S3 client and read the CSV file to infer schema
82
+ s3_client = get_s3_client()
83
+ response = s3_client.get_object(Bucket=bucket, Key=key)
84
+ csv_data = response['Body'].read()
85
+
86
+ # Read CSV file into PyArrow Table to infer schema
87
+ # Convert bytes to file-like object for PyArrow
88
+ csv_buffer = io.BytesIO(csv_data)
89
+ csv_table = pc.read_csv(csv_buffer)
90
+ csv_schema = csv_table.schema
91
+
92
+ table_created = False
93
+ try:
94
+ # Try to load existing table
95
+ table = catalog.load_table(f'{namespace}.{table_name}')
96
+ except NoSuchTableError:
97
+ # Table doesn't exist, create it using the CSV schema
98
+ try:
99
+ table = catalog.create_table(
100
+ identifier=f'{namespace}.{table_name}',
101
+ schema=csv_schema,
102
+ )
103
+ table_created = True
104
+ except Exception as create_error:
105
+ return {
106
+ 'status': 'error',
107
+ 'error': f'Failed to create table: {str(create_error)}',
108
+ }
109
+
110
+ # Append data to Iceberg table
111
+ table.append(csv_table)
112
+
113
+ return {
114
+ 'status': 'success',
115
+ 'message': f'Successfully imported {csv_table.num_rows} rows{" and created new table" if table_created else ""}',
116
+ 'rows_processed': csv_table.num_rows,
117
+ 'file_processed': os.path.basename(key),
118
+ 'table_created': table_created,
119
+ 'table_uuid': table.metadata.table_uuid,
120
+ }
121
+
122
+ except Exception as e:
123
+ return {'status': 'error', 'error': str(e)}
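import_csv_to_table is an async coroutine, so callers outside the MCP server need an event loop. A usage sketch with placeholder values (the account ID, bucket, and object key below are examples only, not taken from this diff):

```python
import asyncio

from awslabs.s3_tables_mcp_server.file_processor import import_csv_to_table


async def main() -> None:
    # Placeholder ARN, bucket, and key; substitute real values.
    result = await import_csv_to_table(
        warehouse='arn:aws:s3tables:us-west-2:111122223333:bucket/example-bucket',
        region='us-west-2',
        namespace='retail_data',
        table_name='customers',
        s3_url='s3://example-bucket/imports/customers.csv',
        uri='https://s3tables.us-west-2.amazonaws.com/iceberg',
    )
    if result['status'] == 'error':
        raise RuntimeError(result['error'])
    print(result['message'], result['rows_processed'])


if __name__ == '__main__':
    asyncio.run(main())
```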
@@ -0,0 +1,116 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pyarrow.parquet as pq
16
+ from awslabs.s3_tables_mcp_server.utils import get_s3_client, pyiceberg_load_catalog
17
+ from io import BytesIO
18
+ from pyiceberg.exceptions import NoSuchTableError
19
+ from typing import Dict
20
+
21
+
22
+ async def import_parquet_to_table(
23
+ warehouse: str,
24
+ region: str,
25
+ namespace: str,
26
+ table_name: str,
27
+ s3_url: str,
28
+ uri: str,
29
+ catalog_name: str = 's3tablescatalog',
30
+ rest_signing_name: str = 's3tables',
31
+ rest_sigv4_enabled: str = 'true',
32
+ ) -> Dict:
33
+ """Import data from a Parquet file into an S3 table.
34
+
35
+ This function reads data from a Parquet file stored in S3 and imports it into an existing Iceberg table.
36
+ If the table doesn't exist, it will be created using the schema from the Parquet file.
37
+
38
+ Args:
39
+ warehouse: Warehouse string for Iceberg catalog
40
+ region: AWS region for S3Tables/Iceberg REST endpoint
41
+ namespace: The namespace containing the table
42
+ table_name: The name of the table to import data into
43
+ s3_url: The S3 URL of the Parquet file
44
+ uri: REST URI for Iceberg catalog
45
+ catalog_name: Catalog name
46
+ rest_signing_name: REST signing name
47
+ rest_sigv4_enabled: Enable SigV4 signing
48
+
49
+ Returns:
50
+ A dictionary containing:
51
+ - status: 'success' or 'error'
52
+ - message: Success message or error details
53
+ - rows_processed: Number of rows processed (on success)
54
+ - file_processed: Name of the processed file
55
+ - table_created: Boolean indicating if a new table was created (on success)
56
+ """
57
+ import os
58
+ from urllib.parse import urlparse
59
+
60
+ # Parse S3 URL
61
+ parsed = urlparse(s3_url)
62
+ bucket = parsed.netloc
63
+ key = parsed.path.lstrip('/')
64
+
65
+ try:
66
+ # Load Iceberg catalog
67
+ catalog = pyiceberg_load_catalog(
68
+ catalog_name,
69
+ warehouse,
70
+ uri,
71
+ region,
72
+ rest_signing_name,
73
+ rest_sigv4_enabled,
74
+ )
75
+
76
+ # Get S3 client and read the Parquet file first to get the schema
77
+ s3_client = get_s3_client()
78
+ response = s3_client.get_object(Bucket=bucket, Key=key)
79
+ parquet_data = BytesIO(response['Body'].read())
80
+
81
+ # Read Parquet file into PyArrow Table
82
+ parquet_table = pq.read_table(parquet_data)
83
+ parquet_schema = parquet_table.schema
84
+
85
+ table_created = False
86
+ try:
87
+ # Try to load existing table
88
+ table = catalog.load_table(f'{namespace}.{table_name}')
89
+ except NoSuchTableError:
90
+ # Table doesn't exist, create it using the Parquet schema
91
+ try:
92
+ table = catalog.create_table(
93
+ identifier=f'{namespace}.{table_name}',
94
+ schema=parquet_schema,
95
+ )
96
+ table_created = True
97
+ except Exception as create_error:
98
+ return {
99
+ 'status': 'error',
100
+ 'error': f'Failed to create table: {str(create_error)}',
101
+ }
102
+
103
+ # Append data to Iceberg table
104
+ table.append(parquet_table)
105
+
106
+ return {
107
+ 'status': 'success',
108
+ 'message': f'Successfully imported {parquet_table.num_rows} rows{" and created new table" if table_created else ""}',
109
+ 'rows_processed': parquet_table.num_rows,
110
+ 'file_processed': os.path.basename(key),
111
+ 'table_created': table_created,
112
+ 'table_uuid': table.metadata.table_uuid,
113
+ }
114
+
115
+ except Exception as e:
116
+ return {'status': 'error', 'error': str(e)}
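The Parquet importer mirrors the CSV path; the main difference is that the schema comes from the Parquet file metadata rather than from CSV inference. A standalone sketch of just the read step, using boto3 directly where the module above goes through get_s3_client() (bucket and key are placeholders):

```python
from io import BytesIO

import boto3
import pyarrow.parquet as pq

# Placeholder bucket and key.
s3_client = boto3.client('s3')
response = s3_client.get_object(Bucket='example-bucket', Key='imports/customers.parquet')
parquet_table = pq.read_table(BytesIO(response['Body'].read()))

print(parquet_table.schema)    # taken from the Parquet footer, no inference needed
print(parquet_table.num_rows)
```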
@@ -32,7 +32,6 @@ from .utils import set_user_agent_mode
32
32
  from awslabs.s3_tables_mcp_server import (
33
33
  __version__,
34
34
  database,
35
- file_processor,
36
35
  namespaces,
37
36
  resources,
38
37
  s3_operations,
@@ -48,6 +47,12 @@ from awslabs.s3_tables_mcp_server.constants import (
48
47
  TABLE_BUCKET_NAME_PATTERN,
49
48
  TABLE_NAME_FIELD,
50
49
  )
50
+ from awslabs.s3_tables_mcp_server.file_processor import (
51
+ import_csv_to_table as import_csv_to_table_func,
52
+ )
53
+ from awslabs.s3_tables_mcp_server.file_processor import (
54
+ import_parquet_to_table as import_parquet_to_table_func,
55
+ )
51
56
  from datetime import datetime, timezone
52
57
  from mcp.server.fastmcp import FastMCP
53
58
  from pydantic import Field
@@ -567,32 +572,75 @@ async def query_database(
567
572
 
568
573
  @app.tool()
569
574
  @log_tool_call_with_response
570
- async def preview_csv_file(
575
+ @write_operation
576
+ async def import_csv_to_table(
577
+ warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
578
+ region: Annotated[
579
+ str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
580
+ ],
581
+ namespace: Annotated[str, NAMESPACE_NAME_FIELD],
582
+ table_name: Annotated[str, TABLE_NAME_FIELD],
571
583
  s3_url: Annotated[str, S3_URL_FIELD],
584
+ uri: Annotated[str, Field(..., description='REST URI for Iceberg catalog')],
585
+ catalog_name: Annotated[
586
+ str, Field('s3tablescatalog', description='Catalog name')
587
+ ] = 's3tablescatalog',
588
+ rest_signing_name: Annotated[
589
+ str, Field('s3tables', description='REST signing name')
590
+ ] = 's3tables',
591
+ rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
572
592
  ) -> dict:
573
- """Preview the structure of a CSV file stored in S3.
593
+ """Import data from a CSV file into an S3 table.
574
594
 
575
- This tool provides a quick preview of a CSV file's structure by reading
576
- only the headers and first row of data from an S3 location. It's useful for
577
- understanding the schema and data format without downloading the entire file.
578
- It can be used before creating an s3 table from a csv file to get the schema and data format.
595
+ This tool reads data from a CSV file stored in S3 and imports it into an S3 table.
596
+ If the table doesn't exist, it will be created with a schema inferred from the CSV file.
597
+ If the table exists, the CSV file schema must be compatible with the table's schema.
598
+ The tool will validate the schema before attempting to import the data.
579
599
 
580
600
  Returns error dictionary with status and error message if:
581
601
  - URL is not a valid S3 URL
582
602
  - File is not a CSV file
583
603
  - File cannot be accessed
604
+ - Table creation fails (when the table does not already exist)
605
+ - CSV schema is incompatible with the existing table schema
584
606
  - Any other error occurs
585
607
 
608
+ Example input values:
609
+ warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
610
+ region: 'us-west-2'
611
+ namespace: 'retail_data'
612
+ table_name: 'customers'
613
+ s3_url: 's3://bucket-name/path/to/file.csv'
614
+ uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
615
+ catalog_name: 's3tablescatalog'
616
+ rest_signing_name: 's3tables'
617
+ rest_sigv4_enabled: 'true'
618
+
586
619
  Permissions:
587
- You must have the s3:GetObject permission for the S3 bucket and key.
620
+ You must have:
621
+ - s3:GetObject permission for the CSV file
622
+ - s3tables:GetTable and s3tables:GetTables permissions to access table information
623
+ - s3tables:PutTableData permission to write to the table
588
624
  """
589
- return file_processor.preview_csv_structure(s3_url)
625
+ if uri is None:
626
+ uri = _default_uri_for_region(region)
627
+ return await import_csv_to_table_func(
628
+ warehouse=warehouse,
629
+ region=region,
630
+ namespace=namespace,
631
+ table_name=table_name,
632
+ s3_url=s3_url,
633
+ uri=uri,
634
+ catalog_name=catalog_name,
635
+ rest_signing_name=rest_signing_name,
636
+ rest_sigv4_enabled=rest_sigv4_enabled,
637
+ )
590
638
 
591
639
 
592
640
  @app.tool()
593
641
  @log_tool_call_with_response
594
642
  @write_operation
595
- async def import_csv_to_table(
643
+ async def import_parquet_to_table(
596
644
  warehouse: Annotated[str, Field(..., description='Warehouse string for Iceberg catalog')],
597
645
  region: Annotated[
598
646
  str, Field(..., description='AWS region for S3Tables/Iceberg REST endpoint')
@@ -609,29 +657,33 @@ async def import_csv_to_table(
609
657
  ] = 's3tables',
610
658
  rest_sigv4_enabled: Annotated[str, Field('true', description='Enable SigV4 signing')] = 'true',
611
659
  ) -> dict:
612
- """Import data from a CSV file into an S3 table.
660
+ """Import data from a Parquet file into an S3 table.
613
661
 
614
- This tool reads data from a CSV file stored in S3 and imports it into an existing S3 table.
615
- The CSV file must have headers that match the table's schema. The tool will validate the CSV structure
616
- before attempting to import the data.
617
-
618
- To create a table, first use the preview_csv_file tool to get the schema and data format.
619
- Then use the create_table tool to create the table.
662
+ This tool reads data from a Parquet file stored in S3 and imports it into an S3 table.
663
+ If the table doesn't exist, it will be created with a schema inferred from the Parquet file.
664
+ If the table exists, the Parquet file schema must be compatible with the table's schema.
665
+ The tool will validate the schema before attempting to import the data.
620
666
 
621
667
  Returns error dictionary with status and error message if:
622
668
  - URL is not a valid S3 URL
623
- - File is not a CSV file
669
+ - File is not a Parquet file
624
670
  - File cannot be accessed
625
- - Table does not exist
626
- - CSV headers don't match table schema
671
+ - Parquet schema is incompatible with existing table schema
627
672
  - Any other error occurs
628
673
 
674
+ Returns success dictionary with:
675
+ - status: 'success'
676
+ - message: Success message with row count
677
+ - rows_processed: Number of rows imported
678
+ - file_processed: Name of the processed file
679
+ - table_created: True if a new table was created
680
+
629
681
  Example input values:
630
682
  warehouse: 'arn:aws:s3tables:<Region>:<accountID>:bucket/<bucketname>'
631
683
  region: 'us-west-2'
632
684
  namespace: 'retail_data'
633
685
  table_name: 'customers'
634
- s3_url: 's3://bucket-name/path/to/file.csv'
686
+ s3_url: 's3://bucket-name/path/to/file.parquet'
635
687
  uri: 'https://s3tables.us-west-2.amazonaws.com/iceberg'
636
688
  catalog_name: 's3tablescatalog'
637
689
  rest_signing_name: 's3tables'
@@ -639,14 +691,14 @@ async def import_csv_to_table(
639
691
 
640
692
  Permissions:
641
693
  You must have:
642
- - s3:GetObject permission for the CSV file
643
- - s3tables:GetDatabase and s3tables:GetDatabases permissions to access database information
694
+ - s3:GetObject permission for the Parquet file
644
695
  - s3tables:GetTable and s3tables:GetTables permissions to access table information
645
696
  - s3tables:PutTableData permission to write to the table
697
+ - s3tables:CreateTable permission (if table doesn't exist)
646
698
  """
647
699
  if uri is None:
648
700
  uri = _default_uri_for_region(region)
649
- return await file_processor.import_csv_to_table(
701
+ return await import_parquet_to_table_func(
650
702
  warehouse=warehouse,
651
703
  region=region,
652
704
  namespace=namespace,
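Both new tool wrappers fall back to _default_uri_for_region(region) when no uri is supplied. That helper's body is not part of this diff; judging from the endpoint shown in the docstring examples, it plausibly looks like the sketch below, which is an assumption rather than the published implementation:

```python
def _default_uri_for_region(region: str) -> str:
    # Assumed shape, matching the docstring example
    # 'https://s3tables.us-west-2.amazonaws.com/iceberg'.
    return f'https://s3tables.{region}.amazonaws.com/iceberg'
```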
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: awslabs.s3-tables-mcp-server
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: An AWS Labs Model Context Protocol (MCP) server for awslabs.s3-tables-mcp-server
5
5
  Project-URL: homepage, https://awslabs.github.io/mcp/
6
6
  Project-URL: docs, https://awslabs.github.io/mcp/servers/s3-tables-mcp-server/
@@ -1,21 +1,23 @@
1
1
  awslabs/__init__.py,sha256=BHp8_uaBohApyLlmVWvYVe5bSrH59FvLJ5cNTigMV_8,644
2
- awslabs/s3_tables_mcp_server/__init__.py,sha256=tAFjxXc05WBoVKOK38ijpQ0TqVAFS0h4gfiiPe8TeHo,754
2
+ awslabs/s3_tables_mcp_server/__init__.py,sha256=T0dJ6usOanvF7UeP7NAt_YhLunvmWkSXvnjOvbL467Y,754
3
3
  awslabs/s3_tables_mcp_server/constants.py,sha256=qCWY9A9PAQXdVz-anO26zbQ72Dp79nGM7xeLR062a_o,4971
4
4
  awslabs/s3_tables_mcp_server/database.py,sha256=YorxcSx-9typfQ5W_LzwNPZkP47u__QSLJlp0fBsZLg,3851
5
- awslabs/s3_tables_mcp_server/file_processor.py,sha256=BZR-yMFoB4NKJb1hzD3pYT0ziLS4QiEB5iLWSfDju1U,17031
6
5
  awslabs/s3_tables_mcp_server/models.py,sha256=zWTFJLBhIZRLEgOCTyNcGvbItxqYbFJKH6se1EzXDjY,8097
7
6
  awslabs/s3_tables_mcp_server/namespaces.py,sha256=KZqxJiEnlpxkqvbfygezbr0szwyDP2O0J6osyiPUzwg,2071
8
7
  awslabs/s3_tables_mcp_server/resources.py,sha256=PXZo0sTVn34tXJ4mlw_OS90p12SNoLZs4Re0gV815wk,8281
9
8
  awslabs/s3_tables_mcp_server/s3_operations.py,sha256=Zq3oe-uHuKbW87b_WQyM-6HZ0_ikbgiagb2SVesltdg,1656
10
- awslabs/s3_tables_mcp_server/server.py,sha256=kfVz0oHhS3S4_LApxVPpospIkqfin8TlGMf9J7nMVmo,29842
9
+ awslabs/s3_tables_mcp_server/server.py,sha256=cvXDTZuK1sGpYfjLbF6iLGe49BSA0yx4rSp73UEBcvE,32008
11
10
  awslabs/s3_tables_mcp_server/table_buckets.py,sha256=JHmpB_P9h0Hz5Uis25_GPTD1G-mIODVwjaswwIGyCS4,4471
12
11
  awslabs/s3_tables_mcp_server/tables.py,sha256=ITnRDHHrtRWLsRhff4TP4B7gGT_jRXy994oxK3x10a4,10143
13
12
  awslabs/s3_tables_mcp_server/utils.py,sha256=SReyS3KsdikI9ycL5RsvtVI7MiRnA1W9bTiXGKf1lHc,4517
14
13
  awslabs/s3_tables_mcp_server/engines/__init__.py,sha256=O4wlFva3THWmjfaXfJAwi29mxJSKIhM0jcebVfd3S5U,615
15
- awslabs/s3_tables_mcp_server/engines/pyiceberg.py,sha256=9D9xN1BMOpdNCBNZ2TnuR88kGodURHG6HOweM-oP918,9299
16
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/METADATA,sha256=XU8CVGfDUURiNbT99jV6gAiT33B6TDGUVUj6tYKusrc,11511
17
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/entry_points.txt,sha256=WRA45Bi2dVY5hskxkka_e7BAGRqG1KiW3ImTBnHSyLs,90
19
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
20
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/licenses/NOTICE,sha256=jm-1A_8i-wl7KYs2Ynj2A29vXWJCMKLHmGfy4P_B51Y,96
21
- awslabs_s3_tables_mcp_server-0.0.2.dist-info/RECORD,,
14
+ awslabs/s3_tables_mcp_server/engines/pyiceberg.py,sha256=wzkySQZgx7L8Fn2Oqk8Yz4V-hQZDUempd8q0IwvCY_4,5784
15
+ awslabs/s3_tables_mcp_server/file_processor/__init__.py,sha256=8PeggFRY3ZKBdxcFPEqSSHkSJBZ57eOs-z0fqkMHn9E,978
16
+ awslabs/s3_tables_mcp_server/file_processor/csv.py,sha256=Sngc5mfJDLxQaINBUJLBn5OLc842rv9FqqcJ1upK6iw,4406
17
+ awslabs/s3_tables_mcp_server/file_processor/parquet.py,sha256=Lr7mtqsK9jqlWokQv74dgdEgYmNKlCJ869yNNMrm69o,4189
18
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/METADATA,sha256=DYOFGTR6IgR7l2ZciRbZWF4yjQ5FSTL8ilpBdmKMHFY,11511
19
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/entry_points.txt,sha256=WRA45Bi2dVY5hskxkka_e7BAGRqG1KiW3ImTBnHSyLs,90
21
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
22
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/licenses/NOTICE,sha256=jm-1A_8i-wl7KYs2Ynj2A29vXWJCMKLHmGfy4P_B51Y,96
23
+ awslabs_s3_tables_mcp_server-0.0.3.dist-info/RECORD,,
@@ -1,485 +0,0 @@
1
- # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- """AWS S3 Tables MCP Server file processing module.
16
-
17
- This module provides functionality for processing and analyzing uploaded files,
18
- particularly focusing on CSV file handling and preview capabilities.
19
- """
20
-
21
- import csv
22
- import os
23
- import pyarrow as pa
24
- import re
25
- import uuid
26
- from .utils import get_s3_client, pyiceberg_load_catalog
27
- from datetime import date, datetime, time
28
- from decimal import Decimal
29
- from io import StringIO
30
- from pyiceberg.types import (
31
- BinaryType,
32
- BooleanType,
33
- DateType,
34
- DecimalType,
35
- DoubleType,
36
- FixedType,
37
- FloatType,
38
- IntegerType,
39
- ListType,
40
- LongType,
41
- MapType,
42
- StringType,
43
- StructType,
44
- TimestampType,
45
- TimestamptzType,
46
- TimeType,
47
- UUIDType,
48
- )
49
- from typing import Dict, List, Optional
50
- from urllib.parse import urlparse
51
-
52
-
53
- def validate_s3_url(s3_url: str) -> tuple[bool, Optional[str], Optional[str], Optional[str]]:
54
- """Validate an S3 URL and extract its components.
55
-
56
- Args:
57
- s3_url: The S3 URL to validate (format: s3://bucket-name/key)
58
-
59
- Returns:
60
- Tuple containing:
61
- - bool: Whether the URL is valid
62
- - str: Error message if invalid, None if valid
63
- - str: Bucket name if valid, None if invalid
64
- - str: Object key if valid, None if invalid
65
- """
66
- try:
67
- parsed = urlparse(s3_url)
68
- if parsed.scheme != 's3':
69
- return False, f"Invalid URL scheme: {parsed.scheme}. Must be 's3://'", None, None
70
-
71
- if not parsed.netloc:
72
- return False, 'Missing bucket name in S3 URL', None, None
73
-
74
- bucket = parsed.netloc
75
- key = parsed.path.lstrip('/')
76
-
77
- if not key:
78
- return False, 'Missing object key in S3 URL', None, None
79
-
80
- return True, None, bucket, key
81
- except Exception as e:
82
- return False, f'Error parsing S3 URL: {str(e)}', None, None
83
-
84
-
85
- def preview_csv_structure(s3_url: str) -> Dict:
86
- """Preview the structure of a CSV file stored in S3 by reading its headers and first row.
87
-
88
- This function provides a quick preview of a CSV file's structure by reading
89
- only the headers and first row of data from an S3 location. It's useful for
90
- understanding the schema and data format without downloading the entire file.
91
-
92
- Args:
93
- s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
94
-
95
- Returns:
96
- A dictionary containing:
97
- - headers: List of column names from the first row
98
- - first_row: Dictionary mapping column names to their values from the first data row (empty if no data)
99
- - total_columns: Number of columns in the CSV
100
- - file_name: Name of the CSV file
101
-
102
- Returns error dictionary with status and error message if:
103
- - URL is not a valid S3 URL
104
- - File is not a CSV file
105
- - File cannot be accessed
106
- - Any other error occurs
107
- """
108
- try:
109
- # Validate S3 URL
110
- is_valid, error_msg, bucket, key = validate_s3_url(s3_url)
111
- if not is_valid:
112
- return {'status': 'error', 'error': error_msg}
113
-
114
- # At this point, bucket and key are guaranteed to be non-None strings
115
- if bucket is None or key is None:
116
- return {'status': 'error', 'error': 'Invalid S3 URL: bucket or key is None'}
117
-
118
- # Check if file has .csv extension
119
- if not key.lower().endswith('.csv'):
120
- return {
121
- 'status': 'error',
122
- 'error': f'File {key} is not a CSV file. Only .csv files are supported.',
123
- }
124
-
125
- # Get S3 client
126
- s3_client = get_s3_client()
127
-
128
- # Get the object from S3, only downloading first 8KB (should be enough for headers and first row)
129
- response = s3_client.get_object(
130
- Bucket=bucket,
131
- Key=key,
132
- Range='bytes=0-32768', # First 32KB
133
- )
134
-
135
- # Read the CSV content
136
- csv_content = response['Body'].read().decode('utf-8')
137
-
138
- # Split content into lines
139
- lines = csv_content.splitlines()
140
- if not lines:
141
- return {'status': 'error', 'error': 'File is empty'}
142
-
143
- # Parse the headers
144
- headers = next(csv.reader([lines[0]]), [])
145
-
146
- # Try to get first row if it exists
147
- first_row = next(csv.reader([lines[1]]), []) if len(lines) > 1 else []
148
-
149
- # Create a dictionary mapping headers to first row values
150
- first_row_dict = dict(zip(headers, first_row)) if headers and first_row else {}
151
-
152
- return {
153
- 'headers': headers,
154
- 'first_row': first_row_dict,
155
- 'total_columns': len(headers),
156
- 'file_name': os.path.basename(key),
157
- }
158
- except Exception as e:
159
- return {'status': 'error', 'error': str(e)}
160
-
161
-
162
- def convert_value(value: Optional[str], iceberg_type):
163
- """Convert a string value to the appropriate type based on Iceberg schema type.
164
-
165
- Args:
166
- value: The string value to convert (can be None)
167
- iceberg_type: The Iceberg type to convert to
168
-
169
- Returns:
170
- The converted value of the appropriate type
171
-
172
- Raises:
173
- ValueError: If the value cannot be converted to the target type
174
- NotImplementedError: For unsupported complex types
175
- """
176
- if value is None or value == '':
177
- return None
178
-
179
- if isinstance(iceberg_type, BooleanType):
180
- return value.lower() in ('true', '1', 'yes')
181
-
182
- elif isinstance(iceberg_type, IntegerType):
183
- return int(value)
184
-
185
- elif isinstance(iceberg_type, LongType):
186
- return int(value)
187
-
188
- elif isinstance(iceberg_type, FloatType):
189
- return float(value)
190
-
191
- elif isinstance(iceberg_type, DoubleType):
192
- return float(value)
193
-
194
- elif isinstance(iceberg_type, DecimalType):
195
- return Decimal(value)
196
-
197
- elif isinstance(iceberg_type, DateType):
198
- return date.fromisoformat(value)
199
-
200
- elif isinstance(iceberg_type, TimeType):
201
- return time.fromisoformat(value)
202
-
203
- elif isinstance(iceberg_type, TimestampType):
204
- return datetime.fromisoformat(value)
205
-
206
- elif isinstance(iceberg_type, TimestamptzType):
207
- return datetime.fromisoformat(value) # Ensure it's tz-aware if needed
208
-
209
- elif isinstance(iceberg_type, StringType):
210
- return str(value)
211
-
212
- elif isinstance(iceberg_type, UUIDType):
213
- return uuid.UUID(value)
214
-
215
- elif isinstance(iceberg_type, BinaryType) or isinstance(iceberg_type, FixedType):
216
- return bytes.fromhex(value)
217
-
218
- elif isinstance(iceberg_type, ListType):
219
- # naive split for example; you'd want better parsing logic
220
- return [convert_value(v.strip(), iceberg_type.element_type) for v in value.split(',')]
221
-
222
- elif isinstance(iceberg_type, MapType):
223
- # naive: "key1:value1,key2:value2"
224
- return {
225
- k.strip(): convert_value(v.strip(), iceberg_type.value_type)
226
- for k, v in (item.split(':') for item in value.split(','))
227
- }
228
-
229
- elif isinstance(iceberg_type, StructType):
230
- raise NotImplementedError('Nested structs need structured input like JSON or dict.')
231
-
232
- else:
233
- raise ValueError(f'Unsupported Iceberg type: {iceberg_type}')
234
-
235
-
236
- def create_pyarrow_schema_from_iceberg(schema) -> pa.Schema:
237
- """Create a PyArrow schema from an Iceberg schema, supporting basic types and decimals."""
238
-
239
- def convert_iceberg_type_to_pyarrow(iceberg_type_str: str):
240
- """Convert an Iceberg type string to a PyArrow type."""
241
- iceberg_type_str = iceberg_type_str.lower()
242
-
243
- if iceberg_type_str == 'boolean':
244
- return pa.bool_()
245
- elif iceberg_type_str == 'int':
246
- return pa.int32()
247
- elif iceberg_type_str == 'long':
248
- return pa.int64()
249
- elif iceberg_type_str == 'float':
250
- return pa.float32()
251
- elif iceberg_type_str == 'double':
252
- return pa.float64()
253
- elif iceberg_type_str == 'date':
254
- return pa.date32()
255
- elif iceberg_type_str == 'time':
256
- return pa.time64('us')
257
- elif iceberg_type_str == 'timestamp':
258
- return pa.timestamp('us')
259
- elif iceberg_type_str == 'timestamptz':
260
- return pa.timestamp('us', tz='UTC')
261
- elif iceberg_type_str == 'string':
262
- return pa.string()
263
- elif iceberg_type_str == 'uuid':
264
- return pa.string()
265
- elif iceberg_type_str == 'binary':
266
- return pa.binary()
267
- elif iceberg_type_str.startswith('fixed'):
268
- size_match = re.match(r'fixed\((\d+)\)', iceberg_type_str)
269
- return pa.binary(int(size_match.group(1))) if size_match else pa.binary()
270
- elif iceberg_type_str.startswith('decimal'):
271
- decimal_match = re.match(r'decimal\((\d+),\s*(\d+)\)', iceberg_type_str)
272
- if decimal_match:
273
- precision = int(decimal_match.group(1))
274
- scale = int(decimal_match.group(2))
275
- if precision <= 18:
276
- return pa.decimal128(
277
- precision, scale
278
- ) # Will use INT64 encoding for small precision
279
- else:
280
- return pa.decimal256(precision, scale) # For large precision decimals
281
- else:
282
- raise ValueError(f'Invalid decimal type format: {iceberg_type_str}')
283
- else:
284
- raise ValueError(f'Unsupported Iceberg type: {iceberg_type_str}')
285
-
286
- # Build PyArrow schema
287
- pa_fields = []
288
- for field in schema.fields:
289
- name = field.name
290
- iceberg_type_str = str(field.field_type)
291
- try:
292
- pa_type = convert_iceberg_type_to_pyarrow(iceberg_type_str)
293
- except ValueError as e:
294
- raise ValueError(f"Error in field '{name}': {e}")
295
-
296
- pa_fields.append(pa.field(name, pa_type, nullable=not field.required))
297
-
298
- return pa.schema(pa_fields)
299
-
300
-
301
- def process_chunk(chunk: List[Dict], table, chunk_name: str = 'Chunk') -> Dict:
302
- """Process a chunk of data by converting it to a PyArrow table and appending to the table.
303
-
304
- Args:
305
- chunk: List of dictionaries representing the data rows
306
- table: The Iceberg table to append data to
307
- chunk_name: Name identifier for the chunk (for logging purposes)
308
-
309
- Returns:
310
- Dictionary with status and message
311
- """
312
- try:
313
- # Get the Iceberg schema and create PyArrow schema
314
- schema = table.schema()
315
- pyarrow_schema = create_pyarrow_schema_from_iceberg(schema)
316
-
317
- # Convert list of dictionaries to PyArrow table with proper schema
318
- table_data = pa.Table.from_pylist(chunk, schema=pyarrow_schema)
319
-
320
- table.append(table_data)
321
-
322
- return {
323
- 'status': 'success',
324
- 'message': f'Successfully processed {len(chunk)} rows in {chunk_name.lower()}',
325
- }
326
-
327
- except Exception as e:
328
- return {'status': 'error', 'error': f'Error inserting {chunk_name.lower()}: {str(e)}'}
329
-
330
-
331
- async def import_csv_to_table(
332
- warehouse: str,
333
- region: str,
334
- namespace: str,
335
- table_name: str,
336
- s3_url: str,
337
- uri: str = 'https://s3tables.us-west-2.amazonaws.com/iceberg',
338
- catalog_name: str = 's3tablescatalog',
339
- rest_signing_name: str = 's3tables',
340
- rest_sigv4_enabled: str = 'true',
341
- ) -> Dict:
342
- """Import data from a CSV file into an S3 table.
343
-
344
- This function reads data from a CSV file stored in S3 and imports it into an existing S3 table.
345
- The CSV file must have headers that match the table's schema. The function will validate the CSV structure
346
- before attempting to import the data.
347
-
348
- Args:
349
- warehouse: Warehouse string for Iceberg catalog
350
- region: AWS region for S3Tables/Iceberg REST endpoint
351
- namespace: The namespace containing the table
352
- table_name: The name of the table to import data into
353
- s3_url: The S3 URL of the CSV file (format: s3://bucket-name/key)
354
- uri: REST URI for Iceberg catalog
355
- catalog_name: Catalog name
356
- rest_signing_name: REST signing name
357
- rest_sigv4_enabled: Enable SigV4 signing
358
-
359
- Returns:
360
- A dictionary containing:
361
- - status: 'success' or 'error'
362
- - message: Success message or error details
363
- - rows_processed: Number of rows processed (on success)
364
- - file_processed: Name of the processed file
365
- - csv_headers: List of CSV headers
366
-
367
- Returns error dictionary with status and error message if:
368
- - URL is not a valid S3 URL
369
- - File is not a CSV file
370
- - File cannot be accessed
371
- - Table does not exist
372
- - CSV headers don't match table schema
373
- - Any other error occurs
374
- """
375
- # Validate S3 URL
376
- is_valid, error_msg, bucket, key = validate_s3_url(s3_url)
377
- if not is_valid:
378
- return {'status': 'error', 'error': error_msg}
379
-
380
- if bucket is None or key is None:
381
- return {'status': 'error', 'error': 'Invalid S3 URL: bucket or key is None'}
382
-
383
- if not key.lower().endswith('.csv'):
384
- return {
385
- 'status': 'error',
386
- 'error': f'File {key} is not a CSV file. Only .csv files are supported.',
387
- }
388
-
389
- try:
390
- # Load catalog using provided parameters (see pyiceberg.py style)
391
- catalog = pyiceberg_load_catalog(
392
- catalog_name,
393
- warehouse,
394
- uri,
395
- region,
396
- rest_signing_name,
397
- rest_sigv4_enabled,
398
- )
399
-
400
- # Load existing table
401
- table = catalog.load_table(f'{namespace}.{table_name}')
402
-
403
- # Get schema information
404
- schema = table.schema()
405
-
406
- # Get S3 client
407
- s3_client = get_s3_client()
408
-
409
- # Get the CSV file from S3
410
- response = s3_client.get_object(Bucket=bucket, Key=key)
411
- csv_content = response['Body'].read().decode('utf-8')
412
-
413
- # Read CSV content
414
- csv_reader = csv.DictReader(StringIO(csv_content))
415
-
416
- # Validate headers against schema
417
- csv_headers = csv_reader.fieldnames
418
- schema_field_names = {field.name for field in schema.fields}
419
-
420
- if not csv_headers:
421
- return {'status': 'error', 'error': 'CSV file has no headers'}
422
-
423
- missing_columns = schema_field_names - set(csv_headers)
424
- if missing_columns:
425
- return {
426
- 'status': 'error',
427
- 'error': f'CSV is missing required columns: {", ".join(missing_columns)}',
428
- }
429
-
430
- # Process rows in chunks
431
- chunk_size = 5000
432
- rows_processed = 0
433
- current_chunk = []
434
-
435
- for row in csv_reader:
436
- # Transform row data according to schema types
437
- transformed_row = {}
438
- for field in schema.fields:
439
- value = row.get(field.name)
440
-
441
- # Handle required fields
442
- if field.required and (value is None or value == ''):
443
- return {
444
- 'status': 'error',
445
- 'error': f'Required field {field.name} is missing or empty in row {rows_processed + 1}',
446
- }
447
-
448
- # Transform value based on field type
449
- try:
450
- if value is None or value == '':
451
- transformed_row[field.name] = None
452
- else:
453
- transformed_row[field.name] = convert_value(value, field.field_type)
454
- except (ValueError, TypeError) as e:
455
- return {
456
- 'status': 'error',
457
- 'error': f'Error converting value for field {field.name} in row {rows_processed + 1}: {str(e)}',
458
- }
459
-
460
- current_chunk.append(transformed_row)
461
- rows_processed += 1
462
-
463
- # Process chunk when it reaches the chunk size
464
- if len(current_chunk) >= chunk_size:
465
- result = process_chunk(current_chunk, table, 'Chunk')
466
- if result['status'] == 'error':
467
- return result
468
- current_chunk = []
469
-
470
- # Process any remaining rows
471
- if current_chunk:
472
- result = process_chunk(current_chunk, table, 'Final Chunk')
473
- if result['status'] == 'error':
474
- return result
475
-
476
- return {
477
- 'status': 'success',
478
- 'message': f'Successfully processed {rows_processed} rows',
479
- 'rows_processed': rows_processed,
480
- 'file_processed': os.path.basename(key),
481
- 'csv_headers': csv_headers,
482
- }
483
-
484
- except Exception as e:
485
- return {'status': 'error', 'error': str(e)}