daita_agents-0.1.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Potentially problematic release: this version of daita-agents might be problematic.
- daita/__init__.py +208 -0
- daita/agents/__init__.py +33 -0
- daita/agents/base.py +722 -0
- daita/agents/substrate.py +895 -0
- daita/cli/__init__.py +145 -0
- daita/cli/__main__.py +7 -0
- daita/cli/ascii_art.py +44 -0
- daita/cli/core/__init__.py +0 -0
- daita/cli/core/create.py +254 -0
- daita/cli/core/deploy.py +473 -0
- daita/cli/core/deployments.py +309 -0
- daita/cli/core/import_detector.py +219 -0
- daita/cli/core/init.py +382 -0
- daita/cli/core/logs.py +239 -0
- daita/cli/core/managed_deploy.py +709 -0
- daita/cli/core/run.py +648 -0
- daita/cli/core/status.py +421 -0
- daita/cli/core/test.py +239 -0
- daita/cli/core/webhooks.py +172 -0
- daita/cli/main.py +588 -0
- daita/cli/utils.py +541 -0
- daita/config/__init__.py +62 -0
- daita/config/base.py +159 -0
- daita/config/settings.py +184 -0
- daita/core/__init__.py +262 -0
- daita/core/decision_tracing.py +701 -0
- daita/core/exceptions.py +480 -0
- daita/core/focus.py +251 -0
- daita/core/interfaces.py +76 -0
- daita/core/plugin_tracing.py +550 -0
- daita/core/relay.py +695 -0
- daita/core/reliability.py +381 -0
- daita/core/scaling.py +444 -0
- daita/core/tools.py +402 -0
- daita/core/tracing.py +770 -0
- daita/core/workflow.py +1084 -0
- daita/display/__init__.py +1 -0
- daita/display/console.py +160 -0
- daita/execution/__init__.py +58 -0
- daita/execution/client.py +856 -0
- daita/execution/exceptions.py +92 -0
- daita/execution/models.py +317 -0
- daita/llm/__init__.py +60 -0
- daita/llm/anthropic.py +166 -0
- daita/llm/base.py +373 -0
- daita/llm/factory.py +101 -0
- daita/llm/gemini.py +152 -0
- daita/llm/grok.py +114 -0
- daita/llm/mock.py +135 -0
- daita/llm/openai.py +109 -0
- daita/plugins/__init__.py +141 -0
- daita/plugins/base.py +37 -0
- daita/plugins/base_db.py +167 -0
- daita/plugins/elasticsearch.py +844 -0
- daita/plugins/mcp.py +481 -0
- daita/plugins/mongodb.py +510 -0
- daita/plugins/mysql.py +351 -0
- daita/plugins/postgresql.py +331 -0
- daita/plugins/redis_messaging.py +500 -0
- daita/plugins/rest.py +529 -0
- daita/plugins/s3.py +761 -0
- daita/plugins/slack.py +729 -0
- daita/utils/__init__.py +18 -0
- daita_agents-0.1.0.dist-info/METADATA +350 -0
- daita_agents-0.1.0.dist-info/RECORD +69 -0
- daita_agents-0.1.0.dist-info/WHEEL +5 -0
- daita_agents-0.1.0.dist-info/entry_points.txt +2 -0
- daita_agents-0.1.0.dist-info/licenses/LICENSE +56 -0
- daita_agents-0.1.0.dist-info/top_level.txt +1 -0
daita/plugins/s3.py
ADDED
@@ -0,0 +1,761 @@
"""
AWS S3 plugin for Daita Agents.

Simple S3 object storage operations - no over-engineering.
"""
import logging
import os
import io
from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
from pathlib import Path

if TYPE_CHECKING:
    from ..core.tools import AgentTool

logger = logging.getLogger(__name__)

class S3Plugin:
    """
    Simple AWS S3 plugin for agents.

    Handles S3 operations with automatic format detection and focus system support.
    """

    def __init__(
        self,
        bucket: str,
        region: str = "us-east-1",
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_session_token: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize S3 connection.

        Args:
            bucket: S3 bucket name
            region: AWS region
            aws_access_key_id: AWS access key (optional, uses env/IAM if not provided)
            aws_secret_access_key: AWS secret key (optional, uses env/IAM if not provided)
            aws_session_token: AWS session token (optional, for temporary credentials)
            endpoint_url: Custom S3 endpoint URL (for S3-compatible services)
            **kwargs: Additional boto3 parameters
        """
        if not bucket or not bucket.strip():
            raise ValueError("S3 bucket name cannot be empty")

        self.bucket = bucket
        self.region = region
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.aws_session_token = aws_session_token
        self.endpoint_url = endpoint_url

        # Store additional config
        self.config = kwargs

        self._client = None
        self._session = None

        logger.debug(f"S3 plugin configured for bucket {bucket} in region {region}")

    async def connect(self):
        """Initialize S3 client."""
        if self._client is not None:
            return  # Already connected

        try:
            import boto3
            from botocore.exceptions import ClientError, NoCredentialsError

            # Create session with credentials
            session_kwargs = {
                'region_name': self.region
            }

            if self.aws_access_key_id:
                session_kwargs['aws_access_key_id'] = self.aws_access_key_id
            if self.aws_secret_access_key:
                session_kwargs['aws_secret_access_key'] = self.aws_secret_access_key
            if self.aws_session_token:
                session_kwargs['aws_session_token'] = self.aws_session_token

            self._session = boto3.Session(**session_kwargs)

            # Create S3 client
            client_kwargs = {}
            if self.endpoint_url:
                client_kwargs['endpoint_url'] = self.endpoint_url

            self._client = self._session.client('s3', **client_kwargs)

            # Test connection by checking bucket exists
            try:
                self._client.head_bucket(Bucket=self.bucket)
                logger.info(f"Connected to S3 bucket: {self.bucket}")
            except ClientError as e:
                error_code = e.response['Error']['Code']
                if error_code == '404':
                    raise RuntimeError(f"S3 bucket '{self.bucket}' does not exist")
                elif error_code == '403':
                    raise RuntimeError(f"Access denied to S3 bucket '{self.bucket}'")
                else:
                    raise RuntimeError(f"S3 connection error: {e}")

        except ImportError:
            raise RuntimeError("boto3 not installed. Run: pip install boto3")
        except NoCredentialsError:
            raise RuntimeError("AWS credentials not found. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or use IAM roles.")
        except Exception as e:
            raise RuntimeError(f"Failed to connect to S3: {e}")

    async def disconnect(self):
        """Close S3 connection."""
        if self._client:
            # boto3 client doesn't need explicit closing
            self._client = None
            self._session = None
            logger.info("Disconnected from S3")

    async def list_objects(
        self,
        prefix: str = "",
        max_keys: int = 1000,
        focus: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """
        List objects in the S3 bucket.

        Args:
            prefix: Object key prefix filter
            max_keys: Maximum number of objects to return
            focus: List of object attributes to focus on

        Returns:
            List of object metadata dictionaries

        Example:
            objects = await s3.list_objects(prefix="data/", focus=["Key", "Size"])
        """
        if self._client is None:
            await self.connect()

        try:
            response = self._client.list_objects_v2(
                Bucket=self.bucket,
                Prefix=prefix,
                MaxKeys=max_keys
            )

            objects = response.get('Contents', [])

            # Apply focus system if specified
            if focus:
                filtered_objects = []
                for obj in objects:
                    filtered_obj = {key: obj.get(key) for key in focus if key in obj}
                    filtered_objects.append(filtered_obj)
                return filtered_objects

            return objects

        except Exception as e:
            logger.error(f"Failed to list S3 objects: {e}")
            raise RuntimeError(f"S3 list_objects failed: {e}")

    async def get_object(
        self,
        key: str,
        format: str = "auto",
        focus: Optional[List[str]] = None
    ) -> Union[bytes, str, Dict[str, Any], Any]:
        """
        Get an object from S3 with automatic format detection.

        Args:
            key: S3 object key
            format: Format type ('auto', 'bytes', 'text', 'json', 'csv', 'pandas')
            focus: List of columns to focus on (for pandas/csv)

        Returns:
            Object data in requested format

        Example:
            data = await s3.get_object("reports/monthly.csv", format="pandas")
        """
        if self._client is None:
            await self.connect()

        try:
            response = self._client.get_object(Bucket=self.bucket, Key=key)
            content = response['Body'].read()

            # Auto-detect format from file extension
            if format == "auto":
                format = self._detect_format(key)

            # Process based on format
            if format == "bytes":
                return content
            elif format == "text":
                return content.decode('utf-8')
            elif format == "json":
                import json
                return json.loads(content.decode('utf-8'))
            elif format == "csv":
                import csv
                import io
                content_str = content.decode('utf-8')
                reader = csv.DictReader(io.StringIO(content_str))
                rows = list(reader)

                # Apply focus system
                if focus:
                    filtered_rows = []
                    for row in rows:
                        filtered_row = {col: row.get(col) for col in focus if col in row}
                        filtered_rows.append(filtered_row)
                    return filtered_rows
                return rows
            elif format == "pandas":
                try:
                    import pandas as pd

                    # Detect file type for pandas
                    if key.endswith('.csv'):
                        df = pd.read_csv(io.BytesIO(content))
                    elif key.endswith('.json'):
                        df = pd.read_json(io.BytesIO(content))
                    elif key.endswith('.parquet'):
                        df = pd.read_parquet(io.BytesIO(content))
                    elif key.endswith('.xlsx'):
                        df = pd.read_excel(io.BytesIO(content))
                    else:
                        # Try CSV as default
                        df = pd.read_csv(io.BytesIO(content))

                    # Apply focus system
                    if focus:
                        available_cols = [col for col in focus if col in df.columns]
                        if available_cols:
                            df = df[available_cols]

                    return df
                except ImportError:
                    raise RuntimeError("pandas not installed. Run: pip install pandas")
            else:
                return content

        except Exception as e:
            logger.error(f"Failed to get S3 object {key}: {e}")
            raise RuntimeError(f"S3 get_object failed: {e}")

    async def put_object(
        self,
        key: str,
        data: Union[bytes, str, Dict[str, Any], Any],
        content_type: Optional[str] = None,
        metadata: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """
        Put an object to S3.

        Args:
            key: S3 object key
            data: Data to upload (bytes, string, dict, or pandas DataFrame)
            content_type: Content type (auto-detected if not provided)
            metadata: Object metadata

        Returns:
            Upload result metadata

        Example:
            result = await s3.put_object("data/output.json", {"result": "success"})
        """
        if self._client is None:
            await self.connect()

        try:
            # Process data based on type
            if hasattr(data, 'to_csv'):  # pandas DataFrame
                buffer = io.StringIO()
                data.to_csv(buffer, index=False)
                body = buffer.getvalue().encode('utf-8')
                content_type = content_type or 'text/csv'
            elif hasattr(data, 'to_json'):  # pandas DataFrame to JSON
                buffer = io.StringIO()
                data.to_json(buffer, orient='records', indent=2)
                body = buffer.getvalue().encode('utf-8')
                content_type = content_type or 'application/json'
            elif isinstance(data, dict):
                import json
                body = json.dumps(data, indent=2).encode('utf-8')
                content_type = content_type or 'application/json'
            elif isinstance(data, str):
                body = data.encode('utf-8')
                content_type = content_type or 'text/plain'
            elif isinstance(data, bytes):
                body = data
                content_type = content_type or 'application/octet-stream'
            else:
                # Try to convert to string
                body = str(data).encode('utf-8')
                content_type = content_type or 'text/plain'

            # Auto-detect content type from key if not provided
            if not content_type:
                content_type = self._detect_content_type(key)

            # Prepare put_object arguments
            put_args = {
                'Bucket': self.bucket,
                'Key': key,
                'Body': body,
                'ContentType': content_type
            }

            if metadata:
                put_args['Metadata'] = metadata

            # Upload object
            response = self._client.put_object(**put_args)

            result = {
                'key': key,
                'etag': response['ETag'],
                'size': len(body),
                'content_type': content_type
            }

            if metadata:
                result['metadata'] = metadata

            logger.info(f"Uploaded S3 object: {key} ({len(body)} bytes)")
            return result

        except Exception as e:
            logger.error(f"Failed to put S3 object {key}: {e}")
            raise RuntimeError(f"S3 put_object failed: {e}")

    async def upload_dataframe(
        self,
        df: Any,
        key: str,
        format: str = "csv",
        **kwargs
    ) -> Dict[str, Any]:
        """
        Upload a pandas DataFrame to S3.

        Args:
            df: pandas DataFrame
            key: S3 object key
            format: Output format ('csv', 'json', 'parquet')
            **kwargs: Additional format-specific parameters

        Returns:
            Upload result metadata

        Example:
            result = await s3.upload_dataframe(df, "processed/results.parquet", format="parquet")
        """
        if self._client is None:
            await self.connect()

        try:
            if format == "csv":
                buffer = io.StringIO()
                df.to_csv(buffer, index=False, **kwargs)
                body = buffer.getvalue().encode('utf-8')
                content_type = 'text/csv'
            elif format == "json":
                buffer = io.StringIO()
                df.to_json(buffer, orient='records', indent=2, **kwargs)
                body = buffer.getvalue().encode('utf-8')
                content_type = 'application/json'
            elif format == "parquet":
                buffer = io.BytesIO()
                df.to_parquet(buffer, **kwargs)
                body = buffer.getvalue()
                content_type = 'application/octet-stream'
            else:
                raise ValueError(f"Unsupported format: {format}")

            # Upload using put_object
            return await self.put_object(key, body, content_type)

        except Exception as e:
            logger.error(f"Failed to upload DataFrame to S3: {e}")
            raise RuntimeError(f"S3 upload_dataframe failed: {e}")

    async def download_file(self, key: str, local_path: str) -> str:
        """
        Download an S3 object to local file.

        Args:
            key: S3 object key
            local_path: Local file path

        Returns:
            Local file path

        Example:
            path = await s3.download_file("data/input.csv", "/tmp/input.csv")
        """
        if self._client is None:
            await self.connect()

        try:
            # Ensure directory exists
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            # Download file
            self._client.download_file(self.bucket, key, local_path)

            logger.info(f"Downloaded S3 object {key} to {local_path}")
            return local_path

        except Exception as e:
            logger.error(f"Failed to download S3 object {key}: {e}")
            raise RuntimeError(f"S3 download_file failed: {e}")

    async def upload_file(self, local_path: str, key: str) -> Dict[str, Any]:
        """
        Upload a local file to S3.

        Args:
            local_path: Local file path
            key: S3 object key

        Returns:
            Upload result metadata

        Example:
            result = await s3.upload_file("/tmp/output.csv", "results/output.csv")
        """
        if self._client is None:
            await self.connect()

        try:
            if not os.path.exists(local_path):
                raise FileNotFoundError(f"Local file not found: {local_path}")

            # Get file size
            file_size = os.path.getsize(local_path)

            # Auto-detect content type
            content_type = self._detect_content_type(local_path)

            # Upload file
            self._client.upload_file(
                local_path,
                self.bucket,
                key,
                ExtraArgs={'ContentType': content_type}
            )

            # Get object metadata
            response = self._client.head_object(Bucket=self.bucket, Key=key)

            result = {
                'key': key,
                'etag': response['ETag'],
                'size': file_size,
                'content_type': content_type,
                'local_path': local_path
            }

            logger.info(f"Uploaded file {local_path} to S3 object {key}")
            return result

        except Exception as e:
            logger.error(f"Failed to upload file {local_path} to S3: {e}")
            raise RuntimeError(f"S3 upload_file failed: {e}")

    async def delete_object(self, key: str) -> Dict[str, Any]:
        """
        Delete an object from S3.

        Args:
            key: S3 object key

        Returns:
            Delete result metadata

        Example:
            result = await s3.delete_object("temp/old_file.txt")
        """
        if self._client is None:
            await self.connect()

        try:
            response = self._client.delete_object(Bucket=self.bucket, Key=key)

            result = {
                'key': key,
                'deleted': True
            }

            logger.info(f"Deleted S3 object: {key}")
            return result

        except Exception as e:
            logger.error(f"Failed to delete S3 object {key}: {e}")
            raise RuntimeError(f"S3 delete_object failed: {e}")

    async def copy_object(
        self,
        source_key: str,
        dest_key: str,
        source_bucket: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Copy an object within S3.

        Args:
            source_key: Source object key
            dest_key: Destination object key
            source_bucket: Source bucket (uses same bucket if not provided)

        Returns:
            Copy result metadata

        Example:
            result = await s3.copy_object("data/input.csv", "backup/input.csv")
        """
        if self._client is None:
            await self.connect()

        try:
            source_bucket = source_bucket or self.bucket
            copy_source = {'Bucket': source_bucket, 'Key': source_key}

            response = self._client.copy_object(
                CopySource=copy_source,
                Bucket=self.bucket,
                Key=dest_key
            )

            result = {
                'source_key': source_key,
                'dest_key': dest_key,
                'source_bucket': source_bucket,
                'dest_bucket': self.bucket,
                'etag': response['CopyObjectResult']['ETag']
            }

            logger.info(f"Copied S3 object {source_key} to {dest_key}")
            return result

        except Exception as e:
            logger.error(f"Failed to copy S3 object {source_key}: {e}")
            raise RuntimeError(f"S3 copy_object failed: {e}")

    def _detect_format(self, key: str) -> str:
        """Detect format from file extension."""
        ext = Path(key).suffix.lower()

        format_map = {
            '.json': 'json',
            '.csv': 'csv',
            '.txt': 'text',
            '.parquet': 'pandas',
            '.xlsx': 'pandas',
            '.xls': 'pandas'
        }

        return format_map.get(ext, 'bytes')

    def _detect_content_type(self, key: str) -> str:
        """Detect content type from file extension."""
        ext = Path(key).suffix.lower()

        content_type_map = {
            '.json': 'application/json',
            '.csv': 'text/csv',
            '.txt': 'text/plain',
            '.parquet': 'application/octet-stream',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.xls': 'application/vnd.ms-excel',
            '.pdf': 'application/pdf',
            '.png': 'image/png',
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.gif': 'image/gif'
        }

        return content_type_map.get(ext, 'application/octet-stream')

    def get_tools(self) -> List['AgentTool']:
        """
        Expose S3 operations as agent tools.

        Returns:
            List of AgentTool instances for S3 operations
        """
        from ..core.tools import AgentTool

        return [
            AgentTool(
                name="read_s3_file",
                description="Read and parse a file from S3 bucket. Automatically detects format (CSV, JSON, Parquet, text) based on file extension.",
                parameters={
                    "key": {
                        "type": "string",
                        "description": "S3 object key (file path within the bucket)",
                        "required": True
                    },
                    "format": {
                        "type": "string",
                        "description": "Format hint: 'auto', 'csv', 'json', 'pandas', 'text'. Default is 'auto' which detects from extension.",
                        "required": False
                    }
                },
                handler=self._tool_read_file,
                category="storage",
                source="plugin",
                plugin_name="S3",
                timeout_seconds=120
            ),
            AgentTool(
                name="write_s3_file",
                description="Write data to S3 bucket. Accepts dictionaries (saved as JSON), strings, or binary data.",
                parameters={
                    "key": {
                        "type": "string",
                        "description": "S3 object key (file path within the bucket)",
                        "required": True
                    },
                    "data": {
                        "type": "object",
                        "description": "Data to write (dict for JSON, string for text, or bytes for binary)",
                        "required": True
                    }
                },
                handler=self._tool_write_file,
                category="storage",
                source="plugin",
                plugin_name="S3",
                timeout_seconds=120
            ),
            AgentTool(
                name="list_s3_objects",
                description="List objects in S3 bucket with optional prefix filter to narrow down results",
                parameters={
                    "prefix": {
                        "type": "string",
                        "description": "Filter objects by prefix (folder path). Leave empty to list all objects.",
                        "required": False
                    },
                    "max_keys": {
                        "type": "integer",
                        "description": "Maximum number of objects to return. Default is 100.",
                        "required": False
                    }
                },
                handler=self._tool_list_objects,
                category="storage",
                source="plugin",
                plugin_name="S3",
                timeout_seconds=60
            ),
            AgentTool(
                name="delete_s3_file",
                description="Delete a file from S3 bucket. This operation is permanent and cannot be undone.",
                parameters={
                    "key": {
                        "type": "string",
                        "description": "S3 object key (file path) to delete",
                        "required": True
                    }
                },
                handler=self._tool_delete_file,
                category="storage",
                source="plugin",
                plugin_name="S3",
                timeout_seconds=30
            )
        ]

    async def _tool_read_file(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Tool handler for read_s3_file"""
        key = args.get("key")
        format_hint = args.get("format", "auto")

        data = await self.get_object(key, format=format_hint)

        return {
            "success": True,
            "key": key,
            "data": data,
            "format": self._detect_format(key),
            "bucket": self.bucket
        }

    async def _tool_write_file(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Tool handler for write_s3_file"""
        key = args.get("key")
        data = args.get("data")

        result = await self.put_object(key, data)

        return {
            "success": True,
            "key": key,
            "size": result.get("size"),
            "location": f"s3://{self.bucket}/{key}",
            "bucket": self.bucket
        }

    async def _tool_list_objects(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Tool handler for list_s3_objects"""
        prefix = args.get("prefix", "")
        max_keys = args.get("max_keys", 100)

        objects = await self.list_objects(prefix=prefix, max_keys=max_keys)

        # Simplify object metadata for LLM consumption
        simplified = [
            {
                "key": obj["Key"],
                "size": obj["Size"],
                "modified": str(obj.get("LastModified", ""))
            }
            for obj in objects
        ]

        return {
            "success": True,
            "objects": simplified,
            "count": len(simplified),
            "bucket": self.bucket,
            "prefix": prefix if prefix else "(all objects)"
        }

    async def _tool_delete_file(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Tool handler for delete_s3_file"""
        key = args.get("key")

        result = await self.delete_object(key)

        return {
            "success": True,
            "key": key,
            "deleted": result.get("deleted", True),
            "bucket": self.bucket
        }

    # Context manager support
    async def __aenter__(self):
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.disconnect()


def s3(**kwargs) -> S3Plugin:
    """Create S3 plugin with simplified interface."""
    return S3Plugin(**kwargs)