agno-2.4.6-py3-none-any.whl → agno-2.4.8-py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- agno/agent/agent.py +5 -1
- agno/db/base.py +2 -0
- agno/db/postgres/postgres.py +5 -5
- agno/db/singlestore/singlestore.py +4 -5
- agno/db/sqlite/sqlite.py +4 -4
- agno/knowledge/embedder/aws_bedrock.py +325 -106
- agno/knowledge/knowledge.py +83 -1853
- agno/knowledge/loaders/__init__.py +29 -0
- agno/knowledge/loaders/azure_blob.py +423 -0
- agno/knowledge/loaders/base.py +187 -0
- agno/knowledge/loaders/gcs.py +267 -0
- agno/knowledge/loaders/github.py +415 -0
- agno/knowledge/loaders/s3.py +281 -0
- agno/knowledge/loaders/sharepoint.py +439 -0
- agno/knowledge/reader/website_reader.py +2 -2
- agno/knowledge/remote_knowledge.py +151 -0
- agno/knowledge/reranker/aws_bedrock.py +299 -0
- agno/learn/machine.py +5 -6
- agno/learn/stores/session_context.py +10 -2
- agno/models/azure/openai_chat.py +6 -11
- agno/models/neosantara/__init__.py +5 -0
- agno/models/neosantara/neosantara.py +42 -0
- agno/models/utils.py +5 -0
- agno/os/app.py +4 -1
- agno/os/interfaces/agui/router.py +1 -1
- agno/os/routers/components/components.py +2 -0
- agno/os/routers/knowledge/knowledge.py +0 -1
- agno/os/routers/registry/registry.py +340 -192
- agno/os/routers/workflows/router.py +7 -1
- agno/os/schema.py +104 -0
- agno/registry/registry.py +4 -0
- agno/run/workflow.py +3 -0
- agno/session/workflow.py +1 -1
- agno/skills/utils.py +100 -2
- agno/team/team.py +6 -3
- agno/tools/mcp/mcp.py +26 -1
- agno/vectordb/lancedb/lance_db.py +22 -7
- agno/workflow/__init__.py +4 -0
- agno/workflow/cel.py +299 -0
- agno/workflow/condition.py +280 -58
- agno/workflow/loop.py +177 -46
- agno/workflow/parallel.py +75 -4
- agno/workflow/router.py +260 -44
- agno/workflow/step.py +14 -7
- agno/workflow/steps.py +43 -0
- agno/workflow/workflow.py +104 -46
- {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/METADATA +25 -37
- {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/RECORD +51 -39
- {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/WHEEL +0 -0
- {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/licenses/LICENSE +0 -0
- {agno-2.4.6.dist-info → agno-2.4.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,281 @@
+"""S3 content loader for Knowledge.
+
+Provides methods for loading content from AWS S3.
+"""
+
+# mypy: disable-error-code="attr-defined"
+
+from io import BytesIO
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union, cast
+
+from agno.knowledge.content import Content, ContentStatus
+from agno.knowledge.loaders.base import BaseLoader
+from agno.knowledge.reader import Reader
+from agno.knowledge.remote_content.config import RemoteContentConfig, S3Config
+from agno.knowledge.remote_content.remote_content import S3Content
+from agno.utils.log import log_error, log_info, log_warning
+from agno.utils.string import generate_id
+
+
+class S3Loader(BaseLoader):
+    """Loader for S3 content."""
+
+    # ==========================================
+    # S3 HELPERS (shared between sync/async)
+    # ==========================================
+
+    def _validate_s3_config(
+        self,
+        content: Content,
+        config: Optional[RemoteContentConfig],
+    ) -> Optional[S3Config]:
+        """Validate and extract S3 config.
+
+        Returns:
+            S3Config if valid, None otherwise (S3 can work without explicit config)
+        """
+        return cast(S3Config, config) if isinstance(config, S3Config) else None
+
+    def _build_s3_metadata(
+        self,
+        s3_config: Optional[S3Config],
+        bucket_name: str,
+        object_name: str,
+    ) -> Dict[str, str]:
+        """Build S3-specific metadata dictionary."""
+        metadata: Dict[str, str] = {
+            "source_type": "s3",
+            "s3_bucket": bucket_name,
+            "s3_object_name": object_name,
+        }
+        if s3_config:
+            metadata["source_config_id"] = s3_config.id
+            metadata["source_config_name"] = s3_config.name
+            if s3_config.region:
+                metadata["s3_region"] = s3_config.region
+        return metadata
+
+    def _build_s3_virtual_path(self, bucket_name: str, object_name: str) -> str:
+        """Build virtual path for S3 content."""
+        return f"s3://{bucket_name}/{object_name}"
+
+    # ==========================================
+    # S3 LOADERS
+    # ==========================================
+
+    async def _aload_from_s3(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        config: Optional[RemoteContentConfig] = None,
+    ):
+        """Load content from AWS S3 (async).
+
+        Note: Uses sync boto3 calls as boto3 doesn't have an async API.
+        """
+        from agno.cloud.aws.s3.bucket import S3Bucket
+        from agno.cloud.aws.s3.object import S3Object
+
+        log_warning(
+            "S3 content loading has limited features. "
+            "Recursive folder traversal, rich metadata, and improved naming are coming in a future release."
+        )
+
+        remote_content: S3Content = cast(S3Content, content.remote_content)
+        s3_config = self._validate_s3_config(content, config)
+
+        # Get or create bucket with credentials from config
+        bucket = remote_content.bucket
+        try:
+            if bucket is None and remote_content.bucket_name:
+                bucket = S3Bucket(
+                    name=remote_content.bucket_name,
+                    region=s3_config.region if s3_config else None,
+                    aws_access_key_id=s3_config.aws_access_key_id if s3_config else None,
+                    aws_secret_access_key=s3_config.aws_secret_access_key if s3_config else None,
+                )
+        except Exception as e:
+            log_error(f"Error getting bucket: {e}")
+
+        # Identify objects to read
+        objects_to_read: List[S3Object] = []
+        if bucket is not None:
+            if remote_content.key is not None:
+                _object = S3Object(bucket_name=bucket.name, name=remote_content.key)
+                objects_to_read.append(_object)
+            elif remote_content.object is not None:
+                objects_to_read.append(remote_content.object)
+            elif remote_content.prefix is not None:
+                objects_to_read.extend(bucket.get_objects(prefix=remote_content.prefix))
+            else:
+                objects_to_read.extend(bucket.get_objects())
+
+        if objects_to_read:
+            log_info(f"Processing {len(objects_to_read)} file(s) from S3")
+
+        bucket_name = bucket.name if bucket else "unknown"
+        is_folder_upload = len(objects_to_read) > 1
+        root_path = remote_content.prefix or ""
+
+        for s3_object in objects_to_read:
+            object_name = s3_object.name or ""
+            file_name = object_name.split("/")[-1]
+
+            # Build metadata and virtual path using helpers
+            virtual_path = self._build_s3_virtual_path(bucket_name, object_name)
+            s3_metadata = self._build_s3_metadata(s3_config, bucket_name, object_name)
+            merged_metadata: Dict[str, Any] = self._merge_metadata(s3_metadata, content.metadata)
+
+            # Compute content name using base helper
+            content_name = self._compute_content_name(object_name, file_name, content.name, root_path, is_folder_upload)
+
+            # Create content entry
+            content_entry = Content(
+                name=content_name,
+                description=content.description,
+                path=virtual_path,
+                status=ContentStatus.PROCESSING,
+                metadata=merged_metadata,
+                file_type="s3",
+            )
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
+
+            await self._ainsert_contents_db(content_entry)
+
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                await self._aupdate_content(content_entry)
+                continue
+
+            # Select reader
+            reader = self._select_reader_by_uri(s3_object.uri, content.reader)
+            reader = cast(Reader, reader)
+
+            # Fetch and load the content
+            temporary_file = None
+            readable_content: Optional[Union[BytesIO, Path]] = None
+            if s3_object.uri.endswith(".pdf"):
+                readable_content = BytesIO(s3_object.get_resource().get()["Body"].read())
+            else:
+                temporary_file = Path("storage").joinpath(file_name)
+                readable_content = temporary_file
+                s3_object.download(readable_content)  # type: ignore
+
+            # Read the content
+            read_documents = await reader.async_read(readable_content, name=file_name)
+
+            # Prepare and insert the content in the vector database
+            self._prepare_documents_for_insert(read_documents, content_entry.id)
+            await self._ahandle_vector_db_insert(content_entry, read_documents, upsert)
+
+            # Remove temporary file if needed
+            if temporary_file:
+                temporary_file.unlink()
+
+    def _load_from_s3(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        config: Optional[RemoteContentConfig] = None,
+    ):
+        """Load content from AWS S3 (sync)."""
+        from agno.cloud.aws.s3.bucket import S3Bucket
+        from agno.cloud.aws.s3.object import S3Object
+
+        log_warning(
+            "S3 content loading has limited features. "
+            "Recursive folder traversal, rich metadata, and improved naming are coming in a future release."
+        )
+
+        remote_content: S3Content = cast(S3Content, content.remote_content)
+        s3_config = self._validate_s3_config(content, config)
+
+        # Get or create bucket with credentials from config
+        bucket = remote_content.bucket
+        if bucket is None and remote_content.bucket_name:
+            bucket = S3Bucket(
+                name=remote_content.bucket_name,
+                region=s3_config.region if s3_config else None,
+                aws_access_key_id=s3_config.aws_access_key_id if s3_config else None,
+                aws_secret_access_key=s3_config.aws_secret_access_key if s3_config else None,
+            )
+
+        # Identify objects to read
+        objects_to_read: List[S3Object] = []
+        if bucket is not None:
+            if remote_content.key is not None:
+                _object = S3Object(bucket_name=bucket.name, name=remote_content.key)
+                objects_to_read.append(_object)
+            elif remote_content.object is not None:
+                objects_to_read.append(remote_content.object)
+            elif remote_content.prefix is not None:
+                objects_to_read.extend(bucket.get_objects(prefix=remote_content.prefix))
+            else:
+                objects_to_read.extend(bucket.get_objects())
+
+        if objects_to_read:
+            log_info(f"Processing {len(objects_to_read)} file(s) from S3")
+
+        bucket_name = bucket.name if bucket else "unknown"
+        is_folder_upload = len(objects_to_read) > 1
+        root_path = remote_content.prefix or ""
+
+        for s3_object in objects_to_read:
+            object_name = s3_object.name or ""
+            file_name = object_name.split("/")[-1]
+
+            # Build metadata and virtual path using helpers
+            virtual_path = self._build_s3_virtual_path(bucket_name, object_name)
+            s3_metadata = self._build_s3_metadata(s3_config, bucket_name, object_name)
+            merged_metadata: Dict[str, Any] = self._merge_metadata(s3_metadata, content.metadata)
+
+            # Compute content name using base helper
+            content_name = self._compute_content_name(object_name, file_name, content.name, root_path, is_folder_upload)
+
+            # Create content entry
+            content_entry = Content(
+                name=content_name,
+                description=content.description,
+                path=virtual_path,
+                status=ContentStatus.PROCESSING,
+                metadata=merged_metadata,
+                file_type="s3",
+            )
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
+
+            self._insert_contents_db(content_entry)
+
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                self._update_content(content_entry)
+                continue
+
+            # Select reader
+            reader = self._select_reader_by_uri(s3_object.uri, content.reader)
+            reader = cast(Reader, reader)
+
+            # Fetch and load the content
+            temporary_file = None
+            readable_content: Optional[Union[BytesIO, Path]] = None
+            if s3_object.uri.endswith(".pdf"):
+                readable_content = BytesIO(s3_object.get_resource().get()["Body"].read())
+            else:
+                temporary_file = Path("storage").joinpath(file_name)
+                readable_content = temporary_file
+                s3_object.download(readable_content)  # type: ignore
+
+            # Read the content
+            read_documents = reader.read(readable_content, name=file_name)
+
+            # Prepare and insert the content in the vector database
+            self._prepare_documents_for_insert(read_documents, content_entry.id)
+            self._handle_vector_db_insert(content_entry, read_documents, upsert)
+
+            # Remove temporary file if needed
+            if temporary_file:
+                temporary_file.unlink()
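
For readers skimming the new loader, the core behavior is the object-selection precedence shared by the sync and async methods: an explicit key wins, then a pre-built object, then a prefix listing, and finally the whole bucket. Below is a minimal standalone sketch of that precedence using plain strings in place of agno's S3Object; the function and variable names here are illustrative, not part of the package, and the prefix filter stands in for the server-side listing that bucket.get_objects(prefix=...) performs.

from typing import List, Optional


def select_objects(
    key: Optional[str],
    obj: Optional[str],
    prefix: Optional[str],
    bucket_listing: List[str],
) -> List[str]:
    """Mirror the branch order in _load_from_s3 / _aload_from_s3."""
    if key is not None:
        return [key]  # 1. explicit object key
    if obj is not None:
        return [obj]  # 2. pre-built object
    if prefix is not None:
        return [o for o in bucket_listing if o.startswith(prefix)]  # 3. prefix listing
    return list(bucket_listing)  # 4. everything in the bucket


# A prefix narrows the listing; an explicit key short-circuits everything else.
listing = ["docs/a.pdf", "docs/b.md", "img/logo.png"]
assert select_objects(None, None, "docs/", listing) == ["docs/a.pdf", "docs/b.md"]
assert select_objects("img/logo.png", None, "docs/", listing) == ["img/logo.png"]

Once selected, each object takes one of two fetch paths: PDFs are streamed into an in-memory BytesIO, while everything else is downloaded to a temporary file under storage/ that is deleted after reading.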