agno 2.2.13__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff covers the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- agno/agent/agent.py +197 -110
- agno/api/api.py +2 -0
- agno/db/base.py +26 -0
- agno/db/dynamo/dynamo.py +8 -0
- agno/db/dynamo/schemas.py +1 -0
- agno/db/firestore/firestore.py +8 -0
- agno/db/firestore/schemas.py +1 -0
- agno/db/gcs_json/gcs_json_db.py +8 -0
- agno/db/in_memory/in_memory_db.py +8 -1
- agno/db/json/json_db.py +8 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/async_mongo.py +16 -6
- agno/db/mongo/mongo.py +11 -0
- agno/db/mongo/schemas.py +3 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/mysql.py +76 -3
- agno/db/mysql/schemas.py +20 -10
- agno/db/postgres/async_postgres.py +99 -25
- agno/db/postgres/postgres.py +75 -6
- agno/db/postgres/schemas.py +30 -20
- agno/db/redis/redis.py +15 -2
- agno/db/redis/schemas.py +4 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +11 -0
- agno/db/singlestore/singlestore.py +79 -5
- agno/db/sqlite/async_sqlite.py +97 -19
- agno/db/sqlite/schemas.py +10 -0
- agno/db/sqlite/sqlite.py +79 -2
- agno/db/surrealdb/surrealdb.py +8 -0
- agno/knowledge/chunking/semantic.py +7 -2
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/knowledge.py +57 -86
- agno/knowledge/reader/csv_reader.py +7 -9
- agno/knowledge/reader/docx_reader.py +5 -5
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -18
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +11 -11
- agno/knowledge/reader/pptx_reader.py +5 -5
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +8 -8
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/models/anthropic/claude.py +319 -28
- agno/models/aws/claude.py +32 -0
- agno/models/azure/openai_chat.py +19 -10
- agno/models/base.py +612 -545
- agno/models/cerebras/cerebras.py +8 -11
- agno/models/cohere/chat.py +27 -1
- agno/models/google/gemini.py +39 -7
- agno/models/groq/groq.py +25 -11
- agno/models/meta/llama.py +20 -9
- agno/models/meta/llama_openai.py +3 -19
- agno/models/nebius/nebius.py +4 -4
- agno/models/openai/chat.py +30 -14
- agno/models/openai/responses.py +10 -13
- agno/models/response.py +1 -0
- agno/models/vertexai/claude.py +26 -0
- agno/os/app.py +8 -19
- agno/os/router.py +54 -0
- agno/os/routers/knowledge/knowledge.py +2 -2
- agno/os/schema.py +2 -2
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -112
- agno/session/workflow.py +353 -57
- agno/team/team.py +227 -125
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/http.py +111 -0
- agno/utils/media.py +11 -0
- agno/utils/models/claude.py +8 -0
- agno/utils/print_response/agent.py +33 -12
- agno/utils/print_response/team.py +22 -12
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/workflow/condition.py +13 -0
- agno/workflow/loop.py +13 -0
- agno/workflow/parallel.py +13 -0
- agno/workflow/router.py +13 -0
- agno/workflow/step.py +120 -20
- agno/workflow/steps.py +13 -0
- agno/workflow/workflow.py +76 -63
- {agno-2.2.13.dist-info → agno-2.3.1.dist-info}/METADATA +6 -2
- {agno-2.2.13.dist-info → agno-2.3.1.dist-info}/RECORD +91 -88
- agno/tools/googlesearch.py +0 -98
- {agno-2.2.13.dist-info → agno-2.3.1.dist-info}/WHEEL +0 -0
- {agno-2.2.13.dist-info → agno-2.3.1.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.3.1.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED

@@ -51,9 +51,6 @@ class Knowledge:
             self.vector_db.create()

         self.construct_readers()
-        self.valid_metadata_filters = set()
-
-    # --- SDK Specific Methods ---

     # --- Add Contents ---
     @overload
@@ -269,7 +266,7 @@
             return

         if not skip_if_exists:
-
+            log_debug("skip_if_exists is disabled, disabling upsert")
             upsert = False

         content = None
@@ -431,7 +428,7 @@

         else:
             reader = ReaderFactory.get_reader_for_extension(path.suffix)
-
+        log_debug(f"Using Reader: {reader.__class__.__name__}")
         if reader:
             # TODO: We will refactor this to eventually pass authorization to all readers
             import inspect
@@ -652,7 +649,7 @@
             content_io = io.BytesIO(content_bytes)

             if content.reader:
-
+                log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
                 read_documents = content.reader.read(content_io, name=name)
             else:
                 text_reader = self.text_reader
@@ -676,7 +673,7 @@

         # Respect an explicitly provided reader; otherwise select based on file type
         if content.reader:
-
+            log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
             reader = content.reader
         else:
             reader = self._select_reader(content.file_data.type)
@@ -996,11 +993,6 @@
         include: Optional[List[str]] = None,
         exclude: Optional[List[str]] = None,
     ) -> None:
-        log_info(f"Loading content: {content.id}")
-
-        if content.metadata:
-            self.add_filters(content.metadata)
-
         if content.path:
             await self._load_from_path(content, upsert, skip_if_exists, include, exclude)

@@ -1174,9 +1166,6 @@
             if self.vector_db and content.metadata:
                 self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)

-            if content.metadata:
-                self.add_filters(content.metadata)
-
             return content_row.to_dict()

         else:
@@ -1223,13 +1212,13 @@
             if self.vector_db and content.metadata:
                 self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)

-            if content.metadata:
-                self.add_filters(content.metadata)
-
             return content_row.to_dict()

         else:
-
+            if self.name:
+                log_warning(f"Contents DB not found for knowledge base: {self.name}")
+            else:
+                log_warning("Contents DB not found for knowledge base")
             return None

     async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
@@ -1455,41 +1444,55 @@
         return []

     def get_valid_filters(self) -> Set[str]:
-        if self.
-
-
-
-
-
-
-
-
-        return
-
-    def
-
+        if self.contents_db is None:
+            log_warning("No contents db provided. This is required for filtering.")
+            return set()
+        contents, _ = self.get_content()
+        valid_filters: Set[str] = set()
+        for content in contents:
+            if content.metadata:
+                valid_filters.update(content.metadata.keys())
+
+        return valid_filters
+
+    async def async_get_valid_filters(self) -> Set[str]:
+        if self.contents_db is None:
+            log_warning("No contents db provided. This is required for filtering.")
+            return set()
+        contents, _ = await self.aget_content()
+        valid_filters: Set[str] = set()
+        for content in contents:
+            if content.metadata:
+                valid_filters.update(content.metadata.keys())
+
+        return valid_filters
+
+    def _validate_filters(
+        self, filters: Union[Dict[str, Any], List[FilterExpr]], valid_metadata_filters: Set[str]
+    ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
         if not filters:
-            return
+            return {}, []

-        valid_filters:
+        valid_filters: Union[Dict[str, Any], List[FilterExpr]] = {}
         invalid_keys = []

         if isinstance(filters, dict):
             # If no metadata filters tracked yet, all keys are considered invalid
-            if
+            if valid_metadata_filters is None or not valid_metadata_filters:
                 invalid_keys = list(filters.keys())
-
-
+                log_warning(
+                    f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}"
+                )
+                return {}, invalid_keys

-            valid_filters = {}
             for key, value in filters.items():
                 # Handle both normal keys and prefixed keys like meta_data.key
                 base_key = key.split(".")[-1] if "." in key else key
-                if base_key in
-                    valid_filters[key] = value
+                if base_key in valid_metadata_filters or key in valid_metadata_filters:
+                    valid_filters[key] = value  # type: ignore
                 else:
                     invalid_keys.append(key)
-
+                    log_warning(f"Invalid filter key: {key} - not present in knowledge base")

         elif isinstance(filters, List):
             # Validate that list contains FilterExpr instances
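
The rewritten `get_valid_filters` / `async_get_valid_filters` above no longer read a cached `self.valid_metadata_filters`; they rebuild the set of filterable keys from the contents DB on every call, by unioning the metadata keys of every stored content row. A minimal sketch of that derivation, with `rows` standing in for the metadata of the records returned by `get_content()` (names here are illustrative, not the library API):

```python
# Minimal sketch of the key derivation above; `rows` stands in for the
# metadata dicts of stored content rows. Names are illustrative.
from typing import Any, Dict, List, Optional, Set


def derive_valid_filters(rows: List[Optional[Dict[str, Any]]]) -> Set[str]:
    valid: Set[str] = set()
    for metadata in rows:
        if metadata:  # rows without metadata contribute nothing
            valid.update(metadata.keys())
    return valid


print(derive_valid_filters([{"region": "eu", "year": 2024}, None]))
# -> {'region', 'year'}
```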
@@ -1501,56 +1504,30 @@
                         f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
                         f"AND(...), OR(...), NOT(...) from agno.filters"
                     )
-
             # Filter expressions are already validated, return empty dict/list
             # The actual filtering happens in the vector_db layer
             return filters, []

         return valid_filters, invalid_keys

-    def validate_filters(
-
-
-        self.
-
-        return self._validate_filters(filters)
-
-    async def async_validate_filters(
-        self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]
-    ) -> Tuple[Any, List[str]]:
-        if self.valid_metadata_filters is None:
-            self.valid_metadata_filters = set()
-            self.valid_metadata_filters.update(await self._aget_filters_from_db())
+    def validate_filters(
+        self, filters: Union[Dict[str, Any], List[FilterExpr]]
+    ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
+        valid_filters_from_db = self.get_valid_filters()

-
+        valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)

-
-        if self.valid_metadata_filters is None:
-            self.valid_metadata_filters = set()
+        return valid_filters, invalid_keys

-
-
-
+    async def async_validate_filters(
+        self, filters: Union[Dict[str, Any], List[FilterExpr]]
+    ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
+        """Return a tuple containing a dict with all valid filters and a list of invalid filter keys"""
+        valid_filters_from_db = await self.async_get_valid_filters()

-
-        if self.contents_db is None:
-            return set()
-        contents, _ = self.get_content()
-        valid_filters: Set[str] = set()
-        for content in contents:
-            if content.metadata:
-                valid_filters.update(content.metadata.keys())
-        return valid_filters
+        valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)

-
-        if self.contents_db is None:
-            return set()
-        contents, _ = await self.aget_content()
-        valid_filters: Set[str] = set()
-        for content in contents:
-            if content.metadata:
-                valid_filters.update(content.metadata.keys())
-        return valid_filters
+        return valid_filters, invalid_keys

     def remove_vector_by_id(self, id: str) -> bool:
         from agno.vectordb import VectorDb
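
After this rewrite, `validate_filters` and `async_validate_filters` take the filters directly and return a `(valid_filters, invalid_keys)` pair, with validity derived from the contents DB at call time; prefixed keys like `meta_data.year` validate via their base key once `year` appears in stored metadata. A hedged usage sketch, assuming the in-memory DB shipped in this release (`agno.db.in_memory`) can serve as the contents DB:

```python
# Hedged usage sketch of the reworked validation API; assumes InMemoryDb
# (from agno.db.in_memory, listed in this release) works as a contents DB.
from agno.db.in_memory import InMemoryDb
from agno.knowledge.knowledge import Knowledge

knowledge = Knowledge(contents_db=InMemoryDb())

# With no content stored yet, every key is reported invalid (and logged),
# matching the "no metadata filters tracked yet" branch above.
valid, invalid = knowledge.validate_filters({"region": "eu", "meta_data.year": 2024})
print(valid, invalid)  # expected: {} ['region', 'meta_data.year']
```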
@@ -1855,12 +1832,6 @@
         log_info(f"Selecting reader for extension: {extension}")
         return ReaderFactory.get_reader_for_extension(extension)

-    def get_filters(self) -> List[str]:
-        return [
-            "filter_tag_1",
-            "filter_tag2",
-        ]
-
     # --- Convenience Properties for Backward Compatibility ---

     def _is_text_mime_type(self, mime_type: str) -> bool:
agno/knowledge/reader/csv_reader.py
CHANGED

@@ -15,7 +15,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error


 class CSVReader(Reader):
@@ -46,10 +46,10 @@ class CSVReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading: {file}")
                 file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
             else:
-
+                log_debug(f"Reading retrieved file: {name or file.name}")
                 file.seek(0)
                 file_content = io.StringIO(file.read().decode("utf-8"))  # type: ignore

@@ -78,7 +78,7 @@ class CSVReader(Reader):
                 return chunked_documents
             return documents
         except Exception as e:
-
+            log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
             return []

     async def async_read(
@@ -105,12 +105,12 @@ class CSVReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading async: {file}")
                 async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
                     content = await file_content.read()
                     file_content_io = io.StringIO(content)
             else:
-
+                log_debug(f"Reading retrieved file async: {file.name}")
                 file.seek(0)
                 file_content_io = io.StringIO(file.read().decode("utf-8"))  # type: ignore

@@ -160,7 +160,5 @@ class CSVReader(Reader):

             return documents
         except Exception as e:
-            log_error(
-                f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
-            )
+            log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
             return []
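
The reader's per-file messages now go through `log_debug`, so they disappear at the default log level. A sketch for surfacing them, assuming agno logs through the stdlib `logging` module under an `"agno"` logger name (an assumption, not confirmed by this diff):

```python
# Surface the new debug-level reader logs; assumes agno uses the stdlib
# logging module with a logger named "agno".
import logging

logging.basicConfig(level=logging.DEBUG)
logging.getLogger("agno").setLevel(logging.DEBUG)
```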
agno/knowledge/reader/docx_reader.py
CHANGED

@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error

 try:
     from docx import Document as DocxDocument  # type: ignore
@@ -43,11 +43,11 @@ class DocxReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading: {file}")
                 docx_document = DocxDocument(str(file))
                 doc_name = name or file.stem
             else:
-
+                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'docx_file')}")
                 docx_document = DocxDocument(file)
                 doc_name = name or (
                     getattr(file, "name", "docx_file").split(".")[0] if hasattr(file, "name") else "docx_file"
@@ -70,7 +70,7 @@ class DocxReader(Reader):
             return documents

         except Exception as e:
-
+            log_error(f"Error reading file: {e}")
             return []

     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -78,5 +78,5 @@ class DocxReader(Reader):
         try:
             return await asyncio.to_thread(self.read, file, name)
         except Exception as e:
-
+            log_error(f"Error reading file asynchronously: {e}")
             return []
agno/knowledge/reader/field_labeled_csv_reader.py
CHANGED

@@ -13,7 +13,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error, log_warning


 class FieldLabeledCSVReader(Reader):
@@ -105,10 +105,10 @@ class FieldLabeledCSVReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading: {file}")
                 file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
             else:
-
+                log_debug(f"Reading retrieved file: {name or file.name}")
                 file.seek(0)
                 file_content = io.StringIO(file.read().decode("utf-8"))  # type: ignore

@@ -127,15 +127,15 @@ class FieldLabeledCSVReader(Reader):
                 rows = list(csv_reader)

                 if not rows:
-
+                    log_warning("CSV file is empty")
                     return []

                 # First row is headers
                 headers = [header.strip() for header in rows[0]]
-
+                log_debug(f"Found {len(headers)} headers: {headers}")

                 data_rows = rows[1:] if len(rows) > 1 else []
-
+                log_debug(f"Processing {len(data_rows)} data rows")

                 for row_index, row in enumerate(data_rows):
                     # Ensure row has same length as headers (pad or truncate)
@@ -163,13 +163,13 @@ class FieldLabeledCSVReader(Reader):
                     )

                     documents.append(document)
-
+                    log_debug(f"Created document for row {row_index + 1}: {len(labeled_text)} chars")

-
+            log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
             return documents

         except Exception as e:
-
+            log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
             return []

     async def async_read(
@@ -185,12 +185,12 @@ class FieldLabeledCSVReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading async: {file}")
                 async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
                     content = await file_content.read()
                     file_content_io = io.StringIO(content)
             else:
-
+                log_debug(f"Reading retrieved file async: {name or file.name}")
                 file.seek(0)
                 file_content_io = io.StringIO(file.read().decode("utf-8"))  # type: ignore

@@ -205,17 +205,17 @@ class FieldLabeledCSVReader(Reader):
             rows = list(csv_reader)

             if not rows:
-
+                log_warning("CSV file is empty")
                 return []

             # First row is headers
             headers = [header.strip() for header in rows[0]]
-
+            log_debug(f"Found {len(headers)} headers: {headers}")

             # Process data rows
             data_rows = rows[1:] if len(rows) > 1 else []
             total_rows = len(data_rows)
-
+            log_debug(f"Processing {total_rows} data rows")

             # For small files, process all at once
             if total_rows <= 10:
@@ -282,11 +282,9 @@ class FieldLabeledCSVReader(Reader):

             documents = [doc for page_docs in page_results for doc in page_docs]

-
+            log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
             return documents

         except Exception as e:
-            log_error(
-                f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
-            )
+            log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
             return []
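
As the `log_debug` calls above suggest, this reader turns each CSV data row into one document whose text labels every value with its column header. A toy illustration of that transform (not the reader's exact output format):

```python
# Toy illustration of field labeling: one CSV data row becomes one text
# block of "header: value" lines. The formatting here is illustrative only.
headers = ["name", "role"]
row = ["Ada", "engineer"]

labeled_text = "\n".join(f"{header}: {value}" for header, value in zip(headers, row))
print(labeled_text)
# name: Ada
# role: engineer
```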
agno/knowledge/reader/json_reader.py
CHANGED

@@ -10,7 +10,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error


 class JSONReader(Reader):
@@ -41,13 +41,13 @@ class JSONReader(Reader):
             if isinstance(path, Path):
                 if not path.exists():
                     raise FileNotFoundError(f"Could not find file: {path}")
-
+                log_debug(f"Reading: {path}")
                 json_name = name or path.name.split(".")[0]
                 json_contents = json.loads(path.read_text(self.encoding or "utf-8"))

             elif isinstance(path, BytesIO):
                 json_name = name or path.name.split(".")[0]
-
+                log_debug(f"Reading uploaded file: {json_name}")
                 path.seek(0)
                 json_contents = json.load(path)

@@ -72,7 +72,8 @@ class JSONReader(Reader):
                     chunked_documents.extend(self.chunk_document(document))
                 return chunked_documents
             return documents
-        except Exception:
+        except Exception as e:
+            log_error(f"Error reading: {path}: {e}")
             raise

     async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
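
Note the behavioral contrast: unlike the CSV and DOCX readers above, which log and return `[]`, `JSONReader.read` logs the error and still re-raises, so callers keep seeing the exception. A hedged sketch (the file path below is hypothetical):

```python
# JSONReader.read logs and re-raises rather than swallowing errors, so
# callers should keep their own guards. "data.json" is a hypothetical file.
from pathlib import Path

from agno.knowledge.reader.json_reader import JSONReader

reader = JSONReader()
try:
    documents = reader.read(Path("data.json"))
except Exception as e:
    documents = []
    print(f"Could not read JSON: {e}")
```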
agno/knowledge/reader/markdown_reader.py
CHANGED

@@ -7,7 +7,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error, log_warning

 DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy

@@ -65,11 +65,11 @@ class MarkdownReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading: {file}")
                 file_name = name or file.stem
                 file_contents = file.read_text(encoding=self.encoding or "utf-8")
             else:
-
+                log_debug(f"Reading uploaded file: {file.name}")
                 file_name = name or file.name.split(".")[0]
                 file.seek(0)
                 file_contents = file.read().decode(self.encoding or "utf-8")
@@ -82,7 +82,7 @@ class MarkdownReader(Reader):
                 return chunked_documents
             return documents
         except Exception as e:
-
+            log_error(f"Error reading: {file}: {e}")
             return []

     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -91,7 +91,7 @@ class MarkdownReader(Reader):
             if not file.exists():
                 raise FileNotFoundError(f"Could not find file: {file}")

-
+            log_debug(f"Reading asynchronously: {file}")
             file_name = name or file.stem

             try:
@@ -100,10 +100,10 @@ class MarkdownReader(Reader):
                 async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
                     file_contents = await f.read()
             except ImportError:
-
+                log_warning("aiofiles not installed, using synchronous file I/O")
                 file_contents = file.read_text(self.encoding or "utf-8")
         else:
-
+            log_debug(f"Reading uploaded file asynchronously: {file.name}")
            file_name = name or file.name.split(".")[0]
            file.seek(0)
            file_contents = file.read().decode(self.encoding or "utf-8")
@@ -118,7 +118,7 @@ class MarkdownReader(Reader):
                 return await self._async_chunk_document(document)
             return [document]
         except Exception as e:
-
+            log_error(f"Error reading asynchronously: {file}: {e}")
             return []

     async def _async_chunk_document(self, document: Document) -> List[Document]:
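
The async path prefers `aiofiles` and now logs a warning when it falls back to blocking I/O. The optional-dependency pattern, sketched in isolation:

```python
# Sketch of the optional-dependency fallback used by async_read above:
# prefer aiofiles for non-blocking reads, degrade to sync I/O if missing.
from pathlib import Path


async def read_text(file: Path, encoding: str = "utf-8") -> str:
    try:
        import aiofiles

        async with aiofiles.open(file, "r", encoding=encoding) as f:
            return await f.read()
    except ImportError:
        # the branch that now emits log_warning in MarkdownReader
        return file.read_text(encoding)
```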
agno/knowledge/reader/pdf_reader.py
CHANGED

@@ -9,7 +9,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error

 try:
     from pypdf import PdfReader as DocumentReader  # noqa: F401
@@ -236,13 +236,13 @@ class BasePDFReader(Reader):
         # Use provided password or fall back to instance password
         pdf_password = password or self.password
         if not pdf_password:
-
+            log_error(f'PDF file "{doc_name}" is password protected but no password provided')
             return False

         try:
             decrypted_pdf = doc_reader.decrypt(pdf_password)
             if decrypted_pdf:
-
+                log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
                 return True
             else:
                 log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
@@ -344,12 +344,12 @@ class PDFReader(BasePDFReader):
         self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
     ) -> List[Document]:
         doc_name = self._get_doc_name(pdf, name)
-
+        log_debug(f"Reading: {doc_name}")

         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
@@ -368,12 +368,12 @@ class PDFReader(BasePDFReader):
             log_error("No pdf provided")
             return []
         doc_name = self._get_doc_name(pdf, name)
-
+        log_debug(f"Reading: {doc_name}")

         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-
+            log_error(f"Error reading PDF: {e}")
             return []

         # Handle PDF decryption
@@ -394,11 +394,11 @@ class PDFImageReader(BasePDFReader):
             raise ValueError("No pdf provided")

         doc_name = self._get_doc_name(pdf, name)
-
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-
+            log_error(f"Error reading PDF: {e}")
             return []

         # Handle PDF decryption
@@ -415,12 +415,12 @@ class PDFImageReader(BasePDFReader):
             raise ValueError("No pdf provided")

         doc_name = self._get_doc_name(pdf, name)
-
+        log_debug(f"Reading: {doc_name}")

         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-
+            log_error(f"Error reading PDF: {e}")
             return []

         # Handle PDF decryption
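
The decryption hunks show a per-call `password` that falls back to the instance password before `doc_reader.decrypt` is attempted. A hedged usage sketch; the constructor argument and file names are assumptions, but `read()`'s `password` parameter and the fallback are visible above:

```python
# Hedged sketch of reading encrypted PDFs. PDFReader(password=...) and the
# file names are assumptions; read()'s password parameter is shown above.
from pathlib import Path

from agno.knowledge.reader.pdf_reader import PDFReader

reader = PDFReader(password="instance-secret")
docs = reader.read(Path("report.pdf"))                      # falls back to instance password
docs = reader.read(Path("other.pdf"), password="override")  # per-call password wins
```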
agno/knowledge/reader/pptx_reader.py
CHANGED

@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error

 try:
     from pptx import Presentation  # type: ignore
@@ -43,11 +43,11 @@ class PPTXReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-
+                log_debug(f"Reading: {file}")
                 presentation = Presentation(str(file))
                 doc_name = name or file.stem
             else:
-
+                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
                 presentation = Presentation(file)
                 doc_name = name or (
                     getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
@@ -89,7 +89,7 @@ class PPTXReader(Reader):
             return documents

         except Exception as e:
-
+            log_error(f"Error reading file: {e}")
             return []

     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -97,5 +97,5 @@ class PPTXReader(Reader):
         try:
             return await asyncio.to_thread(self.read, file, name)
         except Exception as e:
-
+            log_error(f"Error reading file asynchronously: {e}")
             return []
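
Like `DocxReader`, the async entry point here just offloads the synchronous `read` to a worker thread and logs failures. The pattern in isolation, as a minimal sketch:

```python
# The wrapper pattern shared by DocxReader and PPTXReader: run blocking
# read() on a thread via asyncio.to_thread so the event loop stays free.
import asyncio
from pathlib import Path
from typing import Any, List


def read(file: Path) -> List[Any]:
    return []  # stand-in for the synchronous parser


async def async_read(file: Path) -> List[Any]:
    try:
        return await asyncio.to_thread(read, file)
    except Exception as e:
        print(f"Error reading file asynchronously: {e}")
        return []
```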