agno 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +12 -0
- agno/db/base.py +8 -4
- agno/db/dynamo/dynamo.py +69 -17
- agno/db/firestore/firestore.py +68 -29
- agno/db/gcs_json/gcs_json_db.py +68 -17
- agno/db/in_memory/in_memory_db.py +83 -14
- agno/db/json/json_db.py +79 -15
- agno/db/mongo/mongo.py +27 -8
- agno/db/mysql/mysql.py +17 -3
- agno/db/postgres/postgres.py +21 -3
- agno/db/redis/redis.py +38 -11
- agno/db/singlestore/singlestore.py +14 -3
- agno/db/sqlite/sqlite.py +34 -46
- agno/knowledge/reader/field_labeled_csv_reader.py +294 -0
- agno/knowledge/reader/pdf_reader.py +28 -52
- agno/knowledge/reader/reader_factory.py +12 -0
- agno/memory/manager.py +12 -4
- agno/models/anthropic/claude.py +4 -1
- agno/models/aws/bedrock.py +52 -112
- agno/os/app.py +24 -30
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +252 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/router.py +12 -0
- agno/os/router.py +38 -8
- agno/os/routers/memory/memory.py +5 -3
- agno/os/routers/memory/schemas.py +1 -0
- agno/os/utils.py +36 -10
- agno/team/team.py +12 -0
- agno/tools/mcp.py +46 -1
- agno/utils/merge_dict.py +22 -1
- agno/utils/streamlit.py +1 -1
- agno/workflow/parallel.py +90 -14
- agno/workflow/step.py +30 -27
- agno/workflow/workflow.py +5 -3
- {agno-2.1.1.dist-info → agno-2.1.2.dist-info}/METADATA +16 -14
- {agno-2.1.1.dist-info → agno-2.1.2.dist-info}/RECORD +41 -36
- {agno-2.1.1.dist-info → agno-2.1.2.dist-info}/WHEEL +0 -0
- {agno-2.1.1.dist-info → agno-2.1.2.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.1.dist-info → agno-2.1.2.dist-info}/top_level.txt +0 -0
agno/db/singlestore/singlestore.py
CHANGED

```diff
@@ -990,11 +990,12 @@ class SingleStoreDb(BaseDb):
             return []

     # -- Memory methods --

-    def delete_user_memory(self, memory_id: str):
+    def delete_user_memory(self, memory_id: str, user_id: Optional[str] = None):
         """Delete a user memory from the database.

         Args:
             memory_id (str): The ID of the memory to delete.
+            user_id (Optional[str]): The ID of the user to filter by. Defaults to None.

         Returns:
             bool: True if deletion was successful, False otherwise.
```
```diff
@@ -1009,6 +1010,8 @@ class SingleStoreDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 delete_stmt = table.delete().where(table.c.memory_id == memory_id)
+                if user_id is not None:
+                    delete_stmt = delete_stmt.where(table.c.user_id == user_id)
                 result = sess.execute(delete_stmt)

                 success = result.rowcount > 0
```
```diff
@@ -1021,11 +1024,12 @@ class SingleStoreDb(BaseDb):
             log_error(f"Error deleting memory: {e}")
             raise e

-    def delete_user_memories(self, memory_ids: List[str]) -> None:
+    def delete_user_memories(self, memory_ids: List[str], user_id: Optional[str] = None) -> None:
         """Delete user memories from the database.

         Args:
             memory_ids (List[str]): The IDs of the memories to delete.
+            user_id (Optional[str]): The ID of the user to filter by. Defaults to None.

         Raises:
             Exception: If an error occurs during deletion.
```
```diff
@@ -1037,6 +1041,8 @@ class SingleStoreDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 delete_stmt = table.delete().where(table.c.memory_id.in_(memory_ids))
+                if user_id is not None:
+                    delete_stmt = delete_stmt.where(table.c.user_id == user_id)
                 result = sess.execute(delete_stmt)
                 if result.rowcount == 0:
                     log_debug(f"No memories found with ids: {memory_ids}")
```
```diff
@@ -1073,12 +1079,15 @@ class SingleStoreDb(BaseDb):
             log_error(f"Exception reading from memory table: {e}")
             raise e

-    def get_user_memory(self, memory_id: str, deserialize: Optional[bool] = True) -> Optional[UserMemory]:
+    def get_user_memory(
+        self, memory_id: str, deserialize: Optional[bool] = True, user_id: Optional[str] = None
+    ) -> Optional[UserMemory]:
         """Get a memory from the database.

         Args:
             memory_id (str): The ID of the memory to get.
             deserialize (Optional[bool]): Whether to serialize the memory. Defaults to True.
+            user_id (Optional[str]): The ID of the user to filter by. Defaults to None.

         Returns:
             Union[UserMemory, Dict[str, Any], None]:
```
```diff
@@ -1095,6 +1104,8 @@ class SingleStoreDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 stmt = select(table).where(table.c.memory_id == memory_id)
+                if user_id is not None:
+                    stmt = stmt.where(table.c.user_id == user_id)

                 result = sess.execute(stmt).fetchone()
                 if not result:
```
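Taken together, these hunks thread an optional `user_id` scope through the memory CRUD methods, so deletes and reads can be restricted to a single user. A minimal usage sketch; the import path and constructor arguments below are assumptions for illustration, not taken from this diff:

```python
# Sketch only: import path and db_url are assumed, not shown in this diff.
from agno.db.singlestore import SingleStoreDb

db = SingleStoreDb(db_url="mysql+pymysql://user:pass@host:3306/agno")

# With user_id set, the WHERE clause also matches table.c.user_id, so a
# memory with the same memory_id owned by another user is left untouched.
db.delete_user_memory("mem_123", user_id="user_42")
db.delete_user_memories(["mem_123", "mem_456"], user_id="user_42")
memory = db.get_user_memory("mem_123", user_id="user_42")  # None if not owned

# Omitting user_id keeps the old, unscoped behavior.
db.delete_user_memory("mem_123")
```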
agno/db/sqlite/sqlite.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import time
 from datetime import date, datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast
 from uuid import uuid4

 from agno.db.base import BaseDb, SessionType
```
```diff
@@ -24,7 +24,7 @@ from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.utils.string import generate_id

 try:
-    from sqlalchemy import Column, MetaData, Table, and_, func, select, text
+    from sqlalchemy import Column, MetaData, Table, and_, func, select, text
     from sqlalchemy.dialects import sqlite
     from sqlalchemy.engine import Engine, create_engine
     from sqlalchemy.orm import scoped_session, sessionmaker
```
```diff
@@ -442,11 +442,7 @@ class SqliteDb(BaseDb):
         if end_timestamp is not None:
             stmt = stmt.where(table.c.created_at <= end_timestamp)
         if session_name is not None:
-            stmt = stmt.where(
-                func.coalesce(func.json_extract(table.c.session_data, "$.session_name"), "").like(
-                    f"%{session_name}%"
-                )
-            )
+            stmt = stmt.where(table.c.session_data.like(f"%{session_name}%"))
         if session_type is not None:
             stmt = stmt.where(table.c.session_type == session_type.value)

```
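Worth noting: the old query matched only the `session_name` key extracted from the JSON column, while the new one is a plain substring match over the entire serialized `session_data` text. A small sketch of the two SQLAlchemy filters, using a stand-in table (the real schema lives in `SqliteDb`):

```python
from sqlalchemy import Column, MetaData, String, Table, func

# Stand-in table purely for illustration.
table = Table("agno_sessions", MetaData(), Column("session_data", String))
session_name = "demo"

# Old filter: only the session_name key pulled out of the JSON is searched.
old_filter = func.coalesce(
    func.json_extract(table.c.session_data, "$.session_name"), ""
).like(f"%{session_name}%")

# New filter: substring match over the whole serialized JSON text, which is
# simpler but can also match other keys that happen to contain the string.
new_filter = table.c.session_data.like(f"%{session_name}%")
```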
```diff
@@ -468,8 +464,10 @@ class SqliteDb(BaseDb):
             return [] if deserialize else ([], 0)

         sessions_raw = [deserialize_session_json_fields(dict(record._mapping)) for record in records]
-        if not deserialize:
+        if not deserialize:
             return sessions_raw, total_count
+        if not sessions_raw:
+            return []

         if session_type == SessionType.AGENT:
             return [AgentSession.from_dict(record) for record in sessions_raw]  # type: ignore
```
```diff
@@ -505,43 +503,20 @@ class SqliteDb(BaseDb):
             Exception: If an error occurs during renaming.
         """
         try:
-
-
+            # Get the current session as a deserialized object
+            # Get the session record
+            session = self.get_session(session_id, session_type, deserialize=True)
+            if session is None:
                 return None

-            with self.Session() as sess, sess.begin():
-                # Update the session_name inside the session_data JSON
-                stmt = (
-                    table.update()
-                    .where(table.c.session_id == session_id)
-                    .values(session_data=func.json_set(table.c.session_data, "$.session_name", session_name))
-                )
-                result = sess.execute(stmt)
-
-                # Check if any rows were affected
-                if result.rowcount == 0:
-                    return None
-
-                # Fetch the updated row
-                select_stmt = select(table).where(table.c.session_id == session_id)
-                row = sess.execute(select_stmt).fetchone()
-
-                if not row:
-                    return None
-
-                session_raw = deserialize_session_json_fields(dict(row._mapping))
-                if not session_raw or not deserialize:
-                    return session_raw
+            session = cast(Session, session)
+            # Update the session name
+            if session.session_data is None:
+                session.session_data = {}
+            session.session_data["session_name"] = session_name

-                #
-                if session_type == SessionType.AGENT:
-                    return AgentSession.from_dict(session_raw)
-                elif session_type == SessionType.TEAM:
-                    return TeamSession.from_dict(session_raw)
-                elif session_type == SessionType.WORKFLOW:
-                    return WorkflowSession.from_dict(session_raw)
-                else:
-                    raise ValueError(f"Invalid session type: {session_type}")
+            # Upsert the updated session back to the database
+            return self.upsert_session(session, deserialize=deserialize)

         except Exception as e:
             log_error(f"Exception renaming session: {e}")
```
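The rewrite swaps the hand-built UPDATE statement for a read-modify-write cycle through the existing `get_session` and `upsert_session` helpers, keeping the JSON handling in one place. A hedged usage sketch; the `SqliteDb` constructor argument and the keyword names are inferred from the hunk, not guaranteed:

```python
from agno.db.base import SessionType
from agno.db.sqlite import SqliteDb

db = SqliteDb(db_file="agno.db")  # constructor argument assumed

# Internally: get_session(...) -> set session.session_data["session_name"]
# -> upsert_session(...), instead of a hand-built SQL UPDATE.
renamed = db.rename_session(
    session_id="sess_123",
    session_type=SessionType.AGENT,
    session_name="Support run #7",
)
if renamed is None:
    print("No session with that id")
```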
```diff
@@ -909,9 +884,13 @@ class SqliteDb(BaseDb):

     # -- Memory methods --

-    def delete_user_memory(self, memory_id: str):
+    def delete_user_memory(self, memory_id: str, user_id: Optional[str] = None):
         """Delete a user memory from the database.

+        Args:
+            memory_id (str): The ID of the memory to delete.
+            user_id (Optional[str]): The user ID to filter by. Defaults to None.
+
         Returns:
             bool: True if deletion was successful, False otherwise.

```
```diff
@@ -925,6 +904,8 @@ class SqliteDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 delete_stmt = table.delete().where(table.c.memory_id == memory_id)
+                if user_id is not None:
+                    delete_stmt = delete_stmt.where(table.c.user_id == user_id)
                 result = sess.execute(delete_stmt)

                 success = result.rowcount > 0
```
```diff
@@ -937,11 +918,12 @@ class SqliteDb(BaseDb):
             log_error(f"Error deleting user memory: {e}")
             raise e

-    def delete_user_memories(self, memory_ids: List[str]) -> None:
+    def delete_user_memories(self, memory_ids: List[str], user_id: Optional[str] = None) -> None:
         """Delete user memories from the database.

         Args:
             memory_ids (List[str]): The IDs of the memories to delete.
+            user_id (Optional[str]): The user ID to filter by. Defaults to None.

         Raises:
             Exception: If an error occurs during deletion.
```
```diff
@@ -953,6 +935,8 @@ class SqliteDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 delete_stmt = table.delete().where(table.c.memory_id.in_(memory_ids))
+                if user_id is not None:
+                    delete_stmt = delete_stmt.where(table.c.user_id == user_id)
                 result = sess.execute(delete_stmt)
                 if result.rowcount == 0:
                     log_debug(f"No user memories found with ids: {memory_ids}")
```
```diff
@@ -973,7 +957,8 @@ class SqliteDb(BaseDb):
             return []

         with self.Session() as sess, sess.begin():
-
+            # Select topics from all results
+            stmt = select(func.json_array_elements_text(table.c.topics)).select_from(table)
            result = sess.execute(stmt).fetchall()

             return list(set([record[0] for record in result]))
```
```diff
@@ -983,13 +968,14 @@ class SqliteDb(BaseDb):
             raise e

     def get_user_memory(
-        self, memory_id: str, deserialize: Optional[bool] = True
+        self, memory_id: str, deserialize: Optional[bool] = True, user_id: Optional[str] = None
     ) -> Optional[Union[UserMemory, Dict[str, Any]]]:
         """Get a memory from the database.

         Args:
             memory_id (str): The ID of the memory to get.
             deserialize (Optional[bool]): Whether to serialize the memory. Defaults to True.
+            user_id (Optional[str]): The user ID to filter by. Defaults to None.

         Returns:
             Optional[Union[UserMemory, Dict[str, Any]]]:
```
```diff
@@ -1006,6 +992,8 @@ class SqliteDb(BaseDb):

             with self.Session() as sess, sess.begin():
                 stmt = select(table).where(table.c.memory_id == memory_id)
+                if user_id is not None:
+                    stmt = stmt.where(table.c.user_id == user_id)
                 result = sess.execute(stmt).fetchone()
                 if result is None:
                     return None
```
agno/knowledge/reader/field_labeled_csv_reader.py
ADDED

```diff
@@ -0,0 +1,294 @@
+import asyncio
+import csv
+import io
+from pathlib import Path
+from typing import IO, Any, List, Optional, Union
+
+try:
+    import aiofiles
+except ImportError:
+    raise ImportError("`aiofiles` not installed. Please install it with `pip install aiofiles`")
+
+from agno.knowledge.chunking.strategy import ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.types import ContentType
+from agno.utils.log import logger
+
+
+class FieldLabeledCSVReader(Reader):
+    """Reader for CSV files that converts each row to a field-labeled document."""
+
+    def __init__(
+        self,
+        chunk_title: Optional[Union[str, List[str]]] = None,
+        field_names: Optional[List[str]] = None,
+        format_headers: bool = True,
+        skip_empty_fields: bool = True,
+        **kwargs,
+    ):
+        super().__init__(chunk=False, chunking_strategy=None, **kwargs)
+        self.chunk_title = chunk_title
+        self.field_names = field_names or []
+        self.format_headers = format_headers
+        self.skip_empty_fields = skip_empty_fields
+
+        logger.info(f"FieldLabeledCSVReader initialized - chunk_title: {chunk_title}, field_names: {self.field_names}")
+
+    @classmethod
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
+        """Chunking is not supported - each row is already a logical document unit."""
+        return []
+
+    @classmethod
+    def get_supported_content_types(cls) -> List[ContentType]:
+        """Get the list of supported content types."""
+        return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
+
+    def _format_field_name(self, field_name: str) -> str:
+        """Format field name to be more readable."""
+        if not self.format_headers:
+            return field_name.strip()
+
+        # Replace underscores with spaces and title case
+        formatted = field_name.replace("_", " ").strip().title()
+        return formatted
+
+    def _get_title_for_entry(self, entry_index: int) -> Optional[str]:
+        """Get title for a specific entry."""
+        if self.chunk_title is None:
+            return None
+
+        if isinstance(self.chunk_title, str):
+            return self.chunk_title
+
+        if isinstance(self.chunk_title, list) and self.chunk_title:
+            return self.chunk_title[entry_index % len(self.chunk_title)]
+
+        return None
+
+    def _convert_row_to_labeled_text(self, headers: List[str], row: List[str], entry_index: int) -> str:
+        """
+        Convert a CSV row to field-labeled text format.
+
+        Args:
+            headers: Column headers
+            row: Data row values
+            entry_index: Index of this entry (for title rotation)
+
+        Returns:
+            Formatted text with field labels
+        """
+        lines = []
+
+        title = self._get_title_for_entry(entry_index)
+        if title:
+            lines.append(title)
+
+        for i, (header, value) in enumerate(zip(headers, row)):
+            clean_value = value.strip() if value else ""
+
+            if self.skip_empty_fields and not clean_value:
+                continue
+
+            if self.field_names and i < len(self.field_names):
+                field_name = self.field_names[i]
+            else:
+                field_name = self._format_field_name(header)
+
+            lines.append(f"{field_name}: {clean_value}")
+
+        return "\n".join(lines)
+
+    def read(
+        self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
+    ) -> List[Document]:
+        try:
+            if isinstance(file, Path):
+                if not file.exists():
+                    raise FileNotFoundError(f"Could not find file: {file}")
+                logger.info(f"Reading: {file}")
+                file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
+            else:
+                logger.info(f"Reading retrieved file: {name or file.name}")
+                file.seek(0)
+                file_content = io.StringIO(file.read().decode("utf-8"))  # type: ignore
+
+            csv_name = name or (
+                Path(file.name).stem
+                if isinstance(file, Path)
+                else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
+            )
+
+            documents = []
+
+            with file_content as csvfile:
+                csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
+
+                # Read all rows
+                rows = list(csv_reader)
+
+                if not rows:
+                    logger.warning("CSV file is empty")
+                    return []
+
+                # First row is headers
+                headers = [header.strip() for header in rows[0]]
+                logger.info(f"Found {len(headers)} headers: {headers}")
+
+                data_rows = rows[1:] if len(rows) > 1 else []
+                logger.info(f"Processing {len(data_rows)} data rows")
+
+                for row_index, row in enumerate(data_rows):
+                    # Ensure row has same length as headers (pad or truncate)
+                    normalized_row = row[: len(headers)]  # Truncate if too long
+                    while len(normalized_row) < len(headers):  # Pad if too short
+                        normalized_row.append("")
+
+                    # Convert row to labeled text
+                    labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, row_index)
+
+                    if labeled_text.strip():
+                        # Create document for this row
+                        doc_id = f"{csv_name}_row_{row_index + 1}"
+
+                        document = Document(
+                            id=doc_id,
+                            name=csv_name,
+                            meta_data={
+                                "row_index": row_index,
+                                "headers": headers,
+                                "total_rows": len(data_rows),
+                                "source": "field_labeled_csv_reader",
+                            },
+                            content=labeled_text,
+                        )
+
+                        documents.append(document)
+                        logger.debug(f"Created document for row {row_index + 1}: {len(labeled_text)} chars")
+
+            logger.info(f"Successfully created {len(documents)} labeled documents from CSV")
+            return documents
+
+        except Exception as e:
+            logger.error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
+            return []
+
+    async def async_read(
+        self,
+        file: Union[Path, IO[Any]],
+        delimiter: str = ",",
+        quotechar: str = '"',
+        page_size: int = 1000,
+        name: Optional[str] = None,
+    ) -> List[Document]:
+        try:
+            # Handle file input
+            if isinstance(file, Path):
+                if not file.exists():
+                    raise FileNotFoundError(f"Could not find file: {file}")
+                logger.info(f"Reading async: {file}")
+                async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
+                    content = await file_content.read()
+                    file_content_io = io.StringIO(content)
+            else:
+                logger.info(f"Reading retrieved file async: {name or file.name}")
+                file.seek(0)
+                file_content_io = io.StringIO(file.read().decode("utf-8"))  # type: ignore
+
+            csv_name = name or (
+                Path(file.name).stem
+                if isinstance(file, Path)
+                else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
+            )
+
+            file_content_io.seek(0)
+            csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
+            rows = list(csv_reader)
+
+            if not rows:
+                logger.warning("CSV file is empty")
+                return []
+
+            # First row is headers
+            headers = [header.strip() for header in rows[0]]
+            logger.info(f"Found {len(headers)} headers: {headers}")
+
+            # Process data rows
+            data_rows = rows[1:] if len(rows) > 1 else []
+            total_rows = len(data_rows)
+            logger.info(f"Processing {total_rows} data rows")
+
+            # For small files, process all at once
+            if total_rows <= 10:
+                documents = []
+                for row_index, row in enumerate(data_rows):
+                    normalized_row = row[: len(headers)]
+                    while len(normalized_row) < len(headers):
+                        normalized_row.append("")
+
+                    labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, row_index)
+
+                    if labeled_text.strip():
+                        document = Document(
+                            id=f"{csv_name}_row_{row_index + 1}",
+                            name=csv_name,
+                            meta_data={
+                                "row_index": row_index,
+                                "headers": headers,
+                                "total_rows": total_rows,
+                                "source": "field_labeled_csv_reader",
+                            },
+                            content=labeled_text,
+                        )
+                        documents.append(document)
+            else:
+                pages = []
+                for i in range(0, total_rows, page_size):
+                    pages.append(data_rows[i : i + page_size])
+
+                async def _process_page(page_number: int, page_rows: List[List[str]]) -> List[Document]:
+                    """Process a page of rows into documents"""
+                    page_documents = []
+                    start_row_index = (page_number - 1) * page_size
+
+                    for i, row in enumerate(page_rows):
+                        row_index = start_row_index + i
+
+                        normalized_row = row[: len(headers)]
+                        while len(normalized_row) < len(headers):
+                            normalized_row.append("")
+
+                        labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, row_index)
+
+                        if labeled_text.strip():
+                            document = Document(
+                                id=f"{csv_name}_row_{row_index + 1}",
+                                name=csv_name,
+                                meta_data={
+                                    "row_index": row_index,
+                                    "headers": headers,
+                                    "total_rows": total_rows,
+                                    "page": page_number,
+                                    "source": "field_labeled_csv_reader",
+                                },
+                                content=labeled_text,
+                            )
+                            page_documents.append(document)
+
+                    return page_documents
+
+                page_results = await asyncio.gather(
+                    *[_process_page(page_number, page) for page_number, page in enumerate(pages, start=1)]
+                )
+
+                documents = [doc for page_docs in page_results for doc in page_docs]
+
+            logger.info(f"Successfully created {len(documents)} labeled documents from CSV")
+            return documents
+
+        except Exception as e:
+            logger.error(
+                f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
+            )
+            return []
```
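Since the whole reader is new in this release, a short usage sketch may help; the CSV file and its columns are invented for illustration:

```python
from pathlib import Path

from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader

# Hypothetical employees.csv with the header row: name,role,city
reader = FieldLabeledCSVReader(chunk_title="Employee Record")
documents = reader.read(Path("employees.csv"))

# Each data row becomes one Document whose content looks like:
#   Employee Record
#   Name: Ada Lovelace
#   Role: Engineer
#   City: London
for doc in documents:
    print(doc.id, doc.meta_data["row_index"])
```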
agno/knowledge/reader/pdf_reader.py
CHANGED

```diff
@@ -218,6 +218,19 @@ class BasePDFReader(Reader):
             chunked_documents.extend(self.chunk_document(document))
         return chunked_documents

+    def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
+        """Determines the document name from the source or a provided name."""
+        try:
+            if name:
+                return name
+            if isinstance(pdf_source, str):
+                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
+            # Assumes a file-like object with a .name attribute
+            return pdf_source.name.split(".")[0]
+        except Exception:
+            # The original code had a bug here, it should check `name` first.
+            return name or "pdf"
+
     def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
         if not doc_reader.is_encrypted:
             return True
```
```diff
@@ -332,30 +345,7 @@ class PDFReader(BasePDFReader):
     def read(
         self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
     ) -> List[Document]:
-        try:
-            if name:
-                doc_name = name
-            elif isinstance(pdf, str):
-                doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
-
-        try:
-            DocumentReader(pdf)
-        except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
-            return []
-
-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = name or pdf.name.split(".")[0]
-        except Exception:
-            doc_name = name or "pdf"
-
+        doc_name = self._get_doc_name(pdf, name)
         log_info(f"Reading: {doc_name}")

         try:
```
```diff
@@ -363,7 +353,6 @@ class PDFReader(BasePDFReader):
         except PdfStreamError as e:
             logger.error(f"Error reading PDF: {e}")
             return []
-
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
             return []
```
```diff
@@ -380,15 +369,7 @@ class PDFReader(BasePDFReader):
         if pdf is None:
             log_error("No pdf provided")
             return []
-
-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = name or "pdf"
-
+        doc_name = self._get_doc_name(pdf, name)
         log_info(f"Reading: {doc_name}")

         try:
```
```diff
@@ -414,16 +395,13 @@ class PDFImageReader(BasePDFReader):
         if not pdf:
             raise ValueError("No pdf provided")

-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
-
+        doc_name = self._get_doc_name(pdf, name)
         log_info(f"Reading: {doc_name}")
-        pdf_reader = DocumentReader(pdf)
+        try:
+            pdf_reader = DocumentReader(pdf)
+        except PdfStreamError as e:
+            logger.error(f"Error reading PDF: {e}")
+            return []

         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
```
```diff
@@ -438,16 +416,14 @@ class PDFImageReader(BasePDFReader):
         if not pdf:
             raise ValueError("No pdf provided")

-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
-
+        doc_name = self._get_doc_name(pdf, name)
         log_info(f"Reading: {doc_name}")
-        pdf_reader = DocumentReader(pdf)
+
+        try:
+            pdf_reader = DocumentReader(pdf)
+        except PdfStreamError as e:
+            logger.error(f"Error reading PDF: {e}")
+            return []

         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
```
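The net effect of the four hunks above is that every PDF entry point derives the document name through the same helper. A hedged sketch of its behavior, assuming `PDFReader` can be constructed without arguments (`_get_doc_name` is private, so this is illustration only):

```python
from agno.knowledge.reader.pdf_reader import PDFReader

reader = PDFReader()

# An explicit name always wins.
assert reader._get_doc_name("docs/My Report.pdf", name="quarterly") == "quarterly"

# String paths: last path component, extension dropped, spaces -> underscores.
assert reader._get_doc_name("docs/My Report.pdf") == "My_Report"

# File-like objects fall back to their .name attribute ("report.pdf" -> "report").
```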
agno/knowledge/reader/reader_factory.py
CHANGED

```diff
@@ -35,6 +35,18 @@ class ReaderFactory:
         config.update(kwargs)
         return CSVReader(**config)

+    @classmethod
+    def _get_field_labeled_csv_reader(cls, **kwargs) -> Reader:
+        """Get Field Labeled CSV reader instance."""
+        from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader
+
+        config: Dict[str, Any] = {
+            "name": "Field Labeled CSV Reader",
+            "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
+        }
+        config.update(kwargs)
+        return FieldLabeledCSVReader(**config)
+
     @classmethod
     def _get_docx_reader(cls, **kwargs) -> Reader:
         """Get Docx reader instance."""
```
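Finally, a hedged sketch of reaching the new reader through the factory. `_get_field_labeled_csv_reader` is private, so the direct call is for illustration only; how the factory dispatches to it publicly is not shown in this diff:

```python
from agno.knowledge.reader.reader_factory import ReaderFactory

# Mirrors the hunk above: kwargs are merged over the default config.
reader = ReaderFactory._get_field_labeled_csv_reader(chunk_title="Product Entry")
print(reader.name)  # "Field Labeled CSV Reader"
```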