agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. agno/agent/__init__.py +4 -0
  2. agno/agent/agent.py +1368 -541
  3. agno/agent/remote.py +13 -0
  4. agno/db/base.py +339 -0
  5. agno/db/postgres/async_postgres.py +116 -12
  6. agno/db/postgres/postgres.py +1242 -25
  7. agno/db/postgres/schemas.py +48 -1
  8. agno/db/sqlite/async_sqlite.py +119 -4
  9. agno/db/sqlite/schemas.py +51 -0
  10. agno/db/sqlite/sqlite.py +1186 -13
  11. agno/db/utils.py +37 -1
  12. agno/integrations/discord/client.py +12 -1
  13. agno/knowledge/__init__.py +4 -0
  14. agno/knowledge/chunking/code.py +1 -1
  15. agno/knowledge/chunking/semantic.py +1 -1
  16. agno/knowledge/chunking/strategy.py +4 -0
  17. agno/knowledge/filesystem.py +412 -0
  18. agno/knowledge/knowledge.py +3722 -2182
  19. agno/knowledge/protocol.py +134 -0
  20. agno/knowledge/reader/arxiv_reader.py +2 -2
  21. agno/knowledge/reader/base.py +9 -7
  22. agno/knowledge/reader/csv_reader.py +236 -13
  23. agno/knowledge/reader/docx_reader.py +2 -2
  24. agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
  25. agno/knowledge/reader/firecrawl_reader.py +2 -2
  26. agno/knowledge/reader/json_reader.py +2 -2
  27. agno/knowledge/reader/markdown_reader.py +2 -2
  28. agno/knowledge/reader/pdf_reader.py +5 -4
  29. agno/knowledge/reader/pptx_reader.py +2 -2
  30. agno/knowledge/reader/reader_factory.py +118 -1
  31. agno/knowledge/reader/s3_reader.py +2 -2
  32. agno/knowledge/reader/tavily_reader.py +2 -2
  33. agno/knowledge/reader/text_reader.py +2 -2
  34. agno/knowledge/reader/web_search_reader.py +2 -2
  35. agno/knowledge/reader/website_reader.py +5 -3
  36. agno/knowledge/reader/wikipedia_reader.py +2 -2
  37. agno/knowledge/reader/youtube_reader.py +2 -2
  38. agno/knowledge/remote_content/__init__.py +29 -0
  39. agno/knowledge/remote_content/config.py +204 -0
  40. agno/knowledge/remote_content/remote_content.py +74 -17
  41. agno/knowledge/utils.py +37 -29
  42. agno/learn/__init__.py +6 -0
  43. agno/learn/machine.py +35 -0
  44. agno/learn/schemas.py +82 -11
  45. agno/learn/stores/__init__.py +3 -0
  46. agno/learn/stores/decision_log.py +1156 -0
  47. agno/learn/stores/learned_knowledge.py +6 -6
  48. agno/models/anthropic/claude.py +24 -0
  49. agno/models/aws/bedrock.py +20 -0
  50. agno/models/base.py +60 -6
  51. agno/models/cerebras/cerebras.py +34 -2
  52. agno/models/cohere/chat.py +25 -0
  53. agno/models/google/gemini.py +50 -5
  54. agno/models/litellm/chat.py +38 -0
  55. agno/models/n1n/__init__.py +3 -0
  56. agno/models/n1n/n1n.py +57 -0
  57. agno/models/openai/chat.py +25 -1
  58. agno/models/openrouter/openrouter.py +46 -0
  59. agno/models/perplexity/perplexity.py +2 -0
  60. agno/models/response.py +16 -0
  61. agno/os/app.py +83 -44
  62. agno/os/interfaces/slack/router.py +10 -1
  63. agno/os/interfaces/whatsapp/router.py +6 -0
  64. agno/os/middleware/__init__.py +2 -0
  65. agno/os/middleware/trailing_slash.py +27 -0
  66. agno/os/router.py +1 -0
  67. agno/os/routers/agents/router.py +29 -16
  68. agno/os/routers/agents/schema.py +6 -4
  69. agno/os/routers/components/__init__.py +3 -0
  70. agno/os/routers/components/components.py +475 -0
  71. agno/os/routers/evals/schemas.py +4 -3
  72. agno/os/routers/health.py +3 -3
  73. agno/os/routers/knowledge/knowledge.py +128 -3
  74. agno/os/routers/knowledge/schemas.py +12 -0
  75. agno/os/routers/memory/schemas.py +4 -2
  76. agno/os/routers/metrics/metrics.py +9 -11
  77. agno/os/routers/metrics/schemas.py +10 -6
  78. agno/os/routers/registry/__init__.py +3 -0
  79. agno/os/routers/registry/registry.py +337 -0
  80. agno/os/routers/teams/router.py +20 -8
  81. agno/os/routers/teams/schema.py +6 -4
  82. agno/os/routers/traces/traces.py +5 -5
  83. agno/os/routers/workflows/router.py +38 -11
  84. agno/os/routers/workflows/schema.py +1 -1
  85. agno/os/schema.py +92 -26
  86. agno/os/utils.py +84 -19
  87. agno/reasoning/anthropic.py +2 -2
  88. agno/reasoning/azure_ai_foundry.py +2 -2
  89. agno/reasoning/deepseek.py +2 -2
  90. agno/reasoning/default.py +6 -7
  91. agno/reasoning/gemini.py +2 -2
  92. agno/reasoning/helpers.py +6 -7
  93. agno/reasoning/manager.py +4 -10
  94. agno/reasoning/ollama.py +2 -2
  95. agno/reasoning/openai.py +2 -2
  96. agno/reasoning/vertexai.py +2 -2
  97. agno/registry/__init__.py +3 -0
  98. agno/registry/registry.py +68 -0
  99. agno/run/agent.py +59 -0
  100. agno/run/base.py +7 -0
  101. agno/run/team.py +57 -0
  102. agno/skills/agent_skills.py +10 -3
  103. agno/team/__init__.py +3 -1
  104. agno/team/team.py +1165 -330
  105. agno/tools/duckduckgo.py +25 -71
  106. agno/tools/exa.py +0 -21
  107. agno/tools/function.py +35 -83
  108. agno/tools/knowledge.py +9 -4
  109. agno/tools/mem0.py +11 -10
  110. agno/tools/memory.py +47 -46
  111. agno/tools/parallel.py +0 -7
  112. agno/tools/reasoning.py +30 -23
  113. agno/tools/tavily.py +4 -1
  114. agno/tools/websearch.py +93 -0
  115. agno/tools/website.py +1 -1
  116. agno/tools/wikipedia.py +1 -1
  117. agno/tools/workflow.py +48 -47
  118. agno/utils/agent.py +42 -5
  119. agno/utils/events.py +160 -2
  120. agno/utils/print_response/agent.py +0 -31
  121. agno/utils/print_response/team.py +0 -2
  122. agno/utils/print_response/workflow.py +0 -2
  123. agno/utils/team.py +61 -11
  124. agno/vectordb/lancedb/lance_db.py +4 -1
  125. agno/vectordb/mongodb/mongodb.py +1 -1
  126. agno/vectordb/pgvector/pgvector.py +3 -3
  127. agno/vectordb/qdrant/qdrant.py +4 -4
  128. agno/workflow/__init__.py +3 -1
  129. agno/workflow/condition.py +0 -21
  130. agno/workflow/loop.py +0 -21
  131. agno/workflow/parallel.py +0 -21
  132. agno/workflow/router.py +0 -21
  133. agno/workflow/step.py +117 -24
  134. agno/workflow/steps.py +0 -21
  135. agno/workflow/workflow.py +427 -63
  136. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
  137. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
  138. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
  139. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
  140. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0
agno/knowledge/protocol.py
@@ -0,0 +1,134 @@
+"""
+Knowledge Protocol
+==================
+Defines the minimal interface that knowledge implementations must implement.
+
+This protocol enables:
+- Custom knowledge bases to be used with agents
+- Each implementation defines its own tools and context
+- Flexible tool naming (not forced to use 'search')
+- Type safety with Protocol typing
+"""
+
+from typing import Callable, List, Protocol, runtime_checkable
+
+from agno.knowledge.document import Document
+
+
+@runtime_checkable
+class KnowledgeProtocol(Protocol):
+    """Minimal protocol for knowledge implementations.
+
+    Enables custom knowledge bases to be used with agents.
+    Each implementation defines what tools it exposes and what
+    context/instructions it provides to the agent.
+
+    Required methods:
+    - build_context(): Return instructions for the agent's system prompt
+    - get_tools(): Return tools to expose to the agent
+    - aget_tools(): Async version of get_tools
+
+    Optional methods:
+    - retrieve(): Default retrieval for context injection (add_knowledge_to_context)
+    - aretrieve(): Async version of retrieve
+
+    Example:
+        ```python
+        from agno.knowledge.protocol import KnowledgeProtocol
+        from agno.knowledge.document import Document
+
+        class MyKnowledge:
+            def build_context(self, **kwargs) -> str:
+                return "Use search_docs to find information."
+
+            def get_tools(self, **kwargs) -> List[Callable]:
+                return [self.search_docs]
+
+            async def aget_tools(self, **kwargs) -> List[Callable]:
+                return [self.search_docs]
+
+            def search_docs(self, query: str) -> str:
+                # Your search implementation
+                return "Results for: " + query
+
+            # Optional: for add_knowledge_to_context feature
+            def retrieve(self, query: str, **kwargs) -> List[Document]:
+                results = self._internal_search(query)
+                return [Document(content=r) for r in results]
+
+        # MyKnowledge satisfies KnowledgeProtocol
+        agent = Agent(knowledge=MyKnowledge())
+        ```
+    """
+
+    def build_context(self, **kwargs) -> str:
+        """Build context string for the agent's system prompt.
+
+        Returns instructions about how to use this knowledge,
+        what tools are available, and any usage guidelines.
+
+        Args:
+            **kwargs: Context including enable_agentic_filters, etc.
+
+        Returns:
+            Formatted context string to inject into system prompt.
+        """
+        ...
+
+    def get_tools(self, **kwargs) -> List[Callable]:
+        """Get tools to expose to the agent.
+
+        Returns callable tools that the agent can use to interact
+        with this knowledge. Each implementation decides what
+        tools make sense (e.g., search, grep, list_files, query_db).
+
+        Args:
+            **kwargs: Context including run_response, run_context,
+                async_mode, enable_agentic_filters, agent, etc.
+
+        Returns:
+            List of callable tools.
+        """
+        ...
+
+    async def aget_tools(self, **kwargs) -> List[Callable]:
+        """Async version of get_tools.
+
+        Args:
+            **kwargs: Same as get_tools.
+
+        Returns:
+            List of callable tools.
+        """
+        ...
+
+    # Optional methods - used by add_knowledge_to_context feature
+    # Implementations that don't support context injection can omit these
+
+    def retrieve(self, query: str, **kwargs) -> List[Document]:
+        """Retrieve documents for context injection.
+
+        Used by the add_knowledge_to_context feature to pre-fetch
+        relevant documents into the user message. This is optional;
+        if not implemented, add_knowledge_to_context will be skipped.
+
+        Args:
+            query: The query string.
+            **kwargs: Additional parameters (max_results, filters, etc.)
+
+        Returns:
+            List of Document objects.
+        """
+        ...
+
+    async def aretrieve(self, query: str, **kwargs) -> List[Document]:
+        """Async version of retrieve.
+
+        Args:
+            query: The query string.
+            **kwargs: Additional parameters.
+
+        Returns:
+            List of Document objects.
+        """
+        ...
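A minimal, illustrative sketch of how the new protocol above can be satisfied outside the package; SimpleKnowledge and its lookup tool are hypothetical names, and only the methods shown in the hunk are assumed to exist:

# Illustrative only - SimpleKnowledge and lookup() are hypothetical, not part of agno.
from typing import Callable, List

from agno.knowledge.document import Document
from agno.knowledge.protocol import KnowledgeProtocol


class SimpleKnowledge:
    """Duck-typed knowledge base exposing a single lookup tool."""

    def __init__(self, docs: List[str]):
        self.docs = docs

    def build_context(self, **kwargs) -> str:
        return "Call lookup(query) to search the local document list."

    def get_tools(self, **kwargs) -> List[Callable]:
        return [self.lookup]

    async def aget_tools(self, **kwargs) -> List[Callable]:
        return [self.lookup]

    def lookup(self, query: str) -> str:
        # Naive substring match over the in-memory documents
        hits = [d for d in self.docs if query.lower() in d.lower()]
        return "\n".join(hits) or "No matches."

    def retrieve(self, query: str, **kwargs) -> List[Document]:
        # Optional: enables the add_knowledge_to_context feature
        return [Document(content=d) for d in self.docs if query.lower() in d.lower()]


# Because KnowledgeProtocol is @runtime_checkable, structural conformance can be
# verified with isinstance (method presence only; signatures are not checked).
knowledge = SimpleKnowledge(docs=["Agno agents can use custom knowledge bases."])
assert isinstance(knowledge, KnowledgeProtocol)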
agno/knowledge/reader/arxiv_reader.py
@@ -17,7 +17,7 @@ class ArxivReader(Reader):
     sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
 
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Arxiv readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -29,7 +29,7 @@ class ArxivReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.TOPIC]
 
     def __init__(
agno/knowledge/reader/base.py
@@ -73,11 +73,17 @@ class Reader:
     def chunk_document(self, document: Document) -> List[Document]:
         if self.chunking_strategy is None:
             self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
-        return self.chunking_strategy.chunk(document)  # type: ignore
+        return self.chunking_strategy.chunk(document)
+
+    async def achunk_document(self, document: Document) -> List[Document]:
+        """Async version of chunk_document."""
+        if self.chunking_strategy is None:
+            self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
+        return await self.chunking_strategy.achunk(document)
 
     async def chunk_documents_async(self, documents: List[Document]) -> List[Document]:
         """
-        Asynchronously chunk a list of documents using the instance's chunk_document method.
+        Asynchronously chunk a list of documents.
 
         Args:
             documents: List of documents to be chunked.
@@ -85,11 +91,7 @@ class Reader:
         Returns:
            A flattened list of chunked documents.
         """
-
-        async def _chunk_document_async(doc: Document) -> List[Document]:
-            return await asyncio.to_thread(self.chunk_document, doc)
-
         # Process chunking in parallel for all documents
-        chunked_lists = await asyncio.gather(*[_chunk_document_async(doc) for doc in documents])
+        chunked_lists = await asyncio.gather(*[self.achunk_document(doc) for doc in documents])
         # Flatten the result
         return [chunk for sublist in chunked_lists for chunk in sublist]
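The new achunk_document delegates to self.chunking_strategy.achunk(...), so chunk_documents_async no longer wraps the sync path in asyncio.to_thread itself. A hedged sketch of a custom strategy that stays compatible with this path; ParagraphChunking is hypothetical and duck-types only the chunk()/achunk() calls the Reader above makes, rather than assuming the exact ChunkingStrategy base-class contract:

# Hypothetical sketch - ParagraphChunking is not part of agno; it only implements
# the chunk()/achunk() methods that Reader.chunk_document/achunk_document call.
import asyncio
from typing import List

from agno.knowledge.document import Document


class ParagraphChunking:
    def chunk(self, document: Document) -> List[Document]:
        # Split on blank lines; keep non-empty paragraphs as separate documents
        parts = [p.strip() for p in (document.content or "").split("\n\n") if p.strip()]
        return [Document(name=document.name, content=p) for p in parts]

    async def achunk(self, document: Document) -> List[Document]:
        # CPU-bound splitting: run the sync implementation off the event loop
        return await asyncio.to_thread(self.chunk, document)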
agno/knowledge/reader/csv_reader.py
@@ -1,8 +1,9 @@
 import asyncio
 import csv
 import io
+from datetime import date, datetime
 from pathlib import Path
-from typing import IO, Any, List, Optional, Union
+from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
 from uuid import uuid4
 
 try:
@@ -18,6 +19,113 @@ from agno.knowledge.types import ContentType
 from agno.utils.log import log_debug, log_error
 
 
+def _get_workbook_name(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
+    """Extract workbook name from file path or name parameter.
+
+    Priority: explicit name > file path stem > file object name attribute > "workbook"
+    """
+    if name:
+        return Path(name).stem
+    if isinstance(file, Path):
+        return file.stem
+    return Path(getattr(file, "name", "workbook")).stem
+
+
+def _infer_file_extension(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
+    if isinstance(file, Path):
+        return file.suffix.lower()
+
+    file_name = getattr(file, "name", None)
+    if isinstance(file_name, str) and file_name:
+        return Path(file_name).suffix.lower()
+
+    if name:
+        return Path(name).suffix.lower()
+
+    return ""
+
+
+def _convert_xls_cell_value(cell_value: Any, cell_type: int, datemode: int) -> Any:
+    """Convert xlrd cell value to Python type.
+
+    xlrd returns dates as Excel serial numbers and booleans as 0/1 integers.
+    This converts them to proper Python types for consistent handling with openpyxl.
+    """
+    try:
+        import xlrd
+    except ImportError:
+        return cell_value
+
+    if cell_type == xlrd.XL_CELL_DATE:
+        try:
+            date_tuple = xlrd.xldate_as_tuple(cell_value, datemode)
+            return datetime(*date_tuple)
+        except Exception:
+            return cell_value
+    if cell_type == xlrd.XL_CELL_BOOLEAN:
+        return bool(cell_value)
+    return cell_value
+
+
+def _stringify_spreadsheet_cell_value(value: Any) -> str:
+    if value is None:
+        return ""
+
+    # Handle datetime/date before float check (datetime is not a float)
+    if isinstance(value, datetime):
+        return value.isoformat()
+    if isinstance(value, date):
+        return value.isoformat()
+
+    if isinstance(value, float) and value.is_integer():
+        return str(int(value))
+
+    result = str(value)
+    # Normalize all line endings to space to preserve row integrity in CSV-like output
+    # Must handle CRLF first before individual CR/LF to avoid double-spacing
+    result = result.replace("\r\n", " ")  # Windows (CRLF)
+    result = result.replace("\r", " ")  # Old Mac (CR)
+    result = result.replace("\n", " ")  # Unix (LF)
+    return result
+
+
+def _row_values_to_csv_line(row_values: Sequence[Any]) -> str:
+    values = [_stringify_spreadsheet_cell_value(v) for v in row_values]
+    while values and values[-1] == "":
+        values.pop()
+
+    return ", ".join(values)
+
+
+def _excel_rows_to_documents(
+    *,
+    workbook_name: str,
+    sheets: Iterable[Tuple[str, Iterable[Sequence[Any]]]],
+) -> List[Document]:
+    documents = []
+    for sheet_index, (sheet_name, rows) in enumerate(sheets, start=1):
+        lines = []
+        for row in rows:
+            line = _row_values_to_csv_line(row)
+            if line:
+                lines.append(line)
+
+        if not lines:
+            log_debug(f"Sheet '{sheet_name}' is empty, skipping")
+            continue
+
+        documents.append(
+            Document(
+                name=workbook_name,
+                id=str(uuid4()),
+                meta_data={"sheet_name": sheet_name, "sheet_index": sheet_index},
+                content="\n".join(lines),
+            )
+        )
+
+    return documents
+
+
 class CSVReader(Reader):
     """Reader for CSV files"""
 
@@ -25,7 +133,7 @@ class CSVReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for CSV readers."""
         return [
             ChunkingStrategyType.ROW_CHUNKER,
@@ -37,13 +145,29 @@ class CSVReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
 
     def read(
         self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
     ) -> List[Document]:
         try:
+            file_extension = _infer_file_extension(file, name)
+            if file_extension in {ContentType.XLSX, ContentType.XLS}:
+                workbook_name = _get_workbook_name(file, name)
+
+                if file_extension == ContentType.XLSX:
+                    documents = self._read_xlsx(file, workbook_name=workbook_name)
+                else:
+                    documents = self._read_xls(file, workbook_name=workbook_name)
+
+                if self.chunk:
+                    chunked_documents = []
+                    for document in documents:
+                        chunked_documents.extend(self.chunk_document(document))
+                    return chunked_documents
+                return documents
+
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
@@ -56,19 +180,20 @@ class CSVReader(Reader):
             log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
             csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content = io.StringIO(file.read().decode("utf-8"))
+            file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
 
-            csv_content = ""
+            csv_lines: List[str] = []
             with file_content as csvfile:
                 csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
                 for row in csv_reader:
-                    csv_content += ", ".join(row) + "\n"
+                    # Use stringify to normalize line endings in CSV cells
+                    csv_lines.append(", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row))
 
             documents = [
                 Document(
                     name=csv_name,
                     id=str(uuid4()),
-                    content=csv_content,
+                    content="\n".join(csv_lines),
                 )
             ]
             if self.chunk:
@@ -77,8 +202,17 @@ class CSVReader(Reader):
                     chunked_documents.extend(self.chunk_document(document))
                 return chunked_documents
             return documents
+        except FileNotFoundError:
+            raise
+        except ImportError:
+            raise
+        except UnicodeDecodeError as e:
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
+            return []
         except Exception as e:
-            log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Error reading {file_desc}: {e}")
             return []
 
     async def async_read(
@@ -102,18 +236,31 @@ class CSVReader(Reader):
             List of Document objects
         """
         try:
+            file_extension = _infer_file_extension(file, name)
+            if file_extension in {ContentType.XLSX, ContentType.XLS}:
+                workbook_name = _get_workbook_name(file, name)
+
+                if file_extension == ContentType.XLSX:
+                    documents = await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
+                else:
+                    documents = await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
+
+                if self.chunk:
+                    documents = await self.chunk_documents_async(documents)
+                return documents
+
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
                 log_debug(f"Reading async: {file}")
-                async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
+                async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
                     content = await file_content.read()
                 file_content_io = io.StringIO(content)
                 csv_name = name or file.stem
             else:
                 log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
                 file.seek(0)
-                file_content_io = io.StringIO(file.read().decode("utf-8"))
+                file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
                 csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
 
             file_content_io.seek(0)
@@ -122,7 +269,10 @@ class CSVReader(Reader):
             total_rows = len(rows)
 
             if total_rows <= 10:
-                csv_content = " ".join(", ".join(row) for row in rows)
+                # Use stringify to normalize line endings in CSV cells
+                csv_content = " ".join(
+                    ", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in rows
+                )
                 documents = [
                     Document(
                         name=csv_name,
@@ -138,7 +288,10 @@ class CSVReader(Reader):
             async def _process_page(page_number: int, page_rows: List[List[str]]) -> Document:
                 """Process a page of rows into a document"""
                 start_row = (page_number - 1) * page_size + 1
-                page_content = " ".join(", ".join(row) for row in page_rows)
+                # Use stringify to normalize line endings in CSV cells
+                page_content = " ".join(
+                    ", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in page_rows
+                )
 
                 return Document(
                     name=csv_name,
@@ -155,6 +308,76 @@ class CSVReader(Reader):
                 documents = await self.chunk_documents_async(documents)
 
             return documents
+        except FileNotFoundError:
+            raise
+        except ImportError:
+            raise
+        except UnicodeDecodeError as e:
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
+            return []
         except Exception as e:
-            log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Error reading {file_desc}: {e}")
             return []
+
+    def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        try:
+            import openpyxl  # type: ignore
+        except ImportError as e:
+            raise ImportError(
+                "`openpyxl` not installed. Please install it via `pip install agno[csv]` or `pip install openpyxl`."
+            ) from e
+
+        if isinstance(file, Path):
+            workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
+
+        try:
+            return _excel_rows_to_documents(
+                workbook_name=workbook_name,
+                sheets=[(worksheet.title, worksheet.iter_rows(values_only=True)) for worksheet in workbook.worksheets],
+            )
+        finally:
+            workbook.close()
+
+    def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        try:
+            import xlrd  # type: ignore
+        except ImportError as e:
+            raise ImportError(
+                "`xlrd` not installed. Please install it via `pip install agno[csv]` or `pip install xlrd`."
+            ) from e
+
+        if isinstance(file, Path):
+            workbook = xlrd.open_workbook(filename=str(file))
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = xlrd.open_workbook(file_contents=raw)
+
+        sheets: List[Tuple[str, Iterable[Sequence[Any]]]] = []
+        for sheet_index in range(workbook.nsheets):
+            sheet = workbook.sheet_by_index(sheet_index)
+
+            def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
+                for row_index in range(_sheet.nrows):
+                    yield [
+                        _convert_xls_cell_value(
+                            _sheet.cell_value(row_index, col_index),
+                            _sheet.cell_type(row_index, col_index),
+                            _datemode,
+                        )
+                        for col_index in range(_sheet.ncols)
+                    ]
+
+            sheets.append((sheet.name, _iter_sheet_rows()))
+
+        return _excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
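A short usage sketch of the new spreadsheet path: with 2.4.1, CSVReader.read() detects .xlsx/.xls input and routes it through the _read_xlsx/_read_xls methods above, emitting one Document per non-empty sheet with the sheet name and index in meta_data. The workbook path below is a placeholder, and openpyxl must be installed (e.g. pip install agno[csv]):

# Usage sketch (quarterly_report.xlsx is a placeholder file).
from pathlib import Path

from agno.knowledge.reader.csv_reader import CSVReader

reader = CSVReader()
documents = reader.read(Path("quarterly_report.xlsx"))

for doc in documents:
    # Before chunking, each non-empty sheet maps to one document whose content
    # is comma-joined rows; sheet_name/sheet_index are stored in meta_data.
    print(doc.name, doc.meta_data)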
agno/knowledge/reader/docx_reader.py
@@ -23,7 +23,7 @@ class DocxReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for DOCX readers."""
         return [
             ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -35,7 +35,7 @@ class DocxReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.DOCX, ContentType.DOC]
 
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
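The DocxReader hunks, like the ArxivReader and CSVReader hunks above, are the same one-line cleanup: the first parameter of a @classmethod receives the class object, so it is renamed from self to cls. Behaviour is unchanged; the old name was merely misleading and is flagged by common linters. A minimal illustration, not taken from the package:

# Minimal illustration of the self -> cls rename on classmethods.
class ExampleReader:
    @classmethod
    def get_supported_content_types(cls) -> list:
        # cls is ExampleReader (or a subclass) here, not an instance
        return ["csv"]


print(ExampleReader.get_supported_content_types())  # -> ['csv']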