agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +4 -0
- agno/agent/agent.py +1368 -541
- agno/agent/remote.py +13 -0
- agno/db/base.py +339 -0
- agno/db/postgres/async_postgres.py +116 -12
- agno/db/postgres/postgres.py +1242 -25
- agno/db/postgres/schemas.py +48 -1
- agno/db/sqlite/async_sqlite.py +119 -4
- agno/db/sqlite/schemas.py +51 -0
- agno/db/sqlite/sqlite.py +1186 -13
- agno/db/utils.py +37 -1
- agno/integrations/discord/client.py +12 -1
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +4 -0
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +3722 -2182
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +2 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +236 -13
- agno/knowledge/reader/docx_reader.py +2 -2
- agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
- agno/knowledge/reader/firecrawl_reader.py +2 -2
- agno/knowledge/reader/json_reader.py +2 -2
- agno/knowledge/reader/markdown_reader.py +2 -2
- agno/knowledge/reader/pdf_reader.py +5 -4
- agno/knowledge/reader/pptx_reader.py +2 -2
- agno/knowledge/reader/reader_factory.py +118 -1
- agno/knowledge/reader/s3_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +2 -2
- agno/knowledge/reader/web_search_reader.py +2 -2
- agno/knowledge/reader/website_reader.py +5 -3
- agno/knowledge/reader/wikipedia_reader.py +2 -2
- agno/knowledge/reader/youtube_reader.py +2 -2
- agno/knowledge/remote_content/__init__.py +29 -0
- agno/knowledge/remote_content/config.py +204 -0
- agno/knowledge/remote_content/remote_content.py +74 -17
- agno/knowledge/utils.py +37 -29
- agno/learn/__init__.py +6 -0
- agno/learn/machine.py +35 -0
- agno/learn/schemas.py +82 -11
- agno/learn/stores/__init__.py +3 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/learned_knowledge.py +6 -6
- agno/models/anthropic/claude.py +24 -0
- agno/models/aws/bedrock.py +20 -0
- agno/models/base.py +60 -6
- agno/models/cerebras/cerebras.py +34 -2
- agno/models/cohere/chat.py +25 -0
- agno/models/google/gemini.py +50 -5
- agno/models/litellm/chat.py +38 -0
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/openai/chat.py +25 -1
- agno/models/openrouter/openrouter.py +46 -0
- agno/models/perplexity/perplexity.py +2 -0
- agno/models/response.py +16 -0
- agno/os/app.py +83 -44
- agno/os/interfaces/slack/router.py +10 -1
- agno/os/interfaces/whatsapp/router.py +6 -0
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +1 -0
- agno/os/routers/agents/router.py +29 -16
- agno/os/routers/agents/schema.py +6 -4
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/evals/schemas.py +4 -3
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +128 -3
- agno/os/routers/knowledge/schemas.py +12 -0
- agno/os/routers/memory/schemas.py +4 -2
- agno/os/routers/metrics/metrics.py +9 -11
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/teams/router.py +20 -8
- agno/os/routers/teams/schema.py +6 -4
- agno/os/routers/traces/traces.py +5 -5
- agno/os/routers/workflows/router.py +38 -11
- agno/os/routers/workflows/schema.py +1 -1
- agno/os/schema.py +92 -26
- agno/os/utils.py +84 -19
- agno/reasoning/anthropic.py +2 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +2 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +4 -10
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/reasoning/vertexai.py +2 -2
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/run/agent.py +59 -0
- agno/run/base.py +7 -0
- agno/run/team.py +57 -0
- agno/skills/agent_skills.py +10 -3
- agno/team/__init__.py +3 -1
- agno/team/team.py +1165 -330
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/function.py +35 -83
- agno/tools/knowledge.py +9 -4
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/parallel.py +0 -7
- agno/tools/reasoning.py +30 -23
- agno/tools/tavily.py +4 -1
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +48 -47
- agno/utils/agent.py +42 -5
- agno/utils/events.py +160 -2
- agno/utils/print_response/agent.py +0 -31
- agno/utils/print_response/team.py +0 -2
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/team.py +61 -11
- agno/vectordb/lancedb/lance_db.py +4 -1
- agno/vectordb/mongodb/mongodb.py +1 -1
- agno/vectordb/pgvector/pgvector.py +3 -3
- agno/vectordb/qdrant/qdrant.py +4 -4
- agno/workflow/__init__.py +3 -1
- agno/workflow/condition.py +0 -21
- agno/workflow/loop.py +0 -21
- agno/workflow/parallel.py +0 -21
- agno/workflow/router.py +0 -21
- agno/workflow/step.py +117 -24
- agno/workflow/steps.py +0 -21
- agno/workflow/workflow.py +427 -63
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Protocol
|
|
3
|
+
==================
|
|
4
|
+
Defines the minimal interface that knowledge implementations must implement.
|
|
5
|
+
|
|
6
|
+
This protocol enables:
|
|
7
|
+
- Custom knowledge bases to be used with agents
|
|
8
|
+
- Each implementation defines its own tools and context
|
|
9
|
+
- Flexible tool naming (not forced to use 'search')
|
|
10
|
+
- Type safety with Protocol typing
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Callable, List, Protocol, runtime_checkable
|
|
14
|
+
|
|
15
|
+
from agno.knowledge.document import Document
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@runtime_checkable
class KnowledgeProtocol(Protocol):
    """Minimal protocol for knowledge implementations.

    Enables custom knowledge bases to be used with agents.
    Each implementation defines what tools it exposes and what
    context/instructions it provides to the agent.

    Required methods:
    - build_context(): Return instructions for the agent's system prompt
    - get_tools(): Return tools to expose to the agent
    - aget_tools(): Async version of get_tools

    Optional methods:
    - retrieve(): Default retrieval for context injection (add_knowledge_to_context)
    - aretrieve(): Async version of retrieve

    Note:
        This protocol is ``runtime_checkable`` and declares all five methods
        above. ``isinstance(obj, KnowledgeProtocol)`` is therefore True only
        for objects that define every one of them, including ``retrieve`` and
        ``aretrieve``; an implementation that omits the optional methods does
        not pass an ``isinstance`` check or a static protocol check. The
        runtime check only tests that the attributes exist - signatures and
        return types are not verified.

    Example:
        ```python
        from typing import Callable, List

        from agno.agent import Agent
        from agno.knowledge.document import Document
        from agno.knowledge.protocol import KnowledgeProtocol

        class MyKnowledge:
            def build_context(self, **kwargs) -> str:
                return "Use search_docs to find information."

            def get_tools(self, **kwargs) -> List[Callable]:
                return [self.search_docs]

            async def aget_tools(self, **kwargs) -> List[Callable]:
                return [self.search_docs]

            def search_docs(self, query: str) -> str:
                # Your search implementation
                return "Results for: " + query

            # Optional: for add_knowledge_to_context feature
            def retrieve(self, query: str, **kwargs) -> List[Document]:
                return [Document(content=self.search_docs(query))]

            async def aretrieve(self, query: str, **kwargs) -> List[Document]:
                return self.retrieve(query, **kwargs)

        # MyKnowledge defines all five methods, so it satisfies KnowledgeProtocol
        assert isinstance(MyKnowledge(), KnowledgeProtocol)
        agent = Agent(knowledge=MyKnowledge())
        ```
    """

    def build_context(self, **kwargs) -> str:
        """Build context string for the agent's system prompt.

        Returns instructions about how to use this knowledge,
        what tools are available, and any usage guidelines.

        Args:
            **kwargs: Context including enable_agentic_filters, etc.

        Returns:
            Formatted context string to inject into system prompt.
        """
        ...

    def get_tools(self, **kwargs) -> List[Callable]:
        """Get tools to expose to the agent.

        Returns callable tools that the agent can use to interact
        with this knowledge. Each implementation decides what
        tools make sense (e.g., search, grep, list_files, query_db).

        Args:
            **kwargs: Context including run_response, run_context,
                async_mode, enable_agentic_filters, agent, etc.

        Returns:
            List of callable tools.
        """
        ...

    async def aget_tools(self, **kwargs) -> List[Callable]:
        """Async version of get_tools.

        Args:
            **kwargs: Same as get_tools.

        Returns:
            List of callable tools.
        """
        ...

    # Optional methods - used by add_knowledge_to_context feature
    # Implementations that don't support context injection can omit these,
    # but they then no longer pass isinstance(obj, KnowledgeProtocol).

    def retrieve(self, query: str, **kwargs) -> List[Document]:
        """Retrieve documents for context injection.

        Used by the add_knowledge_to_context feature to pre-fetch
        relevant documents into the user message. This is optional;
        if not implemented, add_knowledge_to_context will be skipped.

        Args:
            query: The query string.
            **kwargs: Additional parameters (max_results, filters, etc.)

        Returns:
            List of Document objects.
        """
        ...

    async def aretrieve(self, query: str, **kwargs) -> List[Document]:
        """Async version of retrieve.

        Args:
            query: The query string.
            **kwargs: Additional parameters.

        Returns:
            List of Document objects.
        """
        ...
|
|
@@ -17,7 +17,7 @@ class ArxivReader(Reader):
|
|
|
17
17
|
sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
|
|
18
18
|
|
|
19
19
|
@classmethod
|
|
20
|
-
def get_supported_chunking_strategies(
|
|
20
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
21
21
|
"""Get the list of supported chunking strategies for Arxiv readers."""
|
|
22
22
|
return [
|
|
23
23
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -29,7 +29,7 @@ class ArxivReader(Reader):
|
|
|
29
29
|
]
|
|
30
30
|
|
|
31
31
|
@classmethod
|
|
32
|
-
def get_supported_content_types(
|
|
32
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
33
33
|
return [ContentType.TOPIC]
|
|
34
34
|
|
|
35
35
|
def __init__(
|
agno/knowledge/reader/base.py
CHANGED
|
@@ -73,11 +73,17 @@ class Reader:
|
|
|
73
73
|
def chunk_document(self, document: Document) -> List[Document]:
|
|
74
74
|
if self.chunking_strategy is None:
|
|
75
75
|
self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
|
|
76
|
-
return self.chunking_strategy.chunk(document)
|
|
76
|
+
return self.chunking_strategy.chunk(document)
|
|
77
|
+
|
|
78
|
+
async def achunk_document(self, document: Document) -> List[Document]:
    """Chunk a single document using the strategy's async entry point.

    When no chunking strategy is configured, a ``FixedSizeChunking`` built
    from ``self.chunk_size`` is created and stored on the reader before use.

    Args:
        document: The document to split.

    Returns:
        The chunks produced by ``chunking_strategy.achunk``.
    """
    if self.chunking_strategy is None:
        # Lazily install the default strategy so later calls reuse it.
        self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
    strategy = self.chunking_strategy
    chunks = await strategy.achunk(document)
    return chunks
|
|
77
83
|
|
|
78
84
|
async def chunk_documents_async(self, documents: List[Document]) -> List[Document]:
|
|
79
85
|
"""
|
|
80
|
-
Asynchronously chunk a list of documents
|
|
86
|
+
Asynchronously chunk a list of documents.
|
|
81
87
|
|
|
82
88
|
Args:
|
|
83
89
|
documents: List of documents to be chunked.
|
|
@@ -85,11 +91,7 @@ class Reader:
|
|
|
85
91
|
Returns:
|
|
86
92
|
A flattened list of chunked documents.
|
|
87
93
|
"""
|
|
88
|
-
|
|
89
|
-
async def _chunk_document_async(doc: Document) -> List[Document]:
|
|
90
|
-
return await asyncio.to_thread(self.chunk_document, doc)
|
|
91
|
-
|
|
92
94
|
# Process chunking in parallel for all documents
|
|
93
|
-
chunked_lists = await asyncio.gather(*[
|
|
95
|
+
chunked_lists = await asyncio.gather(*[self.achunk_document(doc) for doc in documents])
|
|
94
96
|
# Flatten the result
|
|
95
97
|
return [chunk for sublist in chunked_lists for chunk in sublist]
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import csv
|
|
3
3
|
import io
|
|
4
|
+
from datetime import date, datetime
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import IO, Any, List, Optional, Union
|
|
6
|
+
from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
|
|
6
7
|
from uuid import uuid4
|
|
7
8
|
|
|
8
9
|
try:
|
|
@@ -18,6 +19,113 @@ from agno.knowledge.types import ContentType
|
|
|
18
19
|
from agno.utils.log import log_debug, log_error
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
def _get_workbook_name(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
    """Extract workbook name from file path or name parameter.

    Priority: explicit name > file path stem > file object name attribute > "workbook"

    Args:
        file: Path to the workbook, or an open file-like object.
        name: Optional explicit name; its stem wins when it is non-empty.

    Returns:
        The stem of the chosen name, or ``"workbook"`` when no usable name
        is available or the chosen name has an empty stem.
    """
    default_name = "workbook"

    candidate: Any
    if name:
        candidate = name
    elif isinstance(file, Path):
        candidate = file
    else:
        candidate = getattr(file, "name", None)
        # File objects may have no ``name`` or expose a non-path value
        # (for example an integer file descriptor); Path() would raise on those.
        if not isinstance(candidate, (str, Path)) or not str(candidate):
            return default_name

    # An empty stem (e.g. "" or ".") is not a usable workbook name.
    return Path(candidate).stem or default_name
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _infer_file_extension(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
    """Infer the lower-cased file extension (including the leading dot).

    Candidates are tried in order: the path itself (or the file object's
    string ``name`` attribute), then the explicit ``name``. The first
    candidate that actually carries a suffix wins, so an extension-less
    path or upload still falls back to the explicit ``name``.

    Args:
        file: Path to the file, or an open file-like object.
        name: Optional explicit file name.

    Returns:
        The suffix such as ``".xlsx"``, or ``""`` when none can be inferred.
    """
    candidates: List[str] = []

    if isinstance(file, Path):
        candidates.append(file.name)
    else:
        file_name = getattr(file, "name", None)
        # Ignore non-string names (e.g. integer file descriptors).
        if isinstance(file_name, str) and file_name:
            candidates.append(file_name)

    if name:
        candidates.append(name)

    for candidate in candidates:
        suffix = Path(candidate).suffix.lower()
        if suffix:
            return suffix

    return ""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _xls_date_parts_to_value(parts: Sequence[int]) -> Any:
    """Turn a six-item (year, month, day, hour, minute, second) tuple into a value.

    A (0, 0, 0) date part marks a time-only cell, for which a
    ``datetime.time`` is returned; otherwise a ``datetime`` is built.
    """
    from datetime import time

    year, month, day, hour, minute, second = parts
    if (year, month, day) == (0, 0, 0):
        return time(hour, minute, second)
    return datetime(year, month, day, hour, minute, second)


def _convert_xls_cell_value(cell_value: Any, cell_type: int, datemode: int) -> Any:
    """Convert xlrd cell value to Python type.

    xlrd returns dates as Excel serial numbers and booleans as 0/1 integers.
    This converts them to proper Python types for consistent handling with openpyxl.

    Args:
        cell_value: Raw value read from the sheet.
        cell_type: xlrd cell type code for that value.
        datemode: The workbook's date mode, forwarded to ``xlrd.xldate_as_tuple``.

    Returns:
        A ``datetime`` for date cells, a ``datetime.time`` for time-only date
        cells, a ``bool`` for boolean cells, and the value unchanged otherwise
        (including when xlrd is unavailable or the date cannot be converted).
    """
    try:
        import xlrd
    except ImportError:
        return cell_value

    if cell_type == xlrd.XL_CELL_DATE:
        try:
            return _xls_date_parts_to_value(xlrd.xldate_as_tuple(cell_value, datemode))
        except Exception:
            # Best effort: keep the raw serial number when it cannot be converted.
            return cell_value
    if cell_type == xlrd.XL_CELL_BOOLEAN:
        return bool(cell_value)
    return cell_value
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _stringify_spreadsheet_cell_value(value: Any) -> str:
    """Render one spreadsheet cell as single-line text.

    ``None`` becomes an empty string, dates and datetimes use ISO format,
    whole-number floats drop the trailing ``.0``, and everything else goes
    through ``str`` with CRLF, CR and LF each replaced by one space.
    """
    if value is None:
        return ""

    # datetime is a subclass of date, so a single check covers both.
    if isinstance(value, date):
        return value.isoformat()

    if isinstance(value, float) and value.is_integer():
        return str(int(value))

    text = str(value)
    # CRLF goes first so a Windows line ending collapses to one space, not two.
    for line_break in ("\r\n", "\r", "\n"):
        text = text.replace(line_break, " ")
    return text
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _row_values_to_csv_line(row_values: Sequence[Any]) -> str:
    """Join a row's cells into one ``", "``-separated line.

    Each cell is stringified first; trailing empty cells are dropped so
    padded rows do not end in dangling separators. A row with only empty
    cells yields an empty string.
    """
    cells = [_stringify_spreadsheet_cell_value(cell) for cell in row_values]

    # Walk back from the end to find where the non-empty content stops.
    keep = len(cells)
    while keep > 0 and cells[keep - 1] == "":
        keep -= 1

    return ", ".join(cells[:keep])
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _excel_rows_to_documents(
    *,
    workbook_name: str,
    sheets: Iterable[Tuple[str, Iterable[Sequence[Any]]]],
) -> List[Document]:
    """Build one ``Document`` per non-empty sheet.

    Args:
        workbook_name: Name assigned to every produced document.
        sheets: ``(sheet_name, rows)`` pairs; each row is a sequence of cell values.

    Returns:
        Documents whose content is the sheet's non-empty rows joined by
        newlines. ``sheet_index`` in the metadata is the 1-based position in
        ``sheets`` and still advances past sheets that are skipped as empty.
    """
    documents: List[Document] = []

    for position, (title, sheet_rows) in enumerate(sheets, start=1):
        # Rows that stringify to nothing are dropped.
        non_empty_lines = [line for line in map(_row_values_to_csv_line, sheet_rows) if line]

        if not non_empty_lines:
            log_debug(f"Sheet '{title}' is empty, skipping")
            continue

        sheet_document = Document(
            name=workbook_name,
            id=str(uuid4()),
            meta_data={"sheet_name": title, "sheet_index": position},
            content="\n".join(non_empty_lines),
        )
        documents.append(sheet_document)

    return documents
|
|
127
|
+
|
|
128
|
+
|
|
21
129
|
class CSVReader(Reader):
|
|
22
130
|
"""Reader for CSV files"""
|
|
23
131
|
|
|
@@ -25,7 +133,7 @@ class CSVReader(Reader):
|
|
|
25
133
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
26
134
|
|
|
27
135
|
@classmethod
|
|
28
|
-
def get_supported_chunking_strategies(
|
|
136
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
29
137
|
"""Get the list of supported chunking strategies for CSV readers."""
|
|
30
138
|
return [
|
|
31
139
|
ChunkingStrategyType.ROW_CHUNKER,
|
|
@@ -37,13 +145,29 @@ class CSVReader(Reader):
|
|
|
37
145
|
]
|
|
38
146
|
|
|
39
147
|
@classmethod
|
|
40
|
-
def get_supported_content_types(
|
|
148
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
41
149
|
return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
|
|
42
150
|
|
|
43
151
|
def read(
|
|
44
152
|
self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
|
|
45
153
|
) -> List[Document]:
|
|
46
154
|
try:
|
|
155
|
+
file_extension = _infer_file_extension(file, name)
|
|
156
|
+
if file_extension in {ContentType.XLSX, ContentType.XLS}:
|
|
157
|
+
workbook_name = _get_workbook_name(file, name)
|
|
158
|
+
|
|
159
|
+
if file_extension == ContentType.XLSX:
|
|
160
|
+
documents = self._read_xlsx(file, workbook_name=workbook_name)
|
|
161
|
+
else:
|
|
162
|
+
documents = self._read_xls(file, workbook_name=workbook_name)
|
|
163
|
+
|
|
164
|
+
if self.chunk:
|
|
165
|
+
chunked_documents = []
|
|
166
|
+
for document in documents:
|
|
167
|
+
chunked_documents.extend(self.chunk_document(document))
|
|
168
|
+
return chunked_documents
|
|
169
|
+
return documents
|
|
170
|
+
|
|
47
171
|
if isinstance(file, Path):
|
|
48
172
|
if not file.exists():
|
|
49
173
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
@@ -56,19 +180,20 @@ class CSVReader(Reader):
|
|
|
56
180
|
log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
|
|
57
181
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
58
182
|
file.seek(0)
|
|
59
|
-
file_content = io.StringIO(file.read().decode("utf-8"))
|
|
183
|
+
file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
60
184
|
|
|
61
|
-
|
|
185
|
+
csv_lines: List[str] = []
|
|
62
186
|
with file_content as csvfile:
|
|
63
187
|
csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
|
|
64
188
|
for row in csv_reader:
|
|
65
|
-
|
|
189
|
+
# Use stringify to normalize line endings in CSV cells
|
|
190
|
+
csv_lines.append(", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row))
|
|
66
191
|
|
|
67
192
|
documents = [
|
|
68
193
|
Document(
|
|
69
194
|
name=csv_name,
|
|
70
195
|
id=str(uuid4()),
|
|
71
|
-
content=
|
|
196
|
+
content="\n".join(csv_lines),
|
|
72
197
|
)
|
|
73
198
|
]
|
|
74
199
|
if self.chunk:
|
|
@@ -77,8 +202,17 @@ class CSVReader(Reader):
|
|
|
77
202
|
chunked_documents.extend(self.chunk_document(document))
|
|
78
203
|
return chunked_documents
|
|
79
204
|
return documents
|
|
205
|
+
except FileNotFoundError:
|
|
206
|
+
raise
|
|
207
|
+
except ImportError:
|
|
208
|
+
raise
|
|
209
|
+
except UnicodeDecodeError as e:
|
|
210
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
211
|
+
log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
|
|
212
|
+
return []
|
|
80
213
|
except Exception as e:
|
|
81
|
-
|
|
214
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
215
|
+
log_error(f"Error reading {file_desc}: {e}")
|
|
82
216
|
return []
|
|
83
217
|
|
|
84
218
|
async def async_read(
|
|
@@ -102,18 +236,31 @@ class CSVReader(Reader):
|
|
|
102
236
|
List of Document objects
|
|
103
237
|
"""
|
|
104
238
|
try:
|
|
239
|
+
file_extension = _infer_file_extension(file, name)
|
|
240
|
+
if file_extension in {ContentType.XLSX, ContentType.XLS}:
|
|
241
|
+
workbook_name = _get_workbook_name(file, name)
|
|
242
|
+
|
|
243
|
+
if file_extension == ContentType.XLSX:
|
|
244
|
+
documents = await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
|
|
245
|
+
else:
|
|
246
|
+
documents = await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
|
|
247
|
+
|
|
248
|
+
if self.chunk:
|
|
249
|
+
documents = await self.chunk_documents_async(documents)
|
|
250
|
+
return documents
|
|
251
|
+
|
|
105
252
|
if isinstance(file, Path):
|
|
106
253
|
if not file.exists():
|
|
107
254
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
108
255
|
log_debug(f"Reading async: {file}")
|
|
109
|
-
async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
|
|
256
|
+
async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
|
|
110
257
|
content = await file_content.read()
|
|
111
258
|
file_content_io = io.StringIO(content)
|
|
112
259
|
csv_name = name or file.stem
|
|
113
260
|
else:
|
|
114
261
|
log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
|
|
115
262
|
file.seek(0)
|
|
116
|
-
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
263
|
+
file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
117
264
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
118
265
|
|
|
119
266
|
file_content_io.seek(0)
|
|
@@ -122,7 +269,10 @@ class CSVReader(Reader):
|
|
|
122
269
|
total_rows = len(rows)
|
|
123
270
|
|
|
124
271
|
if total_rows <= 10:
|
|
125
|
-
|
|
272
|
+
# Use stringify to normalize line endings in CSV cells
|
|
273
|
+
csv_content = " ".join(
|
|
274
|
+
", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in rows
|
|
275
|
+
)
|
|
126
276
|
documents = [
|
|
127
277
|
Document(
|
|
128
278
|
name=csv_name,
|
|
@@ -138,7 +288,10 @@ class CSVReader(Reader):
|
|
|
138
288
|
async def _process_page(page_number: int, page_rows: List[List[str]]) -> Document:
|
|
139
289
|
"""Process a page of rows into a document"""
|
|
140
290
|
start_row = (page_number - 1) * page_size + 1
|
|
141
|
-
|
|
291
|
+
# Use stringify to normalize line endings in CSV cells
|
|
292
|
+
page_content = " ".join(
|
|
293
|
+
", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in page_rows
|
|
294
|
+
)
|
|
142
295
|
|
|
143
296
|
return Document(
|
|
144
297
|
name=csv_name,
|
|
@@ -155,6 +308,76 @@ class CSVReader(Reader):
|
|
|
155
308
|
documents = await self.chunk_documents_async(documents)
|
|
156
309
|
|
|
157
310
|
return documents
|
|
311
|
+
except FileNotFoundError:
|
|
312
|
+
raise
|
|
313
|
+
except ImportError:
|
|
314
|
+
raise
|
|
315
|
+
except UnicodeDecodeError as e:
|
|
316
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
317
|
+
log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
|
|
318
|
+
return []
|
|
158
319
|
except Exception as e:
|
|
159
|
-
|
|
320
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
321
|
+
log_error(f"Error reading {file_desc}: {e}")
|
|
160
322
|
return []
|
|
323
|
+
|
|
324
|
+
def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
    """Read an ``.xlsx`` workbook into one document per non-empty sheet.

    Args:
        file: Path to the workbook, or a file-like object holding its contents.
        workbook_name: Name assigned to each resulting document.

    Returns:
        The documents built by ``_excel_rows_to_documents``.

    Raises:
        ImportError: If ``openpyxl`` is not installed.
    """
    try:
        import openpyxl  # type: ignore
    except ImportError as e:
        raise ImportError(
            "`openpyxl` not installed. Please install it via `pip install agno[csv]` or `pip install openpyxl`."
        ) from e

    if not isinstance(file, Path):
        # Rewind and buffer the stream so openpyxl gets a fresh binary handle.
        file.seek(0)
        payload = file.read()
        if isinstance(payload, str):
            payload = payload.encode("utf-8", errors="replace")
        workbook = openpyxl.load_workbook(filename=io.BytesIO(payload), read_only=True, data_only=True)
    else:
        workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)

    try:
        # The row iterators are consumed inside the helper, before the workbook is closed.
        sheet_rows = [(ws.title, ws.iter_rows(values_only=True)) for ws in workbook.worksheets]
        return _excel_rows_to_documents(workbook_name=workbook_name, sheets=sheet_rows)
    finally:
        workbook.close()
|
|
348
|
+
|
|
349
|
+
def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
    """Read a legacy ``.xls`` workbook into one document per non-empty sheet.

    Args:
        file: Path to the workbook, or a file-like object holding its contents.
        workbook_name: Name assigned to each resulting document.

    Returns:
        The documents built by ``_excel_rows_to_documents``.

    Raises:
        ImportError: If ``xlrd`` is not installed.
    """
    try:
        import xlrd  # type: ignore
    except ImportError as e:
        raise ImportError(
            "`xlrd` not installed. Please install it via `pip install agno[csv]` or `pip install xlrd`."
        ) from e

    if not isinstance(file, Path):
        file.seek(0)
        payload = file.read()
        if isinstance(payload, str):
            payload = payload.encode("utf-8", errors="replace")
        workbook = xlrd.open_workbook(file_contents=payload)
    else:
        workbook = xlrd.open_workbook(filename=str(file))

    def _converted_rows(xls_sheet: Any, datemode: int) -> Iterable[Sequence[Any]]:
        """Lazily yield each row of ``xls_sheet`` with cells converted to Python values."""
        for row_number in range(xls_sheet.nrows):
            yield [
                _convert_xls_cell_value(
                    xls_sheet.cell_value(row_number, column_number),
                    xls_sheet.cell_type(row_number, column_number),
                    datemode,
                )
                for column_number in range(xls_sheet.ncols)
            ]

    sheets: List[Tuple[str, Iterable[Sequence[Any]]]] = []
    for position in range(workbook.nsheets):
        xls_sheet = workbook.sheet_by_index(position)
        # The sheet is passed as an argument so each generator keeps its own sheet.
        sheets.append((xls_sheet.name, _converted_rows(xls_sheet, workbook.datemode)))

    return _excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
|
|
@@ -23,7 +23,7 @@ class DocxReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for DOCX readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -35,7 +35,7 @@ class DocxReader(Reader):
|
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
39
39
|
return [ContentType.DOCX, ContentType.DOC]
|
|
40
40
|
|
|
41
41
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|