agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6009 -2874
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +595 -187
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +339 -266
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +1011 -566
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +110 -37
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +143 -4
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +60 -6
- agno/models/openai/chat.py +102 -43
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +81 -5
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -175
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +266 -112
- agno/run/base.py +53 -24
- agno/run/team.py +252 -111
- agno/run/workflow.py +156 -45
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1692
- agno/tools/brightdata.py +3 -3
- agno/tools/cartesia.py +3 -5
- agno/tools/dalle.py +9 -8
- agno/tools/decorator.py +4 -2
- agno/tools/desi_vocal.py +2 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +20 -13
- agno/tools/eleven_labs.py +26 -28
- agno/tools/exa.py +21 -16
- agno/tools/fal.py +4 -4
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +257 -37
- agno/tools/giphy.py +2 -2
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/lumalab.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/azure_openai.py +2 -2
- agno/tools/models/gemini.py +3 -3
- agno/tools/models/groq.py +3 -5
- agno/tools/models/nebius.py +7 -7
- agno/tools/models_labs.py +25 -15
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +4 -9
- agno/tools/opencv.py +3 -3
- agno/tools/parallel.py +314 -0
- agno/tools/replicate.py +7 -7
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +222 -7
- agno/utils/gemini.py +181 -23
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +95 -5
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +183 -135
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +645 -136
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +71 -33
- agno/workflow/workflow.py +2113 -300
- agno-2.3.0.dist-info/METADATA +618 -0
- agno-2.3.0.dist-info/RECORD +577 -0
- agno-2.3.0.dist-info/licenses/LICENSE +201 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.0rc2.dist-info/METADATA +0 -355
- agno-2.0.0rc2.dist-info/RECORD +0 -515
- agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
- {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/knowledge/reader/base.py
CHANGED
|
@@ -19,6 +19,7 @@ class Reader:
|
|
|
19
19
|
name: Optional[str] = None
|
|
20
20
|
description: Optional[str] = None
|
|
21
21
|
max_results: int = 5 # Maximum number of results to return (useful for search-based readers)
|
|
22
|
+
encoding: Optional[str] = None
|
|
22
23
|
|
|
23
24
|
def __init__(
|
|
24
25
|
self,
|
|
@@ -29,6 +30,7 @@ class Reader:
|
|
|
29
30
|
name: Optional[str] = None,
|
|
30
31
|
description: Optional[str] = None,
|
|
31
32
|
max_results: int = 5,
|
|
33
|
+
encoding: Optional[str] = None,
|
|
32
34
|
**kwargs,
|
|
33
35
|
) -> None:
|
|
34
36
|
self.chunk = chunk
|
|
@@ -40,12 +42,17 @@ class Reader:
|
|
|
40
42
|
self.name = name
|
|
41
43
|
self.description = description
|
|
42
44
|
self.max_results = max_results
|
|
45
|
+
self.encoding = encoding
|
|
43
46
|
|
|
44
|
-
def set_chunking_strategy_from_string(
|
|
47
|
+
def set_chunking_strategy_from_string(
|
|
48
|
+
self, strategy_name: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
49
|
+
) -> None:
|
|
45
50
|
"""Set the chunking strategy from a string name."""
|
|
46
51
|
try:
|
|
47
52
|
strategy_type = ChunkingStrategyType.from_string(strategy_name)
|
|
48
|
-
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
53
|
+
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
54
|
+
strategy_type, chunk_size=chunk_size, overlap=overlap, **kwargs
|
|
55
|
+
)
|
|
49
56
|
except ValueError as e:
|
|
50
57
|
raise ValueError(f"Failed to set chunking strategy: {e}")
|
|
51
58
|
|
|
@@ -15,7 +15,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
15
15
|
from agno.knowledge.document.base import Document
|
|
16
16
|
from agno.knowledge.reader.base import Reader
|
|
17
17
|
from agno.knowledge.types import ContentType
|
|
18
|
-
from agno.utils.log import
|
|
18
|
+
from agno.utils.log import log_debug, log_error
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class CSVReader(Reader):
|
|
@@ -46,10 +46,10 @@ class CSVReader(Reader):
|
|
|
46
46
|
if isinstance(file, Path):
|
|
47
47
|
if not file.exists():
|
|
48
48
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
49
|
-
|
|
50
|
-
file_content = file.open(newline="", mode="r", encoding="utf-8")
|
|
49
|
+
log_debug(f"Reading: {file}")
|
|
50
|
+
file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
|
|
51
51
|
else:
|
|
52
|
-
|
|
52
|
+
log_debug(f"Reading retrieved file: {name or file.name}")
|
|
53
53
|
file.seek(0)
|
|
54
54
|
file_content = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
55
55
|
|
|
@@ -78,7 +78,7 @@ class CSVReader(Reader):
|
|
|
78
78
|
return chunked_documents
|
|
79
79
|
return documents
|
|
80
80
|
except Exception as e:
|
|
81
|
-
|
|
81
|
+
log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
82
82
|
return []
|
|
83
83
|
|
|
84
84
|
async def async_read(
|
|
@@ -105,12 +105,12 @@ class CSVReader(Reader):
|
|
|
105
105
|
if isinstance(file, Path):
|
|
106
106
|
if not file.exists():
|
|
107
107
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
108
|
-
|
|
108
|
+
log_debug(f"Reading async: {file}")
|
|
109
109
|
async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
|
|
110
110
|
content = await file_content.read()
|
|
111
111
|
file_content_io = io.StringIO(content)
|
|
112
112
|
else:
|
|
113
|
-
|
|
113
|
+
log_debug(f"Reading retrieved file async: {file.name}")
|
|
114
114
|
file.seek(0)
|
|
115
115
|
file_content_io = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
116
116
|
|
|
@@ -160,7 +160,5 @@ class CSVReader(Reader):
|
|
|
160
160
|
|
|
161
161
|
return documents
|
|
162
162
|
except Exception as e:
|
|
163
|
-
|
|
164
|
-
f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
|
|
165
|
-
)
|
|
163
|
+
log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
166
164
|
return []
|
|
@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
8
8
|
from agno.knowledge.document.base import Document
|
|
9
9
|
from agno.knowledge.reader.base import Reader
|
|
10
10
|
from agno.knowledge.types import ContentType
|
|
11
|
-
from agno.utils.log import
|
|
11
|
+
from agno.utils.log import log_debug, log_error
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
from docx import Document as DocxDocument # type: ignore
|
|
@@ -43,11 +43,11 @@ class DocxReader(Reader):
|
|
|
43
43
|
if isinstance(file, Path):
|
|
44
44
|
if not file.exists():
|
|
45
45
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
46
|
-
|
|
46
|
+
log_debug(f"Reading: {file}")
|
|
47
47
|
docx_document = DocxDocument(str(file))
|
|
48
48
|
doc_name = name or file.stem
|
|
49
49
|
else:
|
|
50
|
-
|
|
50
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'docx_file')}")
|
|
51
51
|
docx_document = DocxDocument(file)
|
|
52
52
|
doc_name = name or (
|
|
53
53
|
getattr(file, "name", "docx_file").split(".")[0] if hasattr(file, "name") else "docx_file"
|
|
@@ -62,7 +62,6 @@ class DocxReader(Reader):
|
|
|
62
62
|
content=doc_content,
|
|
63
63
|
)
|
|
64
64
|
]
|
|
65
|
-
|
|
66
65
|
if self.chunk:
|
|
67
66
|
chunked_documents = []
|
|
68
67
|
for document in documents:
|
|
@@ -71,7 +70,7 @@ class DocxReader(Reader):
|
|
|
71
70
|
return documents
|
|
72
71
|
|
|
73
72
|
except Exception as e:
|
|
74
|
-
|
|
73
|
+
log_error(f"Error reading file: {e}")
|
|
75
74
|
return []
|
|
76
75
|
|
|
77
76
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -79,5 +78,5 @@ class DocxReader(Reader):
|
|
|
79
78
|
try:
|
|
80
79
|
return await asyncio.to_thread(self.read, file, name)
|
|
81
80
|
except Exception as e:
|
|
82
|
-
|
|
81
|
+
log_error(f"Error reading file asynchronously: {e}")
|
|
83
82
|
return []
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import csv
|
|
3
|
+
import io
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import IO, Any, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import aiofiles
|
|
9
|
+
except ImportError:
|
|
10
|
+
raise ImportError("`aiofiles` not installed. Please install it with `pip install aiofiles`")
|
|
11
|
+
|
|
12
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategyType
|
|
13
|
+
from agno.knowledge.document.base import Document
|
|
14
|
+
from agno.knowledge.reader.base import Reader
|
|
15
|
+
from agno.knowledge.types import ContentType
|
|
16
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FieldLabeledCSVReader(Reader):
    """Reader for CSV files that converts each row to a field-labeled document.

    The first CSV row is treated as the header row.  Every following row
    becomes one ``Document`` whose content is a block of ``Label: value``
    lines, optionally preceded by a title line.  Chunking is disabled because
    a row is already treated as one logical unit.
    """

    # Row count up to which ``async_read`` skips paging and builds all
    # documents in a single pass (such documents carry no "page" metadata).
    _SMALL_FILE_ROW_LIMIT = 10

    def __init__(
        self,
        chunk_title: Optional[Union[str, List[str]]] = None,
        field_names: Optional[List[str]] = None,
        format_headers: bool = True,
        skip_empty_fields: bool = True,
        **kwargs,
    ) -> None:
        """Create the reader.

        Args:
            chunk_title: Title line put at the top of every document.  A list
                of titles is rotated over the rows (row ``i`` gets
                ``chunk_title[i % len(chunk_title)]``).
            field_names: Labels that override the CSV headers by column
                position.  Columns beyond the end of this list fall back to
                the (optionally formatted) header.
            format_headers: When true, headers are shown with underscores
                replaced by spaces and title-cased.
            skip_empty_fields: When true, columns whose value is empty after
                stripping are left out of the document.
            **kwargs: Forwarded to ``Reader.__init__`` (for example ``name``
                or ``encoding``).
        """
        super().__init__(chunk=False, chunking_strategy=None, **kwargs)
        self.chunk_title = chunk_title
        self.field_names = field_names or []
        self.format_headers = format_headers
        self.skip_empty_fields = skip_empty_fields

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Chunking is not supported - each row is already a logical document unit."""
        return []

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of supported content types."""
        # NOTE(review): XLSX/XLS are advertised here, but this class only parses
        # decoded text with the ``csv`` module - confirm that spreadsheets are
        # converted to CSV text before they reach this reader.
        return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]

    def _format_field_name(self, field_name: str) -> str:
        """Return the display label for a CSV header."""
        if not self.format_headers:
            return field_name.strip()

        # Replace underscores with spaces and title case
        return field_name.replace("_", " ").strip().title()

    def _get_title_for_entry(self, entry_index: int) -> Optional[str]:
        """Return the title for the row at ``entry_index``, or ``None`` if no title is configured."""
        if self.chunk_title is None:
            return None

        if isinstance(self.chunk_title, str):
            return self.chunk_title

        if isinstance(self.chunk_title, list) and self.chunk_title:
            # Rotate through the configured titles.
            return self.chunk_title[entry_index % len(self.chunk_title)]

        return None

    def _convert_row_to_labeled_text(self, headers: List[str], row: List[str], entry_index: int) -> str:
        """Convert a CSV row to field-labeled text format.

        Args:
            headers: Column headers
            row: Data row values
            entry_index: Index of this entry (for title rotation)

        Returns:
            Formatted text with field labels
        """
        lines = []

        title = self._get_title_for_entry(entry_index)
        if title:
            lines.append(title)

        for i, (header, value) in enumerate(zip(headers, row)):
            clean_value = value.strip() if value else ""

            if self.skip_empty_fields and not clean_value:
                continue

            # An explicit positional label wins over the CSV header.
            if self.field_names and i < len(self.field_names):
                field_name = self.field_names[i]
            else:
                field_name = self._format_field_name(header)

            lines.append(f"{field_name}: {clean_value}")

        return "\n".join(lines)

    @staticmethod
    def _describe_source(file: Union[Path, IO[Any]]) -> str:
        """Return a printable label for ``file`` to use in log messages."""
        if isinstance(file, Path):
            return str(file)
        # In-memory streams such as ``io.BytesIO`` have no ``name`` attribute.
        return str(getattr(file, "name", file))

    @staticmethod
    def _resolve_csv_name(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
        """Pick the document name: explicit ``name``, else the file stem, else ``"csv_file"``."""
        if name:
            return name
        if isinstance(file, Path):
            return file.stem
        return str(getattr(file, "name", "csv_file")).split(".")[0]

    def _read_stream_text(self, file: IO[Any]) -> str:
        """Rewind ``file`` and return its whole content as text.

        Binary content is decoded with ``self.encoding`` (UTF-8 when unset),
        matching how paths are opened; text streams are returned unchanged.
        """
        file.seek(0)
        raw = file.read()
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode(self.encoding or "utf-8")
        return raw

    def _build_documents(
        self,
        csv_name: str,
        headers: List[str],
        rows: List[List[str]],
        total_rows: int,
        start_index: int = 0,
        page_number: Optional[int] = None,
    ) -> List[Document]:
        """Turn data rows into documents.

        Args:
            csv_name: Name given to every document and used as the id prefix.
            headers: Stripped header row.
            rows: Data rows to convert.
            total_rows: Number of data rows in the whole file (stored in metadata).
            start_index: Zero-based index of ``rows[0]`` within the whole file.
            page_number: When given, stored in the metadata under ``"page"``.

        Returns:
            One document per row that produced non-blank labeled text.
        """
        width = len(headers)
        documents: List[Document] = []

        for offset, row in enumerate(rows):
            row_index = start_index + offset

            # Truncate rows that are too long and pad short ones with empty
            # strings so that every header is paired with a value.
            normalized_row = row[:width] + [""] * (width - len(row))

            labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, row_index)
            if not labeled_text.strip():
                continue

            meta_data = {
                "row_index": row_index,
                "headers": headers,
                "total_rows": total_rows,
            }
            if page_number is not None:
                meta_data["page"] = page_number
            meta_data["source"] = "field_labeled_csv_reader"

            documents.append(
                Document(
                    id=f"{csv_name}_row_{row_index + 1}",
                    name=csv_name,
                    meta_data=meta_data,
                    content=labeled_text,
                )
            )

        return documents

    def read(
        self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
    ) -> List[Document]:
        """Read a CSV file or stream and return one labeled document per data row.

        Args:
            file: Path to a CSV file, or a seekable stream with its content.
            delimiter: CSV field delimiter.
            quotechar: CSV quote character.
            name: Document name; defaults to the file stem / stream name.

        Returns:
            The created documents.  Any error is logged and results in an
            empty list rather than an exception.
        """
        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_debug(f"Reading: {file}")
                # Parse inside the ``with`` so the handle is always closed.
                with file.open(newline="", mode="r", encoding=self.encoding or "utf-8") as csvfile:
                    rows = list(csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar))
            else:
                log_debug(f"Reading retrieved file: {name or getattr(file, 'name', 'csv_file')}")
                text = self._read_stream_text(file)
                rows = list(csv.reader(io.StringIO(text), delimiter=delimiter, quotechar=quotechar))

            if not rows:
                log_warning("CSV file is empty")
                return []

            csv_name = self._resolve_csv_name(file, name)

            # First row is headers
            headers = [header.strip() for header in rows[0]]
            log_debug(f"Found {len(headers)} headers: {headers}")

            data_rows = rows[1:]
            log_debug(f"Processing {len(data_rows)} data rows")

            documents = self._build_documents(csv_name, headers, data_rows, len(data_rows))

            log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
            return documents

        except Exception as e:
            log_error(f"Error reading: {self._describe_source(file)}: {e}")
            return []

    async def async_read(
        self,
        file: Union[Path, IO[Any]],
        delimiter: str = ",",
        quotechar: str = '"',
        page_size: int = 1000,
        name: Optional[str] = None,
    ) -> List[Document]:
        """Asynchronous counterpart of ``read``.

        Paths are read with ``aiofiles``.  Files with more than
        ``_SMALL_FILE_ROW_LIMIT`` data rows are processed in pages of
        ``page_size`` rows and their documents additionally record the
        1-based ``"page"`` in the metadata.

        Args:
            file: Path to a CSV file, or a seekable stream with its content.
            delimiter: CSV field delimiter.
            quotechar: CSV quote character.
            page_size: Number of rows per page; must be positive.
            name: Document name; defaults to the file stem / stream name.

        Returns:
            The created documents.  Any error is logged and results in an
            empty list rather than an exception.
        """
        try:
            # Handle file input
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_debug(f"Reading async: {file}")
                async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
                    content = await file_content.read()
            else:
                log_debug(f"Reading retrieved file async: {name or getattr(file, 'name', 'csv_file')}")
                content = self._read_stream_text(file)

            rows = list(csv.reader(io.StringIO(content), delimiter=delimiter, quotechar=quotechar))

            if not rows:
                log_warning("CSV file is empty")
                return []

            csv_name = self._resolve_csv_name(file, name)

            # First row is headers
            headers = [header.strip() for header in rows[0]]
            log_debug(f"Found {len(headers)} headers: {headers}")

            # Process data rows
            data_rows = rows[1:]
            total_rows = len(data_rows)
            log_debug(f"Processing {total_rows} data rows")

            if total_rows <= self._SMALL_FILE_ROW_LIMIT:
                # For small files, process all at once
                documents = self._build_documents(csv_name, headers, data_rows, total_rows)
            else:
                if page_size < 1:
                    raise ValueError(f"page_size must be a positive integer, got {page_size}")

                # Row conversion never awaits, so pages are processed in a
                # plain loop; the resulting order is the file's row order.
                documents = []
                for page_number, start in enumerate(range(0, total_rows, page_size), start=1):
                    documents.extend(
                        self._build_documents(
                            csv_name,
                            headers,
                            data_rows[start : start + page_size],
                            total_rows,
                            start_index=start,
                            page_number=page_number,
                        )
                    )

            log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
            return documents

        except Exception as e:
            log_error(f"Error reading async: {self._describe_source(file)}: {e}")
            return []
|
|
@@ -10,7 +10,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
10
10
|
from agno.knowledge.document.base import Document
|
|
11
11
|
from agno.knowledge.reader.base import Reader
|
|
12
12
|
from agno.knowledge.types import ContentType
|
|
13
|
-
from agno.utils.log import
|
|
13
|
+
from agno.utils.log import log_debug, log_error
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class JSONReader(Reader):
|
|
@@ -41,13 +41,13 @@ class JSONReader(Reader):
|
|
|
41
41
|
if isinstance(path, Path):
|
|
42
42
|
if not path.exists():
|
|
43
43
|
raise FileNotFoundError(f"Could not find file: {path}")
|
|
44
|
-
|
|
44
|
+
log_debug(f"Reading: {path}")
|
|
45
45
|
json_name = name or path.name.split(".")[0]
|
|
46
|
-
json_contents = json.loads(path.read_text("utf-8"))
|
|
46
|
+
json_contents = json.loads(path.read_text(self.encoding or "utf-8"))
|
|
47
47
|
|
|
48
48
|
elif isinstance(path, BytesIO):
|
|
49
49
|
json_name = name or path.name.split(".")[0]
|
|
50
|
-
|
|
50
|
+
log_debug(f"Reading uploaded file: {json_name}")
|
|
51
51
|
path.seek(0)
|
|
52
52
|
json_contents = json.load(path)
|
|
53
53
|
|
|
@@ -72,7 +72,8 @@ class JSONReader(Reader):
|
|
|
72
72
|
chunked_documents.extend(self.chunk_document(document))
|
|
73
73
|
return chunked_documents
|
|
74
74
|
return documents
|
|
75
|
-
except Exception:
|
|
75
|
+
except Exception as e:
|
|
76
|
+
log_error(f"Error reading: {path}: {e}")
|
|
76
77
|
raise
|
|
77
78
|
|
|
78
79
|
async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -7,7 +7,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
7
7
|
from agno.knowledge.document.base import Document
|
|
8
8
|
from agno.knowledge.reader.base import Reader
|
|
9
9
|
from agno.knowledge.types import ContentType
|
|
10
|
-
from agno.utils.log import
|
|
10
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
11
11
|
|
|
12
12
|
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
|
|
13
13
|
|
|
@@ -65,14 +65,14 @@ class MarkdownReader(Reader):
|
|
|
65
65
|
if isinstance(file, Path):
|
|
66
66
|
if not file.exists():
|
|
67
67
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
68
|
-
|
|
68
|
+
log_debug(f"Reading: {file}")
|
|
69
69
|
file_name = name or file.stem
|
|
70
|
-
file_contents = file.read_text("utf-8")
|
|
70
|
+
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
71
71
|
else:
|
|
72
|
-
|
|
72
|
+
log_debug(f"Reading uploaded file: {file.name}")
|
|
73
73
|
file_name = name or file.name.split(".")[0]
|
|
74
74
|
file.seek(0)
|
|
75
|
-
file_contents = file.read().decode("utf-8")
|
|
75
|
+
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
76
76
|
|
|
77
77
|
documents = [Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)]
|
|
78
78
|
if self.chunk:
|
|
@@ -82,7 +82,7 @@ class MarkdownReader(Reader):
|
|
|
82
82
|
return chunked_documents
|
|
83
83
|
return documents
|
|
84
84
|
except Exception as e:
|
|
85
|
-
|
|
85
|
+
log_error(f"Error reading: {file}: {e}")
|
|
86
86
|
return []
|
|
87
87
|
|
|
88
88
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -91,22 +91,22 @@ class MarkdownReader(Reader):
|
|
|
91
91
|
if not file.exists():
|
|
92
92
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
93
93
|
|
|
94
|
-
|
|
94
|
+
log_debug(f"Reading asynchronously: {file}")
|
|
95
95
|
file_name = name or file.stem
|
|
96
96
|
|
|
97
97
|
try:
|
|
98
98
|
import aiofiles
|
|
99
99
|
|
|
100
|
-
async with aiofiles.open(file, "r", encoding="utf-8") as f:
|
|
100
|
+
async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
|
|
101
101
|
file_contents = await f.read()
|
|
102
102
|
except ImportError:
|
|
103
|
-
|
|
104
|
-
file_contents = file.read_text("utf-8")
|
|
103
|
+
log_warning("aiofiles not installed, using synchronous file I/O")
|
|
104
|
+
file_contents = file.read_text(self.encoding or "utf-8")
|
|
105
105
|
else:
|
|
106
|
-
|
|
106
|
+
log_debug(f"Reading uploaded file asynchronously: {file.name}")
|
|
107
107
|
file_name = name or file.name.split(".")[0]
|
|
108
108
|
file.seek(0)
|
|
109
|
-
file_contents = file.read().decode("utf-8")
|
|
109
|
+
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
110
110
|
|
|
111
111
|
document = Document(
|
|
112
112
|
name=file_name,
|
|
@@ -118,7 +118,7 @@ class MarkdownReader(Reader):
|
|
|
118
118
|
return await self._async_chunk_document(document)
|
|
119
119
|
return [document]
|
|
120
120
|
except Exception as e:
|
|
121
|
-
|
|
121
|
+
log_error(f"Error reading asynchronously: {file}: {e}")
|
|
122
122
|
return []
|
|
123
123
|
|
|
124
124
|
async def _async_chunk_document(self, document: Document) -> List[Document]:
|