agno 2.3.25__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +4 -0
- agno/agent/agent.py +1428 -558
- agno/agent/remote.py +13 -0
- agno/db/base.py +339 -0
- agno/db/postgres/async_postgres.py +116 -12
- agno/db/postgres/postgres.py +1229 -25
- agno/db/postgres/schemas.py +48 -1
- agno/db/sqlite/async_sqlite.py +119 -4
- agno/db/sqlite/schemas.py +51 -0
- agno/db/sqlite/sqlite.py +1173 -13
- agno/db/utils.py +37 -1
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +4 -0
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +2767 -2254
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +2 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +5 -5
- agno/knowledge/reader/docx_reader.py +2 -2
- agno/knowledge/reader/field_labeled_csv_reader.py +2 -2
- agno/knowledge/reader/firecrawl_reader.py +2 -2
- agno/knowledge/reader/json_reader.py +2 -2
- agno/knowledge/reader/markdown_reader.py +2 -2
- agno/knowledge/reader/pdf_reader.py +5 -4
- agno/knowledge/reader/pptx_reader.py +2 -2
- agno/knowledge/reader/reader_factory.py +110 -0
- agno/knowledge/reader/s3_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +2 -2
- agno/knowledge/reader/web_search_reader.py +2 -2
- agno/knowledge/reader/website_reader.py +5 -3
- agno/knowledge/reader/wikipedia_reader.py +2 -2
- agno/knowledge/reader/youtube_reader.py +2 -2
- agno/knowledge/utils.py +37 -29
- agno/learn/__init__.py +6 -0
- agno/learn/machine.py +35 -0
- agno/learn/schemas.py +82 -11
- agno/learn/stores/__init__.py +3 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/learned_knowledge.py +6 -6
- agno/models/anthropic/claude.py +24 -0
- agno/models/aws/bedrock.py +20 -0
- agno/models/base.py +48 -4
- agno/models/cohere/chat.py +25 -0
- agno/models/google/gemini.py +50 -5
- agno/models/litellm/chat.py +38 -0
- agno/models/openai/chat.py +7 -0
- agno/models/openrouter/openrouter.py +46 -0
- agno/models/response.py +16 -0
- agno/os/app.py +83 -44
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +1 -0
- agno/os/routers/agents/router.py +29 -16
- agno/os/routers/agents/schema.py +6 -4
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +466 -0
- agno/os/routers/evals/schemas.py +4 -3
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +3 -3
- agno/os/routers/memory/schemas.py +4 -2
- agno/os/routers/metrics/metrics.py +9 -11
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/teams/router.py +20 -8
- agno/os/routers/teams/schema.py +6 -4
- agno/os/routers/traces/traces.py +5 -5
- agno/os/routers/workflows/router.py +38 -11
- agno/os/routers/workflows/schema.py +1 -1
- agno/os/schema.py +92 -26
- agno/os/utils.py +133 -16
- agno/reasoning/anthropic.py +2 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +2 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +4 -10
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/reasoning/vertexai.py +2 -2
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/run/agent.py +57 -0
- agno/run/base.py +7 -0
- agno/run/team.py +57 -0
- agno/skills/agent_skills.py +10 -3
- agno/team/__init__.py +3 -1
- agno/team/team.py +1276 -326
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/function.py +35 -83
- agno/tools/knowledge.py +9 -4
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/parallel.py +0 -7
- agno/tools/reasoning.py +30 -23
- agno/tools/tavily.py +4 -1
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +48 -47
- agno/utils/agent.py +42 -5
- agno/utils/events.py +160 -2
- agno/utils/print_response/agent.py +0 -31
- agno/utils/print_response/team.py +0 -2
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/team.py +61 -11
- agno/vectordb/lancedb/lance_db.py +4 -1
- agno/vectordb/mongodb/mongodb.py +1 -1
- agno/vectordb/qdrant/qdrant.py +4 -4
- agno/workflow/__init__.py +3 -1
- agno/workflow/condition.py +0 -21
- agno/workflow/loop.py +0 -21
- agno/workflow/parallel.py +0 -21
- agno/workflow/router.py +0 -21
- agno/workflow/step.py +117 -24
- agno/workflow/steps.py +0 -21
- agno/workflow/workflow.py +625 -63
- {agno-2.3.25.dist-info → agno-2.4.0.dist-info}/METADATA +46 -76
- {agno-2.3.25.dist-info → agno-2.4.0.dist-info}/RECORD +128 -117
- {agno-2.3.25.dist-info → agno-2.4.0.dist-info}/WHEEL +0 -0
- {agno-2.3.25.dist-info → agno-2.4.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.25.dist-info → agno-2.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Protocol
|
|
3
|
+
==================
|
|
4
|
+
Defines the minimal interface that knowledge implementations must implement.
|
|
5
|
+
|
|
6
|
+
This protocol enables:
|
|
7
|
+
- Custom knowledge bases to be used with agents
|
|
8
|
+
- Each implementation defines its own tools and context
|
|
9
|
+
- Flexible tool naming (not forced to use 'search')
|
|
10
|
+
- Type safety with Protocol typing
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Callable, List, Protocol, runtime_checkable
|
|
14
|
+
|
|
15
|
+
from agno.knowledge.document import Document
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@runtime_checkable
|
|
19
|
+
class KnowledgeProtocol(Protocol):
|
|
20
|
+
"""Minimal protocol for knowledge implementations.
|
|
21
|
+
|
|
22
|
+
Enables custom knowledge bases to be used with agents.
|
|
23
|
+
Each implementation defines what tools it exposes and what
|
|
24
|
+
context/instructions it provides to the agent.
|
|
25
|
+
|
|
26
|
+
Required methods:
|
|
27
|
+
- build_context(): Return instructions for the agent's system prompt
|
|
28
|
+
- get_tools(): Return tools to expose to the agent
|
|
29
|
+
- aget_tools(): Async version of get_tools
|
|
30
|
+
|
|
31
|
+
Optional methods:
|
|
32
|
+
- retrieve(): Default retrieval for context injection (add_knowledge_to_context)
|
|
33
|
+
- aretrieve(): Async version of retrieve
|
|
34
|
+
|
|
35
|
+
Example:
|
|
36
|
+
```python
|
|
37
|
+
from agno.knowledge.protocol import KnowledgeProtocol
|
|
38
|
+
from agno.knowledge.document import Document
|
|
39
|
+
|
|
40
|
+
class MyKnowledge:
|
|
41
|
+
def build_context(self, **kwargs) -> str:
|
|
42
|
+
return "Use search_docs to find information."
|
|
43
|
+
|
|
44
|
+
def get_tools(self, **kwargs) -> List[Callable]:
|
|
45
|
+
return [self.search_docs]
|
|
46
|
+
|
|
47
|
+
async def aget_tools(self, **kwargs) -> List[Callable]:
|
|
48
|
+
return [self.search_docs]
|
|
49
|
+
|
|
50
|
+
def search_docs(self, query: str) -> str:
|
|
51
|
+
# Your search implementation
|
|
52
|
+
return "Results for: " + query
|
|
53
|
+
|
|
54
|
+
# Optional: for add_knowledge_to_context feature
|
|
55
|
+
def retrieve(self, query: str, **kwargs) -> List[Document]:
|
|
56
|
+
results = self._internal_search(query)
|
|
57
|
+
return [Document(content=r) for r in results]
|
|
58
|
+
|
|
59
|
+
# MyKnowledge satisfies KnowledgeProtocol
|
|
60
|
+
agent = Agent(knowledge=MyKnowledge())
|
|
61
|
+
```
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def build_context(self, **kwargs) -> str:
|
|
65
|
+
"""Build context string for the agent's system prompt.
|
|
66
|
+
|
|
67
|
+
Returns instructions about how to use this knowledge,
|
|
68
|
+
what tools are available, and any usage guidelines.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
**kwargs: Context including enable_agentic_filters, etc.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Formatted context string to inject into system prompt.
|
|
75
|
+
"""
|
|
76
|
+
...
|
|
77
|
+
|
|
78
|
+
def get_tools(self, **kwargs) -> List[Callable]:
|
|
79
|
+
"""Get tools to expose to the agent.
|
|
80
|
+
|
|
81
|
+
Returns callable tools that the agent can use to interact
|
|
82
|
+
with this knowledge. Each implementation decides what
|
|
83
|
+
tools make sense (e.g., search, grep, list_files, query_db).
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
**kwargs: Context including run_response, run_context,
|
|
87
|
+
async_mode, enable_agentic_filters, agent, etc.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
List of callable tools.
|
|
91
|
+
"""
|
|
92
|
+
...
|
|
93
|
+
|
|
94
|
+
async def aget_tools(self, **kwargs) -> List[Callable]:
|
|
95
|
+
"""Async version of get_tools.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
**kwargs: Same as get_tools.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
List of callable tools.
|
|
102
|
+
"""
|
|
103
|
+
...
|
|
104
|
+
|
|
105
|
+
# Optional methods - used by add_knowledge_to_context feature
|
|
106
|
+
# Implementations that don't support context injection can omit these
|
|
107
|
+
|
|
108
|
+
def retrieve(self, query: str, **kwargs) -> List[Document]:
|
|
109
|
+
"""Retrieve documents for context injection.
|
|
110
|
+
|
|
111
|
+
Used by the add_knowledge_to_context feature to pre-fetch
|
|
112
|
+
relevant documents into the user message. This is optional;
|
|
113
|
+
if not implemented, add_knowledge_to_context will be skipped.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
query: The query string.
|
|
117
|
+
**kwargs: Additional parameters (max_results, filters, etc.)
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
List of Document objects.
|
|
121
|
+
"""
|
|
122
|
+
...
|
|
123
|
+
|
|
124
|
+
async def aretrieve(self, query: str, **kwargs) -> List[Document]:
|
|
125
|
+
"""Async version of retrieve.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
query: The query string.
|
|
129
|
+
**kwargs: Additional parameters.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
List of Document objects.
|
|
133
|
+
"""
|
|
134
|
+
...
|
|
@@ -17,7 +17,7 @@ class ArxivReader(Reader):
|
|
|
17
17
|
sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
|
|
18
18
|
|
|
19
19
|
@classmethod
|
|
20
|
-
def get_supported_chunking_strategies(
|
|
20
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
21
21
|
"""Get the list of supported chunking strategies for Arxiv readers."""
|
|
22
22
|
return [
|
|
23
23
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -29,7 +29,7 @@ class ArxivReader(Reader):
|
|
|
29
29
|
]
|
|
30
30
|
|
|
31
31
|
@classmethod
|
|
32
|
-
def get_supported_content_types(
|
|
32
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
33
33
|
return [ContentType.TOPIC]
|
|
34
34
|
|
|
35
35
|
def __init__(
|
agno/knowledge/reader/base.py
CHANGED
|
@@ -73,11 +73,17 @@ class Reader:
|
|
|
73
73
|
def chunk_document(self, document: Document) -> List[Document]:
|
|
74
74
|
if self.chunking_strategy is None:
|
|
75
75
|
self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
|
|
76
|
-
return self.chunking_strategy.chunk(document)
|
|
76
|
+
return self.chunking_strategy.chunk(document)
|
|
77
|
+
|
|
78
|
+
async def achunk_document(self, document: Document) -> List[Document]:
|
|
79
|
+
"""Async version of chunk_document."""
|
|
80
|
+
if self.chunking_strategy is None:
|
|
81
|
+
self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
|
|
82
|
+
return await self.chunking_strategy.achunk(document)
|
|
77
83
|
|
|
78
84
|
async def chunk_documents_async(self, documents: List[Document]) -> List[Document]:
|
|
79
85
|
"""
|
|
80
|
-
Asynchronously chunk a list of documents
|
|
86
|
+
Asynchronously chunk a list of documents.
|
|
81
87
|
|
|
82
88
|
Args:
|
|
83
89
|
documents: List of documents to be chunked.
|
|
@@ -85,11 +91,7 @@ class Reader:
|
|
|
85
91
|
Returns:
|
|
86
92
|
A flattened list of chunked documents.
|
|
87
93
|
"""
|
|
88
|
-
|
|
89
|
-
async def _chunk_document_async(doc: Document) -> List[Document]:
|
|
90
|
-
return await asyncio.to_thread(self.chunk_document, doc)
|
|
91
|
-
|
|
92
94
|
# Process chunking in parallel for all documents
|
|
93
|
-
chunked_lists = await asyncio.gather(*[
|
|
95
|
+
chunked_lists = await asyncio.gather(*[self.achunk_document(doc) for doc in documents])
|
|
94
96
|
# Flatten the result
|
|
95
97
|
return [chunk for sublist in chunked_lists for chunk in sublist]
|
|
@@ -25,7 +25,7 @@ class CSVReader(Reader):
|
|
|
25
25
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
26
26
|
|
|
27
27
|
@classmethod
|
|
28
|
-
def get_supported_chunking_strategies(
|
|
28
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
29
29
|
"""Get the list of supported chunking strategies for CSV readers."""
|
|
30
30
|
return [
|
|
31
31
|
ChunkingStrategyType.ROW_CHUNKER,
|
|
@@ -37,7 +37,7 @@ class CSVReader(Reader):
|
|
|
37
37
|
]
|
|
38
38
|
|
|
39
39
|
@classmethod
|
|
40
|
-
def get_supported_content_types(
|
|
40
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
41
41
|
return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
|
|
42
42
|
|
|
43
43
|
def read(
|
|
@@ -56,7 +56,7 @@ class CSVReader(Reader):
|
|
|
56
56
|
log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
|
|
57
57
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
58
58
|
file.seek(0)
|
|
59
|
-
file_content = io.StringIO(file.read().decode("utf-8"))
|
|
59
|
+
file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
60
60
|
|
|
61
61
|
csv_content = ""
|
|
62
62
|
with file_content as csvfile:
|
|
@@ -106,14 +106,14 @@ class CSVReader(Reader):
|
|
|
106
106
|
if not file.exists():
|
|
107
107
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
108
108
|
log_debug(f"Reading async: {file}")
|
|
109
|
-
async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
|
|
109
|
+
async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
|
|
110
110
|
content = await file_content.read()
|
|
111
111
|
file_content_io = io.StringIO(content)
|
|
112
112
|
csv_name = name or file.stem
|
|
113
113
|
else:
|
|
114
114
|
log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
|
|
115
115
|
file.seek(0)
|
|
116
|
-
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
116
|
+
file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
117
117
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
118
118
|
|
|
119
119
|
file_content_io.seek(0)
|
|
@@ -23,7 +23,7 @@ class DocxReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for DOCX readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -35,7 +35,7 @@ class DocxReader(Reader):
|
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
39
39
|
return [ContentType.DOCX, ContentType.DOC]
|
|
40
40
|
|
|
41
41
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -114,7 +114,7 @@ class FieldLabeledCSVReader(Reader):
|
|
|
114
114
|
log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
|
|
115
115
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
116
116
|
file.seek(0)
|
|
117
|
-
file_content = io.StringIO(file.read().decode("utf-8"))
|
|
117
|
+
file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
118
118
|
|
|
119
119
|
documents = []
|
|
120
120
|
|
|
@@ -192,7 +192,7 @@ class FieldLabeledCSVReader(Reader):
|
|
|
192
192
|
log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
|
|
193
193
|
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
194
194
|
file.seek(0)
|
|
195
|
-
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
195
|
+
file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
196
196
|
|
|
197
197
|
file_content_io.seek(0)
|
|
198
198
|
csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
|
|
@@ -43,7 +43,7 @@ class FirecrawlReader(Reader):
|
|
|
43
43
|
self.mode = mode
|
|
44
44
|
|
|
45
45
|
@classmethod
|
|
46
|
-
def get_supported_chunking_strategies(
|
|
46
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
47
47
|
"""Get the list of supported chunking strategies for Firecrawl readers."""
|
|
48
48
|
return [
|
|
49
49
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -55,7 +55,7 @@ class FirecrawlReader(Reader):
|
|
|
55
55
|
]
|
|
56
56
|
|
|
57
57
|
@classmethod
|
|
58
|
-
def get_supported_content_types(
|
|
58
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
59
59
|
return [ContentType.URL]
|
|
60
60
|
|
|
61
61
|
def scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -21,7 +21,7 @@ class JSONReader(Reader):
|
|
|
21
21
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
24
|
-
def get_supported_chunking_strategies(
|
|
24
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
25
25
|
"""Get the list of supported chunking strategies for JSON readers."""
|
|
26
26
|
return [
|
|
27
27
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -33,7 +33,7 @@ class JSONReader(Reader):
|
|
|
33
33
|
]
|
|
34
34
|
|
|
35
35
|
@classmethod
|
|
36
|
-
def get_supported_content_types(
|
|
36
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
37
37
|
return [ContentType.JSON]
|
|
38
38
|
|
|
39
39
|
def read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -28,7 +28,7 @@ class MarkdownReader(Reader):
|
|
|
28
28
|
"""Reader for Markdown files"""
|
|
29
29
|
|
|
30
30
|
@classmethod
|
|
31
|
-
def get_supported_chunking_strategies(
|
|
31
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
32
32
|
"""Get the list of supported chunking strategies for Markdown readers."""
|
|
33
33
|
strategies = [
|
|
34
34
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -46,7 +46,7 @@ class MarkdownReader(Reader):
|
|
|
46
46
|
return strategies
|
|
47
47
|
|
|
48
48
|
@classmethod
|
|
49
|
-
def get_supported_content_types(
|
|
49
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
50
50
|
return [ContentType.MARKDOWN]
|
|
51
51
|
|
|
52
52
|
def __init__(
|
|
@@ -200,7 +200,7 @@ class BasePDFReader(Reader):
|
|
|
200
200
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
201
201
|
|
|
202
202
|
@classmethod
|
|
203
|
-
def get_supported_chunking_strategies(
|
|
203
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
204
204
|
"""Get the list of supported chunking strategies for PDF readers."""
|
|
205
205
|
return [
|
|
206
206
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -232,8 +232,9 @@ class BasePDFReader(Reader):
|
|
|
232
232
|
return True
|
|
233
233
|
|
|
234
234
|
# Use provided password or fall back to instance password
|
|
235
|
-
|
|
236
|
-
if
|
|
235
|
+
# Note: Empty string "" is a valid password for PDFs with blank user password
|
|
236
|
+
pdf_password = self.password if password is None else password
|
|
237
|
+
if pdf_password is None:
|
|
237
238
|
log_error(f'PDF file "{doc_name}" is password protected but no password provided')
|
|
238
239
|
return False
|
|
239
240
|
|
|
@@ -335,7 +336,7 @@ class PDFReader(BasePDFReader):
|
|
|
335
336
|
"""Reader for PDF files"""
|
|
336
337
|
|
|
337
338
|
@classmethod
|
|
338
|
-
def get_supported_content_types(
|
|
339
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
339
340
|
return [ContentType.PDF]
|
|
340
341
|
|
|
341
342
|
def read(
|
|
@@ -23,7 +23,7 @@ class PPTXReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for PPTX readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -35,7 +35,7 @@ class PPTXReader(Reader):
|
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
39
39
|
return [ContentType.PPTX]
|
|
40
40
|
|
|
41
41
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -10,6 +10,70 @@ class ReaderFactory:
|
|
|
10
10
|
# Cache for instantiated readers
|
|
11
11
|
_reader_cache: Dict[str, Reader] = {}
|
|
12
12
|
|
|
13
|
+
# Static metadata for readers - avoids instantiation just to get metadata
|
|
14
|
+
READER_METADATA: Dict[str, Dict[str, str]] = {
|
|
15
|
+
"pdf": {
|
|
16
|
+
"name": "PdfReader",
|
|
17
|
+
"description": "Processes PDF documents with OCR support for images and text extraction",
|
|
18
|
+
},
|
|
19
|
+
"csv": {
|
|
20
|
+
"name": "CsvReader",
|
|
21
|
+
"description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
|
|
22
|
+
},
|
|
23
|
+
"field_labeled_csv": {
|
|
24
|
+
"name": "FieldLabeledCsvReader",
|
|
25
|
+
"description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
|
|
26
|
+
},
|
|
27
|
+
"docx": {
|
|
28
|
+
"name": "DocxReader",
|
|
29
|
+
"description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
|
|
30
|
+
},
|
|
31
|
+
"pptx": {
|
|
32
|
+
"name": "PptxReader",
|
|
33
|
+
"description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
|
|
34
|
+
},
|
|
35
|
+
"json": {
|
|
36
|
+
"name": "JsonReader",
|
|
37
|
+
"description": "Processes JSON data structures and API responses with nested object handling",
|
|
38
|
+
},
|
|
39
|
+
"markdown": {
|
|
40
|
+
"name": "MarkdownReader",
|
|
41
|
+
"description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
|
|
42
|
+
},
|
|
43
|
+
"text": {
|
|
44
|
+
"name": "TextReader",
|
|
45
|
+
"description": "Handles plain text files with customizable chunking strategies and encoding detection",
|
|
46
|
+
},
|
|
47
|
+
"website": {
|
|
48
|
+
"name": "WebsiteReader",
|
|
49
|
+
"description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
|
|
50
|
+
},
|
|
51
|
+
"firecrawl": {
|
|
52
|
+
"name": "FirecrawlReader",
|
|
53
|
+
"description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
|
|
54
|
+
},
|
|
55
|
+
"tavily": {
|
|
56
|
+
"name": "TavilyReader",
|
|
57
|
+
"description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
|
|
58
|
+
},
|
|
59
|
+
"youtube": {
|
|
60
|
+
"name": "YouTubeReader",
|
|
61
|
+
"description": "Extracts transcripts and metadata from YouTube videos and playlists",
|
|
62
|
+
},
|
|
63
|
+
"arxiv": {
|
|
64
|
+
"name": "ArxivReader",
|
|
65
|
+
"description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
|
|
66
|
+
},
|
|
67
|
+
"wikipedia": {
|
|
68
|
+
"name": "WikipediaReader",
|
|
69
|
+
"description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
|
|
70
|
+
},
|
|
71
|
+
"web_search": {
|
|
72
|
+
"name": "WebSearchReader",
|
|
73
|
+
"description": "Executes web searches and processes results with relevance ranking and content extraction",
|
|
74
|
+
},
|
|
75
|
+
}
|
|
76
|
+
|
|
13
77
|
@classmethod
|
|
14
78
|
def _get_pdf_reader(cls, **kwargs) -> Reader:
|
|
15
79
|
"""Get PDF reader instance."""
|
|
@@ -203,6 +267,52 @@ class ReaderFactory:
|
|
|
203
267
|
raise ValueError(f"Unknown reader: {reader_key}")
|
|
204
268
|
return getattr(cls, method_name)
|
|
205
269
|
|
|
270
|
+
@classmethod
|
|
271
|
+
def get_reader_class(cls, reader_key: str) -> type:
|
|
272
|
+
"""Get the reader CLASS without instantiation.
|
|
273
|
+
|
|
274
|
+
This is useful for accessing class methods like get_supported_chunking_strategies()
|
|
275
|
+
without the overhead of creating an instance.
|
|
276
|
+
|
|
277
|
+
Args:
|
|
278
|
+
reader_key: The reader key (e.g., 'pdf', 'csv', 'markdown')
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
The reader class (not an instance)
|
|
282
|
+
|
|
283
|
+
Raises:
|
|
284
|
+
ValueError: If the reader key is unknown
|
|
285
|
+
ImportError: If the reader's dependencies are not installed
|
|
286
|
+
"""
|
|
287
|
+
# Map reader keys to their import paths
|
|
288
|
+
reader_class_map: Dict[str, tuple] = {
|
|
289
|
+
"pdf": ("agno.knowledge.reader.pdf_reader", "PDFReader"),
|
|
290
|
+
"csv": ("agno.knowledge.reader.csv_reader", "CSVReader"),
|
|
291
|
+
"field_labeled_csv": ("agno.knowledge.reader.field_labeled_csv_reader", "FieldLabeledCSVReader"),
|
|
292
|
+
"docx": ("agno.knowledge.reader.docx_reader", "DocxReader"),
|
|
293
|
+
"pptx": ("agno.knowledge.reader.pptx_reader", "PPTXReader"),
|
|
294
|
+
"json": ("agno.knowledge.reader.json_reader", "JSONReader"),
|
|
295
|
+
"markdown": ("agno.knowledge.reader.markdown_reader", "MarkdownReader"),
|
|
296
|
+
"text": ("agno.knowledge.reader.text_reader", "TextReader"),
|
|
297
|
+
"website": ("agno.knowledge.reader.website_reader", "WebsiteReader"),
|
|
298
|
+
"firecrawl": ("agno.knowledge.reader.firecrawl_reader", "FirecrawlReader"),
|
|
299
|
+
"tavily": ("agno.knowledge.reader.tavily_reader", "TavilyReader"),
|
|
300
|
+
"youtube": ("agno.knowledge.reader.youtube_reader", "YouTubeReader"),
|
|
301
|
+
"arxiv": ("agno.knowledge.reader.arxiv_reader", "ArxivReader"),
|
|
302
|
+
"wikipedia": ("agno.knowledge.reader.wikipedia_reader", "WikipediaReader"),
|
|
303
|
+
"web_search": ("agno.knowledge.reader.web_search_reader", "WebSearchReader"),
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if reader_key not in reader_class_map:
|
|
307
|
+
raise ValueError(f"Unknown reader: {reader_key}")
|
|
308
|
+
|
|
309
|
+
module_path, class_name = reader_class_map[reader_key]
|
|
310
|
+
|
|
311
|
+
import importlib
|
|
312
|
+
|
|
313
|
+
module = importlib.import_module(module_path)
|
|
314
|
+
return getattr(module, class_name)
|
|
315
|
+
|
|
206
316
|
@classmethod
|
|
207
317
|
def create_reader(cls, reader_key: str, **kwargs) -> Reader:
|
|
208
318
|
"""Create a reader instance with the given key and optional overrides."""
|
|
@@ -35,7 +35,7 @@ class S3Reader(Reader):
|
|
|
35
35
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_chunking_strategies(
|
|
38
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
39
39
|
"""Get the list of supported chunking strategies for S3 readers."""
|
|
40
40
|
return [
|
|
41
41
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -47,7 +47,7 @@ class S3Reader(Reader):
|
|
|
47
47
|
]
|
|
48
48
|
|
|
49
49
|
@classmethod
|
|
50
|
-
def get_supported_content_types(
|
|
50
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
51
51
|
return [ContentType.FILE, ContentType.URL, ContentType.TEXT]
|
|
52
52
|
|
|
53
53
|
def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
|
|
@@ -62,7 +62,7 @@ class TavilyReader(Reader):
|
|
|
62
62
|
self.extract_depth = extract_depth
|
|
63
63
|
|
|
64
64
|
@classmethod
|
|
65
|
-
def get_supported_chunking_strategies(
|
|
65
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
66
66
|
"""Get the list of supported chunking strategies for Tavily readers."""
|
|
67
67
|
return [
|
|
68
68
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -74,7 +74,7 @@ class TavilyReader(Reader):
|
|
|
74
74
|
]
|
|
75
75
|
|
|
76
76
|
@classmethod
|
|
77
|
-
def get_supported_content_types(
|
|
77
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
78
78
|
return [ContentType.URL]
|
|
79
79
|
|
|
80
80
|
def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -18,7 +18,7 @@ class TextReader(Reader):
|
|
|
18
18
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
19
19
|
|
|
20
20
|
@classmethod
|
|
21
|
-
def get_supported_chunking_strategies(
|
|
21
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
22
22
|
"""Get the list of supported chunking strategies for Text readers."""
|
|
23
23
|
return [
|
|
24
24
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -30,7 +30,7 @@ class TextReader(Reader):
|
|
|
30
30
|
]
|
|
31
31
|
|
|
32
32
|
@classmethod
|
|
33
|
-
def get_supported_content_types(
|
|
33
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
34
34
|
return [ContentType.TXT]
|
|
35
35
|
|
|
36
36
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -53,7 +53,7 @@ class WebSearchReader(Reader):
|
|
|
53
53
|
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
|
|
54
54
|
|
|
55
55
|
@classmethod
|
|
56
|
-
def get_supported_chunking_strategies(
|
|
56
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
57
57
|
"""Get the list of supported chunking strategies for Web Search readers."""
|
|
58
58
|
return [
|
|
59
59
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -65,7 +65,7 @@ class WebSearchReader(Reader):
|
|
|
65
65
|
]
|
|
66
66
|
|
|
67
67
|
@classmethod
|
|
68
|
-
def get_supported_content_types(
|
|
68
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
69
69
|
return [ContentType.TOPIC]
|
|
70
70
|
|
|
71
71
|
def _respect_rate_limits(self):
|
|
@@ -49,7 +49,7 @@ class WebsiteReader(Reader):
|
|
|
49
49
|
self._urls_to_crawl = []
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
|
-
def get_supported_chunking_strategies(
|
|
52
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
53
53
|
"""Get the list of supported chunking strategies for Website readers."""
|
|
54
54
|
return [
|
|
55
55
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -61,7 +61,7 @@ class WebsiteReader(Reader):
|
|
|
61
61
|
]
|
|
62
62
|
|
|
63
63
|
@classmethod
|
|
64
|
-
def get_supported_content_types(
|
|
64
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
65
65
|
return [ContentType.URL]
|
|
66
66
|
|
|
67
67
|
def delay(self, min_seconds=1, max_seconds=3):
|
|
@@ -428,7 +428,8 @@ class WebsiteReader(Reader):
|
|
|
428
428
|
meta_data={"url": str(crawled_url)},
|
|
429
429
|
content=crawled_content,
|
|
430
430
|
)
|
|
431
|
-
|
|
431
|
+
chunks = self.chunk_document(doc)
|
|
432
|
+
return chunks
|
|
432
433
|
else:
|
|
433
434
|
return [
|
|
434
435
|
Document(
|
|
@@ -444,6 +445,7 @@ class WebsiteReader(Reader):
|
|
|
444
445
|
process_document(crawled_url, crawled_content)
|
|
445
446
|
for crawled_url, crawled_content in crawler_result.items()
|
|
446
447
|
]
|
|
448
|
+
|
|
447
449
|
results = await asyncio.gather(*tasks)
|
|
448
450
|
|
|
449
451
|
# Flatten the results
|
|
@@ -24,7 +24,7 @@ class WikipediaReader(Reader):
|
|
|
24
24
|
self.auto_suggest = auto_suggest
|
|
25
25
|
|
|
26
26
|
@classmethod
|
|
27
|
-
def get_supported_chunking_strategies(
|
|
27
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
28
28
|
"""Get the list of supported chunking strategies for Wikipedia readers."""
|
|
29
29
|
return [
|
|
30
30
|
ChunkingStrategyType.CODE_CHUNKER,
|
|
@@ -36,7 +36,7 @@ class WikipediaReader(Reader):
|
|
|
36
36
|
]
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
|
-
def get_supported_content_types(
|
|
39
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
40
40
|
return [ContentType.TOPIC]
|
|
41
41
|
|
|
42
42
|
def read(self, topic: str) -> List[Document]:
|
|
@@ -23,7 +23,7 @@ class YouTubeReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for YouTube readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
@@ -35,7 +35,7 @@ class YouTubeReader(Reader):
|
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
39
39
|
return [ContentType.YOUTUBE]
|
|
40
40
|
|
|
41
41
|
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
|