agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6015 -2823
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +594 -186
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +2 -8
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +72 -0
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +999 -519
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +103 -31
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +139 -0
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +59 -5
- agno/models/openai/chat.py +69 -29
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +77 -1
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -178
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +248 -94
- agno/run/base.py +44 -5
- agno/run/team.py +238 -97
- agno/run/workflow.py +144 -33
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1610
- agno/tools/dalle.py +2 -4
- agno/tools/decorator.py +4 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +14 -7
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +250 -30
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +217 -2
- agno/utils/gemini.py +180 -22
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +124 -133
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +638 -129
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +61 -23
- agno/workflow/workflow.py +2085 -272
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
- agno-2.3.0.dist-info/RECORD +577 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.1.dist-info/RECORD +0 -515
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/knowledge/reader/text_reader.py CHANGED

@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import
+from agno.utils.log import log_debug, log_error, log_warning


 class TextReader(Reader):
@@ -37,14 +37,14 @@ class TextReader(Reader):
         if isinstance(file, Path):
             if not file.exists():
                 raise FileNotFoundError(f"Could not find file: {file}")
-
+            log_debug(f"Reading: {file}")
             file_name = name or file.stem
-            file_contents = file.read_text("utf-8")
+            file_contents = file.read_text(self.encoding or "utf-8")
         else:
             file_name = name or file.name.split(".")[0]
-
+            log_debug(f"Reading uploaded file: {file_name}")
             file.seek(0)
-            file_contents = file.read().decode("utf-8")
+            file_contents = file.read().decode(self.encoding or "utf-8")

         documents = [
             Document(
@@ -60,7 +60,7 @@ class TextReader(Reader):
                 return chunked_documents
             return documents
         except Exception as e:
-
+            log_error(f"Error reading: {file}: {e}")
             return []

     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -69,22 +69,22 @@ class TextReader(Reader):
             if not file.exists():
                 raise FileNotFoundError(f"Could not find file: {file}")

-
+            log_debug(f"Reading asynchronously: {file}")
             file_name = name or file.stem

             try:
                 import aiofiles

-                async with aiofiles.open(file, "r", encoding="utf-8") as f:
+                async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
                     file_contents = await f.read()
             except ImportError:
-
-                file_contents = file.read_text("utf-8")
+                log_warning("aiofiles not installed, using synchronous file I/O")
+                file_contents = file.read_text(self.encoding or "utf-8")
         else:
-
+            log_debug(f"Reading uploaded file asynchronously: {file.name}")
             file_name = name or file.name.split(".")[0]
             file.seek(0)
-            file_contents = file.read().decode("utf-8")
+            file_contents = file.read().decode(self.encoding or "utf-8")

         document = Document(
             name=file_name,
@@ -96,7 +96,7 @@ class TextReader(Reader):
                 return await self._async_chunk_document(document)
             return [document]
         except Exception as e:
-
+            log_error(f"Error reading asynchronously: {file}: {e}")
             return []

     async def _async_chunk_document(self, document: Document) -> List[Document]:
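The text reader now threads a configurable encoding through every read path (falling back to UTF-8) and logs failures instead of silently returning an empty list. A minimal usage sketch, assuming `encoding` can be set on the reader (the diff only shows it being read via `self.encoding`, so the constructor argument is an assumption):

```python
from pathlib import Path

from agno.knowledge.reader.text_reader import TextReader

# Assumption: `encoding` is accepted by the reader constructor; when it is None
# the reader falls back to "utf-8" as shown in the diff above.
reader = TextReader(encoding="latin-1")
documents = reader.read(Path("notes.txt"))
print(f"Parsed {len(documents)} document(s)")
```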
agno/knowledge/reader/web_search_reader.py CHANGED

@@ -37,7 +37,7 @@ class WebSearchReader(Reader):
     user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

     # Search engine configuration
-    search_engine: Literal["duckduckgo"
+    search_engine: Literal["duckduckgo"] = "duckduckgo"
     search_delay: float = 3.0  # Delay between search requests
     max_search_retries: int = 2  # Retries for search operations

@@ -96,7 +96,7 @@ class WebSearchReader(Reader):
                     results.append(
                         {
                             "title": result.get("title", ""),
-                            "url": result.get("
+                            "url": result.get("href", ""),
                             "description": result.get("body", ""),
                         }
                     )
@@ -121,51 +121,10 @@
                     return []
         return []

-    def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
-        """Perform web search using Google (requires googlesearch-python)"""
-        log_debug(f"Performing Google search for: {query}")
-
-        try:
-            from googlesearch import search
-        except ImportError:
-            logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
-            return []
-
-        for attempt in range(self.max_search_retries):
-            try:
-                self._respect_rate_limits()
-
-                results = []
-                search_results = search(query, num_results=self.max_results, stop=self.max_results)
-
-                for result in search_results:
-                    results.append(
-                        {
-                            "title": getattr(result, "title", ""),
-                            "url": getattr(result, "url", ""),
-                            "description": getattr(result, "description", ""),
-                        }
-                    )
-
-                log_debug(f"Found {len(results)} Google search results")
-                return results
-
-            except Exception as e:
-                logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
-                if attempt < self.max_search_retries - 1:
-                    time.sleep(self.search_delay)
-                else:
-                    logger.error(f"All Google search attempts failed: {e}")
-                    return []
-
-        return []
-
     def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
         """Perform web search using the configured search engine"""
         if self.search_engine == "duckduckgo":
             return self._perform_duckduckgo_search(query)
-        elif self.search_engine == "google":
-            return self._perform_google_search(query)
         else:
             logger.error(f"Unsupported search engine: {self.search_engine}")
             return []
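With the Google engine dropped (the standalone `agno/tools/googlesearch.py` tool is also removed, see the file list above), DuckDuckGo is the only engine the reader supports, and result URLs are now read from the `href` key. A rough standalone sketch of that search path, assuming the `duckduckgo_search` package whose result keys (`title`, `href`, `body`) the reader consumes:

```python
from typing import Dict, List

from duckduckgo_search import DDGS  # assumed dependency; agno's DuckDuckGo tooling wraps it


def search_web(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """Shape results the way WebSearchReader 2.3.0 does."""
    results: List[Dict[str, str]] = []
    with DDGS() as ddgs:
        for result in ddgs.text(query, max_results=max_results):
            results.append(
                {
                    "title": result.get("title", ""),
                    "url": result.get("href", ""),  # 2.3.0 reads "href" rather than "url"
                    "description": result.get("body", ""),
                }
            )
    return results


if __name__ == "__main__":
    for hit in search_web("agno agent framework"):
        print(hit["title"], "->", hit["url"])
```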
agno/knowledge/reader/website_reader.py CHANGED

@@ -12,7 +12,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_debug,
+from agno.utils.log import log_debug, log_error, log_warning

 try:
     from bs4 import BeautifulSoup, Tag  # noqa: F401
@@ -106,23 +106,36 @@ class WebsiteReader(Reader):
             """
             Check if the tag matches any of the relevant tags or class names
             """
-            if tag
+            if not isinstance(tag, Tag):
+                return False
+
+            if tag.name in ["article", "main", "section"]:
+                return True
+
+            classes_attr = tag.get("class")
+            classes: List[str] = classes_attr if isinstance(classes_attr, list) else []
+            content_classes = ["content", "main-content", "post-content", "entry-content", "article-body"]
+            if any(cls in content_classes for cls in classes):
                 return True
-
+
+            # Check for common content IDs
+            tag_id = tag.get("id", "")
+            if tag_id in ["content", "main", "article"]:
                 return True
+
             return False

-        #
+        # Try to find main content element
         element = soup.find(match)
-        if element:
+        if element and hasattr(element, "find_all"):
+            # Remove common unwanted elements from the found content
+            for unwanted in element.find_all(["script", "style", "nav", "header", "footer"]):
+                unwanted.decompose()
             return element.get_text(strip=True, separator=" ")

-        #
-
-
-        ):
-            return ""
-
+        # Fallback: get full page content
+        for unwanted in soup.find_all(["script", "style", "nav", "header", "footer"]):
+            unwanted.decompose()
         return soup.get_text(strip=True, separator=" ")

     def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
@@ -164,7 +177,7 @@ class WebsiteReader(Reader):
             if (
                 current_url in self._visited
                 or not urlparse(current_url).netloc.endswith(primary_domain)
-                or current_depth > self.max_depth
+                or (current_depth > self.max_depth and current_url != url)
                 or num_links >= self.max_links
             ):
                 continue
@@ -174,13 +187,14 @@ class WebsiteReader(Reader):

             try:
                 log_debug(f"Crawling: {current_url}")
+
                 response = (
-                    httpx.get(current_url, timeout=self.timeout, proxy=self.proxy)
+                    httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
                     if self.proxy
-                    else httpx.get(current_url, timeout=self.timeout)
+                    else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
                 )
-
                 response.raise_for_status()
+
                 soup = BeautifulSoup(response.content, "html.parser")

                 # Extract main content
@@ -213,19 +227,23 @@ class WebsiteReader(Reader):

             except httpx.HTTPStatusError as e:
                 # Log HTTP status errors but continue crawling other pages
-
-
-
+                # Skip redirect errors (3xx) as they should be handled by follow_redirects
+                if e.response.status_code >= 300 and e.response.status_code < 400:
+                    log_debug(f"Redirect encountered for {current_url}, skipping: {e}")
+                else:
+                    log_warning(f"HTTP status error while crawling {current_url}: {e}")
+                # For the initial URL, we should raise the error only if it's not a redirect
+                if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
                     raise
             except httpx.RequestError as e:
                 # Log request errors but continue crawling other pages
-
+                log_warning(f"Request error while crawling {current_url}: {e}")
                 # For the initial URL, we should raise the error
                 if current_url == url and not crawler_result:
                     raise
             except Exception as e:
                 # Log other exceptions but continue crawling other pages
-
+                log_warning(f"Failed to crawl {current_url}: {e}")
                 # For the initial URL, we should raise the error
                 if current_url == url and not crawler_result:
                     # Wrap non-HTTP exceptions in a RequestError
@@ -314,19 +332,19 @@ class WebsiteReader(Reader):

             except httpx.HTTPStatusError as e:
                 # Log HTTP status errors but continue crawling other pages
-
+                log_warning(f"HTTP status error while crawling asynchronously {current_url}: {e}")
                 # For the initial URL, we should raise the error
                 if current_url == url and not crawler_result:
                     raise
             except httpx.RequestError as e:
                 # Log request errors but continue crawling other pages
-
+                log_warning(f"Request error while crawling asynchronously {current_url}: {e}")
                 # For the initial URL, we should raise the error
                 if current_url == url and not crawler_result:
                     raise
             except Exception as e:
                 # Log other exceptions but continue crawling other pages
-
+                log_warning(f"Failed to crawl asynchronously {current_url}: {e}")
                 # For the initial URL, we should raise the error
                 if current_url == url and not crawler_result:
                     # Wrap non-HTTP exceptions in a RequestError
@@ -380,7 +398,7 @@ class WebsiteReader(Reader):
             )
             return documents
         except (httpx.HTTPStatusError, httpx.RequestError) as e:
-
+            log_error(f"Error reading website {url}: {e}")
             raise

     async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
@@ -433,5 +451,5 @@ class WebsiteReader(Reader):

             return documents
         except (httpx.HTTPStatusError, httpx.RequestError) as e:
-
+            log_error(f"Error reading website asynchronously {url}: {e}")
             raise
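The crawler changes follow redirects, fall back to whole-page text when no obvious content container is found, and strip script/style/navigation chrome before extracting text. The matcher itself is plain BeautifulSoup; a self-contained sketch mirroring the heuristic added above (class names and IDs taken from the diff):

```python
from bs4 import BeautifulSoup, Tag

CONTENT_CLASSES = ["content", "main-content", "post-content", "entry-content", "article-body"]


def match(tag) -> bool:
    """Mirror of the 2.3.0 heuristic: semantic tags, then content classes, then content IDs."""
    if not isinstance(tag, Tag):
        return False
    if tag.name in ["article", "main", "section"]:
        return True
    classes = tag.get("class") or []
    if any(cls in CONTENT_CLASSES for cls in classes):
        return True
    return tag.get("id", "") in ["content", "main", "article"]


html = "<html><body><nav>menu</nav><article><script>x()</script><p>Hello world</p></article></body></html>"
soup = BeautifulSoup(html, "html.parser")
element = soup.find(match)
if element is not None:
    # Drop chrome before extracting text, as the reader now does
    for unwanted in element.find_all(["script", "style", "nav", "header", "footer"]):
        unwanted.decompose()
    print(element.get_text(strip=True, separator=" "))  # -> "Hello world"
```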
agno/knowledge/reranker/__init__.py CHANGED

@@ -1,9 +1,3 @@
-from agno.knowledge.reranker.cohere import CohereReranker
-from agno.knowledge.reranker.infinity import InfinityReranker
-from agno.knowledge.reranker.sentence_transformer import SentenceTransformerReranker
+from agno.knowledge.reranker.base import Reranker

-__all__ = [
-    "CohereReranker",
-    "InfinityReranker",
-    "SentenceTransformerReranker",
-]
+__all__ = ["Reranker"]
agno/knowledge/types.py CHANGED

@@ -1,4 +1,7 @@
 from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel


 class ContentType(str, Enum):
@@ -17,6 +20,7 @@ class ContentType(str, Enum):
     MARKDOWN = ".md"
     DOCX = ".docx"
     DOC = ".doc"
+    PPTX = ".pptx"
     JSON = ".json"

     # Spreadsheet file extensions
@@ -28,3 +32,8 @@ class ContentType(str, Enum):
 def get_content_type_enum(content_type_str: str) -> ContentType:
     """Convert a content type string to ContentType enum."""
     return ContentType(content_type_str)
+
+
+class KnowledgeFilter(BaseModel):
+    key: str
+    value: Any
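`KnowledgeFilter` is a small pydantic key/value model, and `.pptx` joins the recognised content types (matching the new `pptx_reader.py` in the file list). A quick sketch, assuming pydantic v2's `model_dump`:

```python
from agno.knowledge.types import ContentType, KnowledgeFilter

flt = KnowledgeFilter(key="user_id", value="jordan_mitchell")
print(flt.model_dump())        # {'key': 'user_id', 'value': 'jordan_mitchell'}
print(ContentType.PPTX.value)  # ".pptx"
```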
agno/knowledge/utils.py CHANGED

@@ -129,12 +129,32 @@ def get_chunker_info(chunker_key: str) -> Dict:
         class_name = chunker_class.__name__
         docstring = chunker_class.__doc__ or f"{class_name} chunking strategy"

+        # Check class __init__ signature for chunk_size and overlap parameters
+        metadata = {}
+        import inspect
+
+        try:
+            sig = inspect.signature(chunker_class.__init__)
+            param_names = set(sig.parameters.keys())
+
+            # If class has chunk_size or max_chunk_size parameter, set default chunk_size
+            if "chunk_size" in param_names or "max_chunk_size" in param_names:
+                metadata["chunk_size"] = 5000
+
+            # If class has overlap parameter, set default overlap
+            if "overlap" in param_names:
+                metadata["chunk_overlap"] = 0
+        except Exception:
+            # If we can't inspect, skip metadata
+            pass
+
         return {
             "key": chunker_key,
             "class_name": class_name,
             "name": chunker_key,
             "description": docstring.strip(),
             "strategy_type": strategy_type.value,
+            "metadata": metadata,
         }
     except ValueError:
         raise ValueError(f"Unknown chunker key: {chunker_key}")
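`get_chunker_info` now advertises default `chunk_size`/`chunk_overlap` values in a `metadata` field whenever the chunker's `__init__` exposes those parameters. An illustrative call, where `"fixed"` is an assumed chunker key (the valid keys are defined by the chunker registry elsewhere in this module):

```python
from agno.knowledge.utils import get_chunker_info

info = get_chunker_info("fixed")  # "fixed" is a hypothetical key for illustration
print(info["class_name"], info["strategy_type"])
print(info["metadata"])  # e.g. {"chunk_size": 5000, "chunk_overlap": 0} when those params exist
```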
agno/media.py CHANGED

@@ -334,11 +334,16 @@ class Video(BaseModel):


 class File(BaseModel):
+    id: Optional[str] = None
     url: Optional[str] = None
     filepath: Optional[Union[Path, str]] = None
     # Raw bytes content of a file
     content: Optional[Any] = None
     mime_type: Optional[str] = None
+
+    file_type: Optional[str] = None
+    filename: Optional[str] = None
+    size: Optional[int] = None
     # External file object (e.g. GeminiFile, must be a valid object as expected by the model you are using)
     external: Optional[Any] = None
     format: Optional[str] = None  # E.g. `pdf`, `txt`, `csv`, `xml`, etc.
@@ -364,7 +369,10 @@ class File(BaseModel):
     def valid_mime_types(cls) -> List[str]:
         return [
             "application/pdf",
+            "application/json",
             "application/x-javascript",
+            "application/json",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             "text/javascript",
             "application/x-python",
             "text/x-python",
@@ -377,6 +385,29 @@ class File(BaseModel):
             "text/rtf",
         ]

+    @classmethod
+    def from_base64(
+        cls,
+        base64_content: str,
+        id: Optional[str] = None,
+        mime_type: Optional[str] = None,
+        filename: Optional[str] = None,
+        name: Optional[str] = None,
+        format: Optional[str] = None,
+    ) -> "File":
+        """Create File from base64 encoded content"""
+        import base64
+
+        content_bytes = base64.b64decode(base64_content)
+        return cls(
+            content=content_bytes,
+            id=id,
+            mime_type=mime_type,
+            filename=filename,
+            name=name,
+            format=format,
+        )
+
     @property
     def file_url_content(self) -> Optional[Tuple[bytes, str]]:
         import httpx
@@ -388,3 +419,44 @@ class File(BaseModel):
             return content, mime_type
         else:
             return None
+
+    def _normalise_content(self) -> Optional[Union[str, bytes]]:
+        if self.content is None:
+            return None
+        content_normalised: Union[str, bytes] = self.content
+        if content_normalised and isinstance(content_normalised, bytes):
+            from base64 import b64encode
+
+            try:
+                if self.mime_type and self.mime_type.startswith("text/"):
+                    content_normalised = content_normalised.decode("utf-8")
+                else:
+                    content_normalised = b64encode(content_normalised).decode("utf-8")
+            except UnicodeDecodeError:
+                if isinstance(self.content, bytes):
+                    content_normalised = b64encode(self.content).decode("utf-8")
+            except Exception:
+                try:
+                    if isinstance(self.content, bytes):
+                        content_normalised = b64encode(self.content).decode("utf-8")
+                except Exception:
+                    pass
+        return content_normalised
+
+    def to_dict(self) -> Dict[str, Any]:
+        content_normalised = self._normalise_content()
+
+        response_dict = {
+            "id": self.id,
+            "url": self.url,
+            "filepath": str(self.filepath) if self.filepath else None,
+            "content": content_normalised,
+            "mime_type": self.mime_type,
+            "file_type": self.file_type,
+            "filename": self.filename,
+            "size": self.size,
+            "external": self.external,
+            "format": self.format,
+            "name": self.name,
+        }
+        return {k: v for k, v in response_dict.items() if v is not None}