PyPI - agno - Versions diffs - 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno: 2.0.1 (py3-none-any.whl) → 2.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (314)

agno/agent/agent.py +6015 -2823
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +594 -186
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +2 -8
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +72 -0
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +999 -519
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +103 -31
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +139 -0
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +59 -5
agno/models/openai/chat.py +69 -29
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +77 -1
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -178
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +248 -94
agno/run/base.py +44 -5
agno/run/team.py +238 -97
agno/run/workflow.py +144 -33
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1610
agno/tools/dalle.py +2 -4
agno/tools/decorator.py +4 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +14 -7
agno/tools/eleven_labs.py +23 -25
agno/tools/exa.py +21 -16
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +250 -30
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/nebius.py +5 -5
agno/tools/models_labs.py +20 -10
agno/tools/notion.py +204 -0
agno/tools/parallel.py +314 -0
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +217 -2
agno/utils/gemini.py +180 -22
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +92 -2
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +124 -133
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +638 -129
agno/workflow/steps.py +65 -6
agno/workflow/types.py +61 -23
agno/workflow/workflow.py +2085 -272
{agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
agno-2.3.0.dist-info/RECORD +577 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.1.dist-info/RECORD +0 -515
{agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
{agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/knowledge/chunking/agentic.py CHANGED Viewed

@@ -1,28 +1,31 @@
-from typing import List, Optional
+from typing import List, Optional, Union
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.models.base import Model
 from agno.models.defaults import DEFAULT_OPENAI_MODEL_ID
 from agno.models.message import Message
+from agno.models.utils import get_model
 class AgenticChunking(ChunkingStrategy):
     """Chunking strategy that uses an LLM to determine natural breakpoints in the text"""
-    def __init__(self, model: Optional[Model] = None, max_chunk_size: int = 5000):
+    def __init__(self, model: Optional[Union[Model, str]] = None, max_chunk_size: int = 5000):
+        # Convert model string to Model instance
+        model = get_model(model)
         if model is None:
             try:
                 from agno.models.openai import OpenAIChat
             except Exception:
                 raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
             model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
-        self.max_chunk_size = max_chunk_size
+        self.chunk_size = max_chunk_size
         self.model = model
     def chunk(self, document: Document) -> List[Document]:
         """Split text into chunks using LLM to determine natural breakpoints based on context"""
-        if len(document.content) <= self.max_chunk_size:
+        if len(document.content) <= self.chunk_size:
             return [document]
         chunks: List[Document] = []
@@ -31,22 +34,22 @@ class AgenticChunking(ChunkingStrategy):
         chunk_number = 1
         while remaining_text:
-            # Ask model to find a good breakpoint within max_chunk_size
-            prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.max_chunk_size} characters.
+            # Ask model to find a good breakpoint within chunk_size
+            prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
             Consider semantic completeness, paragraph boundaries, and topic transitions.
             Return only the character position number of where to break the text:
-            {remaining_text[: self.max_chunk_size]}"""
+            {remaining_text[: self.chunk_size]}"""
             try:
                 response = self.model.response([Message(role="user", content=prompt)])
                 if response and response.content:
-                    break_point = min(int(response.content.strip()), self.max_chunk_size)
+                    break_point = min(int(response.content.strip()), self.chunk_size)
                 else:
-                    break_point = self.max_chunk_size
+                    break_point = self.chunk_size
             except Exception:
                 # Fallback to max size if model fails
-                break_point = self.max_chunk_size
+                break_point = self.chunk_size
             # Extract chunk and update remaining text
             chunk = remaining_text[:break_point].strip()

agno/knowledge/chunking/fixed.py CHANGED Viewed

@@ -7,7 +7,7 @@ from agno.knowledge.document.base import Document
 class FixedSizeChunking(ChunkingStrategy):
     """Chunking strategy that splits text into fixed-size chunks with optional overlap"""
-    def __init__(self, chunk_size: int = 100, overlap: int = 0):
+    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
         # overlap must be less than chunk size
         if overlap >= chunk_size:
             raise ValueError(f"Invalid parameters: overlap ({overlap}) must be less than chunk size ({chunk_size}).")

agno/knowledge/chunking/semantic.py CHANGED Viewed

@@ -1,16 +1,22 @@
-from typing import List, Optional
+import inspect
+from typing import Any, Dict, List, Optional
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.knowledge.embedder.base import Embedder
-from agno.knowledge.embedder.openai import OpenAIEmbedder
+from agno.utils.log import log_info
 class SemanticChunking(ChunkingStrategy):
     """Chunking strategy that splits text into semantic chunks using chonkie"""
     def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
-        self.embedder = embedder or OpenAIEmbedder(id="text-embedding-3-small")  # type: ignore
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+            embedder = OpenAIEmbedder()  # type: ignore
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder = embedder
         self.chunk_size = chunk_size
         self.similarity_threshold = similarity_threshold
         self.chunker = None  # Will be initialized lazily when needed
@@ -26,11 +32,37 @@ class SemanticChunking(ChunkingStrategy):
                     "Please install it using `pip install chonkie` to use SemanticChunking."
                 )
-            self.chunker = SemanticChunker(
-                embedding_model=self.embedder.id,  # type: ignore
-                chunk_size=self.chunk_size,
-                threshold=self.similarity_threshold,
-            )
+            # Build arguments dynamically based on chonkie's supported signature
+            params: Dict[str, Any] = {
+                "chunk_size": self.chunk_size,
+                "threshold": self.similarity_threshold,
+            }
+            try:
+                sig = inspect.signature(SemanticChunker)
+                param_names = set(sig.parameters.keys())
+                # Prefer passing a callable to avoid Chonkie initializing its own client
+                if "embedding_fn" in param_names:
+                    params["embedding_fn"] = self.embedder.get_embedding  # type: ignore[attr-defined]
+                    # If chonkie allows specifying dimensions, provide them
+                    if "embedding_dimensions" in param_names and getattr(self.embedder, "dimensions", None):
+                        params["embedding_dimensions"] = self.embedder.dimensions  # type: ignore[attr-defined]
+                elif "embedder" in param_names:
+                    # Some versions may accept an embedder object directly
+                    params["embedder"] = self.embedder
+                else:
+                    # Fallback to model id
+                    params["embedding_model"] = getattr(self.embedder, "id", None) or "text-embedding-3-small"
+                self.chunker = SemanticChunker(**params)
+            except Exception:
+                # As a final fallback, use the original behavior
+                self.chunker = SemanticChunker(
+                    embedding_model=getattr(self.embedder, "id", None) or "text-embedding-3-small",
+                    chunk_size=self.chunk_size,
+                    threshold=self.similarity_threshold,
+                )
     def chunk(self, document: Document) -> List[Document]:
         """Split document into semantic chunks using chonkie"""

agno/knowledge/chunking/strategy.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import List
+from typing import List, Optional
 from agno.knowledge.document.base import Document
@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
     """Factory for creating chunking strategy instances."""
     @classmethod
-    def create_strategy(cls, strategy_type: ChunkingStrategyType, **kwargs) -> ChunkingStrategy:
+    def create_strategy(
+        cls,
+        strategy_type: ChunkingStrategyType,
+        chunk_size: Optional[int] = None,
+        overlap: Optional[int] = None,
+        **kwargs,
+    ) -> ChunkingStrategy:
         """Create an instance of the chunking strategy with the given parameters."""
         strategy_map = {
             ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
             ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
             ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
         }
-        return strategy_map[strategy_type](**kwargs)
+        return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)
     @classmethod
-    def _create_agentic_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_agentic_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.agentic import AgenticChunking
-        # Map chunk_size to max_chunk_size for AgenticChunking
-        if "chunk_size" in kwargs and "max_chunk_size" not in kwargs:
-            kwargs["max_chunk_size"] = kwargs.pop("chunk_size")
+        # AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
+        if chunk_size is not None:
+            kwargs["max_chunk_size"] = chunk_size
+        # Remove overlap since AgenticChunking doesn't support it
         return AgenticChunking(**kwargs)
     @classmethod
-    def _create_document_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_document_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.document import DocumentChunking
+        # DocumentChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return DocumentChunking(**kwargs)
     @classmethod
-    def _create_recursive_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_recursive_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.recursive import RecursiveChunking
+        # RecursiveChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return RecursiveChunking(**kwargs)
     @classmethod
-    def _create_semantic_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_semantic_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.semantic import SemanticChunking
+        # SemanticChunking accepts chunk_size but not overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        # Remove overlap since SemanticChunking doesn't support it
         return SemanticChunking(**kwargs)
     @classmethod
-    def _create_fixed_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_fixed_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.fixed import FixedSizeChunking
+        # FixedSizeChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return FixedSizeChunking(**kwargs)
     @classmethod
-    def _create_row_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_row_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.row import RowChunking
-        # Remove chunk_size if present since RowChunking doesn't use it
-        kwargs.pop("chunk_size", None)
+        # RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
         return RowChunking(**kwargs)
     @classmethod
-    def _create_markdown_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_markdown_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.markdown import MarkdownChunking
+        # MarkdownChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return MarkdownChunking(**kwargs)

agno/knowledge/embedder/aws_bedrock.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
 from agno.exceptions import AgnoError, ModelProviderError
 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import log_error, logger
+from agno.utils.log import log_error, log_warning
 try:
     from boto3 import client as AwsClient
@@ -69,6 +69,11 @@ class AwsBedrockEmbedder(Embedder):
     client_params: Optional[Dict[str, Any]] = None
     client: Optional[AwsClient] = None
+    def __post_init__(self):
+        if self.enable_batch:
+            log_warning("AwsBedrockEmbedder does not support batch embeddings, setting enable_batch to False")
+            self.enable_batch = False
     def get_client(self) -> AwsClient:
         """
         Returns an AWS Bedrock client.
@@ -220,10 +225,10 @@ class AwsBedrockEmbedder(Embedder):
                     # Fallback to the first available embedding type
                     for embedding_type in response["embeddings"]:
                         return response["embeddings"][embedding_type][0]
-            logger.warning("No embeddings found in response")
+            log_warning("No embeddings found in response")
             return []
         except Exception as e:
-            logger.warning(f"Error extracting embeddings: {e}")
+            log_warning(f"Error extracting embeddings: {e}")
             return []
     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -286,7 +291,7 @@ class AwsBedrockEmbedder(Embedder):
                         # Fallback to the first available embedding type
                         for embedding_type in response_body["embeddings"]:
                             return response_body["embeddings"][embedding_type][0]
-                logger.warning("No embeddings found in response")
+                log_warning("No embeddings found in response")
                 return []
         except ClientError as e:
             log_error(f"Unexpected error calling Bedrock API: {str(e)}")

agno/knowledge/embedder/azure_openai.py CHANGED Viewed

@@ -154,3 +154,57 @@ class AzureOpenAIEmbedder(Embedder):
         embedding = response.data[0].embedding
         usage = response.usage
         return embedding, usage.model_dump()
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches.
+        Args:
+            texts: List of text strings to embed
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        """
+        all_embeddings = []
+        all_usage = []
+        logger.info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size}")
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+            req: Dict[str, Any] = {
+                "input": batch_texts,
+                "model": self.id,
+                "encoding_format": self.encoding_format,
+            }
+            if self.user is not None:
+                req["user"] = self.user
+            if self.id.startswith("text-embedding-3"):
+                req["dimensions"] = self.dimensions
+            if self.request_params:
+                req.update(self.request_params)
+            try:
+                response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
+                batch_embeddings = [data.embedding for data in response.data]
+                all_embeddings.extend(batch_embeddings)
+                # For each embedding in the batch, add the same usage information
+                usage_dict = response.usage.model_dump() if response.usage else None
+                all_usage.extend([usage_dict] * len(batch_embeddings))
+            except Exception as e:
+                logger.warning(f"Error in async batch embedding: {e}")
+                # Fallback to individual calls for this batch
+                for text in batch_texts:
+                    try:
+                        embedding, usage = await self.async_get_embedding_and_usage(text)
+                        all_embeddings.append(embedding)
+                        all_usage.append(usage)
+                    except Exception as e2:
+                        logger.warning(f"Error in individual async embedding fallback: {e2}")
+                        all_embeddings.append([])
+                        all_usage.append(None)
+        return all_embeddings, all_usage

agno/knowledge/embedder/base.py CHANGED Viewed

@@ -7,6 +7,8 @@ class Embedder:
     """Base class for managing embedders"""
     dimensions: Optional[int] = 1536
+    enable_batch: bool = False
+    batch_size: int = 100  # Number of texts to process in each API call
     def get_embedding(self, text: str) -> List[float]:
         raise NotImplementedError

agno/knowledge/embedder/cohere.py CHANGED Viewed

@@ -1,8 +1,9 @@
+import time
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple, Union
 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import logger
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 try:
     from cohere import AsyncClient as AsyncCohereClient
@@ -22,6 +23,7 @@ class CohereEmbedder(Embedder):
     client_params: Optional[Dict[str, Any]] = None
     cohere_client: Optional[CohereClient] = None
     async_client: Optional[AsyncCohereClient] = None
+    exponential_backoff: bool = False  # Enable exponential backoff on rate limits
     @property
     def client(self) -> CohereClient:
@@ -61,6 +63,111 @@ class CohereEmbedder(Embedder):
             request_params.update(self.request_params)
         return self.client.embed(texts=[text], **request_params)
+    def _get_batch_request_params(self) -> Dict[str, Any]:
+        """Assemble the keyword arguments used for batch embedding calls.
+
+        ``model``, ``input_type`` and ``embedding_types`` are included only when
+        the matching attribute is truthy; entries from ``self.request_params``
+        are applied last and therefore override the values above.
+        """
+        configured = (
+            ("model", self.id),
+            ("input_type", self.input_type),
+            ("embedding_types", self.embedding_types),
+        )
+        # Keep only the options that are actually set, preserving their order.
+        params: Dict[str, Any] = {key: value for key, value in configured if value}
+        if self.request_params:
+            params.update(self.request_params)
+        return params
+    def _is_rate_limit_error(self, error: Exception) -> bool:
+        """Return True when ``error`` looks like a rate limiting failure.
+
+        An error matches when it exposes ``status_code == 429`` or when its
+        lower-cased string form contains one of the known rate limit phrases.
+        """
+        if getattr(error, "status_code", None) == 429:
+            return True
+        message = str(error).lower()
+        markers = ("rate limit", "too many requests", "429", "trial key", "api calls / minute")
+        for marker in markers:
+            if marker in message:
+                return True
+        return False
+    def _exponential_backoff_sleep(self, attempt: int, base_delay: float = 1.0) -> None:
+        """Block the current thread for an exponentially growing delay.
+
+        Args:
+            attempt: Zero-based retry attempt; the base delay doubles with each one.
+            base_delay: Delay in seconds used for attempt 0, before jitter.
+        """
+        backoff = base_delay * (2**attempt)
+        # Jitter is the fractional part of the current timestamp (0 <= jitter < 1).
+        jitter = time.time() % 1
+        wait_seconds = backoff + jitter
+        log_debug(f"Rate limited, waiting {wait_seconds:.2f} seconds before retry (attempt {attempt + 1})")
+        time.sleep(wait_seconds)
+    async def _async_rate_limit_backoff_sleep(self, attempt: int) -> None:
+        """Await a rate-limit-aware delay sized for APIs with per-minute limits.
+
+        Args:
+            attempt: Zero-based retry attempt. Attempt 0 waits about 15 seconds,
+                attempt 1 about 30 seconds, and any later attempt about 60 seconds,
+                each plus up to 3 seconds of jitter.
+        """
+        import asyncio
+        # For 40 req/min APIs like Cohere Trial, we need longer waits:
+        # a quarter of the minute window, then half, then the full window.
+        base_waits = {0: 15.0, 1: 30.0}
+        wait_seconds = base_waits.get(attempt, 60.0)
+        # Add small jitter
+        wait_seconds += time.time() % 3
+        log_debug(
+            f"Async rate limit backoff, waiting {wait_seconds:.1f} seconds for rate limit window reset (attempt {attempt + 1})"
+        )
+        await asyncio.sleep(wait_seconds)
+    async def _async_batch_with_retry(
+        self, texts: List[str], max_retries: int = 3
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """Embed one batch of texts asynchronously, retrying on rate limit errors.
+
+        Args:
+            texts: Texts sent to the async Cohere client in a single embed request.
+            max_retries: Number of retries allowed after the first attempt when the
+                request is rate limited.
+        Returns:
+            Tuple of (embeddings taken from the response, usage dictionary repeated
+            once per returned embedding). The same usage dict object is shared by
+            every entry of the second list.
+        Raises:
+            Exception: Any non-rate-limit error is re-raised immediately. A rate
+                limit error is re-raised when ``exponential_backoff`` is disabled
+                or once ``max_retries`` retries have been used.
+        """
+        log_debug(f"Starting async batch retry for {len(texts)} texts with max_retries={max_retries}")
+        for attempt in range(max_retries + 1):
+            try:
+                request_params = self._get_batch_request_params()
+                response: Union[
+                    EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse
+                ] = await self.aclient.embed(texts=texts, **request_params)
+                # Extract embeddings from response
+                if isinstance(response, EmbeddingsFloatsEmbedResponse):
+                    batch_embeddings = response.embeddings
+                elif isinstance(response, EmbeddingsByTypeEmbedResponse):
+                    batch_embeddings = response.embeddings.float_ if response.embeddings.float_ else []
+                else:
+                    log_warning("No embeddings found in response")
+                    batch_embeddings = []
+                # Extract usage information
+                usage = response.meta.billed_units if response.meta else None
+                usage_dict = usage.model_dump() if usage else None
+                all_usage = [usage_dict] * len(batch_embeddings)
+                log_debug(f"Async batch embedding succeeded on attempt {attempt + 1}")
+                return batch_embeddings, all_usage
+            except Exception as e:
+                if self._is_rate_limit_error(e):
+                    if not self.exponential_backoff:
+                        # The option is the `exponential_backoff` field on this embedder;
+                        # the message must name it exactly so users can act on it.
+                        log_warning(
+                            "Rate limit detected. To enable automatic backoff retry, set exponential_backoff=True when creating the embedder."
+                        )
+                        raise
+                    log_info(f"Async rate limit detected on attempt {attempt + 1}")
+                    if attempt < max_retries:
+                        await self._async_rate_limit_backoff_sleep(attempt)
+                        continue
+                    else:
+                        log_warning(f"Async max retries ({max_retries}) reached for rate limiting")
+                        raise
+                else:
+                    log_debug(f"Async non-rate-limit error on attempt {attempt + 1}: {e}")
+                    raise
+        # This should never be reached, but just in case
+        log_error("Could not create embeddings. End of retry loop reached.")
+        return [], []
     def get_embedding(self, text: str) -> List[float]:
         response: Union[EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse] = self.response(text=text)
         try:
@@ -69,10 +176,10 @@ class CohereEmbedder(Embedder):
             elif isinstance(response, EmbeddingsByTypeEmbedResponse):
                 return response.embeddings.float_[0] if response.embeddings.float_ else []
             else:
-                logger.warning("No embeddings found")
+                log_warning("No embeddings found")
                 return []
         except Exception as e:
-            logger.warning(e)
+            log_warning(e)
             return []
     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -110,10 +217,10 @@ class CohereEmbedder(Embedder):
             elif isinstance(response, EmbeddingsByTypeEmbedResponse):
                 return response.embeddings.float_[0] if response.embeddings.float_ else []
             else:
-                logger.warning("No embeddings found")
+                log_warning("No embeddings found")
                 return []
         except Exception as e:
-            logger.warning(e)
+            log_warning(e)
             return []
     async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -142,3 +249,75 @@ class CohereEmbedder(Embedder):
         if usage:
             return embedding, usage.model_dump()
         return embedding, None
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches (async version).
+
+        Texts are processed in slices of ``self.batch_size`` through
+        ``_async_batch_with_retry``. When a slice fails, the fallback depends on
+        the error: a rate limit error (with backoff enabled) is retried with
+        half-sized sub-batches, any other error falls back to one call per text.
+        Texts that still fail contribute an empty embedding and ``None`` usage.
+
+        Args:
+            texts: List of text strings to embed
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        Raises:
+            Exception: The batch error is re-raised when it is a rate limit error
+                and ``self.exponential_backoff`` is disabled.
+        """
+        all_embeddings = []
+        all_usage = []
+        log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+            try:
+                # Use retry logic for batch processing
+                batch_embeddings, batch_usage = await self._async_batch_with_retry(batch_texts)
+                all_embeddings.extend(batch_embeddings)
+                all_usage.extend(batch_usage)
+            except Exception as e:
+                log_warning(f"Async batch embedding failed after retries: {e}")
+                # Check if this is a rate limit error and backoff is disabled
+                if self._is_rate_limit_error(e) and not self.exponential_backoff:
+                    log_warning("Rate limit hit and backoff is disabled. Failing immediately.")
+                    raise e
+                # Only fall back to individual calls for non-rate-limit errors
+                # For rate limit errors, we should reduce batch size instead
+                if self._is_rate_limit_error(e):
+                    log_warning("Rate limit hit even after retries. Consider reducing batch_size or upgrading API key.")
+                    # Try with smaller batch size
+                    if len(batch_texts) > 1:
+                        # Halve the failed batch (never below one text per request)
+                        smaller_batch_size = max(1, len(batch_texts) // 2)
+                        log_info(f"Retrying with smaller batch size: {smaller_batch_size}")
+                        for j in range(0, len(batch_texts), smaller_batch_size):
+                            small_batch = batch_texts[j : j + smaller_batch_size]
+                            try:
+                                small_embeddings, small_usage = await self._async_batch_with_retry(small_batch)
+                                all_embeddings.extend(small_embeddings)
+                                all_usage.extend(small_usage)
+                            except Exception as e3:
+                                log_error(f"Failed even with reduced batch size: {e3}")
+                                # Fall back to empty results for this batch:
+                                # one placeholder per text in the failed sub-batch
+                                all_embeddings.extend([[] for _ in small_batch])
+                                all_usage.extend([None for _ in small_batch])
+                    else:
+                        # Single item already failed, add empty result
+                        log_debug("Single item failed, adding empty result")
+                        all_embeddings.append([])
+                        all_usage.append(None)
+                else:
+                    # For non-rate-limit errors, fall back to individual calls
+                    log_debug("Non-rate-limit error, falling back to individual calls")
+                    for text in batch_texts:
+                        try:
+                            embedding, usage = await self.async_get_embedding_and_usage(text)
+                            all_embeddings.append(embedding)
+                            all_usage.append(usage)
+                        except Exception as e2:
+                            log_warning(f"Error in individual async embedding fallback: {e2}")
+                            # Placeholder entry for the text that could not be embedded
+                            all_embeddings.append([])
+                            all_usage.append(None)
+        return all_embeddings, all_usage

agno/knowledge/embedder/fastembed.py CHANGED Viewed

@@ -23,7 +23,7 @@ class FastEmbedEmbedder(Embedder):
     """Using BAAI/bge-small-en-v1.5 model, more models available: https://qdrant.github.io/fastembed/examples/Supported_Models/"""
     id: str = "BAAI/bge-small-en-v1.5"
-    dimensions: int = 384
+    dimensions: Optional[int] = 384
     def get_embedding(self, text: str) -> List[float]:
         model = TextEmbedding(model_name=self.id)

agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.1py3-none-any.whl → 2.3.0py3-none-any.whl