agno 2.0.11__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (83)
  1. agno/agent/agent.py +606 -175
  2. agno/db/in_memory/in_memory_db.py +42 -29
  3. agno/db/postgres/postgres.py +6 -4
  4. agno/exceptions.py +62 -1
  5. agno/guardrails/__init__.py +6 -0
  6. agno/guardrails/base.py +19 -0
  7. agno/guardrails/openai.py +144 -0
  8. agno/guardrails/pii.py +94 -0
  9. agno/guardrails/prompt_injection.py +51 -0
  10. agno/knowledge/embedder/aws_bedrock.py +9 -4
  11. agno/knowledge/embedder/azure_openai.py +54 -0
  12. agno/knowledge/embedder/base.py +2 -0
  13. agno/knowledge/embedder/cohere.py +184 -5
  14. agno/knowledge/embedder/google.py +79 -1
  15. agno/knowledge/embedder/huggingface.py +9 -4
  16. agno/knowledge/embedder/jina.py +63 -0
  17. agno/knowledge/embedder/mistral.py +78 -11
  18. agno/knowledge/embedder/ollama.py +5 -0
  19. agno/knowledge/embedder/openai.py +18 -54
  20. agno/knowledge/embedder/voyageai.py +69 -16
  21. agno/knowledge/knowledge.py +5 -4
  22. agno/knowledge/reader/pdf_reader.py +4 -3
  23. agno/knowledge/reader/website_reader.py +3 -2
  24. agno/models/base.py +125 -32
  25. agno/models/cerebras/cerebras.py +1 -0
  26. agno/models/cerebras/cerebras_openai.py +1 -0
  27. agno/models/dashscope/dashscope.py +1 -0
  28. agno/models/google/gemini.py +27 -5
  29. agno/models/openai/chat.py +13 -4
  30. agno/models/perplexity/perplexity.py +2 -3
  31. agno/models/requesty/__init__.py +5 -0
  32. agno/models/requesty/requesty.py +49 -0
  33. agno/models/vllm/vllm.py +1 -0
  34. agno/models/xai/xai.py +1 -0
  35. agno/os/app.py +98 -126
  36. agno/os/interfaces/whatsapp/router.py +2 -0
  37. agno/os/mcp.py +1 -1
  38. agno/os/middleware/__init__.py +7 -0
  39. agno/os/middleware/jwt.py +233 -0
  40. agno/os/router.py +181 -45
  41. agno/os/routers/home.py +2 -2
  42. agno/os/routers/memory/memory.py +23 -1
  43. agno/os/routers/memory/schemas.py +1 -1
  44. agno/os/routers/session/session.py +20 -3
  45. agno/os/utils.py +74 -8
  46. agno/run/agent.py +120 -77
  47. agno/run/team.py +115 -72
  48. agno/run/workflow.py +5 -15
  49. agno/session/summary.py +9 -10
  50. agno/session/team.py +2 -1
  51. agno/team/team.py +720 -168
  52. agno/tools/firecrawl.py +4 -4
  53. agno/tools/function.py +42 -2
  54. agno/tools/knowledge.py +3 -3
  55. agno/tools/searxng.py +2 -2
  56. agno/tools/serper.py +2 -2
  57. agno/tools/spider.py +2 -2
  58. agno/tools/workflow.py +4 -5
  59. agno/utils/events.py +66 -1
  60. agno/utils/hooks.py +57 -0
  61. agno/utils/media.py +11 -9
  62. agno/utils/print_response/agent.py +43 -5
  63. agno/utils/print_response/team.py +48 -12
  64. agno/vectordb/cassandra/cassandra.py +44 -4
  65. agno/vectordb/chroma/chromadb.py +79 -8
  66. agno/vectordb/clickhouse/clickhousedb.py +43 -6
  67. agno/vectordb/couchbase/couchbase.py +76 -5
  68. agno/vectordb/lancedb/lance_db.py +38 -3
  69. agno/vectordb/milvus/milvus.py +76 -4
  70. agno/vectordb/mongodb/mongodb.py +76 -4
  71. agno/vectordb/pgvector/pgvector.py +50 -6
  72. agno/vectordb/pineconedb/pineconedb.py +39 -2
  73. agno/vectordb/qdrant/qdrant.py +76 -26
  74. agno/vectordb/singlestore/singlestore.py +77 -4
  75. agno/vectordb/upstashdb/upstashdb.py +42 -2
  76. agno/vectordb/weaviate/weaviate.py +39 -3
  77. agno/workflow/types.py +1 -0
  78. agno/workflow/workflow.py +58 -2
  79. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/METADATA +4 -3
  80. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/RECORD +83 -73
  81. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/WHEEL +0 -0
  82. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/licenses/LICENSE +0 -0
  83. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/top_level.txt +0 -0
agno/utils/print_response/team.py
@@ -98,6 +98,20 @@ def print_response(
  )
  response_timer.stop()

+ if run_response.input is not None and run_response.input.input_content != input:
+ # Input was modified during the run
+ panels = [status]
+ if show_message:
+ # Convert message to a panel
+ message_content = get_text_from_message(run_response.input.input_content)
+ message_panel = create_panel(
+ content=Text(message_content, style="green"),
+ title="Message",
+ border_style="cyan",
+ )
+ panels.append(message_panel) # type: ignore
+ live_console.update(Group(*panels))
+
  team_markdown = False
  member_markdown = {}
  if markdown:
@@ -400,6 +414,8 @@ def print_response_stream(
  **kwargs,
  )

+ input_content = get_text_from_message(input)
+
  team_markdown = None
  member_markdown = {}

@@ -435,6 +451,10 @@ def print_response_stream(
  if hasattr(resp, "reasoning_steps") and resp.reasoning_steps is not None: # type: ignore
  reasoning_steps = resp.reasoning_steps # type: ignore

+ if resp.event == TeamRunEvent.pre_hook_completed: # type: ignore
+ if resp.run_input is not None: # type: ignore
+ input_content = get_text_from_message(resp.run_input.input_content) # type: ignore
+
  # Collect team tool calls, avoiding duplicates
  if resp.event == TeamRunEvent.tool_call_completed and resp.tool: # type: ignore
  tool = resp.tool # type: ignore
@@ -479,12 +499,11 @@ def print_response_stream(
  # Create new panels for each chunk
  panels = []

- if input and show_message:
+ if input_content and show_message:
  render = True
  # Convert message to a panel
- message_content = get_text_from_message(input)
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -663,10 +682,9 @@ def print_response_stream(
  final_panels = []

  # Start with the message
- if input and show_message:
- message_content = get_text_from_message(input)
+ if input_content and show_message:
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -911,6 +929,20 @@ async def aprint_response(
  )
  response_timer.stop()

+ if run_response.input is not None and run_response.input.input_content != input:
+ # Input was modified during the run
+ panels = [status]
+ if show_message:
+ # Convert message to a panel
+ message_content = get_text_from_message(run_response.input.input_content)
+ message_panel = create_panel(
+ content=Text(message_content, style="green"),
+ title="Message",
+ border_style="cyan",
+ )
+ panels.append(message_panel) # type: ignore
+ live_console.update(Group(*panels))
+
  team_markdown = False
  member_markdown = {}
  if markdown:
@@ -1196,6 +1228,8 @@ async def aprint_response_stream(
  # Dict to track member response panels by member_id
  member_response_panels = {}

+ input_content = get_text_from_message(input)
+
  final_run_response = None
  async for resp in team.arun( # type: ignore
  input=input,
@@ -1245,6 +1279,10 @@ async def aprint_response_stream(
  if hasattr(resp, "reasoning_steps") and resp.reasoning_steps is not None: # type: ignore
  reasoning_steps = resp.reasoning_steps # type: ignore

+ if resp.event == TeamRunEvent.pre_hook_completed: # type: ignore
+ if resp.run_input is not None: # type: ignore
+ input_content = get_text_from_message(resp.run_input.input_content) # type: ignore
+
  # Collect team tool calls, avoiding duplicates
  if resp.event == TeamRunEvent.tool_call_completed and resp.tool: # type: ignore
  tool = resp.tool # type: ignore
@@ -1288,12 +1326,11 @@ async def aprint_response_stream(
  # Create new panels for each chunk
  panels = []

- if input and show_message:
+ if input_content and show_message:
  render = True
  # Convert message to a panel
- message_content = get_text_from_message(input)
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -1473,10 +1510,9 @@ async def aprint_response_stream(
  final_panels = []

  # Start with the message
- if input and show_message:
- message_content = get_text_from_message(input)
+ if input_content and show_message:
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
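The hunks above make the team print helpers track the effective input: non-streaming runs re-render the Message panel when run_response.input.input_content differs from the original input, and streaming runs refresh it on pre_hook_completed events. A minimal sketch of observing the same event outside the print helpers, assuming TeamRunEvent is importable from agno.run.team, that stream=True yields intermediate events (additional flags may be required), and that the show_effective_input helper is hypothetical:

import asyncio

from agno.run.team import TeamRunEvent  # assumed import path


async def show_effective_input(team, user_input: str) -> None:
    # Hypothetical helper: watch for a pre-hook (e.g. a guardrail) rewriting the input.
    effective_input = user_input
    async for event in team.arun(input=user_input, stream=True):
        if getattr(event, "event", None) == TeamRunEvent.pre_hook_completed and event.run_input is not None:
            # run_input.input_content carries the (possibly modified) input, as in the hunks above
            effective_input = event.run_input.input_content
    print(f"Input actually sent to the team: {effective_input}")


# asyncio.run(show_effective_input(my_team, "original user prompt"))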
agno/vectordb/cassandra/cassandra.py
@@ -119,12 +119,52 @@ class Cassandra(VectorDb):
  """Insert documents asynchronously by running in a thread."""
  log_info(f"Cassandra VectorDB : Inserting Documents to the table {self.table_name}")

- for doc in documents:
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
  try:
- embed_tasks = [doc.async_embed(embedder=self.embedder)]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
  except Exception as e:
- log_error(f"Error processing document '{doc.name}': {e}")
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ for doc in documents:
+ try:
+ embed_tasks = [doc.async_embed(embedder=self.embedder)]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document '{doc.name}': {e}")
+ else:
+ # Use individual embedding (original behavior)
+ for doc in documents:
+ try:
+ embed_tasks = [doc.async_embed(embedder=self.embedder)]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document '{doc.name}': {e}")

  futures = []
  for doc in documents:
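The same batch-embedding branch recurs nearly verbatim in the ChromaDb, Clickhouse, CouchbaseSearch, LanceDb, and Milvus hunks below. A condensed, standalone sketch of the pattern, assuming only the attributes the diff itself relies on (embedder.enable_batch, async_get_embeddings_batch_and_usage, doc.content, doc.embedding, doc.usage, doc.async_embed); the embed_documents helper name is hypothetical:

import asyncio
from typing import Any, List


async def embed_documents(documents: List[Any], embedder: Any) -> None:
    # Hypothetical standalone version of the branch added to each vector DB backend.
    if getattr(embedder, "enable_batch", False) and hasattr(embedder, "async_get_embeddings_batch_and_usage"):
        try:
            contents = [doc.content for doc in documents]
            # One call embeds every document's content and returns per-item usage
            embeddings, usages = await embedder.async_get_embeddings_batch_and_usage(contents)
            for i, doc in enumerate(documents):
                if i < len(embeddings):
                    doc.embedding = embeddings[i]
                    doc.usage = usages[i] if i < len(usages) else None
            return
        except Exception as e:
            if any(p in str(e).lower() for p in ("rate limit", "too many requests", "429")):
                raise  # falling back would only send more requests
            # otherwise fall through to per-document embedding
    # Original behavior: embed each document individually
    await asyncio.gather(
        *[doc.async_embed(embedder=embedder) for doc in documents],
        return_exceptions=True,
    )

The per-backend copies in the hunks differ only in their logging calls (log_error vs logger.error / logger.warning).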
agno/vectordb/chroma/chromadb.py
@@ -215,11 +215,47 @@ class ChromaDb(VectorDb):
  if not self._collection:
  self._collection = self.client.get_collection(name=self.collection_name)

- try:
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
- except Exception as e:
- log_error(f"Error processing document: {e}")
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ try:
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document: {e}")

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -341,8 +377,44 @@ class ChromaDb(VectorDb):
  if not self._collection:
  self._collection = self.client.get_collection(name=self.collection_name)

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -709,7 +781,6 @@ class ChromaDb(VectorDb):

  try:
  collection: Collection = self.client.get_collection(name=self.collection_name)
- print("COLLECTION_----------", collection)
  # Try to get the document by ID
  result = collection.get(ids=[id])
  found_ids = result.get("ids", [])
agno/vectordb/clickhouse/clickhousedb.py
@@ -81,6 +81,7 @@ class Clickhouse(VectorDb):
  if self.async_client is None:
  self.async_client = await clickhouse_connect.get_async_client(
  host=self.host,
+ username=self.username, # type: ignore
  password=self.password,
  database=self.database_name,
  port=self.port,
@@ -228,7 +229,7 @@ class Clickhouse(VectorDb):
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  async def async_name_exists(self, name: str) -> bool:
  """Check if a document with given name exists asynchronously."""
@@ -241,7 +242,7 @@ class Clickhouse(VectorDb):
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def id_exists(self, id: str) -> bool:
  """
@@ -257,7 +258,7 @@ class Clickhouse(VectorDb):
  "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def insert(
  self,
@@ -308,8 +309,44 @@ class Clickhouse(VectorDb):
  rows: List[List[Any]] = []
  async_client = await self._ensure_async_client()

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -696,7 +733,7 @@ class Clickhouse(VectorDb):
  "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def _delete_by_content_hash(self, content_hash: str) -> bool:
  """
agno/vectordb/couchbase/couchbase.py
@@ -871,8 +871,44 @@ class CouchbaseSearch(VectorDb):
  async_collection_instance = await self.get_async_collection()
  all_docs_to_insert: Dict[str, Any] = {}

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  try:
@@ -937,8 +973,44 @@ class CouchbaseSearch(VectorDb):
  async_collection_instance = await self.get_async_collection()
  all_docs_to_upsert: Dict[str, Any] = {}

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  try:
@@ -1225,7 +1297,6 @@ class CouchbaseSearch(VectorDb):
  rows = list(result.rows()) # Collect once

  for row in rows:
- print(row)
  self.collection.remove(row.get("doc_id"))
  log_info(f"Deleted {len(rows)} documents with metadata {metadata}")
  return True
agno/vectordb/lancedb/lance_db.py
@@ -354,9 +354,44 @@ class LanceDb(VectorDb):
  log_debug(f"Inserting {len(documents)} documents")
  data = []

- # Prepare documents for insertion.
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  if await self.async_doc_exists(document):
agno/vectordb/milvus/milvus.py
@@ -457,8 +457,44 @@ class Milvus(VectorDb):
  """Insert documents asynchronously based on search type."""
  log_info(f"Inserting {len(documents)} documents asynchronously")

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  if self.search_type == SearchType.hybrid:
  await asyncio.gather(
@@ -547,8 +583,44 @@ class Milvus(VectorDb):
  ) -> None:
  log_debug(f"Upserting {len(documents)} documents asynchronously")

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  async def process_document(document):
  cleaned_content = document.content.replace("\x00", "\ufffd")