PyPI - agno - Versions diffs - 2.0.11__py3-none-any.whl → 2.1.1__py3-none-any.whl - Mend

agno: 2.0.11 (py3-none-any.whl) → 2.1.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

agno/agent/agent.py +607 -176
agno/db/in_memory/in_memory_db.py +42 -29
agno/db/mongo/mongo.py +65 -66
agno/db/postgres/postgres.py +6 -4
agno/db/utils.py +50 -22
agno/exceptions.py +62 -1
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +51 -0
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/ollama.py +5 -0
agno/knowledge/embedder/openai.py +18 -54
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +11 -4
agno/knowledge/reader/pdf_reader.py +4 -3
agno/knowledge/reader/website_reader.py +3 -2
agno/models/base.py +125 -32
agno/models/cerebras/cerebras.py +1 -0
agno/models/cerebras/cerebras_openai.py +1 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/google/gemini.py +27 -5
agno/models/openai/chat.py +13 -4
agno/models/openai/responses.py +1 -1
agno/models/perplexity/perplexity.py +2 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +49 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +1 -0
agno/os/app.py +98 -126
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/agui/agui.py +21 -5
agno/os/interfaces/base.py +4 -2
agno/os/interfaces/slack/slack.py +13 -8
agno/os/interfaces/whatsapp/router.py +2 -0
agno/os/interfaces/whatsapp/whatsapp.py +12 -5
agno/os/mcp.py +2 -2
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +182 -46
agno/os/routers/home.py +2 -2
agno/os/routers/memory/memory.py +23 -1
agno/os/routers/memory/schemas.py +1 -1
agno/os/routers/session/session.py +20 -3
agno/os/utils.py +74 -8
agno/run/agent.py +120 -77
agno/run/base.py +2 -13
agno/run/team.py +115 -72
agno/run/workflow.py +5 -15
agno/session/summary.py +9 -10
agno/session/team.py +2 -1
agno/team/team.py +721 -169
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +42 -2
agno/tools/knowledge.py +3 -3
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/spider.py +2 -2
agno/tools/workflow.py +4 -5
agno/utils/events.py +66 -1
agno/utils/hooks.py +57 -0
agno/utils/media.py +11 -9
agno/utils/print_response/agent.py +43 -5
agno/utils/print_response/team.py +48 -12
agno/utils/serialize.py +32 -0
agno/vectordb/cassandra/cassandra.py +44 -4
agno/vectordb/chroma/chromadb.py +79 -8
agno/vectordb/clickhouse/clickhousedb.py +43 -6
agno/vectordb/couchbase/couchbase.py +76 -5
agno/vectordb/lancedb/lance_db.py +38 -3
agno/vectordb/milvus/milvus.py +76 -4
agno/vectordb/mongodb/mongodb.py +76 -4
agno/vectordb/pgvector/pgvector.py +50 -6
agno/vectordb/pineconedb/pineconedb.py +39 -2
agno/vectordb/qdrant/qdrant.py +76 -26
agno/vectordb/singlestore/singlestore.py +77 -4
agno/vectordb/upstashdb/upstashdb.py +42 -2
agno/vectordb/weaviate/weaviate.py +39 -3
agno/workflow/types.py +5 -6
agno/workflow/workflow.py +58 -2
{agno-2.0.11.dist-info → agno-2.1.1.dist-info}/METADATA +4 -3
{agno-2.0.11.dist-info → agno-2.1.1.dist-info}/RECORD +93 -82
{agno-2.0.11.dist-info → agno-2.1.1.dist-info}/WHEEL +0 -0
{agno-2.0.11.dist-info → agno-2.1.1.dist-info}/licenses/LICENSE +0 -0
{agno-2.0.11.dist-info → agno-2.1.1.dist-info}/top_level.txt +0 -0

agno/vectordb/milvus/milvus.py CHANGED Viewed

@@ -457,8 +457,44 @@ class Milvus(VectorDb):
         """Insert documents asynchronously based on search type."""
         log_info(f"Inserting {len(documents)} documents asynchronously")
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         if self.search_type == SearchType.hybrid:
             await asyncio.gather(
@@ -547,8 +583,44 @@ class Milvus(VectorDb):
     ) -> None:
         log_debug(f"Upserting {len(documents)} documents asynchronously")
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         async def process_document(document):
             cleaned_content = document.content.replace("\x00", "\ufffd")

agno/vectordb/mongodb/mongodb.py CHANGED Viewed

@@ -1018,8 +1018,44 @@ class MongoDb(VectorDb):
         log_debug(f"Inserting {len(documents)} documents asynchronously")
         collection = await self._get_async_collection()
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         prepared_docs = []
         for document in documents:
@@ -1047,8 +1083,44 @@ class MongoDb(VectorDb):
         log_info(f"Upserting {len(documents)} documents asynchronously")
         collection = await self._get_async_collection()
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         for document in documents:
             try:

agno/vectordb/pgvector/pgvector.py CHANGED Viewed

@@ -55,7 +55,6 @@ class PgVector(VectorDb):
         schema_version: int = 1,
         auto_upgrade_schema: bool = False,
         reranker: Optional[Reranker] = None,
-        use_batch: bool = False,
     ):
         """
         Initialize the PgVector instance.
@@ -96,7 +95,6 @@ class PgVector(VectorDb):
         self.db_url: Optional[str] = db_url
         self.db_engine: Engine = db_engine
         self.metadata: MetaData = MetaData(schema=self.schema)
-        self.use_batch: bool = use_batch
         # Embedder for embedding the document contents
         if embedder is None:
@@ -337,8 +335,8 @@ class PgVector(VectorDb):
                     batch_docs = documents[i : i + batch_size]
                     log_debug(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
                     try:
-                        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
-                        await asyncio.gather(*embed_tasks, return_exceptions=True)
+                        # Embed all documents in the batch
+                        await self._async_embed_documents(batch_docs)
                         # Prepare documents for insertion
                         batch_records = []
@@ -493,6 +491,52 @@ class PgVector(VectorDb):
             "content_id": doc.content_id,
         }
+    async def _async_embed_documents(self, batch_docs: List[Document]) -> None:
+        """
+        Embed a batch of documents using either batch embedding or individual embedding.
+        Args:
+            batch_docs: List of documents to embed
+        """
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in batch_docs]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(batch_docs):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding.  {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
     async def async_upsert(
         self,
         content_hash: str,
@@ -530,8 +574,8 @@ class PgVector(VectorDb):
                     batch_docs = documents[i : i + batch_size]
                     log_info(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
                     try:
-                        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
-                        await asyncio.gather(*embed_tasks, return_exceptions=True)
+                        # Embed all documents in the batch
+                        await self._async_embed_documents(batch_docs)
                         # Prepare documents for upserting
                         batch_records_dict = {}  # Use dict to deduplicate by ID

agno/vectordb/pineconedb/pineconedb.py CHANGED Viewed

@@ -338,8 +338,45 @@ class PineconeDb(VectorDb):
     async def _prepare_vectors(self, documents: List[Document]) -> List[Dict[str, Any]]:
         """Prepare vectors for upsert."""
         vectors = []
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         for doc in documents:
             doc.meta_data["text"] = doc.content

agno/vectordb/qdrant/qdrant.py CHANGED Viewed

@@ -131,7 +131,8 @@ class Qdrant(VectorDb):
                 if fastembed_kwargs:
                     default_kwargs.update(fastembed_kwargs)
-                self.sparse_encoder = SparseTextEmbedding(**default_kwargs)
+                # Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
+                self.sparse_encoder = SparseTextEmbedding(**default_kwargs)  # type: ignore
             except ImportError as e:
                 raise ImportError(
@@ -192,10 +193,12 @@ class Qdrant(VectorDb):
             # Configure vectors based on search type
             if self.search_type == SearchType.vector:
                 # Maintain backward compatibility with unnamed vectors
-                vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
+                vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
             else:
                 # Use named vectors for hybrid search
-                vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}  # type: ignore
+                vectors_config = {
+                    self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
+                }  # type: ignore
             self.client.create_collection(
                 collection_name=self.collection,
@@ -220,10 +223,12 @@ class Qdrant(VectorDb):
             # Configure vectors based on search type
             if self.search_type == SearchType.vector:
                 # Maintain backward compatibility with unnamed vectors
-                vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
+                vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
             else:
                 # Use named vectors for hybrid search
-                vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}  # type: ignore
+                vectors_config = {
+                    self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
+                }  # type: ignore
             await self.async_client.create_collection(
                 collection_name=self.collection,
@@ -281,7 +286,7 @@ class Qdrant(VectorDb):
             return len(scroll_result[0]) > 0
         return False
-    async def async_name_exists(self, name: str) -> bool:
+    async def async_name_exists(self, name: str) -> bool:  # type: ignore[override]
         """
         Asynchronously validates if a document with the given name exists in the collection.
@@ -341,7 +346,9 @@ class Qdrant(VectorDb):
                     vector[self.dense_vector_name] = document.embedding
                 if self.search_type in [SearchType.keyword, SearchType.hybrid]:
-                    vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
+                    vector[self.sparse_vector_name] = next(
+                        iter(self.sparse_encoder.embed([document.content]))
+                    ).as_object()  # type: ignore
             # Create payload with document properties
             payload = {
@@ -363,7 +370,7 @@ class Qdrant(VectorDb):
             points.append(
                 models.PointStruct(
                     id=doc_id,
-                    vector=vector,
+                    vector=vector,  # type: ignore
                     payload=payload,
                 )
             )
@@ -384,26 +391,69 @@ class Qdrant(VectorDb):
         """
         log_debug(f"Inserting {len(documents)} documents asynchronously")
+        # Apply batch embedding when needed for vector or hybrid search
+        if self.search_type in [SearchType.vector, SearchType.hybrid]:
+            if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+                # Use batch embedding when enabled and supported
+                try:
+                    # Extract content from all documents
+                    doc_contents = [doc.content for doc in documents]
+                    # Get batch embeddings and usage
+                    embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                    # Process documents with pre-computed embeddings
+                    for j, doc in enumerate(documents):
+                        try:
+                            if j < len(embeddings):
+                                doc.embedding = embeddings[j]
+                                doc.usage = usages[j] if j < len(usages) else None
+                        except Exception as e:
+                            log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+                except Exception as e:
+                    # Check if this is a rate limit error - don't fall back as it would make things worse
+                    error_str = str(e).lower()
+                    is_rate_limit = any(
+                        phrase in error_str
+                        for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                    )
+                    if is_rate_limit:
+                        log_error(f"Rate limit detected during batch embedding. {e}")
+                        raise e
+                    else:
+                        log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                        # Fall back to individual embedding
+                        for doc in documents:
+                            if self.search_type in [SearchType.vector, SearchType.hybrid]:
+                                doc.embed(embedder=self.embedder)
+            else:
+                # Use individual embedding
+                for doc in documents:
+                    if self.search_type in [SearchType.vector, SearchType.hybrid]:
+                        doc.embed(embedder=self.embedder)
         async def process_document(document):
             cleaned_content = document.content.replace("\x00", "\ufffd")
             doc_id = md5(cleaned_content.encode()).hexdigest()
             if self.search_type == SearchType.vector:
                 # For vector search, maintain backward compatibility with unnamed vectors
-                document.embed(embedder=self.embedder)
-                vector = document.embedding
+                vector = document.embedding  # Already embedded above
             else:
                 # For other search types, use named vectors
                 vector = {}
                 if self.search_type in [SearchType.hybrid]:
-                    document.embed(embedder=self.embedder)
-                    vector[self.dense_vector_name] = document.embedding
+                    vector[self.dense_vector_name] = document.embedding  # Already embedded above
                 if self.search_type in [SearchType.keyword, SearchType.hybrid]:
-                    vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
+                    vector[self.sparse_vector_name] = next(
+                        iter(self.sparse_encoder.embed([document.content]))
+                    ).as_object()  # type: ignore
             if self.search_type in [SearchType.keyword, SearchType.hybrid]:
-                vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
+                vector[self.sparse_vector_name] = next(iter(self.sparse_encoder.embed([document.content]))).as_object()
             # Create payload with document properties
             payload = {
@@ -423,9 +473,9 @@ class Qdrant(VectorDb):
                 payload["meta_data"].update(filters)
             log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
-            return models.PointStruct(
+            return models.PointStruct(  # type: ignore
                 id=doc_id,
-                vector=vector,
+                vector=vector,  # type: ignore
                 payload=payload,
             )
@@ -501,12 +551,12 @@ class Qdrant(VectorDb):
         filters: Optional[Dict[str, Any]],
     ) -> List[models.ScoredPoint]:
         dense_embedding = self.embedder.get_embedding(query)
-        sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
+        sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
         call = self.client.query_points(
             collection_name=self.collection,
             prefetch=[
                 models.Prefetch(
-                    query=models.SparseVector(**sparse_embedding),
+                    query=models.SparseVector(**sparse_embedding),  # type: ignore  # type: ignore
                     limit=limit,
                     using=self.sparse_vector_name,
                 ),
@@ -557,10 +607,10 @@ class Qdrant(VectorDb):
         limit: int,
         filters: Optional[Dict[str, Any]],
     ) -> List[models.ScoredPoint]:
-        sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
+        sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
         call = self.client.query_points(
             collection_name=self.collection,
-            query=models.SparseVector(**sparse_embedding),
+            query=models.SparseVector(**sparse_embedding),  # type: ignore
             with_vectors=True,
             with_payload=True,
             limit=limit,
@@ -606,10 +656,10 @@ class Qdrant(VectorDb):
         limit: int,
         filters: Optional[Dict[str, Any]],
     ) -> List[models.ScoredPoint]:
-        sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
+        sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
         call = await self.async_client.query_points(
             collection_name=self.collection,
-            query=models.SparseVector(**sparse_embedding),
+            query=models.SparseVector(**sparse_embedding),  # type: ignore
             with_vectors=True,
             with_payload=True,
             limit=limit,
@@ -625,12 +675,12 @@ class Qdrant(VectorDb):
         filters: Optional[Dict[str, Any]],
     ) -> List[models.ScoredPoint]:
         dense_embedding = self.embedder.get_embedding(query)
-        sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
+        sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
         call = await self.async_client.query_points(
             collection_name=self.collection,
             prefetch=[
                 models.Prefetch(
-                    query=models.SparseVector(**sparse_embedding),
+                    query=models.SparseVector(**sparse_embedding),  # type: ignore  # type: ignore
                     limit=limit,
                     using=self.sparse_vector_name,
                 ),
@@ -689,7 +739,7 @@ class Qdrant(VectorDb):
                     filter_conditions.append(models.FieldCondition(key=key, match=models.MatchValue(value=value)))
             if filter_conditions:
-                return models.Filter(must=filter_conditions)
+                return models.Filter(must=filter_conditions)  # type: ignore
         return None
@@ -807,7 +857,7 @@ class Qdrant(VectorDb):
                 )
             # Create a filter that requires ALL metadata conditions to match
-            filter_condition = models.Filter(must=filter_conditions)
+            filter_condition = models.Filter(must=filter_conditions)  # type: ignore
             # First, count how many points will be deleted
             count_result = self.client.count(collection_name=self.collection, count_filter=filter_condition, exact=True)

agno/vectordb/singlestore/singlestore.py CHANGED Viewed

@@ -496,8 +496,44 @@ class SingleStore(VectorDb):
         documents: List[Document],
         filters: Optional[Dict[str, Any]] = None,
     ) -> None:
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         with self.Session.begin() as sess:
             counter = 0
@@ -543,8 +579,45 @@ class SingleStore(VectorDb):
             filters (Optional[Dict[str, Any]]): Optional filters for the upsert.
             batch_size (int): Number of documents to upsert in each batch.
         """
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
         with self.Session.begin() as sess:
             counter = 0

agno 2.0.11__py3-none-any.whl → 2.1.1__py3-none-any.whl

agno 2.0.11__py3-none-any.whl → 2.1.1__py3-none-any.whl