agno 2.2.5__py3-none-any.whl → 2.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. agno/agent/agent.py +500 -423
  2. agno/api/os.py +1 -1
  3. agno/culture/manager.py +12 -8
  4. agno/guardrails/prompt_injection.py +1 -0
  5. agno/knowledge/chunking/agentic.py +6 -2
  6. agno/knowledge/embedder/vllm.py +262 -0
  7. agno/knowledge/knowledge.py +37 -5
  8. agno/memory/manager.py +9 -4
  9. agno/models/anthropic/claude.py +1 -2
  10. agno/models/azure/ai_foundry.py +31 -14
  11. agno/models/azure/openai_chat.py +12 -4
  12. agno/models/base.py +106 -65
  13. agno/models/cerebras/cerebras.py +11 -6
  14. agno/models/groq/groq.py +7 -4
  15. agno/models/meta/llama.py +12 -6
  16. agno/models/meta/llama_openai.py +5 -1
  17. agno/models/openai/chat.py +26 -17
  18. agno/models/openai/responses.py +11 -63
  19. agno/models/requesty/requesty.py +5 -2
  20. agno/models/utils.py +254 -8
  21. agno/models/vertexai/claude.py +9 -13
  22. agno/os/app.py +13 -12
  23. agno/os/routers/evals/evals.py +8 -8
  24. agno/os/routers/evals/utils.py +1 -0
  25. agno/os/schema.py +56 -38
  26. agno/os/utils.py +27 -0
  27. agno/run/__init__.py +6 -0
  28. agno/run/agent.py +5 -0
  29. agno/run/base.py +18 -1
  30. agno/run/team.py +13 -9
  31. agno/run/workflow.py +39 -0
  32. agno/session/summary.py +8 -2
  33. agno/session/workflow.py +4 -3
  34. agno/team/team.py +302 -369
  35. agno/tools/exa.py +21 -16
  36. agno/tools/file.py +153 -25
  37. agno/tools/function.py +98 -17
  38. agno/tools/mcp/mcp.py +8 -1
  39. agno/tools/notion.py +204 -0
  40. agno/utils/agent.py +78 -0
  41. agno/utils/events.py +2 -0
  42. agno/utils/hooks.py +1 -1
  43. agno/utils/models/claude.py +25 -8
  44. agno/utils/print_response/workflow.py +115 -16
  45. agno/vectordb/__init__.py +2 -1
  46. agno/vectordb/milvus/milvus.py +5 -0
  47. agno/vectordb/redis/__init__.py +5 -0
  48. agno/vectordb/redis/redisdb.py +687 -0
  49. agno/workflow/__init__.py +2 -0
  50. agno/workflow/agent.py +299 -0
  51. agno/workflow/step.py +13 -2
  52. agno/workflow/workflow.py +969 -72
  53. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/METADATA +10 -3
  54. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/RECORD +57 -52
  55. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/WHEEL +0 -0
  56. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/licenses/LICENSE +0 -0
  57. {agno-2.2.5.dist-info → agno-2.2.7.dist-info}/top_level.txt +0 -0
agno/api/os.py CHANGED
@@ -14,4 +14,4 @@ def log_os_telemetry(launch: OSLaunch) -> None:
         )
         response.raise_for_status()
     except Exception as e:
-        log_debug(f"Could not create OS launch: {e}")
+        log_debug(f"Could not register OS launch for telemetry: {e}")
agno/culture/manager.py CHANGED
@@ -8,6 +8,7 @@ from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.culture import CulturalKnowledge
 from agno.models.base import Model
 from agno.models.message import Message
+from agno.models.utils import get_model
 from agno.tools.function import Function
 from agno.utils.log import (
     log_debug,
@@ -55,7 +56,7 @@ class CultureManager:
 
     def __init__(
         self,
-        model: Optional[Model] = None,
+        model: Optional[Union[Model, str]] = None,
         db: Optional[Union[BaseDb, AsyncBaseDb]] = None,
         system_message: Optional[str] = None,
         culture_capture_instructions: Optional[str] = None,
@@ -66,9 +67,7 @@ class CultureManager:
         clear_knowledge: bool = True,
         debug_mode: bool = False,
     ):
-        self.model = model
-        if self.model is not None and isinstance(self.model, str):
-            raise ValueError("Model must be a Model object, not a string")
+        self.model = get_model(model)
         self.db = db
         self.system_message = system_message
         self.culture_capture_instructions = culture_capture_instructions
@@ -135,9 +134,10 @@ class CultureManager:
         if not self.db:
            return None
 
-        self.db = cast(AsyncBaseDb, self.db)
-
-        return await self.db.get_all_cultural_knowledge(name=name)
+        if isinstance(self.db, AsyncBaseDb):
+            return await self.db.get_all_cultural_knowledge(name=name)
+        else:
+            return self.db.get_all_cultural_knowledge(name=name)
 
     def add_cultural_knowledge(
         self,
@@ -231,7 +231,11 @@ class CultureManager:
         if not messages or not isinstance(messages, list):
             raise ValueError("Invalid messages list")
 
-        knowledge = self.get_all_knowledge()
+        if isinstance(self.db, AsyncBaseDb):
+            knowledge = await self.aget_all_knowledge()
+        else:
+            knowledge = self.get_all_knowledge()
+
         if knowledge is None:
             knowledge = []
 
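With `get_model` doing the conversion, `CultureManager` now accepts either a `Model` instance or a model id string where it previously raised a `ValueError` for strings. A minimal sketch of both forms; the exact string format that `get_model` resolves lives in `agno/models/utils.py` and is not shown in this diff, so the provider-prefixed id below is an assumption:

    from agno.culture.manager import CultureManager
    from agno.models.openai import OpenAIChat

    # Passing a Model instance works exactly as before
    manager = CultureManager(model=OpenAIChat("gpt-4o-mini"))

    # Strings are now resolved via get_model() instead of raising ValueError;
    # the "provider:model" format used here is an assumption, not from this diff
    manager_from_string = CultureManager(model="openai:gpt-4o-mini")
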
agno/guardrails/prompt_injection.py CHANGED
@@ -32,6 +32,7 @@ class PromptInjectionGuardrail(BaseGuardrail):
         "ignore safeguards",
         "admin override",
         "root access",
+        "forget everything",
     ]
 
     def check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
agno/knowledge/chunking/agentic.py CHANGED
@@ -1,22 +1,26 @@
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.models.base import Model
 from agno.models.defaults import DEFAULT_OPENAI_MODEL_ID
 from agno.models.message import Message
+from agno.models.utils import get_model
 
 
 class AgenticChunking(ChunkingStrategy):
     """Chunking strategy that uses an LLM to determine natural breakpoints in the text"""
 
-    def __init__(self, model: Optional[Model] = None, max_chunk_size: int = 5000):
+    def __init__(self, model: Optional[Union[Model, str]] = None, max_chunk_size: int = 5000):
+        # Convert model string to Model instance
+        model = get_model(model)
         if model is None:
             try:
                 from agno.models.openai import OpenAIChat
             except Exception:
                 raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
             model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
+
         self.max_chunk_size = max_chunk_size
         self.model = model
 
agno/knowledge/embedder/vllm.py ADDED
@@ -0,0 +1,262 @@
+import asyncio
+from dataclasses import dataclass
+from os import getenv
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+from agno.knowledge.embedder.base import Embedder
+from agno.utils.log import logger
+
+try:
+    from vllm import LLM  # type: ignore
+    from vllm.outputs import EmbeddingRequestOutput  # type: ignore
+except ImportError:
+    raise ImportError("`vllm` not installed. Please install using `pip install vllm`.")
+
+if TYPE_CHECKING:
+    from openai import AsyncOpenAI
+    from openai import OpenAI as OpenAIClient
+    from openai.types.create_embedding_response import CreateEmbeddingResponse
+
+
+@dataclass
+class VLLMEmbedder(Embedder):
+    """
+    VLLM Embedder supporting both local and remote deployment modes.
+
+    Local Mode (default):
+    - Loads model locally and runs inference on your GPU/CPU
+    - No API key required
+    - Example: VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
+
+    Remote Mode:
+    - Connects to a remote vLLM server via OpenAI-compatible API
+    - Uses OpenAI SDK to communicate with vLLM's OpenAI-compatible endpoint
+    - Requires base_url and optionally api_key
+    - Example: VLLMEmbedder(base_url="http://localhost:8000/v1", api_key="your-key")
+    - Ref: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    """
+
+    id: str = "sentence-transformers/all-MiniLM-L6-v2"
+    dimensions: int = 4096
+    # Local mode parameters
+    enforce_eager: bool = True
+    vllm_kwargs: Optional[Dict[str, Any]] = None
+    vllm_client: Optional[LLM] = None
+    # Remote mode parameters
+    api_key: Optional[str] = getenv("VLLM_API_KEY")
+    base_url: Optional[str] = None
+    request_params: Optional[Dict[str, Any]] = None
+    client_params: Optional[Dict[str, Any]] = None
+    remote_client: Optional["OpenAIClient"] = None  # OpenAI-compatible client for vLLM server
+    async_remote_client: Optional["AsyncOpenAI"] = None  # Async OpenAI-compatible client for vLLM server
+
+    @property
+    def is_remote(self) -> bool:
+        """Determine if we should use remote mode."""
+        return self.base_url is not None
+
+    def _get_vllm_client(self) -> LLM:
+        """Get local VLLM client."""
+        if self.vllm_client:
+            return self.vllm_client
+
+        _vllm_params: Dict[str, Any] = {
+            "model": self.id,
+            "task": "embed",
+            "enforce_eager": self.enforce_eager,
+        }
+        if self.vllm_kwargs:
+            _vllm_params.update(self.vllm_kwargs)
+        self.vllm_client = LLM(**_vllm_params)
+        return self.vllm_client
+
+    def _get_remote_client(self) -> "OpenAIClient":
+        """Get OpenAI-compatible client for remote vLLM server."""
+        if self.remote_client:
+            return self.remote_client
+
+        try:
+            from openai import OpenAI as OpenAIClient
+        except ImportError:
+            raise ImportError("`openai` package required for remote vLLM mode. ")
+
+        _client_params: Dict[str, Any] = {
+            "api_key": self.api_key or "EMPTY",  # VLLM can run without API key
+            "base_url": self.base_url,
+        }
+        if self.client_params:
+            _client_params.update(self.client_params)
+        self.remote_client = OpenAIClient(**_client_params)
+        return self.remote_client
+
+    def _get_async_remote_client(self) -> "AsyncOpenAI":
+        """Get async OpenAI-compatible client for remote vLLM server."""
+        if self.async_remote_client:
+            return self.async_remote_client
+
+        try:
+            from openai import AsyncOpenAI
+        except ImportError:
+            raise ImportError("`openai` package required for remote vLLM mode. ")
+
+        _client_params: Dict[str, Any] = {
+            "api_key": self.api_key or "EMPTY",
+            "base_url": self.base_url,
+        }
+        if self.client_params:
+            _client_params.update(self.client_params)
+        self.async_remote_client = AsyncOpenAI(**_client_params)
+        return self.async_remote_client
+
+    def _create_embedding_local(self, text: str) -> Optional[EmbeddingRequestOutput]:
+        """Create embedding using local VLLM."""
+        try:
+            outputs = self._get_vllm_client().embed([text])
+            return outputs[0] if outputs else None
+        except Exception as e:
+            logger.warning(f"Error creating local embedding: {e}")
+            return None
+
+    def _create_embedding_remote(self, text: str) -> "CreateEmbeddingResponse":
+        """Create embedding using remote vLLM server."""
+        _request_params: Dict[str, Any] = {
+            "input": text,
+            "model": self.id,
+        }
+        if self.request_params:
+            _request_params.update(self.request_params)
+        return self._get_remote_client().embeddings.create(**_request_params)
+
+    def get_embedding(self, text: str) -> List[float]:
+        try:
+            if self.is_remote:
+                # Remote mode: OpenAI-compatible API
+                response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
+                return response.data[0].embedding
+            else:
+                # Local mode: Direct VLLM
+                output = self._create_embedding_local(text=text)
+                if output and hasattr(output, "outputs") and hasattr(output.outputs, "embedding"):
+                    embedding = output.outputs.embedding
+                    if len(embedding) != self.dimensions:
+                        logger.warning(f"Expected embedding dimension {self.dimensions}, but got {len(embedding)}")
+                    return embedding
+                return []
+        except Exception as e:
+            logger.warning(f"Error extracting embedding: {e}")
+            return []
+
+    def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
+        if self.is_remote:
+            try:
+                response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
+                embedding = response.data[0].embedding
+                usage = response.usage
+                if usage:
+                    return embedding, usage.model_dump()
+                return embedding, None
+            except Exception as e:
+                logger.warning(f"Error in remote embedding: {e}")
+                return [], None
+        else:
+            embedding = self.get_embedding(text=text)
+            # Local VLLM doesn't provide usage information
+            return embedding, None
+
+    async def async_get_embedding(self, text: str) -> List[float]:
+        """Async version of get_embedding using thread executor for local mode."""
+        if self.is_remote:
+            # Remote mode: async client for vLLM server
+            try:
+                req: Dict[str, Any] = {
+                    "input": text,
+                    "model": self.id,
+                }
+                if self.request_params:
+                    req.update(self.request_params)
+                response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                return response.data[0].embedding
+            except Exception as e:
+                logger.warning(f"Error in async remote embedding: {e}")
+                return []
+        else:
+            # Local mode: use thread executor for CPU-bound operations
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(None, self.get_embedding, text)
+
+    async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
+        """Async version of get_embedding_and_usage using thread executor for local mode."""
+        if self.is_remote:
+            try:
+                req: Dict[str, Any] = {
+                    "input": text,
+                    "model": self.id,
+                }
+                if self.request_params:
+                    req.update(self.request_params)
+                response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                embedding = response.data[0].embedding
+                usage = response.usage
+                return embedding, usage.model_dump() if usage else None
+            except Exception as e:
+                logger.warning(f"Error in async remote embedding: {e}")
+                return [], None
+        else:
+            # Local mode: use thread executor for CPU-bound operations
+            try:
+                loop = asyncio.get_event_loop()
+                return await loop.run_in_executor(None, self.get_embedding_and_usage, text)
+            except Exception as e:
+                logger.warning(f"Error in async local embedding: {e}")
+                return [], None
+
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches (async version).
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        """
+        all_embeddings = []
+        all_usage = []
+        logger.info(f"Getting embeddings for {len(texts)} texts in batches of {self.batch_size} (async)")
+
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+
+            try:
+                if self.is_remote:
+                    # Remote mode: use batch API
+                    req: Dict[str, Any] = {
+                        "input": batch_texts,
+                        "model": self.id,
+                    }
+                    if self.request_params:
+                        req.update(self.request_params)
+                    response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                    batch_embeddings = [data.embedding for data in response.data]
+                    all_embeddings.extend(batch_embeddings)
+
+                    # For each embedding in the batch, add the same usage information
+                    usage_dict = response.usage.model_dump() if response.usage else None
+                    all_usage.extend([usage_dict] * len(batch_embeddings))
+                else:
+                    # Local mode: process individually using thread executor
+                    for text in batch_texts:
+                        embedding, usage = await self.async_get_embedding_and_usage(text)
+                        all_embeddings.append(embedding)
+                        all_usage.append(usage)
+
+            except Exception as e:
+                logger.warning(f"Error in async batch embedding: {e}")
+                # Fallback: add empty results for failed batch
+                for _ in batch_texts:
+                    all_embeddings.append([])
+                    all_usage.append(None)
+
+        return all_embeddings, all_usage
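The new `VLLMEmbedder` is used like any other `Embedder`. A minimal sketch of both modes, following the examples in the docstring above (the model id, server URL, and API key are placeholders):

    from agno.knowledge.embedder.vllm import VLLMEmbedder

    # Local mode: loads the model in-process via vllm, no API key required
    local_embedder = VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
    vector = local_embedder.get_embedding("hello world")

    # Remote mode: talks to a running vLLM server through its OpenAI-compatible API
    remote_embedder = VLLMEmbedder(
        id="intfloat/e5-mistral-7b-instruct",
        base_url="http://localhost:8000/v1",  # placeholder server URL
        api_key="your-key",  # optional; falls back to VLLM_API_KEY or "EMPTY"
    )
    vector, usage = remote_embedder.get_embedding_and_usage("hello world")
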
agno/knowledge/knowledge.py CHANGED
@@ -4,7 +4,6 @@ import io
 import time
 from dataclasses import dataclass
 from enum import Enum
-from functools import cached_property
 from io import BytesIO
 from os.path import basename
 from pathlib import Path
@@ -187,10 +186,14 @@ class Knowledge:
         paths: Optional[List[str]] = None,
         urls: Optional[List[str]] = None,
         metadata: Optional[Dict[str, str]] = None,
+        topics: Optional[List[str]] = None,
+        text_contents: Optional[List[str]] = None,
+        reader: Optional[Reader] = None,
         include: Optional[List[str]] = None,
         exclude: Optional[List[str]] = None,
         upsert: bool = True,
         skip_if_exists: bool = False,
+        remote_content: Optional[RemoteContent] = None,
     ) -> None: ...
 
     def add_contents(self, *args, **kwargs) -> None:
@@ -208,10 +211,14 @@ class Knowledge:
             paths: Optional list of file paths to load content from
             urls: Optional list of URLs to load content from
             metadata: Optional metadata dictionary to apply to all content
+            topics: Optional list of topics to add
+            text_contents: Optional list of text content strings to add
+            reader: Optional reader to use for processing content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
             upsert: Whether to update existing content if it already exists
             skip_if_exists: Whether to skip adding content if it already exists
+            remote_content: Optional remote content (S3, GCS, etc.) to add
         """
         asyncio.run(self.add_contents_async(*args, **kwargs))
 
@@ -1449,14 +1456,16 @@ class Knowledge:
     def get_valid_filters(self) -> Set[str]:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
-            self.valid_metadata_filters.update(self._get_filters_from_db)
+            self.valid_metadata_filters.update(self._get_filters_from_db())
         return self.valid_metadata_filters
 
-    def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+    async def aget_valid_filters(self) -> Set[str]:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
-            self.valid_metadata_filters.update(self._get_filters_from_db)
+            self.valid_metadata_filters.update(await self._aget_filters_from_db())
+        return self.valid_metadata_filters
 
+    def _validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
         if not filters:
             return {}, []
 
@@ -1480,6 +1489,20 @@ class Knowledge:
 
         return valid_filters, invalid_keys
 
+    def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+        if self.valid_metadata_filters is None:
+            self.valid_metadata_filters = set()
+            self.valid_metadata_filters.update(self._get_filters_from_db())
+
+        return self._validate_filters(filters)
+
+    async def async_validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+        if self.valid_metadata_filters is None:
+            self.valid_metadata_filters = set()
+            self.valid_metadata_filters.update(await self._aget_filters_from_db())
+
+        return self._validate_filters(filters)
+
     def add_filters(self, metadata: Dict[str, Any]) -> None:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
@@ -1488,7 +1511,6 @@ class Knowledge:
         for key in metadata.keys():
             self.valid_metadata_filters.add(key)
 
-    @cached_property
     def _get_filters_from_db(self) -> Set[str]:
         if self.contents_db is None:
             return set()
@@ -1499,6 +1521,16 @@ class Knowledge:
                 valid_filters.update(content.metadata.keys())
         return valid_filters
 
+    async def _aget_filters_from_db(self) -> Set[str]:
+        if self.contents_db is None:
+            return set()
+        contents, _ = await self.aget_content()
+        valid_filters: Set[str] = set()
+        for content in contents:
+            if content.metadata:
+                valid_filters.update(content.metadata.keys())
+        return valid_filters
+
     def remove_vector_by_id(self, id: str) -> bool:
         from agno.vectordb import VectorDb
 
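The filter-validation path now has an explicit sync/async split, with both entry points sharing `_validate_filters` and lazily caching the valid metadata keys from the contents DB. A minimal sketch, assuming `knowledge` is an existing `Knowledge` instance with a contents DB configured:

    # Sync path: populates valid_metadata_filters from the DB on first use
    valid, invalid_keys = knowledge.validate_filters({"author": "jane", "unknown_key": 1})

    # Async path: same behaviour, but loads content via the new _aget_filters_from_db()
    valid, invalid_keys = await knowledge.async_validate_filters({"author": "jane"})
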
agno/memory/manager.py CHANGED
@@ -11,6 +11,7 @@ from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas import UserMemory
 from agno.models.base import Model
 from agno.models.message import Message
+from agno.models.utils import get_model
 from agno.tools.function import Function
 from agno.utils.log import (
     log_debug,
@@ -66,7 +67,7 @@ class MemoryManager:
 
     def __init__(
         self,
-        model: Optional[Model] = None,
+        model: Optional[Union[Model, str]] = None,
         system_message: Optional[str] = None,
         memory_capture_instructions: Optional[str] = None,
         additional_instructions: Optional[str] = None,
@@ -77,9 +78,7 @@ class MemoryManager:
         clear_memories: bool = False,
         debug_mode: bool = False,
     ):
-        self.model = model
-        if self.model is not None and isinstance(self.model, str):
-            raise ValueError("Model must be a Model object, not a string")
+        self.model = model  # type: ignore[assignment]
         self.system_message = system_message
         self.memory_capture_instructions = memory_capture_instructions
         self.additional_instructions = additional_instructions
@@ -90,6 +89,12 @@ class MemoryManager:
         self.clear_memories = clear_memories
         self.debug_mode = debug_mode
 
+        self._get_models()
+
+    def _get_models(self) -> None:
+        if self.model is not None:
+            self.model = get_model(self.model)
+
     def get_model(self) -> Model:
         if self.model is None:
             try:
agno/models/anthropic/claude.py CHANGED
@@ -98,7 +98,6 @@ class Claude(Model):
     timeout: Optional[float] = None
     client_params: Optional[Dict[str, Any]] = None
 
-    # Anthropic clients
     client: Optional[AnthropicClient] = None
     async_client: Optional[AsyncAnthropicClient] = None
 
@@ -145,7 +144,7 @@
         """
         Returns an instance of the async Anthropic client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         _client_params = self._get_client_params()
agno/models/azure/ai_foundry.py CHANGED
@@ -160,7 +160,9 @@ class AzureAIFoundry(Model):
         Returns:
             ChatCompletionsClient: An instance of the Azure AI client.
         """
-        if self.client:
+        # Check if client exists and is not closed
+        # Azure's client doesn't have is_closed(), so we check if _client exists
+        if self.client and hasattr(self.client, "_client"):
             return self.client
 
         client_params = self._get_client_params()
@@ -174,11 +176,28 @@ class AzureAIFoundry(Model):
         Returns:
             AsyncChatCompletionsClient: An instance of the asynchronous Azure AI client.
         """
+        # Check if client exists and is not closed
+        # Azure's async client doesn't have is_closed(), so we check if _client exists
+        if self.async_client and hasattr(self.async_client, "_client"):
+            return self.async_client
+
         client_params = self._get_client_params()
 
         self.async_client = AsyncChatCompletionsClient(**client_params)
         return self.async_client
 
+    def close(self) -> None:
+        """Close the synchronous client and clean up resources."""
+        if self.client:
+            self.client.close()
+            self.client = None
+
+    async def aclose(self) -> None:
+        """Close the asynchronous client and clean up resources."""
+        if self.async_client:
+            await self.async_client.close()
+            self.async_client = None
+
     def invoke(
         self,
         messages: List[Message],
@@ -236,11 +255,10 @@ class AzureAIFoundry(Model):
             run_response.metrics.set_time_to_first_token()
 
         assistant_message.metrics.start_timer()
-        async with self.get_async_client() as client:
-            provider_response = await client.complete(
-                messages=[format_message(m) for m in messages],
-                **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
-            )
+        provider_response = await self.get_async_client().complete(
+            messages=[format_message(m) for m in messages],
+            **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
+        )
         assistant_message.metrics.stop_timer()
 
         model_response = self._parse_provider_response(provider_response, response_format=response_format)  # type: ignore
@@ -316,14 +334,13 @@ class AzureAIFoundry(Model):
 
         assistant_message.metrics.start_timer()
 
-        async with self.get_async_client() as client:
-            async_stream = await client.complete(
-                messages=[format_message(m) for m in messages],
-                stream=True,
-                **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
-            )
-            async for chunk in async_stream:  # type: ignore
-                yield self._parse_provider_response_delta(chunk)
+        async_stream = await self.get_async_client().complete(
+            messages=[format_message(m) for m in messages],
+            stream=True,
+            **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
+        )
+        async for chunk in async_stream:  # type: ignore
+            yield self._parse_provider_response_delta(chunk)
 
         assistant_message.metrics.stop_timer()
 
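Since streaming no longer wraps the async client in `async with`, the cached clients stay open across calls, and the new `close()`/`aclose()` methods are the explicit way to release them. A minimal sketch, with a placeholder model id and credentials assumed to come from the usual Azure environment variables:

    from agno.models.azure.ai_foundry import AzureAIFoundry

    model = AzureAIFoundry(id="Phi-4")  # placeholder model/deployment id

    async def shutdown() -> None:
        # Clients are reused between requests instead of being opened per call,
        # so release them explicitly when the application shuts down
        await model.aclose()
        model.close()
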
agno/models/azure/openai_chat.py CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Dict, Optional
 import httpx
 
 from agno.models.openai.like import OpenAILike
+from agno.utils.log import log_debug
 
 try:
     from openai import AsyncAzureOpenAI as AsyncAzureOpenAIClient
@@ -70,7 +71,6 @@ class AzureOpenAI(OpenAILike):
             "base_url": self.base_url,
             "azure_ad_token": self.azure_ad_token,
             "azure_ad_token_provider": self.azure_ad_token_provider,
-            "http_client": self.http_client,
         }
         if self.default_headers is not None:
             _client_params["default_headers"] = self.default_headers
@@ -95,7 +95,13 @@ class AzureOpenAI(OpenAILike):
 
         _client_params: Dict[str, Any] = self._get_client_params()
 
-        # -*- Create client
+        if self.http_client:
+            if isinstance(self.http_client, httpx.Client):
+                _client_params["http_client"] = self.http_client
+            else:
+                log_debug("http_client is not an instance of httpx.Client.")
+
+        # Create client
         self.client = AzureOpenAIClient(**_client_params)
         return self.client
 
@@ -106,14 +112,16 @@
         Returns:
             AsyncAzureOpenAIClient: An instance of the asynchronous OpenAI client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         _client_params: Dict[str, Any] = self._get_client_params()
 
-        if self.http_client:
+        if self.http_client and isinstance(self.http_client, httpx.AsyncClient):
             _client_params["http_client"] = self.http_client
         else:
+            if self.http_client:
+                log_debug("The current http_client is not async. A default httpx.AsyncClient will be used instead.")
             # Create a new async HTTP client with custom limits
             _client_params["http_client"] = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
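With this change the sync and async clients each validate the type of `http_client` instead of forwarding it blindly: an `httpx.Client` is only honoured by the sync client, an `httpx.AsyncClient` only by the async one, and a mismatch is logged at debug level and replaced with a default client. A minimal sketch of the intended usage; the deployment id is a placeholder and the `http_client` constructor keyword is assumed to be inherited from `OpenAILike`:

    import httpx
    from agno.models.azure.openai_chat import AzureOpenAI

    model = AzureOpenAI(
        id="gpt-4o",  # placeholder Azure deployment id
        http_client=httpx.AsyncClient(timeout=30.0),  # used only by the async client
    )
    # Async calls reuse the AsyncClient above; sync calls log a debug message
    # and fall back to the SDK's default httpx.Client.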