PyPI - agno - Versions diffs - 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331) hide show

agno/agent/agent.py +6009 -2874
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +595 -187
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +3 -0
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +339 -266
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +1011 -566
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +110 -37
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +143 -4
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +60 -6
agno/models/openai/chat.py +102 -43
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +81 -5
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -175
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +266 -112
agno/run/base.py +53 -24
agno/run/team.py +252 -111
agno/run/workflow.py +156 -45
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1692
agno/tools/brightdata.py +3 -3
agno/tools/cartesia.py +3 -5
agno/tools/dalle.py +9 -8
agno/tools/decorator.py +4 -2
agno/tools/desi_vocal.py +2 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +20 -13
agno/tools/eleven_labs.py +26 -28
agno/tools/exa.py +21 -16
agno/tools/fal.py +4 -4
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +257 -37
agno/tools/giphy.py +2 -2
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/lumalab.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/azure_openai.py +2 -2
agno/tools/models/gemini.py +3 -3
agno/tools/models/groq.py +3 -5
agno/tools/models/nebius.py +7 -7
agno/tools/models_labs.py +25 -15
agno/tools/notion.py +204 -0
agno/tools/openai.py +4 -9
agno/tools/opencv.py +3 -3
agno/tools/parallel.py +314 -0
agno/tools/replicate.py +7 -7
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +222 -7
agno/utils/gemini.py +181 -23
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +95 -5
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/models/cohere.py +1 -1
agno/utils/models/watsonx.py +1 -1
agno/utils/openai.py +1 -1
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +183 -135
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +645 -136
agno/workflow/steps.py +65 -6
agno/workflow/types.py +71 -33
agno/workflow/workflow.py +2113 -300
agno-2.3.0.dist-info/METADATA +618 -0
agno-2.3.0.dist-info/RECORD +577 -0
agno-2.3.0.dist-info/licenses/LICENSE +201 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.0rc2.dist-info/METADATA +0 -355
agno-2.0.0rc2.dist-info/RECORD +0 -515
agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/models/ollama/chat.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from os import getenv
 from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Type, Union
 from pydantic import BaseModel
@@ -10,6 +11,7 @@ from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.utils.log import log_debug, log_warning
+from agno.utils.reasoning import extract_thinking_content
 try:
     from ollama import AsyncClient as AsyncOllamaClient
@@ -43,6 +45,7 @@ class Ollama(Model):
     # Client parameters
     host: Optional[str] = None
     timeout: Optional[Any] = None
+    api_key: Optional[str] = field(default_factory=lambda: getenv("OLLAMA_API_KEY"))
     client_params: Optional[Dict[str, Any]] = None
     # Ollama clients
@@ -50,10 +53,23 @@ class Ollama(Model):
     async_client: Optional[AsyncOllamaClient] = None
     def _get_client_params(self) -> Dict[str, Any]:
+        host = self.host
+        headers = {}
+        if self.api_key:
+            if not host:
+                host = "https://ollama.com"
+            headers["authorization"] = f"Bearer {self.api_key}"
+            log_debug(f"Using Ollama cloud endpoint: {host}")
         base_params = {
-            "host": self.host,
+            "host": host,
             "timeout": self.timeout,
         }
+        if headers:
+            base_params["headers"] = headers
         # Create client_params dict with non-None values
         client_params = {k: v for k, v in base_params.items() if v is not None}
         # Add additional client params if provided
@@ -84,7 +100,8 @@ class Ollama(Model):
         if self.async_client is not None:
             return self.async_client
-        return AsyncOllamaClient(**self._get_client_params())
+        self.async_client = AsyncOllamaClient(**self._get_client_params())
+        return self.async_client
     def get_request_params(
         self,
@@ -144,12 +161,34 @@ class Ollama(Model):
             "role": message.role,
             "content": message.content,
         }
+        if message.role == "assistant" and message.tool_calls is not None:
+            # Format tool calls for assistant messages
+            formatted_tool_calls = []
+            for tool_call in message.tool_calls:
+                if "function" in tool_call:
+                    function_data = tool_call["function"]
+                    formatted_tool_call = {
+                        "id": tool_call.get("id"),
+                        "type": "function",
+                        "function": {
+                            "name": function_data["name"],
+                            "arguments": json.loads(function_data["arguments"])
+                            if isinstance(function_data["arguments"], str)
+                            else function_data["arguments"],
+                        },
+                    }
+                    formatted_tool_calls.append(formatted_tool_call)
+            if formatted_tool_calls:
+                _message["tool_calls"] = formatted_tool_calls
         if message.role == "user":
             if message.images is not None:
                 message_images = []
                 for image in message.images:
                     if image.url is not None:
-                        message_images.append(image.image_url_content)
+                        message_images.append(image.get_content_bytes())
                     if image.filepath is not None:
                         message_images.append(image.filepath)  # type: ignore
                     if image.content is not None and isinstance(image.content, bytes):
@@ -309,6 +348,16 @@ class Ollama(Model):
         if response_message.get("content") is not None:
             model_response.content = response_message.get("content")
+        # Extract thinking content between <think> tags if present
+        if model_response.content and model_response.content.find("<think>") != -1:
+            reasoning_content, clean_content = extract_thinking_content(model_response.content)
+            if reasoning_content:
+                # Store extracted thinking content separately
+                model_response.reasoning_content = reasoning_content
+                # Update main content with clean version
+                model_response.content = clean_content
         if response_message.get("tool_calls") is not None:
             if model_response.tool_calls is None:
                 model_response.tool_calls = []
@@ -380,8 +429,13 @@ class Ollama(Model):
         """
         metrics = Metrics()
-        metrics.input_tokens = response.get("prompt_eval_count", 0)
-        metrics.output_tokens = response.get("eval_count", 0)
+        # Safely handle None values from Ollama Cloud responses
+        input_tokens = response.get("prompt_eval_count")
+        output_tokens = response.get("eval_count")
+        # Default to 0 if None
+        metrics.input_tokens = input_tokens if input_tokens is not None else 0
+        metrics.output_tokens = output_tokens if output_tokens is not None else 0
         metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
         return metrics

agno/models/openai/chat.py CHANGED Viewed

@@ -2,32 +2,31 @@ from collections.abc import AsyncIterator
 from dataclasses import dataclass
 from os import getenv
 from typing import Any, Dict, Iterator, List, Literal, Optional, Type, Union
+from uuid import uuid4
 import httpx
 from pydantic import BaseModel
 from agno.exceptions import ModelProviderError
-from agno.media import AudioResponse
+from agno.media import Audio
 from agno.models.base import Model
 from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
+from agno.run.team import TeamRunOutput
+from agno.utils.http import get_default_async_client, get_default_sync_client
 from agno.utils.log import log_debug, log_error, log_warning
 from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
+from agno.utils.reasoning import extract_thinking_content
 try:
     from openai import APIConnectionError, APIStatusError, RateLimitError
     from openai import AsyncOpenAI as AsyncOpenAIClient
     from openai import OpenAI as OpenAIClient
     from openai.types import CompletionUsage
-    from openai.types.chat import ChatCompletionAudio
-    from openai.types.chat.chat_completion import ChatCompletion
-    from openai.types.chat.chat_completion_chunk import (
-        ChatCompletionChunk,
-        ChoiceDelta,
-        ChoiceDeltaToolCall,
-    )
+    from openai.types.chat import ChatCompletion, ChatCompletionAudio, ChatCompletionChunk
+    from openai.types.chat.chat_completion_chunk import ChoiceDelta, ChoiceDeltaToolCall
 except (ImportError, ModuleNotFoundError):
     raise ImportError("`openai` not installed. Please install using `pip install openai`")
@@ -67,8 +66,10 @@ class OpenAIChat(Model):
     user: Optional[str] = None
     top_p: Optional[float] = None
     service_tier: Optional[str] = None  # "auto" | "default" | "flex" | "priority", defaults to "auto" when not set
+    strict_output: bool = True  # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
     extra_headers: Optional[Any] = None
     extra_query: Optional[Any] = None
+    extra_body: Optional[Any] = None
     request_params: Optional[Dict[str, Any]] = None
     role_map: Optional[Dict[str, str]] = None
@@ -83,6 +84,10 @@ class OpenAIChat(Model):
     http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
+    # Cached clients to avoid recreating them on every request
+    client: Optional[OpenAIClient] = None
+    async_client: Optional[AsyncOpenAIClient] = None
     # The role to map the message role to.
     default_role_map = {
         "system": "developer",
@@ -120,48 +125,68 @@ class OpenAIChat(Model):
     def get_client(self) -> OpenAIClient:
         """
-        Returns an OpenAI client.
+        Returns an OpenAI client. Caches the client to avoid recreating it on every request.
         Returns:
             OpenAIClient: An instance of the OpenAI client.
         """
+        # Return cached client if it exists and is not closed
+        if self.client is not None and not self.client.is_closed():
+            return self.client
+        log_debug(f"Creating new sync OpenAI client for model {self.id}")
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client:
             if isinstance(self.http_client, httpx.Client):
                 client_params["http_client"] = self.http_client
             else:
-                log_warning("http_client is not an instance of httpx.Client.")
-        return OpenAIClient(**client_params)
+                log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
+                # Use global sync client when user http_client is invalid
+                client_params["http_client"] = get_default_sync_client()
+        else:
+            # Use global sync client when no custom http_client is provided
+            client_params["http_client"] = get_default_sync_client()
+        # Create and cache the client
+        self.client = OpenAIClient(**client_params)
+        return self.client
     def get_async_client(self) -> AsyncOpenAIClient:
         """
-        Returns an asynchronous OpenAI client.
+        Returns an asynchronous OpenAI client. Caches the client to avoid recreating it on every request.
         Returns:
             AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
         """
+        # Return cached client if it exists and is not closed
+        if self.async_client is not None and not self.async_client.is_closed():
+            return self.async_client
+        log_debug(f"Creating new async OpenAI client for model {self.id}")
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client:
             if isinstance(self.http_client, httpx.AsyncClient):
                 client_params["http_client"] = self.http_client
             else:
-                log_warning("http_client is not an instance of httpx.AsyncClient. Using default httpx.AsyncClient.")
-                # Create a new async HTTP client with custom limits
-                client_params["http_client"] = httpx.AsyncClient(
-                    limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
+                log_warning(
+                    "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
                 )
+                # Use global async client when user http_client is invalid
+                client_params["http_client"] = get_default_async_client()
         else:
-            # Create a new async HTTP client with custom limits
-            client_params["http_client"] = httpx.AsyncClient(
-                limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-            )
-        return AsyncOpenAIClient(**client_params)
+            # Use global async client when no custom http_client is provided
+            client_params["http_client"] = get_default_async_client()
+        # Create and cache the client
+        self.async_client = AsyncOpenAIClient(**client_params)
+        return self.async_client
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
@@ -190,6 +215,7 @@ class OpenAIChat(Model):
             "top_p": self.top_p,
             "extra_headers": self.extra_headers,
             "extra_query": self.extra_query,
+            "extra_body": self.extra_body,
             "metadata": self.metadata,
             "service_tier": self.service_tier,
         }
@@ -206,7 +232,7 @@ class OpenAIChat(Model):
                     "json_schema": {
                         "name": response_format.__name__,
                         "schema": schema,
-                        "strict": True,
+                        "strict": self.strict_output,
                     },
                 }
             else:
@@ -269,6 +295,7 @@ class OpenAIChat(Model):
                 "user": self.user,
                 "extra_headers": self.extra_headers,
                 "extra_query": self.extra_query,
+                "extra_body": self.extra_body,
                 "service_tier": self.service_tier,
             }
         )
@@ -346,7 +373,7 @@ class OpenAIChat(Model):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> ModelResponse:
         """
         Send a chat completion request to the OpenAI API and parse the response.
@@ -370,7 +397,9 @@ class OpenAIChat(Model):
             provider_response = self.get_client().chat.completions.create(
                 model=self.id,
                 messages=[self._format_message(m) for m in messages],  # type: ignore
-                **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+                **self.get_request_params(
+                    response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+                ),
             )
             assistant_message.metrics.stop_timer()
@@ -424,7 +453,7 @@ class OpenAIChat(Model):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the OpenAI API.
@@ -447,7 +476,9 @@ class OpenAIChat(Model):
             response = await self.get_async_client().chat.completions.create(
                 model=self.id,
                 messages=[self._format_message(m) for m in messages],  # type: ignore
-                **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+                **self.get_request_params(
+                    response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+                ),
             )
             assistant_message.metrics.stop_timer()
@@ -501,7 +532,7 @@ class OpenAIChat(Model):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the OpenAI API.
@@ -524,7 +555,9 @@ class OpenAIChat(Model):
                 messages=[self._format_message(m) for m in messages],  # type: ignore
                 stream=True,
                 stream_options={"include_usage": True},
-                **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+                **self.get_request_params(
+                    response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+                ),
             ):
                 yield self._parse_provider_response_delta(chunk)
@@ -575,7 +608,7 @@ class OpenAIChat(Model):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the OpenAI API.
@@ -598,7 +631,9 @@ class OpenAIChat(Model):
                 messages=[self._format_message(m) for m in messages],  # type: ignore
                 stream=True,
                 stream_options={"include_usage": True},
-                **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+                **self.get_request_params(
+                    response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+                ),
             )
             async for chunk in async_stream:
@@ -712,6 +747,12 @@ class OpenAIChat(Model):
         if response_message.content is not None:
             model_response.content = response_message.content
+            # Extract thinking content before any structured parsing
+            if model_response.content:
+                reasoning_content, output_content = extract_thinking_content(model_response.content)
+                if reasoning_content:
+                    model_response.reasoning_content = reasoning_content
+                    model_response.content = output_content
         # Add tool calls
         if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
             try:
@@ -729,14 +770,14 @@ class OpenAIChat(Model):
             # If the audio output modality is requested, we can extract an audio response
             try:
                 if isinstance(response_message.audio, dict):
-                    model_response.audio = AudioResponse(
+                    model_response.audio = Audio(
                         id=response_message.audio.get("id"),
                         content=response_message.audio.get("data"),
                         expires_at=response_message.audio.get("expires_at"),
                         transcript=response_message.audio.get("transcript"),
                     )
                 else:
-                    model_response.audio = AudioResponse(
+                    model_response.audio = Audio(
                         id=response_message.audio.id,
                         content=response_message.audio.data,
                         expires_at=response_message.audio.expires_at,
@@ -783,21 +824,39 @@ class OpenAIChat(Model):
                 # Add audio if present
                 if hasattr(choice_delta, "audio") and choice_delta.audio is not None:
                     try:
+                        audio_data = None
+                        audio_id = None
+                        audio_expires_at = None
+                        audio_transcript = None
                         if isinstance(choice_delta.audio, dict):
-                            model_response.audio = AudioResponse(
-                                id=choice_delta.audio.get("id"),
-                                content=choice_delta.audio.get("data"),
-                                expires_at=choice_delta.audio.get("expires_at"),
-                                transcript=choice_delta.audio.get("transcript"),
+                            audio_data = choice_delta.audio.get("data")
+                            audio_id = choice_delta.audio.get("id")
+                            audio_expires_at = choice_delta.audio.get("expires_at")
+                            audio_transcript = choice_delta.audio.get("transcript")
+                        else:
+                            audio_data = choice_delta.audio.data
+                            audio_id = choice_delta.audio.id
+                            audio_expires_at = choice_delta.audio.expires_at
+                            audio_transcript = choice_delta.audio.transcript
+                        # Only create Audio object if there's actual content
+                        if audio_data is not None:
+                            model_response.audio = Audio(
+                                id=audio_id,
+                                content=audio_data,
+                                expires_at=audio_expires_at,
+                                transcript=audio_transcript,
                                 sample_rate=24000,
                                 mime_type="pcm16",
                             )
-                        else:
-                            model_response.audio = AudioResponse(
-                                id=choice_delta.audio.id,
-                                content=choice_delta.audio.data,
-                                expires_at=choice_delta.audio.expires_at,
-                                transcript=choice_delta.audio.transcript,
+                        # If no content but there's transcript/metadata, create minimal Audio object
+                        elif audio_transcript is not None or audio_id is not None:
+                            model_response.audio = Audio(
+                                id=audio_id or str(uuid4()),
+                                content=b"",
+                                expires_at=audio_expires_at,
+                                transcript=audio_transcript,
                                 sample_rate=24000,
                                 mime_type="pcm16",
                             )

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl