agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,40 @@
1
+ from collections.abc import AsyncIterator
1
2
  from dataclasses import dataclass
2
3
  from os import getenv
3
- from typing import Any, Dict, Iterator, List, Optional, Union
4
+ from typing import Any, Dict, Iterator, List, Literal, Optional, Type, Union
5
+ from uuid import uuid4
4
6
 
5
7
  import httpx
6
8
  from pydantic import BaseModel
7
9
 
8
- from agno.media import AudioOutput
9
- from agno.models.base import Metrics, Model
10
+ from agno.exceptions import ModelAuthenticationError, ModelProviderError
11
+ from agno.media import Audio
12
+ from agno.models.base import Model
10
13
  from agno.models.message import Message
11
- from agno.models.response import ModelResponse, ModelResponseEvent
12
- from agno.tools.function import FunctionCall
13
- from agno.utils.log import logger
14
- from agno.utils.tools import get_function_call_for_tool_call
14
+ from agno.models.metrics import Metrics
15
+ from agno.models.response import ModelResponse
16
+ from agno.run.agent import RunOutput
17
+ from agno.run.team import TeamRunOutput
18
+ from agno.utils.http import get_default_async_client, get_default_sync_client
19
+ from agno.utils.log import log_debug, log_error, log_warning
20
+ from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
21
+ from agno.utils.reasoning import extract_thinking_content
15
22
 
16
23
  try:
24
+ from openai import APIConnectionError, APIStatusError, RateLimitError
17
25
  from openai import AsyncOpenAI as AsyncOpenAIClient
18
26
  from openai import OpenAI as OpenAIClient
19
- from openai.types.chat.chat_completion import ChatCompletion
20
- from openai.types.chat.chat_completion_chunk import (
21
- ChatCompletionChunk,
22
- ChoiceDelta,
23
- ChoiceDeltaToolCall,
24
- )
25
- from openai.types.chat.chat_completion_message import ChatCompletionAudio, ChatCompletionMessage
26
- from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
27
- from openai.types.completion_usage import CompletionUsage
28
- except ModuleNotFoundError:
27
+ from openai.types import CompletionUsage
28
+ from openai.types.chat import ChatCompletion, ChatCompletionAudio, ChatCompletionChunk
29
+ from openai.types.chat.chat_completion_chunk import ChoiceDelta, ChoiceDeltaToolCall
30
+ except (ImportError, ModuleNotFoundError):
29
31
  raise ImportError("`openai` not installed. Please install using `pip install openai`")
30
32
 
31
33
 
32
- @dataclass
33
- class StreamData:
34
- response_content: str = ""
35
- response_audio: Optional[ChatCompletionAudio] = None
36
- response_tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
37
-
38
-
39
34
  @dataclass
40
35
  class OpenAIChat(Model):
41
36
  """
42
- A class for interacting with OpenAI models.
37
+ A class for interacting with OpenAI models using the Chat completions API.
43
38
 
44
39
  For more information, see: https://platform.openai.com/docs/api-reference/chat/create
45
40
  """
@@ -47,10 +42,12 @@ class OpenAIChat(Model):
47
42
  id: str = "gpt-4o"
48
43
  name: str = "OpenAIChat"
49
44
  provider: str = "OpenAI"
50
- supports_structured_outputs: bool = True
45
+ supports_native_structured_outputs: bool = True
51
46
 
52
47
  # Request parameters
53
48
  store: Optional[bool] = None
49
+ reasoning_effort: Optional[str] = None
50
+ verbosity: Optional[Literal["low", "medium", "high"]] = None
54
51
  metadata: Optional[Dict[str, Any]] = None
55
52
  frequency_penalty: Optional[float] = None
56
53
  logit_bias: Optional[Any] = None
@@ -58,18 +55,23 @@ class OpenAIChat(Model):
58
55
  top_logprobs: Optional[int] = None
59
56
  max_tokens: Optional[int] = None
60
57
  max_completion_tokens: Optional[int] = None
61
- modalities: Optional[List[str]] = None
62
- audio: Optional[Dict[str, Any]] = None
58
+ modalities: Optional[List[str]] = None # "text" and/or "audio"
59
+ audio: Optional[Dict[str, Any]] = (
60
+ None # E.g. {"voice": "alloy", "format": "wav"}. `format` must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. `voice` must be one of `ash`, `ballad`, `coral`, `sage`, `verse`, `alloy`, `echo`, and `shimmer`.
61
+ )
63
62
  presence_penalty: Optional[float] = None
64
- response_format: Optional[Any] = None
65
63
  seed: Optional[int] = None
66
64
  stop: Optional[Union[str, List[str]]] = None
67
65
  temperature: Optional[float] = None
68
66
  user: Optional[str] = None
69
67
  top_p: Optional[float] = None
68
+ service_tier: Optional[str] = None # "auto" | "default" | "flex" | "priority", defaults to "auto" when not set
69
+ strict_output: bool = True # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
70
70
  extra_headers: Optional[Any] = None
71
71
  extra_query: Optional[Any] = None
72
+ extra_body: Optional[Any] = None
72
73
  request_params: Optional[Dict[str, Any]] = None
74
+ role_map: Optional[Dict[str, str]] = None
73
75
 
74
76
  # Client parameters
75
77
  api_key: Optional[str] = None
@@ -79,126 +81,192 @@ class OpenAIChat(Model):
79
81
  max_retries: Optional[int] = None
80
82
  default_headers: Optional[Any] = None
81
83
  default_query: Optional[Any] = None
82
- http_client: Optional[httpx.Client] = None
84
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
83
85
  client_params: Optional[Dict[str, Any]] = None
84
86
 
85
- # OpenAI clients
87
+ # Cached clients to avoid recreating them on every request
86
88
  client: Optional[OpenAIClient] = None
87
89
  async_client: Optional[AsyncOpenAIClient] = None
88
90
 
89
- # Internal parameters. Not used for API requests
90
- # Whether to use the structured outputs with this Model.
91
- structured_outputs: bool = False
92
-
93
- # Whether to override the system role.
94
- override_system_role: bool = True
95
- # The role to map the system message to.
96
- system_message_role: str = "developer"
91
+ # The role to map the message role to.
92
+ default_role_map = {
93
+ "system": "developer",
94
+ "user": "user",
95
+ "assistant": "assistant",
96
+ "tool": "tool",
97
+ "model": "assistant",
98
+ }
97
99
 
98
100
  def _get_client_params(self) -> Dict[str, Any]:
99
- client_params: Dict[str, Any] = {}
100
-
101
- self.api_key = self.api_key or getenv("OPENAI_API_KEY")
101
+ # Fetch API key from env if not already set
102
102
  if not self.api_key:
103
- logger.error("OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.")
103
+ self.api_key = getenv("OPENAI_API_KEY")
104
+ if not self.api_key:
105
+ raise ModelAuthenticationError(
106
+ message="OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.",
107
+ model_name=self.name,
108
+ )
104
109
 
105
- client_params.update(
106
- {
107
- "api_key": self.api_key,
108
- "organization": self.organization,
109
- "base_url": self.base_url,
110
- "timeout": self.timeout,
111
- "max_retries": self.max_retries,
112
- "default_headers": self.default_headers,
113
- "default_query": self.default_query,
114
- }
115
- )
116
- if self.client_params is not None:
110
+ # Define base client params
111
+ base_params = {
112
+ "api_key": self.api_key,
113
+ "organization": self.organization,
114
+ "base_url": self.base_url,
115
+ "timeout": self.timeout,
116
+ "max_retries": self.max_retries,
117
+ "default_headers": self.default_headers,
118
+ "default_query": self.default_query,
119
+ }
120
+
121
+ # Create client_params dict with non-None values
122
+ client_params = {k: v for k, v in base_params.items() if v is not None}
123
+
124
+ # Add additional client params if provided
125
+ if self.client_params:
117
126
  client_params.update(self.client_params)
118
-
119
- # Remove None
120
- client_params = {k: v for k, v in client_params.items() if v is not None}
121
127
  return client_params
122
128
 
123
129
  def get_client(self) -> OpenAIClient:
124
130
  """
125
- Returns an OpenAI client.
131
+ Returns an OpenAI client. Caches the client to avoid recreating it on every request.
126
132
 
127
133
  Returns:
128
134
  OpenAIClient: An instance of the OpenAI client.
129
135
  """
130
- if self.client:
136
+ # Return cached client if it exists and is not closed
137
+ if self.client is not None and not self.client.is_closed():
131
138
  return self.client
132
139
 
140
+ log_debug(f"Creating new sync OpenAI client for model {self.id}")
133
141
  client_params: Dict[str, Any] = self._get_client_params()
134
- if self.http_client is not None:
135
- client_params["http_client"] = self.http_client
136
- return OpenAIClient(**client_params)
142
+ if self.http_client:
143
+ if isinstance(self.http_client, httpx.Client):
144
+ client_params["http_client"] = self.http_client
145
+ else:
146
+ log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
147
+ # Use global sync client when user http_client is invalid
148
+ client_params["http_client"] = get_default_sync_client()
149
+ else:
150
+ # Use global sync client when no custom http_client is provided
151
+ client_params["http_client"] = get_default_sync_client()
152
+
153
+ # Create and cache the client
154
+ self.client = OpenAIClient(**client_params)
155
+ return self.client
137
156
 
138
157
  def get_async_client(self) -> AsyncOpenAIClient:
139
158
  """
140
- Returns an asynchronous OpenAI client.
159
+ Returns an asynchronous OpenAI client. Caches the client to avoid recreating it on every request.
141
160
 
142
161
  Returns:
143
162
  AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
144
163
  """
145
- if self.async_client:
164
+ # Return cached client if it exists and is not closed
165
+ if self.async_client is not None and not self.async_client.is_closed():
146
166
  return self.async_client
147
167
 
168
+ log_debug(f"Creating new async OpenAI client for model {self.id}")
148
169
  client_params: Dict[str, Any] = self._get_client_params()
149
170
  if self.http_client:
150
- client_params["http_client"] = self.http_client
171
+ if isinstance(self.http_client, httpx.AsyncClient):
172
+ client_params["http_client"] = self.http_client
173
+ else:
174
+ log_warning(
175
+ "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
176
+ )
177
+ # Use global async client when user http_client is invalid
178
+ client_params["http_client"] = get_default_async_client()
151
179
  else:
152
- # Create a new async HTTP client with custom limits
153
- client_params["http_client"] = httpx.AsyncClient(
154
- limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
155
- )
156
- return AsyncOpenAIClient(**client_params)
180
+ # Use global async client when no custom http_client is provided
181
+ client_params["http_client"] = get_default_async_client()
182
+
183
+ # Create and cache the client
184
+ self.async_client = AsyncOpenAIClient(**client_params)
185
+ return self.async_client
157
186
 
158
- @property
159
- def request_kwargs(self) -> Dict[str, Any]:
187
+ def get_request_params(
188
+ self,
189
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
190
+ tools: Optional[List[Dict[str, Any]]] = None,
191
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
192
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
193
+ ) -> Dict[str, Any]:
160
194
  """
161
195
  Returns keyword arguments for API requests.
162
196
 
163
197
  Returns:
164
198
  Dict[str, Any]: A dictionary of keyword arguments for API requests.
165
199
  """
166
- request_params: Dict[str, Any] = {}
167
-
168
- request_params.update(
169
- {
170
- "store": self.store,
171
- "frequency_penalty": self.frequency_penalty,
172
- "logit_bias": self.logit_bias,
173
- "logprobs": self.logprobs,
174
- "top_logprobs": self.top_logprobs,
175
- "max_tokens": self.max_tokens,
176
- "max_completion_tokens": self.max_completion_tokens,
177
- "modalities": self.modalities,
178
- "audio": self.audio,
179
- "presence_penalty": self.presence_penalty,
180
- "response_format": self.response_format,
181
- "seed": self.seed,
182
- "stop": self.stop,
183
- "temperature": self.temperature,
184
- "user": self.user,
185
- "top_p": self.top_p,
186
- "extra_headers": self.extra_headers,
187
- "extra_query": self.extra_query,
188
- }
189
- )
190
- if self.tools is not None:
191
- request_params["tools"] = self.tools
192
- if self.tool_choice is None:
193
- request_params["tool_choice"] = "auto"
200
+ # Define base request parameters
201
+ base_params = {
202
+ "store": self.store,
203
+ "reasoning_effort": self.reasoning_effort,
204
+ "verbosity": self.verbosity,
205
+ "frequency_penalty": self.frequency_penalty,
206
+ "logit_bias": self.logit_bias,
207
+ "logprobs": self.logprobs,
208
+ "top_logprobs": self.top_logprobs,
209
+ "max_tokens": self.max_tokens,
210
+ "max_completion_tokens": self.max_completion_tokens,
211
+ "modalities": self.modalities,
212
+ "audio": self.audio,
213
+ "presence_penalty": self.presence_penalty,
214
+ "seed": self.seed,
215
+ "stop": self.stop,
216
+ "temperature": self.temperature,
217
+ "user": self.user,
218
+ "top_p": self.top_p,
219
+ "extra_headers": self.extra_headers,
220
+ "extra_query": self.extra_query,
221
+ "extra_body": self.extra_body,
222
+ "metadata": self.metadata,
223
+ "service_tier": self.service_tier,
224
+ }
225
+
226
+ # Handle response format - always use JSON schema approach
227
+ if response_format is not None:
228
+ if isinstance(response_format, type) and issubclass(response_format, BaseModel):
229
+ # Convert Pydantic to JSON schema for regular endpoint
230
+ from agno.utils.models.schema_utils import get_response_schema_for_provider
231
+
232
+ schema = get_response_schema_for_provider(response_format, "openai")
233
+ base_params["response_format"] = {
234
+ "type": "json_schema",
235
+ "json_schema": {
236
+ "name": response_format.__name__,
237
+ "schema": schema,
238
+ "strict": self.strict_output,
239
+ },
240
+ }
194
241
  else:
195
- request_params["tool_choice"] = self.tool_choice
196
-
197
- if self.request_params is not None:
242
+ # Handle other response format types (like {"type": "json_object"})
243
+ base_params["response_format"] = response_format
244
+
245
+ # Filter out None values
246
+ request_params = {k: v for k, v in base_params.items() if v is not None}
247
+
248
+ # Add tools
249
+ if tools is not None and len(tools) > 0:
250
+ # Remove unsupported fields for OpenAILike models
251
+ if self.provider in ["AIMLAPI", "Fireworks", "Nvidia"]:
252
+ for tool in tools:
253
+ if tool.get("type") == "function":
254
+ if tool["function"].get("requires_confirmation") is not None:
255
+ del tool["function"]["requires_confirmation"]
256
+ if tool["function"].get("external_execution") is not None:
257
+ del tool["function"]["external_execution"]
258
+
259
+ request_params["tools"] = tools
260
+
261
+ if tool_choice is not None:
262
+ request_params["tool_choice"] = tool_choice
263
+
264
+ # Add additional request params if provided
265
+ if self.request_params:
198
266
  request_params.update(self.request_params)
199
267
 
200
- # Remove None
201
- request_params = {k: v for k, v in request_params.items() if v is not None}
268
+ if request_params:
269
+ log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
202
270
  return request_params
203
271
 
204
272
  def to_dict(self) -> Dict[str, Any]:
@@ -208,10 +276,12 @@ class OpenAIChat(Model):
208
276
  Returns:
209
277
  Dict[str, Any]: The dictionary representation of the model.
210
278
  """
211
- _dict = super().to_dict()
212
- _dict.update(
279
+ model_dict = super().to_dict()
280
+ model_dict.update(
213
281
  {
214
282
  "store": self.store,
283
+ "reasoning_effort": self.reasoning_effort,
284
+ "verbosity": self.verbosity,
215
285
  "frequency_penalty": self.frequency_penalty,
216
286
  "logit_bias": self.logit_bias,
217
287
  "logprobs": self.logprobs,
@@ -221,9 +291,6 @@ class OpenAIChat(Model):
221
291
  "modalities": self.modalities,
222
292
  "audio": self.audio,
223
293
  "presence_penalty": self.presence_penalty,
224
- "response_format": self.response_format
225
- if isinstance(self.response_format, dict)
226
- else str(self.response_format),
227
294
  "seed": self.seed,
228
295
  "stop": self.stop,
229
296
  "temperature": self.temperature,
@@ -231,763 +298,651 @@ class OpenAIChat(Model):
231
298
  "user": self.user,
232
299
  "extra_headers": self.extra_headers,
233
300
  "extra_query": self.extra_query,
301
+ "extra_body": self.extra_body,
302
+ "service_tier": self.service_tier,
234
303
  }
235
304
  )
236
- if self.tools is not None:
237
- _dict["tools"] = self.tools
238
- if self.tool_choice is None:
239
- _dict["tool_choice"] = "auto"
240
- else:
241
- _dict["tool_choice"] = self.tool_choice
242
- cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
305
+ cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
243
306
  return cleaned_dict
244
307
 
245
- def format_message(self, message: Message) -> Dict[str, Any]:
308
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
246
309
  """
247
310
  Format a message into the format expected by OpenAI.
248
311
 
249
312
  Args:
250
313
  message (Message): The message to format.
314
+ compress_tool_results: Whether to compress tool results.
251
315
 
252
316
  Returns:
253
317
  Dict[str, Any]: The formatted message.
254
318
  """
255
- if message.role == "user":
256
- if message.images is not None:
257
- message = self.add_images_to_message(message=message, images=message.images)
258
-
259
- if message.audio is not None:
260
- message = self.add_audio_to_message(message=message, audio=message.audio)
261
-
262
- if message.videos is not None:
263
- logger.warning("Video input is currently unsupported.")
264
-
265
- return message.to_dict()
266
-
267
- def invoke(self, messages: List[Message]) -> Union[ChatCompletion, ParsedChatCompletion]:
319
+ tool_result = message.get_content(use_compressed_content=compress_tool_results)
320
+
321
+ message_dict: Dict[str, Any] = {
322
+ "role": self.role_map[message.role] if self.role_map else self.default_role_map[message.role],
323
+ "content": tool_result,
324
+ "name": message.name,
325
+ "tool_call_id": message.tool_call_id,
326
+ "tool_calls": message.tool_calls,
327
+ }
328
+ message_dict = {k: v for k, v in message_dict.items() if v is not None}
329
+
330
+ # Ignore non-string message content
331
+ # because we assume that the images/audio are already added to the message
332
+ if (message.images is not None and len(message.images) > 0) or (
333
+ message.audio is not None and len(message.audio) > 0
334
+ ):
335
+ # Ignore non-string message content
336
+ # because we assume that the images/audio are already added to the message
337
+ if isinstance(message.content, str):
338
+ message_dict["content"] = [{"type": "text", "text": message.content}]
339
+ if message.images is not None:
340
+ message_dict["content"].extend(images_to_message(images=message.images))
341
+
342
+ if message.audio is not None:
343
+ message_dict["content"].extend(audio_to_message(audio=message.audio))
344
+
345
+ if message.audio_output is not None:
346
+ message_dict["content"] = ""
347
+ message_dict["audio"] = {"id": message.audio_output.id}
348
+
349
+ if message.videos is not None and len(message.videos) > 0:
350
+ log_warning("Video input is currently unsupported.")
351
+
352
+ # OpenAI expects the tool_calls to be None if empty, not an empty list
353
+ if message.tool_calls is not None and len(message.tool_calls) == 0:
354
+ message_dict["tool_calls"] = None
355
+
356
+ if message.files is not None:
357
+ # Ensure content is a list of parts
358
+ content = message_dict.get("content")
359
+ if isinstance(content, str): # wrap existing text
360
+ text = content
361
+ message_dict["content"] = [{"type": "text", "text": text}]
362
+ elif content is None:
363
+ message_dict["content"] = []
364
+ # Insert each file part before text parts
365
+ for file in message.files:
366
+ file_part = _format_file_for_message(file)
367
+ if file_part:
368
+ message_dict["content"].insert(0, file_part)
369
+
370
+ # Manually add the content field even if it is None
371
+ if message.content is None:
372
+ message_dict["content"] = ""
373
+ return message_dict
374
+
375
+ def invoke(
376
+ self,
377
+ messages: List[Message],
378
+ assistant_message: Message,
379
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
380
+ tools: Optional[List[Dict[str, Any]]] = None,
381
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
382
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
383
+ compress_tool_results: bool = False,
384
+ ) -> ModelResponse:
268
385
  """
269
- Send a chat completion request to the OpenAI API.
386
+ Send a chat completion request to the OpenAI API and parse the response.
270
387
 
271
388
  Args:
272
389
  messages (List[Message]): A list of messages to send to the model.
390
+ assistant_message (Message): The assistant message to populate.
391
+ response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
392
+ tools (Optional[List[Dict[str, Any]]]): The tools to use.
393
+ tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
394
+ compress_tool_results: Whether to compress tool results.
273
395
 
274
396
  Returns:
275
- ChatCompletion: The chat completion response from the API.
397
+ ModelResponse: The chat completion response from the API.
276
398
  """
277
- if self.response_format is not None and self.structured_outputs:
278
- try:
279
- if isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
280
- return self.get_client().beta.chat.completions.parse(
281
- model=self.id,
282
- messages=[self.format_message(m) for m in messages], # type: ignore
283
- **self.request_kwargs,
284
- )
285
- else:
286
- raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
287
- except Exception as e:
288
- logger.error(f"Error from OpenAI API: {e}")
399
+ try:
400
+ if run_response and run_response.metrics:
401
+ run_response.metrics.set_time_to_first_token()
289
402
 
290
- return self.get_client().chat.completions.create(
291
- model=self.id,
292
- messages=[self.format_message(m) for m in messages], # type: ignore
293
- **self.request_kwargs,
294
- )
403
+ assistant_message.metrics.start_timer()
295
404
 
296
- async def ainvoke(self, messages: List[Message]) -> Union[ChatCompletion, ParsedChatCompletion]:
297
- """
298
- Sends an asynchronous chat completion request to the OpenAI API.
405
+ provider_response = self.get_client().chat.completions.create(
406
+ model=self.id,
407
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
408
+ **self.get_request_params(
409
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
410
+ ),
411
+ )
412
+ assistant_message.metrics.stop_timer()
299
413
 
300
- Args:
301
- messages (List[Message]): A list of messages to send to the model.
414
+ # Parse the response into an Agno ModelResponse object
415
+ model_response = self._parse_provider_response(provider_response, response_format=response_format)
302
416
 
303
- Returns:
304
- ChatCompletion: The chat completion response from the API.
305
- """
306
- if self.response_format is not None and self.structured_outputs:
307
- try:
308
- if isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
309
- return await self.get_async_client().beta.chat.completions.parse(
310
- model=self.id,
311
- messages=[self.format_message(m) for m in messages], # type: ignore
312
- **self.request_kwargs,
313
- )
314
- else:
315
- raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
316
- except Exception as e:
317
- logger.error(f"Error from OpenAI API: {e}")
417
+ return model_response
318
418
 
319
- return await self.get_async_client().chat.completions.create(
320
- model=self.id,
321
- messages=[self.format_message(m) for m in messages], # type: ignore
322
- **self.request_kwargs,
323
- )
419
+ except RateLimitError as e:
420
+ log_error(f"Rate limit error from OpenAI API: {e}")
421
+ error_message = e.response.json().get("error", {})
422
+ error_message = (
423
+ error_message.get("message", "Unknown model error")
424
+ if isinstance(error_message, dict)
425
+ else error_message
426
+ )
427
+ raise ModelProviderError(
428
+ message=error_message,
429
+ status_code=e.response.status_code,
430
+ model_name=self.name,
431
+ model_id=self.id,
432
+ ) from e
433
+ except APIConnectionError as e:
434
+ log_error(f"API connection error from OpenAI API: {e}")
435
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
436
+ except APIStatusError as e:
437
+ log_error(f"API status error from OpenAI API: {e}")
438
+ try:
439
+ error_message = e.response.json().get("error", {})
440
+ except Exception:
441
+ error_message = e.response.text
442
+ error_message = (
443
+ error_message.get("message", "Unknown model error")
444
+ if isinstance(error_message, dict)
445
+ else error_message
446
+ )
447
+ raise ModelProviderError(
448
+ message=error_message,
449
+ status_code=e.response.status_code,
450
+ model_name=self.name,
451
+ model_id=self.id,
452
+ ) from e
453
+ except ModelAuthenticationError as e:
454
+ log_error(f"Model authentication error from OpenAI API: {e}")
455
+ raise e
456
+ except Exception as e:
457
+ log_error(f"Error from OpenAI API: {e}")
458
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
324
459
 
325
- def invoke_stream(self, messages: List[Message]) -> Iterator[ChatCompletionChunk]:
460
+ async def ainvoke(
461
+ self,
462
+ messages: List[Message],
463
+ assistant_message: Message,
464
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
465
+ tools: Optional[List[Dict[str, Any]]] = None,
466
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
467
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
468
+ compress_tool_results: bool = False,
469
+ ) -> ModelResponse:
326
470
  """
327
- Send a streaming chat completion request to the OpenAI API.
471
+ Sends an asynchronous chat completion request to the OpenAI API.
328
472
 
329
473
  Args:
330
474
  messages (List[Message]): A list of messages to send to the model.
475
+ assistant_message (Message): The assistant message to populate.
476
+ response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
477
+ tools (Optional[List[Dict[str, Any]]]): The tools to use.
478
+ tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
479
+ compress_tool_results: Whether to compress tool results.
331
480
 
332
481
  Returns:
333
- Iterator[ChatCompletionChunk]: An iterator of chat completion chunks.
334
- """
335
- yield from self.get_client().chat.completions.create(
336
- model=self.id,
337
- messages=[self.format_message(m) for m in messages], # type: ignore
338
- stream=True,
339
- stream_options={"include_usage": True},
340
- **self.request_kwargs,
341
- ) # type: ignore
342
-
343
- async def ainvoke_stream(self, messages: List[Message]) -> Any:
482
+ ModelResponse: The chat completion response from the API.
344
483
  """
345
- Sends an asynchronous streaming chat completion request to the OpenAI API.
484
+ try:
485
+ if run_response and run_response.metrics:
486
+ run_response.metrics.set_time_to_first_token()
487
+
488
+ assistant_message.metrics.start_timer()
489
+ response = await self.get_async_client().chat.completions.create(
490
+ model=self.id,
491
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
492
+ **self.get_request_params(
493
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
494
+ ),
495
+ )
496
+ assistant_message.metrics.stop_timer()
346
497
 
347
- Args:
348
- messages (List[Message]): A list of messages to send to the model.
498
+ # Parse the response into an Agno ModelResponse object
499
+ provider_response: ModelResponse = self._parse_provider_response(response, response_format=response_format)
349
500
 
350
- Returns:
351
- Any: An asynchronous iterator of chat completion chunks.
352
- """
353
- async_stream = await self.get_async_client().chat.completions.create(
354
- model=self.id,
355
- messages=[self.format_message(m) for m in messages], # type: ignore
356
- stream=True,
357
- stream_options={"include_usage": True},
358
- **self.request_kwargs,
359
- )
360
- async for chunk in async_stream: # type: ignore
361
- yield chunk
501
+ return provider_response
502
+
503
+ except RateLimitError as e:
504
+ log_error(f"Rate limit error from OpenAI API: {e}")
505
+ error_message = e.response.json().get("error", {})
506
+ error_message = (
507
+ error_message.get("message", "Unknown model error")
508
+ if isinstance(error_message, dict)
509
+ else error_message
510
+ )
511
+ raise ModelProviderError(
512
+ message=error_message,
513
+ status_code=e.response.status_code,
514
+ model_name=self.name,
515
+ model_id=self.id,
516
+ ) from e
517
+ except APIConnectionError as e:
518
+ log_error(f"API connection error from OpenAI API: {e}")
519
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
520
+ except APIStatusError as e:
521
+ log_error(f"API status error from OpenAI API: {e}")
522
+ try:
523
+ error_message = e.response.json().get("error", {})
524
+ except Exception:
525
+ error_message = e.response.text
526
+ error_message = (
527
+ error_message.get("message", "Unknown model error")
528
+ if isinstance(error_message, dict)
529
+ else error_message
530
+ )
531
+ raise ModelProviderError(
532
+ message=error_message,
533
+ status_code=e.response.status_code,
534
+ model_name=self.name,
535
+ model_id=self.id,
536
+ ) from e
537
+ except ModelAuthenticationError as e:
538
+ log_error(f"Model authentication error from OpenAI API: {e}")
539
+ raise e
540
+ except Exception as e:
541
+ log_error(f"Error from OpenAI API: {e}")
542
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
362
543
 
363
- def handle_tool_calls(
544
+ def invoke_stream(
364
545
  self,
365
- assistant_message: Message,
366
546
  messages: List[Message],
367
- model_response: ModelResponse,
368
- tool_role: str = "tool",
369
- ) -> Optional[ModelResponse]:
547
+ assistant_message: Message,
548
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
549
+ tools: Optional[List[Dict[str, Any]]] = None,
550
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
551
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
552
+ compress_tool_results: bool = False,
553
+ ) -> Iterator[ModelResponse]:
370
554
  """
371
- Handle tool calls in the assistant message.
555
+ Send a streaming chat completion request to the OpenAI API.
372
556
 
373
557
  Args:
374
- assistant_message (Message): The assistant message.
375
- messages (List[Message]): The list of messages.
376
- model_response (ModelResponse): The model response.
377
- tool_role (str): The role of the tool call. Defaults to "tool".
558
+ messages (List[Message]): A list of messages to send to the model.
559
+ compress_tool_results: Whether to compress tool results.
378
560
 
379
561
  Returns:
380
- Optional[ModelResponse]: The model response after handling tool calls.
562
+ Iterator[ModelResponse]: An iterator of model responses.
381
563
  """
382
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
383
- if model_response.content is None:
384
- model_response.content = ""
385
- if model_response.tool_calls is None:
386
- model_response.tool_calls = []
387
- function_call_results: List[Message] = []
388
- function_calls_to_run: List[FunctionCall] = []
389
- for tool_call in assistant_message.tool_calls:
390
- _tool_call_id = tool_call.get("id")
391
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
392
- if _function_call is None:
393
- messages.append(
394
- Message(
395
- role="tool",
396
- tool_call_id=_tool_call_id,
397
- content="Could not find function to call.",
398
- )
399
- )
400
- continue
401
- if _function_call.error is not None:
402
- messages.append(
403
- Message(
404
- role="tool",
405
- tool_call_id=_tool_call_id,
406
- content=_function_call.error,
407
- )
408
- )
409
- continue
410
- function_calls_to_run.append(_function_call)
411
564
 
412
- if self.show_tool_calls:
413
- model_response.content += "\nRunning:"
414
- for _f in function_calls_to_run:
415
- model_response.content += f"\n - {_f.get_call_str()}"
416
- model_response.content += "\n\n"
417
-
418
- for function_call_response in self.run_function_calls(
419
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
565
+ try:
566
+ if run_response and run_response.metrics:
567
+ run_response.metrics.set_time_to_first_token()
568
+
569
+ assistant_message.metrics.start_timer()
570
+
571
+ for chunk in self.get_client().chat.completions.create(
572
+ model=self.id,
573
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
574
+ stream=True,
575
+ stream_options={"include_usage": True},
576
+ **self.get_request_params(
577
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
578
+ ),
420
579
  ):
421
- if (
422
- function_call_response.event == ModelResponseEvent.tool_call_completed.value
423
- and function_call_response.tool_calls is not None
424
- ):
425
- model_response.tool_calls.extend(function_call_response.tool_calls)
426
-
427
- if len(function_call_results) > 0:
428
- messages.extend(function_call_results)
580
+ yield self._parse_provider_response_delta(chunk)
429
581
 
430
- return model_response
431
- return None
432
-
433
- def update_usage_metrics(
434
- self, assistant_message: Message, metrics: Metrics, response_usage: Optional[CompletionUsage]
435
- ) -> None:
436
- """
437
- Update the usage metrics for the assistant message and the model.
438
-
439
- Args:
440
- assistant_message (Message): The assistant message.
441
- metrics (Metrics): The metrics.
442
- response_usage (Optional[CompletionUsage]): The response usage.
443
- """
444
- # Update time taken to generate response
445
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
446
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
447
- if response_usage:
448
- prompt_tokens = response_usage.prompt_tokens
449
- completion_tokens = response_usage.completion_tokens
450
- total_tokens = response_usage.total_tokens
451
-
452
- if prompt_tokens is not None:
453
- metrics.input_tokens = prompt_tokens
454
- metrics.prompt_tokens = prompt_tokens
455
- assistant_message.metrics["input_tokens"] = prompt_tokens
456
- assistant_message.metrics["prompt_tokens"] = prompt_tokens
457
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
458
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
459
- if completion_tokens is not None:
460
- metrics.output_tokens = completion_tokens
461
- metrics.completion_tokens = completion_tokens
462
- assistant_message.metrics["output_tokens"] = completion_tokens
463
- assistant_message.metrics["completion_tokens"] = completion_tokens
464
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
465
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
466
- if total_tokens is not None:
467
- metrics.total_tokens = total_tokens
468
- assistant_message.metrics["total_tokens"] = total_tokens
469
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
470
- if response_usage.prompt_tokens_details is not None:
471
- if isinstance(response_usage.prompt_tokens_details, dict):
472
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details
473
- elif isinstance(response_usage.prompt_tokens_details, BaseModel):
474
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
475
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
476
- if metrics.prompt_tokens_details is not None:
477
- for k, v in metrics.prompt_tokens_details.items():
478
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
479
- if response_usage.completion_tokens_details is not None:
480
- if isinstance(response_usage.completion_tokens_details, dict):
481
- metrics.completion_tokens_details = response_usage.completion_tokens_details
482
- elif isinstance(response_usage.completion_tokens_details, BaseModel):
483
- metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
484
- exclude_none=True
485
- )
486
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
487
- if metrics.completion_tokens_details is not None:
488
- for k, v in metrics.completion_tokens_details.items():
489
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
582
+ assistant_message.metrics.stop_timer()
490
583
 
491
- def create_assistant_message(
492
- self,
493
- response_message: ChatCompletionMessage,
494
- metrics: Metrics,
495
- response_usage: Optional[CompletionUsage],
496
- ) -> Message:
497
- """
498
- Create an assistant message from the response.
499
-
500
- Args:
501
- response_message (ChatCompletionMessage): The response message.
502
- metrics (Metrics): The metrics.
503
- response_usage (Optional[CompletionUsage]): The response usage.
504
-
505
- Returns:
506
- Message: The assistant message.
507
- """
508
- assistant_message = Message(
509
- role=response_message.role or "assistant",
510
- content=response_message.content,
511
- )
512
- if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
513
- try:
514
- assistant_message.tool_calls = [t.model_dump() for t in response_message.tool_calls]
515
- except Exception as e:
516
- logger.warning(f"Error processing tool calls: {e}")
517
- if hasattr(response_message, "audio") and response_message.audio is not None:
584
+ except RateLimitError as e:
585
+ log_error(f"Rate limit error from OpenAI API: {e}")
586
+ error_message = e.response.json().get("error", {})
587
+ error_message = (
588
+ error_message.get("message", "Unknown model error")
589
+ if isinstance(error_message, dict)
590
+ else error_message
591
+ )
592
+ raise ModelProviderError(
593
+ message=error_message,
594
+ status_code=e.response.status_code,
595
+ model_name=self.name,
596
+ model_id=self.id,
597
+ ) from e
598
+ except APIConnectionError as e:
599
+ log_error(f"API connection error from OpenAI API: {e}")
600
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
601
+ except APIStatusError as e:
602
+ log_error(f"API status error from OpenAI API: {e}")
518
603
  try:
519
- assistant_message.audio_output = AudioOutput(
520
- id=response_message.audio.id,
521
- content=response_message.audio.data,
522
- expires_at=response_message.audio.expires_at,
523
- transcript=response_message.audio.transcript,
524
- )
525
- except Exception as e:
526
- logger.warning(f"Error processing audio: {e}")
527
-
528
- # Update metrics
529
- self.update_usage_metrics(assistant_message, metrics, response_usage)
530
- return assistant_message
604
+ error_message = e.response.json().get("error", {})
605
+ except Exception:
606
+ error_message = e.response.text
607
+ error_message = (
608
+ error_message.get("message", "Unknown model error")
609
+ if isinstance(error_message, dict)
610
+ else error_message
611
+ )
612
+ raise ModelProviderError(
613
+ message=error_message,
614
+ status_code=e.response.status_code,
615
+ model_name=self.name,
616
+ model_id=self.id,
617
+ ) from e
618
+ except ModelAuthenticationError as e:
619
+ log_error(f"Model authentication error from OpenAI API: {e}")
620
+ raise e
621
+ except Exception as e:
622
+ log_error(f"Error from OpenAI API: {e}")
623
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
531
624
 
532
- def response(self, messages: List[Message]) -> ModelResponse:
625
+ async def ainvoke_stream(
626
+ self,
627
+ messages: List[Message],
628
+ assistant_message: Message,
629
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
630
+ tools: Optional[List[Dict[str, Any]]] = None,
631
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
632
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
633
+ compress_tool_results: bool = False,
634
+ ) -> AsyncIterator[ModelResponse]:
533
635
  """
534
- Generate a response from OpenAI.
636
+ Sends an asynchronous streaming chat completion request to the OpenAI API.
535
637
 
536
638
  Args:
537
- messages (List[Message]): A list of messages.
639
+ messages (List[Message]): A list of messages to send to the model.
640
+ compress_tool_results: Whether to compress tool results.
538
641
 
539
642
  Returns:
540
- ModelResponse: The model response.
643
+ Any: An asynchronous iterator of model responses.
541
644
  """
542
- logger.debug("---------- OpenAIChat Response Start ----------")
543
- self._log_messages(messages)
544
- model_response = ModelResponse()
545
- metrics = Metrics()
546
-
547
- # -*- Generate response
548
- metrics.start_response_timer()
549
- response: Union[ChatCompletion, ParsedChatCompletion] = self.invoke(messages=messages)
550
- metrics.stop_response_timer()
551
-
552
- # -*- Parse response
553
- response_message: ChatCompletionMessage = response.choices[0].message
554
- response_usage: Optional[CompletionUsage] = response.usage
555
- response_audio: Optional[ChatCompletionAudio] = response_message.audio
556
-
557
- # -*- Parse transcript if available
558
- if response_audio:
559
- if response_audio.transcript and not response_message.content:
560
- response_message.content = response_audio.transcript
561
645
 
562
- # -*- Parse structured outputs
563
646
  try:
564
- if (
565
- self.response_format is not None
566
- and self.structured_outputs
567
- and issubclass(self.response_format, BaseModel)
568
- ):
569
- parsed_object = response_message.parsed # type: ignore
570
- if parsed_object is not None:
571
- model_response.parsed = parsed_object
572
- except Exception as e:
573
- logger.warning(f"Error retrieving structured outputs: {e}")
647
+ if run_response and run_response.metrics:
648
+ run_response.metrics.set_time_to_first_token()
649
+
650
+ assistant_message.metrics.start_timer()
651
+
652
+ async_stream = await self.get_async_client().chat.completions.create(
653
+ model=self.id,
654
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
655
+ stream=True,
656
+ stream_options={"include_usage": True},
657
+ **self.get_request_params(
658
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
659
+ ),
660
+ )
574
661
 
575
- # -*- Create assistant message
576
- assistant_message = self.create_assistant_message(
577
- response_message=response_message, metrics=metrics, response_usage=response_usage
578
- )
662
+ async for chunk in async_stream:
663
+ yield self._parse_provider_response_delta(chunk)
664
+
665
+ assistant_message.metrics.stop_timer()
579
666
 
580
- # -*- Add assistant message to messages
581
- messages.append(assistant_message)
582
-
583
- # -*- Log response and metrics
584
- assistant_message.log()
585
- metrics.log()
586
-
587
- # -*- Update model response with assistant message content and audio
588
- if assistant_message.content is not None:
589
- # add the content to the model response
590
- model_response.content = assistant_message.get_content_string()
591
- if assistant_message.audio_output is not None:
592
- # add the audio to the model response
593
- model_response.audio = assistant_message.audio_output
594
-
595
- # -*- Handle tool calls
596
- tool_role = "tool"
597
- if (
598
- self.handle_tool_calls(
599
- assistant_message=assistant_message,
600
- messages=messages,
601
- model_response=model_response,
602
- tool_role=tool_role,
667
+ except RateLimitError as e:
668
+ log_error(f"Rate limit error from OpenAI API: {e}")
669
+ error_message = e.response.json().get("error", {})
670
+ error_message = (
671
+ error_message.get("message", "Unknown model error")
672
+ if isinstance(error_message, dict)
673
+ else error_message
603
674
  )
604
- is not None
605
- ):
606
- return self.handle_post_tool_call_messages(messages=messages, model_response=model_response)
607
- logger.debug("---------- OpenAIChat Response End ----------")
608
- return model_response
675
+ raise ModelProviderError(
676
+ message=error_message,
677
+ status_code=e.response.status_code,
678
+ model_name=self.name,
679
+ model_id=self.id,
680
+ ) from e
681
+ except APIConnectionError as e:
682
+ log_error(f"API connection error from OpenAI API: {e}")
683
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
684
+ except APIStatusError as e:
685
+ log_error(f"API status error from OpenAI API: {e}")
686
+ try:
687
+ error_message = e.response.json().get("error", {})
688
+ except Exception:
689
+ error_message = e.response.text
690
+ error_message = (
691
+ error_message.get("message", "Unknown model error")
692
+ if isinstance(error_message, dict)
693
+ else error_message
694
+ )
695
+ raise ModelProviderError(
696
+ message=error_message,
697
+ status_code=e.response.status_code,
698
+ model_name=self.name,
699
+ model_id=self.id,
700
+ ) from e
701
+ except ModelAuthenticationError as e:
702
+ log_error(f"Model authentication error from OpenAI API: {e}")
703
+ raise e
704
+ except Exception as e:
705
+ log_error(f"Error from OpenAI API: {e}")
706
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
609
707
 
610
- async def aresponse(self, messages: List[Message]) -> ModelResponse:
708
+ @staticmethod
709
+ def parse_tool_calls(tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
611
710
  """
612
- Generate an asynchronous response from OpenAI.
711
+ Build tool calls from streamed tool call data.
613
712
 
614
713
  Args:
615
- messages (List[Message]): A list of messages.
714
+ tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
616
715
 
617
716
  Returns:
618
- ModelResponse: The model response from the API.
717
+ List[Dict[str, Any]]: The built tool calls.
718
+ """
719
+ tool_calls: List[Dict[str, Any]] = []
720
+ for _tool_call in tool_calls_data:
721
+ _index = _tool_call.index or 0
722
+ _tool_call_id = _tool_call.id
723
+ _tool_call_type = _tool_call.type
724
+ _function_name = _tool_call.function.name if _tool_call.function else None
725
+ _function_arguments = _tool_call.function.arguments if _tool_call.function else None
726
+
727
+ if len(tool_calls) <= _index:
728
+ tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
729
+ tool_call_entry = tool_calls[_index]
730
+ if not tool_call_entry:
731
+ tool_call_entry["id"] = _tool_call_id
732
+ tool_call_entry["type"] = _tool_call_type
733
+ tool_call_entry["function"] = {
734
+ "name": _function_name or "",
735
+ "arguments": _function_arguments or "",
736
+ }
737
+ else:
738
+ if _function_name:
739
+ tool_call_entry["function"]["name"] += _function_name
740
+ if _function_arguments:
741
+ tool_call_entry["function"]["arguments"] += _function_arguments
742
+ if _tool_call_id:
743
+ tool_call_entry["id"] = _tool_call_id
744
+ if _tool_call_type:
745
+ tool_call_entry["type"] = _tool_call_type
746
+ return tool_calls
747
+
748
+ def _parse_provider_response(
749
+ self,
750
+ response: ChatCompletion,
751
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
752
+ ) -> ModelResponse:
753
+ """
754
+ Parse the OpenAI response into a ModelResponse.
619
755
  """
620
- logger.debug("---------- OpenAIChat Async Response Start ----------")
621
- self._log_messages(messages)
622
756
  model_response = ModelResponse()
623
- metrics = Metrics()
624
-
625
- # -*- Generate response
626
- metrics.start_response_timer()
627
- response: Union[ChatCompletion, ParsedChatCompletion] = await self.ainvoke(messages=messages)
628
- metrics.stop_response_timer()
629
-
630
- # -*- Parse response
631
- response_message: ChatCompletionMessage = response.choices[0].message
632
- response_usage: Optional[CompletionUsage] = response.usage
633
- response_audio: Optional[ChatCompletionAudio] = response_message.audio
634
-
635
- # -*- Parse transcript if available
636
- if response_audio:
637
- if response_audio.transcript and not response_message.content:
638
- response_message.content = response_audio.transcript
639
757
 
640
- # -*- Parse structured outputs
641
- try:
642
- if (
643
- self.response_format is not None
644
- and self.structured_outputs
645
- and issubclass(self.response_format, BaseModel)
646
- ):
647
- parsed_object = response_message.parsed # type: ignore
648
- if parsed_object is not None:
649
- model_response.parsed = parsed_object
650
- except Exception as e:
651
- logger.warning(f"Error retrieving structured outputs: {e}")
652
-
653
- # -*- Create assistant message
654
- assistant_message = self.create_assistant_message(
655
- response_message=response_message, metrics=metrics, response_usage=response_usage
656
- )
657
-
658
- # -*- Add assistant message to messages
659
- messages.append(assistant_message)
660
-
661
- # -*- Log response and metrics
662
- assistant_message.log()
663
- metrics.log()
664
-
665
- # -*- Update model response with assistant message content and audio
666
- if assistant_message.content is not None:
667
- # add the content to the model response
668
- model_response.content = assistant_message.get_content_string()
669
- if assistant_message.audio_output is not None:
670
- # add the audio to the model response
671
- model_response.audio = assistant_message.audio_output
672
-
673
- # -*- Handle tool calls
674
- tool_role = "tool"
675
- if (
676
- self.handle_tool_calls(
677
- assistant_message=assistant_message,
678
- messages=messages,
679
- model_response=model_response,
680
- tool_role=tool_role,
758
+ if hasattr(response, "error") and response.error: # type: ignore
759
+ raise ModelProviderError(
760
+ message=response.error.get("message", "Unknown model error"), # type: ignore
761
+ model_name=self.name,
762
+ model_id=self.id,
681
763
  )
682
- is not None
683
- ):
684
- return await self.ahandle_post_tool_call_messages(messages=messages, model_response=model_response)
685
-
686
- logger.debug("---------- OpenAIChat Async Response End ----------")
687
- return model_response
688
-
689
- def update_stream_metrics(self, assistant_message: Message, metrics: Metrics):
690
- """
691
- Update the usage metrics for the assistant message and the model.
692
-
693
- Args:
694
- assistant_message (Message): The assistant message.
695
- metrics (Metrics): The metrics.
696
- """
697
- # Update time taken to generate response
698
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
699
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
700
-
701
- if metrics.time_to_first_token is not None:
702
- assistant_message.metrics["time_to_first_token"] = metrics.time_to_first_token
703
- self.metrics.setdefault("time_to_first_token", []).append(metrics.time_to_first_token)
704
-
705
- if metrics.input_tokens is not None:
706
- assistant_message.metrics["input_tokens"] = metrics.input_tokens
707
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + metrics.input_tokens
708
- if metrics.output_tokens is not None:
709
- assistant_message.metrics["output_tokens"] = metrics.output_tokens
710
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + metrics.output_tokens
711
- if metrics.prompt_tokens is not None:
712
- assistant_message.metrics["prompt_tokens"] = metrics.prompt_tokens
713
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + metrics.prompt_tokens
714
- if metrics.completion_tokens is not None:
715
- assistant_message.metrics["completion_tokens"] = metrics.completion_tokens
716
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + metrics.completion_tokens
717
- if metrics.total_tokens is not None:
718
- assistant_message.metrics["total_tokens"] = metrics.total_tokens
719
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + metrics.total_tokens
720
- if metrics.prompt_tokens_details is not None:
721
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
722
- for k, v in metrics.prompt_tokens_details.items():
723
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
724
- if metrics.completion_tokens_details is not None:
725
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
726
- for k, v in metrics.completion_tokens_details.items():
727
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
728
-
729
- def add_response_usage_to_metrics(self, metrics: Metrics, response_usage: CompletionUsage):
730
- metrics.input_tokens = response_usage.prompt_tokens
731
- metrics.prompt_tokens = response_usage.prompt_tokens
732
- metrics.output_tokens = response_usage.completion_tokens
733
- metrics.completion_tokens = response_usage.completion_tokens
734
- metrics.total_tokens = response_usage.total_tokens
735
- if response_usage.prompt_tokens_details is not None:
736
- if isinstance(response_usage.prompt_tokens_details, dict):
737
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details
738
- elif isinstance(response_usage.prompt_tokens_details, BaseModel):
739
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
740
- if response_usage.completion_tokens_details is not None:
741
- if isinstance(response_usage.completion_tokens_details, dict):
742
- metrics.completion_tokens_details = response_usage.completion_tokens_details
743
- elif isinstance(response_usage.completion_tokens_details, BaseModel):
744
- metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
745
- exclude_none=True
746
- )
747
764
 
748
- def handle_stream_tool_calls(
749
- self,
750
- assistant_message: Message,
751
- messages: List[Message],
752
- tool_role: str = "tool",
753
- ) -> Iterator[ModelResponse]:
754
- """
755
- Handle tool calls for response stream.
765
+ # Get response message
766
+ response_message = response.choices[0].message
767
+
768
+ # Add role
769
+ if response_message.role is not None:
770
+ model_response.role = response_message.role
771
+ # Add content
772
+ if response_message.content is not None:
773
+ model_response.content = response_message.content
774
+
775
+ # Extract thinking content before any structured parsing
776
+ if model_response.content:
777
+ reasoning_content, output_content = extract_thinking_content(model_response.content)
778
+ if reasoning_content:
779
+ model_response.reasoning_content = reasoning_content
780
+ model_response.content = output_content
781
+ # Add tool calls
782
+ if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
783
+ try:
784
+ model_response.tool_calls = [t.model_dump() for t in response_message.tool_calls]
785
+ except Exception as e:
786
+ log_warning(f"Error processing tool calls: {e}")
756
787
 
757
- Args:
758
- assistant_message (Message): The assistant message.
759
- messages (List[Message]): The list of messages.
760
- tool_role (str): The role of the tool call. Defaults to "tool".
788
+ # Add audio transcript to content if available
789
+ response_audio: Optional[ChatCompletionAudio] = response_message.audio
790
+ if response_audio and response_audio.transcript and not model_response.content:
791
+ model_response.content = response_audio.transcript
761
792
 
762
- Returns:
763
- Iterator[ModelResponse]: An iterator of the model response.
764
- """
765
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
766
- function_calls_to_run: List[FunctionCall] = []
767
- function_call_results: List[Message] = []
768
- for tool_call in assistant_message.tool_calls:
769
- _tool_call_id = tool_call.get("id")
770
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
771
- if _function_call is None:
772
- messages.append(
773
- Message(
774
- role=tool_role,
775
- tool_call_id=_tool_call_id,
776
- content="Could not find function to call.",
777
- )
793
+ # Add audio if present
794
+ if hasattr(response_message, "audio") and response_message.audio is not None:
795
+ # If the audio output modality is requested, we can extract an audio response
796
+ try:
797
+ if isinstance(response_message.audio, dict):
798
+ model_response.audio = Audio(
799
+ id=response_message.audio.get("id"),
800
+ content=response_message.audio.get("data"),
801
+ expires_at=response_message.audio.get("expires_at"),
802
+ transcript=response_message.audio.get("transcript"),
778
803
  )
779
- continue
780
- if _function_call.error is not None:
781
- messages.append(
782
- Message(
783
- role=tool_role,
784
- tool_call_id=_tool_call_id,
785
- content=_function_call.error,
786
- )
804
+ else:
805
+ model_response.audio = Audio(
806
+ id=response_message.audio.id,
807
+ content=response_message.audio.data,
808
+ expires_at=response_message.audio.expires_at,
809
+ transcript=response_message.audio.transcript,
787
810
  )
788
- continue
789
- function_calls_to_run.append(_function_call)
811
+ except Exception as e:
812
+ log_warning(f"Error processing audio: {e}")
790
813
 
791
- if self.show_tool_calls:
792
- yield ModelResponse(content="\nRunning:")
793
- for _f in function_calls_to_run:
794
- yield ModelResponse(content=f"\n - {_f.get_call_str()}")
795
- yield ModelResponse(content="\n\n")
814
+ if hasattr(response_message, "reasoning_content") and response_message.reasoning_content is not None: # type: ignore
815
+ model_response.reasoning_content = response_message.reasoning_content # type: ignore
816
+ elif hasattr(response_message, "reasoning") and response_message.reasoning is not None: # type: ignore
817
+ model_response.reasoning_content = response_message.reasoning # type: ignore
796
818
 
797
- for function_call_response in self.run_function_calls(
798
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
799
- ):
800
- yield function_call_response
819
+ if response.usage is not None:
820
+ model_response.response_usage = self._get_metrics(response.usage)
821
+
822
+ if model_response.provider_data is None:
823
+ model_response.provider_data = {}
801
824
 
802
- if len(function_call_results) > 0:
803
- messages.extend(function_call_results)
825
+ if response.id:
826
+ model_response.provider_data["id"] = response.id
827
+ if response.system_fingerprint:
828
+ model_response.provider_data["system_fingerprint"] = response.system_fingerprint
829
+ if response.model_extra:
830
+ model_response.provider_data["model_extra"] = response.model_extra
804
831
 
805
- def response_stream(self, messages: List[Message]) -> Iterator[ModelResponse]:
832
+ return model_response
833
+
834
+ def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
806
835
  """
807
- Generate a streaming response from OpenAI.
836
+ Parse the OpenAI streaming response into a ModelResponse.
808
837
 
809
838
  Args:
810
- messages (List[Message]): A list of messages.
839
+ response_delta: Raw response chunk from OpenAI
811
840
 
812
841
  Returns:
813
- Iterator[ModelResponse]: An iterator of model responses.
842
+ ModelResponse: Parsed response data
814
843
  """
815
- logger.debug("---------- OpenAIChat Response Start ----------")
816
- self._log_messages(messages)
817
- stream_data: StreamData = StreamData()
818
- metrics: Metrics = Metrics()
819
-
820
- # -*- Generate response
821
- metrics.start_response_timer()
822
- for response in self.invoke_stream(messages=messages):
823
- if len(response.choices) > 0:
824
- metrics.completion_tokens += 1
825
- if metrics.completion_tokens == 1:
826
- metrics.time_to_first_token = metrics.response_timer.elapsed
827
-
828
- response_delta: ChoiceDelta = response.choices[0].delta
829
-
830
- if response_delta.content is not None:
831
- stream_data.response_content += response_delta.content
832
- yield ModelResponse(content=response_delta.content)
833
-
834
- if hasattr(response_delta, "audio"):
835
- response_audio = response_delta.audio
836
- stream_data.response_audio = response_audio
837
- if stream_data.response_audio:
838
- yield ModelResponse(
839
- audio=AudioOutput(
840
- id=stream_data.response_audio.id,
841
- content=stream_data.response_audio.data,
842
- expires_at=stream_data.response_audio.expires_at,
843
- transcript=stream_data.response_audio.transcript,
844
- )
845
- )
846
-
847
- if response_delta.tool_calls is not None:
848
- if stream_data.response_tool_calls is None:
849
- stream_data.response_tool_calls = []
850
- stream_data.response_tool_calls.extend(response_delta.tool_calls)
851
-
852
- if response.usage is not None:
853
- self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
854
- metrics.stop_response_timer()
855
-
856
- # -*- Create assistant message
857
- assistant_message = Message(role="assistant")
858
- if stream_data.response_content != "":
859
- assistant_message.content = stream_data.response_content
860
-
861
- if stream_data.response_audio is not None:
862
- assistant_message.audio_output = AudioOutput(
863
- id=stream_data.response_audio.id,
864
- content=stream_data.response_audio.data,
865
- expires_at=stream_data.response_audio.expires_at,
866
- transcript=stream_data.response_audio.transcript,
867
- )
868
-
869
- if stream_data.response_tool_calls is not None:
870
- _tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
871
- if len(_tool_calls) > 0:
872
- assistant_message.tool_calls = _tool_calls
873
-
874
- # -*- Update usage metrics
875
- self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
844
+ model_response = ModelResponse()
876
845
 
877
- # -*- Add assistant message to messages
878
- messages.append(assistant_message)
846
+ if response_delta.choices and len(response_delta.choices) > 0:
847
+ choice_delta: ChoiceDelta = response_delta.choices[0].delta
848
+ if choice_delta:
849
+ # Add content
850
+ if choice_delta.content is not None:
851
+ model_response.content = choice_delta.content
852
+
853
+ # We only want to handle these if content is present
854
+ if model_response.provider_data is None:
855
+ model_response.provider_data = {}
856
+
857
+ if response_delta.id:
858
+ model_response.provider_data["id"] = response_delta.id
859
+ if response_delta.system_fingerprint:
860
+ model_response.provider_data["system_fingerprint"] = response_delta.system_fingerprint
861
+ if response_delta.model_extra:
862
+ model_response.provider_data["model_extra"] = response_delta.model_extra
863
+
864
+ # Add tool calls
865
+ if choice_delta.tool_calls is not None:
866
+ model_response.tool_calls = choice_delta.tool_calls # type: ignore
867
+
868
+ if hasattr(choice_delta, "reasoning_content") and choice_delta.reasoning_content is not None:
869
+ model_response.reasoning_content = choice_delta.reasoning_content
870
+ elif hasattr(choice_delta, "reasoning") and choice_delta.reasoning is not None:
871
+ model_response.reasoning_content = choice_delta.reasoning
872
+
873
+ # Add audio if present
874
+ if hasattr(choice_delta, "audio") and choice_delta.audio is not None:
875
+ try:
876
+ audio_data = None
877
+ audio_id = None
878
+ audio_expires_at = None
879
+ audio_transcript = None
880
+
881
+ if isinstance(choice_delta.audio, dict):
882
+ audio_data = choice_delta.audio.get("data")
883
+ audio_id = choice_delta.audio.get("id")
884
+ audio_expires_at = choice_delta.audio.get("expires_at")
885
+ audio_transcript = choice_delta.audio.get("transcript")
886
+ else:
887
+ audio_data = choice_delta.audio.data
888
+ audio_id = choice_delta.audio.id
889
+ audio_expires_at = choice_delta.audio.expires_at
890
+ audio_transcript = choice_delta.audio.transcript
891
+
892
+ # Only create Audio object if there's actual content
893
+ if audio_data is not None:
894
+ model_response.audio = Audio(
895
+ id=audio_id,
896
+ content=audio_data,
897
+ expires_at=audio_expires_at,
898
+ transcript=audio_transcript,
899
+ sample_rate=24000,
900
+ mime_type="pcm16",
901
+ )
902
+ # If no content but there's transcript/metadata, create minimal Audio object
903
+ elif audio_transcript is not None or audio_id is not None:
904
+ model_response.audio = Audio(
905
+ id=audio_id or str(uuid4()),
906
+ content=b"",
907
+ expires_at=audio_expires_at,
908
+ transcript=audio_transcript,
909
+ sample_rate=24000,
910
+ mime_type="pcm16",
911
+ )
912
+ except Exception as e:
913
+ log_warning(f"Error processing audio: {e}")
879
914
 
880
- # -*- Log response and metrics
881
- assistant_message.log()
882
- metrics.log()
915
+ # Add usage metrics if present
916
+ if response_delta.usage is not None:
917
+ model_response.response_usage = self._get_metrics(response_delta.usage)
883
918
 
884
- # -*- Handle tool calls
885
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
886
- tool_role = "tool"
887
- yield from self.handle_stream_tool_calls(
888
- assistant_message=assistant_message, messages=messages, tool_role=tool_role
889
- )
890
- yield from self.handle_post_tool_call_messages_stream(messages=messages)
891
- logger.debug("---------- OpenAIChat Response End ----------")
919
+ return model_response
892
920
 
893
- async def aresponse_stream(self, messages: List[Message]) -> Any:
921
+ def _get_metrics(self, response_usage: CompletionUsage) -> Metrics:
894
922
  """
895
- Generate an asynchronous streaming response from OpenAI.
923
+ Parse the given OpenAI-specific usage into an Agno Metrics object.
896
924
 
897
925
  Args:
898
- messages (List[Message]): A list of messages.
926
+ response_usage: Usage data from OpenAI
899
927
 
900
928
  Returns:
901
- Any: An asynchronous iterator of model responses.
929
+ Metrics: Parsed metrics data
902
930
  """
903
- logger.debug("---------- OpenAIChat Async Response Start ----------")
904
- self._log_messages(messages)
905
- stream_data: StreamData = StreamData()
906
- metrics: Metrics = Metrics()
907
-
908
- # -*- Generate response
909
- metrics.start_response_timer()
910
- async for response in self.ainvoke_stream(messages=messages):
911
- if response.choices and len(response.choices) > 0:
912
- metrics.completion_tokens += 1
913
- if metrics.completion_tokens == 1:
914
- metrics.time_to_first_token = metrics.response_timer.elapsed
915
-
916
- response_delta: ChoiceDelta = response.choices[0].delta
917
-
918
- if response_delta.content is not None:
919
- stream_data.response_content += response_delta.content
920
- yield ModelResponse(content=response_delta.content)
921
-
922
- if hasattr(response_delta, "audio"):
923
- response_audio = response_delta.audio
924
- stream_data.response_audio = response_audio
925
- if stream_data.response_audio:
926
- yield ModelResponse(
927
- audio=AudioOutput(
928
- id=stream_data.response_audio.id,
929
- content=stream_data.response_audio.data,
930
- expires_at=stream_data.response_audio.expires_at,
931
- transcript=stream_data.response_audio.transcript,
932
- )
933
- )
934
-
935
- if response_delta.tool_calls is not None:
936
- if stream_data.response_tool_calls is None:
937
- stream_data.response_tool_calls = []
938
- stream_data.response_tool_calls.extend(response_delta.tool_calls)
939
-
940
- if response.usage is not None:
941
- self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
942
- metrics.stop_response_timer()
943
-
944
- # -*- Create assistant message
945
- assistant_message = Message(role="assistant")
946
- if stream_data.response_content != "":
947
- assistant_message.content = stream_data.response_content
948
-
949
- if stream_data.response_audio is not None:
950
- assistant_message.audio_output = AudioOutput(
951
- id=stream_data.response_audio.id,
952
- content=stream_data.response_audio.data,
953
- expires_at=stream_data.response_audio.expires_at,
954
- transcript=stream_data.response_audio.transcript,
955
- )
956
-
957
- if stream_data.response_tool_calls is not None:
958
- _tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
959
- if len(_tool_calls) > 0:
960
- assistant_message.tool_calls = _tool_calls
961
-
962
- self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
963
-
964
- # -*- Add assistant message to messages
965
- messages.append(assistant_message)
966
931
 
967
- # -*- Log response and metrics
968
- assistant_message.log()
969
- metrics.log()
932
+ metrics = Metrics()
970
933
 
971
- # -*- Handle tool calls
972
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
973
- tool_role = "tool"
974
- for tool_call_response in self.handle_stream_tool_calls(
975
- assistant_message=assistant_message, messages=messages, tool_role=tool_role
976
- ):
977
- yield tool_call_response
978
- async for post_tool_call_response in self.ahandle_post_tool_call_messages_stream(messages=messages):
979
- yield post_tool_call_response
980
- logger.debug("---------- OpenAIChat Async Response End ----------")
934
+ metrics.input_tokens = response_usage.prompt_tokens or 0
935
+ metrics.output_tokens = response_usage.completion_tokens or 0
936
+ metrics.total_tokens = response_usage.total_tokens or 0
981
937
 
982
- def build_tool_calls(self, tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
983
- """
984
- Build tool calls from tool call data.
938
+ # Add the prompt_tokens_details field
939
+ if prompt_token_details := response_usage.prompt_tokens_details:
940
+ metrics.audio_input_tokens = prompt_token_details.audio_tokens or 0
941
+ metrics.cache_read_tokens = prompt_token_details.cached_tokens or 0
985
942
 
986
- Args:
987
- tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
988
-
989
- Returns:
990
- List[Dict[str, Any]]: The built tool calls.
991
- """
943
+ # Add the completion_tokens_details field
944
+ if completion_tokens_details := response_usage.completion_tokens_details:
945
+ metrics.audio_output_tokens = completion_tokens_details.audio_tokens or 0
946
+ metrics.reasoning_tokens = completion_tokens_details.reasoning_tokens or 0
992
947
 
993
- return self._build_tool_calls(tool_calls_data)
948
+ return metrics