agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/openai/chat.py
CHANGED
|
@@ -1,45 +1,40 @@
|
|
|
1
|
+
from collections.abc import AsyncIterator
|
|
1
2
|
from dataclasses import dataclass
|
|
2
3
|
from os import getenv
|
|
3
|
-
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
4
|
+
from typing import Any, Dict, Iterator, List, Literal, Optional, Type, Union
|
|
5
|
+
from uuid import uuid4
|
|
4
6
|
|
|
5
7
|
import httpx
|
|
6
8
|
from pydantic import BaseModel
|
|
7
9
|
|
|
8
|
-
from agno.
|
|
9
|
-
from agno.
|
|
10
|
+
from agno.exceptions import ModelAuthenticationError, ModelProviderError
|
|
11
|
+
from agno.media import Audio
|
|
12
|
+
from agno.models.base import Model
|
|
10
13
|
from agno.models.message import Message
|
|
11
|
-
from agno.models.
|
|
12
|
-
from agno.
|
|
13
|
-
from agno.
|
|
14
|
-
from agno.
|
|
14
|
+
from agno.models.metrics import Metrics
|
|
15
|
+
from agno.models.response import ModelResponse
|
|
16
|
+
from agno.run.agent import RunOutput
|
|
17
|
+
from agno.run.team import TeamRunOutput
|
|
18
|
+
from agno.utils.http import get_default_async_client, get_default_sync_client
|
|
19
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
20
|
+
from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
|
|
21
|
+
from agno.utils.reasoning import extract_thinking_content
|
|
15
22
|
|
|
16
23
|
try:
|
|
24
|
+
from openai import APIConnectionError, APIStatusError, RateLimitError
|
|
17
25
|
from openai import AsyncOpenAI as AsyncOpenAIClient
|
|
18
26
|
from openai import OpenAI as OpenAIClient
|
|
19
|
-
from openai.types
|
|
20
|
-
from openai.types.chat
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
ChoiceDeltaToolCall,
|
|
24
|
-
)
|
|
25
|
-
from openai.types.chat.chat_completion_message import ChatCompletionAudio, ChatCompletionMessage
|
|
26
|
-
from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
|
|
27
|
-
from openai.types.completion_usage import CompletionUsage
|
|
28
|
-
except ModuleNotFoundError:
|
|
27
|
+
from openai.types import CompletionUsage
|
|
28
|
+
from openai.types.chat import ChatCompletion, ChatCompletionAudio, ChatCompletionChunk
|
|
29
|
+
from openai.types.chat.chat_completion_chunk import ChoiceDelta, ChoiceDeltaToolCall
|
|
30
|
+
except (ImportError, ModuleNotFoundError):
|
|
29
31
|
raise ImportError("`openai` not installed. Please install using `pip install openai`")
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
@dataclass
|
|
33
|
-
class StreamData:
|
|
34
|
-
response_content: str = ""
|
|
35
|
-
response_audio: Optional[ChatCompletionAudio] = None
|
|
36
|
-
response_tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
|
|
37
|
-
|
|
38
|
-
|
|
39
34
|
@dataclass
|
|
40
35
|
class OpenAIChat(Model):
|
|
41
36
|
"""
|
|
42
|
-
A class for interacting with OpenAI models.
|
|
37
|
+
A class for interacting with OpenAI models using the Chat completions API.
|
|
43
38
|
|
|
44
39
|
For more information, see: https://platform.openai.com/docs/api-reference/chat/create
|
|
45
40
|
"""
|
|
@@ -47,10 +42,12 @@ class OpenAIChat(Model):
|
|
|
47
42
|
id: str = "gpt-4o"
|
|
48
43
|
name: str = "OpenAIChat"
|
|
49
44
|
provider: str = "OpenAI"
|
|
50
|
-
|
|
45
|
+
supports_native_structured_outputs: bool = True
|
|
51
46
|
|
|
52
47
|
# Request parameters
|
|
53
48
|
store: Optional[bool] = None
|
|
49
|
+
reasoning_effort: Optional[str] = None
|
|
50
|
+
verbosity: Optional[Literal["low", "medium", "high"]] = None
|
|
54
51
|
metadata: Optional[Dict[str, Any]] = None
|
|
55
52
|
frequency_penalty: Optional[float] = None
|
|
56
53
|
logit_bias: Optional[Any] = None
|
|
@@ -58,18 +55,23 @@ class OpenAIChat(Model):
|
|
|
58
55
|
top_logprobs: Optional[int] = None
|
|
59
56
|
max_tokens: Optional[int] = None
|
|
60
57
|
max_completion_tokens: Optional[int] = None
|
|
61
|
-
modalities: Optional[List[str]] = None
|
|
62
|
-
audio: Optional[Dict[str, Any]] =
|
|
58
|
+
modalities: Optional[List[str]] = None # "text" and/or "audio"
|
|
59
|
+
audio: Optional[Dict[str, Any]] = (
|
|
60
|
+
None # E.g. {"voice": "alloy", "format": "wav"}. `format` must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. `voice` must be one of `ash`, `ballad`, `coral`, `sage`, `verse`, `alloy`, `echo`, and `shimmer`.
|
|
61
|
+
)
|
|
63
62
|
presence_penalty: Optional[float] = None
|
|
64
|
-
response_format: Optional[Any] = None
|
|
65
63
|
seed: Optional[int] = None
|
|
66
64
|
stop: Optional[Union[str, List[str]]] = None
|
|
67
65
|
temperature: Optional[float] = None
|
|
68
66
|
user: Optional[str] = None
|
|
69
67
|
top_p: Optional[float] = None
|
|
68
|
+
service_tier: Optional[str] = None # "auto" | "default" | "flex" | "priority", defaults to "auto" when not set
|
|
69
|
+
strict_output: bool = True # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
|
|
70
70
|
extra_headers: Optional[Any] = None
|
|
71
71
|
extra_query: Optional[Any] = None
|
|
72
|
+
extra_body: Optional[Any] = None
|
|
72
73
|
request_params: Optional[Dict[str, Any]] = None
|
|
74
|
+
role_map: Optional[Dict[str, str]] = None
|
|
73
75
|
|
|
74
76
|
# Client parameters
|
|
75
77
|
api_key: Optional[str] = None
|
|
@@ -79,126 +81,192 @@ class OpenAIChat(Model):
|
|
|
79
81
|
max_retries: Optional[int] = None
|
|
80
82
|
default_headers: Optional[Any] = None
|
|
81
83
|
default_query: Optional[Any] = None
|
|
82
|
-
http_client: Optional[httpx.Client] = None
|
|
84
|
+
http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
|
|
83
85
|
client_params: Optional[Dict[str, Any]] = None
|
|
84
86
|
|
|
85
|
-
#
|
|
87
|
+
# Cached clients to avoid recreating them on every request
|
|
86
88
|
client: Optional[OpenAIClient] = None
|
|
87
89
|
async_client: Optional[AsyncOpenAIClient] = None
|
|
88
90
|
|
|
89
|
-
#
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
91
|
+
# The role to map the message role to.
|
|
92
|
+
default_role_map = {
|
|
93
|
+
"system": "developer",
|
|
94
|
+
"user": "user",
|
|
95
|
+
"assistant": "assistant",
|
|
96
|
+
"tool": "tool",
|
|
97
|
+
"model": "assistant",
|
|
98
|
+
}
|
|
97
99
|
|
|
98
100
|
def _get_client_params(self) -> Dict[str, Any]:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
self.api_key = self.api_key or getenv("OPENAI_API_KEY")
|
|
101
|
+
# Fetch API key from env if not already set
|
|
102
102
|
if not self.api_key:
|
|
103
|
-
|
|
103
|
+
self.api_key = getenv("OPENAI_API_KEY")
|
|
104
|
+
if not self.api_key:
|
|
105
|
+
raise ModelAuthenticationError(
|
|
106
|
+
message="OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.",
|
|
107
|
+
model_name=self.name,
|
|
108
|
+
)
|
|
104
109
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
110
|
+
# Define base client params
|
|
111
|
+
base_params = {
|
|
112
|
+
"api_key": self.api_key,
|
|
113
|
+
"organization": self.organization,
|
|
114
|
+
"base_url": self.base_url,
|
|
115
|
+
"timeout": self.timeout,
|
|
116
|
+
"max_retries": self.max_retries,
|
|
117
|
+
"default_headers": self.default_headers,
|
|
118
|
+
"default_query": self.default_query,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
# Create client_params dict with non-None values
|
|
122
|
+
client_params = {k: v for k, v in base_params.items() if v is not None}
|
|
123
|
+
|
|
124
|
+
# Add additional client params if provided
|
|
125
|
+
if self.client_params:
|
|
117
126
|
client_params.update(self.client_params)
|
|
118
|
-
|
|
119
|
-
# Remove None
|
|
120
|
-
client_params = {k: v for k, v in client_params.items() if v is not None}
|
|
121
127
|
return client_params
|
|
122
128
|
|
|
123
129
|
def get_client(self) -> OpenAIClient:
|
|
124
130
|
"""
|
|
125
|
-
Returns an OpenAI client.
|
|
131
|
+
Returns an OpenAI client. Caches the client to avoid recreating it on every request.
|
|
126
132
|
|
|
127
133
|
Returns:
|
|
128
134
|
OpenAIClient: An instance of the OpenAI client.
|
|
129
135
|
"""
|
|
130
|
-
if
|
|
136
|
+
# Return cached client if it exists and is not closed
|
|
137
|
+
if self.client is not None and not self.client.is_closed():
|
|
131
138
|
return self.client
|
|
132
139
|
|
|
140
|
+
log_debug(f"Creating new sync OpenAI client for model {self.id}")
|
|
133
141
|
client_params: Dict[str, Any] = self._get_client_params()
|
|
134
|
-
if self.http_client
|
|
135
|
-
|
|
136
|
-
|
|
142
|
+
if self.http_client:
|
|
143
|
+
if isinstance(self.http_client, httpx.Client):
|
|
144
|
+
client_params["http_client"] = self.http_client
|
|
145
|
+
else:
|
|
146
|
+
log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
|
|
147
|
+
# Use global sync client when user http_client is invalid
|
|
148
|
+
client_params["http_client"] = get_default_sync_client()
|
|
149
|
+
else:
|
|
150
|
+
# Use global sync client when no custom http_client is provided
|
|
151
|
+
client_params["http_client"] = get_default_sync_client()
|
|
152
|
+
|
|
153
|
+
# Create and cache the client
|
|
154
|
+
self.client = OpenAIClient(**client_params)
|
|
155
|
+
return self.client
|
|
137
156
|
|
|
138
157
|
def get_async_client(self) -> AsyncOpenAIClient:
|
|
139
158
|
"""
|
|
140
|
-
Returns an asynchronous OpenAI client.
|
|
159
|
+
Returns an asynchronous OpenAI client. Caches the client to avoid recreating it on every request.
|
|
141
160
|
|
|
142
161
|
Returns:
|
|
143
162
|
AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
|
|
144
163
|
"""
|
|
145
|
-
if
|
|
164
|
+
# Return cached client if it exists and is not closed
|
|
165
|
+
if self.async_client is not None and not self.async_client.is_closed():
|
|
146
166
|
return self.async_client
|
|
147
167
|
|
|
168
|
+
log_debug(f"Creating new async OpenAI client for model {self.id}")
|
|
148
169
|
client_params: Dict[str, Any] = self._get_client_params()
|
|
149
170
|
if self.http_client:
|
|
150
|
-
|
|
171
|
+
if isinstance(self.http_client, httpx.AsyncClient):
|
|
172
|
+
client_params["http_client"] = self.http_client
|
|
173
|
+
else:
|
|
174
|
+
log_warning(
|
|
175
|
+
"http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
|
|
176
|
+
)
|
|
177
|
+
# Use global async client when user http_client is invalid
|
|
178
|
+
client_params["http_client"] = get_default_async_client()
|
|
151
179
|
else:
|
|
152
|
-
#
|
|
153
|
-
client_params["http_client"] =
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
180
|
+
# Use global async client when no custom http_client is provided
|
|
181
|
+
client_params["http_client"] = get_default_async_client()
|
|
182
|
+
|
|
183
|
+
# Create and cache the client
|
|
184
|
+
self.async_client = AsyncOpenAIClient(**client_params)
|
|
185
|
+
return self.async_client
|
|
157
186
|
|
|
158
|
-
|
|
159
|
-
|
|
187
|
+
def get_request_params(
|
|
188
|
+
self,
|
|
189
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
190
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
191
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
192
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
193
|
+
) -> Dict[str, Any]:
|
|
160
194
|
"""
|
|
161
195
|
Returns keyword arguments for API requests.
|
|
162
196
|
|
|
163
197
|
Returns:
|
|
164
198
|
Dict[str, Any]: A dictionary of keyword arguments for API requests.
|
|
165
199
|
"""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
200
|
+
# Define base request parameters
|
|
201
|
+
base_params = {
|
|
202
|
+
"store": self.store,
|
|
203
|
+
"reasoning_effort": self.reasoning_effort,
|
|
204
|
+
"verbosity": self.verbosity,
|
|
205
|
+
"frequency_penalty": self.frequency_penalty,
|
|
206
|
+
"logit_bias": self.logit_bias,
|
|
207
|
+
"logprobs": self.logprobs,
|
|
208
|
+
"top_logprobs": self.top_logprobs,
|
|
209
|
+
"max_tokens": self.max_tokens,
|
|
210
|
+
"max_completion_tokens": self.max_completion_tokens,
|
|
211
|
+
"modalities": self.modalities,
|
|
212
|
+
"audio": self.audio,
|
|
213
|
+
"presence_penalty": self.presence_penalty,
|
|
214
|
+
"seed": self.seed,
|
|
215
|
+
"stop": self.stop,
|
|
216
|
+
"temperature": self.temperature,
|
|
217
|
+
"user": self.user,
|
|
218
|
+
"top_p": self.top_p,
|
|
219
|
+
"extra_headers": self.extra_headers,
|
|
220
|
+
"extra_query": self.extra_query,
|
|
221
|
+
"extra_body": self.extra_body,
|
|
222
|
+
"metadata": self.metadata,
|
|
223
|
+
"service_tier": self.service_tier,
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
# Handle response format - always use JSON schema approach
|
|
227
|
+
if response_format is not None:
|
|
228
|
+
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
|
|
229
|
+
# Convert Pydantic to JSON schema for regular endpoint
|
|
230
|
+
from agno.utils.models.schema_utils import get_response_schema_for_provider
|
|
231
|
+
|
|
232
|
+
schema = get_response_schema_for_provider(response_format, "openai")
|
|
233
|
+
base_params["response_format"] = {
|
|
234
|
+
"type": "json_schema",
|
|
235
|
+
"json_schema": {
|
|
236
|
+
"name": response_format.__name__,
|
|
237
|
+
"schema": schema,
|
|
238
|
+
"strict": self.strict_output,
|
|
239
|
+
},
|
|
240
|
+
}
|
|
194
241
|
else:
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
242
|
+
# Handle other response format types (like {"type": "json_object"})
|
|
243
|
+
base_params["response_format"] = response_format
|
|
244
|
+
|
|
245
|
+
# Filter out None values
|
|
246
|
+
request_params = {k: v for k, v in base_params.items() if v is not None}
|
|
247
|
+
|
|
248
|
+
# Add tools
|
|
249
|
+
if tools is not None and len(tools) > 0:
|
|
250
|
+
# Remove unsupported fields for OpenAILike models
|
|
251
|
+
if self.provider in ["AIMLAPI", "Fireworks", "Nvidia"]:
|
|
252
|
+
for tool in tools:
|
|
253
|
+
if tool.get("type") == "function":
|
|
254
|
+
if tool["function"].get("requires_confirmation") is not None:
|
|
255
|
+
del tool["function"]["requires_confirmation"]
|
|
256
|
+
if tool["function"].get("external_execution") is not None:
|
|
257
|
+
del tool["function"]["external_execution"]
|
|
258
|
+
|
|
259
|
+
request_params["tools"] = tools
|
|
260
|
+
|
|
261
|
+
if tool_choice is not None:
|
|
262
|
+
request_params["tool_choice"] = tool_choice
|
|
263
|
+
|
|
264
|
+
# Add additional request params if provided
|
|
265
|
+
if self.request_params:
|
|
198
266
|
request_params.update(self.request_params)
|
|
199
267
|
|
|
200
|
-
|
|
201
|
-
|
|
268
|
+
if request_params:
|
|
269
|
+
log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
|
|
202
270
|
return request_params
|
|
203
271
|
|
|
204
272
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -208,10 +276,12 @@ class OpenAIChat(Model):
|
|
|
208
276
|
Returns:
|
|
209
277
|
Dict[str, Any]: The dictionary representation of the model.
|
|
210
278
|
"""
|
|
211
|
-
|
|
212
|
-
|
|
279
|
+
model_dict = super().to_dict()
|
|
280
|
+
model_dict.update(
|
|
213
281
|
{
|
|
214
282
|
"store": self.store,
|
|
283
|
+
"reasoning_effort": self.reasoning_effort,
|
|
284
|
+
"verbosity": self.verbosity,
|
|
215
285
|
"frequency_penalty": self.frequency_penalty,
|
|
216
286
|
"logit_bias": self.logit_bias,
|
|
217
287
|
"logprobs": self.logprobs,
|
|
@@ -221,9 +291,6 @@ class OpenAIChat(Model):
|
|
|
221
291
|
"modalities": self.modalities,
|
|
222
292
|
"audio": self.audio,
|
|
223
293
|
"presence_penalty": self.presence_penalty,
|
|
224
|
-
"response_format": self.response_format
|
|
225
|
-
if isinstance(self.response_format, dict)
|
|
226
|
-
else str(self.response_format),
|
|
227
294
|
"seed": self.seed,
|
|
228
295
|
"stop": self.stop,
|
|
229
296
|
"temperature": self.temperature,
|
|
@@ -231,763 +298,651 @@ class OpenAIChat(Model):
|
|
|
231
298
|
"user": self.user,
|
|
232
299
|
"extra_headers": self.extra_headers,
|
|
233
300
|
"extra_query": self.extra_query,
|
|
301
|
+
"extra_body": self.extra_body,
|
|
302
|
+
"service_tier": self.service_tier,
|
|
234
303
|
}
|
|
235
304
|
)
|
|
236
|
-
|
|
237
|
-
_dict["tools"] = self.tools
|
|
238
|
-
if self.tool_choice is None:
|
|
239
|
-
_dict["tool_choice"] = "auto"
|
|
240
|
-
else:
|
|
241
|
-
_dict["tool_choice"] = self.tool_choice
|
|
242
|
-
cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
|
|
305
|
+
cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
|
|
243
306
|
return cleaned_dict
|
|
244
307
|
|
|
245
|
-
def
|
|
308
|
+
def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
|
|
246
309
|
"""
|
|
247
310
|
Format a message into the format expected by OpenAI.
|
|
248
311
|
|
|
249
312
|
Args:
|
|
250
313
|
message (Message): The message to format.
|
|
314
|
+
compress_tool_results: Whether to compress tool results.
|
|
251
315
|
|
|
252
316
|
Returns:
|
|
253
317
|
Dict[str, Any]: The formatted message.
|
|
254
318
|
"""
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
319
|
+
tool_result = message.get_content(use_compressed_content=compress_tool_results)
|
|
320
|
+
|
|
321
|
+
message_dict: Dict[str, Any] = {
|
|
322
|
+
"role": self.role_map[message.role] if self.role_map else self.default_role_map[message.role],
|
|
323
|
+
"content": tool_result,
|
|
324
|
+
"name": message.name,
|
|
325
|
+
"tool_call_id": message.tool_call_id,
|
|
326
|
+
"tool_calls": message.tool_calls,
|
|
327
|
+
}
|
|
328
|
+
message_dict = {k: v for k, v in message_dict.items() if v is not None}
|
|
329
|
+
|
|
330
|
+
# Ignore non-string message content
|
|
331
|
+
# because we assume that the images/audio are already added to the message
|
|
332
|
+
if (message.images is not None and len(message.images) > 0) or (
|
|
333
|
+
message.audio is not None and len(message.audio) > 0
|
|
334
|
+
):
|
|
335
|
+
# Ignore non-string message content
|
|
336
|
+
# because we assume that the images/audio are already added to the message
|
|
337
|
+
if isinstance(message.content, str):
|
|
338
|
+
message_dict["content"] = [{"type": "text", "text": message.content}]
|
|
339
|
+
if message.images is not None:
|
|
340
|
+
message_dict["content"].extend(images_to_message(images=message.images))
|
|
341
|
+
|
|
342
|
+
if message.audio is not None:
|
|
343
|
+
message_dict["content"].extend(audio_to_message(audio=message.audio))
|
|
344
|
+
|
|
345
|
+
if message.audio_output is not None:
|
|
346
|
+
message_dict["content"] = ""
|
|
347
|
+
message_dict["audio"] = {"id": message.audio_output.id}
|
|
348
|
+
|
|
349
|
+
if message.videos is not None and len(message.videos) > 0:
|
|
350
|
+
log_warning("Video input is currently unsupported.")
|
|
351
|
+
|
|
352
|
+
# OpenAI expects the tool_calls to be None if empty, not an empty list
|
|
353
|
+
if message.tool_calls is not None and len(message.tool_calls) == 0:
|
|
354
|
+
message_dict["tool_calls"] = None
|
|
355
|
+
|
|
356
|
+
if message.files is not None:
|
|
357
|
+
# Ensure content is a list of parts
|
|
358
|
+
content = message_dict.get("content")
|
|
359
|
+
if isinstance(content, str): # wrap existing text
|
|
360
|
+
text = content
|
|
361
|
+
message_dict["content"] = [{"type": "text", "text": text}]
|
|
362
|
+
elif content is None:
|
|
363
|
+
message_dict["content"] = []
|
|
364
|
+
# Insert each file part before text parts
|
|
365
|
+
for file in message.files:
|
|
366
|
+
file_part = _format_file_for_message(file)
|
|
367
|
+
if file_part:
|
|
368
|
+
message_dict["content"].insert(0, file_part)
|
|
369
|
+
|
|
370
|
+
# Manually add the content field even if it is None
|
|
371
|
+
if message.content is None:
|
|
372
|
+
message_dict["content"] = ""
|
|
373
|
+
return message_dict
|
|
374
|
+
|
|
375
|
+
def invoke(
|
|
376
|
+
self,
|
|
377
|
+
messages: List[Message],
|
|
378
|
+
assistant_message: Message,
|
|
379
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
380
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
381
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
382
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
383
|
+
compress_tool_results: bool = False,
|
|
384
|
+
) -> ModelResponse:
|
|
268
385
|
"""
|
|
269
|
-
Send a chat completion request to the OpenAI API.
|
|
386
|
+
Send a chat completion request to the OpenAI API and parse the response.
|
|
270
387
|
|
|
271
388
|
Args:
|
|
272
389
|
messages (List[Message]): A list of messages to send to the model.
|
|
390
|
+
assistant_message (Message): The assistant message to populate.
|
|
391
|
+
response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
|
|
392
|
+
tools (Optional[List[Dict[str, Any]]]): The tools to use.
|
|
393
|
+
tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
|
|
394
|
+
compress_tool_results: Whether to compress tool results.
|
|
273
395
|
|
|
274
396
|
Returns:
|
|
275
|
-
|
|
397
|
+
ModelResponse: The chat completion response from the API.
|
|
276
398
|
"""
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
return self.get_client().beta.chat.completions.parse(
|
|
281
|
-
model=self.id,
|
|
282
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
283
|
-
**self.request_kwargs,
|
|
284
|
-
)
|
|
285
|
-
else:
|
|
286
|
-
raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
|
|
287
|
-
except Exception as e:
|
|
288
|
-
logger.error(f"Error from OpenAI API: {e}")
|
|
399
|
+
try:
|
|
400
|
+
if run_response and run_response.metrics:
|
|
401
|
+
run_response.metrics.set_time_to_first_token()
|
|
289
402
|
|
|
290
|
-
|
|
291
|
-
model=self.id,
|
|
292
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
293
|
-
**self.request_kwargs,
|
|
294
|
-
)
|
|
403
|
+
assistant_message.metrics.start_timer()
|
|
295
404
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
405
|
+
provider_response = self.get_client().chat.completions.create(
|
|
406
|
+
model=self.id,
|
|
407
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
|
|
408
|
+
**self.get_request_params(
|
|
409
|
+
response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
|
|
410
|
+
),
|
|
411
|
+
)
|
|
412
|
+
assistant_message.metrics.stop_timer()
|
|
299
413
|
|
|
300
|
-
|
|
301
|
-
|
|
414
|
+
# Parse the response into an Agno ModelResponse object
|
|
415
|
+
model_response = self._parse_provider_response(provider_response, response_format=response_format)
|
|
302
416
|
|
|
303
|
-
|
|
304
|
-
ChatCompletion: The chat completion response from the API.
|
|
305
|
-
"""
|
|
306
|
-
if self.response_format is not None and self.structured_outputs:
|
|
307
|
-
try:
|
|
308
|
-
if isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
|
|
309
|
-
return await self.get_async_client().beta.chat.completions.parse(
|
|
310
|
-
model=self.id,
|
|
311
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
312
|
-
**self.request_kwargs,
|
|
313
|
-
)
|
|
314
|
-
else:
|
|
315
|
-
raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
|
|
316
|
-
except Exception as e:
|
|
317
|
-
logger.error(f"Error from OpenAI API: {e}")
|
|
417
|
+
return model_response
|
|
318
418
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
419
|
+
except RateLimitError as e:
|
|
420
|
+
log_error(f"Rate limit error from OpenAI API: {e}")
|
|
421
|
+
error_message = e.response.json().get("error", {})
|
|
422
|
+
error_message = (
|
|
423
|
+
error_message.get("message", "Unknown model error")
|
|
424
|
+
if isinstance(error_message, dict)
|
|
425
|
+
else error_message
|
|
426
|
+
)
|
|
427
|
+
raise ModelProviderError(
|
|
428
|
+
message=error_message,
|
|
429
|
+
status_code=e.response.status_code,
|
|
430
|
+
model_name=self.name,
|
|
431
|
+
model_id=self.id,
|
|
432
|
+
) from e
|
|
433
|
+
except APIConnectionError as e:
|
|
434
|
+
log_error(f"API connection error from OpenAI API: {e}")
|
|
435
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
436
|
+
except APIStatusError as e:
|
|
437
|
+
log_error(f"API status error from OpenAI API: {e}")
|
|
438
|
+
try:
|
|
439
|
+
error_message = e.response.json().get("error", {})
|
|
440
|
+
except Exception:
|
|
441
|
+
error_message = e.response.text
|
|
442
|
+
error_message = (
|
|
443
|
+
error_message.get("message", "Unknown model error")
|
|
444
|
+
if isinstance(error_message, dict)
|
|
445
|
+
else error_message
|
|
446
|
+
)
|
|
447
|
+
raise ModelProviderError(
|
|
448
|
+
message=error_message,
|
|
449
|
+
status_code=e.response.status_code,
|
|
450
|
+
model_name=self.name,
|
|
451
|
+
model_id=self.id,
|
|
452
|
+
) from e
|
|
453
|
+
except ModelAuthenticationError as e:
|
|
454
|
+
log_error(f"Model authentication error from OpenAI API: {e}")
|
|
455
|
+
raise e
|
|
456
|
+
except Exception as e:
|
|
457
|
+
log_error(f"Error from OpenAI API: {e}")
|
|
458
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
324
459
|
|
|
325
|
-
def
|
|
460
|
+
async def ainvoke(
|
|
461
|
+
self,
|
|
462
|
+
messages: List[Message],
|
|
463
|
+
assistant_message: Message,
|
|
464
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
465
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
466
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
467
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
468
|
+
compress_tool_results: bool = False,
|
|
469
|
+
) -> ModelResponse:
|
|
326
470
|
"""
|
|
327
|
-
|
|
471
|
+
Sends an asynchronous chat completion request to the OpenAI API.
|
|
328
472
|
|
|
329
473
|
Args:
|
|
330
474
|
messages (List[Message]): A list of messages to send to the model.
|
|
475
|
+
assistant_message (Message): The assistant message to populate.
|
|
476
|
+
response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
|
|
477
|
+
tools (Optional[List[Dict[str, Any]]]): The tools to use.
|
|
478
|
+
tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
|
|
479
|
+
compress_tool_results: Whether to compress tool results.
|
|
331
480
|
|
|
332
481
|
Returns:
|
|
333
|
-
|
|
334
|
-
"""
|
|
335
|
-
yield from self.get_client().chat.completions.create(
|
|
336
|
-
model=self.id,
|
|
337
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
338
|
-
stream=True,
|
|
339
|
-
stream_options={"include_usage": True},
|
|
340
|
-
**self.request_kwargs,
|
|
341
|
-
) # type: ignore
|
|
342
|
-
|
|
343
|
-
async def ainvoke_stream(self, messages: List[Message]) -> Any:
|
|
482
|
+
ModelResponse: The chat completion response from the API.
|
|
344
483
|
"""
|
|
345
|
-
|
|
484
|
+
try:
|
|
485
|
+
if run_response and run_response.metrics:
|
|
486
|
+
run_response.metrics.set_time_to_first_token()
|
|
487
|
+
|
|
488
|
+
assistant_message.metrics.start_timer()
|
|
489
|
+
response = await self.get_async_client().chat.completions.create(
|
|
490
|
+
model=self.id,
|
|
491
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
|
|
492
|
+
**self.get_request_params(
|
|
493
|
+
response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
|
|
494
|
+
),
|
|
495
|
+
)
|
|
496
|
+
assistant_message.metrics.stop_timer()
|
|
346
497
|
|
|
347
|
-
|
|
348
|
-
|
|
498
|
+
# Parse the response into an Agno ModelResponse object
|
|
499
|
+
provider_response: ModelResponse = self._parse_provider_response(response, response_format=response_format)
|
|
349
500
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
501
|
+
return provider_response
|
|
502
|
+
|
|
503
|
+
except RateLimitError as e:
|
|
504
|
+
log_error(f"Rate limit error from OpenAI API: {e}")
|
|
505
|
+
error_message = e.response.json().get("error", {})
|
|
506
|
+
error_message = (
|
|
507
|
+
error_message.get("message", "Unknown model error")
|
|
508
|
+
if isinstance(error_message, dict)
|
|
509
|
+
else error_message
|
|
510
|
+
)
|
|
511
|
+
raise ModelProviderError(
|
|
512
|
+
message=error_message,
|
|
513
|
+
status_code=e.response.status_code,
|
|
514
|
+
model_name=self.name,
|
|
515
|
+
model_id=self.id,
|
|
516
|
+
) from e
|
|
517
|
+
except APIConnectionError as e:
|
|
518
|
+
log_error(f"API connection error from OpenAI API: {e}")
|
|
519
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
520
|
+
except APIStatusError as e:
|
|
521
|
+
log_error(f"API status error from OpenAI API: {e}")
|
|
522
|
+
try:
|
|
523
|
+
error_message = e.response.json().get("error", {})
|
|
524
|
+
except Exception:
|
|
525
|
+
error_message = e.response.text
|
|
526
|
+
error_message = (
|
|
527
|
+
error_message.get("message", "Unknown model error")
|
|
528
|
+
if isinstance(error_message, dict)
|
|
529
|
+
else error_message
|
|
530
|
+
)
|
|
531
|
+
raise ModelProviderError(
|
|
532
|
+
message=error_message,
|
|
533
|
+
status_code=e.response.status_code,
|
|
534
|
+
model_name=self.name,
|
|
535
|
+
model_id=self.id,
|
|
536
|
+
) from e
|
|
537
|
+
except ModelAuthenticationError as e:
|
|
538
|
+
log_error(f"Model authentication error from OpenAI API: {e}")
|
|
539
|
+
raise e
|
|
540
|
+
except Exception as e:
|
|
541
|
+
log_error(f"Error from OpenAI API: {e}")
|
|
542
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
362
543
|
|
|
363
|
-
def
|
|
544
|
+
def invoke_stream(
|
|
364
545
|
self,
|
|
365
|
-
assistant_message: Message,
|
|
366
546
|
messages: List[Message],
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
547
|
+
assistant_message: Message,
|
|
548
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
549
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
550
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
551
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
552
|
+
compress_tool_results: bool = False,
|
|
553
|
+
) -> Iterator[ModelResponse]:
|
|
370
554
|
"""
|
|
371
|
-
|
|
555
|
+
Send a streaming chat completion request to the OpenAI API.
|
|
372
556
|
|
|
373
557
|
Args:
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
model_response (ModelResponse): The model response.
|
|
377
|
-
tool_role (str): The role of the tool call. Defaults to "tool".
|
|
558
|
+
messages (List[Message]): A list of messages to send to the model.
|
|
559
|
+
compress_tool_results: Whether to compress tool results.
|
|
378
560
|
|
|
379
561
|
Returns:
|
|
380
|
-
|
|
562
|
+
Iterator[ModelResponse]: An iterator of model responses.
|
|
381
563
|
"""
|
|
382
|
-
if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
|
|
383
|
-
if model_response.content is None:
|
|
384
|
-
model_response.content = ""
|
|
385
|
-
if model_response.tool_calls is None:
|
|
386
|
-
model_response.tool_calls = []
|
|
387
|
-
function_call_results: List[Message] = []
|
|
388
|
-
function_calls_to_run: List[FunctionCall] = []
|
|
389
|
-
for tool_call in assistant_message.tool_calls:
|
|
390
|
-
_tool_call_id = tool_call.get("id")
|
|
391
|
-
_function_call = get_function_call_for_tool_call(tool_call, self._functions)
|
|
392
|
-
if _function_call is None:
|
|
393
|
-
messages.append(
|
|
394
|
-
Message(
|
|
395
|
-
role="tool",
|
|
396
|
-
tool_call_id=_tool_call_id,
|
|
397
|
-
content="Could not find function to call.",
|
|
398
|
-
)
|
|
399
|
-
)
|
|
400
|
-
continue
|
|
401
|
-
if _function_call.error is not None:
|
|
402
|
-
messages.append(
|
|
403
|
-
Message(
|
|
404
|
-
role="tool",
|
|
405
|
-
tool_call_id=_tool_call_id,
|
|
406
|
-
content=_function_call.error,
|
|
407
|
-
)
|
|
408
|
-
)
|
|
409
|
-
continue
|
|
410
|
-
function_calls_to_run.append(_function_call)
|
|
411
564
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
for
|
|
419
|
-
|
|
565
|
+
try:
|
|
566
|
+
if run_response and run_response.metrics:
|
|
567
|
+
run_response.metrics.set_time_to_first_token()
|
|
568
|
+
|
|
569
|
+
assistant_message.metrics.start_timer()
|
|
570
|
+
|
|
571
|
+
for chunk in self.get_client().chat.completions.create(
|
|
572
|
+
model=self.id,
|
|
573
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
|
|
574
|
+
stream=True,
|
|
575
|
+
stream_options={"include_usage": True},
|
|
576
|
+
**self.get_request_params(
|
|
577
|
+
response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
|
|
578
|
+
),
|
|
420
579
|
):
|
|
421
|
-
|
|
422
|
-
function_call_response.event == ModelResponseEvent.tool_call_completed.value
|
|
423
|
-
and function_call_response.tool_calls is not None
|
|
424
|
-
):
|
|
425
|
-
model_response.tool_calls.extend(function_call_response.tool_calls)
|
|
426
|
-
|
|
427
|
-
if len(function_call_results) > 0:
|
|
428
|
-
messages.extend(function_call_results)
|
|
580
|
+
yield self._parse_provider_response_delta(chunk)
|
|
429
581
|
|
|
430
|
-
|
|
431
|
-
return None
|
|
432
|
-
|
|
433
|
-
def update_usage_metrics(
|
|
434
|
-
self, assistant_message: Message, metrics: Metrics, response_usage: Optional[CompletionUsage]
|
|
435
|
-
) -> None:
|
|
436
|
-
"""
|
|
437
|
-
Update the usage metrics for the assistant message and the model.
|
|
438
|
-
|
|
439
|
-
Args:
|
|
440
|
-
assistant_message (Message): The assistant message.
|
|
441
|
-
metrics (Metrics): The metrics.
|
|
442
|
-
response_usage (Optional[CompletionUsage]): The response usage.
|
|
443
|
-
"""
|
|
444
|
-
# Update time taken to generate response
|
|
445
|
-
assistant_message.metrics["time"] = metrics.response_timer.elapsed
|
|
446
|
-
self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
|
|
447
|
-
if response_usage:
|
|
448
|
-
prompt_tokens = response_usage.prompt_tokens
|
|
449
|
-
completion_tokens = response_usage.completion_tokens
|
|
450
|
-
total_tokens = response_usage.total_tokens
|
|
451
|
-
|
|
452
|
-
if prompt_tokens is not None:
|
|
453
|
-
metrics.input_tokens = prompt_tokens
|
|
454
|
-
metrics.prompt_tokens = prompt_tokens
|
|
455
|
-
assistant_message.metrics["input_tokens"] = prompt_tokens
|
|
456
|
-
assistant_message.metrics["prompt_tokens"] = prompt_tokens
|
|
457
|
-
self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
|
|
458
|
-
self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
|
|
459
|
-
if completion_tokens is not None:
|
|
460
|
-
metrics.output_tokens = completion_tokens
|
|
461
|
-
metrics.completion_tokens = completion_tokens
|
|
462
|
-
assistant_message.metrics["output_tokens"] = completion_tokens
|
|
463
|
-
assistant_message.metrics["completion_tokens"] = completion_tokens
|
|
464
|
-
self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
|
|
465
|
-
self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
|
|
466
|
-
if total_tokens is not None:
|
|
467
|
-
metrics.total_tokens = total_tokens
|
|
468
|
-
assistant_message.metrics["total_tokens"] = total_tokens
|
|
469
|
-
self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
|
|
470
|
-
if response_usage.prompt_tokens_details is not None:
|
|
471
|
-
if isinstance(response_usage.prompt_tokens_details, dict):
|
|
472
|
-
metrics.prompt_tokens_details = response_usage.prompt_tokens_details
|
|
473
|
-
elif isinstance(response_usage.prompt_tokens_details, BaseModel):
|
|
474
|
-
metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
|
|
475
|
-
assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
|
|
476
|
-
if metrics.prompt_tokens_details is not None:
|
|
477
|
-
for k, v in metrics.prompt_tokens_details.items():
|
|
478
|
-
self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
|
|
479
|
-
if response_usage.completion_tokens_details is not None:
|
|
480
|
-
if isinstance(response_usage.completion_tokens_details, dict):
|
|
481
|
-
metrics.completion_tokens_details = response_usage.completion_tokens_details
|
|
482
|
-
elif isinstance(response_usage.completion_tokens_details, BaseModel):
|
|
483
|
-
metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
|
|
484
|
-
exclude_none=True
|
|
485
|
-
)
|
|
486
|
-
assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
|
|
487
|
-
if metrics.completion_tokens_details is not None:
|
|
488
|
-
for k, v in metrics.completion_tokens_details.items():
|
|
489
|
-
self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
|
|
582
|
+
assistant_message.metrics.stop_timer()
|
|
490
583
|
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
content=response_message.content,
|
|
511
|
-
)
|
|
512
|
-
if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
|
|
513
|
-
try:
|
|
514
|
-
assistant_message.tool_calls = [t.model_dump() for t in response_message.tool_calls]
|
|
515
|
-
except Exception as e:
|
|
516
|
-
logger.warning(f"Error processing tool calls: {e}")
|
|
517
|
-
if hasattr(response_message, "audio") and response_message.audio is not None:
|
|
584
|
+
except RateLimitError as e:
|
|
585
|
+
log_error(f"Rate limit error from OpenAI API: {e}")
|
|
586
|
+
error_message = e.response.json().get("error", {})
|
|
587
|
+
error_message = (
|
|
588
|
+
error_message.get("message", "Unknown model error")
|
|
589
|
+
if isinstance(error_message, dict)
|
|
590
|
+
else error_message
|
|
591
|
+
)
|
|
592
|
+
raise ModelProviderError(
|
|
593
|
+
message=error_message,
|
|
594
|
+
status_code=e.response.status_code,
|
|
595
|
+
model_name=self.name,
|
|
596
|
+
model_id=self.id,
|
|
597
|
+
) from e
|
|
598
|
+
except APIConnectionError as e:
|
|
599
|
+
log_error(f"API connection error from OpenAI API: {e}")
|
|
600
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
601
|
+
except APIStatusError as e:
|
|
602
|
+
log_error(f"API status error from OpenAI API: {e}")
|
|
518
603
|
try:
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
604
|
+
error_message = e.response.json().get("error", {})
|
|
605
|
+
except Exception:
|
|
606
|
+
error_message = e.response.text
|
|
607
|
+
error_message = (
|
|
608
|
+
error_message.get("message", "Unknown model error")
|
|
609
|
+
if isinstance(error_message, dict)
|
|
610
|
+
else error_message
|
|
611
|
+
)
|
|
612
|
+
raise ModelProviderError(
|
|
613
|
+
message=error_message,
|
|
614
|
+
status_code=e.response.status_code,
|
|
615
|
+
model_name=self.name,
|
|
616
|
+
model_id=self.id,
|
|
617
|
+
) from e
|
|
618
|
+
except ModelAuthenticationError as e:
|
|
619
|
+
log_error(f"Model authentication error from OpenAI API: {e}")
|
|
620
|
+
raise e
|
|
621
|
+
except Exception as e:
|
|
622
|
+
log_error(f"Error from OpenAI API: {e}")
|
|
623
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
531
624
|
|
|
532
|
-
def
|
|
625
|
+
async def ainvoke_stream(
|
|
626
|
+
self,
|
|
627
|
+
messages: List[Message],
|
|
628
|
+
assistant_message: Message,
|
|
629
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
630
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
631
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
632
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
633
|
+
compress_tool_results: bool = False,
|
|
634
|
+
) -> AsyncIterator[ModelResponse]:
|
|
533
635
|
"""
|
|
534
|
-
|
|
636
|
+
Sends an asynchronous streaming chat completion request to the OpenAI API.
|
|
535
637
|
|
|
536
638
|
Args:
|
|
537
|
-
messages (List[Message]): A list of messages.
|
|
639
|
+
messages (List[Message]): A list of messages to send to the model.
|
|
640
|
+
compress_tool_results: Whether to compress tool results.
|
|
538
641
|
|
|
539
642
|
Returns:
|
|
540
|
-
|
|
643
|
+
Any: An asynchronous iterator of model responses.
|
|
541
644
|
"""
|
|
542
|
-
logger.debug("---------- OpenAIChat Response Start ----------")
|
|
543
|
-
self._log_messages(messages)
|
|
544
|
-
model_response = ModelResponse()
|
|
545
|
-
metrics = Metrics()
|
|
546
|
-
|
|
547
|
-
# -*- Generate response
|
|
548
|
-
metrics.start_response_timer()
|
|
549
|
-
response: Union[ChatCompletion, ParsedChatCompletion] = self.invoke(messages=messages)
|
|
550
|
-
metrics.stop_response_timer()
|
|
551
|
-
|
|
552
|
-
# -*- Parse response
|
|
553
|
-
response_message: ChatCompletionMessage = response.choices[0].message
|
|
554
|
-
response_usage: Optional[CompletionUsage] = response.usage
|
|
555
|
-
response_audio: Optional[ChatCompletionAudio] = response_message.audio
|
|
556
|
-
|
|
557
|
-
# -*- Parse transcript if available
|
|
558
|
-
if response_audio:
|
|
559
|
-
if response_audio.transcript and not response_message.content:
|
|
560
|
-
response_message.content = response_audio.transcript
|
|
561
645
|
|
|
562
|
-
# -*- Parse structured outputs
|
|
563
646
|
try:
|
|
564
|
-
if
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
647
|
+
if run_response and run_response.metrics:
|
|
648
|
+
run_response.metrics.set_time_to_first_token()
|
|
649
|
+
|
|
650
|
+
assistant_message.metrics.start_timer()
|
|
651
|
+
|
|
652
|
+
async_stream = await self.get_async_client().chat.completions.create(
|
|
653
|
+
model=self.id,
|
|
654
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
|
|
655
|
+
stream=True,
|
|
656
|
+
stream_options={"include_usage": True},
|
|
657
|
+
**self.get_request_params(
|
|
658
|
+
response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
|
|
659
|
+
),
|
|
660
|
+
)
|
|
574
661
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
662
|
+
async for chunk in async_stream:
|
|
663
|
+
yield self._parse_provider_response_delta(chunk)
|
|
664
|
+
|
|
665
|
+
assistant_message.metrics.stop_timer()
|
|
579
666
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
# -*- Update model response with assistant message content and audio
|
|
588
|
-
if assistant_message.content is not None:
|
|
589
|
-
# add the content to the model response
|
|
590
|
-
model_response.content = assistant_message.get_content_string()
|
|
591
|
-
if assistant_message.audio_output is not None:
|
|
592
|
-
# add the audio to the model response
|
|
593
|
-
model_response.audio = assistant_message.audio_output
|
|
594
|
-
|
|
595
|
-
# -*- Handle tool calls
|
|
596
|
-
tool_role = "tool"
|
|
597
|
-
if (
|
|
598
|
-
self.handle_tool_calls(
|
|
599
|
-
assistant_message=assistant_message,
|
|
600
|
-
messages=messages,
|
|
601
|
-
model_response=model_response,
|
|
602
|
-
tool_role=tool_role,
|
|
667
|
+
except RateLimitError as e:
|
|
668
|
+
log_error(f"Rate limit error from OpenAI API: {e}")
|
|
669
|
+
error_message = e.response.json().get("error", {})
|
|
670
|
+
error_message = (
|
|
671
|
+
error_message.get("message", "Unknown model error")
|
|
672
|
+
if isinstance(error_message, dict)
|
|
673
|
+
else error_message
|
|
603
674
|
)
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
675
|
+
raise ModelProviderError(
|
|
676
|
+
message=error_message,
|
|
677
|
+
status_code=e.response.status_code,
|
|
678
|
+
model_name=self.name,
|
|
679
|
+
model_id=self.id,
|
|
680
|
+
) from e
|
|
681
|
+
except APIConnectionError as e:
|
|
682
|
+
log_error(f"API connection error from OpenAI API: {e}")
|
|
683
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
684
|
+
except APIStatusError as e:
|
|
685
|
+
log_error(f"API status error from OpenAI API: {e}")
|
|
686
|
+
try:
|
|
687
|
+
error_message = e.response.json().get("error", {})
|
|
688
|
+
except Exception:
|
|
689
|
+
error_message = e.response.text
|
|
690
|
+
error_message = (
|
|
691
|
+
error_message.get("message", "Unknown model error")
|
|
692
|
+
if isinstance(error_message, dict)
|
|
693
|
+
else error_message
|
|
694
|
+
)
|
|
695
|
+
raise ModelProviderError(
|
|
696
|
+
message=error_message,
|
|
697
|
+
status_code=e.response.status_code,
|
|
698
|
+
model_name=self.name,
|
|
699
|
+
model_id=self.id,
|
|
700
|
+
) from e
|
|
701
|
+
except ModelAuthenticationError as e:
|
|
702
|
+
log_error(f"Model authentication error from OpenAI API: {e}")
|
|
703
|
+
raise e
|
|
704
|
+
except Exception as e:
|
|
705
|
+
log_error(f"Error from OpenAI API: {e}")
|
|
706
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
609
707
|
|
|
610
|
-
|
|
708
|
+
@staticmethod
|
|
709
|
+
def parse_tool_calls(tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
|
|
611
710
|
"""
|
|
612
|
-
|
|
711
|
+
Build tool calls from streamed tool call data.
|
|
613
712
|
|
|
614
713
|
Args:
|
|
615
|
-
|
|
714
|
+
tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
|
|
616
715
|
|
|
617
716
|
Returns:
|
|
618
|
-
|
|
717
|
+
List[Dict[str, Any]]: The built tool calls.
|
|
718
|
+
"""
|
|
719
|
+
tool_calls: List[Dict[str, Any]] = []
|
|
720
|
+
for _tool_call in tool_calls_data:
|
|
721
|
+
_index = _tool_call.index or 0
|
|
722
|
+
_tool_call_id = _tool_call.id
|
|
723
|
+
_tool_call_type = _tool_call.type
|
|
724
|
+
_function_name = _tool_call.function.name if _tool_call.function else None
|
|
725
|
+
_function_arguments = _tool_call.function.arguments if _tool_call.function else None
|
|
726
|
+
|
|
727
|
+
if len(tool_calls) <= _index:
|
|
728
|
+
tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
|
|
729
|
+
tool_call_entry = tool_calls[_index]
|
|
730
|
+
if not tool_call_entry:
|
|
731
|
+
tool_call_entry["id"] = _tool_call_id
|
|
732
|
+
tool_call_entry["type"] = _tool_call_type
|
|
733
|
+
tool_call_entry["function"] = {
|
|
734
|
+
"name": _function_name or "",
|
|
735
|
+
"arguments": _function_arguments or "",
|
|
736
|
+
}
|
|
737
|
+
else:
|
|
738
|
+
if _function_name:
|
|
739
|
+
tool_call_entry["function"]["name"] += _function_name
|
|
740
|
+
if _function_arguments:
|
|
741
|
+
tool_call_entry["function"]["arguments"] += _function_arguments
|
|
742
|
+
if _tool_call_id:
|
|
743
|
+
tool_call_entry["id"] = _tool_call_id
|
|
744
|
+
if _tool_call_type:
|
|
745
|
+
tool_call_entry["type"] = _tool_call_type
|
|
746
|
+
return tool_calls
|
|
747
|
+
|
|
748
|
+
    def _parse_provider_response(
        self,
        response: ChatCompletion,
        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
    ) -> ModelResponse:
        """
        Parse the OpenAI response into a ModelResponse.

        Args:
            response (ChatCompletion): The completion returned by the OpenAI client.
            response_format: Accepted for interface symmetry; not read in this method.

        Returns:
            ModelResponse: Role, content, reasoning, tool calls, audio, usage metrics,
                and provider metadata extracted from the first choice.

        Raises:
            ModelProviderError: If the response carries an ``error`` payload
                (some OpenAI-compatible providers return errors in-band).
        """
        model_response = ModelResponse()

        # Some OpenAI-compatible endpoints report failures in-band via an `error` field.
        if hasattr(response, "error") and response.error:  # type: ignore
            raise ModelProviderError(
                message=response.error.get("message", "Unknown model error"),  # type: ignore
                model_name=self.name,
                model_id=self.id,
            )

        # Get response message (only the first choice is consumed)
        response_message = response.choices[0].message

        # Add role
        if response_message.role is not None:
            model_response.role = response_message.role
        # Add content
        if response_message.content is not None:
            model_response.content = response_message.content

        # Extract thinking content before any structured parsing; when a <think>-style
        # section is found, content is replaced by the remaining output text.
        if model_response.content:
            reasoning_content, output_content = extract_thinking_content(model_response.content)
            if reasoning_content:
                model_response.reasoning_content = reasoning_content
                model_response.content = output_content
        # Add tool calls (best-effort: a serialization failure is logged, not raised)
        if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
            try:
                model_response.tool_calls = [t.model_dump() for t in response_message.tool_calls]
            except Exception as e:
                log_warning(f"Error processing tool calls: {e}")

        # Add audio transcript to content if available (fallback when content is empty)
        response_audio: Optional[ChatCompletionAudio] = response_message.audio
        if response_audio and response_audio.transcript and not model_response.content:
            model_response.content = response_audio.transcript

        # Add audio if present; handles both dict-shaped and object-shaped payloads
        if hasattr(response_message, "audio") and response_message.audio is not None:
            # If the audio output modality is requested, we can extract an audio response
            try:
                if isinstance(response_message.audio, dict):
                    model_response.audio = Audio(
                        id=response_message.audio.get("id"),
                        content=response_message.audio.get("data"),
                        expires_at=response_message.audio.get("expires_at"),
                        transcript=response_message.audio.get("transcript"),
                    )
                else:
                    model_response.audio = Audio(
                        id=response_message.audio.id,
                        content=response_message.audio.data,
                        expires_at=response_message.audio.expires_at,
                        transcript=response_message.audio.transcript,
                    )
            except Exception as e:
                log_warning(f"Error processing audio: {e}")

        # Provider-specific reasoning fields; `reasoning_content` takes precedence
        # over `reasoning` (field names vary across OpenAI-compatible providers).
        if hasattr(response_message, "reasoning_content") and response_message.reasoning_content is not None:  # type: ignore
            model_response.reasoning_content = response_message.reasoning_content  # type: ignore
        elif hasattr(response_message, "reasoning") and response_message.reasoning is not None:  # type: ignore
            model_response.reasoning_content = response_message.reasoning  # type: ignore

        if response.usage is not None:
            model_response.response_usage = self._get_metrics(response.usage)

        if model_response.provider_data is None:
            model_response.provider_data = {}

        # Attach provider metadata for downstream consumers
        if response.id:
            model_response.provider_data["id"] = response.id
        if response.system_fingerprint:
            model_response.provider_data["system_fingerprint"] = response.system_fingerprint
        if response.model_extra:
            model_response.provider_data["model_extra"] = response.model_extra

        return model_response
|
|
833
|
+
|
|
834
|
+
    def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
        """
        Parse the OpenAI streaming response into a ModelResponse.

        Args:
            response_delta: Raw response chunk from OpenAI

        Returns:
            ModelResponse: Parsed response data
        """
        model_response = ModelResponse()

        if response_delta.choices and len(response_delta.choices) > 0:
            choice_delta: ChoiceDelta = response_delta.choices[0].delta
            if choice_delta:
                # Add content
                if choice_delta.content is not None:
                    model_response.content = choice_delta.content

                # Provider metadata is attached to every chunk that carries a delta
                if model_response.provider_data is None:
                    model_response.provider_data = {}

                if response_delta.id:
                    model_response.provider_data["id"] = response_delta.id
                if response_delta.system_fingerprint:
                    model_response.provider_data["system_fingerprint"] = response_delta.system_fingerprint
                if response_delta.model_extra:
                    model_response.provider_data["model_extra"] = response_delta.model_extra

                # Add tool calls (raw streamed fragments; merged later by parse_tool_calls)
                if choice_delta.tool_calls is not None:
                    model_response.tool_calls = choice_delta.tool_calls  # type: ignore

                # Provider-specific reasoning fields; `reasoning_content` wins over `reasoning`
                if hasattr(choice_delta, "reasoning_content") and choice_delta.reasoning_content is not None:
                    model_response.reasoning_content = choice_delta.reasoning_content
                elif hasattr(choice_delta, "reasoning") and choice_delta.reasoning is not None:
                    model_response.reasoning_content = choice_delta.reasoning

                # Add audio if present; handles both dict-shaped and object-shaped payloads
                if hasattr(choice_delta, "audio") and choice_delta.audio is not None:
                    try:
                        audio_data = None
                        audio_id = None
                        audio_expires_at = None
                        audio_transcript = None

                        if isinstance(choice_delta.audio, dict):
                            audio_data = choice_delta.audio.get("data")
                            audio_id = choice_delta.audio.get("id")
                            audio_expires_at = choice_delta.audio.get("expires_at")
                            audio_transcript = choice_delta.audio.get("transcript")
                        else:
                            # NOTE(review): assumes the audio object exposes these
                            # attributes; an AttributeError is swallowed by the
                            # except below and only logged.
                            audio_data = choice_delta.audio.data
                            audio_id = choice_delta.audio.id
                            audio_expires_at = choice_delta.audio.expires_at
                            audio_transcript = choice_delta.audio.transcript

                        # Only create Audio object if there's actual content
                        if audio_data is not None:
                            model_response.audio = Audio(
                                id=audio_id,
                                content=audio_data,
                                expires_at=audio_expires_at,
                                transcript=audio_transcript,
                                sample_rate=24000,
                                mime_type="pcm16",
                            )
                        # If no content but there's transcript/metadata, create minimal Audio object
                        elif audio_transcript is not None or audio_id is not None:
                            model_response.audio = Audio(
                                id=audio_id or str(uuid4()),
                                content=b"",
                                expires_at=audio_expires_at,
                                transcript=audio_transcript,
                                sample_rate=24000,
                                mime_type="pcm16",
                            )
                    except Exception as e:
                        log_warning(f"Error processing audio: {e}")

        # Add usage metrics if present (the final chunk when include_usage is requested)
        if response_delta.usage is not None:
            model_response.response_usage = self._get_metrics(response_delta.usage)

        return model_response
|
|
892
920
|
|
|
893
|
-
|
|
921
|
+
def _get_metrics(self, response_usage: CompletionUsage) -> Metrics:
|
|
894
922
|
"""
|
|
895
|
-
|
|
923
|
+
Parse the given OpenAI-specific usage into an Agno Metrics object.
|
|
896
924
|
|
|
897
925
|
Args:
|
|
898
|
-
|
|
926
|
+
response_usage: Usage data from OpenAI
|
|
899
927
|
|
|
900
928
|
Returns:
|
|
901
|
-
|
|
929
|
+
Metrics: Parsed metrics data
|
|
902
930
|
"""
|
|
903
|
-
logger.debug("---------- OpenAIChat Async Response Start ----------")
|
|
904
|
-
self._log_messages(messages)
|
|
905
|
-
stream_data: StreamData = StreamData()
|
|
906
|
-
metrics: Metrics = Metrics()
|
|
907
|
-
|
|
908
|
-
# -*- Generate response
|
|
909
|
-
metrics.start_response_timer()
|
|
910
|
-
async for response in self.ainvoke_stream(messages=messages):
|
|
911
|
-
if response.choices and len(response.choices) > 0:
|
|
912
|
-
metrics.completion_tokens += 1
|
|
913
|
-
if metrics.completion_tokens == 1:
|
|
914
|
-
metrics.time_to_first_token = metrics.response_timer.elapsed
|
|
915
|
-
|
|
916
|
-
response_delta: ChoiceDelta = response.choices[0].delta
|
|
917
|
-
|
|
918
|
-
if response_delta.content is not None:
|
|
919
|
-
stream_data.response_content += response_delta.content
|
|
920
|
-
yield ModelResponse(content=response_delta.content)
|
|
921
|
-
|
|
922
|
-
if hasattr(response_delta, "audio"):
|
|
923
|
-
response_audio = response_delta.audio
|
|
924
|
-
stream_data.response_audio = response_audio
|
|
925
|
-
if stream_data.response_audio:
|
|
926
|
-
yield ModelResponse(
|
|
927
|
-
audio=AudioOutput(
|
|
928
|
-
id=stream_data.response_audio.id,
|
|
929
|
-
content=stream_data.response_audio.data,
|
|
930
|
-
expires_at=stream_data.response_audio.expires_at,
|
|
931
|
-
transcript=stream_data.response_audio.transcript,
|
|
932
|
-
)
|
|
933
|
-
)
|
|
934
|
-
|
|
935
|
-
if response_delta.tool_calls is not None:
|
|
936
|
-
if stream_data.response_tool_calls is None:
|
|
937
|
-
stream_data.response_tool_calls = []
|
|
938
|
-
stream_data.response_tool_calls.extend(response_delta.tool_calls)
|
|
939
|
-
|
|
940
|
-
if response.usage is not None:
|
|
941
|
-
self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
|
|
942
|
-
metrics.stop_response_timer()
|
|
943
|
-
|
|
944
|
-
# -*- Create assistant message
|
|
945
|
-
assistant_message = Message(role="assistant")
|
|
946
|
-
if stream_data.response_content != "":
|
|
947
|
-
assistant_message.content = stream_data.response_content
|
|
948
|
-
|
|
949
|
-
if stream_data.response_audio is not None:
|
|
950
|
-
assistant_message.audio_output = AudioOutput(
|
|
951
|
-
id=stream_data.response_audio.id,
|
|
952
|
-
content=stream_data.response_audio.data,
|
|
953
|
-
expires_at=stream_data.response_audio.expires_at,
|
|
954
|
-
transcript=stream_data.response_audio.transcript,
|
|
955
|
-
)
|
|
956
|
-
|
|
957
|
-
if stream_data.response_tool_calls is not None:
|
|
958
|
-
_tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
|
|
959
|
-
if len(_tool_calls) > 0:
|
|
960
|
-
assistant_message.tool_calls = _tool_calls
|
|
961
|
-
|
|
962
|
-
self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
|
|
963
|
-
|
|
964
|
-
# -*- Add assistant message to messages
|
|
965
|
-
messages.append(assistant_message)
|
|
966
931
|
|
|
967
|
-
|
|
968
|
-
assistant_message.log()
|
|
969
|
-
metrics.log()
|
|
932
|
+
metrics = Metrics()
|
|
970
933
|
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
for tool_call_response in self.handle_stream_tool_calls(
|
|
975
|
-
assistant_message=assistant_message, messages=messages, tool_role=tool_role
|
|
976
|
-
):
|
|
977
|
-
yield tool_call_response
|
|
978
|
-
async for post_tool_call_response in self.ahandle_post_tool_call_messages_stream(messages=messages):
|
|
979
|
-
yield post_tool_call_response
|
|
980
|
-
logger.debug("---------- OpenAIChat Async Response End ----------")
|
|
934
|
+
metrics.input_tokens = response_usage.prompt_tokens or 0
|
|
935
|
+
metrics.output_tokens = response_usage.completion_tokens or 0
|
|
936
|
+
metrics.total_tokens = response_usage.total_tokens or 0
|
|
981
937
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
938
|
+
# Add the prompt_tokens_details field
|
|
939
|
+
if prompt_token_details := response_usage.prompt_tokens_details:
|
|
940
|
+
metrics.audio_input_tokens = prompt_token_details.audio_tokens or 0
|
|
941
|
+
metrics.cache_read_tokens = prompt_token_details.cached_tokens or 0
|
|
985
942
|
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
List[Dict[str, Any]]: The built tool calls.
|
|
991
|
-
"""
|
|
943
|
+
# Add the completion_tokens_details field
|
|
944
|
+
if completion_tokens_details := response_usage.completion_tokens_details:
|
|
945
|
+
metrics.audio_output_tokens = completion_tokens_details.audio_tokens or 0
|
|
946
|
+
metrics.reasoning_tokens = completion_tokens_details.reasoning_tokens or 0
|
|
992
947
|
|
|
993
|
-
return
|
|
948
|
+
return metrics
|