agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,40 @@
1
+ from collections.abc import AsyncIterator
1
2
  from dataclasses import dataclass
2
3
  from os import getenv
3
- from typing import Any, Dict, Iterator, List, Optional, Union
4
+ from typing import Any, Dict, Iterator, List, Literal, Optional, Type, Union
5
+ from uuid import uuid4
4
6
 
5
7
  import httpx
6
8
  from pydantic import BaseModel
7
9
 
8
- from agno.media import AudioOutput
9
- from agno.models.base import Metrics, Model
10
+ from agno.exceptions import ModelAuthenticationError, ModelProviderError
11
+ from agno.media import Audio
12
+ from agno.models.base import Model
10
13
  from agno.models.message import Message
11
- from agno.models.response import ModelResponse, ModelResponseEvent
12
- from agno.tools.function import FunctionCall
13
- from agno.utils.log import logger
14
- from agno.utils.tools import get_function_call_for_tool_call
14
+ from agno.models.metrics import Metrics
15
+ from agno.models.response import ModelResponse
16
+ from agno.run.agent import RunOutput
17
+ from agno.run.team import TeamRunOutput
18
+ from agno.utils.http import get_default_async_client, get_default_sync_client
19
+ from agno.utils.log import log_debug, log_error, log_warning
20
+ from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
21
+ from agno.utils.reasoning import extract_thinking_content
15
22
 
16
23
  try:
24
+ from openai import APIConnectionError, APIStatusError, RateLimitError
17
25
  from openai import AsyncOpenAI as AsyncOpenAIClient
18
26
  from openai import OpenAI as OpenAIClient
19
- from openai.types.chat.chat_completion import ChatCompletion
20
- from openai.types.chat.chat_completion_chunk import (
21
- ChatCompletionChunk,
22
- ChoiceDelta,
23
- ChoiceDeltaToolCall,
24
- )
25
- from openai.types.chat.chat_completion_message import ChatCompletionAudio, ChatCompletionMessage
26
- from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
27
- from openai.types.completion_usage import CompletionUsage
28
- except ModuleNotFoundError:
27
+ from openai.types import CompletionUsage
28
+ from openai.types.chat import ChatCompletion, ChatCompletionAudio, ChatCompletionChunk
29
+ from openai.types.chat.chat_completion_chunk import ChoiceDelta, ChoiceDeltaToolCall
30
+ except (ImportError, ModuleNotFoundError):
29
31
  raise ImportError("`openai` not installed. Please install using `pip install openai`")
30
32
 
31
33
 
32
- @dataclass
33
- class StreamData:
34
- response_content: str = ""
35
- response_audio: Optional[ChatCompletionAudio] = None
36
- response_tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
37
-
38
-
39
34
  @dataclass
40
35
  class OpenAIChat(Model):
41
36
  """
42
- A class for interacting with OpenAI models.
37
+ A class for interacting with OpenAI models using the Chat completions API.
43
38
 
44
39
  For more information, see: https://platform.openai.com/docs/api-reference/chat/create
45
40
  """
@@ -47,10 +42,12 @@ class OpenAIChat(Model):
47
42
  id: str = "gpt-4o"
48
43
  name: str = "OpenAIChat"
49
44
  provider: str = "OpenAI"
50
- supports_structured_outputs: bool = True
45
+ supports_native_structured_outputs: bool = True
51
46
 
52
47
  # Request parameters
53
48
  store: Optional[bool] = None
49
+ reasoning_effort: Optional[str] = None
50
+ verbosity: Optional[Literal["low", "medium", "high"]] = None
54
51
  metadata: Optional[Dict[str, Any]] = None
55
52
  frequency_penalty: Optional[float] = None
56
53
  logit_bias: Optional[Any] = None
@@ -58,18 +55,23 @@ class OpenAIChat(Model):
58
55
  top_logprobs: Optional[int] = None
59
56
  max_tokens: Optional[int] = None
60
57
  max_completion_tokens: Optional[int] = None
61
- modalities: Optional[List[str]] = None
62
- audio: Optional[Dict[str, Any]] = None
58
+ modalities: Optional[List[str]] = None # "text" and/or "audio"
59
+ audio: Optional[Dict[str, Any]] = (
60
+ None # E.g. {"voice": "alloy", "format": "wav"}. `format` must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. `voice` must be one of `ash`, `ballad`, `coral`, `sage`, `verse`, `alloy`, `echo`, and `shimmer`.
61
+ )
63
62
  presence_penalty: Optional[float] = None
64
- response_format: Optional[Any] = None
65
63
  seed: Optional[int] = None
66
64
  stop: Optional[Union[str, List[str]]] = None
67
65
  temperature: Optional[float] = None
68
66
  user: Optional[str] = None
69
67
  top_p: Optional[float] = None
68
+ service_tier: Optional[str] = None # "auto" | "default" | "flex" | "priority", defaults to "auto" when not set
69
+ strict_output: bool = True # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
70
70
  extra_headers: Optional[Any] = None
71
71
  extra_query: Optional[Any] = None
72
+ extra_body: Optional[Any] = None
72
73
  request_params: Optional[Dict[str, Any]] = None
74
+ role_map: Optional[Dict[str, str]] = None
73
75
 
74
76
  # Client parameters
75
77
  api_key: Optional[str] = None
@@ -79,126 +81,192 @@ class OpenAIChat(Model):
79
81
  max_retries: Optional[int] = None
80
82
  default_headers: Optional[Any] = None
81
83
  default_query: Optional[Any] = None
82
- http_client: Optional[httpx.Client] = None
84
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
83
85
  client_params: Optional[Dict[str, Any]] = None
84
86
 
85
- # OpenAI clients
87
+ # Cached clients to avoid recreating them on every request
86
88
  client: Optional[OpenAIClient] = None
87
89
  async_client: Optional[AsyncOpenAIClient] = None
88
90
 
89
- # Internal parameters. Not used for API requests
90
- # Whether to use the structured outputs with this Model.
91
- structured_outputs: bool = False
92
-
93
- # Whether to override the system role.
94
- override_system_role: bool = True
95
- # The role to map the system message to.
96
- system_message_role: str = "developer"
91
+ # The role to map the message role to.
92
+ default_role_map = {
93
+ "system": "developer",
94
+ "user": "user",
95
+ "assistant": "assistant",
96
+ "tool": "tool",
97
+ "model": "assistant",
98
+ }
97
99
 
98
100
  def _get_client_params(self) -> Dict[str, Any]:
99
- client_params: Dict[str, Any] = {}
100
-
101
- self.api_key = self.api_key or getenv("OPENAI_API_KEY")
101
+ # Fetch API key from env if not already set
102
102
  if not self.api_key:
103
- logger.error("OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.")
103
+ self.api_key = getenv("OPENAI_API_KEY")
104
+ if not self.api_key:
105
+ raise ModelAuthenticationError(
106
+ message="OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.",
107
+ model_name=self.name,
108
+ )
104
109
 
105
- client_params.update(
106
- {
107
- "api_key": self.api_key,
108
- "organization": self.organization,
109
- "base_url": self.base_url,
110
- "timeout": self.timeout,
111
- "max_retries": self.max_retries,
112
- "default_headers": self.default_headers,
113
- "default_query": self.default_query,
114
- }
115
- )
116
- if self.client_params is not None:
110
+ # Define base client params
111
+ base_params = {
112
+ "api_key": self.api_key,
113
+ "organization": self.organization,
114
+ "base_url": self.base_url,
115
+ "timeout": self.timeout,
116
+ "max_retries": self.max_retries,
117
+ "default_headers": self.default_headers,
118
+ "default_query": self.default_query,
119
+ }
120
+
121
+ # Create client_params dict with non-None values
122
+ client_params = {k: v for k, v in base_params.items() if v is not None}
123
+
124
+ # Add additional client params if provided
125
+ if self.client_params:
117
126
  client_params.update(self.client_params)
118
-
119
- # Remove None
120
- client_params = {k: v for k, v in client_params.items() if v is not None}
121
127
  return client_params
122
128
 
123
129
  def get_client(self) -> OpenAIClient:
124
130
  """
125
- Returns an OpenAI client.
131
+ Returns an OpenAI client. Caches the client to avoid recreating it on every request.
126
132
 
127
133
  Returns:
128
134
  OpenAIClient: An instance of the OpenAI client.
129
135
  """
130
- if self.client:
136
+ # Return cached client if it exists and is not closed
137
+ if self.client is not None and not self.client.is_closed():
131
138
  return self.client
132
139
 
140
+ log_debug(f"Creating new sync OpenAI client for model {self.id}")
133
141
  client_params: Dict[str, Any] = self._get_client_params()
134
- if self.http_client is not None:
135
- client_params["http_client"] = self.http_client
136
- return OpenAIClient(**client_params)
142
+ if self.http_client:
143
+ if isinstance(self.http_client, httpx.Client):
144
+ client_params["http_client"] = self.http_client
145
+ else:
146
+ log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
147
+ # Use global sync client when user http_client is invalid
148
+ client_params["http_client"] = get_default_sync_client()
149
+ else:
150
+ # Use global sync client when no custom http_client is provided
151
+ client_params["http_client"] = get_default_sync_client()
152
+
153
+ # Create and cache the client
154
+ self.client = OpenAIClient(**client_params)
155
+ return self.client
137
156
 
138
157
  def get_async_client(self) -> AsyncOpenAIClient:
139
158
  """
140
- Returns an asynchronous OpenAI client.
159
+ Returns an asynchronous OpenAI client. Caches the client to avoid recreating it on every request.
141
160
 
142
161
  Returns:
143
162
  AsyncOpenAIClient: An instance of the asynchronous OpenAI client.
144
163
  """
145
- if self.async_client:
164
+ # Return cached client if it exists and is not closed
165
+ if self.async_client is not None and not self.async_client.is_closed():
146
166
  return self.async_client
147
167
 
168
+ log_debug(f"Creating new async OpenAI client for model {self.id}")
148
169
  client_params: Dict[str, Any] = self._get_client_params()
149
170
  if self.http_client:
150
- client_params["http_client"] = self.http_client
171
+ if isinstance(self.http_client, httpx.AsyncClient):
172
+ client_params["http_client"] = self.http_client
173
+ else:
174
+ log_warning(
175
+ "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
176
+ )
177
+ # Use global async client when user http_client is invalid
178
+ client_params["http_client"] = get_default_async_client()
151
179
  else:
152
- # Create a new async HTTP client with custom limits
153
- client_params["http_client"] = httpx.AsyncClient(
154
- limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
155
- )
156
- return AsyncOpenAIClient(**client_params)
180
+ # Use global async client when no custom http_client is provided
181
+ client_params["http_client"] = get_default_async_client()
182
+
183
+ # Create and cache the client
184
+ self.async_client = AsyncOpenAIClient(**client_params)
185
+ return self.async_client
157
186
 
158
- @property
159
- def request_kwargs(self) -> Dict[str, Any]:
187
+ def get_request_params(
188
+ self,
189
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
190
+ tools: Optional[List[Dict[str, Any]]] = None,
191
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
192
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
193
+ ) -> Dict[str, Any]:
160
194
  """
161
195
  Returns keyword arguments for API requests.
162
196
 
163
197
  Returns:
164
198
  Dict[str, Any]: A dictionary of keyword arguments for API requests.
165
199
  """
166
- request_params: Dict[str, Any] = {}
167
-
168
- request_params.update(
169
- {
170
- "store": self.store,
171
- "frequency_penalty": self.frequency_penalty,
172
- "logit_bias": self.logit_bias,
173
- "logprobs": self.logprobs,
174
- "top_logprobs": self.top_logprobs,
175
- "max_tokens": self.max_tokens,
176
- "max_completion_tokens": self.max_completion_tokens,
177
- "modalities": self.modalities,
178
- "audio": self.audio,
179
- "presence_penalty": self.presence_penalty,
180
- "response_format": self.response_format,
181
- "seed": self.seed,
182
- "stop": self.stop,
183
- "temperature": self.temperature,
184
- "user": self.user,
185
- "top_p": self.top_p,
186
- "extra_headers": self.extra_headers,
187
- "extra_query": self.extra_query,
188
- }
189
- )
190
- if self.tools is not None:
191
- request_params["tools"] = self.tools
192
- if self.tool_choice is None:
193
- request_params["tool_choice"] = "auto"
200
+ # Define base request parameters
201
+ base_params = {
202
+ "store": self.store,
203
+ "reasoning_effort": self.reasoning_effort,
204
+ "verbosity": self.verbosity,
205
+ "frequency_penalty": self.frequency_penalty,
206
+ "logit_bias": self.logit_bias,
207
+ "logprobs": self.logprobs,
208
+ "top_logprobs": self.top_logprobs,
209
+ "max_tokens": self.max_tokens,
210
+ "max_completion_tokens": self.max_completion_tokens,
211
+ "modalities": self.modalities,
212
+ "audio": self.audio,
213
+ "presence_penalty": self.presence_penalty,
214
+ "seed": self.seed,
215
+ "stop": self.stop,
216
+ "temperature": self.temperature,
217
+ "user": self.user,
218
+ "top_p": self.top_p,
219
+ "extra_headers": self.extra_headers,
220
+ "extra_query": self.extra_query,
221
+ "extra_body": self.extra_body,
222
+ "metadata": self.metadata,
223
+ "service_tier": self.service_tier,
224
+ }
225
+
226
+ # Handle response format - always use JSON schema approach
227
+ if response_format is not None:
228
+ if isinstance(response_format, type) and issubclass(response_format, BaseModel):
229
+ # Convert Pydantic to JSON schema for regular endpoint
230
+ from agno.utils.models.schema_utils import get_response_schema_for_provider
231
+
232
+ schema = get_response_schema_for_provider(response_format, "openai")
233
+ base_params["response_format"] = {
234
+ "type": "json_schema",
235
+ "json_schema": {
236
+ "name": response_format.__name__,
237
+ "schema": schema,
238
+ "strict": self.strict_output,
239
+ },
240
+ }
194
241
  else:
195
- request_params["tool_choice"] = self.tool_choice
196
-
197
- if self.request_params is not None:
242
+ # Handle other response format types (like {"type": "json_object"})
243
+ base_params["response_format"] = response_format
244
+
245
+ # Filter out None values
246
+ request_params = {k: v for k, v in base_params.items() if v is not None}
247
+
248
+ # Add tools
249
+ if tools is not None and len(tools) > 0:
250
+ # Remove unsupported fields for OpenAILike models
251
+ if self.provider in ["AIMLAPI", "Fireworks", "Nvidia"]:
252
+ for tool in tools:
253
+ if tool.get("type") == "function":
254
+ if tool["function"].get("requires_confirmation") is not None:
255
+ del tool["function"]["requires_confirmation"]
256
+ if tool["function"].get("external_execution") is not None:
257
+ del tool["function"]["external_execution"]
258
+
259
+ request_params["tools"] = tools
260
+
261
+ if tool_choice is not None:
262
+ request_params["tool_choice"] = tool_choice
263
+
264
+ # Add additional request params if provided
265
+ if self.request_params:
198
266
  request_params.update(self.request_params)
199
267
 
200
- # Remove None
201
- request_params = {k: v for k, v in request_params.items() if v is not None}
268
+ if request_params:
269
+ log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
202
270
  return request_params
203
271
 
204
272
  def to_dict(self) -> Dict[str, Any]:
@@ -208,10 +276,12 @@ class OpenAIChat(Model):
208
276
  Returns:
209
277
  Dict[str, Any]: The dictionary representation of the model.
210
278
  """
211
- _dict = super().to_dict()
212
- _dict.update(
279
+ model_dict = super().to_dict()
280
+ model_dict.update(
213
281
  {
214
282
  "store": self.store,
283
+ "reasoning_effort": self.reasoning_effort,
284
+ "verbosity": self.verbosity,
215
285
  "frequency_penalty": self.frequency_penalty,
216
286
  "logit_bias": self.logit_bias,
217
287
  "logprobs": self.logprobs,
@@ -221,9 +291,6 @@ class OpenAIChat(Model):
221
291
  "modalities": self.modalities,
222
292
  "audio": self.audio,
223
293
  "presence_penalty": self.presence_penalty,
224
- "response_format": self.response_format
225
- if isinstance(self.response_format, dict)
226
- else str(self.response_format),
227
294
  "seed": self.seed,
228
295
  "stop": self.stop,
229
296
  "temperature": self.temperature,
@@ -231,763 +298,651 @@ class OpenAIChat(Model):
231
298
  "user": self.user,
232
299
  "extra_headers": self.extra_headers,
233
300
  "extra_query": self.extra_query,
301
+ "extra_body": self.extra_body,
302
+ "service_tier": self.service_tier,
234
303
  }
235
304
  )
236
- if self.tools is not None:
237
- _dict["tools"] = self.tools
238
- if self.tool_choice is None:
239
- _dict["tool_choice"] = "auto"
240
- else:
241
- _dict["tool_choice"] = self.tool_choice
242
- cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
305
+ cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
243
306
  return cleaned_dict
244
307
 
245
- def format_message(self, message: Message) -> Dict[str, Any]:
308
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
246
309
  """
247
310
  Format a message into the format expected by OpenAI.
248
311
 
249
312
  Args:
250
313
  message (Message): The message to format.
314
+ compress_tool_results: Whether to compress tool results.
251
315
 
252
316
  Returns:
253
317
  Dict[str, Any]: The formatted message.
254
318
  """
255
- if message.role == "user":
256
- if message.images is not None:
257
- message = self.add_images_to_message(message=message, images=message.images)
258
-
259
- if message.audio is not None:
260
- message = self.add_audio_to_message(message=message, audio=message.audio)
261
-
262
- if message.videos is not None:
263
- logger.warning("Video input is currently unsupported.")
264
-
265
- return message.to_dict()
266
-
267
- def invoke(self, messages: List[Message]) -> Union[ChatCompletion, ParsedChatCompletion]:
319
+ tool_result = message.get_content(use_compressed_content=compress_tool_results)
320
+
321
+ message_dict: Dict[str, Any] = {
322
+ "role": self.role_map[message.role] if self.role_map else self.default_role_map[message.role],
323
+ "content": tool_result,
324
+ "name": message.name,
325
+ "tool_call_id": message.tool_call_id,
326
+ "tool_calls": message.tool_calls,
327
+ }
328
+ message_dict = {k: v for k, v in message_dict.items() if v is not None}
329
+
330
+ # Ignore non-string message content
331
+ # because we assume that the images/audio are already added to the message
332
+ if (message.images is not None and len(message.images) > 0) or (
333
+ message.audio is not None and len(message.audio) > 0
334
+ ):
335
+ # Ignore non-string message content
336
+ # because we assume that the images/audio are already added to the message
337
+ if isinstance(message.content, str):
338
+ message_dict["content"] = [{"type": "text", "text": message.content}]
339
+ if message.images is not None:
340
+ message_dict["content"].extend(images_to_message(images=message.images))
341
+
342
+ if message.audio is not None:
343
+ message_dict["content"].extend(audio_to_message(audio=message.audio))
344
+
345
+ if message.audio_output is not None:
346
+ message_dict["content"] = ""
347
+ message_dict["audio"] = {"id": message.audio_output.id}
348
+
349
+ if message.videos is not None and len(message.videos) > 0:
350
+ log_warning("Video input is currently unsupported.")
351
+
352
+ # OpenAI expects the tool_calls to be None if empty, not an empty list
353
+ if message.tool_calls is not None and len(message.tool_calls) == 0:
354
+ message_dict["tool_calls"] = None
355
+
356
+ if message.files is not None:
357
+ # Ensure content is a list of parts
358
+ content = message_dict.get("content")
359
+ if isinstance(content, str): # wrap existing text
360
+ text = content
361
+ message_dict["content"] = [{"type": "text", "text": text}]
362
+ elif content is None:
363
+ message_dict["content"] = []
364
+ # Insert each file part before text parts
365
+ for file in message.files:
366
+ file_part = _format_file_for_message(file)
367
+ if file_part:
368
+ message_dict["content"].insert(0, file_part)
369
+
370
+ # Manually add the content field even if it is None
371
+ if message.content is None:
372
+ message_dict["content"] = ""
373
+ return message_dict
374
+
375
+ def invoke(
376
+ self,
377
+ messages: List[Message],
378
+ assistant_message: Message,
379
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
380
+ tools: Optional[List[Dict[str, Any]]] = None,
381
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
382
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
383
+ compress_tool_results: bool = False,
384
+ ) -> ModelResponse:
268
385
  """
269
- Send a chat completion request to the OpenAI API.
386
+ Send a chat completion request to the OpenAI API and parse the response.
270
387
 
271
388
  Args:
272
389
  messages (List[Message]): A list of messages to send to the model.
390
+ assistant_message (Message): The assistant message to populate.
391
+ response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
392
+ tools (Optional[List[Dict[str, Any]]]): The tools to use.
393
+ tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
394
+ compress_tool_results: Whether to compress tool results.
273
395
 
274
396
  Returns:
275
- ChatCompletion: The chat completion response from the API.
397
+ ModelResponse: The chat completion response from the API.
276
398
  """
277
- if self.response_format is not None and self.structured_outputs:
278
- try:
279
- if isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
280
- return self.get_client().beta.chat.completions.parse(
281
- model=self.id,
282
- messages=[self.format_message(m) for m in messages], # type: ignore
283
- **self.request_kwargs,
284
- )
285
- else:
286
- raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
287
- except Exception as e:
288
- logger.error(f"Error from OpenAI API: {e}")
399
+ try:
400
+ if run_response and run_response.metrics:
401
+ run_response.metrics.set_time_to_first_token()
289
402
 
290
- return self.get_client().chat.completions.create(
291
- model=self.id,
292
- messages=[self.format_message(m) for m in messages], # type: ignore
293
- **self.request_kwargs,
294
- )
403
+ assistant_message.metrics.start_timer()
295
404
 
296
- async def ainvoke(self, messages: List[Message]) -> Union[ChatCompletion, ParsedChatCompletion]:
297
- """
298
- Sends an asynchronous chat completion request to the OpenAI API.
405
+ provider_response = self.get_client().chat.completions.create(
406
+ model=self.id,
407
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
408
+ **self.get_request_params(
409
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
410
+ ),
411
+ )
412
+ assistant_message.metrics.stop_timer()
299
413
 
300
- Args:
301
- messages (List[Message]): A list of messages to send to the model.
414
+ # Parse the response into an Agno ModelResponse object
415
+ model_response = self._parse_provider_response(provider_response, response_format=response_format)
302
416
 
303
- Returns:
304
- ChatCompletion: The chat completion response from the API.
305
- """
306
- if self.response_format is not None and self.structured_outputs:
307
- try:
308
- if isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
309
- return await self.get_async_client().beta.chat.completions.parse(
310
- model=self.id,
311
- messages=[self.format_message(m) for m in messages], # type: ignore
312
- **self.request_kwargs,
313
- )
314
- else:
315
- raise ValueError("response_format must be a subclass of BaseModel if structured_outputs=True")
316
- except Exception as e:
317
- logger.error(f"Error from OpenAI API: {e}")
417
+ return model_response
318
418
 
319
- return await self.get_async_client().chat.completions.create(
320
- model=self.id,
321
- messages=[self.format_message(m) for m in messages], # type: ignore
322
- **self.request_kwargs,
323
- )
419
+ except RateLimitError as e:
420
+ log_error(f"Rate limit error from OpenAI API: {e}")
421
+ error_message = e.response.json().get("error", {})
422
+ error_message = (
423
+ error_message.get("message", "Unknown model error")
424
+ if isinstance(error_message, dict)
425
+ else error_message
426
+ )
427
+ raise ModelProviderError(
428
+ message=error_message,
429
+ status_code=e.response.status_code,
430
+ model_name=self.name,
431
+ model_id=self.id,
432
+ ) from e
433
+ except APIConnectionError as e:
434
+ log_error(f"API connection error from OpenAI API: {e}")
435
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
436
+ except APIStatusError as e:
437
+ log_error(f"API status error from OpenAI API: {e}")
438
+ try:
439
+ error_message = e.response.json().get("error", {})
440
+ except Exception:
441
+ error_message = e.response.text
442
+ error_message = (
443
+ error_message.get("message", "Unknown model error")
444
+ if isinstance(error_message, dict)
445
+ else error_message
446
+ )
447
+ raise ModelProviderError(
448
+ message=error_message,
449
+ status_code=e.response.status_code,
450
+ model_name=self.name,
451
+ model_id=self.id,
452
+ ) from e
453
+ except ModelAuthenticationError as e:
454
+ log_error(f"Model authentication error from OpenAI API: {e}")
455
+ raise e
456
+ except Exception as e:
457
+ log_error(f"Error from OpenAI API: {e}")
458
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
324
459
 
325
- def invoke_stream(self, messages: List[Message]) -> Iterator[ChatCompletionChunk]:
460
+ async def ainvoke(
461
+ self,
462
+ messages: List[Message],
463
+ assistant_message: Message,
464
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
465
+ tools: Optional[List[Dict[str, Any]]] = None,
466
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
467
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
468
+ compress_tool_results: bool = False,
469
+ ) -> ModelResponse:
326
470
  """
327
- Send a streaming chat completion request to the OpenAI API.
471
+ Sends an asynchronous chat completion request to the OpenAI API.
328
472
 
329
473
  Args:
330
474
  messages (List[Message]): A list of messages to send to the model.
475
+ assistant_message (Message): The assistant message to populate.
476
+ response_format (Optional[Union[Dict, Type[BaseModel]]]): The response format to use.
477
+ tools (Optional[List[Dict[str, Any]]]): The tools to use.
478
+ tool_choice (Optional[Union[str, Dict[str, Any]]]): The tool choice to use.
479
+ compress_tool_results: Whether to compress tool results.
331
480
 
332
481
  Returns:
333
- Iterator[ChatCompletionChunk]: An iterator of chat completion chunks.
334
- """
335
- yield from self.get_client().chat.completions.create(
336
- model=self.id,
337
- messages=[self.format_message(m) for m in messages], # type: ignore
338
- stream=True,
339
- stream_options={"include_usage": True},
340
- **self.request_kwargs,
341
- ) # type: ignore
342
-
343
- async def ainvoke_stream(self, messages: List[Message]) -> Any:
482
+ ModelResponse: The chat completion response from the API.
344
483
  """
345
- Sends an asynchronous streaming chat completion request to the OpenAI API.
484
+ try:
485
+ if run_response and run_response.metrics:
486
+ run_response.metrics.set_time_to_first_token()
487
+
488
+ assistant_message.metrics.start_timer()
489
+ response = await self.get_async_client().chat.completions.create(
490
+ model=self.id,
491
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
492
+ **self.get_request_params(
493
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
494
+ ),
495
+ )
496
+ assistant_message.metrics.stop_timer()
346
497
 
347
- Args:
348
- messages (List[Message]): A list of messages to send to the model.
498
+ # Parse the response into an Agno ModelResponse object
499
+ provider_response: ModelResponse = self._parse_provider_response(response, response_format=response_format)
349
500
 
350
- Returns:
351
- Any: An asynchronous iterator of chat completion chunks.
352
- """
353
- async_stream = await self.get_async_client().chat.completions.create(
354
- model=self.id,
355
- messages=[self.format_message(m) for m in messages], # type: ignore
356
- stream=True,
357
- stream_options={"include_usage": True},
358
- **self.request_kwargs,
359
- )
360
- async for chunk in async_stream: # type: ignore
361
- yield chunk
501
+ return provider_response
502
+
503
+ except RateLimitError as e:
504
+ log_error(f"Rate limit error from OpenAI API: {e}")
505
+ error_message = e.response.json().get("error", {})
506
+ error_message = (
507
+ error_message.get("message", "Unknown model error")
508
+ if isinstance(error_message, dict)
509
+ else error_message
510
+ )
511
+ raise ModelProviderError(
512
+ message=error_message,
513
+ status_code=e.response.status_code,
514
+ model_name=self.name,
515
+ model_id=self.id,
516
+ ) from e
517
+ except APIConnectionError as e:
518
+ log_error(f"API connection error from OpenAI API: {e}")
519
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
520
+ except APIStatusError as e:
521
+ log_error(f"API status error from OpenAI API: {e}")
522
+ try:
523
+ error_message = e.response.json().get("error", {})
524
+ except Exception:
525
+ error_message = e.response.text
526
+ error_message = (
527
+ error_message.get("message", "Unknown model error")
528
+ if isinstance(error_message, dict)
529
+ else error_message
530
+ )
531
+ raise ModelProviderError(
532
+ message=error_message,
533
+ status_code=e.response.status_code,
534
+ model_name=self.name,
535
+ model_id=self.id,
536
+ ) from e
537
+ except ModelAuthenticationError as e:
538
+ log_error(f"Model authentication error from OpenAI API: {e}")
539
+ raise e
540
+ except Exception as e:
541
+ log_error(f"Error from OpenAI API: {e}")
542
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
362
543
 
363
- def handle_tool_calls(
544
+ def invoke_stream(
364
545
  self,
365
- assistant_message: Message,
366
546
  messages: List[Message],
367
- model_response: ModelResponse,
368
- tool_role: str = "tool",
369
- ) -> Optional[ModelResponse]:
547
+ assistant_message: Message,
548
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
549
+ tools: Optional[List[Dict[str, Any]]] = None,
550
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
551
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
552
+ compress_tool_results: bool = False,
553
+ ) -> Iterator[ModelResponse]:
370
554
  """
371
- Handle tool calls in the assistant message.
555
+ Send a streaming chat completion request to the OpenAI API.
372
556
 
373
557
  Args:
374
- assistant_message (Message): The assistant message.
375
- messages (List[Message]): The list of messages.
376
- model_response (ModelResponse): The model response.
377
- tool_role (str): The role of the tool call. Defaults to "tool".
558
+ messages (List[Message]): A list of messages to send to the model.
559
+ compress_tool_results: Whether to compress tool results.
378
560
 
379
561
  Returns:
380
- Optional[ModelResponse]: The model response after handling tool calls.
562
+ Iterator[ModelResponse]: An iterator of model responses.
381
563
  """
382
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
383
- if model_response.content is None:
384
- model_response.content = ""
385
- if model_response.tool_calls is None:
386
- model_response.tool_calls = []
387
- function_call_results: List[Message] = []
388
- function_calls_to_run: List[FunctionCall] = []
389
- for tool_call in assistant_message.tool_calls:
390
- _tool_call_id = tool_call.get("id")
391
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
392
- if _function_call is None:
393
- messages.append(
394
- Message(
395
- role="tool",
396
- tool_call_id=_tool_call_id,
397
- content="Could not find function to call.",
398
- )
399
- )
400
- continue
401
- if _function_call.error is not None:
402
- messages.append(
403
- Message(
404
- role="tool",
405
- tool_call_id=_tool_call_id,
406
- content=_function_call.error,
407
- )
408
- )
409
- continue
410
- function_calls_to_run.append(_function_call)
411
564
 
412
- if self.show_tool_calls:
413
- model_response.content += "\nRunning:"
414
- for _f in function_calls_to_run:
415
- model_response.content += f"\n - {_f.get_call_str()}"
416
- model_response.content += "\n\n"
417
-
418
- for function_call_response in self.run_function_calls(
419
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
565
+ try:
566
+ if run_response and run_response.metrics:
567
+ run_response.metrics.set_time_to_first_token()
568
+
569
+ assistant_message.metrics.start_timer()
570
+
571
+ for chunk in self.get_client().chat.completions.create(
572
+ model=self.id,
573
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
574
+ stream=True,
575
+ stream_options={"include_usage": True},
576
+ **self.get_request_params(
577
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
578
+ ),
420
579
  ):
421
- if (
422
- function_call_response.event == ModelResponseEvent.tool_call_completed.value
423
- and function_call_response.tool_calls is not None
424
- ):
425
- model_response.tool_calls.extend(function_call_response.tool_calls)
426
-
427
- if len(function_call_results) > 0:
428
- messages.extend(function_call_results)
580
+ yield self._parse_provider_response_delta(chunk)
429
581
 
430
- return model_response
431
- return None
432
-
433
- def update_usage_metrics(
434
- self, assistant_message: Message, metrics: Metrics, response_usage: Optional[CompletionUsage]
435
- ) -> None:
436
- """
437
- Update the usage metrics for the assistant message and the model.
438
-
439
- Args:
440
- assistant_message (Message): The assistant message.
441
- metrics (Metrics): The metrics.
442
- response_usage (Optional[CompletionUsage]): The response usage.
443
- """
444
- # Update time taken to generate response
445
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
446
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
447
- if response_usage:
448
- prompt_tokens = response_usage.prompt_tokens
449
- completion_tokens = response_usage.completion_tokens
450
- total_tokens = response_usage.total_tokens
451
-
452
- if prompt_tokens is not None:
453
- metrics.input_tokens = prompt_tokens
454
- metrics.prompt_tokens = prompt_tokens
455
- assistant_message.metrics["input_tokens"] = prompt_tokens
456
- assistant_message.metrics["prompt_tokens"] = prompt_tokens
457
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
458
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
459
- if completion_tokens is not None:
460
- metrics.output_tokens = completion_tokens
461
- metrics.completion_tokens = completion_tokens
462
- assistant_message.metrics["output_tokens"] = completion_tokens
463
- assistant_message.metrics["completion_tokens"] = completion_tokens
464
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
465
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
466
- if total_tokens is not None:
467
- metrics.total_tokens = total_tokens
468
- assistant_message.metrics["total_tokens"] = total_tokens
469
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
470
- if response_usage.prompt_tokens_details is not None:
471
- if isinstance(response_usage.prompt_tokens_details, dict):
472
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details
473
- elif isinstance(response_usage.prompt_tokens_details, BaseModel):
474
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
475
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
476
- if metrics.prompt_tokens_details is not None:
477
- for k, v in metrics.prompt_tokens_details.items():
478
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
479
- if response_usage.completion_tokens_details is not None:
480
- if isinstance(response_usage.completion_tokens_details, dict):
481
- metrics.completion_tokens_details = response_usage.completion_tokens_details
482
- elif isinstance(response_usage.completion_tokens_details, BaseModel):
483
- metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
484
- exclude_none=True
485
- )
486
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
487
- if metrics.completion_tokens_details is not None:
488
- for k, v in metrics.completion_tokens_details.items():
489
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
582
+ assistant_message.metrics.stop_timer()
490
583
 
491
- def create_assistant_message(
492
- self,
493
- response_message: ChatCompletionMessage,
494
- metrics: Metrics,
495
- response_usage: Optional[CompletionUsage],
496
- ) -> Message:
497
- """
498
- Create an assistant message from the response.
499
-
500
- Args:
501
- response_message (ChatCompletionMessage): The response message.
502
- metrics (Metrics): The metrics.
503
- response_usage (Optional[CompletionUsage]): The response usage.
504
-
505
- Returns:
506
- Message: The assistant message.
507
- """
508
- assistant_message = Message(
509
- role=response_message.role or "assistant",
510
- content=response_message.content,
511
- )
512
- if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
513
- try:
514
- assistant_message.tool_calls = [t.model_dump() for t in response_message.tool_calls]
515
- except Exception as e:
516
- logger.warning(f"Error processing tool calls: {e}")
517
- if hasattr(response_message, "audio") and response_message.audio is not None:
584
+ except RateLimitError as e:
585
+ log_error(f"Rate limit error from OpenAI API: {e}")
586
+ error_message = e.response.json().get("error", {})
587
+ error_message = (
588
+ error_message.get("message", "Unknown model error")
589
+ if isinstance(error_message, dict)
590
+ else error_message
591
+ )
592
+ raise ModelProviderError(
593
+ message=error_message,
594
+ status_code=e.response.status_code,
595
+ model_name=self.name,
596
+ model_id=self.id,
597
+ ) from e
598
+ except APIConnectionError as e:
599
+ log_error(f"API connection error from OpenAI API: {e}")
600
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
601
+ except APIStatusError as e:
602
+ log_error(f"API status error from OpenAI API: {e}")
518
603
  try:
519
- assistant_message.audio_output = AudioOutput(
520
- id=response_message.audio.id,
521
- content=response_message.audio.data,
522
- expires_at=response_message.audio.expires_at,
523
- transcript=response_message.audio.transcript,
524
- )
525
- except Exception as e:
526
- logger.warning(f"Error processing audio: {e}")
527
-
528
- # Update metrics
529
- self.update_usage_metrics(assistant_message, metrics, response_usage)
530
- return assistant_message
604
+ error_message = e.response.json().get("error", {})
605
+ except Exception:
606
+ error_message = e.response.text
607
+ error_message = (
608
+ error_message.get("message", "Unknown model error")
609
+ if isinstance(error_message, dict)
610
+ else error_message
611
+ )
612
+ raise ModelProviderError(
613
+ message=error_message,
614
+ status_code=e.response.status_code,
615
+ model_name=self.name,
616
+ model_id=self.id,
617
+ ) from e
618
+ except ModelAuthenticationError as e:
619
+ log_error(f"Model authentication error from OpenAI API: {e}")
620
+ raise e
621
+ except Exception as e:
622
+ log_error(f"Error from OpenAI API: {e}")
623
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
531
624
 
532
- def response(self, messages: List[Message]) -> ModelResponse:
625
+ async def ainvoke_stream(
626
+ self,
627
+ messages: List[Message],
628
+ assistant_message: Message,
629
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
630
+ tools: Optional[List[Dict[str, Any]]] = None,
631
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
632
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
633
+ compress_tool_results: bool = False,
634
+ ) -> AsyncIterator[ModelResponse]:
533
635
  """
534
- Generate a response from OpenAI.
636
+ Sends an asynchronous streaming chat completion request to the OpenAI API.
535
637
 
536
638
  Args:
537
- messages (List[Message]): A list of messages.
639
+ messages (List[Message]): A list of messages to send to the model.
640
+ compress_tool_results: Whether to compress tool results.
538
641
 
539
642
  Returns:
540
- ModelResponse: The model response.
643
+ Any: An asynchronous iterator of model responses.
541
644
  """
542
- logger.debug("---------- OpenAIChat Response Start ----------")
543
- self._log_messages(messages)
544
- model_response = ModelResponse()
545
- metrics = Metrics()
546
-
547
- # -*- Generate response
548
- metrics.start_response_timer()
549
- response: Union[ChatCompletion, ParsedChatCompletion] = self.invoke(messages=messages)
550
- metrics.stop_response_timer()
551
-
552
- # -*- Parse response
553
- response_message: ChatCompletionMessage = response.choices[0].message
554
- response_usage: Optional[CompletionUsage] = response.usage
555
- response_audio: Optional[ChatCompletionAudio] = response_message.audio
556
-
557
- # -*- Parse transcript if available
558
- if response_audio:
559
- if response_audio.transcript and not response_message.content:
560
- response_message.content = response_audio.transcript
561
645
 
562
- # -*- Parse structured outputs
563
646
  try:
564
- if (
565
- self.response_format is not None
566
- and self.structured_outputs
567
- and issubclass(self.response_format, BaseModel)
568
- ):
569
- parsed_object = response_message.parsed # type: ignore
570
- if parsed_object is not None:
571
- model_response.parsed = parsed_object
572
- except Exception as e:
573
- logger.warning(f"Error retrieving structured outputs: {e}")
647
+ if run_response and run_response.metrics:
648
+ run_response.metrics.set_time_to_first_token()
649
+
650
+ assistant_message.metrics.start_timer()
651
+
652
+ async_stream = await self.get_async_client().chat.completions.create(
653
+ model=self.id,
654
+ messages=[self._format_message(m, compress_tool_results) for m in messages], # type: ignore
655
+ stream=True,
656
+ stream_options={"include_usage": True},
657
+ **self.get_request_params(
658
+ response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
659
+ ),
660
+ )
574
661
 
575
- # -*- Create assistant message
576
- assistant_message = self.create_assistant_message(
577
- response_message=response_message, metrics=metrics, response_usage=response_usage
578
- )
662
+ async for chunk in async_stream:
663
+ yield self._parse_provider_response_delta(chunk)
664
+
665
+ assistant_message.metrics.stop_timer()
579
666
 
580
- # -*- Add assistant message to messages
581
- messages.append(assistant_message)
582
-
583
- # -*- Log response and metrics
584
- assistant_message.log()
585
- metrics.log()
586
-
587
- # -*- Update model response with assistant message content and audio
588
- if assistant_message.content is not None:
589
- # add the content to the model response
590
- model_response.content = assistant_message.get_content_string()
591
- if assistant_message.audio_output is not None:
592
- # add the audio to the model response
593
- model_response.audio = assistant_message.audio_output
594
-
595
- # -*- Handle tool calls
596
- tool_role = "tool"
597
- if (
598
- self.handle_tool_calls(
599
- assistant_message=assistant_message,
600
- messages=messages,
601
- model_response=model_response,
602
- tool_role=tool_role,
667
+ except RateLimitError as e:
668
+ log_error(f"Rate limit error from OpenAI API: {e}")
669
+ error_message = e.response.json().get("error", {})
670
+ error_message = (
671
+ error_message.get("message", "Unknown model error")
672
+ if isinstance(error_message, dict)
673
+ else error_message
603
674
  )
604
- is not None
605
- ):
606
- return self.handle_post_tool_call_messages(messages=messages, model_response=model_response)
607
- logger.debug("---------- OpenAIChat Response End ----------")
608
- return model_response
675
+ raise ModelProviderError(
676
+ message=error_message,
677
+ status_code=e.response.status_code,
678
+ model_name=self.name,
679
+ model_id=self.id,
680
+ ) from e
681
+ except APIConnectionError as e:
682
+ log_error(f"API connection error from OpenAI API: {e}")
683
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
684
+ except APIStatusError as e:
685
+ log_error(f"API status error from OpenAI API: {e}")
686
+ try:
687
+ error_message = e.response.json().get("error", {})
688
+ except Exception:
689
+ error_message = e.response.text
690
+ error_message = (
691
+ error_message.get("message", "Unknown model error")
692
+ if isinstance(error_message, dict)
693
+ else error_message
694
+ )
695
+ raise ModelProviderError(
696
+ message=error_message,
697
+ status_code=e.response.status_code,
698
+ model_name=self.name,
699
+ model_id=self.id,
700
+ ) from e
701
+ except ModelAuthenticationError as e:
702
+ log_error(f"Model authentication error from OpenAI API: {e}")
703
+ raise e
704
+ except Exception as e:
705
+ log_error(f"Error from OpenAI API: {e}")
706
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
609
707
 
610
- async def aresponse(self, messages: List[Message]) -> ModelResponse:
708
+ @staticmethod
709
+ def parse_tool_calls(tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
611
710
  """
612
- Generate an asynchronous response from OpenAI.
711
+ Build tool calls from streamed tool call data.
613
712
 
614
713
  Args:
615
- messages (List[Message]): A list of messages.
714
+ tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
616
715
 
617
716
  Returns:
618
- ModelResponse: The model response from the API.
717
+ List[Dict[str, Any]]: The built tool calls.
718
+ """
719
+ tool_calls: List[Dict[str, Any]] = []
720
+ for _tool_call in tool_calls_data:
721
+ _index = _tool_call.index or 0
722
+ _tool_call_id = _tool_call.id
723
+ _tool_call_type = _tool_call.type
724
+ _function_name = _tool_call.function.name if _tool_call.function else None
725
+ _function_arguments = _tool_call.function.arguments if _tool_call.function else None
726
+
727
+ if len(tool_calls) <= _index:
728
+ tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
729
+ tool_call_entry = tool_calls[_index]
730
+ if not tool_call_entry:
731
+ tool_call_entry["id"] = _tool_call_id
732
+ tool_call_entry["type"] = _tool_call_type
733
+ tool_call_entry["function"] = {
734
+ "name": _function_name or "",
735
+ "arguments": _function_arguments or "",
736
+ }
737
+ else:
738
+ if _function_name:
739
+ tool_call_entry["function"]["name"] += _function_name
740
+ if _function_arguments:
741
+ tool_call_entry["function"]["arguments"] += _function_arguments
742
+ if _tool_call_id:
743
+ tool_call_entry["id"] = _tool_call_id
744
+ if _tool_call_type:
745
+ tool_call_entry["type"] = _tool_call_type
746
+ return tool_calls
747
+
748
+ def _parse_provider_response(
749
+ self,
750
+ response: ChatCompletion,
751
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
752
+ ) -> ModelResponse:
753
+ """
754
+ Parse the OpenAI response into a ModelResponse.
619
755
  """
620
- logger.debug("---------- OpenAIChat Async Response Start ----------")
621
- self._log_messages(messages)
622
756
  model_response = ModelResponse()
623
- metrics = Metrics()
624
-
625
- # -*- Generate response
626
- metrics.start_response_timer()
627
- response: Union[ChatCompletion, ParsedChatCompletion] = await self.ainvoke(messages=messages)
628
- metrics.stop_response_timer()
629
-
630
- # -*- Parse response
631
- response_message: ChatCompletionMessage = response.choices[0].message
632
- response_usage: Optional[CompletionUsage] = response.usage
633
- response_audio: Optional[ChatCompletionAudio] = response_message.audio
634
-
635
- # -*- Parse transcript if available
636
- if response_audio:
637
- if response_audio.transcript and not response_message.content:
638
- response_message.content = response_audio.transcript
639
757
 
640
- # -*- Parse structured outputs
641
- try:
642
- if (
643
- self.response_format is not None
644
- and self.structured_outputs
645
- and issubclass(self.response_format, BaseModel)
646
- ):
647
- parsed_object = response_message.parsed # type: ignore
648
- if parsed_object is not None:
649
- model_response.parsed = parsed_object
650
- except Exception as e:
651
- logger.warning(f"Error retrieving structured outputs: {e}")
652
-
653
- # -*- Create assistant message
654
- assistant_message = self.create_assistant_message(
655
- response_message=response_message, metrics=metrics, response_usage=response_usage
656
- )
657
-
658
- # -*- Add assistant message to messages
659
- messages.append(assistant_message)
660
-
661
- # -*- Log response and metrics
662
- assistant_message.log()
663
- metrics.log()
664
-
665
- # -*- Update model response with assistant message content and audio
666
- if assistant_message.content is not None:
667
- # add the content to the model response
668
- model_response.content = assistant_message.get_content_string()
669
- if assistant_message.audio_output is not None:
670
- # add the audio to the model response
671
- model_response.audio = assistant_message.audio_output
672
-
673
- # -*- Handle tool calls
674
- tool_role = "tool"
675
- if (
676
- self.handle_tool_calls(
677
- assistant_message=assistant_message,
678
- messages=messages,
679
- model_response=model_response,
680
- tool_role=tool_role,
758
+ if hasattr(response, "error") and response.error: # type: ignore
759
+ raise ModelProviderError(
760
+ message=response.error.get("message", "Unknown model error"), # type: ignore
761
+ model_name=self.name,
762
+ model_id=self.id,
681
763
  )
682
- is not None
683
- ):
684
- return await self.ahandle_post_tool_call_messages(messages=messages, model_response=model_response)
685
-
686
- logger.debug("---------- OpenAIChat Async Response End ----------")
687
- return model_response
688
-
689
- def update_stream_metrics(self, assistant_message: Message, metrics: Metrics):
690
- """
691
- Update the usage metrics for the assistant message and the model.
692
-
693
- Args:
694
- assistant_message (Message): The assistant message.
695
- metrics (Metrics): The metrics.
696
- """
697
- # Update time taken to generate response
698
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
699
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
700
-
701
- if metrics.time_to_first_token is not None:
702
- assistant_message.metrics["time_to_first_token"] = metrics.time_to_first_token
703
- self.metrics.setdefault("time_to_first_token", []).append(metrics.time_to_first_token)
704
-
705
- if metrics.input_tokens is not None:
706
- assistant_message.metrics["input_tokens"] = metrics.input_tokens
707
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + metrics.input_tokens
708
- if metrics.output_tokens is not None:
709
- assistant_message.metrics["output_tokens"] = metrics.output_tokens
710
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + metrics.output_tokens
711
- if metrics.prompt_tokens is not None:
712
- assistant_message.metrics["prompt_tokens"] = metrics.prompt_tokens
713
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + metrics.prompt_tokens
714
- if metrics.completion_tokens is not None:
715
- assistant_message.metrics["completion_tokens"] = metrics.completion_tokens
716
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + metrics.completion_tokens
717
- if metrics.total_tokens is not None:
718
- assistant_message.metrics["total_tokens"] = metrics.total_tokens
719
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + metrics.total_tokens
720
- if metrics.prompt_tokens_details is not None:
721
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
722
- for k, v in metrics.prompt_tokens_details.items():
723
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
724
- if metrics.completion_tokens_details is not None:
725
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
726
- for k, v in metrics.completion_tokens_details.items():
727
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
728
-
729
- def add_response_usage_to_metrics(self, metrics: Metrics, response_usage: CompletionUsage):
730
- metrics.input_tokens = response_usage.prompt_tokens
731
- metrics.prompt_tokens = response_usage.prompt_tokens
732
- metrics.output_tokens = response_usage.completion_tokens
733
- metrics.completion_tokens = response_usage.completion_tokens
734
- metrics.total_tokens = response_usage.total_tokens
735
- if response_usage.prompt_tokens_details is not None:
736
- if isinstance(response_usage.prompt_tokens_details, dict):
737
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details
738
- elif isinstance(response_usage.prompt_tokens_details, BaseModel):
739
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
740
- if response_usage.completion_tokens_details is not None:
741
- if isinstance(response_usage.completion_tokens_details, dict):
742
- metrics.completion_tokens_details = response_usage.completion_tokens_details
743
- elif isinstance(response_usage.completion_tokens_details, BaseModel):
744
- metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
745
- exclude_none=True
746
- )
747
764
 
748
- def handle_stream_tool_calls(
749
- self,
750
- assistant_message: Message,
751
- messages: List[Message],
752
- tool_role: str = "tool",
753
- ) -> Iterator[ModelResponse]:
754
- """
755
- Handle tool calls for response stream.
765
+ # Get response message
766
+ response_message = response.choices[0].message
767
+
768
+ # Add role
769
+ if response_message.role is not None:
770
+ model_response.role = response_message.role
771
+ # Add content
772
+ if response_message.content is not None:
773
+ model_response.content = response_message.content
774
+
775
+ # Extract thinking content before any structured parsing
776
+ if model_response.content:
777
+ reasoning_content, output_content = extract_thinking_content(model_response.content)
778
+ if reasoning_content:
779
+ model_response.reasoning_content = reasoning_content
780
+ model_response.content = output_content
781
+ # Add tool calls
782
+ if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
783
+ try:
784
+ model_response.tool_calls = [t.model_dump() for t in response_message.tool_calls]
785
+ except Exception as e:
786
+ log_warning(f"Error processing tool calls: {e}")
756
787
 
757
- Args:
758
- assistant_message (Message): The assistant message.
759
- messages (List[Message]): The list of messages.
760
- tool_role (str): The role of the tool call. Defaults to "tool".
788
+ # Add audio transcript to content if available
789
+ response_audio: Optional[ChatCompletionAudio] = response_message.audio
790
+ if response_audio and response_audio.transcript and not model_response.content:
791
+ model_response.content = response_audio.transcript
761
792
 
762
- Returns:
763
- Iterator[ModelResponse]: An iterator of the model response.
764
- """
765
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
766
- function_calls_to_run: List[FunctionCall] = []
767
- function_call_results: List[Message] = []
768
- for tool_call in assistant_message.tool_calls:
769
- _tool_call_id = tool_call.get("id")
770
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
771
- if _function_call is None:
772
- messages.append(
773
- Message(
774
- role=tool_role,
775
- tool_call_id=_tool_call_id,
776
- content="Could not find function to call.",
777
- )
793
+ # Add audio if present
794
+ if hasattr(response_message, "audio") and response_message.audio is not None:
795
+ # If the audio output modality is requested, we can extract an audio response
796
+ try:
797
+ if isinstance(response_message.audio, dict):
798
+ model_response.audio = Audio(
799
+ id=response_message.audio.get("id"),
800
+ content=response_message.audio.get("data"),
801
+ expires_at=response_message.audio.get("expires_at"),
802
+ transcript=response_message.audio.get("transcript"),
778
803
  )
779
- continue
780
- if _function_call.error is not None:
781
- messages.append(
782
- Message(
783
- role=tool_role,
784
- tool_call_id=_tool_call_id,
785
- content=_function_call.error,
786
- )
804
+ else:
805
+ model_response.audio = Audio(
806
+ id=response_message.audio.id,
807
+ content=response_message.audio.data,
808
+ expires_at=response_message.audio.expires_at,
809
+ transcript=response_message.audio.transcript,
787
810
  )
788
- continue
789
- function_calls_to_run.append(_function_call)
811
+ except Exception as e:
812
+ log_warning(f"Error processing audio: {e}")
790
813
 
791
- if self.show_tool_calls:
792
- yield ModelResponse(content="\nRunning:")
793
- for _f in function_calls_to_run:
794
- yield ModelResponse(content=f"\n - {_f.get_call_str()}")
795
- yield ModelResponse(content="\n\n")
814
+ if hasattr(response_message, "reasoning_content") and response_message.reasoning_content is not None: # type: ignore
815
+ model_response.reasoning_content = response_message.reasoning_content # type: ignore
816
+ elif hasattr(response_message, "reasoning") and response_message.reasoning is not None: # type: ignore
817
+ model_response.reasoning_content = response_message.reasoning # type: ignore
796
818
 
797
- for function_call_response in self.run_function_calls(
798
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
799
- ):
800
- yield function_call_response
819
+ if response.usage is not None:
820
+ model_response.response_usage = self._get_metrics(response.usage)
821
+
822
+ if model_response.provider_data is None:
823
+ model_response.provider_data = {}
801
824
 
802
- if len(function_call_results) > 0:
803
- messages.extend(function_call_results)
825
+ if response.id:
826
+ model_response.provider_data["id"] = response.id
827
+ if response.system_fingerprint:
828
+ model_response.provider_data["system_fingerprint"] = response.system_fingerprint
829
+ if response.model_extra:
830
+ model_response.provider_data["model_extra"] = response.model_extra
804
831
 
805
- def response_stream(self, messages: List[Message]) -> Iterator[ModelResponse]:
832
+ return model_response
833
+
834
+ def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
806
835
  """
807
- Generate a streaming response from OpenAI.
836
+ Parse the OpenAI streaming response into a ModelResponse.
808
837
 
809
838
  Args:
810
- messages (List[Message]): A list of messages.
839
+ response_delta: Raw response chunk from OpenAI
811
840
 
812
841
  Returns:
813
- Iterator[ModelResponse]: An iterator of model responses.
842
+ ModelResponse: Parsed response data
814
843
  """
815
- logger.debug("---------- OpenAIChat Response Start ----------")
816
- self._log_messages(messages)
817
- stream_data: StreamData = StreamData()
818
- metrics: Metrics = Metrics()
819
-
820
- # -*- Generate response
821
- metrics.start_response_timer()
822
- for response in self.invoke_stream(messages=messages):
823
- if len(response.choices) > 0:
824
- metrics.completion_tokens += 1
825
- if metrics.completion_tokens == 1:
826
- metrics.time_to_first_token = metrics.response_timer.elapsed
827
-
828
- response_delta: ChoiceDelta = response.choices[0].delta
829
-
830
- if response_delta.content is not None:
831
- stream_data.response_content += response_delta.content
832
- yield ModelResponse(content=response_delta.content)
833
-
834
- if hasattr(response_delta, "audio"):
835
- response_audio = response_delta.audio
836
- stream_data.response_audio = response_audio
837
- if stream_data.response_audio:
838
- yield ModelResponse(
839
- audio=AudioOutput(
840
- id=stream_data.response_audio.id,
841
- content=stream_data.response_audio.data,
842
- expires_at=stream_data.response_audio.expires_at,
843
- transcript=stream_data.response_audio.transcript,
844
- )
845
- )
846
-
847
- if response_delta.tool_calls is not None:
848
- if stream_data.response_tool_calls is None:
849
- stream_data.response_tool_calls = []
850
- stream_data.response_tool_calls.extend(response_delta.tool_calls)
851
-
852
- if response.usage is not None:
853
- self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
854
- metrics.stop_response_timer()
855
-
856
- # -*- Create assistant message
857
- assistant_message = Message(role="assistant")
858
- if stream_data.response_content != "":
859
- assistant_message.content = stream_data.response_content
860
-
861
- if stream_data.response_audio is not None:
862
- assistant_message.audio_output = AudioOutput(
863
- id=stream_data.response_audio.id,
864
- content=stream_data.response_audio.data,
865
- expires_at=stream_data.response_audio.expires_at,
866
- transcript=stream_data.response_audio.transcript,
867
- )
868
-
869
- if stream_data.response_tool_calls is not None:
870
- _tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
871
- if len(_tool_calls) > 0:
872
- assistant_message.tool_calls = _tool_calls
873
-
874
- # -*- Update usage metrics
875
- self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
844
+ model_response = ModelResponse()
876
845
 
877
- # -*- Add assistant message to messages
878
- messages.append(assistant_message)
846
+ if response_delta.choices and len(response_delta.choices) > 0:
847
+ choice_delta: ChoiceDelta = response_delta.choices[0].delta
848
+ if choice_delta:
849
+ # Add content
850
+ if choice_delta.content is not None:
851
+ model_response.content = choice_delta.content
852
+
853
+ # We only want to handle these if content is present
854
+ if model_response.provider_data is None:
855
+ model_response.provider_data = {}
856
+
857
+ if response_delta.id:
858
+ model_response.provider_data["id"] = response_delta.id
859
+ if response_delta.system_fingerprint:
860
+ model_response.provider_data["system_fingerprint"] = response_delta.system_fingerprint
861
+ if response_delta.model_extra:
862
+ model_response.provider_data["model_extra"] = response_delta.model_extra
863
+
864
+ # Add tool calls
865
+ if choice_delta.tool_calls is not None:
866
+ model_response.tool_calls = choice_delta.tool_calls # type: ignore
867
+
868
+ if hasattr(choice_delta, "reasoning_content") and choice_delta.reasoning_content is not None:
869
+ model_response.reasoning_content = choice_delta.reasoning_content
870
+ elif hasattr(choice_delta, "reasoning") and choice_delta.reasoning is not None:
871
+ model_response.reasoning_content = choice_delta.reasoning
872
+
873
+ # Add audio if present
874
+ if hasattr(choice_delta, "audio") and choice_delta.audio is not None:
875
+ try:
876
+ audio_data = None
877
+ audio_id = None
878
+ audio_expires_at = None
879
+ audio_transcript = None
880
+
881
+ if isinstance(choice_delta.audio, dict):
882
+ audio_data = choice_delta.audio.get("data")
883
+ audio_id = choice_delta.audio.get("id")
884
+ audio_expires_at = choice_delta.audio.get("expires_at")
885
+ audio_transcript = choice_delta.audio.get("transcript")
886
+ else:
887
+ audio_data = choice_delta.audio.data
888
+ audio_id = choice_delta.audio.id
889
+ audio_expires_at = choice_delta.audio.expires_at
890
+ audio_transcript = choice_delta.audio.transcript
891
+
892
+ # Only create Audio object if there's actual content
893
+ if audio_data is not None:
894
+ model_response.audio = Audio(
895
+ id=audio_id,
896
+ content=audio_data,
897
+ expires_at=audio_expires_at,
898
+ transcript=audio_transcript,
899
+ sample_rate=24000,
900
+ mime_type="pcm16",
901
+ )
902
+ # If no content but there's transcript/metadata, create minimal Audio object
903
+ elif audio_transcript is not None or audio_id is not None:
904
+ model_response.audio = Audio(
905
+ id=audio_id or str(uuid4()),
906
+ content=b"",
907
+ expires_at=audio_expires_at,
908
+ transcript=audio_transcript,
909
+ sample_rate=24000,
910
+ mime_type="pcm16",
911
+ )
912
+ except Exception as e:
913
+ log_warning(f"Error processing audio: {e}")
879
914
 
880
- # -*- Log response and metrics
881
- assistant_message.log()
882
- metrics.log()
915
+ # Add usage metrics if present
916
+ if response_delta.usage is not None:
917
+ model_response.response_usage = self._get_metrics(response_delta.usage)
883
918
 
884
- # -*- Handle tool calls
885
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
886
- tool_role = "tool"
887
- yield from self.handle_stream_tool_calls(
888
- assistant_message=assistant_message, messages=messages, tool_role=tool_role
889
- )
890
- yield from self.handle_post_tool_call_messages_stream(messages=messages)
891
- logger.debug("---------- OpenAIChat Response End ----------")
919
+ return model_response
892
920
 
893
- async def aresponse_stream(self, messages: List[Message]) -> Any:
921
+ def _get_metrics(self, response_usage: CompletionUsage) -> Metrics:
894
922
  """
895
- Generate an asynchronous streaming response from OpenAI.
923
+ Parse the given OpenAI-specific usage into an Agno Metrics object.
896
924
 
897
925
  Args:
898
- messages (List[Message]): A list of messages.
926
+ response_usage: Usage data from OpenAI
899
927
 
900
928
  Returns:
901
- Any: An asynchronous iterator of model responses.
929
+ Metrics: Parsed metrics data
902
930
  """
903
- logger.debug("---------- OpenAIChat Async Response Start ----------")
904
- self._log_messages(messages)
905
- stream_data: StreamData = StreamData()
906
- metrics: Metrics = Metrics()
907
-
908
- # -*- Generate response
909
- metrics.start_response_timer()
910
- async for response in self.ainvoke_stream(messages=messages):
911
- if response.choices and len(response.choices) > 0:
912
- metrics.completion_tokens += 1
913
- if metrics.completion_tokens == 1:
914
- metrics.time_to_first_token = metrics.response_timer.elapsed
915
-
916
- response_delta: ChoiceDelta = response.choices[0].delta
917
-
918
- if response_delta.content is not None:
919
- stream_data.response_content += response_delta.content
920
- yield ModelResponse(content=response_delta.content)
921
-
922
- if hasattr(response_delta, "audio"):
923
- response_audio = response_delta.audio
924
- stream_data.response_audio = response_audio
925
- if stream_data.response_audio:
926
- yield ModelResponse(
927
- audio=AudioOutput(
928
- id=stream_data.response_audio.id,
929
- content=stream_data.response_audio.data,
930
- expires_at=stream_data.response_audio.expires_at,
931
- transcript=stream_data.response_audio.transcript,
932
- )
933
- )
934
-
935
- if response_delta.tool_calls is not None:
936
- if stream_data.response_tool_calls is None:
937
- stream_data.response_tool_calls = []
938
- stream_data.response_tool_calls.extend(response_delta.tool_calls)
939
-
940
- if response.usage is not None:
941
- self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
942
- metrics.stop_response_timer()
943
-
944
- # -*- Create assistant message
945
- assistant_message = Message(role="assistant")
946
- if stream_data.response_content != "":
947
- assistant_message.content = stream_data.response_content
948
-
949
- if stream_data.response_audio is not None:
950
- assistant_message.audio_output = AudioOutput(
951
- id=stream_data.response_audio.id,
952
- content=stream_data.response_audio.data,
953
- expires_at=stream_data.response_audio.expires_at,
954
- transcript=stream_data.response_audio.transcript,
955
- )
956
-
957
- if stream_data.response_tool_calls is not None:
958
- _tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
959
- if len(_tool_calls) > 0:
960
- assistant_message.tool_calls = _tool_calls
961
-
962
- self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
963
-
964
- # -*- Add assistant message to messages
965
- messages.append(assistant_message)
966
931
 
967
- # -*- Log response and metrics
968
- assistant_message.log()
969
- metrics.log()
932
+ metrics = Metrics()
970
933
 
971
- # -*- Handle tool calls
972
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
973
- tool_role = "tool"
974
- for tool_call_response in self.handle_stream_tool_calls(
975
- assistant_message=assistant_message, messages=messages, tool_role=tool_role
976
- ):
977
- yield tool_call_response
978
- async for post_tool_call_response in self.ahandle_post_tool_call_messages_stream(messages=messages):
979
- yield post_tool_call_response
980
- logger.debug("---------- OpenAIChat Async Response End ----------")
934
+ metrics.input_tokens = response_usage.prompt_tokens or 0
935
+ metrics.output_tokens = response_usage.completion_tokens or 0
936
+ metrics.total_tokens = response_usage.total_tokens or 0
981
937
 
982
- def build_tool_calls(self, tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
983
- """
984
- Build tool calls from tool call data.
938
+ # Add the prompt_tokens_details field
939
+ if prompt_token_details := response_usage.prompt_tokens_details:
940
+ metrics.audio_input_tokens = prompt_token_details.audio_tokens or 0
941
+ metrics.cache_read_tokens = prompt_token_details.cached_tokens or 0
985
942
 
986
- Args:
987
- tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
988
-
989
- Returns:
990
- List[Dict[str, Any]]: The built tool calls.
991
- """
943
+ # Add the completion_tokens_details field
944
+ if completion_tokens_details := response_usage.completion_tokens_details:
945
+ metrics.audio_output_tokens = completion_tokens_details.audio_tokens or 0
946
+ metrics.reasoning_tokens = completion_tokens_details.reasoning_tokens or 0
992
947
 
993
- return self._build_tool_calls(tool_calls_data)
948
+ return metrics