agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,70 +1,42 @@
1
- from dataclasses import dataclass
1
+ import json
2
+ from collections.abc import AsyncIterator
3
+ from dataclasses import asdict, dataclass
2
4
  from os import getenv
3
- from typing import Any, Dict, Iterator, List, Optional, Union
5
+ from typing import Any, Dict, Iterator, List, Optional, Type, Union
4
6
 
5
7
  import httpx
8
+ from huggingface_hub import ChatCompletionInputStreamOptions
6
9
  from pydantic import BaseModel
7
10
 
8
- from agno.models.base import Metrics, Model
11
+ from agno.exceptions import ModelProviderError
12
+ from agno.models.base import Model
9
13
  from agno.models.message import Message
14
+ from agno.models.metrics import Metrics
10
15
  from agno.models.response import ModelResponse
11
- from agno.tools.function import FunctionCall
12
- from agno.utils.log import logger
13
- from agno.utils.tools import get_function_call_for_tool_call
16
+ from agno.run.agent import RunOutput
17
+ from agno.utils.log import log_debug, log_error, log_warning
14
18
 
15
19
  try:
16
20
  from huggingface_hub import (
17
21
  AsyncInferenceClient,
18
22
  ChatCompletionOutput,
19
23
  ChatCompletionOutputMessage,
20
- ChatCompletionOutputUsage,
21
24
  ChatCompletionStreamOutput,
22
25
  ChatCompletionStreamOutputDelta,
23
26
  ChatCompletionStreamOutputDeltaToolCall,
24
27
  InferenceClient,
25
28
  )
26
- except (ModuleNotFoundError, ImportError):
29
+ from huggingface_hub.errors import InferenceTimeoutError
30
+ except ImportError:
27
31
  raise ImportError("`huggingface_hub` not installed. Please install using `pip install huggingface_hub`")
28
32
 
29
33
 
30
- @dataclass
31
- class StreamData:
32
- response_content: str = ""
33
- response_tool_calls: Optional[List[ChatCompletionStreamOutputDeltaToolCall]] = None
34
-
35
-
36
34
  @dataclass
37
35
  class HuggingFace(Model):
38
36
  """
39
37
  A class for interacting with HuggingFace Hub Inference models.
40
38
 
41
- Attributes:
42
- id (str): The id of the HuggingFace model to use. Default is "meta-llama/Meta-Llama-3-8B-Instruct".
43
- name (str): The name of this chat model instance. Default is "HuggingFace".
44
- provider (str): The provider of the model. Default is "HuggingFace".
45
- store (Optional[bool]): Whether or not to store the output of this chat completion request for use in the model distillation or evals products.
46
- frequency_penalty (Optional[float]): Penalizes new tokens based on their frequency in the text so far.
47
- logit_bias (Optional[Any]): Modifies the likelihood of specified tokens appearing in the completion.
48
- logprobs (Optional[bool]): Include the log probabilities on the logprobs most likely tokens.
49
- max_tokens (Optional[int]): The maximum number of tokens to generate in the chat completion.
50
- presence_penalty (Optional[float]): Penalizes new tokens based on whether they appear in the text so far.
51
- response_format (Optional[Any]): An object specifying the format that the model must output.
52
- seed (Optional[int]): A seed for deterministic sampling.
53
- stop (Optional[Union[str, List[str]]]): Up to 4 sequences where the API will stop generating further tokens.
54
- temperature (Optional[float]): Controls randomness in the model's output.
55
- top_logprobs (Optional[int]): How many log probability results to return per token.
56
- top_p (Optional[float]): Controls diversity via nucleus sampling.
57
- request_params (Optional[Dict[str, Any]]): Additional parameters to include in the request.
58
- api_key (Optional[str]): The Access Token for authenticating with HuggingFace.
59
- base_url (Optional[Union[str, httpx.URL]]): The base URL for API requests.
60
- timeout (Optional[float]): The timeout for API requests.
61
- max_retries (Optional[int]): The maximum number of retries for failed requests.
62
- default_headers (Optional[Any]): Default headers to include in all requests.
63
- default_query (Optional[Any]): Default query parameters to include in all requests.
64
- http_client (Optional[httpx.Client]): An optional pre-configured HTTP client.
65
- client_params (Optional[Dict[str, Any]]): Additional parameters for client configuration.
66
- client (Optional[InferenceClient]): The HuggingFace Hub Inference client instance.
67
- async_client (Optional[AsyncInferenceClient]): The asynchronous HuggingFace Hub client instance.
39
+ For more information, see: https://huggingface.co/docs/api-inference/index
68
40
  """
69
41
 
70
42
  id: str = "meta-llama/Meta-Llama-3-8B-Instruct"
@@ -78,7 +50,6 @@ class HuggingFace(Model):
78
50
  logprobs: Optional[bool] = None
79
51
  max_tokens: Optional[int] = None
80
52
  presence_penalty: Optional[float] = None
81
- response_format: Optional[Any] = None
82
53
  seed: Optional[int] = None
83
54
  stop: Optional[Union[str, List[str]]] = None
84
55
  temperature: Optional[float] = None
@@ -93,7 +64,6 @@ class HuggingFace(Model):
93
64
  max_retries: Optional[int] = None
94
65
  default_headers: Optional[Any] = None
95
66
  default_query: Optional[Any] = None
96
- http_client: Optional[httpx.Client] = None
97
67
  client_params: Optional[Dict[str, Any]] = None
98
68
 
99
69
  # HuggingFace Hub Inference clients
@@ -103,7 +73,7 @@ class HuggingFace(Model):
103
73
  def get_client_params(self) -> Dict[str, Any]:
104
74
  self.api_key = self.api_key or getenv("HF_TOKEN")
105
75
  if not self.api_key:
106
- logger.error("HF_TOKEN not set. Please set the HF_TOKEN environment variable.")
76
+ log_error("HF_TOKEN not set. Please set the HF_TOKEN environment variable.")
107
77
 
108
78
  _client_params: Dict[str, Any] = {}
109
79
  if self.api_key is not None:
@@ -133,9 +103,8 @@ class HuggingFace(Model):
133
103
  return self.client
134
104
 
135
105
  _client_params: Dict[str, Any] = self.get_client_params()
136
- if self.http_client is not None:
137
- _client_params["http_client"] = self.http_client
138
- return InferenceClient(**_client_params)
106
+ self.client = InferenceClient(**_client_params)
107
+ return self.client
139
108
 
140
109
  def get_async_client(self) -> AsyncInferenceClient:
141
110
  """
@@ -148,18 +117,12 @@ class HuggingFace(Model):
148
117
  return self.async_client
149
118
 
150
119
  _client_params: Dict[str, Any] = self.get_client_params()
120
+ self.async_client = AsyncInferenceClient(**_client_params)
121
+ return self.async_client
151
122
 
152
- if self.http_client:
153
- _client_params["http_client"] = self.http_client
154
- else:
155
- # Create a new async HTTP client with custom limits
156
- _client_params["http_client"] = httpx.AsyncClient(
157
- limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
158
- )
159
- return AsyncInferenceClient(**_client_params)
160
-
161
- @property
162
- def request_kwargs(self) -> Dict[str, Any]:
123
+ def get_request_params(
124
+ self, tools: Optional[List[Dict[str, Any]]] = None, tool_choice: Optional[Union[str, Dict[str, Any]]] = None
125
+ ) -> Dict[str, Any]:
163
126
  """
164
127
  Returns keyword arguments for inference model client requests.
165
128
 
@@ -179,8 +142,6 @@ class HuggingFace(Model):
179
142
  _request_params["max_tokens"] = self.max_tokens
180
143
  if self.presence_penalty is not None:
181
144
  _request_params["presence_penalty"] = self.presence_penalty
182
- if self.response_format is not None:
183
- _request_params["response_format"] = self.response_format
184
145
  if self.seed is not None:
185
146
  _request_params["seed"] = self.seed
186
147
  if self.stop is not None:
@@ -191,14 +152,17 @@ class HuggingFace(Model):
191
152
  _request_params["top_logprobs"] = self.top_logprobs
192
153
  if self.top_p is not None:
193
154
  _request_params["top_p"] = self.top_p
194
- if self.tools is not None:
195
- _request_params["tools"] = self.tools
196
- if self.tool_choice is None:
155
+ if tools is not None:
156
+ _request_params["tools"] = tools
157
+ if tool_choice is None:
197
158
  _request_params["tool_choice"] = "auto"
198
159
  else:
199
- _request_params["tool_choice"] = self.tool_choice
160
+ _request_params["tool_choice"] = tool_choice
200
161
  if self.request_params is not None:
201
162
  _request_params.update(self.request_params)
163
+
164
+ if _request_params:
165
+ log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
202
166
  return _request_params
203
167
 
204
168
  def to_dict(self) -> Dict[str, Any]:
@@ -217,558 +181,322 @@ class HuggingFace(Model):
217
181
  "logprobs": self.logprobs,
218
182
  "max_tokens": self.max_tokens,
219
183
  "presence_penalty": self.presence_penalty,
220
- "response_format": self.response_format,
221
184
  "seed": self.seed,
222
185
  "stop": self.stop,
223
186
  "temperature": self.temperature,
224
187
  "top_logprobs": self.top_logprobs,
225
188
  "top_p": self.top_p,
226
- "tools": self.tools,
227
- "tool_choice": self.tool_choice
228
- if (self.tools is not None and self.tool_choice is not None)
229
- else "auto",
230
189
  }
231
190
  )
232
191
  cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
233
192
  return cleaned_dict
234
193
 
235
- def invoke(self, messages: List[Message]) -> Union[ChatCompletionOutput]:
194
+ def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
236
195
  """
237
- Send a chat completion request to the HuggingFace Hub.
196
+ Format a message into the format expected by HuggingFace.
238
197
 
239
198
  Args:
240
- messages (List[Message]): A list of messages to send to the model.
199
+ message (Message): The message to format.
200
+ compress_tool_results: Whether to compress tool results.
241
201
 
242
202
  Returns:
243
- ChatCompletionOutput: The chat completion response from the Inference Client.
203
+ Dict[str, Any]: The formatted message.
244
204
  """
245
- return self.get_client().chat.completions.create(
246
- model=self.id,
247
- messages=[m.to_dict() for m in messages],
248
- **self.request_kwargs,
249
- )
205
+ # Use compressed content for tool messages if compression is active
206
+ if message.role == "tool":
207
+ content = message.get_content(use_compressed_content=compress_tool_results)
208
+ else:
209
+ content = message.content if message.content is not None else ""
250
210
 
251
- async def ainvoke(self, messages: List[Message]) -> Union[ChatCompletionOutput]:
252
- """
253
- Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
211
+ message_dict: Dict[str, Any] = {
212
+ "role": message.role,
213
+ "content": content,
214
+ "name": message.name or message.tool_name,
215
+ "tool_call_id": message.tool_call_id,
216
+ "tool_calls": message.tool_calls,
217
+ }
254
218
 
255
- Args:
256
- messages (List[Message]): A list of messages to send to the model.
219
+ message_dict = {k: v for k, v in message_dict.items() if v is not None}
257
220
 
258
- Returns:
259
- ChatCompletionOutput: The chat completion response from the Inference Client.
260
- """
261
- return await self.get_async_client().chat.completions.create(
262
- model=self.id,
263
- messages=[m.to_dict() for m in messages],
264
- **self.request_kwargs,
265
- )
221
+ if message.tool_calls is None or len(message.tool_calls) == 0:
222
+ message_dict["tool_calls"] = None
266
223
 
267
- def invoke_stream(self, messages: List[Message]) -> Iterator[ChatCompletionStreamOutput]:
268
- """
269
- Send a streaming chat completion request to the HuggingFace API.
224
+ if message.audio is not None and len(message.audio) > 0:
225
+ log_warning("Audio input is currently unsupported.")
270
226
 
271
- Args:
272
- messages (List[Message]): A list of messages to send to the model.
227
+ if message.files is not None and len(message.files) > 0:
228
+ log_warning("File input is currently unsupported.")
273
229
 
274
- Returns:
275
- Iterator[ChatCompletionStreamOutput]: An iterator of chat completion delta.
276
- """
277
- yield from self.get_client().chat.completions.create(
278
- model=self.id,
279
- messages=[m.to_dict() for m in messages], # type: ignore
280
- stream=True,
281
- stream_options={"include_usage": True},
282
- **self.request_kwargs,
283
- ) # type: ignore
284
-
285
- async def ainvoke_stream(self, messages: List[Message]) -> Any:
286
- """
287
- Sends an asynchronous streaming chat completion request to the HuggingFace API.
230
+ if message.images is not None and len(message.images) > 0:
231
+ log_warning("Image input is currently unsupported.")
288
232
 
289
- Args:
290
- messages (List[Message]): A list of messages to send to the model.
233
+ if message.videos is not None and len(message.videos) > 0:
234
+ log_warning("Video input is currently unsupported.")
291
235
 
292
- Returns:
293
- Any: An asynchronous iterator of chat completion chunks.
294
- """
295
- async_stream = await self.get_async_client().chat.completions.create(
296
- model=self.id,
297
- messages=[m.to_dict() for m in messages],
298
- stream=True,
299
- stream_options={"include_usage": True},
300
- **self.request_kwargs,
301
- )
302
- async for chunk in async_stream: # type: ignore
303
- yield chunk
236
+ return message_dict
304
237
 
305
- def _handle_tool_calls(
306
- self, assistant_message: Message, messages: List[Message], model_response: ModelResponse
307
- ) -> Optional[ModelResponse]:
238
+ def invoke(
239
+ self,
240
+ messages: List[Message],
241
+ assistant_message: Message,
242
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
243
+ tools: Optional[List[Dict[str, Any]]] = None,
244
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
245
+ run_response: Optional[RunOutput] = None,
246
+ compress_tool_results: bool = False,
247
+ ) -> ModelResponse:
308
248
  """
309
- Handle tool calls in the assistant message.
310
-
311
- Args:
312
- assistant_message (Message): The assistant message.
313
- messages (List[Message]): The list of messages.
314
- model_response (ModelResponse): The model response.
315
-
316
- Returns:
317
- Optional[ModelResponse]: The model response after handling tool calls.
249
+ Send a chat completion request to the HuggingFace Hub.
318
250
  """
319
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
320
- model_response.content = ""
321
- tool_role: str = "tool"
322
- function_calls_to_run: List[FunctionCall] = []
323
- function_call_results: List[Message] = []
324
- for tool_call in assistant_message.tool_calls:
325
- _tool_call_id = tool_call.get("id")
326
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
327
- if _function_call is None:
328
- messages.append(
329
- Message(
330
- role="tool",
331
- tool_call_id=_tool_call_id,
332
- content="Could not find function to call.",
333
- )
334
- )
335
- continue
336
- if _function_call.error is not None:
337
- messages.append(
338
- Message(
339
- role="tool",
340
- tool_call_id=_tool_call_id,
341
- content=_function_call.error,
342
- )
343
- )
344
- continue
345
- function_calls_to_run.append(_function_call)
346
-
347
- if self.show_tool_calls:
348
- model_response.content += "\nRunning:"
349
- for _f in function_calls_to_run:
350
- model_response.content += f"\n - {_f.get_call_str()}"
351
- model_response.content += "\n\n"
352
-
353
- for _ in self.run_function_calls(
354
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
355
- ):
356
- pass
357
-
358
- if len(function_call_results) > 0:
359
- messages.extend(function_call_results)
251
+ try:
252
+ if run_response and run_response.metrics:
253
+ run_response.metrics.set_time_to_first_token()
254
+
255
+ assistant_message.metrics.start_timer()
256
+ provider_response = self.get_client().chat.completions.create(
257
+ model=self.id,
258
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
259
+ **self.get_request_params(tools=tools, tool_choice=tool_choice),
260
+ )
261
+ assistant_message.metrics.stop_timer()
360
262
 
361
- return model_response
362
- return None
263
+ return self._parse_provider_response(provider_response, response_format=response_format)
363
264
 
364
- def _update_usage_metrics(
365
- self, assistant_message: Message, metrics: Metrics, response_usage: Optional[ChatCompletionOutputUsage]
366
- ) -> None:
367
- """
368
- Update the usage metrics for the assistant message and the model.
265
+ except InferenceTimeoutError as e:
266
+ log_error(f"Error invoking HuggingFace model: {e}")
267
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
268
+ except Exception as e:
269
+ log_error(f"Unexpected error invoking HuggingFace model: {e}")
270
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
369
271
 
370
- Args:
371
- assistant_message (Message): The assistant message.
372
- metrics (Metrics): The metrics.
373
- response_usage (Optional[CompletionUsage]): The response usage.
374
- """
375
- # Update time taken to generate response
376
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
377
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
378
- if response_usage:
379
- prompt_tokens = response_usage.prompt_tokens
380
- completion_tokens = response_usage.completion_tokens
381
- total_tokens = response_usage.total_tokens
382
-
383
- if prompt_tokens is not None:
384
- metrics.input_tokens = prompt_tokens
385
- metrics.prompt_tokens = prompt_tokens
386
- assistant_message.metrics["input_tokens"] = prompt_tokens
387
- assistant_message.metrics["prompt_tokens"] = prompt_tokens
388
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
389
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
390
- if completion_tokens is not None:
391
- metrics.output_tokens = completion_tokens
392
- metrics.completion_tokens = completion_tokens
393
- assistant_message.metrics["output_tokens"] = completion_tokens
394
- assistant_message.metrics["completion_tokens"] = completion_tokens
395
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
396
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
397
- if total_tokens is not None:
398
- metrics.total_tokens = total_tokens
399
- assistant_message.metrics["total_tokens"] = total_tokens
400
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
401
- if response_usage.prompt_tokens_details is not None:
402
- if isinstance(response_usage.prompt_tokens_details, dict):
403
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details
404
- elif isinstance(response_usage.prompt_tokens_details, BaseModel):
405
- metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
406
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
407
- if metrics.prompt_tokens_details is not None:
408
- for k, v in metrics.prompt_tokens_details.items():
409
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
410
- if response_usage.completion_tokens_details is not None:
411
- if isinstance(response_usage.completion_tokens_details, dict):
412
- metrics.completion_tokens_details = response_usage.completion_tokens_details
413
- elif isinstance(response_usage.completion_tokens_details, BaseModel):
414
- metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
415
- exclude_none=True
416
- )
417
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
418
- if metrics.completion_tokens_details is not None:
419
- for k, v in metrics.completion_tokens_details.items():
420
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
421
-
422
- def _create_assistant_message(
272
+ async def ainvoke(
423
273
  self,
424
- response_message: ChatCompletionOutputMessage,
425
- metrics: Metrics,
426
- response_usage: Optional[ChatCompletionOutputUsage],
427
- ) -> Message:
274
+ messages: List[Message],
275
+ assistant_message: Message,
276
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
277
+ tools: Optional[List[Dict[str, Any]]] = None,
278
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
279
+ run_response: Optional[RunOutput] = None,
280
+ compress_tool_results: bool = False,
281
+ ) -> ModelResponse:
428
282
  """
429
- Create an assistant message from the response.
430
-
431
- Args:
432
- response_message (ChatCompletionMessage): The response message.
433
- metrics (Metrics): The metrics.
434
- response_usage (Optional[CompletionUsage]): The response usage.
435
-
436
- Returns:
437
- Message: The assistant message.
283
+ Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
438
284
  """
439
- assistant_message = Message(
440
- role=response_message.role or "assistant",
441
- content=response_message.content,
442
- )
443
- if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
444
- assistant_message.tool_calls = [t.model_dump() for t in response_message.tool_calls]
285
+ try:
286
+ if run_response and run_response.metrics:
287
+ run_response.metrics.set_time_to_first_token()
288
+
289
+ assistant_message.metrics.start_timer()
290
+ provider_response = await self.get_async_client().chat.completions.create(
291
+ model=self.id,
292
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
293
+ **self.get_request_params(tools=tools, tool_choice=tool_choice),
294
+ )
295
+ assistant_message.metrics.stop_timer()
445
296
 
446
- return assistant_message
297
+ return self._parse_provider_response(provider_response, response_format=response_format)
447
298
 
448
- def response(self, messages: List[Message]) -> ModelResponse:
449
- """
450
- Generate a response from HuggingFace Hub.
451
-
452
- Args:
453
- messages (List[Message]): A list of messages.
299
+ except InferenceTimeoutError as e:
300
+ log_error(f"Error invoking HuggingFace model: {e}")
301
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
302
+ except Exception as e:
303
+ log_error(f"Unexpected error invoking HuggingFace model: {e}")
304
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
454
305
 
455
- Returns:
456
- ModelResponse: The model response.
306
+ def invoke_stream(
307
+ self,
308
+ messages: List[Message],
309
+ assistant_message: Message,
310
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
311
+ tools: Optional[List[Dict[str, Any]]] = None,
312
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
313
+ run_response: Optional[RunOutput] = None,
314
+ compress_tool_results: bool = False,
315
+ ) -> Iterator[ModelResponse]:
457
316
  """
458
- logger.debug("---------- HuggingFace Response Start ----------")
459
- self._log_messages(messages)
460
- model_response = ModelResponse()
461
- metrics = Metrics()
317
+ Send a streaming chat completion request to the HuggingFace API.
318
+ """
319
+ try:
320
+ if run_response and run_response.metrics:
321
+ run_response.metrics.set_time_to_first_token()
462
322
 
463
- # -*- Generate response
464
- metrics.start_response_timer()
465
- response: Union[ChatCompletionOutput] = self.invoke(messages=messages)
466
- metrics.stop_response_timer()
323
+ assistant_message.metrics.start_timer()
467
324
 
468
- # -*- Parse response
469
- response_message: ChatCompletionOutputMessage = response.choices[0].message
470
- response_usage: Optional[ChatCompletionOutputUsage] = response.usage
325
+ stream = self.get_client().chat.completions.create(
326
+ model=self.id,
327
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
328
+ stream=True,
329
+ stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
330
+ **self.get_request_params(tools=tools, tool_choice=tool_choice),
331
+ )
471
332
 
472
- # -*- Create assistant message
473
- assistant_message = self._create_assistant_message(
474
- response_message=response_message, metrics=metrics, response_usage=response_usage
475
- )
333
+ for chunk in stream:
334
+ yield self._parse_provider_response_delta(chunk)
476
335
 
477
- # -*- Add assistant message to messages
478
- messages.append(assistant_message)
336
+ assistant_message.metrics.stop_timer()
479
337
 
480
- # -*- Log response and metrics
481
- assistant_message.log()
482
- metrics.log()
338
+ except InferenceTimeoutError as e:
339
+ log_error(f"Error invoking HuggingFace model: {e}")
340
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
341
+ except Exception as e:
342
+ log_error(f"Unexpected error invoking HuggingFace model: {e}")
343
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
483
344
 
484
- # -*- Handle tool calls
485
- if self._handle_tool_calls(assistant_message, messages, model_response):
486
- response_after_tool_calls = self.response(messages=messages)
487
- if response_after_tool_calls.content is not None:
488
- if model_response.content is None:
489
- model_response.content = ""
490
- model_response.content += response_after_tool_calls.content
491
- return model_response
345
+ async def ainvoke_stream(
346
+ self,
347
+ messages: List[Message],
348
+ assistant_message: Message,
349
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
350
+ tools: Optional[List[Dict[str, Any]]] = None,
351
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
352
+ run_response: Optional[RunOutput] = None,
353
+ compress_tool_results: bool = False,
354
+ ) -> AsyncIterator[Any]:
355
+ """
356
+ Sends an asynchronous streaming chat completion request to the HuggingFace API.
357
+ """
358
+ try:
359
+ if run_response and run_response.metrics:
360
+ run_response.metrics.set_time_to_first_token()
361
+
362
+ assistant_message.metrics.start_timer()
363
+ provider_response = await self.get_async_client().chat.completions.create(
364
+ model=self.id,
365
+ messages=[self._format_message(m, compress_tool_results) for m in messages],
366
+ stream=True,
367
+ stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
368
+ **self.get_request_params(tools=tools, tool_choice=tool_choice),
369
+ )
492
370
 
493
- # -*- Update model response
494
- if assistant_message.content is not None:
495
- model_response.content = assistant_message.get_content_string()
371
+ async for chunk in provider_response:
372
+ yield self._parse_provider_response_delta(chunk)
496
373
 
497
- logger.debug("---------- HuggingFace Response End ----------")
498
- return model_response
374
+ assistant_message.metrics.stop_timer()
375
+
376
+ except InferenceTimeoutError as e:
377
+ log_error(f"Error invoking HuggingFace model: {e}")
378
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
379
+ except Exception as e:
380
+ log_error(f"Unexpected error invoking HuggingFace model: {e}")
381
+ raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
499
382
 
500
- async def aresponse(self, messages: List[Message]) -> ModelResponse:
383
+ # Override base method
384
+ @staticmethod
385
+ def parse_tool_calls(tool_calls_data: List[ChatCompletionStreamOutputDeltaToolCall]) -> List[Dict[str, Any]]:
501
386
  """
502
- Generate an asynchronous response from HuggingFace.
387
+ Build tool calls from streamed tool call data.
503
388
 
504
389
  Args:
505
- messages (List[Message]): A list of messages.
390
+ tool_calls_data (List[ChatCompletionStreamOutputDeltaToolCall]): The tool call data to build from.
506
391
 
507
392
  Returns:
508
- ModelResponse: The model response from the API.
393
+ List[Dict[str, Any]]: The built tool calls.
394
+ """
395
+ tool_calls: List[Dict[str, Any]] = []
396
+ for tool_call in tool_calls_data:
397
+ _tool_call = tool_call[0]
398
+ _index = _tool_call.index
399
+ _tool_call_id = _tool_call.id
400
+ _tool_call_type = _tool_call.type
401
+ _function_name = _tool_call.function.name if _tool_call.function else None
402
+ _function_arguments = _tool_call.function.arguments if _tool_call.function else None
403
+
404
+ if len(tool_calls) <= _index:
405
+ tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
406
+ tool_call_entry = tool_calls[_index]
407
+ if not tool_call_entry:
408
+ tool_call_entry["id"] = _tool_call_id
409
+ tool_call_entry["type"] = _tool_call_type
410
+ tool_call_entry["function"] = {
411
+ "name": _function_name or "",
412
+ "arguments": _function_arguments or "",
413
+ }
414
+ else:
415
+ if _function_name:
416
+ tool_call_entry["function"]["name"] += _function_name
417
+ if _function_arguments:
418
+ tool_call_entry["function"]["arguments"] += _function_arguments
419
+ if _tool_call_id:
420
+ tool_call_entry["id"] = _tool_call_id
421
+ if _tool_call_type:
422
+ tool_call_entry["type"] = _tool_call_type
423
+ return tool_calls
424
+
425
+ def _parse_provider_response(
426
+ self,
427
+ response: ChatCompletionOutput,
428
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
429
+ ) -> ModelResponse:
430
+ """
431
+ Parse the provider response into a ModelResponse.
509
432
  """
510
- logger.debug("---------- HuggingFace Async Response Start ----------")
511
- self._log_messages(messages)
512
433
  model_response = ModelResponse()
513
- metrics = Metrics()
514
434
 
515
- # -*- Generate response
516
- metrics.start_response_timer()
517
- response: Union[ChatCompletionOutput] = await self.ainvoke(messages=messages)
518
- metrics.stop_response_timer()
519
-
520
- # -*- Parse response
521
435
  response_message: ChatCompletionOutputMessage = response.choices[0].message
522
- response_usage: Optional[ChatCompletionOutputUsage] = response.usage
523
436
 
524
- # -*- Parse structured outputs
437
+ model_response.role = response_message.role
438
+ if response_message.content is not None:
439
+ model_response.content = response_message.content
440
+
441
+ if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
442
+ model_response.tool_calls = [asdict(t) for t in response_message.tool_calls]
443
+ for tool_call in model_response.tool_calls:
444
+ if isinstance(tool_call["function"]["arguments"], dict):
445
+ tool_call["function"]["arguments"] = json.dumps(tool_call["function"]["arguments"])
446
+
525
447
  try:
526
448
  if (
527
- self.response_format is not None
528
- and self.structured_outputs
529
- and issubclass(self.response_format, BaseModel)
449
+ response_format is not None
450
+ and isinstance(response_format, type)
451
+ and issubclass(response_format, BaseModel)
530
452
  ):
531
453
  parsed_object = response_message.parsed # type: ignore
532
454
  if parsed_object is not None:
533
455
  model_response.parsed = parsed_object
534
456
  except Exception as e:
535
- logger.warning(f"Error retrieving structured outputs: {e}")
536
-
537
- # -*- Create assistant message
538
- assistant_message = self._create_assistant_message(
539
- response_message=response_message, metrics=metrics, response_usage=response_usage
540
- )
541
-
542
- # -*- Add assistant message to messages
543
- messages.append(assistant_message)
457
+ log_warning(f"Error retrieving structured outputs: {e}")
544
458
 
545
- # -*- Log response and metrics
546
- assistant_message.log()
547
- metrics.log()
459
+ if response.usage is not None:
460
+ model_response.response_usage = self._get_metrics(response)
548
461
 
549
- # -*- Handle tool calls
550
- if self._handle_tool_calls(assistant_message, messages, model_response):
551
- response_after_tool_calls = await self.aresponse(messages=messages)
552
- if response_after_tool_calls.content is not None:
553
- if model_response.content is None:
554
- model_response.content = ""
555
- model_response.content += response_after_tool_calls.content
556
- return model_response
557
-
558
- # -*- Update model response
559
- if assistant_message.content is not None:
560
- model_response.content = assistant_message.get_content_string()
561
-
562
- logger.debug("---------- HuggingFace Async Response End ----------")
563
462
  return model_response
564
463
 
565
- def _update_stream_metrics(self, assistant_message: Message, metrics: Metrics):
464
+ def _parse_provider_response_delta(self, response_delta: ChatCompletionStreamOutput) -> ModelResponse:
566
465
  """
567
- Update the usage metrics for the assistant message and the model.
568
-
569
- Args:
570
- assistant_message (Message): The assistant message.
571
- metrics (Metrics): The metrics.
572
- """
573
- # Update time taken to generate response
574
- assistant_message.metrics["time"] = metrics.response_timer.elapsed
575
- self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
576
-
577
- if metrics.time_to_first_token is not None:
578
- assistant_message.metrics["time_to_first_token"] = metrics.time_to_first_token
579
- self.metrics.setdefault("time_to_first_token", []).append(metrics.time_to_first_token)
580
-
581
- if metrics.input_tokens is not None:
582
- assistant_message.metrics["input_tokens"] = metrics.input_tokens
583
- self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + metrics.input_tokens
584
- if metrics.output_tokens is not None:
585
- assistant_message.metrics["output_tokens"] = metrics.output_tokens
586
- self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + metrics.output_tokens
587
- if metrics.prompt_tokens is not None:
588
- assistant_message.metrics["prompt_tokens"] = metrics.prompt_tokens
589
- self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + metrics.prompt_tokens
590
- if metrics.completion_tokens is not None:
591
- assistant_message.metrics["completion_tokens"] = metrics.completion_tokens
592
- self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + metrics.completion_tokens
593
- if metrics.total_tokens is not None:
594
- assistant_message.metrics["total_tokens"] = metrics.total_tokens
595
- self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + metrics.total_tokens
596
- if metrics.prompt_tokens_details is not None:
597
- assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
598
- for k, v in metrics.prompt_tokens_details.items():
599
- self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
600
- if metrics.completion_tokens_details is not None:
601
- assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
602
- for k, v in metrics.completion_tokens_details.items():
603
- self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
604
-
605
- def _handle_stream_tool_calls(
606
- self,
607
- assistant_message: Message,
608
- messages: List[Message],
609
- ) -> Iterator[ModelResponse]:
466
+ Parse the provider response delta into a ModelResponse.
610
467
  """
611
- Handle tool calls for response stream.
468
+ model_response = ModelResponse()
469
+ if response_delta.choices and len(response_delta.choices) > 0:
470
+ response_delta_message: ChatCompletionStreamOutputDelta = response_delta.choices[0].delta
612
471
 
613
- Args:
614
- assistant_message (Message): The assistant message.
615
- messages (List[Message]): The list of messages.
472
+ model_response.role = response_delta_message.role
616
473
 
617
- Returns:
618
- Iterator[ModelResponse]: An iterator of the model response.
619
- """
620
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
621
- tool_role: str = "tool"
622
- function_calls_to_run: List[FunctionCall] = []
623
- function_call_results: List[Message] = []
624
- for tool_call in assistant_message.tool_calls:
625
- _tool_call_id = tool_call.get("id")
626
- _function_call = get_function_call_for_tool_call(tool_call, self._functions)
627
- if _function_call is None:
628
- messages.append(
629
- Message(
630
- role=tool_role,
631
- tool_call_id=_tool_call_id,
632
- content="Could not find function to call.",
633
- )
634
- )
635
- continue
636
- if _function_call.error is not None:
637
- messages.append(
638
- Message(
639
- role=tool_role,
640
- tool_call_id=_tool_call_id,
641
- content=_function_call.error,
642
- )
643
- )
644
- continue
645
- function_calls_to_run.append(_function_call)
646
-
647
- if self.show_tool_calls:
648
- yield ModelResponse(content="\nRunning:")
649
- for _f in function_calls_to_run:
650
- yield ModelResponse(content=f"\n - {_f.get_call_str()}")
651
- yield ModelResponse(content="\n\n")
652
-
653
- for intermediate_model_response in self.run_function_calls(
654
- function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
655
- ):
656
- yield intermediate_model_response
474
+ if response_delta_message.content is not None:
475
+ model_response.content = response_delta_message.content
476
+ if response_delta_message.tool_calls is not None and len(response_delta_message.tool_calls) > 0:
477
+ model_response.tool_calls = [response_delta_message.tool_calls] # type: ignore
478
+ if response_delta.usage is not None:
479
+ model_response.response_usage = self._get_metrics(response_delta)
657
480
 
658
- if len(function_call_results) > 0:
659
- messages.extend(function_call_results)
481
+ return model_response
660
482
 
661
- def response_stream(self, messages: List[Message]) -> Iterator[ModelResponse]:
483
+ def _get_metrics(self, response: Union[ChatCompletionOutput, ChatCompletionStreamOutput]) -> Metrics:
662
484
  """
663
- Generate a streaming response from HuggingFace Hub.
485
+ Parse the given HuggingFace-specific usage into an Agno Metrics object.
664
486
 
665
487
  Args:
666
- messages (List[Message]): A list of messages.
488
+ response: The HuggingFace response to parse.
667
489
 
668
490
  Returns:
669
- Iterator[ModelResponse]: An iterator of model responses.
670
- """
671
- logger.debug("---------- HuggingFace Response Start ----------")
672
- self._log_messages(messages)
673
- stream_data: StreamData = StreamData()
674
-
675
- # -*- Generate response
676
- for response in self.invoke_stream(messages=messages):
677
- if len(response.choices) > 0:
678
- # metrics.completion_tokens += 1
679
-
680
- response_delta: ChatCompletionStreamOutputDelta = response.choices[0].delta
681
- response_content: Optional[str] = response_delta.content
682
- response_tool_calls: Optional[List[ChatCompletionStreamOutputDeltaToolCall]] = response_delta.tool_calls
683
-
684
- if response_content is not None:
685
- stream_data.response_content += response_content
686
- yield ModelResponse(content=response_content)
687
-
688
- if response_tool_calls is not None:
689
- if stream_data.response_tool_calls is None:
690
- stream_data.response_tool_calls = []
691
- stream_data.response_tool_calls.extend(response_tool_calls)
692
-
693
- # -*- Create assistant message
694
- assistant_message = Message(role="assistant")
695
- if stream_data.response_content != "":
696
- assistant_message.content = stream_data.response_content
697
-
698
- if stream_data.response_tool_calls is not None:
699
- _tool_calls = self._build_tool_calls(stream_data.response_tool_calls)
700
- if len(_tool_calls) > 0:
701
- assistant_message.tool_calls = _tool_calls
702
-
703
- # -*- Add assistant message to messages
704
- messages.append(assistant_message)
705
-
706
- # -*- Handle tool calls
707
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
708
- yield from self._handle_stream_tool_calls(assistant_message, messages)
709
- yield from self.response_stream(messages=messages)
710
- logger.debug("---------- HuggingFace Response End ----------")
711
-
712
- async def aresponse_stream(self, messages: List[Message]) -> Any:
491
+ Metrics: Parsed metrics data
713
492
  """
714
- Generate an asynchronous streaming response from HuggingFace Hub.
493
+ metrics = Metrics()
715
494
 
716
- Args:
717
- messages (List[Message]): A list of messages.
495
+ if not response.usage:
496
+ return metrics
718
497
 
719
- Returns:
720
- Any: An asynchronous iterator of model responses.
721
- """
722
- logger.debug("---------- HuggingFace Hub Async Response Start ----------")
723
- self._log_messages(messages)
724
- stream_data: StreamData = StreamData()
725
- metrics: Metrics = Metrics()
726
-
727
- # -*- Generate response
728
- metrics.start_response_timer()
729
- async for response in self.ainvoke_stream(messages=messages):
730
- if len(response.choices) > 0:
731
- metrics.completion_tokens += 1
732
- if metrics.completion_tokens == 1:
733
- metrics.time_to_first_token = metrics.response_timer.elapsed
734
-
735
- response_delta: ChatCompletionStreamOutputDelta = response.choices[0].delta
736
- response_content = response_delta.content
737
- response_tool_calls = response_delta.tool_calls
738
-
739
- if response_content is not None:
740
- stream_data.response_content += response_content
741
- yield ModelResponse(content=response_content)
742
-
743
- if response_tool_calls is not None:
744
- if stream_data.response_tool_calls is None:
745
- stream_data.response_tool_calls = []
746
- stream_data.response_tool_calls.extend(response_tool_calls)
747
- metrics.stop_response_timer()
748
-
749
- # -*- Create assistant message
750
- assistant_message = Message(role="assistant")
751
- if stream_data.response_content != "":
752
- assistant_message.content = stream_data.response_content
753
-
754
- if stream_data.response_tool_calls is not None:
755
- _tool_calls = self._build_tool_calls(stream_data.response_tool_calls)
756
- if len(_tool_calls) > 0:
757
- assistant_message.tool_calls = _tool_calls
758
-
759
- self._update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
760
-
761
- # -*- Add assistant message to messages
762
- messages.append(assistant_message)
763
-
764
- # -*- Log response and metrics
765
- assistant_message.log()
766
- metrics.log()
767
-
768
- # -*- Handle tool calls
769
- if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
770
- for model_response in self._handle_stream_tool_calls(assistant_message, messages):
771
- yield model_response
772
- async for model_response in self.aresponse_stream(messages=messages):
773
- yield model_response
774
- logger.debug("---------- HuggingFace Hub Async Response End ----------")
498
+ metrics.input_tokens = response.usage.prompt_tokens or 0
499
+ metrics.output_tokens = response.usage.completion_tokens or 0
500
+ metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
501
+
502
+ return metrics