agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/aws/claude.py CHANGED
@@ -1,19 +1,17 @@
1
1
  from dataclasses import dataclass
2
2
  from os import getenv
3
- from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Type, Union
3
+ from typing import Any, Dict, List, Optional, Type, Union
4
4
 
5
+ import httpx
5
6
  from pydantic import BaseModel
6
7
 
7
- from agno.exceptions import ModelProviderError, ModelRateLimitError
8
8
  from agno.models.anthropic import Claude as AnthropicClaude
9
- from agno.models.message import Message
10
- from agno.models.response import ModelResponse
11
- from agno.run.agent import RunOutput
12
- from agno.utils.log import log_debug, log_error, log_warning
13
- from agno.utils.models.claude import format_messages
9
+ from agno.utils.http import get_default_async_client, get_default_sync_client
10
+ from agno.utils.log import log_debug, log_warning
11
+ from agno.utils.models.claude import format_tools_for_model
14
12
 
15
13
  try:
16
- from anthropic import AnthropicBedrock, APIConnectionError, APIStatusError, AsyncAnthropicBedrock, RateLimitError
14
+ from anthropic import AnthropicBedrock, AsyncAnthropicBedrock
17
15
  except ImportError:
18
16
  raise ImportError("`anthropic[bedrock]` not installed. Please install using `pip install anthropic[bedrock]`")
19
17
 
@@ -31,44 +29,69 @@ class Claude(AnthropicClaude):
31
29
  For more information, see: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic.html
32
30
  """
33
31
 
34
- id: str = "anthropic.claude-3-5-sonnet-20240620-v1:0"
32
+ id: str = "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
35
33
  name: str = "AwsBedrockAnthropicClaude"
36
34
  provider: str = "AwsBedrock"
37
35
 
38
36
  aws_access_key: Optional[str] = None
39
37
  aws_secret_key: Optional[str] = None
40
38
  aws_region: Optional[str] = None
39
+ api_key: Optional[str] = None
41
40
  session: Optional[Session] = None
42
41
 
43
- # -*- Request parameters
44
- max_tokens: int = 4096
45
- temperature: Optional[float] = None
46
- top_p: Optional[float] = None
47
- top_k: Optional[int] = None
48
- stop_sequences: Optional[List[str]] = None
42
+ client: Optional[AnthropicBedrock] = None # type: ignore
43
+ async_client: Optional[AsyncAnthropicBedrock] = None # type: ignore
49
44
 
50
- # -*- Request parameters
51
- request_params: Optional[Dict[str, Any]] = None
52
- # -*- Client parameters
53
- client_params: Optional[Dict[str, Any]] = None
45
+ def __post_init__(self):
46
+ """Validate model configuration after initialization"""
47
+ # Validate thinking support immediately at model creation
48
+ if self.thinking:
49
+ self._validate_thinking_support()
50
+ # Overwrite output schema support for AWS Bedrock Claude
51
+ self.supports_native_structured_outputs = False
52
+ self.supports_json_schema_outputs = False
54
53
 
55
- def to_dict(self) -> Dict[str, Any]:
56
- """
57
- Convert the model to a dictionary.
54
+ def _get_client_params(self) -> Dict[str, Any]:
55
+ if self.session:
56
+ credentials = self.session.get_credentials()
57
+ client_params: Dict[str, Any] = {
58
+ "aws_access_key": credentials.access_key,
59
+ "aws_secret_key": credentials.secret_key,
60
+ "aws_session_token": credentials.token,
61
+ "aws_region": self.session.region_name,
62
+ }
63
+ else:
64
+ self.api_key = self.api_key or getenv("AWS_BEDROCK_API_KEY")
65
+ if self.api_key:
66
+ self.aws_region = self.aws_region or getenv("AWS_REGION")
67
+ client_params = {
68
+ "api_key": self.api_key,
69
+ }
70
+ if self.aws_region:
71
+ client_params["aws_region"] = self.aws_region
72
+ else:
73
+ self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY_ID") or getenv("AWS_ACCESS_KEY")
74
+ self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_ACCESS_KEY") or getenv("AWS_SECRET_KEY")
75
+ self.aws_region = self.aws_region or getenv("AWS_REGION")
76
+
77
+ client_params = {
78
+ "aws_secret_key": self.aws_secret_key,
79
+ "aws_access_key": self.aws_access_key,
80
+ "aws_region": self.aws_region,
81
+ }
82
+
83
+ if not (self.api_key or (self.aws_access_key and self.aws_secret_key)):
84
+ log_warning(
85
+ "AWS credentials not found. Please set AWS_BEDROCK_API_KEY or AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide a boto3 session."
86
+ )
87
+
88
+ if self.timeout is not None:
89
+ client_params["timeout"] = self.timeout
58
90
 
59
- Returns:
60
- Dict[str, Any]: The dictionary representation of the model.
61
- """
62
- _dict = super().to_dict()
63
- _dict["max_tokens"] = self.max_tokens
64
- _dict["temperature"] = self.temperature
65
- _dict["top_p"] = self.top_p
66
- _dict["top_k"] = self.top_k
67
- _dict["stop_sequences"] = self.stop_sequences
68
- return _dict
91
+ if self.client_params:
92
+ client_params.update(self.client_params)
69
93
 
70
- client: Optional[AnthropicBedrock] = None # type: ignore
71
- async_client: Optional[AsyncAnthropicBedrock] = None # type: ignore
94
+ return client_params
72
95
 
73
96
  def get_client(self):
74
97
  """
@@ -80,27 +103,18 @@ class Claude(AnthropicClaude):
80
103
  if self.client is not None and not self.client.is_closed():
81
104
  return self.client
82
105
 
83
- if self.session:
84
- credentials = self.session.get_credentials()
85
- client_params = {
86
- "aws_access_key": credentials.access_key,
87
- "aws_secret_key": credentials.secret_key,
88
- "aws_session_token": credentials.token,
89
- "aws_region": self.session.region_name,
90
- }
91
- else:
92
- self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
93
- self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
94
- self.aws_region = self.aws_region or getenv("AWS_REGION")
95
-
96
- client_params = {
97
- "aws_secret_key": self.aws_secret_key,
98
- "aws_access_key": self.aws_access_key,
99
- "aws_region": self.aws_region,
100
- }
106
+ client_params = self._get_client_params()
101
107
 
102
- if self.client_params:
103
- client_params.update(self.client_params)
108
+ if self.http_client:
109
+ if isinstance(self.http_client, httpx.Client):
110
+ client_params["http_client"] = self.http_client
111
+ else:
112
+ log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
113
+ # Use global sync client when user http_client is invalid
114
+ client_params["http_client"] = get_default_sync_client()
115
+ else:
116
+ # Use global sync client when no custom http_client is provided
117
+ client_params["http_client"] = get_default_sync_client()
104
118
 
105
119
  self.client = AnthropicBedrock(
106
120
  **client_params, # type: ignore
@@ -117,39 +131,46 @@ class Claude(AnthropicClaude):
117
131
  if self.async_client is not None:
118
132
  return self.async_client
119
133
 
120
- if self.session:
121
- credentials = self.session.get_credentials()
122
- client_params = {
123
- "aws_access_key": credentials.access_key,
124
- "aws_secret_key": credentials.secret_key,
125
- "aws_session_token": credentials.token,
126
- "aws_region": self.session.region_name,
127
- }
134
+ client_params = self._get_client_params()
135
+
136
+ if self.http_client:
137
+ if isinstance(self.http_client, httpx.AsyncClient):
138
+ client_params["http_client"] = self.http_client
139
+ else:
140
+ log_warning(
141
+ "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
142
+ )
143
+ # Use global async client when user http_client is invalid
144
+ client_params["http_client"] = get_default_async_client()
128
145
  else:
129
- client_params = {
130
- "aws_secret_key": self.aws_secret_key,
131
- "aws_access_key": self.aws_access_key,
132
- "aws_region": self.aws_region,
133
- }
134
-
135
- if self.client_params:
136
- client_params.update(self.client_params)
146
+ # Use global async client when no custom http_client is provided
147
+ client_params["http_client"] = get_default_async_client()
137
148
 
138
149
  self.async_client = AsyncAnthropicBedrock(
139
150
  **client_params, # type: ignore
140
151
  )
141
152
  return self.async_client
142
153
 
143
- def get_request_params(self) -> Dict[str, Any]:
154
+ def get_request_params(
155
+ self,
156
+ response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
157
+ tools: Optional[List[Dict[str, Any]]] = None,
158
+ ) -> Dict[str, Any]:
144
159
  """
145
160
  Generate keyword arguments for API requests.
146
161
 
147
162
  Returns:
148
163
  Dict[str, Any]: The keyword arguments for API requests.
149
164
  """
165
+ # Validate thinking support if thinking is enabled
166
+ if self.thinking:
167
+ self._validate_thinking_support()
168
+
150
169
  _request_params: Dict[str, Any] = {}
151
170
  if self.max_tokens:
152
171
  _request_params["max_tokens"] = self.max_tokens
172
+ if self.thinking:
173
+ _request_params["thinking"] = self.thinking
153
174
  if self.temperature:
154
175
  _request_params["temperature"] = self.temperature
155
176
  if self.stop_sequences:
@@ -158,6 +179,16 @@ class Claude(AnthropicClaude):
158
179
  _request_params["top_p"] = self.top_p
159
180
  if self.top_k:
160
181
  _request_params["top_k"] = self.top_k
182
+ if self.timeout:
183
+ _request_params["timeout"] = self.timeout
184
+
185
+ # Build betas list - include existing betas and add new one if needed
186
+ betas_list = list(self.betas) if self.betas else []
187
+
188
+ # Include betas if any are present
189
+ if betas_list:
190
+ _request_params["betas"] = betas_list
191
+
161
192
  if self.request_params:
162
193
  _request_params.update(self.request_params)
163
194
 
@@ -165,214 +196,40 @@ class Claude(AnthropicClaude):
165
196
  log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
166
197
  return _request_params
167
198
 
168
- def invoke(
169
- self,
170
- messages: List[Message],
171
- assistant_message: Message,
172
- response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
173
- tools: Optional[List[Dict[str, Any]]] = None,
174
- tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
175
- run_response: Optional[RunOutput] = None,
176
- ) -> ModelResponse:
177
- """
178
- Send a request to the Anthropic API to generate a response.
179
- """
180
-
181
- try:
182
- chat_messages, system_message = format_messages(messages)
183
- request_kwargs = self._prepare_request_kwargs(system_message, tools)
184
-
185
- if run_response and run_response.metrics:
186
- run_response.metrics.set_time_to_first_token()
187
-
188
- assistant_message.metrics.start_timer()
189
- response = self.get_client().messages.create(
190
- model=self.id,
191
- messages=chat_messages, # type: ignore
192
- **request_kwargs,
193
- )
194
- assistant_message.metrics.stop_timer()
195
-
196
- model_response = self._parse_provider_response(response, response_format=response_format)
197
-
198
- return model_response
199
-
200
- except APIConnectionError as e:
201
- log_error(f"Connection error while calling Claude API: {str(e)}")
202
- raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
203
- except RateLimitError as e:
204
- log_warning(f"Rate limit exceeded: {str(e)}")
205
- raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
206
- except APIStatusError as e:
207
- log_error(f"Claude API error (status {e.status_code}): {str(e)}")
208
- raise ModelProviderError(
209
- message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
210
- ) from e
211
- except Exception as e:
212
- log_error(f"Unexpected error calling Claude API: {str(e)}")
213
- raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
214
-
215
- def invoke_stream(
216
- self,
217
- messages: List[Message],
218
- assistant_message: Message,
219
- response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
220
- tools: Optional[List[Dict[str, Any]]] = None,
221
- tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
222
- run_response: Optional[RunOutput] = None,
223
- ) -> Iterator[ModelResponse]:
224
- """
225
- Stream a response from the Anthropic API.
226
-
227
- Args:
228
- messages (List[Message]): A list of messages to send to the model.
229
-
230
- Returns:
231
- Any: The streamed response from the model.
232
-
233
- Raises:
234
- APIConnectionError: If there are network connectivity issues
235
- RateLimitError: If the API rate limit is exceeded
236
- APIStatusError: For other API-related errors
237
- """
238
-
239
- chat_messages, system_message = format_messages(messages)
240
- request_kwargs = self._prepare_request_kwargs(system_message, tools)
241
-
242
- try:
243
- if run_response and run_response.metrics:
244
- run_response.metrics.set_time_to_first_token()
245
-
246
- assistant_message.metrics.start_timer()
247
-
248
- with self.get_client().messages.stream(
249
- model=self.id,
250
- messages=chat_messages, # type: ignore
251
- **request_kwargs,
252
- ) as stream:
253
- for chunk in stream:
254
- yield self._parse_provider_response_delta(chunk)
255
-
256
- assistant_message.metrics.stop_timer()
257
-
258
- except APIConnectionError as e:
259
- log_error(f"Connection error while calling Claude API: {str(e)}")
260
- raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
261
- except RateLimitError as e:
262
- log_warning(f"Rate limit exceeded: {str(e)}")
263
- raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
264
- except APIStatusError as e:
265
- log_error(f"Claude API error (status {e.status_code}): {str(e)}")
266
- raise ModelProviderError(
267
- message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
268
- ) from e
269
- except Exception as e:
270
- log_error(f"Unexpected error calling Claude API: {str(e)}")
271
- raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
272
-
273
- async def ainvoke(
199
+ def _prepare_request_kwargs(
274
200
  self,
275
- messages: List[Message],
276
- assistant_message: Message,
277
- response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
201
+ system_message: str,
278
202
  tools: Optional[List[Dict[str, Any]]] = None,
279
- tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
280
- run_response: Optional[RunOutput] = None,
281
- ) -> ModelResponse:
282
- """
283
- Send an asynchronous request to the Anthropic API to generate a response.
284
- """
285
-
286
- try:
287
- chat_messages, system_message = format_messages(messages)
288
- request_kwargs = self._prepare_request_kwargs(system_message, tools)
289
-
290
- if run_response and run_response.metrics:
291
- run_response.metrics.set_time_to_first_token()
292
-
293
- assistant_message.metrics.start_timer()
294
-
295
- response = await self.get_async_client().messages.create(
296
- model=self.id,
297
- messages=chat_messages, # type: ignore
298
- **request_kwargs,
299
- )
300
-
301
- assistant_message.metrics.stop_timer()
302
-
303
- model_response = self._parse_provider_response(response, response_format=response_format)
304
-
305
- return model_response
306
-
307
- except APIConnectionError as e:
308
- log_error(f"Connection error while calling Claude API: {str(e)}")
309
- raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
310
- except RateLimitError as e:
311
- log_warning(f"Rate limit exceeded: {str(e)}")
312
- raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
313
- except APIStatusError as e:
314
- log_error(f"Claude API error (status {e.status_code}): {str(e)}")
315
- raise ModelProviderError(
316
- message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
317
- ) from e
318
- except Exception as e:
319
- log_error(f"Unexpected error calling Claude API: {str(e)}")
320
- raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
321
-
322
- async def ainvoke_stream(
323
- self,
324
- messages: List[Message],
325
- assistant_message: Message,
326
203
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
327
- tools: Optional[List[Dict[str, Any]]] = None,
328
- tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
329
- run_response: Optional[RunOutput] = None,
330
- ) -> AsyncIterator[ModelResponse]:
204
+ ) -> Dict[str, Any]:
331
205
  """
332
- Stream an asynchronous response from the Anthropic API.
206
+ Prepare the request keyword arguments for the API call.
333
207
 
334
208
  Args:
335
- messages (List[Message]): A list of messages to send to the model.
209
+ system_message (str): The concatenated system messages.
210
+ tools: Optional list of tools
211
+ response_format: Optional response format (Pydantic model or dict)
336
212
 
337
213
  Returns:
338
- Any: The streamed response from the model.
339
-
340
- Raises:
341
- APIConnectionError: If there are network connectivity issues
342
- RateLimitError: If the API rate limit is exceeded
343
- APIStatusError: For other API-related errors
344
- """
345
-
346
- try:
347
- chat_messages, system_message = format_messages(messages)
348
- request_kwargs = self._prepare_request_kwargs(system_message, tools)
349
-
350
- if run_response and run_response.metrics:
351
- run_response.metrics.set_time_to_first_token()
352
-
353
- assistant_message.metrics.start_timer()
354
-
355
- async with self.get_async_client().messages.stream(
356
- model=self.id,
357
- messages=chat_messages, # type: ignore
358
- **request_kwargs,
359
- ) as stream:
360
- async for chunk in stream:
361
- yield self._parse_provider_response_delta(chunk)
362
-
363
- assistant_message.metrics.stop_timer()
364
-
365
- except APIConnectionError as e:
366
- log_error(f"Connection error while calling Claude API: {str(e)}")
367
- raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
368
- except RateLimitError as e:
369
- log_warning(f"Rate limit exceeded: {str(e)}")
370
- raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
371
- except APIStatusError as e:
372
- log_error(f"Claude API error (status {e.status_code}): {str(e)}")
373
- raise ModelProviderError(
374
- message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
375
- ) from e
376
- except Exception as e:
377
- log_error(f"Unexpected error calling Claude API: {str(e)}")
378
- raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
214
+ Dict[str, Any]: The request keyword arguments.
215
+ """
216
+ # Pass response_format and tools to get_request_params for beta header handling
217
+ request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
218
+ if system_message:
219
+ if self.cache_system_prompt:
220
+ cache_control = (
221
+ {"type": "ephemeral", "ttl": "1h"}
222
+ if self.extended_cache_time is not None and self.extended_cache_time is True
223
+ else {"type": "ephemeral"}
224
+ )
225
+ request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
226
+ else:
227
+ request_kwargs["system"] = [{"text": system_message, "type": "text"}]
228
+
229
+ # Format tools (this will handle strict mode)
230
+ if tools:
231
+ request_kwargs["tools"] = format_tools_for_model(tools)
232
+
233
+ if request_kwargs:
234
+ log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
235
+ return request_kwargs
@@ -60,6 +60,7 @@ class AzureAIFoundry(Model):
60
60
  stop: Optional[Union[str, List[str]]] = None
61
61
  seed: Optional[int] = None
62
62
  model_extras: Optional[Dict[str, Any]] = None
63
+ strict_output: bool = True # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
63
64
  request_params: Optional[Dict[str, Any]] = None
64
65
  # Client parameters
65
66
  api_key: Optional[str] = None
@@ -116,7 +117,7 @@ class AzureAIFoundry(Model):
116
117
  name=response_format.__name__,
117
118
  schema=response_format.model_json_schema(), # type: ignore
118
119
  description=response_format.__doc__,
119
- strict=True,
120
+ strict=self.strict_output,
120
121
  ),
121
122
  )
122
123
 
@@ -135,9 +136,9 @@ class AzureAIFoundry(Model):
135
136
  self.azure_endpoint = self.azure_endpoint or getenv("AZURE_ENDPOINT")
136
137
 
137
138
  if not self.api_key:
138
- raise ValueError("API key is required")
139
+ log_error("AZURE_API_KEY not set. Please set the AZURE_API_KEY environment variable.")
139
140
  if not self.azure_endpoint:
140
- raise ValueError("Endpoint URL is required")
141
+ log_error("AZURE_ENDPOINT not set. Please set the AZURE_ENDPOINT environment variable.")
141
142
 
142
143
  base_params = {
143
144
  "endpoint": self.azure_endpoint,
@@ -160,7 +161,9 @@ class AzureAIFoundry(Model):
160
161
  Returns:
161
162
  ChatCompletionsClient: An instance of the Azure AI client.
162
163
  """
163
- if self.client:
164
+ # Check if client exists and is not closed
165
+ # Azure's client doesn't have is_closed(), so we check if _client exists
166
+ if self.client and hasattr(self.client, "_client"):
164
167
  return self.client
165
168
 
166
169
  client_params = self._get_client_params()
@@ -174,11 +177,28 @@ class AzureAIFoundry(Model):
174
177
  Returns:
175
178
  AsyncChatCompletionsClient: An instance of the asynchronous Azure AI client.
176
179
  """
180
+ # Check if client exists and is not closed
181
+ # Azure's async client doesn't have is_closed(), so we check if _client exists
182
+ if self.async_client and hasattr(self.async_client, "_client"):
183
+ return self.async_client
184
+
177
185
  client_params = self._get_client_params()
178
186
 
179
187
  self.async_client = AsyncChatCompletionsClient(**client_params)
180
188
  return self.async_client
181
189
 
190
+ def close(self) -> None:
191
+ """Close the synchronous client and clean up resources."""
192
+ if self.client:
193
+ self.client.close()
194
+ self.client = None
195
+
196
+ async def aclose(self) -> None:
197
+ """Close the asynchronous client and clean up resources."""
198
+ if self.async_client:
199
+ await self.async_client.close()
200
+ self.async_client = None
201
+
182
202
  def invoke(
183
203
  self,
184
204
  messages: List[Message],
@@ -187,6 +207,7 @@ class AzureAIFoundry(Model):
187
207
  tools: Optional[List[Dict[str, Any]]] = None,
188
208
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
189
209
  run_response: Optional[RunOutput] = None,
210
+ compress_tool_results: bool = False,
190
211
  ) -> ModelResponse:
191
212
  """
192
213
  Send a chat completion request to the Azure AI API.
@@ -197,7 +218,7 @@ class AzureAIFoundry(Model):
197
218
 
198
219
  assistant_message.metrics.start_timer()
199
220
  provider_response = self.get_client().complete(
200
- messages=[format_message(m) for m in messages],
221
+ messages=[format_message(m, compress_tool_results) for m in messages],
201
222
  **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
202
223
  )
203
224
  assistant_message.metrics.stop_timer()
@@ -226,6 +247,7 @@ class AzureAIFoundry(Model):
226
247
  tools: Optional[List[Dict[str, Any]]] = None,
227
248
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
228
249
  run_response: Optional[RunOutput] = None,
250
+ compress_tool_results: bool = False,
229
251
  ) -> ModelResponse:
230
252
  """
231
253
  Sends an asynchronous chat completion request to the Azure AI API.
@@ -236,11 +258,10 @@ class AzureAIFoundry(Model):
236
258
  run_response.metrics.set_time_to_first_token()
237
259
 
238
260
  assistant_message.metrics.start_timer()
239
- async with self.get_async_client() as client:
240
- provider_response = await client.complete(
241
- messages=[format_message(m) for m in messages],
242
- **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
243
- )
261
+ provider_response = await self.get_async_client().complete(
262
+ messages=[format_message(m, compress_tool_results) for m in messages],
263
+ **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
264
+ )
244
265
  assistant_message.metrics.stop_timer()
245
266
 
246
267
  model_response = self._parse_provider_response(provider_response, response_format=response_format) # type: ignore
@@ -267,6 +288,7 @@ class AzureAIFoundry(Model):
267
288
  tools: Optional[List[Dict[str, Any]]] = None,
268
289
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
269
290
  run_response: Optional[RunOutput] = None,
291
+ compress_tool_results: bool = False,
270
292
  ) -> Iterator[ModelResponse]:
271
293
  """
272
294
  Send a streaming chat completion request to the Azure AI API.
@@ -278,7 +300,7 @@ class AzureAIFoundry(Model):
278
300
  assistant_message.metrics.start_timer()
279
301
 
280
302
  for chunk in self.get_client().complete(
281
- messages=[format_message(m) for m in messages],
303
+ messages=[format_message(m, compress_tool_results) for m in messages],
282
304
  stream=True,
283
305
  **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
284
306
  ):
@@ -306,6 +328,7 @@ class AzureAIFoundry(Model):
306
328
  tools: Optional[List[Dict[str, Any]]] = None,
307
329
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
308
330
  run_response: Optional[RunOutput] = None,
331
+ compress_tool_results: bool = False,
309
332
  ) -> AsyncIterator[ModelResponse]:
310
333
  """
311
334
  Sends an asynchronous streaming chat completion request to the Azure AI API.
@@ -316,14 +339,13 @@ class AzureAIFoundry(Model):
316
339
 
317
340
  assistant_message.metrics.start_timer()
318
341
 
319
- async with self.get_async_client() as client:
320
- async_stream = await client.complete(
321
- messages=[format_message(m) for m in messages],
322
- stream=True,
323
- **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
324
- )
325
- async for chunk in async_stream: # type: ignore
326
- yield self._parse_provider_response_delta(chunk)
342
+ async_stream = await self.get_async_client().complete(
343
+ messages=[format_message(m, compress_tool_results) for m in messages],
344
+ stream=True,
345
+ **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
346
+ )
347
+ async for chunk in async_stream: # type: ignore
348
+ yield self._parse_provider_response_delta(chunk)
327
349
 
328
350
  assistant_message.metrics.stop_timer()
329
351