agno 2.3.2__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. agno/agent/agent.py +513 -185
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +61 -51
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/singlestore/singlestore.py +99 -108
  18. agno/db/sqlite/async_sqlite.py +29 -27
  19. agno/db/sqlite/sqlite.py +30 -26
  20. agno/knowledge/reader/pdf_reader.py +2 -2
  21. agno/knowledge/reader/tavily_reader.py +0 -1
  22. agno/memory/__init__.py +14 -1
  23. agno/memory/manager.py +217 -4
  24. agno/memory/strategies/__init__.py +15 -0
  25. agno/memory/strategies/base.py +67 -0
  26. agno/memory/strategies/summarize.py +196 -0
  27. agno/memory/strategies/types.py +37 -0
  28. agno/models/anthropic/claude.py +84 -80
  29. agno/models/aws/bedrock.py +38 -16
  30. agno/models/aws/claude.py +97 -277
  31. agno/models/azure/ai_foundry.py +8 -4
  32. agno/models/base.py +101 -14
  33. agno/models/cerebras/cerebras.py +18 -7
  34. agno/models/cerebras/cerebras_openai.py +4 -2
  35. agno/models/cohere/chat.py +8 -4
  36. agno/models/google/gemini.py +578 -20
  37. agno/models/groq/groq.py +18 -5
  38. agno/models/huggingface/huggingface.py +17 -6
  39. agno/models/ibm/watsonx.py +16 -6
  40. agno/models/litellm/chat.py +17 -7
  41. agno/models/message.py +19 -5
  42. agno/models/meta/llama.py +20 -4
  43. agno/models/mistral/mistral.py +8 -4
  44. agno/models/ollama/chat.py +17 -6
  45. agno/models/openai/chat.py +17 -6
  46. agno/models/openai/responses.py +23 -9
  47. agno/models/vertexai/claude.py +99 -5
  48. agno/os/interfaces/agui/router.py +1 -0
  49. agno/os/interfaces/agui/utils.py +97 -57
  50. agno/os/router.py +16 -0
  51. agno/os/routers/memory/memory.py +143 -0
  52. agno/os/routers/memory/schemas.py +26 -0
  53. agno/os/schema.py +21 -6
  54. agno/os/utils.py +134 -10
  55. agno/run/base.py +2 -1
  56. agno/run/workflow.py +1 -1
  57. agno/team/team.py +565 -219
  58. agno/tools/mcp/mcp.py +1 -1
  59. agno/utils/agent.py +119 -1
  60. agno/utils/models/ai_foundry.py +9 -2
  61. agno/utils/models/claude.py +12 -5
  62. agno/utils/models/cohere.py +9 -2
  63. agno/utils/models/llama.py +9 -2
  64. agno/utils/models/mistral.py +4 -2
  65. agno/utils/print_response/agent.py +37 -2
  66. agno/utils/print_response/team.py +52 -0
  67. agno/utils/tokens.py +41 -0
  68. agno/workflow/types.py +2 -2
  69. {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
  70. {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/RECORD +73 -66
  71. {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
  72. {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
  73. {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/aws/claude.py CHANGED
@@ -1,21 +1,17 @@
 from dataclasses import dataclass
 from os import getenv
-from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union

 import httpx
 from pydantic import BaseModel

-from agno.exceptions import ModelProviderError, ModelRateLimitError
 from agno.models.anthropic import Claude as AnthropicClaude
-from agno.models.message import Message
-from agno.models.response import ModelResponse
-from agno.run.agent import RunOutput
 from agno.utils.http import get_default_async_client, get_default_sync_client
-from agno.utils.log import log_debug, log_error, log_warning
-from agno.utils.models.claude import format_messages
+from agno.utils.log import log_debug, log_warning
+from agno.utils.models.claude import format_tools_for_model

 try:
-    from anthropic import AnthropicBedrock, APIConnectionError, APIStatusError, AsyncAnthropicBedrock, RateLimitError
+    from anthropic import AnthropicBedrock, AsyncAnthropicBedrock
 except ImportError:
     raise ImportError("`anthropic[bedrock]` not installed. Please install using `pip install anthropic[bedrock]`")

@@ -33,73 +29,56 @@ class Claude(AnthropicClaude):
     For more information, see: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic.html
     """

-    id: str = "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    id: str = "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
     name: str = "AwsBedrockAnthropicClaude"
     provider: str = "AwsBedrock"

     aws_access_key: Optional[str] = None
     aws_secret_key: Optional[str] = None
     aws_region: Optional[str] = None
+    api_key: Optional[str] = None
     session: Optional[Session] = None

-    # -*- Request parameters
-    max_tokens: int = 4096
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    stop_sequences: Optional[List[str]] = None
-
-    # -*- Request parameters
-    request_params: Optional[Dict[str, Any]] = None
-    # -*- Client parameters
-    client_params: Optional[Dict[str, Any]] = None
-
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Convert the model to a dictionary.
-
-        Returns:
-            Dict[str, Any]: The dictionary representation of the model.
-        """
-        _dict = super().to_dict()
-        _dict["max_tokens"] = self.max_tokens
-        _dict["temperature"] = self.temperature
-        _dict["top_p"] = self.top_p
-        _dict["top_k"] = self.top_k
-        _dict["stop_sequences"] = self.stop_sequences
-        return _dict
-
     client: Optional[AnthropicBedrock] = None  # type: ignore
     async_client: Optional[AsyncAnthropicBedrock] = None  # type: ignore

-    def get_client(self):
-        """
-        Get the Bedrock client.
-
-        Returns:
-            AnthropicBedrock: The Bedrock client.
-        """
-        if self.client is not None and not self.client.is_closed():
-            return self.client
+    def __post_init__(self):
+        """Validate model configuration after initialization"""
+        # Validate thinking support immediately at model creation
+        if self.thinking:
+            self._validate_thinking_support()
+        # Overwrite output schema support for AWS Bedrock Claude
+        self.supports_native_structured_outputs = False
+        self.supports_json_schema_outputs = False

+    def _get_client_params(self) -> Dict[str, Any]:
         if self.session:
             credentials = self.session.get_credentials()
-            client_params = {
+            client_params: Dict[str, Any] = {
                 "aws_access_key": credentials.access_key,
                 "aws_secret_key": credentials.secret_key,
                 "aws_session_token": credentials.token,
                 "aws_region": self.session.region_name,
             }
         else:
-            self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
-            self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
-            self.aws_region = self.aws_region or getenv("AWS_REGION")
-
-            client_params = {
-                "aws_secret_key": self.aws_secret_key,
-                "aws_access_key": self.aws_access_key,
-                "aws_region": self.aws_region,
-            }
+            self.api_key = self.api_key or getenv("AWS_BEDROCK_API_KEY")
+            if self.api_key:
+                self.aws_region = self.aws_region or getenv("AWS_REGION")
+                client_params = {
+                    "api_key": self.api_key,
+                }
+                if self.aws_region:
+                    client_params["aws_region"] = self.aws_region
+            else:
+                self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
+                self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
+                self.aws_region = self.aws_region or getenv("AWS_REGION")
+
+                client_params = {
+                    "aws_secret_key": self.aws_secret_key,
+                    "aws_access_key": self.aws_access_key,
+                    "aws_region": self.aws_region,
+                }

         if self.timeout is not None:
             client_params["timeout"] = self.timeout
@@ -107,6 +86,20 @@ class Claude(AnthropicClaude):
         if self.client_params:
             client_params.update(self.client_params)

+        return client_params
+
+    def get_client(self):
+        """
+        Get the Bedrock client.
+
+        Returns:
+            AnthropicBedrock: The Bedrock client.
+        """
+        if self.client is not None and not self.client.is_closed():
+            return self.client
+
+        client_params = self._get_client_params()
+
         if self.http_client:
             if isinstance(self.http_client, httpx.Client):
                 client_params["http_client"] = self.http_client
@@ -133,26 +126,7 @@ class Claude(AnthropicClaude):
         if self.async_client is not None:
             return self.async_client

-        if self.session:
-            credentials = self.session.get_credentials()
-            client_params = {
-                "aws_access_key": credentials.access_key,
-                "aws_secret_key": credentials.secret_key,
-                "aws_session_token": credentials.token,
-                "aws_region": self.session.region_name,
-            }
-        else:
-            client_params = {
-                "aws_secret_key": self.aws_secret_key,
-                "aws_access_key": self.aws_access_key,
-                "aws_region": self.aws_region,
-            }
-
-        if self.timeout is not None:
-            client_params["timeout"] = self.timeout
-
-        if self.client_params:
-            client_params.update(self.client_params)
+        client_params = self._get_client_params()

         if self.http_client:
             if isinstance(self.http_client, httpx.AsyncClient):
@@ -172,16 +146,26 @@ class Claude(AnthropicClaude):
             )
         return self.async_client

-    def get_request_params(self) -> Dict[str, Any]:
+    def get_request_params(
+        self,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
         """
         Generate keyword arguments for API requests.

         Returns:
             Dict[str, Any]: The keyword arguments for API requests.
         """
+        # Validate thinking support if thinking is enabled
+        if self.thinking:
+            self._validate_thinking_support()
+
         _request_params: Dict[str, Any] = {}
         if self.max_tokens:
             _request_params["max_tokens"] = self.max_tokens
+        if self.thinking:
+            _request_params["thinking"] = self.thinking
         if self.temperature:
             _request_params["temperature"] = self.temperature
         if self.stop_sequences:
@@ -190,6 +174,16 @@ class Claude(AnthropicClaude):
             _request_params["top_p"] = self.top_p
         if self.top_k:
             _request_params["top_k"] = self.top_k
+        if self.timeout:
+            _request_params["timeout"] = self.timeout
+
+        # Build betas list - include existing betas and add new one if needed
+        betas_list = list(self.betas) if self.betas else []
+
+        # Include betas if any are present
+        if betas_list:
+            _request_params["betas"] = betas_list
+
         if self.request_params:
             _request_params.update(self.request_params)

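get_request_params now accepts response_format and tools, forwards the thinking configuration (validated in __post_init__ and again here), and passes timeout and any configured betas through to the client. A sketch of a thinking-enabled configuration, assuming Anthropic's standard extended-thinking payload shape and that max_tokens and betas are fields inherited from the Anthropic Claude base (the diff references self.betas); the budget and beta values are illustrative:

from agno.models.aws.claude import Claude

model = Claude(
    max_tokens=4096,
    # Checked by _validate_thinking_support() at model creation and again
    # when request params are built.
    thinking={"type": "enabled", "budget_tokens": 1024},
    betas=["interleaved-thinking-2025-05-14"],  # illustrative beta flag
)
# model.get_request_params() would then include max_tokens, thinking,
# and betas among the request parameters.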
@@ -197,214 +191,40 @@ class Claude(AnthropicClaude):
         log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
         return _request_params

-    def invoke(
+    def _prepare_request_kwargs(
         self,
-        messages: List[Message],
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        system_message: str,
         tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> ModelResponse:
-        """
-        Send a request to the Anthropic API to generate a response.
-        """
-
-        try:
-            chat_messages, system_message = format_messages(messages)
-            request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-            response = self.get_client().messages.create(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            )
-            assistant_message.metrics.stop_timer()
-
-            model_response = self._parse_provider_response(response, response_format=response_format)
-
-            return model_response
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    def invoke_stream(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> Iterator[ModelResponse]:
+    ) -> Dict[str, Any]:
         """
-        Stream a response from the Anthropic API.
+        Prepare the request keyword arguments for the API call.

         Args:
-            messages (List[Message]): A list of messages to send to the model.
+            system_message (str): The concatenated system messages.
+            tools: Optional list of tools
+            response_format: Optional response format (Pydantic model or dict)

         Returns:
-            Any: The streamed response from the model.
-
-        Raises:
-            APIConnectionError: If there are network connectivity issues
-            RateLimitError: If the API rate limit is exceeded
-            APIStatusError: For other API-related errors
-        """
-
-        chat_messages, system_message = format_messages(messages)
-        request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-        try:
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-
-            with self.get_client().messages.stream(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            ) as stream:
-                for chunk in stream:
-                    yield self._parse_provider_response_delta(chunk)
-
-            assistant_message.metrics.stop_timer()
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    async def ainvoke(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> ModelResponse:
-        """
-        Send an asynchronous request to the Anthropic API to generate a response.
-        """
-
-        try:
-            chat_messages, system_message = format_messages(messages)
-            request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-
-            response = await self.get_async_client().messages.create(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            )
-
-            assistant_message.metrics.stop_timer()
-
-            model_response = self._parse_provider_response(response, response_format=response_format)
-
-            return model_response
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    async def ainvoke_stream(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> AsyncIterator[ModelResponse]:
-        """
-        Stream an asynchronous response from the Anthropic API.
-
-        Args:
-            messages (List[Message]): A list of messages to send to the model.
-
-        Returns:
-            Any: The streamed response from the model.
+            Dict[str, Any]: The request keyword arguments.
+        """
+        # Pass response_format and tools to get_request_params for beta header handling
+        request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
+        if system_message:
+            if self.cache_system_prompt:
+                cache_control = (
+                    {"type": "ephemeral", "ttl": "1h"}
+                    if self.extended_cache_time is not None and self.extended_cache_time is True
+                    else {"type": "ephemeral"}
+                )
+                request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
+            else:
+                request_kwargs["system"] = [{"text": system_message, "type": "text"}]

-        Raises:
-            APIConnectionError: If there are network connectivity issues
-            RateLimitError: If the API rate limit is exceeded
-            APIStatusError: For other API-related errors
-        """
+        # Format tools (this will handle strict mode)
+        if tools:
+            request_kwargs["tools"] = format_tools_for_model(tools)

-        try:
-            chat_messages, system_message = format_messages(messages)
-            request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-
-            async with self.get_async_client().messages.stream(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            ) as stream:
-                async for chunk in stream:
-                    yield self._parse_provider_response_delta(chunk)
-
-            assistant_message.metrics.stop_timer()
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
+        if request_kwargs:
+            log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
+        return request_kwargs
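
With the four invoke/stream methods deleted, the Bedrock subclass now relies on the request loop it inherits from AnthropicClaude (itself updated in this release, +84/-80) and only overrides _prepare_request_kwargs. When cache_system_prompt is set, the system message gains an ephemeral cache_control block, extended to a one-hour TTL when extended_cache_time is true. A sketch of the system payload shapes the method builds, taken directly from the diff; the message text is a placeholder:

# cache_system_prompt=True, extended_cache_time=True:
[{"text": "<system message>", "type": "text", "cache_control": {"type": "ephemeral", "ttl": "1h"}}]

# cache_system_prompt=True only:
[{"text": "<system message>", "type": "text", "cache_control": {"type": "ephemeral"}}]

# caching disabled:
[{"text": "<system message>", "type": "text"}]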
agno/models/azure/ai_foundry.py CHANGED
@@ -207,6 +207,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Azure AI API.
@@ -217,7 +218,7 @@ class AzureAIFoundry(Model):

         assistant_message.metrics.start_timer()
         provider_response = self.get_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -246,6 +247,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Azure AI API.
@@ -257,7 +259,7 @@ class AzureAIFoundry(Model):

         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -286,6 +288,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Azure AI API.
@@ -297,7 +300,7 @@ class AzureAIFoundry(Model):
         assistant_message.metrics.start_timer()

         for chunk in self.get_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         ):
@@ -325,6 +328,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Azure AI API.
@@ -336,7 +340,7 @@ class AzureAIFoundry(Model):
         assistant_message.metrics.start_timer()

         async_stream = await self.get_async_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )
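
Each AzureAIFoundry entry point (invoke, ainvoke, invoke_stream, ainvoke_stream) gains a compress_tool_results flag that is threaded through to format_message for every message, pairing with the new agno/compression module added in this release. A minimal sketch of a direct call; invoke is normally driven by the Agent runtime, and the model id and messages here are illustrative:

from agno.models.azure.ai_foundry import AzureAIFoundry
from agno.models.message import Message

model = AzureAIFoundry(id="Phi-4")  # illustrative model id
messages = [Message(role="user", content="Summarize the tool output.")]
assistant_message = Message(role="assistant", content="")

response = model.invoke(
    messages=messages,
    assistant_message=assistant_message,
    compress_tool_results=True,  # new in 2.3.3; defaults to False
)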