agno-2.3.2-py3-none-any.whl → agno-2.3.3-py3-none-any.whl
This diff compares publicly released versions of the package as published to their public registries. It is provided for informational purposes only.
- agno/agent/agent.py +513 -185
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +61 -51
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +29 -27
- agno/db/sqlite/sqlite.py +30 -26
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +217 -4
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/anthropic/claude.py +84 -80
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +18 -7
- agno/models/cerebras/cerebras_openai.py +4 -2
- agno/models/cohere/chat.py +8 -4
- agno/models/google/gemini.py +578 -20
- agno/models/groq/groq.py +18 -5
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/litellm/chat.py +17 -7
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +20 -4
- agno/models/mistral/mistral.py +8 -4
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +17 -6
- agno/models/openai/responses.py +23 -9
- agno/models/vertexai/claude.py +99 -5
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -0
- agno/os/routers/memory/memory.py +143 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +21 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +565 -219
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
- {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/RECORD +73 -66
- {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
- {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.2.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/aws/claude.py
CHANGED
@@ -1,21 +1,17 @@
 from dataclasses import dataclass
 from os import getenv
-from typing import Any, …
+from typing import Any, Dict, List, Optional, Type, Union
 
 import httpx
 from pydantic import BaseModel
 
-from agno.exceptions import ModelProviderError, ModelRateLimitError
 from agno.models.anthropic import Claude as AnthropicClaude
-from agno.models.message import Message
-from agno.models.response import ModelResponse
-from agno.run.agent import RunOutput
 from agno.utils.http import get_default_async_client, get_default_sync_client
-from agno.utils.log import log_debug, …
-from agno.utils.models.claude import …
+from agno.utils.log import log_debug, log_warning
+from agno.utils.models.claude import format_tools_for_model
 
 try:
-    from anthropic import AnthropicBedrock, …
+    from anthropic import AnthropicBedrock, AsyncAnthropicBedrock
 except ImportError:
     raise ImportError("`anthropic[bedrock]` not installed. Please install using `pip install anthropic[bedrock]`")
 
@@ -33,73 +29,56 @@ class Claude(AnthropicClaude):
     For more information, see: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic.html
     """
 
-    id: str = "anthropic.claude-…
+    id: str = "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
     name: str = "AwsBedrockAnthropicClaude"
     provider: str = "AwsBedrock"
 
     aws_access_key: Optional[str] = None
     aws_secret_key: Optional[str] = None
     aws_region: Optional[str] = None
+    api_key: Optional[str] = None
     session: Optional[Session] = None
 
-    # -*- Request parameters
-    max_tokens: int = 4096
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    stop_sequences: Optional[List[str]] = None
-
-    # -*- Request parameters
-    request_params: Optional[Dict[str, Any]] = None
-    # -*- Client parameters
-    client_params: Optional[Dict[str, Any]] = None
-
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Convert the model to a dictionary.
-
-        Returns:
-            Dict[str, Any]: The dictionary representation of the model.
-        """
-        _dict = super().to_dict()
-        _dict["max_tokens"] = self.max_tokens
-        _dict["temperature"] = self.temperature
-        _dict["top_p"] = self.top_p
-        _dict["top_k"] = self.top_k
-        _dict["stop_sequences"] = self.stop_sequences
-        return _dict
-
     client: Optional[AnthropicBedrock] = None  # type: ignore
     async_client: Optional[AsyncAnthropicBedrock] = None  # type: ignore
 
-    def …
-        """
-        …
-        return self.client
+    def __post_init__(self):
+        """Validate model configuration after initialization"""
+        # Validate thinking support immediately at model creation
+        if self.thinking:
+            self._validate_thinking_support()
+        # Overwrite output schema support for AWS Bedrock Claude
+        self.supports_native_structured_outputs = False
+        self.supports_json_schema_outputs = False
 
+    def _get_client_params(self) -> Dict[str, Any]:
         if self.session:
             credentials = self.session.get_credentials()
-            client_params = {
+            client_params: Dict[str, Any] = {
                 "aws_access_key": credentials.access_key,
                 "aws_secret_key": credentials.secret_key,
                 "aws_session_token": credentials.token,
                 "aws_region": self.session.region_name,
             }
         else:
-            self.…
-            …
+            self.api_key = self.api_key or getenv("AWS_BEDROCK_API_KEY")
+            if self.api_key:
+                self.aws_region = self.aws_region or getenv("AWS_REGION")
+                client_params = {
+                    "api_key": self.api_key,
+                }
+                if self.aws_region:
+                    client_params["aws_region"] = self.aws_region
+            else:
+                self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
+                self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
+                self.aws_region = self.aws_region or getenv("AWS_REGION")
+
+                client_params = {
+                    "aws_secret_key": self.aws_secret_key,
+                    "aws_access_key": self.aws_access_key,
+                    "aws_region": self.aws_region,
+                }
 
         if self.timeout is not None:
             client_params["timeout"] = self.timeout
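The refactor above consolidates credential handling into a single `_get_client_params()` helper, which resolves credentials from three sources in order: an explicit boto3 `Session`, a Bedrock API key (new in 2.3.3, read from `AWS_BEDROCK_API_KEY` when not passed explicitly), or classic access/secret keys. A minimal usage sketch, assuming only the constructor fields shown in this diff; all key values are placeholders:

```python
from boto3 import Session

from agno.models.aws.claude import Claude

# 1. An explicit boto3 Session takes precedence: access key, secret key,
#    session token, and region are all pulled from its credentials.
model = Claude(session=Session(region_name="us-east-1"))

# 2. New in 2.3.3: a Bedrock API key, passed directly or picked up from
#    the AWS_BEDROCK_API_KEY environment variable.
model = Claude(api_key="my-bedrock-api-key", aws_region="us-east-1")

# 3. Fallback: classic access/secret keys, or AWS_ACCESS_KEY /
#    AWS_SECRET_KEY / AWS_REGION from the environment.
model = Claude(
    aws_access_key="AKIA...placeholder",
    aws_secret_key="placeholder",
    aws_region="us-east-1",
)
```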
@@ -107,6 +86,20 @@ class Claude(AnthropicClaude):
         if self.client_params:
             client_params.update(self.client_params)
 
+        return client_params
+
+    def get_client(self):
+        """
+        Get the Bedrock client.
+
+        Returns:
+            AnthropicBedrock: The Bedrock client.
+        """
+        if self.client is not None and not self.client.is_closed():
+            return self.client
+
+        client_params = self._get_client_params()
+
         if self.http_client:
             if isinstance(self.http_client, httpx.Client):
                 client_params["http_client"] = self.http_client
@@ -133,26 +126,7 @@ class Claude(AnthropicClaude):
         if self.async_client is not None:
             return self.async_client
 
-        …
-            credentials = self.session.get_credentials()
-            client_params = {
-                "aws_access_key": credentials.access_key,
-                "aws_secret_key": credentials.secret_key,
-                "aws_session_token": credentials.token,
-                "aws_region": self.session.region_name,
-            }
-        else:
-            client_params = {
-                "aws_secret_key": self.aws_secret_key,
-                "aws_access_key": self.aws_access_key,
-                "aws_region": self.aws_region,
-            }
-
-        if self.timeout is not None:
-            client_params["timeout"] = self.timeout
-
-        if self.client_params:
-            client_params.update(self.client_params)
+        client_params = self._get_client_params()
 
         if self.http_client:
             if isinstance(self.http_client, httpx.AsyncClient):
@@ -172,16 +146,26 @@ class Claude(AnthropicClaude):
         )
         return self.async_client
 
-    def get_request_params(…
+    def get_request_params(
+        self,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
         """
         Generate keyword arguments for API requests.
 
         Returns:
             Dict[str, Any]: The keyword arguments for API requests.
         """
+        # Validate thinking support if thinking is enabled
+        if self.thinking:
+            self._validate_thinking_support()
+
         _request_params: Dict[str, Any] = {}
         if self.max_tokens:
             _request_params["max_tokens"] = self.max_tokens
+        if self.thinking:
+            _request_params["thinking"] = self.thinking
         if self.temperature:
             _request_params["temperature"] = self.temperature
         if self.stop_sequences:
@@ -190,6 +174,16 @@ class Claude(AnthropicClaude):
             _request_params["top_p"] = self.top_p
         if self.top_k:
             _request_params["top_k"] = self.top_k
+        if self.timeout:
+            _request_params["timeout"] = self.timeout
+
+        # Build betas list - include existing betas and add new one if needed
+        betas_list = list(self.betas) if self.betas else []
+
+        # Include betas if any are present
+        if betas_list:
+            _request_params["betas"] = betas_list
+
         if self.request_params:
             _request_params.update(self.request_params)
 
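`get_request_params()` now forwards `thinking`, `timeout`, and any configured `betas` alongside the sampling parameters. A rough sketch of the resulting dict, assuming the `thinking` and `betas` fields inherited from the Anthropic `Claude` base class and a model that supports extended thinking; the beta flag is purely illustrative:

```python
from agno.models.aws.claude import Claude

model = Claude(
    max_tokens=2048,
    thinking={"type": "enabled", "budget_tokens": 1024},
    betas=["example-beta-flag"],  # illustrative value, not a real flag
)

# Expected shape after this change (key order aside):
# {"max_tokens": 2048,
#  "thinking": {"type": "enabled", "budget_tokens": 1024},
#  "betas": ["example-beta-flag"]}
print(model.get_request_params())
```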
@@ -197,214 +191,40 @@ class Claude(AnthropicClaude):
         log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
         return _request_params
 
-    def …
+    def _prepare_request_kwargs(
         self,
-        …
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        system_message: str,
         tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> ModelResponse:
-        """
-        Send a request to the Anthropic API to generate a response.
-        """
-
-        try:
-            chat_messages, system_message = format_messages(messages)
-            request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-            response = self.get_client().messages.create(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            )
-            assistant_message.metrics.stop_timer()
-
-            model_response = self._parse_provider_response(response, response_format=response_format)
-
-            return model_response
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    def invoke_stream(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        …
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> Iterator[ModelResponse]:
+    ) -> Dict[str, Any]:
         """
-        …
+        Prepare the request keyword arguments for the API call.
 
         Args:
-            …
+            system_message (str): The concatenated system messages.
+            tools: Optional list of tools
+            response_format: Optional response format (Pydantic model or dict)
 
         Returns:
-            Any: The …
-            …
-            assistant_message.metrics.start_timer()
-
-            with self.get_client().messages.stream(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            ) as stream:
-                for chunk in stream:
-                    yield self._parse_provider_response_delta(chunk)
-
-            assistant_message.metrics.stop_timer()
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    async def ainvoke(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> ModelResponse:
-        """
-        Send an asynchronous request to the Anthropic API to generate a response.
-        """
-
-        try:
-            chat_messages, system_message = format_messages(messages)
-            request_kwargs = self._prepare_request_kwargs(system_message, tools)
-
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-
-            response = await self.get_async_client().messages.create(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            )
-
-            assistant_message.metrics.stop_timer()
-
-            model_response = self._parse_provider_response(response, response_format=response_format)
-
-            return model_response
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
-
-    async def ainvoke_stream(
-        self,
-        messages: List[Message],
-        assistant_message: Message,
-        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
-    ) -> AsyncIterator[ModelResponse]:
-        """
-        Stream an asynchronous response from the Anthropic API.
-
-        Args:
-            messages (List[Message]): A list of messages to send to the model.
-
-        Returns:
-            Any: The streamed response from the model.
+            Dict[str, Any]: The request keyword arguments.
+        """
+        # Pass response_format and tools to get_request_params for beta header handling
+        request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
+        if system_message:
+            if self.cache_system_prompt:
+                cache_control = (
+                    {"type": "ephemeral", "ttl": "1h"}
+                    if self.extended_cache_time is not None and self.extended_cache_time is True
+                    else {"type": "ephemeral"}
+                )
+                request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
+            else:
+                request_kwargs["system"] = [{"text": system_message, "type": "text"}]
 
-        …
-            APIStatusError: For other API-related errors
-        """
+        # Format tools (this will handle strict mode)
+        if tools:
+            request_kwargs["tools"] = format_tools_for_model(tools)
 
-        …
-            if run_response and run_response.metrics:
-                run_response.metrics.set_time_to_first_token()
-
-            assistant_message.metrics.start_timer()
-
-            async with self.get_async_client().messages.stream(
-                model=self.id,
-                messages=chat_messages,  # type: ignore
-                **request_kwargs,
-            ) as stream:
-                async for chunk in stream:
-                    yield self._parse_provider_response_delta(chunk)
-
-            assistant_message.metrics.stop_timer()
-
-        except APIConnectionError as e:
-            log_error(f"Connection error while calling Claude API: {str(e)}")
-            raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except RateLimitError as e:
-            log_warning(f"Rate limit exceeded: {str(e)}")
-            raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
-        except APIStatusError as e:
-            log_error(f"Claude API error (status {e.status_code}): {str(e)}")
-            raise ModelProviderError(
-                message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
-            ) from e
-        except Exception as e:
-            log_error(f"Unexpected error calling Claude API: {str(e)}")
-            raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
+        if request_kwargs:
+            log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
+        return request_kwargs
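With the per-call `invoke`/`ainvoke`/streaming overrides removed (the inherited Anthropic implementations now handle them), the Bedrock class keeps only `_prepare_request_kwargs()`, which attaches the system prompt, optional prompt caching, and formatted tools. A sketch of the caching behavior, assuming the `cache_system_prompt` and `extended_cache_time` fields inherited from the Anthropic base class; note this exercises a private method purely to illustrate the payload shape:

```python
from agno.models.aws.claude import Claude

model = Claude(cache_system_prompt=True, extended_cache_time=True)
kwargs = model._prepare_request_kwargs(system_message="You are a terse assistant.")

# The system prompt becomes a cacheable block with a 1-hour TTL:
# [{"text": "You are a terse assistant.", "type": "text",
#   "cache_control": {"type": "ephemeral", "ttl": "1h"}}]
print(kwargs["system"])
```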
agno/models/azure/ai_foundry.py
CHANGED
@@ -207,6 +207,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Azure AI API.
@@ -217,7 +218,7 @@ class AzureAIFoundry(Model):
 
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -246,6 +247,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Azure AI API.
@@ -257,7 +259,7 @@ class AzureAIFoundry(Model):
 
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -286,6 +288,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Azure AI API.
@@ -297,7 +300,7 @@ class AzureAIFoundry(Model):
         assistant_message.metrics.start_timer()
 
         for chunk in self.get_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         ):
@@ -325,6 +328,7 @@ class AzureAIFoundry(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
    ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Azure AI API.
@@ -336,7 +340,7 @@ class AzureAIFoundry(Model):
 
         assistant_message.metrics.start_timer()
 
         async_stream = await self.get_async_client().complete(
-            messages=[format_message(m) for m in messages],
+            messages=[format_message(m, compress_tool_results) for m in messages],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
         )