agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/aws/claude.py
CHANGED
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
from os import getenv
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
4
4
|
|
|
5
|
+
import httpx
|
|
5
6
|
from pydantic import BaseModel
|
|
6
7
|
|
|
7
|
-
from agno.exceptions import ModelProviderError, ModelRateLimitError
|
|
8
8
|
from agno.models.anthropic import Claude as AnthropicClaude
|
|
9
|
-
from agno.
|
|
10
|
-
from agno.
|
|
11
|
-
from agno.
|
|
12
|
-
from agno.utils.log import log_debug, log_error, log_warning
|
|
13
|
-
from agno.utils.models.claude import format_messages
|
|
9
|
+
from agno.utils.http import get_default_async_client, get_default_sync_client
|
|
10
|
+
from agno.utils.log import log_debug, log_warning
|
|
11
|
+
from agno.utils.models.claude import format_tools_for_model
|
|
14
12
|
|
|
15
13
|
try:
|
|
16
|
-
from anthropic import AnthropicBedrock,
|
|
14
|
+
from anthropic import AnthropicBedrock, AsyncAnthropicBedrock
|
|
17
15
|
except ImportError:
|
|
18
16
|
raise ImportError("`anthropic[bedrock]` not installed. Please install using `pip install anthropic[bedrock]`")
|
|
19
17
|
|
|
@@ -31,44 +29,69 @@ class Claude(AnthropicClaude):
|
|
|
31
29
|
For more information, see: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic.html
|
|
32
30
|
"""
|
|
33
31
|
|
|
34
|
-
id: str = "anthropic.claude-
|
|
32
|
+
id: str = "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
|
|
35
33
|
name: str = "AwsBedrockAnthropicClaude"
|
|
36
34
|
provider: str = "AwsBedrock"
|
|
37
35
|
|
|
38
36
|
aws_access_key: Optional[str] = None
|
|
39
37
|
aws_secret_key: Optional[str] = None
|
|
40
38
|
aws_region: Optional[str] = None
|
|
39
|
+
api_key: Optional[str] = None
|
|
41
40
|
session: Optional[Session] = None
|
|
42
41
|
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
temperature: Optional[float] = None
|
|
46
|
-
top_p: Optional[float] = None
|
|
47
|
-
top_k: Optional[int] = None
|
|
48
|
-
stop_sequences: Optional[List[str]] = None
|
|
42
|
+
client: Optional[AnthropicBedrock] = None # type: ignore
|
|
43
|
+
async_client: Optional[AsyncAnthropicBedrock] = None # type: ignore
|
|
49
44
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
45
|
+
def __post_init__(self):
|
|
46
|
+
"""Validate model configuration after initialization"""
|
|
47
|
+
# Validate thinking support immediately at model creation
|
|
48
|
+
if self.thinking:
|
|
49
|
+
self._validate_thinking_support()
|
|
50
|
+
# Overwrite output schema support for AWS Bedrock Claude
|
|
51
|
+
self.supports_native_structured_outputs = False
|
|
52
|
+
self.supports_json_schema_outputs = False
|
|
54
53
|
|
|
55
|
-
def
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def _get_client_params(self) -> Dict[str, Any]:
|
|
55
|
+
if self.session:
|
|
56
|
+
credentials = self.session.get_credentials()
|
|
57
|
+
client_params: Dict[str, Any] = {
|
|
58
|
+
"aws_access_key": credentials.access_key,
|
|
59
|
+
"aws_secret_key": credentials.secret_key,
|
|
60
|
+
"aws_session_token": credentials.token,
|
|
61
|
+
"aws_region": self.session.region_name,
|
|
62
|
+
}
|
|
63
|
+
else:
|
|
64
|
+
self.api_key = self.api_key or getenv("AWS_BEDROCK_API_KEY")
|
|
65
|
+
if self.api_key:
|
|
66
|
+
self.aws_region = self.aws_region or getenv("AWS_REGION")
|
|
67
|
+
client_params = {
|
|
68
|
+
"api_key": self.api_key,
|
|
69
|
+
}
|
|
70
|
+
if self.aws_region:
|
|
71
|
+
client_params["aws_region"] = self.aws_region
|
|
72
|
+
else:
|
|
73
|
+
self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY_ID") or getenv("AWS_ACCESS_KEY")
|
|
74
|
+
self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_ACCESS_KEY") or getenv("AWS_SECRET_KEY")
|
|
75
|
+
self.aws_region = self.aws_region or getenv("AWS_REGION")
|
|
76
|
+
|
|
77
|
+
client_params = {
|
|
78
|
+
"aws_secret_key": self.aws_secret_key,
|
|
79
|
+
"aws_access_key": self.aws_access_key,
|
|
80
|
+
"aws_region": self.aws_region,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
if not (self.api_key or (self.aws_access_key and self.aws_secret_key)):
|
|
84
|
+
log_warning(
|
|
85
|
+
"AWS credentials not found. Please set AWS_BEDROCK_API_KEY or AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide a boto3 session."
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
if self.timeout is not None:
|
|
89
|
+
client_params["timeout"] = self.timeout
|
|
58
90
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
"""
|
|
62
|
-
_dict = super().to_dict()
|
|
63
|
-
_dict["max_tokens"] = self.max_tokens
|
|
64
|
-
_dict["temperature"] = self.temperature
|
|
65
|
-
_dict["top_p"] = self.top_p
|
|
66
|
-
_dict["top_k"] = self.top_k
|
|
67
|
-
_dict["stop_sequences"] = self.stop_sequences
|
|
68
|
-
return _dict
|
|
91
|
+
if self.client_params:
|
|
92
|
+
client_params.update(self.client_params)
|
|
69
93
|
|
|
70
|
-
|
|
71
|
-
async_client: Optional[AsyncAnthropicBedrock] = None # type: ignore
|
|
94
|
+
return client_params
|
|
72
95
|
|
|
73
96
|
def get_client(self):
|
|
74
97
|
"""
|
|
@@ -80,27 +103,18 @@ class Claude(AnthropicClaude):
|
|
|
80
103
|
if self.client is not None and not self.client.is_closed():
|
|
81
104
|
return self.client
|
|
82
105
|
|
|
83
|
-
|
|
84
|
-
credentials = self.session.get_credentials()
|
|
85
|
-
client_params = {
|
|
86
|
-
"aws_access_key": credentials.access_key,
|
|
87
|
-
"aws_secret_key": credentials.secret_key,
|
|
88
|
-
"aws_session_token": credentials.token,
|
|
89
|
-
"aws_region": self.session.region_name,
|
|
90
|
-
}
|
|
91
|
-
else:
|
|
92
|
-
self.aws_access_key = self.aws_access_key or getenv("AWS_ACCESS_KEY")
|
|
93
|
-
self.aws_secret_key = self.aws_secret_key or getenv("AWS_SECRET_KEY")
|
|
94
|
-
self.aws_region = self.aws_region or getenv("AWS_REGION")
|
|
95
|
-
|
|
96
|
-
client_params = {
|
|
97
|
-
"aws_secret_key": self.aws_secret_key,
|
|
98
|
-
"aws_access_key": self.aws_access_key,
|
|
99
|
-
"aws_region": self.aws_region,
|
|
100
|
-
}
|
|
106
|
+
client_params = self._get_client_params()
|
|
101
107
|
|
|
102
|
-
if self.
|
|
103
|
-
|
|
108
|
+
if self.http_client:
|
|
109
|
+
if isinstance(self.http_client, httpx.Client):
|
|
110
|
+
client_params["http_client"] = self.http_client
|
|
111
|
+
else:
|
|
112
|
+
log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
|
|
113
|
+
# Use global sync client when user http_client is invalid
|
|
114
|
+
client_params["http_client"] = get_default_sync_client()
|
|
115
|
+
else:
|
|
116
|
+
# Use global sync client when no custom http_client is provided
|
|
117
|
+
client_params["http_client"] = get_default_sync_client()
|
|
104
118
|
|
|
105
119
|
self.client = AnthropicBedrock(
|
|
106
120
|
**client_params, # type: ignore
|
|
@@ -117,39 +131,46 @@ class Claude(AnthropicClaude):
|
|
|
117
131
|
if self.async_client is not None:
|
|
118
132
|
return self.async_client
|
|
119
133
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
"
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
134
|
+
client_params = self._get_client_params()
|
|
135
|
+
|
|
136
|
+
if self.http_client:
|
|
137
|
+
if isinstance(self.http_client, httpx.AsyncClient):
|
|
138
|
+
client_params["http_client"] = self.http_client
|
|
139
|
+
else:
|
|
140
|
+
log_warning(
|
|
141
|
+
"http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
|
|
142
|
+
)
|
|
143
|
+
# Use global async client when user http_client is invalid
|
|
144
|
+
client_params["http_client"] = get_default_async_client()
|
|
128
145
|
else:
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
"aws_access_key": self.aws_access_key,
|
|
132
|
-
"aws_region": self.aws_region,
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
if self.client_params:
|
|
136
|
-
client_params.update(self.client_params)
|
|
146
|
+
# Use global async client when no custom http_client is provided
|
|
147
|
+
client_params["http_client"] = get_default_async_client()
|
|
137
148
|
|
|
138
149
|
self.async_client = AsyncAnthropicBedrock(
|
|
139
150
|
**client_params, # type: ignore
|
|
140
151
|
)
|
|
141
152
|
return self.async_client
|
|
142
153
|
|
|
143
|
-
def get_request_params(
|
|
154
|
+
def get_request_params(
|
|
155
|
+
self,
|
|
156
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
157
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
158
|
+
) -> Dict[str, Any]:
|
|
144
159
|
"""
|
|
145
160
|
Generate keyword arguments for API requests.
|
|
146
161
|
|
|
147
162
|
Returns:
|
|
148
163
|
Dict[str, Any]: The keyword arguments for API requests.
|
|
149
164
|
"""
|
|
165
|
+
# Validate thinking support if thinking is enabled
|
|
166
|
+
if self.thinking:
|
|
167
|
+
self._validate_thinking_support()
|
|
168
|
+
|
|
150
169
|
_request_params: Dict[str, Any] = {}
|
|
151
170
|
if self.max_tokens:
|
|
152
171
|
_request_params["max_tokens"] = self.max_tokens
|
|
172
|
+
if self.thinking:
|
|
173
|
+
_request_params["thinking"] = self.thinking
|
|
153
174
|
if self.temperature:
|
|
154
175
|
_request_params["temperature"] = self.temperature
|
|
155
176
|
if self.stop_sequences:
|
|
@@ -158,6 +179,16 @@ class Claude(AnthropicClaude):
|
|
|
158
179
|
_request_params["top_p"] = self.top_p
|
|
159
180
|
if self.top_k:
|
|
160
181
|
_request_params["top_k"] = self.top_k
|
|
182
|
+
if self.timeout:
|
|
183
|
+
_request_params["timeout"] = self.timeout
|
|
184
|
+
|
|
185
|
+
# Build betas list - include existing betas and add new one if needed
|
|
186
|
+
betas_list = list(self.betas) if self.betas else []
|
|
187
|
+
|
|
188
|
+
# Include betas if any are present
|
|
189
|
+
if betas_list:
|
|
190
|
+
_request_params["betas"] = betas_list
|
|
191
|
+
|
|
161
192
|
if self.request_params:
|
|
162
193
|
_request_params.update(self.request_params)
|
|
163
194
|
|
|
@@ -165,214 +196,40 @@ class Claude(AnthropicClaude):
|
|
|
165
196
|
log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
|
|
166
197
|
return _request_params
|
|
167
198
|
|
|
168
|
-
def
|
|
169
|
-
self,
|
|
170
|
-
messages: List[Message],
|
|
171
|
-
assistant_message: Message,
|
|
172
|
-
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
173
|
-
tools: Optional[List[Dict[str, Any]]] = None,
|
|
174
|
-
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
175
|
-
run_response: Optional[RunOutput] = None,
|
|
176
|
-
) -> ModelResponse:
|
|
177
|
-
"""
|
|
178
|
-
Send a request to the Anthropic API to generate a response.
|
|
179
|
-
"""
|
|
180
|
-
|
|
181
|
-
try:
|
|
182
|
-
chat_messages, system_message = format_messages(messages)
|
|
183
|
-
request_kwargs = self._prepare_request_kwargs(system_message, tools)
|
|
184
|
-
|
|
185
|
-
if run_response and run_response.metrics:
|
|
186
|
-
run_response.metrics.set_time_to_first_token()
|
|
187
|
-
|
|
188
|
-
assistant_message.metrics.start_timer()
|
|
189
|
-
response = self.get_client().messages.create(
|
|
190
|
-
model=self.id,
|
|
191
|
-
messages=chat_messages, # type: ignore
|
|
192
|
-
**request_kwargs,
|
|
193
|
-
)
|
|
194
|
-
assistant_message.metrics.stop_timer()
|
|
195
|
-
|
|
196
|
-
model_response = self._parse_provider_response(response, response_format=response_format)
|
|
197
|
-
|
|
198
|
-
return model_response
|
|
199
|
-
|
|
200
|
-
except APIConnectionError as e:
|
|
201
|
-
log_error(f"Connection error while calling Claude API: {str(e)}")
|
|
202
|
-
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
203
|
-
except RateLimitError as e:
|
|
204
|
-
log_warning(f"Rate limit exceeded: {str(e)}")
|
|
205
|
-
raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
206
|
-
except APIStatusError as e:
|
|
207
|
-
log_error(f"Claude API error (status {e.status_code}): {str(e)}")
|
|
208
|
-
raise ModelProviderError(
|
|
209
|
-
message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
|
|
210
|
-
) from e
|
|
211
|
-
except Exception as e:
|
|
212
|
-
log_error(f"Unexpected error calling Claude API: {str(e)}")
|
|
213
|
-
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
214
|
-
|
|
215
|
-
def invoke_stream(
|
|
216
|
-
self,
|
|
217
|
-
messages: List[Message],
|
|
218
|
-
assistant_message: Message,
|
|
219
|
-
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
220
|
-
tools: Optional[List[Dict[str, Any]]] = None,
|
|
221
|
-
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
222
|
-
run_response: Optional[RunOutput] = None,
|
|
223
|
-
) -> Iterator[ModelResponse]:
|
|
224
|
-
"""
|
|
225
|
-
Stream a response from the Anthropic API.
|
|
226
|
-
|
|
227
|
-
Args:
|
|
228
|
-
messages (List[Message]): A list of messages to send to the model.
|
|
229
|
-
|
|
230
|
-
Returns:
|
|
231
|
-
Any: The streamed response from the model.
|
|
232
|
-
|
|
233
|
-
Raises:
|
|
234
|
-
APIConnectionError: If there are network connectivity issues
|
|
235
|
-
RateLimitError: If the API rate limit is exceeded
|
|
236
|
-
APIStatusError: For other API-related errors
|
|
237
|
-
"""
|
|
238
|
-
|
|
239
|
-
chat_messages, system_message = format_messages(messages)
|
|
240
|
-
request_kwargs = self._prepare_request_kwargs(system_message, tools)
|
|
241
|
-
|
|
242
|
-
try:
|
|
243
|
-
if run_response and run_response.metrics:
|
|
244
|
-
run_response.metrics.set_time_to_first_token()
|
|
245
|
-
|
|
246
|
-
assistant_message.metrics.start_timer()
|
|
247
|
-
|
|
248
|
-
with self.get_client().messages.stream(
|
|
249
|
-
model=self.id,
|
|
250
|
-
messages=chat_messages, # type: ignore
|
|
251
|
-
**request_kwargs,
|
|
252
|
-
) as stream:
|
|
253
|
-
for chunk in stream:
|
|
254
|
-
yield self._parse_provider_response_delta(chunk)
|
|
255
|
-
|
|
256
|
-
assistant_message.metrics.stop_timer()
|
|
257
|
-
|
|
258
|
-
except APIConnectionError as e:
|
|
259
|
-
log_error(f"Connection error while calling Claude API: {str(e)}")
|
|
260
|
-
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
261
|
-
except RateLimitError as e:
|
|
262
|
-
log_warning(f"Rate limit exceeded: {str(e)}")
|
|
263
|
-
raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
264
|
-
except APIStatusError as e:
|
|
265
|
-
log_error(f"Claude API error (status {e.status_code}): {str(e)}")
|
|
266
|
-
raise ModelProviderError(
|
|
267
|
-
message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
|
|
268
|
-
) from e
|
|
269
|
-
except Exception as e:
|
|
270
|
-
log_error(f"Unexpected error calling Claude API: {str(e)}")
|
|
271
|
-
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
272
|
-
|
|
273
|
-
async def ainvoke(
|
|
199
|
+
def _prepare_request_kwargs(
|
|
274
200
|
self,
|
|
275
|
-
|
|
276
|
-
assistant_message: Message,
|
|
277
|
-
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
201
|
+
system_message: str,
|
|
278
202
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
279
|
-
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
280
|
-
run_response: Optional[RunOutput] = None,
|
|
281
|
-
) -> ModelResponse:
|
|
282
|
-
"""
|
|
283
|
-
Send an asynchronous request to the Anthropic API to generate a response.
|
|
284
|
-
"""
|
|
285
|
-
|
|
286
|
-
try:
|
|
287
|
-
chat_messages, system_message = format_messages(messages)
|
|
288
|
-
request_kwargs = self._prepare_request_kwargs(system_message, tools)
|
|
289
|
-
|
|
290
|
-
if run_response and run_response.metrics:
|
|
291
|
-
run_response.metrics.set_time_to_first_token()
|
|
292
|
-
|
|
293
|
-
assistant_message.metrics.start_timer()
|
|
294
|
-
|
|
295
|
-
response = await self.get_async_client().messages.create(
|
|
296
|
-
model=self.id,
|
|
297
|
-
messages=chat_messages, # type: ignore
|
|
298
|
-
**request_kwargs,
|
|
299
|
-
)
|
|
300
|
-
|
|
301
|
-
assistant_message.metrics.stop_timer()
|
|
302
|
-
|
|
303
|
-
model_response = self._parse_provider_response(response, response_format=response_format)
|
|
304
|
-
|
|
305
|
-
return model_response
|
|
306
|
-
|
|
307
|
-
except APIConnectionError as e:
|
|
308
|
-
log_error(f"Connection error while calling Claude API: {str(e)}")
|
|
309
|
-
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
310
|
-
except RateLimitError as e:
|
|
311
|
-
log_warning(f"Rate limit exceeded: {str(e)}")
|
|
312
|
-
raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
313
|
-
except APIStatusError as e:
|
|
314
|
-
log_error(f"Claude API error (status {e.status_code}): {str(e)}")
|
|
315
|
-
raise ModelProviderError(
|
|
316
|
-
message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
|
|
317
|
-
) from e
|
|
318
|
-
except Exception as e:
|
|
319
|
-
log_error(f"Unexpected error calling Claude API: {str(e)}")
|
|
320
|
-
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
321
|
-
|
|
322
|
-
async def ainvoke_stream(
|
|
323
|
-
self,
|
|
324
|
-
messages: List[Message],
|
|
325
|
-
assistant_message: Message,
|
|
326
203
|
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
327
|
-
|
|
328
|
-
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
329
|
-
run_response: Optional[RunOutput] = None,
|
|
330
|
-
) -> AsyncIterator[ModelResponse]:
|
|
204
|
+
) -> Dict[str, Any]:
|
|
331
205
|
"""
|
|
332
|
-
|
|
206
|
+
Prepare the request keyword arguments for the API call.
|
|
333
207
|
|
|
334
208
|
Args:
|
|
335
|
-
|
|
209
|
+
system_message (str): The concatenated system messages.
|
|
210
|
+
tools: Optional list of tools
|
|
211
|
+
response_format: Optional response format (Pydantic model or dict)
|
|
336
212
|
|
|
337
213
|
Returns:
|
|
338
|
-
Any: The
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
async for chunk in stream:
|
|
361
|
-
yield self._parse_provider_response_delta(chunk)
|
|
362
|
-
|
|
363
|
-
assistant_message.metrics.stop_timer()
|
|
364
|
-
|
|
365
|
-
except APIConnectionError as e:
|
|
366
|
-
log_error(f"Connection error while calling Claude API: {str(e)}")
|
|
367
|
-
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
368
|
-
except RateLimitError as e:
|
|
369
|
-
log_warning(f"Rate limit exceeded: {str(e)}")
|
|
370
|
-
raise ModelRateLimitError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
371
|
-
except APIStatusError as e:
|
|
372
|
-
log_error(f"Claude API error (status {e.status_code}): {str(e)}")
|
|
373
|
-
raise ModelProviderError(
|
|
374
|
-
message=e.message, status_code=e.status_code, model_name=self.name, model_id=self.id
|
|
375
|
-
) from e
|
|
376
|
-
except Exception as e:
|
|
377
|
-
log_error(f"Unexpected error calling Claude API: {str(e)}")
|
|
378
|
-
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
214
|
+
Dict[str, Any]: The request keyword arguments.
|
|
215
|
+
"""
|
|
216
|
+
# Pass response_format and tools to get_request_params for beta header handling
|
|
217
|
+
request_kwargs = self.get_request_params(response_format=response_format, tools=tools).copy()
|
|
218
|
+
if system_message:
|
|
219
|
+
if self.cache_system_prompt:
|
|
220
|
+
cache_control = (
|
|
221
|
+
{"type": "ephemeral", "ttl": "1h"}
|
|
222
|
+
if self.extended_cache_time is not None and self.extended_cache_time is True
|
|
223
|
+
else {"type": "ephemeral"}
|
|
224
|
+
)
|
|
225
|
+
request_kwargs["system"] = [{"text": system_message, "type": "text", "cache_control": cache_control}]
|
|
226
|
+
else:
|
|
227
|
+
request_kwargs["system"] = [{"text": system_message, "type": "text"}]
|
|
228
|
+
|
|
229
|
+
# Format tools (this will handle strict mode)
|
|
230
|
+
if tools:
|
|
231
|
+
request_kwargs["tools"] = format_tools_for_model(tools)
|
|
232
|
+
|
|
233
|
+
if request_kwargs:
|
|
234
|
+
log_debug(f"Calling {self.provider} with request parameters: {request_kwargs}", log_level=2)
|
|
235
|
+
return request_kwargs
|
agno/models/azure/ai_foundry.py
CHANGED
|
@@ -60,6 +60,7 @@ class AzureAIFoundry(Model):
|
|
|
60
60
|
stop: Optional[Union[str, List[str]]] = None
|
|
61
61
|
seed: Optional[int] = None
|
|
62
62
|
model_extras: Optional[Dict[str, Any]] = None
|
|
63
|
+
strict_output: bool = True # When True, guarantees schema adherence for structured outputs. When False, attempts to follow schema as a guide but may occasionally deviate
|
|
63
64
|
request_params: Optional[Dict[str, Any]] = None
|
|
64
65
|
# Client parameters
|
|
65
66
|
api_key: Optional[str] = None
|
|
@@ -116,7 +117,7 @@ class AzureAIFoundry(Model):
|
|
|
116
117
|
name=response_format.__name__,
|
|
117
118
|
schema=response_format.model_json_schema(), # type: ignore
|
|
118
119
|
description=response_format.__doc__,
|
|
119
|
-
strict=
|
|
120
|
+
strict=self.strict_output,
|
|
120
121
|
),
|
|
121
122
|
)
|
|
122
123
|
|
|
@@ -135,9 +136,9 @@ class AzureAIFoundry(Model):
|
|
|
135
136
|
self.azure_endpoint = self.azure_endpoint or getenv("AZURE_ENDPOINT")
|
|
136
137
|
|
|
137
138
|
if not self.api_key:
|
|
138
|
-
|
|
139
|
+
log_error("AZURE_API_KEY not set. Please set the AZURE_API_KEY environment variable.")
|
|
139
140
|
if not self.azure_endpoint:
|
|
140
|
-
|
|
141
|
+
log_error("AZURE_ENDPOINT not set. Please set the AZURE_ENDPOINT environment variable.")
|
|
141
142
|
|
|
142
143
|
base_params = {
|
|
143
144
|
"endpoint": self.azure_endpoint,
|
|
@@ -160,7 +161,9 @@ class AzureAIFoundry(Model):
|
|
|
160
161
|
Returns:
|
|
161
162
|
ChatCompletionsClient: An instance of the Azure AI client.
|
|
162
163
|
"""
|
|
163
|
-
if
|
|
164
|
+
# Check if client exists and is not closed
|
|
165
|
+
# Azure's client doesn't have is_closed(), so we check if _client exists
|
|
166
|
+
if self.client and hasattr(self.client, "_client"):
|
|
164
167
|
return self.client
|
|
165
168
|
|
|
166
169
|
client_params = self._get_client_params()
|
|
@@ -174,11 +177,28 @@ class AzureAIFoundry(Model):
|
|
|
174
177
|
Returns:
|
|
175
178
|
AsyncChatCompletionsClient: An instance of the asynchronous Azure AI client.
|
|
176
179
|
"""
|
|
180
|
+
# Check if client exists and is not closed
|
|
181
|
+
# Azure's async client doesn't have is_closed(), so we check if _client exists
|
|
182
|
+
if self.async_client and hasattr(self.async_client, "_client"):
|
|
183
|
+
return self.async_client
|
|
184
|
+
|
|
177
185
|
client_params = self._get_client_params()
|
|
178
186
|
|
|
179
187
|
self.async_client = AsyncChatCompletionsClient(**client_params)
|
|
180
188
|
return self.async_client
|
|
181
189
|
|
|
190
|
+
def close(self) -> None:
|
|
191
|
+
"""Close the synchronous client and clean up resources."""
|
|
192
|
+
if self.client:
|
|
193
|
+
self.client.close()
|
|
194
|
+
self.client = None
|
|
195
|
+
|
|
196
|
+
async def aclose(self) -> None:
|
|
197
|
+
"""Close the asynchronous client and clean up resources."""
|
|
198
|
+
if self.async_client:
|
|
199
|
+
await self.async_client.close()
|
|
200
|
+
self.async_client = None
|
|
201
|
+
|
|
182
202
|
def invoke(
|
|
183
203
|
self,
|
|
184
204
|
messages: List[Message],
|
|
@@ -187,6 +207,7 @@ class AzureAIFoundry(Model):
|
|
|
187
207
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
188
208
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
189
209
|
run_response: Optional[RunOutput] = None,
|
|
210
|
+
compress_tool_results: bool = False,
|
|
190
211
|
) -> ModelResponse:
|
|
191
212
|
"""
|
|
192
213
|
Send a chat completion request to the Azure AI API.
|
|
@@ -197,7 +218,7 @@ class AzureAIFoundry(Model):
|
|
|
197
218
|
|
|
198
219
|
assistant_message.metrics.start_timer()
|
|
199
220
|
provider_response = self.get_client().complete(
|
|
200
|
-
messages=[format_message(m) for m in messages],
|
|
221
|
+
messages=[format_message(m, compress_tool_results) for m in messages],
|
|
201
222
|
**self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
|
|
202
223
|
)
|
|
203
224
|
assistant_message.metrics.stop_timer()
|
|
@@ -226,6 +247,7 @@ class AzureAIFoundry(Model):
|
|
|
226
247
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
227
248
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
228
249
|
run_response: Optional[RunOutput] = None,
|
|
250
|
+
compress_tool_results: bool = False,
|
|
229
251
|
) -> ModelResponse:
|
|
230
252
|
"""
|
|
231
253
|
Sends an asynchronous chat completion request to the Azure AI API.
|
|
@@ -236,11 +258,10 @@ class AzureAIFoundry(Model):
|
|
|
236
258
|
run_response.metrics.set_time_to_first_token()
|
|
237
259
|
|
|
238
260
|
assistant_message.metrics.start_timer()
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
)
|
|
261
|
+
provider_response = await self.get_async_client().complete(
|
|
262
|
+
messages=[format_message(m, compress_tool_results) for m in messages],
|
|
263
|
+
**self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
|
|
264
|
+
)
|
|
244
265
|
assistant_message.metrics.stop_timer()
|
|
245
266
|
|
|
246
267
|
model_response = self._parse_provider_response(provider_response, response_format=response_format) # type: ignore
|
|
@@ -267,6 +288,7 @@ class AzureAIFoundry(Model):
|
|
|
267
288
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
268
289
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
269
290
|
run_response: Optional[RunOutput] = None,
|
|
291
|
+
compress_tool_results: bool = False,
|
|
270
292
|
) -> Iterator[ModelResponse]:
|
|
271
293
|
"""
|
|
272
294
|
Send a streaming chat completion request to the Azure AI API.
|
|
@@ -278,7 +300,7 @@ class AzureAIFoundry(Model):
|
|
|
278
300
|
assistant_message.metrics.start_timer()
|
|
279
301
|
|
|
280
302
|
for chunk in self.get_client().complete(
|
|
281
|
-
messages=[format_message(m) for m in messages],
|
|
303
|
+
messages=[format_message(m, compress_tool_results) for m in messages],
|
|
282
304
|
stream=True,
|
|
283
305
|
**self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
|
|
284
306
|
):
|
|
@@ -306,6 +328,7 @@ class AzureAIFoundry(Model):
|
|
|
306
328
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
307
329
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
308
330
|
run_response: Optional[RunOutput] = None,
|
|
331
|
+
compress_tool_results: bool = False,
|
|
309
332
|
) -> AsyncIterator[ModelResponse]:
|
|
310
333
|
"""
|
|
311
334
|
Sends an asynchronous streaming chat completion request to the Azure AI API.
|
|
@@ -316,14 +339,13 @@ class AzureAIFoundry(Model):
|
|
|
316
339
|
|
|
317
340
|
assistant_message.metrics.start_timer()
|
|
318
341
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
yield self._parse_provider_response_delta(chunk)
|
|
342
|
+
async_stream = await self.get_async_client().complete(
|
|
343
|
+
messages=[format_message(m, compress_tool_results) for m in messages],
|
|
344
|
+
stream=True,
|
|
345
|
+
**self.get_request_params(tools=tools, response_format=response_format, tool_choice=tool_choice),
|
|
346
|
+
)
|
|
347
|
+
async for chunk in async_stream: # type: ignore
|
|
348
|
+
yield self._parse_provider_response_delta(chunk)
|
|
327
349
|
|
|
328
350
|
assistant_message.metrics.stop_timer()
|
|
329
351
|
|