agno-2.1.2-py3-none-any.whl → agno-2.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
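For orientation before the file-level diff: the largest behavioral change in `agno/models/base.py` adds development-time response caching and provider-error retry configuration to the base `Model` dataclass. A minimal, illustrative sketch of how those new fields might be set (field names are taken from the diff below; the `OpenAIChat` class and model id are only examples, and exact behavior may differ in the released package):

```python
from agno.models.openai import OpenAIChat

# Hypothetical configuration built from the new Model fields shown in the diff below.
model = OpenAIChat(
    id="gpt-4o",               # example model id, not prescribed by the diff
    cache_response=True,       # cache model responses to avoid redundant API calls during development
    cache_ttl=3600,            # cached entries expire after one hour (seconds); None means no expiration
    retries=2,                 # retry up to 2 extra times on ModelProviderError
    delay_between_retries=1,   # seconds between retry attempts
    exponential_backoff=True,  # double the delay after each failed attempt
)
```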
agno/models/base.py
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import collections.abc
|
|
3
|
+
import json
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
5
|
from dataclasses import dataclass, field
|
|
6
|
+
from hashlib import md5
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from time import sleep, time
|
|
5
9
|
from types import AsyncGeneratorType, GeneratorType
|
|
6
10
|
from typing import (
|
|
11
|
+
TYPE_CHECKING,
|
|
7
12
|
Any,
|
|
8
13
|
AsyncIterator,
|
|
9
14
|
Dict,
|
|
@@ -11,25 +16,31 @@ from typing import (
|
|
|
11
16
|
List,
|
|
12
17
|
Literal,
|
|
13
18
|
Optional,
|
|
19
|
+
Sequence,
|
|
14
20
|
Tuple,
|
|
15
21
|
Type,
|
|
16
22
|
Union,
|
|
17
23
|
get_args,
|
|
18
24
|
)
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from agno.compression.manager import CompressionManager
|
|
19
28
|
from uuid import uuid4
|
|
20
29
|
|
|
21
30
|
from pydantic import BaseModel
|
|
22
31
|
|
|
23
|
-
from agno.exceptions import AgentRunException
|
|
32
|
+
from agno.exceptions import AgentRunException, ModelProviderError, RetryableModelProviderError
|
|
24
33
|
from agno.media import Audio, File, Image, Video
|
|
25
34
|
from agno.models.message import Citations, Message
|
|
26
35
|
from agno.models.metrics import Metrics
|
|
27
36
|
from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
|
|
28
37
|
from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
|
|
38
|
+
from agno.run.requirement import RunRequirement
|
|
29
39
|
from agno.run.team import RunContentEvent as TeamRunContentEvent
|
|
30
|
-
from agno.run.team import TeamRunOutputEvent
|
|
40
|
+
from agno.run.team import TeamRunOutput, TeamRunOutputEvent
|
|
41
|
+
from agno.run.workflow import WorkflowRunOutputEvent
|
|
31
42
|
from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
|
|
32
|
-
from agno.utils.log import log_debug, log_error, log_warning
|
|
43
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
33
44
|
from agno.utils.timer import Timer
|
|
34
45
|
from agno.utils.tools import get_function_call_for_tool_call, get_function_call_for_tool_execution
|
|
35
46
|
|
|
@@ -48,6 +59,8 @@ class MessageData:
|
|
|
48
59
|
response_video: Optional[Video] = None
|
|
49
60
|
response_file: Optional[File] = None
|
|
50
61
|
|
|
62
|
+
response_metrics: Optional[Metrics] = None
|
|
63
|
+
|
|
51
64
|
# Data from the provider that we might need on subsequent messages
|
|
52
65
|
response_provider_data: Optional[Dict[str, Any]] = None
|
|
53
66
|
|
|
@@ -133,18 +146,323 @@ class Model(ABC):
|
|
|
133
146
|
# The role of the assistant message.
|
|
134
147
|
assistant_message_role: str = "assistant"
|
|
135
148
|
|
|
149
|
+
# Cache model responses to avoid redundant API calls during development
|
|
150
|
+
cache_response: bool = False
|
|
151
|
+
cache_ttl: Optional[int] = None
|
|
152
|
+
cache_dir: Optional[str] = None
|
|
153
|
+
|
|
154
|
+
# Retry configuration for model provider errors
|
|
155
|
+
# Number of retries to attempt when a ModelProviderError occurs
|
|
156
|
+
retries: int = 0
|
|
157
|
+
# Delay between retries (in seconds)
|
|
158
|
+
delay_between_retries: int = 1
|
|
159
|
+
# Exponential backoff: if True, the delay between retries is doubled each time
|
|
160
|
+
exponential_backoff: bool = False
|
|
161
|
+
# Enable retrying a model invocation once with a guidance message.
|
|
162
|
+
# This is useful for known errors avoidable with extra instructions.
|
|
163
|
+
retry_with_guidance: bool = True
|
|
164
|
+
# Set the number of times to retry the model invocation with guidance.
|
|
165
|
+
retry_with_guidance_limit: int = 1
|
|
166
|
+
|
|
136
167
|
def __post_init__(self):
|
|
137
168
|
if self.provider is None and self.name is not None:
|
|
138
169
|
self.provider = f"{self.name} ({self.id})"
|
|
139
170
|
|
|
171
|
+
def _get_retry_delay(self, attempt: int) -> float:
|
|
172
|
+
"""Calculate the delay before the next retry attempt."""
|
|
173
|
+
if self.exponential_backoff:
|
|
174
|
+
return self.delay_between_retries * (2**attempt)
|
|
175
|
+
return self.delay_between_retries
|
|
176
|
+
|
|
177
|
+
def _invoke_with_retry(self, **kwargs) -> ModelResponse:
|
|
178
|
+
"""
|
|
179
|
+
Invoke the model with retry logic for ModelProviderError.
|
|
180
|
+
|
|
181
|
+
This method wraps the invoke() call and retries on ModelProviderError
|
|
182
|
+
with optional exponential backoff.
|
|
183
|
+
"""
|
|
184
|
+
last_exception: Optional[ModelProviderError] = None
|
|
185
|
+
|
|
186
|
+
for attempt in range(self.retries + 1):
|
|
187
|
+
try:
|
|
188
|
+
retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
|
|
189
|
+
return self.invoke(**kwargs)
|
|
190
|
+
except ModelProviderError as e:
|
|
191
|
+
last_exception = e
|
|
192
|
+
if attempt < self.retries:
|
|
193
|
+
delay = self._get_retry_delay(attempt)
|
|
194
|
+
log_warning(
|
|
195
|
+
f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
|
|
196
|
+
)
|
|
197
|
+
sleep(delay)
|
|
198
|
+
else:
|
|
199
|
+
log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
|
|
200
|
+
except RetryableModelProviderError as e:
|
|
201
|
+
current_count = retries_with_guidance_count
|
|
202
|
+
if current_count >= self.retry_with_guidance_limit:
|
|
203
|
+
raise ModelProviderError(
|
|
204
|
+
message=f"Max retries with guidance reached. Error: {e.original_error}",
|
|
205
|
+
model_name=self.name,
|
|
206
|
+
model_id=self.id,
|
|
207
|
+
)
|
|
208
|
+
kwargs.pop("retry_with_guidance", None)
|
|
209
|
+
kwargs["retries_with_guidance_count"] = current_count + 1
|
|
210
|
+
|
|
211
|
+
# Append the guidance message to help the model avoid the error in the next invoke.
|
|
212
|
+
kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
|
|
213
|
+
|
|
214
|
+
return self._invoke_with_retry(**kwargs, retry_with_guidance=True)
|
|
215
|
+
|
|
216
|
+
# If we've exhausted all retries, raise the last exception
|
|
217
|
+
raise last_exception # type: ignore
|
|
218
|
+
|
|
219
|
+
async def _ainvoke_with_retry(self, **kwargs) -> ModelResponse:
|
|
220
|
+
"""
|
|
221
|
+
Asynchronously invoke the model with retry logic for ModelProviderError.
|
|
222
|
+
|
|
223
|
+
This method wraps the ainvoke() call and retries on ModelProviderError
|
|
224
|
+
with optional exponential backoff.
|
|
225
|
+
"""
|
|
226
|
+
last_exception: Optional[ModelProviderError] = None
|
|
227
|
+
|
|
228
|
+
for attempt in range(self.retries + 1):
|
|
229
|
+
try:
|
|
230
|
+
retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
|
|
231
|
+
return await self.ainvoke(**kwargs)
|
|
232
|
+
except ModelProviderError as e:
|
|
233
|
+
last_exception = e
|
|
234
|
+
if attempt < self.retries:
|
|
235
|
+
delay = self._get_retry_delay(attempt)
|
|
236
|
+
log_warning(
|
|
237
|
+
f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
|
|
238
|
+
)
|
|
239
|
+
await asyncio.sleep(delay)
|
|
240
|
+
else:
|
|
241
|
+
log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
|
|
242
|
+
except RetryableModelProviderError as e:
|
|
243
|
+
current_count = retries_with_guidance_count
|
|
244
|
+
if current_count >= self.retry_with_guidance_limit:
|
|
245
|
+
raise ModelProviderError(
|
|
246
|
+
message=f"Max retries with guidance reached. Error: {e.original_error}",
|
|
247
|
+
model_name=self.name,
|
|
248
|
+
model_id=self.id,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
kwargs.pop("retry_with_guidance", None)
|
|
252
|
+
kwargs["retries_with_guidance_count"] = current_count + 1
|
|
253
|
+
|
|
254
|
+
# Append the guidance message to help the model avoid the error in the next invoke.
|
|
255
|
+
kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
|
|
256
|
+
|
|
257
|
+
return await self._ainvoke_with_retry(**kwargs, retry_with_guidance=True)
|
|
258
|
+
|
|
259
|
+
# If we've exhausted all retries, raise the last exception
|
|
260
|
+
raise last_exception # type: ignore
|
|
261
|
+
|
|
262
|
+
def _invoke_stream_with_retry(self, **kwargs) -> Iterator[ModelResponse]:
|
|
263
|
+
"""
|
|
264
|
+
Invoke the model stream with retry logic for ModelProviderError.
|
|
265
|
+
|
|
266
|
+
This method wraps the invoke_stream() call and retries on ModelProviderError
|
|
267
|
+
with optional exponential backoff. Note that retries restart the entire stream.
|
|
268
|
+
"""
|
|
269
|
+
last_exception: Optional[ModelProviderError] = None
|
|
270
|
+
|
|
271
|
+
for attempt in range(self.retries + 1):
|
|
272
|
+
try:
|
|
273
|
+
retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
|
|
274
|
+
yield from self.invoke_stream(**kwargs)
|
|
275
|
+
return # Success, exit the retry loop
|
|
276
|
+
except ModelProviderError as e:
|
|
277
|
+
last_exception = e
|
|
278
|
+
if attempt < self.retries:
|
|
279
|
+
delay = self._get_retry_delay(attempt)
|
|
280
|
+
log_warning(
|
|
281
|
+
f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
|
|
282
|
+
f"Retrying in {delay}s..."
|
|
283
|
+
)
|
|
284
|
+
sleep(delay)
|
|
285
|
+
else:
|
|
286
|
+
log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
|
|
287
|
+
except RetryableModelProviderError as e:
|
|
288
|
+
current_count = retries_with_guidance_count
|
|
289
|
+
if current_count >= self.retry_with_guidance_limit:
|
|
290
|
+
raise ModelProviderError(
|
|
291
|
+
message=f"Max retries with guidance reached. Error: {e.original_error}",
|
|
292
|
+
model_name=self.name,
|
|
293
|
+
model_id=self.id,
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
kwargs.pop("retry_with_guidance", None)
|
|
297
|
+
kwargs["retries_with_guidance_count"] = current_count + 1
|
|
298
|
+
|
|
299
|
+
# Append the guidance message to help the model avoid the error in the next invoke.
|
|
300
|
+
kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
|
|
301
|
+
|
|
302
|
+
yield from self._invoke_stream_with_retry(**kwargs, retry_with_guidance=True)
|
|
303
|
+
return # Success, exit after regeneration
|
|
304
|
+
|
|
305
|
+
# If we've exhausted all retries, raise the last exception
|
|
306
|
+
raise last_exception # type: ignore
|
|
307
|
+
|
|
308
|
+
async def _ainvoke_stream_with_retry(self, **kwargs) -> AsyncIterator[ModelResponse]:
|
|
309
|
+
"""
|
|
310
|
+
Asynchronously invoke the model stream with retry logic for ModelProviderError.
|
|
311
|
+
|
|
312
|
+
This method wraps the ainvoke_stream() call and retries on ModelProviderError
|
|
313
|
+
with optional exponential backoff. Note that retries restart the entire stream.
|
|
314
|
+
"""
|
|
315
|
+
last_exception: Optional[ModelProviderError] = None
|
|
316
|
+
|
|
317
|
+
for attempt in range(self.retries + 1):
|
|
318
|
+
try:
|
|
319
|
+
retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
|
|
320
|
+
async for response in self.ainvoke_stream(**kwargs):
|
|
321
|
+
yield response
|
|
322
|
+
return # Success, exit the retry loop
|
|
323
|
+
except ModelProviderError as e:
|
|
324
|
+
last_exception = e
|
|
325
|
+
if attempt < self.retries:
|
|
326
|
+
delay = self._get_retry_delay(attempt)
|
|
327
|
+
log_warning(
|
|
328
|
+
f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
|
|
329
|
+
f"Retrying in {delay}s..."
|
|
330
|
+
)
|
|
331
|
+
await asyncio.sleep(delay)
|
|
332
|
+
else:
|
|
333
|
+
log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
|
|
334
|
+
except RetryableModelProviderError as e:
|
|
335
|
+
current_count = retries_with_guidance_count
|
|
336
|
+
if current_count >= self.retry_with_guidance_limit:
|
|
337
|
+
raise ModelProviderError(
|
|
338
|
+
message=f"Max retries with guidance reached. Error: {e.original_error}",
|
|
339
|
+
model_name=self.name,
|
|
340
|
+
model_id=self.id,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
kwargs.pop("retry_with_guidance", None)
|
|
344
|
+
kwargs["retries_with_guidance_count"] = current_count + 1
|
|
345
|
+
|
|
346
|
+
# Append the guidance message to help the model avoid the error in the next invoke.
|
|
347
|
+
kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
|
|
348
|
+
|
|
349
|
+
async for response in self._ainvoke_stream_with_retry(**kwargs, retry_with_guidance=True):
|
|
350
|
+
yield response
|
|
351
|
+
return # Success, exit after regeneration
|
|
352
|
+
|
|
353
|
+
# If we've exhausted all retries, raise the last exception
|
|
354
|
+
raise last_exception # type: ignore
|
|
355
|
+
|
|
140
356
|
def to_dict(self) -> Dict[str, Any]:
|
|
141
357
|
fields = {"name", "id", "provider"}
|
|
142
358
|
_dict = {field: getattr(self, field) for field in fields if getattr(self, field) is not None}
|
|
143
359
|
return _dict
|
|
144
360
|
|
|
361
|
+
def _remove_temporary_messages(self, messages: List[Message]) -> None:
|
|
362
|
+
"""Remove temporary messages from the given list.
|
|
363
|
+
|
|
364
|
+
Args:
|
|
365
|
+
messages: The list of messages to filter (modified in place).
|
|
366
|
+
"""
|
|
367
|
+
messages[:] = [m for m in messages if not m.temporary]
|
|
368
|
+
|
|
145
369
|
def get_provider(self) -> str:
|
|
146
370
|
return self.provider or self.name or self.__class__.__name__
|
|
147
371
|
|
|
372
|
+
def _get_model_cache_key(self, messages: List[Message], stream: bool, **kwargs: Any) -> str:
|
|
373
|
+
"""Generate a cache key based on model messages and core parameters."""
|
|
374
|
+
message_data = []
|
|
375
|
+
for msg in messages:
|
|
376
|
+
msg_dict = {
|
|
377
|
+
"role": msg.role,
|
|
378
|
+
"content": msg.content,
|
|
379
|
+
}
|
|
380
|
+
message_data.append(msg_dict)
|
|
381
|
+
|
|
382
|
+
# Include tools parameter in cache key
|
|
383
|
+
has_tools = bool(kwargs.get("tools"))
|
|
384
|
+
|
|
385
|
+
cache_data = {
|
|
386
|
+
"model_id": self.id,
|
|
387
|
+
"messages": message_data,
|
|
388
|
+
"has_tools": has_tools,
|
|
389
|
+
"response_format": kwargs.get("response_format"),
|
|
390
|
+
"stream": stream,
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
cache_str = json.dumps(cache_data, sort_keys=True)
|
|
394
|
+
return md5(cache_str.encode()).hexdigest()
|
|
395
|
+
|
|
396
|
+
def _get_model_cache_file_path(self, cache_key: str) -> Path:
|
|
397
|
+
"""Get the file path for a cache key."""
|
|
398
|
+
if self.cache_dir:
|
|
399
|
+
cache_dir = Path(self.cache_dir)
|
|
400
|
+
else:
|
|
401
|
+
cache_dir = Path.home() / ".agno" / "cache" / "model_responses"
|
|
402
|
+
|
|
403
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
404
|
+
return cache_dir / f"{cache_key}.json"
|
|
405
|
+
|
|
406
|
+
def _get_cached_model_response(self, cache_key: str) -> Optional[Dict[str, Any]]:
|
|
407
|
+
"""Retrieve a cached response if it exists and is not expired."""
|
|
408
|
+
cache_file = self._get_model_cache_file_path(cache_key)
|
|
409
|
+
|
|
410
|
+
if not cache_file.exists():
|
|
411
|
+
return None
|
|
412
|
+
|
|
413
|
+
try:
|
|
414
|
+
with open(cache_file, "r") as f:
|
|
415
|
+
cached_data = json.load(f)
|
|
416
|
+
|
|
417
|
+
# Check TTL if set (None means no expiration)
|
|
418
|
+
if self.cache_ttl is not None:
|
|
419
|
+
if time() - cached_data["timestamp"] > self.cache_ttl:
|
|
420
|
+
return None
|
|
421
|
+
|
|
422
|
+
return cached_data
|
|
423
|
+
except Exception:
|
|
424
|
+
return None
|
|
425
|
+
|
|
426
|
+
def _save_model_response_to_cache(self, cache_key: str, result: ModelResponse, is_streaming: bool = False) -> None:
|
|
427
|
+
"""Save a model response to cache."""
|
|
428
|
+
try:
|
|
429
|
+
cache_file = self._get_model_cache_file_path(cache_key)
|
|
430
|
+
|
|
431
|
+
cache_data = {
|
|
432
|
+
"timestamp": int(time()),
|
|
433
|
+
"is_streaming": is_streaming,
|
|
434
|
+
"result": result.to_dict(),
|
|
435
|
+
}
|
|
436
|
+
with open(cache_file, "w") as f:
|
|
437
|
+
json.dump(cache_data, f)
|
|
438
|
+
except Exception:
|
|
439
|
+
pass
|
|
440
|
+
|
|
441
|
+
def _save_streaming_responses_to_cache(self, cache_key: str, responses: List[ModelResponse]) -> None:
|
|
442
|
+
"""Save streaming responses to cache."""
|
|
443
|
+
cache_file = self._get_model_cache_file_path(cache_key)
|
|
444
|
+
|
|
445
|
+
cache_data = {
|
|
446
|
+
"timestamp": int(time()),
|
|
447
|
+
"is_streaming": True,
|
|
448
|
+
"streaming_responses": [r.to_dict() for r in responses],
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
try:
|
|
452
|
+
with open(cache_file, "w") as f:
|
|
453
|
+
json.dump(cache_data, f)
|
|
454
|
+
except Exception:
|
|
455
|
+
pass
|
|
456
|
+
|
|
457
|
+
def _model_response_from_cache(self, cached_data: Dict[str, Any]) -> ModelResponse:
|
|
458
|
+
"""Reconstruct a ModelResponse from cached data."""
|
|
459
|
+
return ModelResponse.from_dict(cached_data["result"])
|
|
460
|
+
|
|
461
|
+
def _streaming_responses_from_cache(self, cached_data: list) -> Iterator[ModelResponse]:
|
|
462
|
+
"""Reconstruct streaming responses from cached data."""
|
|
463
|
+
for cached_response in cached_data:
|
|
464
|
+
yield ModelResponse.from_dict(cached_response)
|
|
465
|
+
|
|
148
466
|
@abstractmethod
|
|
149
467
|
def invoke(self, *args, **kwargs) -> ModelResponse:
|
|
150
468
|
pass
|
|
@@ -187,298 +505,455 @@ class Model(ABC):
|
|
|
187
505
|
"""
|
|
188
506
|
pass
|
|
189
507
|
|
|
508
|
+
def _format_tools(self, tools: Optional[List[Union[Function, dict]]]) -> List[Dict[str, Any]]:
|
|
509
|
+
_tool_dicts = []
|
|
510
|
+
for tool in tools or []:
|
|
511
|
+
if isinstance(tool, Function):
|
|
512
|
+
_tool_dicts.append({"type": "function", "function": tool.to_dict()})
|
|
513
|
+
else:
|
|
514
|
+
# If a dict is passed, it is a builtin tool
|
|
515
|
+
_tool_dicts.append(tool)
|
|
516
|
+
return _tool_dicts
|
|
517
|
+
|
|
518
|
+
def count_tokens(
|
|
519
|
+
self,
|
|
520
|
+
messages: List[Message],
|
|
521
|
+
tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
|
|
522
|
+
output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
523
|
+
) -> int:
|
|
524
|
+
from agno.utils.tokens import count_tokens
|
|
525
|
+
|
|
526
|
+
return count_tokens(
|
|
527
|
+
messages,
|
|
528
|
+
tools=list(tools) if tools else None,
|
|
529
|
+
model_id=self.id,
|
|
530
|
+
output_schema=output_schema,
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
async def acount_tokens(
|
|
534
|
+
self,
|
|
535
|
+
messages: List[Message],
|
|
536
|
+
tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
|
|
537
|
+
output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
538
|
+
) -> int:
|
|
539
|
+
return self.count_tokens(messages, tools, output_schema=output_schema)
|
|
540
|
+
|
|
190
541
|
def response(
|
|
191
542
|
self,
|
|
192
543
|
messages: List[Message],
|
|
193
544
|
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
194
|
-
tools: Optional[List[
|
|
195
|
-
functions: Optional[Dict[str, Function]] = None,
|
|
545
|
+
tools: Optional[List[Union[Function, dict]]] = None,
|
|
196
546
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
197
547
|
tool_call_limit: Optional[int] = None,
|
|
198
|
-
run_response: Optional[RunOutput] = None,
|
|
548
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
199
549
|
send_media_to_model: bool = True,
|
|
550
|
+
compression_manager: Optional["CompressionManager"] = None,
|
|
200
551
|
) -> ModelResponse:
|
|
201
552
|
"""
|
|
202
553
|
Generate a response from the model.
|
|
554
|
+
|
|
555
|
+
Args:
|
|
556
|
+
messages: List of messages to send to the model
|
|
557
|
+
response_format: Response format to use
|
|
558
|
+
tools: List of tools to use. This includes the original Function objects and dicts for built-in tools.
|
|
559
|
+
tool_choice: Tool choice to use
|
|
560
|
+
tool_call_limit: Tool call limit
|
|
561
|
+
run_response: Run response to use
|
|
562
|
+
send_media_to_model: Whether to send media to the model
|
|
203
563
|
"""
|
|
564
|
+
try:
|
|
565
|
+
# Check cache if enabled
|
|
566
|
+
if self.cache_response:
|
|
567
|
+
cache_key = self._get_model_cache_key(
|
|
568
|
+
messages, stream=False, response_format=response_format, tools=tools
|
|
569
|
+
)
|
|
570
|
+
cached_data = self._get_cached_model_response(cache_key)
|
|
204
571
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
_log_messages(messages)
|
|
209
|
-
model_response = ModelResponse()
|
|
210
|
-
|
|
211
|
-
function_call_count = 0
|
|
212
|
-
|
|
213
|
-
while True:
|
|
214
|
-
# Get response from model
|
|
215
|
-
assistant_message = Message(role=self.assistant_message_role)
|
|
216
|
-
self._process_model_response(
|
|
217
|
-
messages=messages,
|
|
218
|
-
assistant_message=assistant_message,
|
|
219
|
-
model_response=model_response,
|
|
220
|
-
response_format=response_format,
|
|
221
|
-
tools=tools,
|
|
222
|
-
tool_choice=tool_choice or self._tool_choice,
|
|
223
|
-
run_response=run_response,
|
|
224
|
-
)
|
|
572
|
+
if cached_data:
|
|
573
|
+
log_info("Cache hit for model response")
|
|
574
|
+
return self._model_response_from_cache(cached_data)
|
|
225
575
|
|
|
226
|
-
|
|
227
|
-
|
|
576
|
+
log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
|
|
577
|
+
log_debug(f"Model: {self.id}", center=True, symbol="-")
|
|
228
578
|
|
|
229
|
-
|
|
230
|
-
|
|
579
|
+
_log_messages(messages)
|
|
580
|
+
model_response = ModelResponse()
|
|
231
581
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
582
|
+
function_call_count = 0
|
|
583
|
+
|
|
584
|
+
_tool_dicts = self._format_tools(tools) if tools is not None else []
|
|
585
|
+
_functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
|
|
586
|
+
|
|
587
|
+
_compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
|
|
588
|
+
_compression_manager = compression_manager if _compress_tool_results else None
|
|
589
|
+
|
|
590
|
+
while True:
|
|
591
|
+
# Compress tool results if compression is enabled and threshold is met
|
|
592
|
+
if _compression_manager is not None and _compression_manager.should_compress(
|
|
593
|
+
messages, tools, model=self, response_format=response_format
|
|
594
|
+
):
|
|
595
|
+
_compression_manager.compress(messages)
|
|
596
|
+
|
|
597
|
+
# Get response from model
|
|
598
|
+
assistant_message = Message(role=self.assistant_message_role)
|
|
599
|
+
self._process_model_response(
|
|
237
600
|
messages=messages,
|
|
601
|
+
assistant_message=assistant_message,
|
|
238
602
|
model_response=model_response,
|
|
239
|
-
|
|
603
|
+
response_format=response_format,
|
|
604
|
+
tools=_tool_dicts,
|
|
605
|
+
tool_choice=tool_choice or self._tool_choice,
|
|
606
|
+
run_response=run_response,
|
|
607
|
+
compress_tool_results=_compress_tool_results,
|
|
240
608
|
)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
if function_call_response
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
if
|
|
273
|
-
model_response.
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
function_call_response.
|
|
278
|
-
|
|
609
|
+
|
|
610
|
+
# Add assistant message to messages
|
|
611
|
+
messages.append(assistant_message)
|
|
612
|
+
|
|
613
|
+
# Log response and metrics
|
|
614
|
+
assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
|
|
615
|
+
|
|
616
|
+
# Handle tool calls if present
|
|
617
|
+
if assistant_message.tool_calls:
|
|
618
|
+
# Prepare function calls
|
|
619
|
+
function_calls_to_run = self._prepare_function_calls(
|
|
620
|
+
assistant_message=assistant_message,
|
|
621
|
+
messages=messages,
|
|
622
|
+
model_response=model_response,
|
|
623
|
+
functions=_functions,
|
|
624
|
+
)
|
|
625
|
+
function_call_results: List[Message] = []
|
|
626
|
+
|
|
627
|
+
# Execute function calls
|
|
628
|
+
for function_call_response in self.run_function_calls(
|
|
629
|
+
function_calls=function_calls_to_run,
|
|
630
|
+
function_call_results=function_call_results,
|
|
631
|
+
current_function_call_count=function_call_count,
|
|
632
|
+
function_call_limit=tool_call_limit,
|
|
633
|
+
):
|
|
634
|
+
if isinstance(function_call_response, ModelResponse):
|
|
635
|
+
# The session state is updated by the function call
|
|
636
|
+
if function_call_response.updated_session_state is not None:
|
|
637
|
+
model_response.updated_session_state = function_call_response.updated_session_state
|
|
638
|
+
|
|
639
|
+
# Media artifacts are generated by the function call
|
|
640
|
+
if function_call_response.images is not None:
|
|
641
|
+
if model_response.images is None:
|
|
642
|
+
model_response.images = []
|
|
643
|
+
model_response.images.extend(function_call_response.images)
|
|
644
|
+
|
|
645
|
+
if function_call_response.audios is not None:
|
|
646
|
+
if model_response.audios is None:
|
|
647
|
+
model_response.audios = []
|
|
648
|
+
model_response.audios.extend(function_call_response.audios)
|
|
649
|
+
|
|
650
|
+
if function_call_response.videos is not None:
|
|
651
|
+
if model_response.videos is None:
|
|
652
|
+
model_response.videos = []
|
|
653
|
+
model_response.videos.extend(function_call_response.videos)
|
|
654
|
+
|
|
655
|
+
if function_call_response.files is not None:
|
|
656
|
+
if model_response.files is None:
|
|
657
|
+
model_response.files = []
|
|
658
|
+
model_response.files.extend(function_call_response.files)
|
|
659
|
+
|
|
660
|
+
if (
|
|
661
|
+
function_call_response.event
|
|
662
|
+
in [
|
|
663
|
+
ModelResponseEvent.tool_call_completed.value,
|
|
664
|
+
ModelResponseEvent.tool_call_paused.value,
|
|
665
|
+
]
|
|
666
|
+
and function_call_response.tool_executions is not None
|
|
667
|
+
):
|
|
668
|
+
# Record the tool execution in the model response
|
|
669
|
+
if model_response.tool_executions is None:
|
|
670
|
+
model_response.tool_executions = []
|
|
671
|
+
model_response.tool_executions.extend(function_call_response.tool_executions)
|
|
672
|
+
|
|
673
|
+
# If the tool is currently paused (HITL flow), add the requirement to the run response
|
|
674
|
+
if (
|
|
675
|
+
function_call_response.event == ModelResponseEvent.tool_call_paused.value
|
|
676
|
+
and run_response is not None
|
|
677
|
+
):
|
|
678
|
+
current_tool_execution = function_call_response.tool_executions[-1]
|
|
679
|
+
if run_response.requirements is None:
|
|
680
|
+
run_response.requirements = []
|
|
681
|
+
run_response.requirements.append(
|
|
682
|
+
RunRequirement(tool_execution=current_tool_execution)
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
elif function_call_response.event not in [
|
|
686
|
+
ModelResponseEvent.tool_call_started.value,
|
|
279
687
|
ModelResponseEvent.tool_call_completed.value,
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
):
|
|
284
|
-
if model_response.tool_executions is None:
|
|
285
|
-
model_response.tool_executions = []
|
|
286
|
-
model_response.tool_executions.extend(function_call_response.tool_executions)
|
|
287
|
-
|
|
288
|
-
elif function_call_response.event not in [
|
|
289
|
-
ModelResponseEvent.tool_call_started.value,
|
|
290
|
-
ModelResponseEvent.tool_call_completed.value,
|
|
291
|
-
]:
|
|
292
|
-
if function_call_response.content:
|
|
293
|
-
model_response.content += function_call_response.content # type: ignore
|
|
294
|
-
|
|
295
|
-
# Add a function call for each successful execution
|
|
296
|
-
function_call_count += len(function_call_results)
|
|
297
|
-
|
|
298
|
-
# Format and add results to messages
|
|
299
|
-
self.format_function_call_results(
|
|
300
|
-
messages=messages, function_call_results=function_call_results, **model_response.extra or {}
|
|
301
|
-
)
|
|
688
|
+
]:
|
|
689
|
+
if function_call_response.content:
|
|
690
|
+
model_response.content += function_call_response.content # type: ignore
|
|
302
691
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
692
|
+
# Add a function call for each successful execution
|
|
693
|
+
function_call_count += len(function_call_results)
|
|
694
|
+
|
|
695
|
+
# Format and add results to messages
|
|
696
|
+
self.format_function_call_results(
|
|
306
697
|
messages=messages,
|
|
307
698
|
function_call_results=function_call_results,
|
|
308
|
-
|
|
699
|
+
compress_tool_results=_compress_tool_results,
|
|
700
|
+
**model_response.extra or {},
|
|
309
701
|
)
|
|
310
702
|
|
|
311
|
-
|
|
312
|
-
|
|
703
|
+
if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
|
|
704
|
+
# Handle function call media
|
|
705
|
+
self._handle_function_call_media(
|
|
706
|
+
messages=messages,
|
|
707
|
+
function_call_results=function_call_results,
|
|
708
|
+
send_media_to_model=send_media_to_model,
|
|
709
|
+
)
|
|
313
710
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
break
|
|
711
|
+
for function_call_result in function_call_results:
|
|
712
|
+
function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
|
|
317
713
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
714
|
+
# Check if we should stop after tool calls
|
|
715
|
+
if any(m.stop_after_tool_call for m in function_call_results):
|
|
716
|
+
break
|
|
321
717
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
718
|
+
# If we have any tool calls that require confirmation, break the loop
|
|
719
|
+
if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
|
|
720
|
+
break
|
|
325
721
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
722
|
+
# If we have any tool calls that require external execution, break the loop
|
|
723
|
+
if any(tc.external_execution_required for tc in model_response.tool_executions or []):
|
|
724
|
+
break
|
|
329
725
|
|
|
330
|
-
|
|
331
|
-
|
|
726
|
+
# If we have any tool calls that require user input, break the loop
|
|
727
|
+
if any(tc.requires_user_input for tc in model_response.tool_executions or []):
|
|
728
|
+
break
|
|
332
729
|
|
|
333
|
-
|
|
334
|
-
|
|
730
|
+
# Continue loop to get next response
|
|
731
|
+
continue
|
|
732
|
+
|
|
733
|
+
# No tool calls or finished processing them
|
|
734
|
+
break
|
|
735
|
+
|
|
736
|
+
log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
|
|
737
|
+
|
|
738
|
+
# Save to cache if enabled
|
|
739
|
+
if self.cache_response:
|
|
740
|
+
self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
|
|
741
|
+
finally:
|
|
742
|
+
# Close the Gemini client
|
|
743
|
+
if self.__class__.__name__ == "Gemini" and self.client is not None: # type: ignore
|
|
744
|
+
try:
|
|
745
|
+
self.client.close() # type: ignore
|
|
746
|
+
self.client = None
|
|
747
|
+
except AttributeError:
|
|
748
|
+
log_warning(
|
|
749
|
+
"Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
|
|
750
|
+
" please upgrade Gemini to the latest version: pip install -U google-genai"
|
|
751
|
+
)
|
|
335
752
|
|
|
336
|
-
log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
|
|
337
753
|
return model_response
|
|
338
754
|
|
|
339
755
|
async def aresponse(
|
|
340
756
|
self,
|
|
341
757
|
messages: List[Message],
|
|
342
758
|
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
343
|
-
tools: Optional[List[
|
|
344
|
-
functions: Optional[Dict[str, Function]] = None,
|
|
759
|
+
tools: Optional[List[Union[Function, dict]]] = None,
|
|
345
760
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
346
761
|
tool_call_limit: Optional[int] = None,
|
|
762
|
+
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
347
763
|
send_media_to_model: bool = True,
|
|
764
|
+
compression_manager: Optional["CompressionManager"] = None,
|
|
348
765
|
) -> ModelResponse:
|
|
349
766
|
"""
|
|
350
767
|
Generate an asynchronous response from the model.
|
|
351
768
|
"""
|
|
352
769
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
messages=messages,
|
|
365
|
-
assistant_message=assistant_message,
|
|
366
|
-
model_response=model_response,
|
|
367
|
-
response_format=response_format,
|
|
368
|
-
tools=tools,
|
|
369
|
-
tool_choice=tool_choice or self._tool_choice,
|
|
370
|
-
)
|
|
770
|
+
try:
|
|
771
|
+
# Check cache if enabled
|
|
772
|
+
if self.cache_response:
|
|
773
|
+
cache_key = self._get_model_cache_key(
|
|
774
|
+
messages, stream=False, response_format=response_format, tools=tools
|
|
775
|
+
)
|
|
776
|
+
cached_data = self._get_cached_model_response(cache_key)
|
|
777
|
+
|
|
778
|
+
if cached_data:
|
|
779
|
+
log_info("Cache hit for model response")
|
|
780
|
+
return self._model_response_from_cache(cached_data)
|
|
371
781
|
|
|
372
|
-
|
|
373
|
-
|
|
782
|
+
log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
|
|
783
|
+
log_debug(f"Model: {self.id}", center=True, symbol="-")
|
|
784
|
+
_log_messages(messages)
|
|
785
|
+
model_response = ModelResponse()
|
|
374
786
|
|
|
375
|
-
|
|
376
|
-
|
|
787
|
+
_tool_dicts = self._format_tools(tools) if tools is not None else []
|
|
788
|
+
_functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
|
|
377
789
|
|
|
378
|
-
|
|
379
|
-
if
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
790
|
+
_compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
|
|
791
|
+
_compression_manager = compression_manager if _compress_tool_results else None
|
|
792
|
+
|
|
793
|
+
function_call_count = 0
|
|
794
|
+
|
|
795
|
+
while True:
|
|
796
|
+
# Compress existing tool results BEFORE making API call to avoid context overflow
|
|
797
|
+
if _compression_manager is not None and await _compression_manager.ashould_compress(
|
|
798
|
+
messages, tools, model=self, response_format=response_format
|
|
799
|
+
):
|
|
800
|
+
await _compression_manager.acompress(messages)
|
|
801
|
+
|
|
802
|
+
# Get response from model
|
|
803
|
+
assistant_message = Message(role=self.assistant_message_role)
|
|
804
|
+
await self._aprocess_model_response(
|
|
383
805
|
messages=messages,
|
|
806
|
+
assistant_message=assistant_message,
|
|
384
807
|
model_response=model_response,
|
|
385
|
-
|
|
808
|
+
response_format=response_format,
|
|
809
|
+
tools=_tool_dicts,
|
|
810
|
+
tool_choice=tool_choice or self._tool_choice,
|
|
811
|
+
run_response=run_response,
|
|
812
|
+
compress_tool_results=_compress_tool_results,
|
|
386
813
|
)
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
if function_call_response
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
if
|
|
419
|
-
model_response.
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
function_call_response.
|
|
424
|
-
|
|
814
|
+
|
|
815
|
+
# Add assistant message to messages
|
|
816
|
+
messages.append(assistant_message)
|
|
817
|
+
|
|
818
|
+
# Log response and metrics
|
|
819
|
+
assistant_message.log(metrics=True)
|
|
820
|
+
|
|
821
|
+
# Handle tool calls if present
|
|
822
|
+
if assistant_message.tool_calls:
|
|
823
|
+
# Prepare function calls
|
|
824
|
+
function_calls_to_run = self._prepare_function_calls(
|
|
825
|
+
assistant_message=assistant_message,
|
|
826
|
+
messages=messages,
|
|
827
|
+
model_response=model_response,
|
|
828
|
+
functions=_functions,
|
|
829
|
+
)
|
|
830
|
+
function_call_results: List[Message] = []
|
|
831
|
+
|
|
832
|
+
# Execute function calls
|
|
833
|
+
async for function_call_response in self.arun_function_calls(
|
|
834
|
+
function_calls=function_calls_to_run,
|
|
835
|
+
function_call_results=function_call_results,
|
|
836
|
+
current_function_call_count=function_call_count,
|
|
837
|
+
function_call_limit=tool_call_limit,
|
|
838
|
+
):
|
|
839
|
+
if isinstance(function_call_response, ModelResponse):
|
|
840
|
+
# The session state is updated by the function call
|
|
841
|
+
if function_call_response.updated_session_state is not None:
|
|
842
|
+
model_response.updated_session_state = function_call_response.updated_session_state
|
|
843
|
+
|
|
844
|
+
# Media artifacts are generated by the function call
|
|
845
|
+
if function_call_response.images is not None:
|
|
846
|
+
if model_response.images is None:
|
|
847
|
+
model_response.images = []
|
|
848
|
+
model_response.images.extend(function_call_response.images)
|
|
849
|
+
|
|
850
|
+
if function_call_response.audios is not None:
|
|
851
|
+
if model_response.audios is None:
|
|
852
|
+
model_response.audios = []
|
|
853
|
+
model_response.audios.extend(function_call_response.audios)
|
|
854
|
+
|
|
855
|
+
if function_call_response.videos is not None:
|
|
856
|
+
if model_response.videos is None:
|
|
857
|
+
model_response.videos = []
|
|
858
|
+
model_response.videos.extend(function_call_response.videos)
|
|
859
|
+
|
|
860
|
+
if function_call_response.files is not None:
|
|
861
|
+
if model_response.files is None:
|
|
862
|
+
model_response.files = []
|
|
863
|
+
model_response.files.extend(function_call_response.files)
|
|
864
|
+
|
|
865
|
+
if (
|
|
866
|
+
function_call_response.event
|
|
867
|
+
in [
|
|
868
|
+
ModelResponseEvent.tool_call_completed.value,
|
|
869
|
+
ModelResponseEvent.tool_call_paused.value,
|
|
870
|
+
]
|
|
871
|
+
and function_call_response.tool_executions is not None
|
|
872
|
+
):
|
|
873
|
+
if model_response.tool_executions is None:
|
|
874
|
+
model_response.tool_executions = []
|
|
875
|
+
model_response.tool_executions.extend(function_call_response.tool_executions)
|
|
876
|
+
|
|
877
|
+
# If the tool is currently paused (HITL flow), add the requirement to the run response
|
|
878
|
+
if (
|
|
879
|
+
function_call_response.event == ModelResponseEvent.tool_call_paused.value
|
|
880
|
+
and run_response is not None
|
|
881
|
+
):
|
|
882
|
+
current_tool_execution = function_call_response.tool_executions[-1]
|
|
883
|
+
if run_response.requirements is None:
|
|
884
|
+
run_response.requirements = []
|
|
885
|
+
run_response.requirements.append(
|
|
886
|
+
RunRequirement(tool_execution=current_tool_execution)
|
|
887
|
+
)
|
|
888
|
+
|
|
889
|
+
elif function_call_response.event not in [
|
|
890
|
+
ModelResponseEvent.tool_call_started.value,
|
|
425
891
|
ModelResponseEvent.tool_call_completed.value,
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
):
|
|
430
|
-
if model_response.tool_executions is None:
|
|
431
|
-
model_response.tool_executions = []
|
|
432
|
-
model_response.tool_executions.extend(function_call_response.tool_executions)
|
|
433
|
-
elif function_call_response.event not in [
|
|
434
|
-
ModelResponseEvent.tool_call_started.value,
|
|
435
|
-
ModelResponseEvent.tool_call_completed.value,
|
|
436
|
-
]:
|
|
437
|
-
if function_call_response.content:
|
|
438
|
-
model_response.content += function_call_response.content # type: ignore
|
|
439
|
-
|
|
440
|
-
# Add a function call for each successful execution
|
|
441
|
-
function_call_count += len(function_call_results)
|
|
442
|
-
|
|
443
|
-
# Format and add results to messages
|
|
444
|
-
self.format_function_call_results(
|
|
445
|
-
messages=messages, function_call_results=function_call_results, **model_response.extra or {}
|
|
446
|
-
)
|
|
892
|
+
]:
|
|
893
|
+
if function_call_response.content:
|
|
894
|
+
model_response.content += function_call_response.content # type: ignore
|
|
447
895
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
896
|
+
# Add a function call for each successful execution
|
|
897
|
+
function_call_count += len(function_call_results)
|
|
898
|
+
|
|
899
|
+
# Format and add results to messages
|
|
900
|
+
self.format_function_call_results(
|
|
451
901
|
messages=messages,
|
|
452
902
|
function_call_results=function_call_results,
|
|
453
|
-
|
|
903
|
+
compress_tool_results=_compress_tool_results,
|
|
904
|
+
**model_response.extra or {},
|
|
454
905
|
)
|
|
455
906
|
|
|
456
|
-
|
|
457
|
-
|
|
907
|
+
if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
|
|
908
|
+
# Handle function call media
|
|
909
|
+
self._handle_function_call_media(
|
|
910
|
+
messages=messages,
|
|
911
|
+
function_call_results=function_call_results,
|
|
912
|
+
send_media_to_model=send_media_to_model,
|
|
913
|
+
)
|
|
458
914
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
break
|
|
915
|
+
for function_call_result in function_call_results:
|
|
916
|
+
function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
|
|
462
917
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
918
|
+
# Check if we should stop after tool calls
|
|
919
|
+
if any(m.stop_after_tool_call for m in function_call_results):
|
|
920
|
+
break
|
|
466
921
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
922
|
+
# If we have any tool calls that require confirmation, break the loop
|
|
923
|
+
if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
|
|
924
|
+
break
|
|
470
925
|
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
926
|
+
# If we have any tool calls that require external execution, break the loop
|
|
927
|
+
if any(tc.external_execution_required for tc in model_response.tool_executions or []):
|
|
928
|
+
break
|
|
474
929
|
|
|
475
|
-
|
|
476
|
-
|
|
930
|
+
# If we have any tool calls that require user input, break the loop
|
|
931
|
+
if any(tc.requires_user_input for tc in model_response.tool_executions or []):
|
|
932
|
+
break
|
|
477
933
|
|
|
478
|
-
|
|
479
|
-
|
|
934
|
+
# Continue loop to get next response
|
|
935
|
+
continue
|
|
936
|
+
|
|
937
|
+
# No tool calls or finished processing them
|
|
938
|
+
break
|
|
939
|
+
|
|
940
|
+
log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
|
|
941
|
+
|
|
942
|
+
# Save to cache if enabled
|
|
943
|
+
if self.cache_response:
|
|
944
|
+
self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
|
|
945
|
+
finally:
|
|
946
|
+
# Close the Gemini client
|
|
947
|
+
if self.__class__.__name__ == "Gemini" and self.client is not None:
|
|
948
|
+
try:
|
|
949
|
+
await self.client.aio.aclose() # type: ignore
|
|
950
|
+
self.client = None
|
|
951
|
+
except AttributeError:
|
|
952
|
+
log_warning(
|
|
953
|
+
"Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
|
|
954
|
+
" please upgrade Gemini to the latest version: pip install -U google-genai"
|
|
955
|
+
)
|
|
480
956
|
|
|
481
|
-
log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
|
|
482
957
|
return model_response
|
|
483
958
|
|
|
484
959
|
def _process_model_response(
|
|
@@ -489,7 +964,8 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -497,14 +973,15 @@ class Model(ABC):
         Returns:
             Tuple[Message, bool]: (assistant_message, should_continue)
         """
-        # Generate response
-        provider_response = self.
+        # Generate response with retry logic for ModelProviderError
+        provider_response = self._invoke_with_retry(
             assistant_message=assistant_message,
             messages=messages,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -533,6 +1010,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data
 
     async def _aprocess_model_response(
         self,
@@ -542,7 +1021,8 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -550,14 +1030,15 @@ class Model(ABC):
         Returns:
             Tuple[Message, bool]: (assistant_message, should_continue)
         """
-        # Generate response
-        provider_response = await self.
+        # Generate response with retry logic for ModelProviderError
+        provider_response = await self._ainvoke_with_retry(
             messages=messages,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             assistant_message=assistant_message,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -586,6 +1067,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data
 
     def _populate_assistant_message(
         self,
@@ -602,7 +1085,6 @@ class Model(ABC):
         Returns:
             Message: The populated assistant message
         """
-        # Add role to assistant message
         if provider_response.role is not None:
             assistant_message.role = provider_response.role
 
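The `_invoke_with_retry` and `_ainvoke_with_retry` helpers referenced in these hunks are not shown here; the updated docstrings only say they add retry logic for `ModelProviderError`. A rough, non-authoritative sketch of that pattern (the helper name, backoff policy, and defaults are assumptions):

    import time
    from typing import Any

    from agno.exceptions import ModelProviderError  # exception named in the new comments; path assumed


    def invoke_with_retry(invoke_fn, *, max_retries: int = 3, base_delay: float = 1.0, **kwargs: Any):
        """Sketch: retry a provider call on ModelProviderError with exponential backoff."""
        for attempt in range(max_retries + 1):
            try:
                return invoke_fn(**kwargs)
            except ModelProviderError:
                if attempt == max_retries:
                    raise
                time.sleep(base_delay * (2 ** attempt))  # back off before the next attempt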
@@ -666,165 +1148,218 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
-        Process a streaming response from the model.
+        Process a streaming response from the model with retry logic for ModelProviderError.
         """
 
-        for response_delta in self.
+        for response_delta in self._invoke_stream_with_retry(
             messages=messages,
             assistant_message=assistant_message,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):
-
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
-            )
+            ):
+                yield model_response_delta
 
-        #
-        self.
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     def response_stream(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-
-
-
+                if cached_data:
+                    log_info("Cache hit for streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
 
-
+                log_info("Cache miss for streaming model response")
 
-
-
-        # Create assistant message and stream data
-        stream_data = MessageData()
-        model_response = ModelResponse()
-        if stream_model_response:
-            # Generate response
-            yield from self.process_response_stream(
-                messages=messages,
-                assistant_message=assistant_message,
-                stream_data=stream_data,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
 
-
-
-
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_citations:
-                assistant_message.citations = stream_data.response_citations
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+            log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
 
-        else
-
-            messages=messages,
-            assistant_message=assistant_message,
-            model_response=model_response,
-            response_format=response_format,
-            tools=tools,
-            tool_choice=tool_choice or self._tool_choice,
-            )
-            yield model_response
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
-
-
-            assistant_message.log(metrics=True)
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
 
-
-        if assistant_message.tool_calls is not None:
-            # Prepare function calls
-            function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                assistant_message, messages, functions
-            )
-            function_call_results: List[Message] = []
-
-            # Execute function calls
-            for function_call_response in self.run_function_calls(
-                function_calls=function_calls_to_run,
-                function_call_results=function_call_results,
-                current_function_call_count=function_call_count,
-                function_call_limit=tool_call_limit,
-            ):
-                yield function_call_response
+            function_call_count = 0
 
-
-
+            while True:
+                # Compress existing tool results BEFORE invoke
+                if _compression_manager is not None and _compression_manager.should_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    _compression_manager.compress(messages)
+
+                assistant_message = Message(role=self.assistant_message_role)
+                # Create assistant message and stream data
+                stream_data = MessageData()
+                model_response = ModelResponse()
+                if stream_model_response:
+                    # Generate response
+                    for response in self.process_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
+                    ):
+                        if self.cache_response and isinstance(response, ModelResponse):
+                            streaming_responses.append(response)
+                        yield response
 
-
-
-
-
+                else:
+                    self._process_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
                     )
-
-
-
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
                     )
-
-        self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    function_call_results: List[Message] = []
 
-
-
-
-            messages=messages,
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
                         function_call_results=function_call_results,
-
-
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **stream_data.extra,
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **model_response.extra,
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                        )
 
-
-
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-
-
-            break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-
-
-
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-
-
-
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-
-
-
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
 
-
-
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
 
-
-
+                    # Continue loop to get next response
+                    continue
 
-
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
     async def aprocess_response_stream(
         self,
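`response_stream` now accepts an optional `compression_manager` and, on every loop iteration, asks it `should_compress(...)` before invoking the model and calls `compress(messages)` when it says yes (the async variant below uses `ashould_compress`/`acompress`). A duck-typed stand-in that satisfies just that call pattern, with an invented truncation heuristic, purely as a sketch:

    from typing import Any, List, Optional

    from agno.models.message import Message  # assumed import path


    class TruncatingCompressionManager:
        """Sketch: shrink old tool-result messages once the history grows long."""

        compress_tool_results: bool = True  # flag read by response_stream before enabling compression

        def __init__(self, max_messages: int = 40, keep_chars: int = 500):
            self.max_messages = max_messages
            self.keep_chars = keep_chars

        def should_compress(self, messages: List[Message], tools: Optional[List[Any]] = None, **kwargs: Any) -> bool:
            # Invented heuristic: compress once the conversation gets long; the real
            # manager presumably considers token counts for the given model.
            return len(messages) > self.max_messages

        def compress(self, messages: List[Message]) -> None:
            # Truncate older tool results in place; the real manager likely summarizes instead.
            for message in messages[:-5]:
                if message.role == "tool" and isinstance(message.content, str):
                    message.content = message.content[: self.keep_chars]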
@@ -834,175 +1369,264 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
-        Process a streaming response from the model.
+        Process a streaming response from the model with retry logic for ModelProviderError.
        """
-        async for response_delta in self.
+        async for response_delta in self._ainvoke_stream_with_retry(
             messages=messages,
             assistant_message=assistant_message,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
-
-
+            compress_tool_results=compress_tool_results,
+        ):
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
             ):
-                yield
+                yield model_response_delta
 
-        # Populate the
-        self.
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     async def aresponse_stream(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-
-
-
+                if cached_data:
+                    log_info("Cache hit for async streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
 
-
+                log_info("Cache miss for async streaming model response")
 
-
-
-        assistant_message = Message(role=self.assistant_message_role)
-        stream_data = MessageData()
-        model_response = ModelResponse()
-        if stream_model_response:
-            # Generate response
-            async for model_response in self.aprocess_response_stream(
-                messages=messages,
-                assistant_message=assistant_message,
-                stream_data=stream_data,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            ):
-                yield model_response
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
 
-
-
-
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+            log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
 
-        else
-
-            messages=messages,
-            assistant_message=assistant_message,
-            model_response=model_response,
-            response_format=response_format,
-            tools=tools,
-            tool_choice=tool_choice or self._tool_choice,
-            run_response=run_response,
-            )
-            yield model_response
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
-
-
-            assistant_message.log(metrics=True)
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
 
-
-        if assistant_message.tool_calls is not None:
-            # Prepare function calls
-            function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                assistant_message, messages, functions
-            )
-            function_call_results: List[Message] = []
-
-            # Execute function calls
-            async for function_call_response in self.arun_function_calls(
-                function_calls=function_calls_to_run,
-                function_call_results=function_call_results,
-                current_function_call_count=function_call_count,
-                function_call_limit=tool_call_limit,
-            ):
-                yield function_call_response
+            function_call_count = 0
 
-
-
+            while True:
+                # Compress existing tool results BEFORE making API call to avoid context overflow
+                if _compression_manager is not None and await _compression_manager.ashould_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    await _compression_manager.acompress(messages)
+
+                # Create assistant message and stream data
+                assistant_message = Message(role=self.assistant_message_role)
+                stream_data = MessageData()
+                model_response = ModelResponse()
+                if stream_model_response:
+                    # Generate response
+                    async for model_response in self.aprocess_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
+                    ):
+                        if self.cache_response and isinstance(model_response, ModelResponse):
+                            streaming_responses.append(model_response)
+                        yield model_response
 
-
-
-
-
+                else:
+                    await self._aprocess_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
                     )
-
-
-
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
                     )
-
-            self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    function_call_results: List[Message] = []
 
-
-
-
-            messages=messages,
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
                         function_call_results=function_call_results,
-
-
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **stream_data.extra,
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **model_response.extra or {},
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                        )
 
-
-
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-
-
-            break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-
-
-
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-
-
-
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-
-
-
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
 
-
-
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
+
+                    # Continue loop to get next response
+                    continue
+
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
 
-        #
-
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
 
-
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
-    def
-        self,
+    def _populate_assistant_message_from_stream_data(
+        self, assistant_message: Message, stream_data: MessageData
+    ) -> None:
+        """
+        Populate an assistant message with the stream data.
+        """
+        if stream_data.response_role is not None:
+            assistant_message.role = stream_data.response_role
+        if stream_data.response_metrics is not None:
+            assistant_message.metrics = stream_data.response_metrics
+        if stream_data.response_content:
+            assistant_message.content = stream_data.response_content
+        if stream_data.response_reasoning_content:
+            assistant_message.reasoning_content = stream_data.response_reasoning_content
+        if stream_data.response_redacted_reasoning_content:
+            assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+        if stream_data.response_provider_data:
+            assistant_message.provider_data = stream_data.response_provider_data
+        if stream_data.response_citations:
+            assistant_message.citations = stream_data.response_citations
+        if stream_data.response_audio:
+            assistant_message.audio_output = stream_data.response_audio
+        if stream_data.response_image:
+            assistant_message.image_output = stream_data.response_image
+        if stream_data.response_video:
+            assistant_message.video_output = stream_data.response_video
+        if stream_data.response_file:
+            assistant_message.file_output = stream_data.response_file
+        if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+            assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+
+    def _populate_stream_data(
+        self, stream_data: MessageData, model_response_delta: ModelResponse
     ) -> Iterator[ModelResponse]:
         """Update the stream data and assistant message with the model response."""
-        # Add role to assistant message
-        if model_response_delta.role is not None:
-            assistant_message.role = model_response_delta.role
 
         should_yield = False
+        if model_response_delta.role is not None:
+            stream_data.response_role = model_response_delta.role  # type: ignore
+
+        if model_response_delta.response_usage is not None:
+            if stream_data.response_metrics is None:
+                stream_data.response_metrics = Metrics()
+            stream_data.response_metrics += model_response_delta.response_usage
+
         # Update stream_data content
         if model_response_delta.content is not None:
             stream_data.response_content += model_response_delta.content
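One detail of the new `_populate_stream_data` above: per-delta `response_usage` is folded into a single `Metrics` object on the stream data, so the assistant message built afterwards carries aggregate usage. A tiny sketch of that accumulation, assuming `Metrics` supports in-place addition and that the field names below exist:

    from typing import Optional

    from agno.models.metrics import Metrics  # assumed import path; field names are illustrative

    # Each streamed delta may carry a partial usage report; keep a running total.
    running: Optional[Metrics] = None
    for delta_usage in (Metrics(input_tokens=12), Metrics(output_tokens=5), Metrics(output_tokens=7)):
        if running is None:
            running = Metrics()
        running += delta_usage  # relies on Metrics.__iadd__ merging counts, as the diff does
    print(running)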
@@ -1147,12 +1771,14 @@ class Model(ABC):
         images = None
         videos = None
         audios = None
+        files = None
 
         if success and function_execution_result:
             # With unified classes, no conversion needed - use directly
             images = function_execution_result.images
             videos = function_execution_result.videos
             audios = function_execution_result.audios
+            files = function_execution_result.files
 
         return Message(
             role=self.tool_message_role,
@@ -1165,6 +1791,7 @@ class Model(ABC):
             images=images,
             videos=videos,
             audio=audios,
+            files=files,
             **kwargs, # type: ignore
         )
 
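These two hunks thread a new `files` field from `FunctionExecutionResult` into the tool-result `Message`, alongside images, videos, and audio. A hypothetical tool whose result carries a file might look like the following; the `File` import and the `ToolResult` constructor arguments are assumptions based only on names visible in this diff:

    from agno.media import File                 # assumed import path
    from agno.tools.function import ToolResult  # imported by the surrounding code


    def export_report() -> ToolResult:
        """Sketch: a tool result that carries a file, which now survives into Message(files=...)."""
        report = File(filepath="/tmp/report.csv")  # hypothetical constructor
        return ToolResult(content="Report generated at /tmp/report.csv", files=[report])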
@@ -1202,11 +1829,15 @@ class Model(ABC):
 
         # Run function calls sequentially
         function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+        stop_after_tool_call_from_exception = False
         try:
             function_execution_result = function_call.execute()
         except AgentRunException as a_exc:
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
         except Exception as e:
             log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1221,32 +1852,60 @@ class Model(ABC):
         function_call_output: str = ""
 
         if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                for item in function_execution_result.result:
+                    # This function yields agent/team/workflow run events
+                    if (
+                        isinstance(item, tuple(get_args(RunOutputEvent)))
+                        or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+                        or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
+                    ):
+                        # We only capture content events for output accumulation
+                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                            if item.content is not None and isinstance(item.content, BaseModel):
+                                function_call_output += item.content.model_dump_json()
+                            else:
+                                # Capture output
+                                function_call_output += item.content or ""
 
-
-
+                            if function_call.function.show_result and item.content is not None:
+                                yield ModelResponse(content=item.content)
 
                         if isinstance(item, CustomEvent):
                             function_call_output += str(item)
 
-
-
+                        # For WorkflowCompletedEvent, extract content for final output
+                        from agno.run.workflow import WorkflowCompletedEvent
 
-
-
-
-
+                        if isinstance(item, WorkflowCompletedEvent):
+                            if item.content is not None:
+                                if isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    function_call_output += str(item.content)
+
+                        # Yield the event itself to bubble it up
+                        yield item
+
+                    else:
+                        function_call_output += str(item)
+                        if function_call.function.show_result and item is not None:
+                            yield ModelResponse(content=str(item))
+            except Exception as e:
+                log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                function_call.error = str(e)
+                function_call_success = False
+
+            # For generators, re-capture updated_session_state after consumption
+            # since session_state modifications were made during iteration
+            if function_execution_result.updated_session_state is None:
+                if (
+                    function_call.function._run_context is not None
+                    and function_call.function._run_context.session_state is not None
+                ):
+                    function_execution_result.updated_session_state = function_call.function._run_context.session_state
+                elif function_call.function._session_state is not None:
+                    function_execution_result.updated_session_state = function_call.function._session_state
         else:
             from agno.tools.function import ToolResult
 
@@ -1267,7 +1926,7 @@ class Model(ABC):
         else:
             function_call_output = str(function_execution_result.result) if function_execution_result.result else ""
 
-        if function_call.function.show_result:
+        if function_call.function.show_result and function_call_output is not None:
             yield ModelResponse(content=function_call_output)
 
         # Create and yield function call result
@@ -1278,6 +1937,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
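With the new `stop_after_tool_call_from_exception` flag, a tool can end the run by raising an `AgentRunException` whose `stop_execution` attribute is truthy, which is exactly what the code above checks. A hedged sketch, assuming agno still exposes a `StopAgentRun` subclass for this purpose as in earlier versions; any `AgentRunException` carrying `stop_execution=True` would behave the same way:

    from agno.exceptions import StopAgentRun  # assumed subclass; sets stop_execution internally
    from agno.tools import tool               # assumed decorator import


    @tool
    def cancel_order(order_id: str) -> str:
        """Sketch: abort the run after this tool call; the loop above then marks stop_after_tool_call."""
        if order_id.startswith("TEST-"):
            raise StopAgentRun(f"Order {order_id} is a test order, stopping the run.")
        return f"Order {order_id} cancelled."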
@@ -1325,7 +1987,7 @@ class Model(ABC):
 
         paused_tool_executions = []
 
-        # The function
+        # The function requires user confirmation (HITL)
         if fc.function.requires_confirmation:
             paused_tool_executions.append(
                 ToolExecution(
@@ -1335,7 +1997,8 @@ class Model(ABC):
                     requires_confirmation=True,
                 )
             )
-
+
+        # The function requires user input (HITL)
         if fc.function.requires_user_input:
             user_input_schema = fc.function.user_input_schema
             if fc.arguments and user_input_schema:
@@ -1353,7 +2016,8 @@ class Model(ABC):
                         user_input_schema=user_input_schema,
                     )
                 )
-
+
+        # If the function is from the user control flow (HITL) tools, we handle it here
         if fc.function.name == "get_user_input" and fc.arguments and fc.arguments.get("user_input_fields"):
             user_input_schema = []
             for input_field in fc.arguments.get("user_input_fields", []):
@@ -1379,7 +2043,8 @@ class Model(ABC):
                         user_input_schema=user_input_schema,
                     )
                 )
-
+
+        # The function requires external execution (HITL)
         if fc.function.external_execution:
             paused_tool_executions.append(
                 ToolExecution(
@@ -1416,6 +2081,7 @@ class Model(ABC):
         function_call_timer = Timer()
         function_call_timer.start()
         success: Union[bool, AgentRunException] = False
+        result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
 
         try:
             if (
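The added comments label the human-in-the-loop branches: tools that require confirmation, user input, or external execution pause the run by recording a `ToolExecution` and breaking out of the tool loop. A minimal confirmation-gated tool, assuming the `@tool` decorator accepts `requires_confirmation` (consistent with the `fc.function.requires_confirmation` check above):

    from agno.tools import tool  # assumed decorator import


    @tool(requires_confirmation=True)
    def delete_database(name: str) -> str:
        """Sketch: the run pauses before this executes; the caller must confirm and resume."""
        return f"Database {name} deleted."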
@@ -1610,9 +2276,12 @@ class Model(ABC):
 
             try:
                 async for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(
-                        item,
+                    # This function yields agent/team/workflow run events
+                    if isinstance(
+                        item,
+                        tuple(get_args(RunOutputEvent))
+                        + tuple(get_args(TeamRunOutputEvent))
+                        + tuple(get_args(WorkflowRunOutputEvent)),
                     ):
                         # We only capture content events
                         if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1622,12 +2291,22 @@ class Model(ABC):
                                 # Capture output
                                 function_call_output += item.content or ""
 
-                            if function_call.function.show_result:
+                            if function_call.function.show_result and item.content is not None:
                                 await event_queue.put(ModelResponse(content=item.content))
                             continue
 
-
-
+                        if isinstance(item, CustomEvent):
+                            function_call_output += str(item)
+
+                        # For WorkflowCompletedEvent, extract content for final output
+                        from agno.run.workflow import WorkflowCompletedEvent
+
+                        if isinstance(item, WorkflowCompletedEvent):
+                            if item.content is not None:
+                                if isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    function_call_output += str(item.content)
 
                         # Put the event into the queue to be yielded
                         await event_queue.put(item)
@@ -1635,7 +2314,7 @@ class Model(ABC):
                     # Yield custom events emitted by the tool
                     else:
                         function_call_output += str(item)
-                        if function_call.function.show_result:
+                        if function_call.function.show_result and item is not None:
                             await event_queue.put(ModelResponse(content=str(item)))
 
                 # Store the final output for this generator
@@ -1703,10 +2382,14 @@ class Model(ABC):
         updated_session_state = function_execution_result.updated_session_state
 
         # Handle AgentRunException
+        stop_after_tool_call_from_exception = False
         if isinstance(function_call_success, AgentRunException):
             a_exc = function_call_success
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
             function_call_success = False
 
@@ -1718,30 +2401,60 @@ class Model(ABC):
             function_call_output = async_function_call_output
             # Events from async generators were already yielded in real-time above
         elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
-
-
-
-
+            try:
+                for item in function_call.result:
+                    # This function yields agent/team/workflow run events
+                    if isinstance(
+                        item,
+                        tuple(get_args(RunOutputEvent))
+                        + tuple(get_args(TeamRunOutputEvent))
+                        + tuple(get_args(WorkflowRunOutputEvent)),
+                    ):
+                        # We only capture content events
+                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                            if item.content is not None and isinstance(item.content, BaseModel):
+                                function_call_output += item.content.model_dump_json()
+                            else:
+                                # Capture output
+                                function_call_output += item.content or ""
+
+                            if function_call.function.show_result and item.content is not None:
+                                yield ModelResponse(content=item.content)
+                            continue
+
+                        # Yield the event itself to bubble it up
+                        yield item
+                    else:
+                        function_call_output += str(item)
+                        if function_call.function.show_result and item is not None:
+                            yield ModelResponse(content=str(item))
+            except Exception as e:
+                log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                function_call.error = str(e)
+                function_call_success = False
+
+        # For generators (sync or async), re-capture updated_session_state after consumption
+        # since session_state modifications were made during iteration
+        if async_function_call_output is not None or isinstance(
+            function_call.result,
+            (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+        ):
+            if updated_session_state is None:
+                if (
+                    function_call.function._run_context is not None
+                    and function_call.function._run_context.session_state is not None
                 ):
-
-
-
-
-
-
-
-
-
-
-
-
-            # Yield the event itself to bubble it up
-            yield item
-        else:
-            function_call_output += str(item)
-            if function_call.function.show_result:
-                yield ModelResponse(content=str(item))
-        else:
+                    updated_session_state = function_call.function._run_context.session_state
+                elif function_call.function._session_state is not None:
+                    updated_session_state = function_call.function._session_state
+
+        if not (
+            async_function_call_output is not None
+            or isinstance(
+                function_call.result,
+                (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+            )
+        ):
             from agno.tools.function import ToolResult
 
             if isinstance(function_execution_result.result, ToolResult):
@@ -1759,7 +2472,7 @@ class Model(ABC):
         else:
             function_call_output = str(function_call.result)
 
-        if function_call.function.show_result:
+        if function_call.function.show_result and function_call_output is not None:
             yield ModelResponse(content=function_call_output)
 
         # Create and yield function call result
@@ -1770,6 +2483,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -1814,12 +2530,16 @@ class Model(ABC):
             model_response.tool_calls = []
 
         function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-            assistant_message, messages, functions
+            assistant_message=assistant_message, messages=messages, functions=functions
        )
         return function_calls_to_run
 
     def format_function_call_results(
-        self,
+        self,
+        messages: List[Message],
+        function_call_results: List[Message],
+        compress_tool_results: bool = False,
+        **kwargs,
     ) -> None:
         """
         Format function call results.
@@ -1896,10 +2616,14 @@ class Model(ABC):
         new_model = cls.__new__(cls)
         memo[id(self)] = new_model
 
-        # Deep copy all attributes
+        # Deep copy all attributes except client objects
         for k, v in self.__dict__.items():
             if k in {"response_format", "_tools", "_functions"}:
                 continue
+            # Skip client objects
+            if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+                setattr(new_model, k, None)
+                continue
             try:
                 setattr(new_model, k, deepcopy(v, memo))
             except Exception: