agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
agno/models/base.py
CHANGED
```diff
@@ -5,9 +5,10 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from hashlib import md5
 from pathlib import Path
-from time import time
+from time import sleep, time
 from types import AsyncGeneratorType, GeneratorType
 from typing import (
+    TYPE_CHECKING,
     Any,
     AsyncIterator,
     Dict,
@@ -15,21 +16,26 @@ from typing import (
     List,
     Literal,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
     get_args,
 )
+
+if TYPE_CHECKING:
+    from agno.compression.manager import CompressionManager
 from uuid import uuid4
 
 from pydantic import BaseModel
 
-from agno.exceptions import AgentRunException
+from agno.exceptions import AgentRunException, ModelProviderError, RetryableModelProviderError
 from agno.media import Audio, File, Image, Video
 from agno.models.message import Citations, Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
+from agno.run.requirement import RunRequirement
 from agno.run.team import RunContentEvent as TeamRunContentEvent
 from agno.run.team import TeamRunOutput, TeamRunOutputEvent
 from agno.run.workflow import WorkflowRunOutputEvent
```
```diff
@@ -145,15 +151,284 @@ class Model(ABC):
     cache_ttl: Optional[int] = None
     cache_dir: Optional[str] = None
 
+    # Retry configuration for model provider errors
+    # Number of retries to attempt when a ModelProviderError occurs
+    retries: int = 0
+    # Delay between retries (in seconds)
+    delay_between_retries: int = 1
+    # Exponential backoff: if True, the delay between retries is doubled each time
+    exponential_backoff: bool = False
+    # Enable retrying a model invocation once with a guidance message.
+    # This is useful for known errors avoidable with extra instructions.
+    retry_with_guidance: bool = True
+    # Set the number of times to retry the model invocation with guidance.
+    retry_with_guidance_limit: int = 1
+
     def __post_init__(self):
         if self.provider is None and self.name is not None:
             self.provider = f"{self.name} ({self.id})"
 
+    def _get_retry_delay(self, attempt: int) -> float:
+        """Calculate the delay before the next retry attempt."""
+        if self.exponential_backoff:
+            return self.delay_between_retries * (2**attempt)
+        return self.delay_between_retries
+
+    def _is_retryable_error(self, error: ModelProviderError) -> bool:
+        """Determine if an error is worth retrying.
+
+        Non-retryable errors include:
+        - Client errors (400, 401, 403, 413, 422) that won't change on retry
+        - Context window/token limit exceeded errors
+        - Payload too large errors
+
+        Retryable errors include:
+        - Rate limit errors (429)
+        - Server errors (500, 502, 503, 504)
+
+        Args:
+            error: The ModelProviderError to evaluate.
+
+        Returns:
+            True if the error is transient and worth retrying, False otherwise.
+        """
+        # Non-retryable status codes (client errors that won't change)
+        non_retryable_codes = {400, 401, 403, 404, 413, 422}
+        if error.status_code in non_retryable_codes:
+            return False
+
+        # Non-retryable error message patterns (context/token limits)
+        non_retryable_patterns = [
+            "context_length_exceeded",
+            "context window",
+            "maximum context length",
+            "token limit",
+            "max_tokens",
+            "too many tokens",
+            "payload too large",
+            "content_too_large",
+            "request too large",
+            "input too long",
+            "exceeds the model",
+        ]
+        error_msg = str(error.message).lower()
+        if any(pattern in error_msg for pattern in non_retryable_patterns):
+            return False
+
+        return True
+
+    def _invoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the invoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return self.invoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return self._invoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Asynchronously invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return await self.ainvoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return await self._ainvoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    def _invoke_stream_with_retry(self, **kwargs) -> Iterator[ModelResponse]:
+        """
+        Invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the invoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                yield from self.invoke_stream(**kwargs)
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable (e.g., context window exceeded, auth errors)
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                yield from self._invoke_stream_with_retry(**kwargs, retry_with_guidance=True)
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_stream_with_retry(self, **kwargs) -> AsyncIterator[ModelResponse]:
+        """
+        Asynchronously invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                async for response in self.ainvoke_stream(**kwargs):
+                    yield response
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                async for response in self._ainvoke_stream_with_retry(**kwargs, retry_with_guidance=True):
+                    yield response
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
     def to_dict(self) -> Dict[str, Any]:
         fields = {"name", "id", "provider"}
         _dict = {field: getattr(self, field) for field in fields if getattr(self, field) is not None}
         return _dict
 
+    def _remove_temporary_messages(self, messages: List[Message]) -> None:
+        """Remove temporary messages from the given list.
+
+        Args:
+            messages: The list of messages to filter (modified in place).
+        """
+        messages[:] = [m for m in messages if not m.temporary]
+
     def get_provider(self) -> str:
         return self.provider or self.name or self.__class__.__name__
 
```
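The hunk above adds per-model retry settings (`retries`, `delay_between_retries`, `exponential_backoff`, `retry_with_guidance`, `retry_with_guidance_limit`) and the `_invoke_with_retry` family of wrappers. A minimal usage sketch, assuming these dataclass fields can simply be set on a provider instance such as `OpenAIChat` (only the field names, defaults, and the backoff formula come from the diff):

```python
from agno.models.openai import OpenAIChat

# Sketch: opt a model into retrying transient provider errors.
model = OpenAIChat(id="gpt-4o")
model.retries = 3                 # up to 3 extra attempts on retryable errors
model.delay_between_retries = 2   # base delay in seconds
model.exponential_backoff = True  # _get_retry_delay(attempt) = 2 * 2**attempt, i.e. 2s, 4s, 8s

# Non-retryable failures (401 auth errors, context-window overflows, payloads that
# are too large) are raised immediately; 429s and 5xx errors go through the backoff.
```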
```diff
@@ -303,6 +578,29 @@ class Model(ABC):
             _tool_dicts.append(tool)
         return _tool_dicts
 
+    def count_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        from agno.utils.tokens import count_tokens
+
+        return count_tokens(
+            messages,
+            tools=list(tools) if tools else None,
+            model_id=self.id,
+            output_schema=output_schema,
+        )
+
+    async def acount_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        return self.count_tokens(messages, tools, output_schema=output_schema)
+
     def response(
         self,
         messages: List[Message],
```
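`count_tokens()` / `acount_tokens()` delegate to the new `agno.utils.tokens` helper using the model's `id`. A small sketch of calling them directly (the message contents are illustrative; the signature is taken from the hunk above):

```python
from agno.models.message import Message
from agno.models.openai import OpenAIChat

model = OpenAIChat(id="gpt-4o")

prompt = [
    Message(role="system", content="You are a concise assistant."),
    Message(role="user", content="Summarize the 2.4.3 changes in one sentence."),
]

# Estimate the prompt size before sending it; tools and output_schema are optional.
num_tokens = model.count_tokens(prompt)
# In async code: num_tokens = await model.acount_tokens(prompt)
```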
```diff
@@ -312,6 +610,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
@@ -325,155 +624,194 @@ class Model(ABC):
             run_response: Run response to use
             send_media_to_model: Whether to send media to the model
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-
-
-
-            cached_data = self._get_cached_model_response(cache_key)
-
-            if cached_data:
-                log_info("Cache hit for model response")
-                return self._model_response_from_cache(cached_data)
-
-        log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        _tool_dicts = self._format_tools(tools) if tools is not None else []
-        _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            self._process_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=_tool_dicts,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)
 
-
-
+            log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
 
-
-
+            _log_messages(messages)
+            model_response = ModelResponse()
 
-
-
-
-
-
+            function_call_count = 0
+
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
+
+            while True:
+                # Compress tool results if compression is enabled and threshold is met
+                if _compression_manager is not None and _compression_manager.should_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    _compression_manager.compress(messages)
+
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                self._process_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                if function_call_response
-
-
-
-
-
-                    if
-                        model_response.
-
-
-
-                        function_call_response.
-
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                # Record the tool execution in the model response
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                                # If the tool is currently paused (HITL flow), add the requirement to the run response
+                                if (
+                                    function_call_response.event == ModelResponseEvent.tool_call_paused.value
+                                    and run_response is not None
+                                ):
+                                    current_tool_execution = function_call_response.tool_executions[-1]
+                                    if run_response.requirements is None:
+                                        run_response.requirements = []
+                                    run_response.requirements.append(
+                                        RunRequirement(tool_execution=current_tool_execution)
+                                    )
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-
-
-
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-
-                    elif function_call_response.event not in [
-                        ModelResponseEvent.tool_call_started.value,
-                        ModelResponseEvent.tool_call_completed.value,
-                    ]:
-                        if function_call_response.content:
-                            model_response.content += function_call_response.content  # type: ignore
-
-            # Add a function call for each successful execution
-            function_call_count += len(function_call_results)
-
-            # Format and add results to messages
-            self.format_function_call_results(
-                messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-            )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
 
-
-
-
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                        messages=messages,
                        function_call_results=function_call_results,
-
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-
-
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
+
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-
-
-
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-
-
-
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-
-
-
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-
-
-
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-
-
+                    # Continue loop to get next response
+                    continue
 
-
-
+                # No tool calls or finished processing them
+                break
 
-
+            log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
 
-
-
-
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:  # type: ignore
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
         return model_response
 
```
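The reworked `response()` above wraps the run loop in `try/finally`, closes the Gemini client on exit, and threads an optional `CompressionManager` through the loop: when `compress_tool_results` is enabled and `should_compress()` reports the threshold is met, existing tool results in `messages` are compressed before the next provider call. A hedged sketch of passing one in (the `CompressionManager` constructor arguments are an assumption; only the import path, the `compress_tool_results` attribute, and the `should_compress()`/`compress()` hooks appear in this diff):

```python
from agno.compression.manager import CompressionManager
from agno.models.message import Message
from agno.models.openai import OpenAIChat

model = OpenAIChat(id="gpt-4o")

# Assumed constructor: enable compression of tool results between loop iterations.
compression = CompressionManager(compress_tool_results=True)

messages = [Message(role="user", content="Fetch the latest metrics and summarize them.")]
model_response = model.response(
    messages=messages,
    compression_manager=compression,  # new optional parameter in 2.4.3
)
print(model_response.content)
```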
```diff
@@ -486,157 +824,198 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
         """
 
-
-
-
-
-
-
-
-                return self._model_response_from_cache(cached_data)
-
-        log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        _tool_dicts = self._format_tools(tools) if tools is not None else []
-        _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
-
-        function_call_count = 0
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            await self._aprocess_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=_tool_dicts,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-
-
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)
 
-
-
+            log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
+            model_response = ModelResponse()
 
-
-
-
-
-
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
+
+            function_call_count = 0
+
+            while True:
+                # Compress existing tool results BEFORE making API call to avoid context overflow
+                if _compression_manager is not None and await _compression_manager.ashould_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    await _compression_manager.acompress(messages)
+
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                await self._aprocess_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                if function_call_response
-
-
-
-
-
-                    if
-                        model_response.
-
-
-
-                        function_call_response.
-
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                                # If the tool is currently paused (HITL flow), add the requirement to the run response
+                                if (
+                                    function_call_response.event == ModelResponseEvent.tool_call_paused.value
+                                    and run_response is not None
+                                ):
+                                    current_tool_execution = function_call_response.tool_executions[-1]
+                                    if run_response.requirements is None:
+                                        run_response.requirements = []
+                                    run_response.requirements.append(
+                                        RunRequirement(tool_execution=current_tool_execution)
+                                    )
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-
-
-
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-                    elif function_call_response.event not in [
-                        ModelResponseEvent.tool_call_started.value,
-                        ModelResponseEvent.tool_call_completed.value,
-                    ]:
-                        if function_call_response.content:
-                            model_response.content += function_call_response.content  # type: ignore
-
-            # Add a function call for each successful execution
-            function_call_count += len(function_call_results)
-
-            # Format and add results to messages
-            self.format_function_call_results(
-                messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-            )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
 
-
-
-
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                        messages=messages,
                        function_call_results=function_call_results,
-
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-
-
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-
-
-                break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-
-
-
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-
-
-
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-
-
-
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-
-
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-
-
+                    # Continue loop to get next response
+                    continue
 
-
+                # No tool calls or finished processing them
+                break
 
-
-
-
+            log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
+
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
         return model_response
 
```
@@ -649,6 +1028,7 @@ class Model(ABC):
|
|
|
649
1028
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
650
1029
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
651
1030
|
run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
|
|
1031
|
+
compress_tool_results: bool = False,
|
|
652
1032
|
) -> None:
|
|
653
1033
|
"""
|
|
654
1034
|
Process a single model response and return the assistant message and whether to continue.
|
|
@@ -656,14 +1036,15 @@ class Model(ABC):
656 1036  Returns:
657 1037  Tuple[Message, bool]: (assistant_message, should_continue)
658 1038  """
659 -  # Generate response
660 -  provider_response = self.
1039 +  # Generate response with retry logic for ModelProviderError
1040 +  provider_response = self._invoke_with_retry(
661 1041  assistant_message=assistant_message,
662 1042  messages=messages,
663 1043  response_format=response_format,
664 1044  tools=tools,
665 1045  tool_choice=tool_choice or self._tool_choice,
666 1046  run_response=run_response,
1047 +  compress_tool_results=compress_tool_results,
667 1048  )
668 1049
669 1050  # Populate the assistant message
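The hunk above replaces the direct provider call with `_invoke_with_retry`, so transient provider errors no longer abort the run on the first failure. A hedged sketch of what such a bounded retry wrapper can look like; the helper name, retry count, and backoff policy are assumptions here, not Agno's implementation:

```python
# Hedged sketch of a bounded retry wrapper for transient provider errors.
# ModelProviderError, the retry count, and the backoff policy are assumptions.
import time


class ModelProviderError(Exception):
    pass


def invoke_with_retry(invoke, max_retries: int = 3, base_delay: float = 0.1):
    for attempt in range(max_retries + 1):
        try:
            return invoke()
        except ModelProviderError:
            if attempt == max_retries:
                raise
            # Exponential backoff between attempts.
            time.sleep(base_delay * (2 ** attempt))


calls = {"n": 0}


def flaky_invoke():
    calls["n"] += 1
    if calls["n"] < 2:
        raise ModelProviderError("transient failure")
    return "ok"


print(invoke_with_retry(flaky_invoke))  # -> "ok" on the second attempt
```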
@@ -694,6 +1075,8 @@ class Model(ABC):
694 1075  model_response.extra.update(provider_response.extra)
695 1076  if provider_response.provider_data is not None:
696 1077  model_response.provider_data = provider_response.provider_data
1078 +  if provider_response.response_usage is not None:
1079 +  model_response.response_usage = provider_response.response_usage
697 1080
698 1081  async def _aprocess_model_response(
699 1082  self,
@@ -704,6 +1087,7 @@ class Model(ABC):
704 1087  tools: Optional[List[Dict[str, Any]]] = None,
705 1088  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
706 1089  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1090 +  compress_tool_results: bool = False,
707 1091  ) -> None:
708 1092  """
709 1093  Process a single async model response and return the assistant message and whether to continue.
@@ -711,14 +1095,15 @@ class Model(ABC):
711 1095  Returns:
712 1096  Tuple[Message, bool]: (assistant_message, should_continue)
713 1097  """
714 -  # Generate response
715 -  provider_response = await self.
1098 +  # Generate response with retry logic for ModelProviderError
1099 +  provider_response = await self._ainvoke_with_retry(
716 1100  messages=messages,
717 1101  response_format=response_format,
718 1102  tools=tools,
719 1103  tool_choice=tool_choice or self._tool_choice,
720 1104  assistant_message=assistant_message,
721 1105  run_response=run_response,
1106 +  compress_tool_results=compress_tool_results,
722 1107  )
723 1108
724 1109  # Populate the assistant message
@@ -749,6 +1134,8 @@ class Model(ABC):
749 1134  model_response.extra.update(provider_response.extra)
750 1135  if provider_response.provider_data is not None:
751 1136  model_response.provider_data = provider_response.provider_data
1137 +  if provider_response.response_usage is not None:
1138 +  model_response.response_usage = provider_response.response_usage
752 1139
753 1140  def _populate_assistant_message(
754 1141  self,
@@ -829,18 +1216,20 @@ class Model(ABC):
829 1216  tools: Optional[List[Dict[str, Any]]] = None,
830 1217  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
831 1218  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1219 +  compress_tool_results: bool = False,
832 1220  ) -> Iterator[ModelResponse]:
833 1221  """
834 -  Process a streaming response from the model.
1222 +  Process a streaming response from the model with retry logic for ModelProviderError.
835 1223  """
836 1224
837 -  for response_delta in self.
1225 +  for response_delta in self._invoke_stream_with_retry(
838 1226  messages=messages,
839 1227  assistant_message=assistant_message,
840 1228  response_format=response_format,
841 1229  tools=tools,
842 1230  tool_choice=tool_choice or self._tool_choice,
843 1231  run_response=run_response,
1232 +  compress_tool_results=compress_tool_results,
844 1233  ):
845 1234  for model_response_delta in self._populate_stream_data(
846 1235  stream_data=stream_data,
@@ -861,147 +1250,207 @@ class Model(ABC):
861 1250  stream_model_response: bool = True,
862 1251  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
863 1252  send_media_to_model: bool = True,
1253 +  compression_manager: Optional["CompressionManager"] = None,
864 1254  ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
865 1255  """
866 1256  Generate a streaming response from the model.
867 1257  """
1258 +  try:
1259 +  # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
1260 +  cache_key = None
1261 +  if self.cache_response:
1262 +  cache_key = self._get_model_cache_key(
1263 +  messages, stream=True, response_format=response_format, tools=tools
1264 +  )
1265 +  cached_data = self._get_cached_model_response(cache_key)
868 1266
869 -
870 -
871 -
872 -
873 -
1267 +  if cached_data:
1268 +  log_info("Cache hit for streaming model response")
1269 +  # Yield cached responses
1270 +  for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
1271 +  yield response
1272 +  return
874 1273
875 -
876 -  log_info("Cache hit for streaming model response")
877 -  # Yield cached responses
878 -  for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
879 -  yield response
880 -  return
1274 +  log_info("Cache miss for streaming model response")
881 1275
882 -
1276 +  # Track streaming responses for caching
1277 +  streaming_responses: List[ModelResponse] = []
883 1278
884 -
885 -
1279 +  log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
1280 +  log_debug(f"Model: {self.id}", center=True, symbol="-")
1281 +  _log_messages(messages)
886 1282
887 -
888 -
889 -  _log_messages(messages)
1283 +  _tool_dicts = self._format_tools(tools) if tools is not None else []
1284 +  _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
890 1285
891 -
892 -
1286 +  _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
1287 +  _compression_manager = compression_manager if _compress_tool_results else None
893 1288
894 -
1289 +  function_call_count = 0
895 1290
896 -
897 -
898 -
899 -
900 -  model_response = ModelResponse()
901 -  if stream_model_response:
902 -  # Generate response
903 -  for response in self.process_response_stream(
904 -  messages=messages,
905 -  assistant_message=assistant_message,
906 -  stream_data=stream_data,
907 -  response_format=response_format,
908 -  tools=_tool_dicts,
909 -  tool_choice=tool_choice or self._tool_choice,
910 -  run_response=run_response,
1291 +  while True:
1292 +  # Compress existing tool results BEFORE invoke
1293 +  if _compression_manager is not None and _compression_manager.should_compress(
1294 +  messages, tools, model=self, response_format=response_format
911 1295  ):
912 -
913 -
914 -
1296 +  # Emit compression started event
1297 +  yield ModelResponse(event=ModelResponseEvent.compression_started.value)
1298 +  _compression_manager.compress(messages)
1299 +  # Emit compression completed event with stats
1300 +  yield ModelResponse(
1301 +  event=ModelResponseEvent.compression_completed.value,
1302 +  compression_stats=_compression_manager.stats.copy(),
1303 +  )
915 1304
916 -
917 -
918 -
919 -
920 -  model_response=model_response,
921 -  response_format=response_format,
922 -  tools=_tool_dicts,
923 -  tool_choice=tool_choice or self._tool_choice,
924 -  )
925 -  if self.cache_response:
926 -  streaming_responses.append(model_response)
927 -  yield model_response
928 -
929 -  # Add assistant message to messages
930 -  messages.append(assistant_message)
931 -  assistant_message.log(metrics=True)
932 -
933 -  # Handle tool calls if present
934 -  if assistant_message.tool_calls is not None:
935 -  # Prepare function calls
936 -  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
937 -  assistant_message=assistant_message, messages=messages, functions=_functions
938 -  )
939 -  function_call_results: List[Message] = []
940 -
941 -  # Execute function calls
942 -  for function_call_response in self.run_function_calls(
943 -  function_calls=function_calls_to_run,
944 -  function_call_results=function_call_results,
945 -  current_function_call_count=function_call_count,
946 -  function_call_limit=tool_call_limit,
947 -  ):
948 -  if self.cache_response and isinstance(function_call_response, ModelResponse):
949 -  streaming_responses.append(function_call_response)
950 -  yield function_call_response
1305 +  assistant_message = Message(role=self.assistant_message_role)
1306 +  # Create assistant message and stream data
1307 +  stream_data = MessageData()
1308 +  model_response = ModelResponse()
951 1309
952 -  #
953 -
1310 +  # Emit LLM request started event
1311 +  yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
954 1312
955 -
956 -
957 -  self.
958 -  messages=messages,
1313 +  if stream_model_response:
1314 +  # Generate response
1315 +  for response in self.process_response_stream(
1316 +  messages=messages,
1317 +  assistant_message=assistant_message,
1318 +  stream_data=stream_data,
1319 +  response_format=response_format,
1320 +  tools=_tool_dicts,
1321 +  tool_choice=tool_choice or self._tool_choice,
1322 +  run_response=run_response,
1323 +  compress_tool_results=_compress_tool_results,
1324 +  ):
1325 +  if self.cache_response and isinstance(response, ModelResponse):
1326 +  streaming_responses.append(response)
1327 +  yield response
1328 +
1329 +  else:
1330 +  self._process_model_response(
1331 +  messages=messages,
1332 +  assistant_message=assistant_message,
1333 +  model_response=model_response,
1334 +  response_format=response_format,
1335 +  tools=_tool_dicts,
1336 +  tool_choice=tool_choice or self._tool_choice,
1337 +  run_response=run_response,
1338 +  compress_tool_results=_compress_tool_results,
959 1339  )
960 -
961 -
962 -
1340 +  if self.cache_response:
1341 +  streaming_responses.append(model_response)
1342 +  yield model_response
1343 +
1344 +  # Add assistant message to messages
1345 +  messages.append(assistant_message)
1346 +  assistant_message.log(metrics=True)
1347 +
1348 +  # Emit LLM request completed event with metrics
1349 +  llm_metrics = assistant_message.metrics
1350 +  yield ModelResponse(
1351 +  event=ModelResponseEvent.model_request_completed.value,
1352 +  input_tokens=llm_metrics.input_tokens if llm_metrics else None,
1353 +  output_tokens=llm_metrics.output_tokens if llm_metrics else None,
1354 +  total_tokens=llm_metrics.total_tokens if llm_metrics else None,
1355 +  time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
1356 +  reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
1357 +  cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
1358 +  cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
1359 +  )
1360 +
1361 +  # Handle tool calls if present
1362 +  if assistant_message.tool_calls is not None:
1363 +  # Prepare function calls
1364 +  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
1365 +  assistant_message=assistant_message, messages=messages, functions=_functions
963 1366  )
964 -
965 -  self.format_function_call_results(messages=messages, function_call_results=function_call_results)
1367 +  function_call_results: List[Message] = []
966 1368
967 -
968 -
969 -
970 -  messages=messages,
1369 +  # Execute function calls
1370 +  for function_call_response in self.run_function_calls(
1371 +  function_calls=function_calls_to_run,
971 1372  function_call_results=function_call_results,
972 -
973 -
1373 +  current_function_call_count=function_call_count,
1374 +  function_call_limit=tool_call_limit,
1375 +  ):
1376 +  if self.cache_response and isinstance(function_call_response, ModelResponse):
1377 +  streaming_responses.append(function_call_response)
1378 +  yield function_call_response
1379 +
1380 +  # Add a function call for each successful execution
1381 +  function_call_count += len(function_call_results)
1382 +
1383 +  # Format and add results to messages
1384 +  if stream_data and stream_data.extra is not None:
1385 +  self.format_function_call_results(
1386 +  messages=messages,
1387 +  function_call_results=function_call_results,
1388 +  compress_tool_results=_compress_tool_results,
1389 +  **stream_data.extra,
1390 +  )
1391 +  elif model_response and model_response.extra is not None:
1392 +  self.format_function_call_results(
1393 +  messages=messages,
1394 +  function_call_results=function_call_results,
1395 +  compress_tool_results=_compress_tool_results,
1396 +  **model_response.extra,
1397 +  )
1398 +  else:
1399 +  self.format_function_call_results(
1400 +  messages=messages,
1401 +  function_call_results=function_call_results,
1402 +  compress_tool_results=_compress_tool_results,
1403 +  )
974 1404
975 -
976 -
1405 +  # Handle function call media
1406 +  if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
1407 +  self._handle_function_call_media(
1408 +  messages=messages,
1409 +  function_call_results=function_call_results,
1410 +  send_media_to_model=send_media_to_model,
1411 +  )
977 1412
978 -
979 -
980 -  break
1413 +  for function_call_result in function_call_results:
1414 +  function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
981 1415
982 -
983 -
984 -
1416 +  # Check if we should stop after tool calls
1417 +  if any(m.stop_after_tool_call for m in function_call_results):
1418 +  break
985 1419
986 -
987 -
988 -
1420 +  # If we have any tool calls that require confirmation, break the loop
1421 +  if any(fc.function.requires_confirmation for fc in function_calls_to_run):
1422 +  break
989 1423
990 -
991 -
992 -
1424 +  # If we have any tool calls that require external execution, break the loop
1425 +  if any(fc.function.external_execution for fc in function_calls_to_run):
1426 +  break
993 1427
994 -
995 -
1428 +  # If we have any tool calls that require user input, break the loop
1429 +  if any(fc.function.requires_user_input for fc in function_calls_to_run):
1430 +  break
996 1431
997 -
998 -
1432 +  # Continue loop to get next response
1433 +  continue
999 1434
1000 -
1435 +  # No tool calls or finished processing them
1436 +  break
1001 1437
1002 -
1003 -
1004 -
1438 +  log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
1439 +
1440 +  # Save streaming responses to cache if enabled
1441 +  if self.cache_response and cache_key and streaming_responses:
1442 +  self._save_streaming_responses_to_cache(cache_key, streaming_responses)
1443 +  finally:
1444 +  # Close the Gemini client
1445 +  if self.__class__.__name__ == "Gemini" and self.client is not None:
1446 +  try:
1447 +  self.client.close()  # type: ignore
1448 +  self.client = None
1449 +  except AttributeError:
1450 +  log_warning(
1451 +  "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
1452 +  " please upgrade Gemini to the latest version: pip install -U google-genai"
1453 +  )
1005 1454
1006 1455  async def aprocess_response_stream(
1007 1456  self,
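The streaming loop above consults an optional `CompressionManager` before each model invocation and emits compression started/completed events around the compression pass. A toy sketch of that gate, using a stand-in manager rather than Agno's `CompressionManager` API:

```python
# Toy sketch of the "compress tool results before the next invoke" gate.
# ToyCompressionManager is a stand-in; agno's CompressionManager has its own API.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class ToyCompressionManager:
    max_chars: int = 200
    stats: Dict[str, int] = field(default_factory=dict)

    def should_compress(self, messages: List[str]) -> bool:
        return sum(len(m) for m in messages) > self.max_chars

    def compress(self, messages: List[str]) -> None:
        before = sum(len(m) for m in messages)
        for i, m in enumerate(messages):
            messages[i] = m[:50]  # crude in-place truncation
        self.stats = {"chars_before": before, "chars_after": sum(len(m) for m in messages)}


messages = ["tool result " * 20, "another tool result " * 10]
manager = ToyCompressionManager()
if manager.should_compress(messages):
    print("compression_started")  # analogous to the compression_started event
    manager.compress(messages)
    print("compression_completed", manager.stats)
```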
@@ -1012,18 +1461,20 @@ class Model(ABC):
1012 1461  tools: Optional[List[Dict[str, Any]]] = None,
1013 1462  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
1014 1463  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1464 +  compress_tool_results: bool = False,
1015 1465  ) -> AsyncIterator[ModelResponse]:
1016 1466  """
1017 -  Process a streaming response from the model.
1467 +  Process a streaming response from the model with retry logic for ModelProviderError.
1018 1468  """
1019 -  async for response_delta in self.
1469 +  async for response_delta in self._ainvoke_stream_with_retry(
1020 1470  messages=messages,
1021 1471  assistant_message=assistant_message,
1022 1472  response_format=response_format,
1023 1473  tools=tools,
1024 1474  tool_choice=tool_choice or self._tool_choice,
1025 1475  run_response=run_response,
1026 -
1476 +  compress_tool_results=compress_tool_results,
1477 +  ):
1027 1478  for model_response_delta in self._populate_stream_data(
1028 1479  stream_data=stream_data,
1029 1480  model_response_delta=response_delta,
@@ -1043,148 +1494,208 @@ class Model(ABC):
1043 1494  stream_model_response: bool = True,
1044 1495  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1045 1496  send_media_to_model: bool = True,
1497 +  compression_manager: Optional["CompressionManager"] = None,
1046 1498  ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
1047 1499  """
1048 1500  Generate an asynchronous streaming response from the model.
1049 1501  """
1502 +  try:
1503 +  # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
1504 +  cache_key = None
1505 +  if self.cache_response:
1506 +  cache_key = self._get_model_cache_key(
1507 +  messages, stream=True, response_format=response_format, tools=tools
1508 +  )
1509 +  cached_data = self._get_cached_model_response(cache_key)
1050 1510
1051 -
1052 -
1053 -
1054 -
1055 -
1511 +  if cached_data:
1512 +  log_info("Cache hit for async streaming model response")
1513 +  # Yield cached responses
1514 +  for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
1515 +  yield response
1516 +  return
1056 1517
1057 -
1058 -  log_info("Cache hit for async streaming model response")
1059 -  # Yield cached responses
1060 -  for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
1061 -  yield response
1062 -  return
1518 +  log_info("Cache miss for async streaming model response")
1063 1519
1064 -
1520 +  # Track streaming responses for caching
1521 +  streaming_responses: List[ModelResponse] = []
1065 1522
1066 -
1067 -
1523 +  log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
1524 +  log_debug(f"Model: {self.id}", center=True, symbol="-")
1525 +  _log_messages(messages)
1068 1526
1069 -
1070 -
1071 -  _log_messages(messages)
1527 +  _tool_dicts = self._format_tools(tools) if tools is not None else []
1528 +  _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
1072 1529
1073 -
1074 -
1530 +  _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
1531 +  _compression_manager = compression_manager if _compress_tool_results else None
1075 1532
1076 -
1533 +  function_call_count = 0
1077 1534
1078 -
1079 -
1080 -
1081 -
1082 -  model_response = ModelResponse()
1083 -  if stream_model_response:
1084 -  # Generate response
1085 -  async for model_response in self.aprocess_response_stream(
1086 -  messages=messages,
1087 -  assistant_message=assistant_message,
1088 -  stream_data=stream_data,
1089 -  response_format=response_format,
1090 -  tools=_tool_dicts,
1091 -  tool_choice=tool_choice or self._tool_choice,
1092 -  run_response=run_response,
1535 +  while True:
1536 +  # Compress existing tool results BEFORE making API call to avoid context overflow
1537 +  if _compression_manager is not None and await _compression_manager.ashould_compress(
1538 +  messages, tools, model=self, response_format=response_format
1093 1539  ):
1094 -
1540 +  # Emit compression started event
1541 +  yield ModelResponse(event=ModelResponseEvent.compression_started.value)
1542 +  await _compression_manager.acompress(messages)
1543 +  # Emit compression completed event with stats
1544 +  yield ModelResponse(
1545 +  event=ModelResponseEvent.compression_completed.value,
1546 +  compression_stats=_compression_manager.stats.copy(),
1547 +  )
1548 +
1549 +  # Create assistant message and stream data
1550 +  assistant_message = Message(role=self.assistant_message_role)
1551 +  stream_data = MessageData()
1552 +  model_response = ModelResponse()
1553 +
1554 +  # Emit LLM request started event
1555 +  yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
1556 +
1557 +  if stream_model_response:
1558 +  # Generate response
1559 +  async for model_response in self.aprocess_response_stream(
1560 +  messages=messages,
1561 +  assistant_message=assistant_message,
1562 +  stream_data=stream_data,
1563 +  response_format=response_format,
1564 +  tools=_tool_dicts,
1565 +  tool_choice=tool_choice or self._tool_choice,
1566 +  run_response=run_response,
1567 +  compress_tool_results=_compress_tool_results,
1568 +  ):
1569 +  if self.cache_response and isinstance(model_response, ModelResponse):
1570 +  streaming_responses.append(model_response)
1571 +  yield model_response
1572 +
1573 +  else:
1574 +  await self._aprocess_model_response(
1575 +  messages=messages,
1576 +  assistant_message=assistant_message,
1577 +  model_response=model_response,
1578 +  response_format=response_format,
1579 +  tools=_tool_dicts,
1580 +  tool_choice=tool_choice or self._tool_choice,
1581 +  run_response=run_response,
1582 +  compress_tool_results=_compress_tool_results,
1583 +  )
1584 +  if self.cache_response:
1095 1585  streaming_responses.append(model_response)
1096 1586  yield model_response
1097 1587
1098 -
1099 -
1100 -
1101 -  assistant_message=assistant_message,
1102 -  model_response=model_response,
1103 -  response_format=response_format,
1104 -  tools=_tool_dicts,
1105 -  tool_choice=tool_choice or self._tool_choice,
1106 -  run_response=run_response,
1107 -  )
1108 -  if self.cache_response:
1109 -  streaming_responses.append(model_response)
1110 -  yield model_response
1111 -
1112 -  # Add assistant message to messages
1113 -  messages.append(assistant_message)
1114 -  assistant_message.log(metrics=True)
1115 -
1116 -  # Handle tool calls if present
1117 -  if assistant_message.tool_calls is not None:
1118 -  # Prepare function calls
1119 -  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
1120 -  assistant_message=assistant_message, messages=messages, functions=_functions
1121 -  )
1122 -  function_call_results: List[Message] = []
1123 -
1124 -  # Execute function calls
1125 -  async for function_call_response in self.arun_function_calls(
1126 -  function_calls=function_calls_to_run,
1127 -  function_call_results=function_call_results,
1128 -  current_function_call_count=function_call_count,
1129 -  function_call_limit=tool_call_limit,
1130 -  ):
1131 -  if self.cache_response and isinstance(function_call_response, ModelResponse):
1132 -  streaming_responses.append(function_call_response)
1133 -  yield function_call_response
1588 +  # Add assistant message to messages
1589 +  messages.append(assistant_message)
1590 +  assistant_message.log(metrics=True)
1134 1591
1135 -  #
1136 -
1592 +  # Emit LLM request completed event with metrics
1593 +  llm_metrics = assistant_message.metrics
1594 +  yield ModelResponse(
1595 +  event=ModelResponseEvent.model_request_completed.value,
1596 +  input_tokens=llm_metrics.input_tokens if llm_metrics else None,
1597 +  output_tokens=llm_metrics.output_tokens if llm_metrics else None,
1598 +  total_tokens=llm_metrics.total_tokens if llm_metrics else None,
1599 +  time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
1600 +  reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
1601 +  cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
1602 +  cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
1603 +  )
1137 1604
1138 -  #
1139 -  if
1140 -
1141 -
1142 -
1143 -  elif model_response and model_response.extra is not None:
1144 -  self.format_function_call_results(
1145 -  messages=messages, function_call_results=function_call_results, **model_response.extra or {}
1605 +  # Handle tool calls if present
1606 +  if assistant_message.tool_calls is not None:
1607 +  # Prepare function calls
1608 +  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
1609 +  assistant_message=assistant_message, messages=messages, functions=_functions
1146 1610  )
1147 -
1148 -  self.format_function_call_results(messages=messages, function_call_results=function_call_results)
1611 +  function_call_results: List[Message] = []
1149 1612
1150 -
1151 -
1152 -
1153 -  messages=messages,
1613 +  # Execute function calls
1614 +  async for function_call_response in self.arun_function_calls(
1615 +  function_calls=function_calls_to_run,
1154 1616  function_call_results=function_call_results,
1155 -
1156 -
1617 +  current_function_call_count=function_call_count,
1618 +  function_call_limit=tool_call_limit,
1619 +  ):
1620 +  if self.cache_response and isinstance(function_call_response, ModelResponse):
1621 +  streaming_responses.append(function_call_response)
1622 +  yield function_call_response
1623 +
1624 +  # Add a function call for each successful execution
1625 +  function_call_count += len(function_call_results)
1626 +
1627 +  # Format and add results to messages
1628 +  if stream_data and stream_data.extra is not None:
1629 +  self.format_function_call_results(
1630 +  messages=messages,
1631 +  function_call_results=function_call_results,
1632 +  compress_tool_results=_compress_tool_results,
1633 +  **stream_data.extra,
1634 +  )
1635 +  elif model_response and model_response.extra is not None:
1636 +  self.format_function_call_results(
1637 +  messages=messages,
1638 +  function_call_results=function_call_results,
1639 +  compress_tool_results=_compress_tool_results,
1640 +  **model_response.extra or {},
1641 +  )
1642 +  else:
1643 +  self.format_function_call_results(
1644 +  messages=messages,
1645 +  function_call_results=function_call_results,
1646 +  compress_tool_results=_compress_tool_results,
1647 +  )
1648 +
1649 +  # Handle function call media
1650 +  if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
1651 +  self._handle_function_call_media(
1652 +  messages=messages,
1653 +  function_call_results=function_call_results,
1654 +  send_media_to_model=send_media_to_model,
1655 +  )
1157 1656
1158 -
1159 -
1657 +  for function_call_result in function_call_results:
1658 +  function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
1160 1659
1161 -
1162 -
1163 -
1660 +  # Check if we should stop after tool calls
1661 +  if any(m.stop_after_tool_call for m in function_call_results):
1662 +  break
1164 1663
1165 -
1166 -
1167 -
1664 +  # If we have any tool calls that require confirmation, break the loop
1665 +  if any(fc.function.requires_confirmation for fc in function_calls_to_run):
1666 +  break
1168 1667
1169 -
1170 -
1171 -
1668 +  # If we have any tool calls that require external execution, break the loop
1669 +  if any(fc.function.external_execution for fc in function_calls_to_run):
1670 +  break
1172 1671
1173 -
1174 -
1175 -
1672 +  # If we have any tool calls that require user input, break the loop
1673 +  if any(fc.function.requires_user_input for fc in function_calls_to_run):
1674 +  break
1176 1675
1177 -
1178 -
1676 +  # Continue loop to get next response
1677 +  continue
1179 1678
1180 -
1181 -
1679 +  # No tool calls or finished processing them
1680 +  break
1182 1681
1183 -
1682 +  log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
1184 1683
1185 -
1186 -
1187 -
1684 +  # Save streaming responses to cache if enabled
1685 +  if self.cache_response and cache_key and streaming_responses:
1686 +  self._save_streaming_responses_to_cache(cache_key, streaming_responses)
1687 +
1688 +  finally:
1689 +  # Close the Gemini client
1690 +  if self.__class__.__name__ == "Gemini" and self.client is not None:
1691 +  try:
1692 +  await self.client.aio.aclose()  # type: ignore
1693 +  self.client = None
1694 +  except AttributeError:
1695 +  log_warning(
1696 +  "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
1697 +  " please upgrade Gemini to the latest version: pip install -U google-genai"
1698 +  )
1188 1699
1189 1700  def _populate_assistant_message_from_stream_data(
1190 1701  self, assistant_message: Message, stream_data: MessageData
@@ -1433,11 +1944,15 @@ class Model(ABC):
1433 1944
1434 1945  # Run function calls sequentially
1435 1946  function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
1947 +  stop_after_tool_call_from_exception = False
1436 1948  try:
1437 1949  function_execution_result = function_call.execute()
1438 1950  except AgentRunException as a_exc:
1439 1951  # Update additional messages from function call
1440 1952  _handle_agent_exception(a_exc, additional_input)
1953 +  # If stop_execution is True, mark that we should stop after this tool call
1954 +  if a_exc.stop_execution:
1955 +  stop_after_tool_call_from_exception = True
1441 1956  # Set function call success to False if an exception occurred
1442 1957  except Exception as e:
1443 1958  log_error(f"Error executing function {function_call.function.name}: {e}")
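The hunk above records whether an `AgentRunException` asked the run to stop (`stop_execution`) and later forces `stop_after_tool_call` on the tool result message. A small sketch of that flag propagation with illustrative stand-in types:

```python
# Sketch of propagating a "stop after this tool call" flag out of an exception.
# StopRun and ToolMessage are illustrative stand-ins for agno's own types.
class StopRun(Exception):
    def __init__(self, msg: str, stop_execution: bool = True):
        super().__init__(msg)
        self.stop_execution = stop_execution


class ToolMessage:
    stop_after_tool_call = False


def run_tool(tool, result_message: ToolMessage) -> ToolMessage:
    stop_after_tool_call_from_exception = False
    try:
        tool()
    except StopRun as exc:
        if exc.stop_execution:
            stop_after_tool_call_from_exception = True
    # Applied after the result message is built, mirroring the diff above.
    if stop_after_tool_call_from_exception:
        result_message.stop_after_tool_call = True
    return result_message


def halting_tool():
    raise StopRun("halt the run")


print(run_tool(halting_tool, ToolMessage()).stop_after_tool_call)  # -> True
```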
@@ -1452,44 +1967,59 @@ class Model(ABC):
1452 1967  function_call_output: str = ""
1453 1968
1454 1969  if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
1455 -
1456 -
1457 -
1458 -
1459 -
1460 -
1461 -
1462 -
1463 -
1464 -  if item
1465 -
1466 -
1467 -
1468 -
1970 +  try:
1971 +  for item in function_execution_result.result:
1972 +  # This function yields agent/team/workflow run events
1973 +  if (
1974 +  isinstance(item, tuple(get_args(RunOutputEvent)))
1975 +  or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
1976 +  or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
1977 +  ):
1978 +  # We only capture content events for output accumulation
1979 +  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
1980 +  if item.content is not None and isinstance(item.content, BaseModel):
1981 +  function_call_output += item.content.model_dump_json()
1982 +  else:
1983 +  # Capture output
1984 +  function_call_output += item.content or ""
1985 +
1986 +  if function_call.function.show_result and item.content is not None:
1987 +  yield ModelResponse(content=item.content)
1469 1988
1470 -  if
1471 -
1989 +  if isinstance(item, CustomEvent):
1990 +  function_call_output += str(item)
1991 +  item.tool_call_id = function_call.call_id
1472 1992
1473 -
1474 -
1993 +  # For WorkflowCompletedEvent, extract content for final output
1994 +  from agno.run.workflow import WorkflowCompletedEvent
1475 1995
1476 -
1477 -
1996 +  if isinstance(item, WorkflowCompletedEvent):
1997 +  if item.content is not None:
1998 +  if isinstance(item.content, BaseModel):
1999 +  function_call_output += item.content.model_dump_json()
2000 +  else:
2001 +  function_call_output += str(item.content)
1478 2002
1479 -
1480 -
1481 -  if isinstance(item.content, BaseModel):
1482 -  function_call_output += item.content.model_dump_json()
1483 -  else:
1484 -  function_call_output += str(item.content)
2003 +  # Yield the event itself to bubble it up
2004 +  yield item
1485 2005
1486 -
1487 -
2006 +  else:
2007 +  function_call_output += str(item)
2008 +  if function_call.function.show_result and item is not None:
2009 +  yield ModelResponse(content=str(item))
2010 +  except Exception as e:
2011 +  log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
2012 +  function_call.error = str(e)
2013 +  function_call_success = False
1488 2014
1489 -
1490 -
1491 -
1492 -
2015 +  # For generators, re-capture updated_session_state after consumption
2016 +  # since session_state modifications were made during iteration
2017 +  if function_execution_result.updated_session_state is None:
2018 +  if (
2019 +  function_call.function._run_context is not None
2020 +  and function_call.function._run_context.session_state is not None
2021 +  ):
2022 +  function_execution_result.updated_session_state = function_call.function._run_context.session_state
1493 2023  else:
1494 2024  from agno.tools.function import ToolResult
1495 2025
@@ -1521,6 +2051,9 @@ class Model(ABC):
1521 2051  timer=function_call_timer,
1522 2052  function_execution_result=function_execution_result,
1523 2053  )
2054 +  # Override stop_after_tool_call if set by exception
2055 +  if stop_after_tool_call_from_exception:
2056 +  function_call_result.stop_after_tool_call = True
1524 2057  yield ModelResponse(
1525 2058  content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
1526 2059  tool_executions=[
@@ -1568,7 +2101,7 @@ class Model(ABC):
1568 2101
1569 2102  paused_tool_executions = []
1570 2103
1571 -  # The function
2104 +  # The function requires user confirmation (HITL)
1572 2105  if fc.function.requires_confirmation:
1573 2106  paused_tool_executions.append(
1574 2107  ToolExecution(
@@ -1578,7 +2111,8 @@ class Model(ABC):
1578 2111  requires_confirmation=True,
1579 2112  )
1580 2113  )
1581 -
2114 +
2115 +  # The function requires user input (HITL)
1582 2116  if fc.function.requires_user_input:
1583 2117  user_input_schema = fc.function.user_input_schema
1584 2118  if fc.arguments and user_input_schema:
@@ -1596,15 +2130,26 @@ class Model(ABC):
1596 2130  user_input_schema=user_input_schema,
1597 2131  )
1598 2132  )
1599 -
2133 +
2134 +  # If the function is from the user control flow (HITL) tools, we handle it here
1600 2135  if fc.function.name == "get_user_input" and fc.arguments and fc.arguments.get("user_input_fields"):
1601 2136  user_input_schema = []
1602 2137  for input_field in fc.arguments.get("user_input_fields", []):
1603 2138  field_type = input_field.get("field_type")
1604 -
1605 -
1606 -
1607 -
2139 +  if isinstance(field_type, str):
2140 +  type_mapping = {
2141 +  "str": str,
2142 +  "int": int,
2143 +  "float": float,
2144 +  "bool": bool,
2145 +  "list": list,
2146 +  "dict": dict,
2147 +  }
2148 +  python_type = type_mapping.get(field_type, str)
2149 +  elif isinstance(field_type, type):
2150 +  python_type = field_type
2151 +  else:
2152 +  python_type = str
1608 2153  user_input_schema.append(
1609 2154  UserInputField(
1610 2155  name=input_field.get("field_name"),
@@ -1622,7 +2167,8 @@ class Model(ABC):
1622 2167  user_input_schema=user_input_schema,
1623 2168  )
1624 2169  )
1625 -
2170 +
2171 +  # The function requires external execution (HITL)
1626 2172  if fc.function.external_execution:
1627 2173  paused_tool_executions.append(
1628 2174  ToolExecution(
@@ -1755,10 +2301,20 @@ class Model(ABC):
1755 2301  user_input_schema = []
1756 2302  for input_field in fc.arguments.get("user_input_fields", []):
1757 2303  field_type = input_field.get("field_type")
1758 -
1759 -
1760 -
1761 -
2304 +  if isinstance(field_type, str):
2305 +  type_mapping = {
2306 +  "str": str,
2307 +  "int": int,
2308 +  "float": float,
2309 +  "bool": bool,
2310 +  "list": list,
2311 +  "dict": dict,
2312 +  }
2313 +  python_type = type_mapping.get(field_type, str)
2314 +  elif isinstance(field_type, type):
2315 +  python_type = field_type
2316 +  else:
2317 +  python_type = str
1762 2318  user_input_schema.append(
1763 2319  UserInputField(
1764 2320  name=input_field.get("field_name"),
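Both hunks that touch `user_input_fields` normalize `field_type` the same way: accept either a type-name string or an actual Python type, and fall back to `str`. A sketch of that normalization with a hypothetical helper name:

```python
# Sketch of the string-or-type normalization applied to user_input_fields.
# resolve_field_type is an illustrative helper name, not an agno function.
def resolve_field_type(field_type):
    type_mapping = {"str": str, "int": int, "float": float, "bool": bool, "list": list, "dict": dict}
    if isinstance(field_type, str):
        return type_mapping.get(field_type, str)
    if isinstance(field_type, type):
        return field_type
    return str


print(resolve_field_type("int"))   # <class 'int'>
print(resolve_field_type(bool))    # <class 'bool'>
print(resolve_field_type(None))    # <class 'str'> (fallback)
```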
@@ -1875,6 +2431,7 @@ class Model(ABC):
1875 2431
1876 2432  if isinstance(item, CustomEvent):
1877 2433  function_call_output += str(item)
2434 +  item.tool_call_id = function_call.call_id
1878 2435
1879 2436  # For WorkflowCompletedEvent, extract content for final output
1880 2437  from agno.run.workflow import WorkflowCompletedEvent
@@ -1952,18 +2509,26 @@ class Model(ABC):
1952 2509  if async_gen_index in async_generator_outputs:
1953 2510  _, async_function_call_output, error = async_generator_outputs[async_gen_index]
1954 2511  if error:
1955 -
1956 -
2512 +  # Handle async generator exceptions gracefully like sync generators
2513 +  log_error(
2514 +  f"Error while iterating async generator for {function_call.function.name}: {error}"
2515 +  )
2516 +  function_call.error = str(error)
2517 +  function_call_success = False
1957 2518  break
1958 2519  async_gen_index += 1
1959 2520
1960 2521  updated_session_state = function_execution_result.updated_session_state
1961 2522
1962 2523  # Handle AgentRunException
2524 +  stop_after_tool_call_from_exception = False
1963 2525  if isinstance(function_call_success, AgentRunException):
1964 2526  a_exc = function_call_success
1965 2527  # Update additional messages from function call
1966 2528  _handle_agent_exception(a_exc, additional_input)
2529 +  # If stop_execution is True, mark that we should stop after this tool call
2530 +  if a_exc.stop_execution:
2531 +  stop_after_tool_call_from_exception = True
1967 2532  # Set function call success to False if an exception occurred
1968 2533  function_call_success = False
1969 2534
@@ -1975,33 +2540,62 @@ class Model(ABC):
1975 2540  function_call_output = async_function_call_output
1976 2541  # Events from async generators were already yielded in real-time above
1977 2542  elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
1978 -
1979 -
1980 -
1981 -
1982 -
1983 -
1984 -
2543 +  try:
2544 +  for item in function_call.result:
2545 +  # This function yields agent/team/workflow run events
2546 +  if isinstance(
2547 +  item,
2548 +  tuple(get_args(RunOutputEvent))
2549 +  + tuple(get_args(TeamRunOutputEvent))
2550 +  + tuple(get_args(WorkflowRunOutputEvent)),
2551 +  ):
2552 +  # We only capture content events
2553 +  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
2554 +  if item.content is not None and isinstance(item.content, BaseModel):
2555 +  function_call_output += item.content.model_dump_json()
2556 +  else:
2557 +  # Capture output
2558 +  function_call_output += item.content or ""
2559 +
2560 +  if function_call.function.show_result and item.content is not None:
2561 +  yield ModelResponse(content=item.content)
2562 +  continue
2563 +
2564 +  elif isinstance(item, CustomEvent):
2565 +  function_call_output += str(item)
2566 +  item.tool_call_id = function_call.call_id
2567 +
2568 +  # Yield the event itself to bubble it up
2569 +  yield item
2570 +  else:
2571 +  function_call_output += str(item)
2572 +  if function_call.function.show_result and item is not None:
2573 +  yield ModelResponse(content=str(item))
2574 +  except Exception as e:
2575 +  log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
2576 +  function_call.error = str(e)
2577 +  function_call_success = False
2578 +
2579 +  # For generators (sync or async), re-capture updated_session_state after consumption
2580 +  # since session_state modifications were made during iteration
2581 +  if async_function_call_output is not None or isinstance(
2582 +  function_call.result,
2583 +  (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
2584 +  ):
2585 +  if updated_session_state is None:
2586 +  if (
2587 +  function_call.function._run_context is not None
2588 +  and function_call.function._run_context.session_state is not None
1985 2589  ):
1986 -
1987 -  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
1988 -  if item.content is not None and isinstance(item.content, BaseModel):
1989 -  function_call_output += item.content.model_dump_json()
1990 -  else:
1991 -  # Capture output
1992 -  function_call_output += item.content or ""
1993 -
1994 -  if function_call.function.show_result and item.content is not None:
1995 -  yield ModelResponse(content=item.content)
1996 -  continue
2590 +  updated_session_state = function_call.function._run_context.session_state
1997 2591
1998 -
1999 -
2000 -
2001 -
2002 -
2003 -
2004 -
2592 +  if not (
2593 +  async_function_call_output is not None
2594 +  or isinstance(
2595 +  function_call.result,
2596 +  (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
2597 +  )
2598 +  ):
2005 2599  from agno.tools.function import ToolResult
2006 2600
2007 2601  if isinstance(function_execution_result.result, ToolResult):
@@ -2030,6 +2624,9 @@ class Model(ABC):
2030 2624  timer=function_call_timer,
2031 2625  function_execution_result=function_execution_result,
2032 2626  )
2627 +  # Override stop_after_tool_call if set by exception
2628 +  if stop_after_tool_call_from_exception:
2629 +  function_call_result.stop_after_tool_call = True
2033 2630  yield ModelResponse(
2034 2631  content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
2035 2632  tool_executions=[
@@ -2079,7 +2676,11 @@ class Model(ABC):
2079 2676  return function_calls_to_run
2080 2677
2081 2678  def format_function_call_results(
2082 -  self,
2679 +  self,
2680 +  messages: List[Message],
2681 +  function_call_results: List[Message],
2682 +  compress_tool_results: bool = False,
2683 +  **kwargs,
2083 2684  ) -> None:
2084 2685  """
2085 2686  Format function call results.