agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/google/utils.py
ADDED
@@ -0,0 +1,22 @@
+from enum import Enum
+
+
+class GeminiFinishReason(Enum):
+    """Gemini API finish reasons"""
+
+    STOP = "STOP"
+    MAX_TOKENS = "MAX_TOKENS"
+    SAFETY = "SAFETY"
+    RECITATION = "RECITATION"
+    MALFORMED_FUNCTION_CALL = "MALFORMED_FUNCTION_CALL"
+    OTHER = "OTHER"
+
+
+# Guidance message used to retry a Gemini invocation after a MALFORMED_FUNCTION_CALL error
+MALFORMED_FUNCTION_CALL_GUIDANCE = """The previous function call was malformed. Please try again with a valid function call.
+
+Guidelines:
+- Generate the function call JSON directly, do not generate code
+- Use the function name exactly as defined (no namespace prefixes like 'default_api.')
+- Ensure all required parameters are provided with correct types
+"""
agno/models/groq/groq.py
CHANGED
@@ -6,12 +6,13 @@ from typing import Any, Dict, Iterator, List, Optional, Type, Union
 import httpx
 from pydantic import BaseModel
 
-from agno.exceptions import ModelProviderError
+from agno.exceptions import ModelAuthenticationError, ModelProviderError
 from agno.models.base import Model
 from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
+from agno.utils.http import get_default_async_client, get_default_sync_client
 from agno.utils.log import log_debug, log_error, log_warning
 from agno.utils.openai import images_to_message
 
@@ -61,7 +62,7 @@ class Groq(Model):
     max_retries: Optional[int] = None
     default_headers: Optional[Any] = None
     default_query: Optional[Any] = None
-    http_client: Optional[httpx.Client] = None
+    http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
     client_params: Optional[Dict[str, Any]] = None
 
     # Groq clients
@@ -73,7 +74,10 @@ class Groq(Model):
         if not self.api_key:
             self.api_key = getenv("GROQ_API_KEY")
             if not self.api_key:
-
+                raise ModelAuthenticationError(
+                    message="GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.",
+                    model_name=self.name,
+                )
 
         # Define base client params
         base_params = {
@@ -93,7 +97,7 @@ class Groq(Model):
 
     def get_client(self) -> GroqClient:
         """
-        Returns a Groq client.
+        Returns a Groq client. Caches the client to avoid recreating it on every request.
 
         Returns:
             GroqClient: An instance of the Groq client.
@@ -103,30 +107,46 @@ class Groq(Model):
 
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client is not None:
-
+            if isinstance(self.http_client, httpx.Client):
+                client_params["http_client"] = self.http_client
+            else:
+                log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
+                # Use global sync client when user http_client is invalid
+                client_params["http_client"] = get_default_sync_client()
+        else:
+            # Use global sync client when no custom http_client is provided
+            client_params["http_client"] = get_default_sync_client()
 
         self.client = GroqClient(**client_params)
         return self.client
 
     def get_async_client(self) -> AsyncGroqClient:
         """
-        Returns an asynchronous Groq client.
+        Returns an asynchronous Groq client. Caches the client to avoid recreating it on every request.
 
         Returns:
             AsyncGroqClient: An instance of the asynchronous Groq client.
         """
-        if self.async_client:
+        if self.async_client and not self.async_client.is_closed():
             return self.async_client
 
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client:
-
+            if isinstance(self.http_client, httpx.AsyncClient):
+                client_params["http_client"] = self.http_client
+            else:
+                log_warning(
+                    "http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
+                )
+                # Use global async client when user http_client is invalid
+                client_params["http_client"] = get_default_async_client()
         else:
-            #
-            client_params["http_client"] =
-
-
-
+            # Use global async client when no custom http_client is provided
+            client_params["http_client"] = get_default_async_client()
+
+        # Create and cache the client
+        self.async_client = AsyncGroqClient(**client_params)
+        return self.async_client
 
     def get_request_params(
         self,
@@ -204,19 +224,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.
 
         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -259,6 +288,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -270,7 +300,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -299,6 +329,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -310,7 +341,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -339,6 +370,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -351,7 +383,7 @@ class Groq(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -379,6 +411,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -392,7 +425,7 @@ class Groq(Model):
 
         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
agno/models/huggingface/huggingface.py
CHANGED
@@ -191,19 +191,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +243,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +255,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +277,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +289,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +311,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +324,7 @@ class HuggingFace(Model):
 
         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +350,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +362,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
agno/models/ibm/watsonx.py
CHANGED
@@ -129,12 +129,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +152,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")
 
-
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict
 
     def invoke(
         self,
@@ -161,6 +167,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +178,7 @@ class WatsonX(Model):
 
         client = self.get_client()
 
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +203,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +213,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +239,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +274,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +284,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         # Get parameters for chat
         request_params = self.get_request_params(
agno/models/internlm/internlm.py
CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelAuthenticationError
 from agno.models.openai.like import OpenAILike
 
 
@@ -24,3 +25,19 @@ class InternLM(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("INTERNLM_API_KEY"))
     base_url: Optional[str] = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for INTERNLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("INTERNLM_API_KEY")
+        if not self.api_key:
+            raise ModelAuthenticationError(
+                message="INTERNLM_API_KEY not set. Please set the INTERNLM_API_KEY environment variable.",
+                model_name=self.name,
+            )
+        return super()._get_client_params()
agno/models/langdb/langdb.py
CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelAuthenticationError
 from agno.models.openai.like import OpenAILike
 
 
@@ -32,8 +33,19 @@ class LangDB(OpenAILike):
     default_headers: Optional[dict] = None
 
     def _get_client_params(self) -> Dict[str, Any]:
+        if not self.api_key:
+            self.api_key = getenv("LANGDB_API_KEY")
+        if not self.api_key:
+            raise ModelAuthenticationError(
+                message="LANGDB_API_KEY not set. Please set the LANGDB_API_KEY environment variable.",
+                model_name=self.name,
+            )
+
         if not self.project_id:
-            raise
+            raise ModelAuthenticationError(
+                message="LANGDB_PROJECT_ID not set. Please set the LANGDB_PROJECT_ID environment variable.",
+                model_name=self.name,
+            )
 
         if not self.base_url:
             self.base_url = f"{self.base_host_url}/{self.project_id}/v1"
agno/models/litellm/chat.py
CHANGED
@@ -10,8 +10,10 @@ from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
+from agno.tools.function import Function
 from agno.utils.log import log_debug, log_error, log_warning
 from agno.utils.openai import _format_file_for_message, audio_to_message, images_to_message
+from agno.utils.tokens import count_schema_tokens
 
 try:
     import litellm
@@ -57,8 +59,8 @@ class LiteLLM(Model):
         # Check for other present valid keys, e.g. OPENAI_API_KEY if self.id is an OpenAI model
         env_validation = validate_environment(model=self.id, api_base=self.api_base)
         if not env_validation.get("keys_in_environment"):
-
-                "
+            log_error(
+                "LITELLM_API_KEY not set. Please set the LITELLM_API_KEY or other valid environment variables."
             )
 
     def get_client(self) -> Any:
@@ -74,11 +76,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client
 
-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
        """Format messages for LiteLLM API."""
        formatted_messages = []
        for m in messages:
-
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}
 
             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +106,7 @@ class LiteLLM(Model):
                 if isinstance(msg["content"], str):
                     content_list = [{"type": "text", "text": msg["content"]}]
                 else:
-                    content_list = msg["content"]
+                    content_list = msg["content"] if isinstance(msg["content"], list) else []
                 for file in m.files:
                     file_part = _format_file_for_message(file)
                     if file_part:
@@ -186,10 +194,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +220,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
@@ -236,10 +246,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +272,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
@@ -466,3 +478,26 @@ class LiteLLM(Model):
         metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
 
         return metrics
+
+    def count_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[List[Union[Function, Dict[str, Any]]]] = None,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        formatted_messages = self._format_messages(messages, compress_tool_results=True)
+        formatted_tools = self._format_tools(tools) if tools else None
+        tokens = litellm.token_counter(
+            model=self.id,
+            messages=formatted_messages,
+            tools=formatted_tools,  # type: ignore
+        )
+        return tokens + count_schema_tokens(response_format, self.id)
+
+    async def acount_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[List[Union[Function, Dict[str, Any]]]] = None,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        return self.count_tokens(messages, tools, response_format)
agno/models/litellm/litellm_openai.py
CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelAuthenticationError
 from agno.models.openai.like import OpenAILike
 
 
@@ -23,3 +24,19 @@ class LiteLLMOpenAI(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("LITELLM_API_KEY"))
     base_url: str = "http://0.0.0.0:4000"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for LITELLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("LITELLM_API_KEY")
+        if not self.api_key:
+            raise ModelAuthenticationError(
+                message="LITELLM_API_KEY not set. Please set the LITELLM_API_KEY environment variable.",
+                model_name=self.name,
+            )
+        return super()._get_client_params()