agno 2.3.26__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +4 -0
- agno/agent/agent.py +1368 -541
- agno/agent/remote.py +13 -0
- agno/db/base.py +339 -0
- agno/db/postgres/async_postgres.py +116 -12
- agno/db/postgres/postgres.py +1229 -25
- agno/db/postgres/schemas.py +48 -1
- agno/db/sqlite/async_sqlite.py +119 -4
- agno/db/sqlite/schemas.py +51 -0
- agno/db/sqlite/sqlite.py +1173 -13
- agno/db/utils.py +37 -1
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +4 -0
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +2767 -2254
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +2 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +5 -5
- agno/knowledge/reader/docx_reader.py +2 -2
- agno/knowledge/reader/field_labeled_csv_reader.py +2 -2
- agno/knowledge/reader/firecrawl_reader.py +2 -2
- agno/knowledge/reader/json_reader.py +2 -2
- agno/knowledge/reader/markdown_reader.py +2 -2
- agno/knowledge/reader/pdf_reader.py +5 -4
- agno/knowledge/reader/pptx_reader.py +2 -2
- agno/knowledge/reader/reader_factory.py +110 -0
- agno/knowledge/reader/s3_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +2 -2
- agno/knowledge/reader/web_search_reader.py +2 -2
- agno/knowledge/reader/website_reader.py +5 -3
- agno/knowledge/reader/wikipedia_reader.py +2 -2
- agno/knowledge/reader/youtube_reader.py +2 -2
- agno/knowledge/utils.py +37 -29
- agno/learn/__init__.py +6 -0
- agno/learn/machine.py +35 -0
- agno/learn/schemas.py +82 -11
- agno/learn/stores/__init__.py +3 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/learned_knowledge.py +6 -6
- agno/models/anthropic/claude.py +24 -0
- agno/models/aws/bedrock.py +20 -0
- agno/models/base.py +48 -4
- agno/models/cohere/chat.py +25 -0
- agno/models/google/gemini.py +50 -5
- agno/models/litellm/chat.py +38 -0
- agno/models/openai/chat.py +7 -0
- agno/models/openrouter/openrouter.py +46 -0
- agno/models/response.py +16 -0
- agno/os/app.py +83 -44
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +1 -0
- agno/os/routers/agents/router.py +29 -16
- agno/os/routers/agents/schema.py +6 -4
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +466 -0
- agno/os/routers/evals/schemas.py +4 -3
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +3 -3
- agno/os/routers/memory/schemas.py +4 -2
- agno/os/routers/metrics/metrics.py +9 -11
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/teams/router.py +20 -8
- agno/os/routers/teams/schema.py +6 -4
- agno/os/routers/traces/traces.py +5 -5
- agno/os/routers/workflows/router.py +38 -11
- agno/os/routers/workflows/schema.py +1 -1
- agno/os/schema.py +92 -26
- agno/os/utils.py +84 -19
- agno/reasoning/anthropic.py +2 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +2 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +4 -10
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/reasoning/vertexai.py +2 -2
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/run/agent.py +57 -0
- agno/run/base.py +7 -0
- agno/run/team.py +57 -0
- agno/skills/agent_skills.py +10 -3
- agno/team/__init__.py +3 -1
- agno/team/team.py +1145 -326
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/function.py +35 -83
- agno/tools/knowledge.py +9 -4
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/parallel.py +0 -7
- agno/tools/reasoning.py +30 -23
- agno/tools/tavily.py +4 -1
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +48 -47
- agno/utils/agent.py +42 -5
- agno/utils/events.py +160 -2
- agno/utils/print_response/agent.py +0 -31
- agno/utils/print_response/team.py +0 -2
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/team.py +61 -11
- agno/vectordb/lancedb/lance_db.py +4 -1
- agno/vectordb/mongodb/mongodb.py +1 -1
- agno/vectordb/qdrant/qdrant.py +4 -4
- agno/workflow/__init__.py +3 -1
- agno/workflow/condition.py +0 -21
- agno/workflow/loop.py +0 -21
- agno/workflow/parallel.py +0 -21
- agno/workflow/router.py +0 -21
- agno/workflow/step.py +117 -24
- agno/workflow/steps.py +0 -21
- agno/workflow/workflow.py +427 -63
- {agno-2.3.26.dist-info → agno-2.4.0.dist-info}/METADATA +46 -76
- {agno-2.3.26.dist-info → agno-2.4.0.dist-info}/RECORD +128 -117
- {agno-2.3.26.dist-info → agno-2.4.0.dist-info}/WHEEL +0 -0
- {agno-2.3.26.dist-info → agno-2.4.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.26.dist-info → agno-2.4.0.dist-info}/top_level.txt +0 -0
agno/learn/stores/learned_knowledge.py
CHANGED

@@ -919,7 +919,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)

         # Build metadata for filtering
-        # Metadata must be passed separately to
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -932,7 +932,7 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags

-        self.knowledge.
+        self.knowledge.insert(
             name=learning_data["title"],
             text_content=text_content,
             reader=TextReader(),
@@ -989,7 +989,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)

         # Build metadata for filtering - THIS IS THE KEY FIX!
-        # Metadata must be passed separately to
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -1002,8 +1002,8 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags

-        if hasattr(self.knowledge, "
-            await self.knowledge.
+        if hasattr(self.knowledge, "ainsert"):
+            await self.knowledge.ainsert(
                 name=learning_data["title"],
                 text_content=text_content,
                 reader=TextReader(),
@@ -1011,7 +1011,7 @@ class LearnedKnowledgeStore(LearningStore):
                 metadata=filter_metadata,  # Pass metadata for filtering
             )
         else:
-            self.knowledge.
+            self.knowledge.insert(
                 name=learning_data["title"],
                 text_content=text_content,
                 reader=TextReader(),
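For context, a minimal sketch of the calling pattern this fix enforces, assuming the Knowledge and TextReader import paths from the file list above; the helper function and its arguments are illustrative, and only the insert(name=..., text_content=..., reader=..., metadata=...) call shape comes from the diff:

from typing import Any, Dict, List, Optional

from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.text_reader import TextReader


def store_learning(
    knowledge: Knowledge, title: str, text: str, namespace: str, tags: Optional[List[str]] = None
) -> None:
    # Build metadata for filtering
    filter_metadata: Dict[str, Any] = {"namespace": namespace}
    if tags:
        filter_metadata["tags"] = tags
    # Metadata must be passed separately to insert for filters to work
    knowledge.insert(
        name=title,
        text_content=text,
        reader=TextReader(),
        metadata=filter_metadata,
    )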
agno/models/anthropic/claude.py
CHANGED

@@ -406,6 +406,30 @@ class Claude(Model):
         self.async_client = AsyncAnthropicClient(**_client_params)
         return self.async_client

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "max_tokens": self.max_tokens,
+                "thinking": self.thinking,
+                "temperature": self.temperature,
+                "stop_sequences": self.stop_sequences,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "cache_system_prompt": self.cache_system_prompt,
+                "extended_cache_time": self.extended_cache_time,
+                "betas": self.betas,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def count_tokens(
         self,
         messages: List[Message],
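The same to_dict override is added to Bedrock, Cohere, and Gemini below with provider-specific keys. A hedged sketch of how it behaves, assuming unset parameters default to None and that no API-key check fires at construction time; the model id is illustrative:

from agno.models.anthropic import Claude

model = Claude(id="claude-sonnet-4-5", max_tokens=2048, temperature=0.2)
config = model.to_dict()
# Explicitly set parameters are serialized; None-valued ones are dropped
assert config["max_tokens"] == 2048 and config["temperature"] == 0.2
assert "top_k" not in config  # never set, so filtered out by the None check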
agno/models/aws/bedrock.py
CHANGED

@@ -166,6 +166,26 @@ class AwsBedrock(Model):

         return self.async_session.client(**client_kwargs)

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "aws_region": self.aws_region,
+                "max_tokens": self.max_tokens,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "stop_sequences": self.stop_sequences,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _format_tools_for_request(self, tools: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
         """
         Format the tools for the request.
agno/models/base.py
CHANGED

@@ -1293,12 +1293,23 @@ class Model(ABC):
         if _compression_manager is not None and _compression_manager.should_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             _compression_manager.compress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )

         assistant_message = Message(role=self.assistant_message_role)
         # Create assistant message and stream data
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             for response in self.process_response_stream(
@@ -1334,6 +1345,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)

+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1513,12 +1537,23 @@ class Model(ABC):
         if _compression_manager is not None and await _compression_manager.ashould_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             await _compression_manager.acompress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )

         # Create assistant message and stream data
         assistant_message = Message(role=self.assistant_message_role)
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             async for model_response in self.aprocess_response_stream(
@@ -1554,6 +1589,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)

+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1971,8 +2019,6 @@ class Model(ABC):
             and function_call.function._run_context.session_state is not None
         ):
             function_execution_result.updated_session_state = function_call.function._run_context.session_state
-        elif function_call.function._session_state is not None:
-            function_execution_result.updated_session_state = function_call.function._session_state
         else:
             from agno.tools.function import ToolResult

@@ -2532,8 +2578,6 @@ class Model(ABC):
             and function_call.function._run_context.session_state is not None
         ):
             updated_session_state = function_call.function._run_context.session_state
-        elif function_call.function._session_state is not None:
-            updated_session_state = function_call.function._session_state

         if not (
             async_function_call_output is not None
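A hedged sketch of consuming the new lifecycle events; the `responses` iterable stands in for whatever yields the internal ModelResponse stream, and only the event names and fields shown in this diff and in agno/models/response.py below are relied on:

from typing import Iterable

from agno.models.response import ModelResponse, ModelResponseEvent


def log_request_lifecycle(responses: Iterable[ModelResponse]) -> None:
    for r in responses:
        if r.event == ModelResponseEvent.compression_completed.value:
            # Populated from the compression manager's stats dict
            print("context compressed:", r.compression_stats)
        elif r.event == ModelResponseEvent.model_request_completed.value:
            # Per-request token metrics copied from the assistant message
            print("tokens used:", r.input_tokens, r.output_tokens, r.total_tokens)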
agno/models/cohere/chat.py
CHANGED

@@ -115,6 +115,31 @@ class Cohere(Model):
         self.async_client = CohereAsyncClient(**_client_params)
         return self.async_client  # type: ignore

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+                "seed": self.seed,
+                "frequency_penalty": self.frequency_penalty,
+                "presence_penalty": self.presence_penalty,
+                "logprobs": self.logprobs,
+                "strict_tools": self.strict_tools,
+                "add_chat_history": self.add_chat_history,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
agno/models/google/gemini.py
CHANGED

@@ -172,6 +172,45 @@ class Gemini(Model):
         self.client = genai.Client(**client_params)
         return self.client

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "search": self.search,
+                "grounding": self.grounding,
+                "grounding_dynamic_threshold": self.grounding_dynamic_threshold,
+                "url_context": self.url_context,
+                "vertexai_search": self.vertexai_search,
+                "vertexai_search_datastore": self.vertexai_search_datastore,
+                "file_search_store_names": self.file_search_store_names,
+                "file_search_metadata_filter": self.file_search_metadata_filter,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "max_output_tokens": self.max_output_tokens,
+                "stop_sequences": self.stop_sequences,
+                "logprobs": self.logprobs,
+                "presence_penalty": self.presence_penalty,
+                "frequency_penalty": self.frequency_penalty,
+                "seed": self.seed,
+                "response_modalities": self.response_modalities,
+                "thinking_budget": self.thinking_budget,
+                "include_thoughts": self.include_thoughts,
+                "thinking_level": self.thinking_level,
+                "vertexai": self.vertexai,
+                "project_id": self.project_id,
+                "location": self.location,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _append_file_search_tool(self, builtin_tools: List[Tool]) -> None:
         """Append Gemini File Search tool to builtin_tools if file search is enabled.

@@ -672,7 +711,6 @@ class Gemini(Model):
             compress_tool_results: Whether to compress tool results.
         """
         formatted_messages: List = []
-        file_content: Optional[Union[GeminiFile, Part]] = None
         system_message = None

         for message in messages:
@@ -795,14 +833,11 @@ class Gemini(Model):
                 for file in message.files:
                     file_content = self._format_file_for_message(file)
                     if isinstance(file_content, Part):
-
+                        message_parts.append(file_content)

             final_message = Content(role=role, parts=message_parts)
             formatted_messages.append(final_message)

-        if isinstance(file_content, GeminiFile):
-            formatted_messages.insert(0, file_content)
-
         return formatted_messages, system_message

     def _format_audio_for_message(self, audio: Audio) -> Optional[Union[Part, GeminiFile]]:
@@ -936,6 +971,16 @@ class Gemini(Model):

         # Case 2: File is a URL
         elif file.url is not None:
+            # Case 2a: GCS URI (gs://) - pass directly to Gemini (supports up to 2GB)
+            if file.url.startswith("gs://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2b: HTTPS URL with mime_type - pass directly to Gemini (supports up to 100MB)
+            # This enables pre-signed URLs from S3/Azure and public URLs without downloading
+            if file.url.startswith("https://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2c: URL without mime_type - download and detect (existing behavior)
             url_content = file.file_url_content
             if url_content is not None:
                 content, mime_type = url_content
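A hedged sketch of the new URL fast-path for files; the bucket path and model id are illustrative, the agno.media.File import is assumed from the package layout, and the File fields (url, mime_type) are the ones the diff reads:

from agno.media import File
from agno.models.google import Gemini

model = Gemini(id="gemini-2.0-flash")

# A gs:// or https:// URL with an explicit mime_type is forwarded to the API
# via Part.from_uri instead of being downloaded and re-uploaded first.
report = File(url="gs://example-bucket/report.pdf", mime_type="application/pdf")
# Without mime_type, the previous behavior (download and detect) still applies.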
agno/models/litellm/chat.py
CHANGED

@@ -1,3 +1,4 @@
+import copy
 import json
 from dataclasses import dataclass
 from os import getenv
@@ -48,10 +49,18 @@ class LiteLLM(Model):

     client: Optional[Any] = None

+    # Store the original client to preserve it across copies (e.g., for Router instances)
+    _original_client: Optional[Any] = None
+
     def __post_init__(self):
         """Initialize the model after the dataclass initialization."""
         super().__post_init__()

+        # Store the original client if provided (e.g., Router instance)
+        # This ensures the client is preserved when the model is copied for background tasks
+        if self.client is not None and self._original_client is None:
+            self._original_client = self.client
+
         # Set up API key from environment variable if not already set
         if not self.client and not self.api_key:
             self.api_key = getenv("LITELLM_API_KEY")
@@ -70,12 +79,41 @@ class LiteLLM(Model):
         Returns:
             Any: An instance of the LiteLLM client.
         """
+        # First check if we have a current client
         if self.client is not None:
             return self.client

+        # Check if we have an original client (e.g., Router) that was preserved
+        # This handles the case where the model was copied for background tasks
+        if self._original_client is not None:
+            self.client = self._original_client
+            return self.client
+
         self.client = litellm
         return self.client

+    def __deepcopy__(self, memo: Dict[int, Any]) -> "LiteLLM":
+        """
+        Custom deepcopy to preserve the client (e.g., Router) across copies.
+
+        This is needed because when the model is copied for background tasks
+        (memory, summarization), the client reference needs to be preserved.
+        """
+        # Create a shallow copy first
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+
+        # Copy all attributes, but keep the same client reference
+        for k, v in self.__dict__.items():
+            if k in ("client", "_original_client"):
+                # Keep the same client reference (don't deepcopy Router instances)
+                setattr(result, k, v)
+            else:
+                setattr(result, k, copy.deepcopy(v, memo))
+
+        return result
+
     def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
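A hedged sketch of the behavior the custom __deepcopy__ guarantees; the stand-in object below takes the place of a real litellm Router, and the construction assumes no client validation runs at init time:

import copy

from agno.models.litellm import LiteLLM

router = object()  # stand-in for a litellm.Router instance
model = LiteLLM(id="gpt-4o", client=router)

# Copying the model (as agno does for background memory/summarization tasks)
# keeps the very same client object instead of attempting to deep-copy it.
clone = copy.deepcopy(model)
assert clone.client is router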
agno/models/openai/chat.py
CHANGED

@@ -305,6 +305,13 @@ class OpenAIChat(Model):
         cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
         return cleaned_dict

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChat":
+        """
+        Create an OpenAIChat model from a dictionary.
+        """
+        return cls(**data)
+
     def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by OpenAI.
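A hedged round-trip sketch for the new from_dict hook, assuming every key emitted by to_dict() is also accepted by the constructor; model id and temperature are illustrative:

from agno.models.openai import OpenAIChat

original = OpenAIChat(id="gpt-4o-mini", temperature=0.1)
restored = OpenAIChat.from_dict(original.to_dict())

assert restored.id == original.id
assert restored.temperature == original.temperature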
agno/models/openrouter/openrouter.py
CHANGED

@@ -2,10 +2,13 @@ from dataclasses import dataclass
 from os import getenv
 from typing import Any, Dict, List, Optional, Type, Union

+from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from pydantic import BaseModel

 from agno.exceptions import ModelAuthenticationError
+from agno.models.message import Message
 from agno.models.openai.like import OpenAILike
+from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput


@@ -83,3 +86,46 @@ class OpenRouter(OpenAILike):
             request_params["extra_body"] = extra_body

         return request_params
+
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
+        message_dict = super()._format_message(message, compress_tool_results)
+
+        if message.role == "assistant" and message.provider_data:
+            if message.provider_data.get("reasoning_details"):
+                message_dict["reasoning_details"] = message.provider_data["reasoning_details"]
+
+        return message_dict
+
+    def _parse_provider_response(
+        self,
+        response: ChatCompletion,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> ModelResponse:
+        model_response = super()._parse_provider_response(response, response_format)
+
+        if response.choices and len(response.choices) > 0:
+            response_message = response.choices[0].message
+            if hasattr(response_message, "reasoning_details") and response_message.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = response_message.reasoning_details
+            elif hasattr(response_message, "model_extra"):
+                extra = getattr(response_message, "model_extra", None)
+                if extra and isinstance(extra, dict) and extra.get("reasoning_details"):
+                    if model_response.provider_data is None:
+                        model_response.provider_data = {}
+                    model_response.provider_data["reasoning_details"] = extra["reasoning_details"]
+
+        return model_response
+
+    def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
+        model_response = super()._parse_provider_response_delta(response_delta)
+
+        if response_delta.choices and len(response_delta.choices) > 0:
+            choice_delta = response_delta.choices[0].delta
+            if hasattr(choice_delta, "reasoning_details") and choice_delta.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = choice_delta.reasoning_details
+
+        return model_response
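A hedged sketch of the reasoning_details round-trip on the request side; the model id, API key, and the shape of the reasoning_details payload are illustrative, and only the provider_data handling shown in the diff is relied on:

from agno.models.message import Message
from agno.models.openrouter import OpenRouter

model = OpenRouter(id="anthropic/claude-sonnet-4.5", api_key="sk-or-...")  # illustrative

msg = Message(
    role="assistant",
    content="...",
    provider_data={"reasoning_details": [{"type": "reasoning.summary", "summary": "..."}]},
)
# Preserved reasoning blocks are replayed on the outgoing assistant message,
# so the provider can continue an existing reasoning chain.
assert "reasoning_details" in model._format_message(msg)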
agno/models/response.py
CHANGED

@@ -16,6 +16,10 @@ class ModelResponseEvent(str, Enum):
     tool_call_started = "ToolCallStarted"
     tool_call_completed = "ToolCallCompleted"
     assistant_response = "AssistantResponse"
+    compression_started = "CompressionStarted"
+    compression_completed = "CompressionCompleted"
+    model_request_started = "ModelRequestStarted"
+    model_request_completed = "ModelRequestCompleted"


 @dataclass
@@ -124,6 +128,18 @@ class ModelResponse:

     updated_session_state: Optional[Dict[str, Any]] = None

+    # Compression stats
+    compression_stats: Optional[Dict[str, Any]] = None
+
+    # Model request metrics (for model_request_completed events)
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+    time_to_first_token: Optional[float] = None
+    reasoning_tokens: Optional[int] = None
+    cache_read_tokens: Optional[int] = None
+    cache_write_tokens: Optional[int] = None
+
     def to_dict(self) -> Dict[str, Any]:
         """Serialize ModelResponse to dictionary for caching."""
         _dict = asdict(self)
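Finally, a small sketch of the new ModelResponse fields; all values are illustrative, and the assertions rely only on the enum values and default-None dataclass fields shown above:

from agno.models.response import ModelResponse, ModelResponseEvent

r = ModelResponse(
    event=ModelResponseEvent.model_request_completed.value,
    input_tokens=128,
    output_tokens=64,
    total_tokens=192,
)
assert r.event == "ModelRequestCompleted"
# Plain dataclass fields, so the asdict()-based to_dict() used for caching carries them along.
assert r.total_tokens == 192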