agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +4 -0
- agno/agent/agent.py +1368 -541
- agno/agent/remote.py +13 -0
- agno/db/base.py +339 -0
- agno/db/postgres/async_postgres.py +116 -12
- agno/db/postgres/postgres.py +1242 -25
- agno/db/postgres/schemas.py +48 -1
- agno/db/sqlite/async_sqlite.py +119 -4
- agno/db/sqlite/schemas.py +51 -0
- agno/db/sqlite/sqlite.py +1186 -13
- agno/db/utils.py +37 -1
- agno/integrations/discord/client.py +12 -1
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +4 -0
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +3722 -2182
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +2 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +236 -13
- agno/knowledge/reader/docx_reader.py +2 -2
- agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
- agno/knowledge/reader/firecrawl_reader.py +2 -2
- agno/knowledge/reader/json_reader.py +2 -2
- agno/knowledge/reader/markdown_reader.py +2 -2
- agno/knowledge/reader/pdf_reader.py +5 -4
- agno/knowledge/reader/pptx_reader.py +2 -2
- agno/knowledge/reader/reader_factory.py +118 -1
- agno/knowledge/reader/s3_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +2 -2
- agno/knowledge/reader/web_search_reader.py +2 -2
- agno/knowledge/reader/website_reader.py +5 -3
- agno/knowledge/reader/wikipedia_reader.py +2 -2
- agno/knowledge/reader/youtube_reader.py +2 -2
- agno/knowledge/remote_content/__init__.py +29 -0
- agno/knowledge/remote_content/config.py +204 -0
- agno/knowledge/remote_content/remote_content.py +74 -17
- agno/knowledge/utils.py +37 -29
- agno/learn/__init__.py +6 -0
- agno/learn/machine.py +35 -0
- agno/learn/schemas.py +82 -11
- agno/learn/stores/__init__.py +3 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/learned_knowledge.py +6 -6
- agno/models/anthropic/claude.py +24 -0
- agno/models/aws/bedrock.py +20 -0
- agno/models/base.py +60 -6
- agno/models/cerebras/cerebras.py +34 -2
- agno/models/cohere/chat.py +25 -0
- agno/models/google/gemini.py +50 -5
- agno/models/litellm/chat.py +38 -0
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/openai/chat.py +25 -1
- agno/models/openrouter/openrouter.py +46 -0
- agno/models/perplexity/perplexity.py +2 -0
- agno/models/response.py +16 -0
- agno/os/app.py +83 -44
- agno/os/interfaces/slack/router.py +10 -1
- agno/os/interfaces/whatsapp/router.py +6 -0
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +1 -0
- agno/os/routers/agents/router.py +29 -16
- agno/os/routers/agents/schema.py +6 -4
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/evals/schemas.py +4 -3
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +128 -3
- agno/os/routers/knowledge/schemas.py +12 -0
- agno/os/routers/memory/schemas.py +4 -2
- agno/os/routers/metrics/metrics.py +9 -11
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/teams/router.py +20 -8
- agno/os/routers/teams/schema.py +6 -4
- agno/os/routers/traces/traces.py +5 -5
- agno/os/routers/workflows/router.py +38 -11
- agno/os/routers/workflows/schema.py +1 -1
- agno/os/schema.py +92 -26
- agno/os/utils.py +84 -19
- agno/reasoning/anthropic.py +2 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +2 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +4 -10
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/reasoning/vertexai.py +2 -2
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/run/agent.py +59 -0
- agno/run/base.py +7 -0
- agno/run/team.py +57 -0
- agno/skills/agent_skills.py +10 -3
- agno/team/__init__.py +3 -1
- agno/team/team.py +1165 -330
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/function.py +35 -83
- agno/tools/knowledge.py +9 -4
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/parallel.py +0 -7
- agno/tools/reasoning.py +30 -23
- agno/tools/tavily.py +4 -1
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +48 -47
- agno/utils/agent.py +42 -5
- agno/utils/events.py +160 -2
- agno/utils/print_response/agent.py +0 -31
- agno/utils/print_response/team.py +0 -2
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/team.py +61 -11
- agno/vectordb/lancedb/lance_db.py +4 -1
- agno/vectordb/mongodb/mongodb.py +1 -1
- agno/vectordb/pgvector/pgvector.py +3 -3
- agno/vectordb/qdrant/qdrant.py +4 -4
- agno/workflow/__init__.py +3 -1
- agno/workflow/condition.py +0 -21
- agno/workflow/loop.py +0 -21
- agno/workflow/parallel.py +0 -21
- agno/workflow/router.py +0 -21
- agno/workflow/step.py +117 -24
- agno/workflow/steps.py +0 -21
- agno/workflow/workflow.py +427 -63
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0
agno/learn/stores/learned_knowledge.py
CHANGED

@@ -919,7 +919,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)

         # Build metadata for filtering
-        # Metadata must be passed separately to
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -932,7 +932,7 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags

-        self.knowledge.
+        self.knowledge.insert(
            name=learning_data["title"],
            text_content=text_content,
            reader=TextReader(),
@@ -989,7 +989,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)

         # Build metadata for filtering - THIS IS THE KEY FIX!
-        # Metadata must be passed separately to
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -1002,8 +1002,8 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags

-        if hasattr(self.knowledge, "
-            await self.knowledge.
+        if hasattr(self.knowledge, "ainsert"):
+            await self.knowledge.ainsert(
                name=learning_data["title"],
                text_content=text_content,
                reader=TextReader(),
@@ -1011,7 +1011,7 @@ class LearnedKnowledgeStore(LearningStore):
                metadata=filter_metadata,  # Pass metadata for filtering
            )
         else:
-            self.knowledge.
+            self.knowledge.insert(
                name=learning_data["title"],
                text_content=text_content,
                reader=TextReader(),
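The fix above routes learning writes through Knowledge.insert/ainsert and, per the comments, passes the filter metadata as its own argument. A small illustrative call mirroring that code path (the knowledge object, title, text, and tags are placeholders; the TextReader import path is assumed):

from agno.knowledge.reader.text_reader import TextReader  # import path assumed

def store_learning(knowledge) -> None:
    # `knowledge` is an agno Knowledge instance; values below are placeholders.
    knowledge.insert(
        name="Prefer concise answers",
        text_content="User prefers short, direct replies.",
        reader=TextReader(),
        metadata={"namespace": "default", "tags": ["style"]},  # passed separately so filters work
    )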
agno/models/anthropic/claude.py
CHANGED
@@ -406,6 +406,30 @@ class Claude(Model):
         self.async_client = AsyncAnthropicClient(**_client_params)
         return self.async_client

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "max_tokens": self.max_tokens,
+                "thinking": self.thinking,
+                "temperature": self.temperature,
+                "stop_sequences": self.stop_sequences,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "cache_system_prompt": self.cache_system_prompt,
+                "extended_cache_time": self.extended_cache_time,
+                "betas": self.betas,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def count_tokens(
         self,
         messages: List[Message],
agno/models/aws/bedrock.py
CHANGED
@@ -166,6 +166,26 @@ class AwsBedrock(Model):

         return self.async_session.client(**client_kwargs)

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "aws_region": self.aws_region,
+                "max_tokens": self.max_tokens,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "stop_sequences": self.stop_sequences,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _format_tools_for_request(self, tools: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
         """
         Format the tools for the request.
agno/models/base.py
CHANGED
@@ -1293,12 +1293,23 @@ class Model(ABC):
         if _compression_manager is not None and _compression_manager.should_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             _compression_manager.compress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )

         assistant_message = Message(role=self.assistant_message_role)
         # Create assistant message and stream data
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             for response in self.process_response_stream(
@@ -1334,6 +1345,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)

+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1513,12 +1537,23 @@ class Model(ABC):
         if _compression_manager is not None and await _compression_manager.ashould_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             await _compression_manager.acompress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )

         # Create assistant message and stream data
         assistant_message = Message(role=self.assistant_message_role)
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             async for model_response in self.aprocess_response_stream(
@@ -1554,6 +1589,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)

+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1940,6 +1988,7 @@ class Model(ABC):

                 if isinstance(item, CustomEvent):
                     function_call_output += str(item)
+                    item.tool_call_id = function_call.call_id

                 # For WorkflowCompletedEvent, extract content for final output
                 from agno.run.workflow import WorkflowCompletedEvent
@@ -1971,8 +2020,6 @@ class Model(ABC):
             and function_call.function._run_context.session_state is not None
         ):
             function_execution_result.updated_session_state = function_call.function._run_context.session_state
-        elif function_call.function._session_state is not None:
-            function_execution_result.updated_session_state = function_call.function._session_state
         else:
             from agno.tools.function import ToolResult

@@ -2384,6 +2431,7 @@ class Model(ABC):

                 if isinstance(item, CustomEvent):
                     function_call_output += str(item)
+                    item.tool_call_id = function_call.call_id

                 # For WorkflowCompletedEvent, extract content for final output
                 from agno.run.workflow import WorkflowCompletedEvent
@@ -2461,8 +2509,12 @@ class Model(ABC):
             if async_gen_index in async_generator_outputs:
                 _, async_function_call_output, error = async_generator_outputs[async_gen_index]
                 if error:
-
-
+                    # Handle async generator exceptions gracefully like sync generators
+                    log_error(
+                        f"Error while iterating async generator for {function_call.function.name}: {error}"
+                    )
+                    function_call.error = str(error)
+                    function_call_success = False
                     break
             async_gen_index += 1

@@ -2509,6 +2561,10 @@ class Model(ABC):
                 yield ModelResponse(content=item.content)
                 continue

+            elif isinstance(item, CustomEvent):
+                function_call_output += str(item)
+                item.tool_call_id = function_call.call_id
+
             # Yield the event itself to bubble it up
             yield item
         else:
@@ -2532,8 +2588,6 @@ class Model(ABC):
             and function_call.function._run_context.session_state is not None
         ):
             updated_session_state = function_call.function._run_context.session_state
-        elif function_call.function._session_state is not None:
-            updated_session_state = function_call.function._session_state

         if not (
             async_function_call_output is not None
agno/models/cerebras/cerebras.py
CHANGED
@@ -97,6 +97,35 @@ class Cerebras(Model):
         client_params.update(self.client_params)
         return client_params

+    def _ensure_additional_properties_false(self, schema: Dict[str, Any]) -> None:
+        """
+        Recursively ensure all object types have additionalProperties: false.
+        Cerebras API requires this for JSON schema validation.
+        """
+        if not isinstance(schema, dict):
+            return
+
+        # Set additionalProperties: false for object types
+        if schema.get("type") == "object":
+            schema["additionalProperties"] = False
+
+        # Recursively process nested schemas
+        if "properties" in schema and isinstance(schema["properties"], dict):
+            for prop_schema in schema["properties"].values():
+                self._ensure_additional_properties_false(prop_schema)
+
+        if "items" in schema:
+            self._ensure_additional_properties_false(schema["items"])
+
+        if "$defs" in schema and isinstance(schema["$defs"], dict):
+            for def_schema in schema["$defs"].values():
+                self._ensure_additional_properties_false(def_schema)
+
+        for key in ["allOf", "anyOf", "oneOf"]:
+            if key in schema and isinstance(schema[key], list):
+                for item in schema[key]:
+                    self._ensure_additional_properties_false(item)
+
     def get_client(self) -> CerebrasClient:
         """
         Returns a Cerebras client.
@@ -191,8 +220,11 @@ class Cerebras(Model):
         ):
             # Ensure json_schema has strict parameter set
             schema = response_format["json_schema"]
-            if isinstance(schema.get("schema"), dict)
-
+            if isinstance(schema.get("schema"), dict):
+                if "strict" not in schema:
+                    schema["strict"] = self.strict_output
+                # Cerebras requires additionalProperties: false for all object types
+                self._ensure_additional_properties_false(schema["schema"])

         request_params["response_format"] = response_format

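The helper walks properties, items, $defs, and allOf/anyOf/oneOf branches, so nested object schemas also end up with additionalProperties: false before the response_format is sent. An illustrative transformation (the model id and API key are placeholders; the import path is assumed):

from agno.models.cerebras import Cerebras  # import path assumed

model = Cerebras(id="llama-3.3-70b", api_key="placeholder")
schema = {
    "type": "object",
    "properties": {
        "user": {"type": "object", "properties": {"name": {"type": "string"}}},
        "tags": {"type": "array", "items": {"type": "object"}},
    },
}
model._ensure_additional_properties_false(schema)
# Every {"type": "object"} node above now also carries "additionalProperties": False.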
agno/models/cohere/chat.py
CHANGED
@@ -115,6 +115,31 @@ class Cohere(Model):
         self.async_client = CohereAsyncClient(**_client_params)
         return self.async_client  # type: ignore

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+                "seed": self.seed,
+                "frequency_penalty": self.frequency_penalty,
+                "presence_penalty": self.presence_penalty,
+                "logprobs": self.logprobs,
+                "strict_tools": self.strict_tools,
+                "add_chat_history": self.add_chat_history,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
agno/models/google/gemini.py
CHANGED
@@ -172,6 +172,45 @@ class Gemini(Model):
         self.client = genai.Client(**client_params)
         return self.client

+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "search": self.search,
+                "grounding": self.grounding,
+                "grounding_dynamic_threshold": self.grounding_dynamic_threshold,
+                "url_context": self.url_context,
+                "vertexai_search": self.vertexai_search,
+                "vertexai_search_datastore": self.vertexai_search_datastore,
+                "file_search_store_names": self.file_search_store_names,
+                "file_search_metadata_filter": self.file_search_metadata_filter,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "max_output_tokens": self.max_output_tokens,
+                "stop_sequences": self.stop_sequences,
+                "logprobs": self.logprobs,
+                "presence_penalty": self.presence_penalty,
+                "frequency_penalty": self.frequency_penalty,
+                "seed": self.seed,
+                "response_modalities": self.response_modalities,
+                "thinking_budget": self.thinking_budget,
+                "include_thoughts": self.include_thoughts,
+                "thinking_level": self.thinking_level,
+                "vertexai": self.vertexai,
+                "project_id": self.project_id,
+                "location": self.location,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _append_file_search_tool(self, builtin_tools: List[Tool]) -> None:
         """Append Gemini File Search tool to builtin_tools if file search is enabled.

@@ -672,7 +711,6 @@ class Gemini(Model):
         compress_tool_results: Whether to compress tool results.
         """
         formatted_messages: List = []
-        file_content: Optional[Union[GeminiFile, Part]] = None
         system_message = None

         for message in messages:
@@ -795,14 +833,11 @@ class Gemini(Model):
                 for file in message.files:
                     file_content = self._format_file_for_message(file)
                     if isinstance(file_content, Part):
-
+                        message_parts.append(file_content)

             final_message = Content(role=role, parts=message_parts)
             formatted_messages.append(final_message)

-        if isinstance(file_content, GeminiFile):
-            formatted_messages.insert(0, file_content)
-
         return formatted_messages, system_message

     def _format_audio_for_message(self, audio: Audio) -> Optional[Union[Part, GeminiFile]]:
@@ -936,6 +971,16 @@ class Gemini(Model):

         # Case 2: File is a URL
         elif file.url is not None:
+            # Case 2a: GCS URI (gs://) - pass directly to Gemini (supports up to 2GB)
+            if file.url.startswith("gs://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2b: HTTPS URL with mime_type - pass directly to Gemini (supports up to 100MB)
+            # This enables pre-signed URLs from S3/Azure and public URLs without downloading
+            if file.url.startswith("https://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2c: URL without mime_type - download and detect (existing behavior)
             url_content = file.file_url_content
             if url_content is not None:
                 content, mime_type = url_content
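With the new cases, a file that carries both a URL and a mime_type is handed to Gemini via Part.from_uri instead of being downloaded first, which covers GCS URIs and pre-signed HTTPS URLs. A sketch of inputs that now take the direct path (assuming agno.media.File exposes url and mime_type, as the diff uses; URLs are placeholders):

from agno.media import File  # import path assumed

gcs_report = File(url="gs://my-bucket/reports/q3.pdf", mime_type="application/pdf")
presigned = File(url="https://example.com/signed/q3.pdf", mime_type="application/pdf")
# Both are forwarded as Part.from_uri(file_uri=..., mime_type=...).
# A URL without a mime_type still falls back to downloading and detecting the type (Case 2c).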
agno/models/litellm/chat.py
CHANGED
@@ -1,3 +1,4 @@
+import copy
 import json
 from dataclasses import dataclass
 from os import getenv
@@ -48,10 +49,18 @@ class LiteLLM(Model):

     client: Optional[Any] = None

+    # Store the original client to preserve it across copies (e.g., for Router instances)
+    _original_client: Optional[Any] = None
+
     def __post_init__(self):
         """Initialize the model after the dataclass initialization."""
         super().__post_init__()

+        # Store the original client if provided (e.g., Router instance)
+        # This ensures the client is preserved when the model is copied for background tasks
+        if self.client is not None and self._original_client is None:
+            self._original_client = self.client
+
         # Set up API key from environment variable if not already set
         if not self.client and not self.api_key:
             self.api_key = getenv("LITELLM_API_KEY")
@@ -70,12 +79,41 @@ class LiteLLM(Model):
         Returns:
             Any: An instance of the LiteLLM client.
         """
+        # First check if we have a current client
         if self.client is not None:
             return self.client

+        # Check if we have an original client (e.g., Router) that was preserved
+        # This handles the case where the model was copied for background tasks
+        if self._original_client is not None:
+            self.client = self._original_client
+            return self.client
+
         self.client = litellm
         return self.client

+    def __deepcopy__(self, memo: Dict[int, Any]) -> "LiteLLM":
+        """
+        Custom deepcopy to preserve the client (e.g., Router) across copies.
+
+        This is needed because when the model is copied for background tasks
+        (memory, summarization), the client reference needs to be preserved.
+        """
+        # Create a shallow copy first
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+
+        # Copy all attributes, but keep the same client reference
+        for k, v in self.__dict__.items():
+            if k in ("client", "_original_client"):
+                # Keep the same client reference (don't deepcopy Router instances)
+                setattr(result, k, v)
+            else:
+                setattr(result, k, copy.deepcopy(v, memo))
+
+        return result
+
     def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
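The point of _original_client and the custom __deepcopy__ is that a user-supplied client (such as a litellm Router) survives the deep copies agno makes for background memory and summarization work. A minimal sketch of the preserved-reference behaviour (the client object is a stand-in and the import path is assumed):

import copy

from agno.models.litellm import LiteLLM  # import path assumed

router = object()  # stand-in for a litellm Router instance
model = LiteLLM(id="gpt-4o", client=router)

clone = copy.deepcopy(model)          # roughly what background tasks do with the model
assert clone.get_client() is router   # same client object, not a re-created or deep-copied one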
agno/models/n1n/n1n.py
ADDED
@@ -0,0 +1,57 @@
+from dataclasses import dataclass, field
+from os import getenv
+from typing import Any, Dict, Optional
+
+from agno.exceptions import ModelAuthenticationError
+from agno.models.openai.like import OpenAILike
+
+
+@dataclass
+class N1N(OpenAILike):
+    """
+    A class for interacting with n1n.ai models.
+
+    Attributes:
+        id (str): The model id. Defaults to "gpt-4o".
+        name (str): The model name. Defaults to "N1N".
+        provider (str): The provider name. Defaults to "N1N".
+        api_key (Optional[str]): The API key.
+        base_url (str): The base URL. Defaults to "https://api.n1n.ai/v1".
+    """
+
+    id: str = "gpt-4o"
+    name: str = "N1N"
+    provider: str = "N1N"
+
+    api_key: Optional[str] = field(default_factory=lambda: getenv("N1N_API_KEY"))
+    base_url: str = "https://api.n1n.ai/v1"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        # Fetch API key from env if not already set
+        if not self.api_key:
+            self.api_key = getenv("N1N_API_KEY")
+        if not self.api_key:
+            # Raise error immediately if key is missing
+            raise ModelAuthenticationError(
+                message="N1N_API_KEY not set. Please set the N1N_API_KEY environment variable.",
+                model_name=self.name,
+            )
+
+        # Define base client params
+        base_params = {
+            "api_key": self.api_key,
+            "organization": self.organization,
+            "base_url": self.base_url,
+            "timeout": self.timeout,
+            "max_retries": self.max_retries,
+            "default_headers": self.default_headers,
+            "default_query": self.default_query,
+        }
+
+        # Create client_params dict with non-None values
+        client_params = {k: v for k, v in base_params.items() if v is not None}
+
+        # Add additional client params if provided
+        if self.client_params:
+            client_params.update(self.client_params)
+        return client_params
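Since N1N subclasses OpenAILike, it reuses the OpenAI-compatible request path and only supplies its own credentials and base URL. A minimal usage sketch (the key is a placeholder; without a key, building the client raises ModelAuthenticationError):

import os

from agno.models.n1n.n1n import N1N

os.environ["N1N_API_KEY"] = "placeholder-key"
model = N1N(id="gpt-4o")  # requests go to https://api.n1n.ai/v1 with the key above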
agno/models/openai/chat.py
CHANGED
@@ -43,6 +43,8 @@ class OpenAIChat(Model):
     name: str = "OpenAIChat"
     provider: str = "OpenAI"
     supports_native_structured_outputs: bool = True
+    # If True, only collect metrics on the final streaming chunk (for providers with cumulative token counts)
+    collect_metrics_on_completion: bool = False

     # Request parameters
     store: Optional[bool] = None
@@ -305,6 +307,13 @@ class OpenAIChat(Model):
         cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
         return cleaned_dict

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChat":
+        """
+        Create an OpenAIChat model from a dictionary.
+        """
+        return cls(**data)
+
     def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by OpenAI.
@@ -745,6 +754,21 @@ class OpenAIChat(Model):
                 tool_call_entry["type"] = _tool_call_type
         return tool_calls

+    def _should_collect_metrics(self, response: ChatCompletionChunk) -> bool:
+        """
+        Determine if metrics should be collected from the response.
+        """
+        if not response.usage:
+            return False
+
+        if not self.collect_metrics_on_completion:
+            return True
+
+        if not response.choices:
+            return False
+
+        return response.choices[0].finish_reason is not None
+
     def _parse_provider_response(
         self,
         response: ChatCompletion,
@@ -913,7 +937,7 @@ class OpenAIChat(Model):
                 log_warning(f"Error processing audio: {e}")

         # Add usage metrics if present
-        if response_delta.usage is not None:
+        if self._should_collect_metrics(response_delta) and response_delta.usage is not None:
             model_response.response_usage = self._get_metrics(response_delta.usage)

         return model_response
agno/models/openrouter/openrouter.py
CHANGED

@@ -2,10 +2,13 @@ from dataclasses import dataclass
 from os import getenv
 from typing import Any, Dict, List, Optional, Type, Union

+from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from pydantic import BaseModel

 from agno.exceptions import ModelAuthenticationError
+from agno.models.message import Message
 from agno.models.openai.like import OpenAILike
+from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput


@@ -83,3 +86,46 @@ class OpenRouter(OpenAILike):
         request_params["extra_body"] = extra_body

         return request_params
+
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
+        message_dict = super()._format_message(message, compress_tool_results)
+
+        if message.role == "assistant" and message.provider_data:
+            if message.provider_data.get("reasoning_details"):
+                message_dict["reasoning_details"] = message.provider_data["reasoning_details"]
+
+        return message_dict
+
+    def _parse_provider_response(
+        self,
+        response: ChatCompletion,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> ModelResponse:
+        model_response = super()._parse_provider_response(response, response_format)
+
+        if response.choices and len(response.choices) > 0:
+            response_message = response.choices[0].message
+            if hasattr(response_message, "reasoning_details") and response_message.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = response_message.reasoning_details
+            elif hasattr(response_message, "model_extra"):
+                extra = getattr(response_message, "model_extra", None)
+                if extra and isinstance(extra, dict) and extra.get("reasoning_details"):
+                    if model_response.provider_data is None:
+                        model_response.provider_data = {}
+                    model_response.provider_data["reasoning_details"] = extra["reasoning_details"]
+
+        return model_response
+
+    def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
+        model_response = super()._parse_provider_response_delta(response_delta)
+
+        if response_delta.choices and len(response_delta.choices) > 0:
+            choice_delta = response_delta.choices[0].delta
+            if hasattr(choice_delta, "reasoning_details") and choice_delta.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = choice_delta.reasoning_details
+
+        return model_response

agno/models/perplexity/perplexity.py
CHANGED

@@ -41,6 +41,8 @@ class Perplexity(OpenAILike):
     id: str = "sonar"
     name: str = "Perplexity"
     provider: str = "Perplexity"
+    # Perplexity returns cumulative token counts in each streaming chunk, so only collect on final chunk
+    collect_metrics_on_completion: bool = True

     api_key: Optional[str] = field(default_factory=lambda: getenv("PERPLEXITY_API_KEY"))
     base_url: str = "https://api.perplexity.ai/"