agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. agno/agent/__init__.py +4 -0
  2. agno/agent/agent.py +1368 -541
  3. agno/agent/remote.py +13 -0
  4. agno/db/base.py +339 -0
  5. agno/db/postgres/async_postgres.py +116 -12
  6. agno/db/postgres/postgres.py +1242 -25
  7. agno/db/postgres/schemas.py +48 -1
  8. agno/db/sqlite/async_sqlite.py +119 -4
  9. agno/db/sqlite/schemas.py +51 -0
  10. agno/db/sqlite/sqlite.py +1186 -13
  11. agno/db/utils.py +37 -1
  12. agno/integrations/discord/client.py +12 -1
  13. agno/knowledge/__init__.py +4 -0
  14. agno/knowledge/chunking/code.py +1 -1
  15. agno/knowledge/chunking/semantic.py +1 -1
  16. agno/knowledge/chunking/strategy.py +4 -0
  17. agno/knowledge/filesystem.py +412 -0
  18. agno/knowledge/knowledge.py +3722 -2182
  19. agno/knowledge/protocol.py +134 -0
  20. agno/knowledge/reader/arxiv_reader.py +2 -2
  21. agno/knowledge/reader/base.py +9 -7
  22. agno/knowledge/reader/csv_reader.py +236 -13
  23. agno/knowledge/reader/docx_reader.py +2 -2
  24. agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
  25. agno/knowledge/reader/firecrawl_reader.py +2 -2
  26. agno/knowledge/reader/json_reader.py +2 -2
  27. agno/knowledge/reader/markdown_reader.py +2 -2
  28. agno/knowledge/reader/pdf_reader.py +5 -4
  29. agno/knowledge/reader/pptx_reader.py +2 -2
  30. agno/knowledge/reader/reader_factory.py +118 -1
  31. agno/knowledge/reader/s3_reader.py +2 -2
  32. agno/knowledge/reader/tavily_reader.py +2 -2
  33. agno/knowledge/reader/text_reader.py +2 -2
  34. agno/knowledge/reader/web_search_reader.py +2 -2
  35. agno/knowledge/reader/website_reader.py +5 -3
  36. agno/knowledge/reader/wikipedia_reader.py +2 -2
  37. agno/knowledge/reader/youtube_reader.py +2 -2
  38. agno/knowledge/remote_content/__init__.py +29 -0
  39. agno/knowledge/remote_content/config.py +204 -0
  40. agno/knowledge/remote_content/remote_content.py +74 -17
  41. agno/knowledge/utils.py +37 -29
  42. agno/learn/__init__.py +6 -0
  43. agno/learn/machine.py +35 -0
  44. agno/learn/schemas.py +82 -11
  45. agno/learn/stores/__init__.py +3 -0
  46. agno/learn/stores/decision_log.py +1156 -0
  47. agno/learn/stores/learned_knowledge.py +6 -6
  48. agno/models/anthropic/claude.py +24 -0
  49. agno/models/aws/bedrock.py +20 -0
  50. agno/models/base.py +60 -6
  51. agno/models/cerebras/cerebras.py +34 -2
  52. agno/models/cohere/chat.py +25 -0
  53. agno/models/google/gemini.py +50 -5
  54. agno/models/litellm/chat.py +38 -0
  55. agno/models/n1n/__init__.py +3 -0
  56. agno/models/n1n/n1n.py +57 -0
  57. agno/models/openai/chat.py +25 -1
  58. agno/models/openrouter/openrouter.py +46 -0
  59. agno/models/perplexity/perplexity.py +2 -0
  60. agno/models/response.py +16 -0
  61. agno/os/app.py +83 -44
  62. agno/os/interfaces/slack/router.py +10 -1
  63. agno/os/interfaces/whatsapp/router.py +6 -0
  64. agno/os/middleware/__init__.py +2 -0
  65. agno/os/middleware/trailing_slash.py +27 -0
  66. agno/os/router.py +1 -0
  67. agno/os/routers/agents/router.py +29 -16
  68. agno/os/routers/agents/schema.py +6 -4
  69. agno/os/routers/components/__init__.py +3 -0
  70. agno/os/routers/components/components.py +475 -0
  71. agno/os/routers/evals/schemas.py +4 -3
  72. agno/os/routers/health.py +3 -3
  73. agno/os/routers/knowledge/knowledge.py +128 -3
  74. agno/os/routers/knowledge/schemas.py +12 -0
  75. agno/os/routers/memory/schemas.py +4 -2
  76. agno/os/routers/metrics/metrics.py +9 -11
  77. agno/os/routers/metrics/schemas.py +10 -6
  78. agno/os/routers/registry/__init__.py +3 -0
  79. agno/os/routers/registry/registry.py +337 -0
  80. agno/os/routers/teams/router.py +20 -8
  81. agno/os/routers/teams/schema.py +6 -4
  82. agno/os/routers/traces/traces.py +5 -5
  83. agno/os/routers/workflows/router.py +38 -11
  84. agno/os/routers/workflows/schema.py +1 -1
  85. agno/os/schema.py +92 -26
  86. agno/os/utils.py +84 -19
  87. agno/reasoning/anthropic.py +2 -2
  88. agno/reasoning/azure_ai_foundry.py +2 -2
  89. agno/reasoning/deepseek.py +2 -2
  90. agno/reasoning/default.py +6 -7
  91. agno/reasoning/gemini.py +2 -2
  92. agno/reasoning/helpers.py +6 -7
  93. agno/reasoning/manager.py +4 -10
  94. agno/reasoning/ollama.py +2 -2
  95. agno/reasoning/openai.py +2 -2
  96. agno/reasoning/vertexai.py +2 -2
  97. agno/registry/__init__.py +3 -0
  98. agno/registry/registry.py +68 -0
  99. agno/run/agent.py +59 -0
  100. agno/run/base.py +7 -0
  101. agno/run/team.py +57 -0
  102. agno/skills/agent_skills.py +10 -3
  103. agno/team/__init__.py +3 -1
  104. agno/team/team.py +1165 -330
  105. agno/tools/duckduckgo.py +25 -71
  106. agno/tools/exa.py +0 -21
  107. agno/tools/function.py +35 -83
  108. agno/tools/knowledge.py +9 -4
  109. agno/tools/mem0.py +11 -10
  110. agno/tools/memory.py +47 -46
  111. agno/tools/parallel.py +0 -7
  112. agno/tools/reasoning.py +30 -23
  113. agno/tools/tavily.py +4 -1
  114. agno/tools/websearch.py +93 -0
  115. agno/tools/website.py +1 -1
  116. agno/tools/wikipedia.py +1 -1
  117. agno/tools/workflow.py +48 -47
  118. agno/utils/agent.py +42 -5
  119. agno/utils/events.py +160 -2
  120. agno/utils/print_response/agent.py +0 -31
  121. agno/utils/print_response/team.py +0 -2
  122. agno/utils/print_response/workflow.py +0 -2
  123. agno/utils/team.py +61 -11
  124. agno/vectordb/lancedb/lance_db.py +4 -1
  125. agno/vectordb/mongodb/mongodb.py +1 -1
  126. agno/vectordb/pgvector/pgvector.py +3 -3
  127. agno/vectordb/qdrant/qdrant.py +4 -4
  128. agno/workflow/__init__.py +3 -1
  129. agno/workflow/condition.py +0 -21
  130. agno/workflow/loop.py +0 -21
  131. agno/workflow/parallel.py +0 -21
  132. agno/workflow/router.py +0 -21
  133. agno/workflow/step.py +117 -24
  134. agno/workflow/steps.py +0 -21
  135. agno/workflow/workflow.py +427 -63
  136. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
  137. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
  138. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
  139. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
  140. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0
agno/learn/stores/learned_knowledge.py CHANGED
@@ -919,7 +919,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)
 
         # Build metadata for filtering
-        # Metadata must be passed separately to add_content for filters to work
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -932,7 +932,7 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags
 
-        self.knowledge.add_content(
+        self.knowledge.insert(
             name=learning_data["title"],
             text_content=text_content,
             reader=TextReader(),
@@ -989,7 +989,7 @@ class LearnedKnowledgeStore(LearningStore):
         text_content = self._to_text_content(learning=learning_obj)
 
         # Build metadata for filtering - THIS IS THE KEY FIX!
-        # Metadata must be passed separately to add_content for filters to work
+        # Metadata must be passed separately to insert for filters to work
         filter_metadata: dict[str, Any] = {
             "namespace": effective_namespace,
         }
@@ -1002,8 +1002,8 @@ class LearnedKnowledgeStore(LearningStore):
         if tags:
             filter_metadata["tags"] = tags
 
-        if hasattr(self.knowledge, "aadd_content"):
-            await self.knowledge.aadd_content(
+        if hasattr(self.knowledge, "ainsert"):
+            await self.knowledge.ainsert(
                 name=learning_data["title"],
                 text_content=text_content,
                 reader=TextReader(),
@@ -1011,7 +1011,7 @@ class LearnedKnowledgeStore(LearningStore):
                 metadata=filter_metadata,  # Pass metadata for filtering
             )
         else:
-            self.knowledge.add_content(
+            self.knowledge.insert(
                 name=learning_data["title"],
                 text_content=text_content,
                 reader=TextReader(),
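
Note: the four hunks above track the Knowledge API rename shipped in this release: add_content/aadd_content become insert/ainsert (see agno/knowledge/knowledge.py in the file list). A minimal sketch of the new call shape, assuming a configured Knowledge instance; the content values are illustrative:

    from agno.knowledge.reader.text_reader import TextReader

    knowledge.insert(
        name="learned-fact",
        text_content="Users prefer concise answers.",
        reader=TextReader(),
        # Metadata is passed separately so vector-store filters keep working
        metadata={"namespace": "default", "tags": ["preferences"]},
    )
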
agno/models/anthropic/claude.py CHANGED
@@ -406,6 +406,30 @@ class Claude(Model):
         self.async_client = AsyncAnthropicClient(**_client_params)
         return self.async_client
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "max_tokens": self.max_tokens,
+                "thinking": self.thinking,
+                "temperature": self.temperature,
+                "stop_sequences": self.stop_sequences,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "cache_system_prompt": self.cache_system_prompt,
+                "extended_cache_time": self.extended_cache_time,
+                "betas": self.betas,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def count_tokens(
         self,
         messages: List[Message],
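
The new Claude.to_dict() layers Anthropic-specific sampling and caching parameters on top of the base Model.to_dict() and strips None values. A short sketch of the resulting behavior, assuming the constructor accepts the same keys (the model id is illustrative):

    from agno.models.anthropic import Claude

    model = Claude(id="claude-sonnet-4-0", max_tokens=2048, temperature=0.2)
    config = model.to_dict()
    assert config["temperature"] == 0.2   # explicitly set, so serialized
    assert "top_k" not in config          # left as None, so stripped
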
agno/models/aws/bedrock.py CHANGED
@@ -166,6 +166,26 @@ class AwsBedrock(Model):
 
         return self.async_session.client(**client_kwargs)
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "aws_region": self.aws_region,
+                "max_tokens": self.max_tokens,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "stop_sequences": self.stop_sequences,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _format_tools_for_request(self, tools: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
         """
         Format the tools for the request.
agno/models/base.py CHANGED
@@ -1293,12 +1293,23 @@ class Model(ABC):
         if _compression_manager is not None and _compression_manager.should_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             _compression_manager.compress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )
 
         assistant_message = Message(role=self.assistant_message_role)
         # Create assistant message and stream data
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             for response in self.process_response_stream(
@@ -1334,6 +1345,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)
 
+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1513,12 +1537,23 @@ class Model(ABC):
         if _compression_manager is not None and await _compression_manager.ashould_compress(
             messages, tools, model=self, response_format=response_format
         ):
+            # Emit compression started event
+            yield ModelResponse(event=ModelResponseEvent.compression_started.value)
             await _compression_manager.acompress(messages)
+            # Emit compression completed event with stats
+            yield ModelResponse(
+                event=ModelResponseEvent.compression_completed.value,
+                compression_stats=_compression_manager.stats.copy(),
+            )
 
         # Create assistant message and stream data
         assistant_message = Message(role=self.assistant_message_role)
         stream_data = MessageData()
         model_response = ModelResponse()
+
+        # Emit LLM request started event
+        yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
         if stream_model_response:
             # Generate response
             async for model_response in self.aprocess_response_stream(
@@ -1554,6 +1589,19 @@ class Model(ABC):
         messages.append(assistant_message)
         assistant_message.log(metrics=True)
 
+        # Emit LLM request completed event with metrics
+        llm_metrics = assistant_message.metrics
+        yield ModelResponse(
+            event=ModelResponseEvent.model_request_completed.value,
+            input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+            output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+            total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+            time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+            reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+            cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+            cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+        )
+
         # Handle tool calls if present
         if assistant_message.tool_calls is not None:
             # Prepare function calls
@@ -1940,6 +1988,7 @@ class Model(ABC):
 
                 if isinstance(item, CustomEvent):
                     function_call_output += str(item)
+                    item.tool_call_id = function_call.call_id
 
                 # For WorkflowCompletedEvent, extract content for final output
                 from agno.run.workflow import WorkflowCompletedEvent
@@ -1971,8 +2020,6 @@ class Model(ABC):
                 and function_call.function._run_context.session_state is not None
             ):
                 function_execution_result.updated_session_state = function_call.function._run_context.session_state
-            elif function_call.function._session_state is not None:
-                function_execution_result.updated_session_state = function_call.function._session_state
         else:
             from agno.tools.function import ToolResult
 
@@ -2384,6 +2431,7 @@ class Model(ABC):
 
                 if isinstance(item, CustomEvent):
                     function_call_output += str(item)
+                    item.tool_call_id = function_call.call_id
 
                 # For WorkflowCompletedEvent, extract content for final output
                 from agno.run.workflow import WorkflowCompletedEvent
@@ -2461,8 +2509,12 @@ class Model(ABC):
                 if async_gen_index in async_generator_outputs:
                     _, async_function_call_output, error = async_generator_outputs[async_gen_index]
                     if error:
-                        log_error(f"Error in async generator: {error}")
-                        raise error
+                        # Handle async generator exceptions gracefully like sync generators
+                        log_error(
+                            f"Error while iterating async generator for {function_call.function.name}: {error}"
+                        )
+                        function_call.error = str(error)
+                        function_call_success = False
                         break
                     async_gen_index += 1
 
@@ -2509,6 +2561,10 @@ class Model(ABC):
                         yield ModelResponse(content=item.content)
                         continue
 
+                    elif isinstance(item, CustomEvent):
+                        function_call_output += str(item)
+                        item.tool_call_id = function_call.call_id
+
                     # Yield the event itself to bubble it up
                     yield item
                 else:
@@ -2532,8 +2588,6 @@ class Model(ABC):
                 and function_call.function._run_context.session_state is not None
             ):
                 updated_session_state = function_call.function._run_context.session_state
-            elif function_call.function._session_state is not None:
-                updated_session_state = function_call.function._session_state
 
             if not (
                 async_function_call_output is not None
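
Taken together, the base.py hunks add four lifecycle events to the model-level response stream: compression_started/compression_completed around context compression, and model_request_started/model_request_completed (carrying token metrics) around each LLM call. A sketch of filtering them out of the stream; the entry point name is a hypothetical stand-in for the internal generator:

    from agno.models.response import ModelResponseEvent

    for chunk in model.response_stream(messages=messages):  # hypothetical entry point
        if chunk.event == ModelResponseEvent.model_request_completed.value:
            print("tokens:", chunk.input_tokens, chunk.output_tokens, chunk.total_tokens)
        elif chunk.event == ModelResponseEvent.compression_completed.value:
            print("compression stats:", chunk.compression_stats)
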
agno/models/cerebras/cerebras.py CHANGED
@@ -97,6 +97,35 @@ class Cerebras(Model):
         client_params.update(self.client_params)
         return client_params
 
+    def _ensure_additional_properties_false(self, schema: Dict[str, Any]) -> None:
+        """
+        Recursively ensure all object types have additionalProperties: false.
+        Cerebras API requires this for JSON schema validation.
+        """
+        if not isinstance(schema, dict):
+            return
+
+        # Set additionalProperties: false for object types
+        if schema.get("type") == "object":
+            schema["additionalProperties"] = False
+
+        # Recursively process nested schemas
+        if "properties" in schema and isinstance(schema["properties"], dict):
+            for prop_schema in schema["properties"].values():
+                self._ensure_additional_properties_false(prop_schema)
+
+        if "items" in schema:
+            self._ensure_additional_properties_false(schema["items"])
+
+        if "$defs" in schema and isinstance(schema["$defs"], dict):
+            for def_schema in schema["$defs"].values():
+                self._ensure_additional_properties_false(def_schema)
+
+        for key in ["allOf", "anyOf", "oneOf"]:
+            if key in schema and isinstance(schema[key], list):
+                for item in schema[key]:
+                    self._ensure_additional_properties_false(item)
+
     def get_client(self) -> CerebrasClient:
         """
         Returns a Cerebras client.
@@ -191,8 +220,11 @@ class Cerebras(Model):
         ):
             # Ensure json_schema has strict parameter set
             schema = response_format["json_schema"]
-            if isinstance(schema.get("schema"), dict) and "strict" not in schema:
-                schema["strict"] = self.strict_output
+            if isinstance(schema.get("schema"), dict):
+                if "strict" not in schema:
+                    schema["strict"] = self.strict_output
+                # Cerebras requires additionalProperties: false for all object types
+                self._ensure_additional_properties_false(schema["schema"])
 
         request_params["response_format"] = response_format
 
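
The effect of the new schema pass is easiest to see on a nested schema: every object node gains additionalProperties: false, which the Cerebras API requires for strict JSON-schema outputs. A standalone sketch (the model id is illustrative, and calling the private helper directly is for demonstration only):

    from agno.models.cerebras import Cerebras

    schema = {
        "type": "object",
        "properties": {
            "user": {"type": "object", "properties": {"name": {"type": "string"}}},
            "tags": {"type": "array", "items": {"type": "object", "properties": {}}},
        },
    }
    Cerebras(id="llama-3.3-70b")._ensure_additional_properties_false(schema)
    assert schema["additionalProperties"] is False
    assert schema["properties"]["user"]["additionalProperties"] is False
    assert schema["properties"]["tags"]["items"]["additionalProperties"] is False
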
agno/models/cohere/chat.py CHANGED
@@ -115,6 +115,31 @@ class Cohere(Model):
         self.async_client = CohereAsyncClient(**_client_params)
         return self.async_client  # type: ignore
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+                "seed": self.seed,
+                "frequency_penalty": self.frequency_penalty,
+                "presence_penalty": self.presence_penalty,
+                "logprobs": self.logprobs,
+                "strict_tools": self.strict_tools,
+                "add_chat_history": self.add_chat_history,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
agno/models/google/gemini.py CHANGED
@@ -172,6 +172,45 @@ class Gemini(Model):
         self.client = genai.Client(**client_params)
         return self.client
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the model to a dictionary.
+
+        Returns:
+            Dict[str, Any]: The dictionary representation of the model.
+        """
+        model_dict = super().to_dict()
+        model_dict.update(
+            {
+                "search": self.search,
+                "grounding": self.grounding,
+                "grounding_dynamic_threshold": self.grounding_dynamic_threshold,
+                "url_context": self.url_context,
+                "vertexai_search": self.vertexai_search,
+                "vertexai_search_datastore": self.vertexai_search_datastore,
+                "file_search_store_names": self.file_search_store_names,
+                "file_search_metadata_filter": self.file_search_metadata_filter,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "top_k": self.top_k,
+                "max_output_tokens": self.max_output_tokens,
+                "stop_sequences": self.stop_sequences,
+                "logprobs": self.logprobs,
+                "presence_penalty": self.presence_penalty,
+                "frequency_penalty": self.frequency_penalty,
+                "seed": self.seed,
+                "response_modalities": self.response_modalities,
+                "thinking_budget": self.thinking_budget,
+                "include_thoughts": self.include_thoughts,
+                "thinking_level": self.thinking_level,
+                "vertexai": self.vertexai,
+                "project_id": self.project_id,
+                "location": self.location,
+            }
+        )
+        cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
+        return cleaned_dict
+
     def _append_file_search_tool(self, builtin_tools: List[Tool]) -> None:
         """Append Gemini File Search tool to builtin_tools if file search is enabled.
 
@@ -672,7 +711,6 @@ class Gemini(Model):
         compress_tool_results: Whether to compress tool results.
         """
         formatted_messages: List = []
-        file_content: Optional[Union[GeminiFile, Part]] = None
         system_message = None
 
         for message in messages:
@@ -795,14 +833,11 @@ class Gemini(Model):
                 for file in message.files:
                     file_content = self._format_file_for_message(file)
                     if isinstance(file_content, Part):
-                        formatted_messages.append(file_content)
+                        message_parts.append(file_content)
 
             final_message = Content(role=role, parts=message_parts)
             formatted_messages.append(final_message)
 
-        if isinstance(file_content, GeminiFile):
-            formatted_messages.insert(0, file_content)
-
         return formatted_messages, system_message
 
     def _format_audio_for_message(self, audio: Audio) -> Optional[Union[Part, GeminiFile]]:
@@ -936,6 +971,16 @@ class Gemini(Model):
 
         # Case 2: File is a URL
         elif file.url is not None:
+            # Case 2a: GCS URI (gs://) - pass directly to Gemini (supports up to 2GB)
+            if file.url.startswith("gs://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2b: HTTPS URL with mime_type - pass directly to Gemini (supports up to 100MB)
+            # This enables pre-signed URLs from S3/Azure and public URLs without downloading
+            if file.url.startswith("https://") and file.mime_type:
+                return Part.from_uri(file_uri=file.url, mime_type=file.mime_type)
+
+            # Case 2c: URL without mime_type - download and detect (existing behavior)
             url_content = file.file_url_content
             if url_content is not None:
                 content, mime_type = url_content
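
With this change, files that carry an explicit mime_type are handed to Gemini by reference instead of being downloaded: gs:// URIs go straight to Part.from_uri (up to 2GB), and https:// URLs do the same (up to 100MB), which covers pre-signed S3/Azure links. A sketch, assuming agno's File media object exposes url and mime_type as used above:

    from agno.media import File

    gcs_pdf = File(url="gs://my-bucket/report.pdf", mime_type="application/pdf")          # Case 2a
    signed_pdf = File(url="https://example.com/report.pdf", mime_type="application/pdf")  # Case 2b
    # Omitting mime_type still falls back to download-and-detect (Case 2c).
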
agno/models/litellm/chat.py CHANGED
@@ -1,3 +1,4 @@
+import copy
 import json
 from dataclasses import dataclass
 from os import getenv
@@ -48,10 +49,18 @@ class LiteLLM(Model):
 
     client: Optional[Any] = None
 
+    # Store the original client to preserve it across copies (e.g., for Router instances)
+    _original_client: Optional[Any] = None
+
     def __post_init__(self):
         """Initialize the model after the dataclass initialization."""
         super().__post_init__()
 
+        # Store the original client if provided (e.g., Router instance)
+        # This ensures the client is preserved when the model is copied for background tasks
+        if self.client is not None and self._original_client is None:
+            self._original_client = self.client
+
         # Set up API key from environment variable if not already set
         if not self.client and not self.api_key:
             self.api_key = getenv("LITELLM_API_KEY")
@@ -70,12 +79,41 @@ class LiteLLM(Model):
         Returns:
             Any: An instance of the LiteLLM client.
         """
+        # First check if we have a current client
         if self.client is not None:
             return self.client
 
+        # Check if we have an original client (e.g., Router) that was preserved
+        # This handles the case where the model was copied for background tasks
+        if self._original_client is not None:
+            self.client = self._original_client
+            return self.client
+
         self.client = litellm
         return self.client
 
+    def __deepcopy__(self, memo: Dict[int, Any]) -> "LiteLLM":
+        """
+        Custom deepcopy to preserve the client (e.g., Router) across copies.
+
+        This is needed because when the model is copied for background tasks
+        (memory, summarization), the client reference needs to be preserved.
+        """
+        # Create a shallow copy first
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+
+        # Copy all attributes, but keep the same client reference
+        for k, v in self.__dict__.items():
+            if k in ("client", "_original_client"):
+                # Keep the same client reference (don't deepcopy Router instances)
+                setattr(result, k, v)
+            else:
+                setattr(result, k, copy.deepcopy(v, memo))
+
+        return result
+
     def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
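
The preserved-client logic matters when a LiteLLM model wrapping a litellm Router is deep-copied for background tasks (memory, summarization): the Router is kept by reference rather than cloned. A sketch, assuming litellm is installed; the Router config is illustrative:

    import copy

    from agno.models.litellm import LiteLLM
    from litellm import Router

    router = Router(model_list=[{"model_name": "gpt-4o", "litellm_params": {"model": "gpt-4o"}}])
    model = LiteLLM(id="gpt-4o", client=router)
    clone = copy.deepcopy(model)         # what copying for a background task effectively does
    assert clone.get_client() is router  # same Router instance, not a copy
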
agno/models/n1n/__init__.py ADDED
@@ -0,0 +1,3 @@
+from agno.models.n1n.n1n import N1N
+
+__all__ = ["N1N"]
agno/models/n1n/n1n.py ADDED
@@ -0,0 +1,57 @@
+from dataclasses import dataclass, field
+from os import getenv
+from typing import Any, Dict, Optional
+
+from agno.exceptions import ModelAuthenticationError
+from agno.models.openai.like import OpenAILike
+
+
+@dataclass
+class N1N(OpenAILike):
+    """
+    A class for interacting with n1n.ai models.
+
+    Attributes:
+        id (str): The model id. Defaults to "gpt-4o".
+        name (str): The model name. Defaults to "N1N".
+        provider (str): The provider name. Defaults to "N1N".
+        api_key (Optional[str]): The API key.
+        base_url (str): The base URL. Defaults to "https://api.n1n.ai/v1".
+    """
+
+    id: str = "gpt-4o"
+    name: str = "N1N"
+    provider: str = "N1N"
+
+    api_key: Optional[str] = field(default_factory=lambda: getenv("N1N_API_KEY"))
+    base_url: str = "https://api.n1n.ai/v1"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        # Fetch API key from env if not already set
+        if not self.api_key:
+            self.api_key = getenv("N1N_API_KEY")
+        if not self.api_key:
+            # Raise error immediately if key is missing
+            raise ModelAuthenticationError(
+                message="N1N_API_KEY not set. Please set the N1N_API_KEY environment variable.",
+                model_name=self.name,
+            )
+
+        # Define base client params
+        base_params = {
+            "api_key": self.api_key,
+            "organization": self.organization,
+            "base_url": self.base_url,
+            "timeout": self.timeout,
+            "max_retries": self.max_retries,
+            "default_headers": self.default_headers,
+            "default_query": self.default_query,
+        }
+
+        # Create client_params dict with non-None values
+        client_params = {k: v for k, v in base_params.items() if v is not None}
+
+        # Add additional client params if provided
+        if self.client_params:
+            client_params.update(self.client_params)
+        return client_params
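
Since N1N is a thin OpenAILike wrapper, usage mirrors the other OpenAI-compatible providers. A sketch, assuming N1N_API_KEY is exported; the agent wiring is illustrative:

    from agno.agent import Agent
    from agno.models.n1n import N1N

    agent = Agent(model=N1N(id="gpt-4o"))
    agent.print_response("Hello from n1n.ai")
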
agno/models/openai/chat.py CHANGED
@@ -43,6 +43,8 @@ class OpenAIChat(Model):
     name: str = "OpenAIChat"
     provider: str = "OpenAI"
     supports_native_structured_outputs: bool = True
+    # If True, only collect metrics on the final streaming chunk (for providers with cumulative token counts)
+    collect_metrics_on_completion: bool = False
 
     # Request parameters
     store: Optional[bool] = None
@@ -305,6 +307,13 @@ class OpenAIChat(Model):
         cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
         return cleaned_dict
 
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChat":
+        """
+        Create an OpenAIChat model from a dictionary.
+        """
+        return cls(**data)
+
     def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by OpenAI.
@@ -745,6 +754,21 @@ class OpenAIChat(Model):
                 tool_call_entry["type"] = _tool_call_type
         return tool_calls
 
+    def _should_collect_metrics(self, response: ChatCompletionChunk) -> bool:
+        """
+        Determine if metrics should be collected from the response.
+        """
+        if not response.usage:
+            return False
+
+        if not self.collect_metrics_on_completion:
+            return True
+
+        if not response.choices:
+            return False
+
+        return response.choices[0].finish_reason is not None
+
     def _parse_provider_response(
         self,
         response: ChatCompletion,
@@ -913,7 +937,7 @@ class OpenAIChat(Model):
             log_warning(f"Error processing audio: {e}")
 
         # Add usage metrics if present
-        if response_delta.usage is not None:
+        if self._should_collect_metrics(response_delta) and response_delta.usage is not None:
             model_response.response_usage = self._get_metrics(response_delta.usage)
 
         return model_response
agno/models/openrouter/openrouter.py CHANGED
@@ -2,10 +2,13 @@ from dataclasses import dataclass
 from os import getenv
 from typing import Any, Dict, List, Optional, Type, Union
 
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from pydantic import BaseModel
 
 from agno.exceptions import ModelAuthenticationError
+from agno.models.message import Message
 from agno.models.openai.like import OpenAILike
+from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
 
 
@@ -83,3 +86,46 @@ class OpenRouter(OpenAILike):
             request_params["extra_body"] = extra_body
 
         return request_params
+
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
+        message_dict = super()._format_message(message, compress_tool_results)
+
+        if message.role == "assistant" and message.provider_data:
+            if message.provider_data.get("reasoning_details"):
+                message_dict["reasoning_details"] = message.provider_data["reasoning_details"]
+
+        return message_dict
+
+    def _parse_provider_response(
+        self,
+        response: ChatCompletion,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> ModelResponse:
+        model_response = super()._parse_provider_response(response, response_format)
+
+        if response.choices and len(response.choices) > 0:
+            response_message = response.choices[0].message
+            if hasattr(response_message, "reasoning_details") and response_message.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = response_message.reasoning_details
+            elif hasattr(response_message, "model_extra"):
+                extra = getattr(response_message, "model_extra", None)
+                if extra and isinstance(extra, dict) and extra.get("reasoning_details"):
+                    if model_response.provider_data is None:
+                        model_response.provider_data = {}
+                    model_response.provider_data["reasoning_details"] = extra["reasoning_details"]
+
+        return model_response
+
+    def _parse_provider_response_delta(self, response_delta: ChatCompletionChunk) -> ModelResponse:
+        model_response = super()._parse_provider_response_delta(response_delta)
+
+        if response_delta.choices and len(response_delta.choices) > 0:
+            choice_delta = response_delta.choices[0].delta
+            if hasattr(choice_delta, "reasoning_details") and choice_delta.reasoning_details:
+                if model_response.provider_data is None:
+                    model_response.provider_data = {}
+                model_response.provider_data["reasoning_details"] = choice_delta.reasoning_details
+
+        return model_response
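
The net effect: reasoning traces returned by OpenRouter are captured into provider_data when parsing both full responses and streaming deltas, and _format_message replays them onto later assistant messages so the provider can resume reasoning across turns (e.g. during tool use). A sketch of where the data lands; the model id and call shape are illustrative, not the public API:

    from agno.models.openrouter import OpenRouter

    model = OpenRouter(id="anthropic/claude-sonnet-4")
    model_response = model.response(messages=messages)  # hypothetical internal call shape
    details = (model_response.provider_data or {}).get("reasoning_details")
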
agno/models/perplexity/perplexity.py CHANGED
@@ -41,6 +41,8 @@ class Perplexity(OpenAILike):
     id: str = "sonar"
     name: str = "Perplexity"
     provider: str = "Perplexity"
+    # Perplexity returns cumulative token counts in each streaming chunk, so only collect on final chunk
+    collect_metrics_on_completion: bool = True
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("PERPLEXITY_API_KEY"))
     base_url: str = "https://api.perplexity.ai/"
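
Perplexity streams cumulative usage in every chunk, so summing per-chunk usage would double-count tokens; the flag defined on OpenAIChat above restricts metric collection to the chunk carrying finish_reason. A quick check, assuming the dataclass default is inherited as shown:

    from agno.models.perplexity import Perplexity

    model = Perplexity(id="sonar")
    assert model.collect_metrics_on_completion is True  # only the final chunk's usage is recorded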