agno 2.0.10__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. agno/agent/agent.py +608 -175
  2. agno/db/in_memory/in_memory_db.py +42 -29
  3. agno/db/postgres/postgres.py +6 -4
  4. agno/exceptions.py +62 -1
  5. agno/guardrails/__init__.py +6 -0
  6. agno/guardrails/base.py +19 -0
  7. agno/guardrails/openai.py +144 -0
  8. agno/guardrails/pii.py +94 -0
  9. agno/guardrails/prompt_injection.py +51 -0
  10. agno/knowledge/embedder/aws_bedrock.py +9 -4
  11. agno/knowledge/embedder/azure_openai.py +54 -0
  12. agno/knowledge/embedder/base.py +2 -0
  13. agno/knowledge/embedder/cohere.py +184 -5
  14. agno/knowledge/embedder/google.py +79 -1
  15. agno/knowledge/embedder/huggingface.py +9 -4
  16. agno/knowledge/embedder/jina.py +63 -0
  17. agno/knowledge/embedder/mistral.py +78 -11
  18. agno/knowledge/embedder/ollama.py +5 -0
  19. agno/knowledge/embedder/openai.py +18 -54
  20. agno/knowledge/embedder/voyageai.py +69 -16
  21. agno/knowledge/knowledge.py +5 -4
  22. agno/knowledge/reader/pdf_reader.py +4 -3
  23. agno/knowledge/reader/website_reader.py +3 -2
  24. agno/models/base.py +125 -32
  25. agno/models/cerebras/cerebras.py +1 -0
  26. agno/models/cerebras/cerebras_openai.py +1 -0
  27. agno/models/dashscope/dashscope.py +1 -0
  28. agno/models/google/gemini.py +27 -5
  29. agno/models/litellm/chat.py +17 -0
  30. agno/models/openai/chat.py +13 -4
  31. agno/models/perplexity/perplexity.py +2 -3
  32. agno/models/requesty/__init__.py +5 -0
  33. agno/models/requesty/requesty.py +49 -0
  34. agno/models/vllm/vllm.py +1 -0
  35. agno/models/xai/xai.py +1 -0
  36. agno/os/app.py +167 -148
  37. agno/os/interfaces/whatsapp/router.py +2 -0
  38. agno/os/mcp.py +1 -1
  39. agno/os/middleware/__init__.py +7 -0
  40. agno/os/middleware/jwt.py +233 -0
  41. agno/os/router.py +181 -45
  42. agno/os/routers/home.py +2 -2
  43. agno/os/routers/memory/memory.py +23 -1
  44. agno/os/routers/memory/schemas.py +1 -1
  45. agno/os/routers/session/session.py +20 -3
  46. agno/os/utils.py +172 -8
  47. agno/run/agent.py +120 -77
  48. agno/run/team.py +115 -72
  49. agno/run/workflow.py +5 -15
  50. agno/session/summary.py +9 -10
  51. agno/session/team.py +2 -1
  52. agno/team/team.py +720 -168
  53. agno/tools/firecrawl.py +4 -4
  54. agno/tools/function.py +42 -2
  55. agno/tools/knowledge.py +3 -3
  56. agno/tools/searxng.py +2 -2
  57. agno/tools/serper.py +2 -2
  58. agno/tools/spider.py +2 -2
  59. agno/tools/workflow.py +4 -5
  60. agno/utils/events.py +66 -1
  61. agno/utils/hooks.py +57 -0
  62. agno/utils/media.py +11 -9
  63. agno/utils/print_response/agent.py +43 -5
  64. agno/utils/print_response/team.py +48 -12
  65. agno/vectordb/cassandra/cassandra.py +44 -4
  66. agno/vectordb/chroma/chromadb.py +79 -8
  67. agno/vectordb/clickhouse/clickhousedb.py +43 -6
  68. agno/vectordb/couchbase/couchbase.py +76 -5
  69. agno/vectordb/lancedb/lance_db.py +38 -3
  70. agno/vectordb/llamaindex/__init__.py +3 -0
  71. agno/vectordb/milvus/milvus.py +76 -4
  72. agno/vectordb/mongodb/mongodb.py +76 -4
  73. agno/vectordb/pgvector/pgvector.py +50 -6
  74. agno/vectordb/pineconedb/pineconedb.py +39 -2
  75. agno/vectordb/qdrant/qdrant.py +76 -26
  76. agno/vectordb/singlestore/singlestore.py +77 -4
  77. agno/vectordb/upstashdb/upstashdb.py +42 -2
  78. agno/vectordb/weaviate/weaviate.py +39 -3
  79. agno/workflow/types.py +1 -0
  80. agno/workflow/workflow.py +58 -2
  81. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/METADATA +4 -3
  82. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/RECORD +85 -75
  83. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/WHEEL +0 -0
  84. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/licenses/LICENSE +0 -0
  85. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -715,6 +715,7 @@ class Model(ABC):
         assistant_message = Message(role=self.assistant_message_role)
         # Create assistant message and stream data
         stream_data = MessageData()
+        model_response = ModelResponse()
         if stream_model_response:
             # Generate response
             yield from self.process_response_stream(
@@ -744,7 +745,6 @@ class Model(ABC):
                 assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
 
         else:
-            model_response = ModelResponse()
             self._process_model_response(
                 messages=messages,
                 assistant_message=assistant_message,
@@ -784,6 +784,10 @@ class Model(ABC):
                 self.format_function_call_results(
                     messages=messages, function_call_results=function_call_results, **stream_data.extra
                 )
+            elif model_response and model_response.extra is not None:
+                self.format_function_call_results(
+                    messages=messages, function_call_results=function_call_results, **model_response.extra
+                )
             else:
                 self.format_function_call_results(messages=messages, function_call_results=function_call_results)
 
@@ -879,9 +883,10 @@ class Model(ABC):
         # Create assistant message and stream data
         assistant_message = Message(role=self.assistant_message_role)
         stream_data = MessageData()
+        model_response = ModelResponse()
         if stream_model_response:
             # Generate response
-            async for response in self.aprocess_response_stream(
+            async for model_response in self.aprocess_response_stream(
                 messages=messages,
                 assistant_message=assistant_message,
                 stream_data=stream_data,
@@ -890,7 +895,7 @@ class Model(ABC):
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
             ):
-                yield response
+                yield model_response
 
             # Populate assistant message from stream data
             if stream_data.response_content:
@@ -907,7 +912,6 @@ class Model(ABC):
                 assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
 
         else:
-            model_response = ModelResponse()
             await self._aprocess_model_response(
                 messages=messages,
                 assistant_message=assistant_message,
@@ -948,6 +952,10 @@ class Model(ABC):
                 self.format_function_call_results(
                     messages=messages, function_call_results=function_call_results, **stream_data.extra
                 )
+            elif model_response and model_response.extra is not None:
+                self.format_function_call_results(
+                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                )
             else:
                 self.format_function_call_results(messages=messages, function_call_results=function_call_results)
 
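Both the sync and async paths now construct `model_response = ModelResponse()` before the streaming branch, so the new `elif model_response and model_response.extra is not None` fallback is bound on every path. A minimal sketch of why the hoist matters (simplified stand-ins for agno's classes, not the shipped code):

    from dataclasses import dataclass
    from typing import Any, Dict, Optional

    @dataclass
    class ModelResponse:  # simplified stand-in for agno's ModelResponse
        extra: Optional[Dict[str, Any]] = None

    def handle(streaming: bool) -> None:
        model_response = ModelResponse()  # hoisted: defined on every path
        if not streaming:
            model_response.extra = {"tool_call_id": "abc"}  # hypothetical provider extras
        # Without the hoist, the streaming path would hit an UnboundLocalError here.
        if model_response.extra is not None:
            print("forwarding extras:", model_response.extra)

    handle(streaming=True)
    handle(streaming=False)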
@@ -1573,30 +1581,35 @@ class Model(ABC):
             *(self.arun_function_call(fc) for fc in function_calls_to_run), return_exceptions=True
         )
 
-        # Process results
+        # Separate async generators from other results for concurrent processing
+        async_generator_results: List[Any] = []
+        non_async_generator_results: List[Any] = []
+
         for result in results:
-            # If result is an exception, skip processing it
             if isinstance(result, BaseException):
-                log_error(f"Error during function call: {result}")
-                raise result
+                non_async_generator_results.append(result)
+                continue
 
-            # Unpack result
             function_call_success, function_call_timer, function_call, function_execution_result = result
 
-            updated_session_state = function_execution_result.updated_session_state
+            # Check if this result contains an async generator
+            if isinstance(function_call.result, (AsyncGeneratorType, AsyncIterator)):
+                async_generator_results.append(result)
+            else:
+                non_async_generator_results.append(result)
 
-            # Handle AgentRunException
-            if isinstance(function_call_success, AgentRunException):
-                a_exc = function_call_success
-                # Update additional messages from function call
-                _handle_agent_exception(a_exc, additional_input)
-                # Set function call success to False if an exception occurred
-                function_call_success = False
+        # Process async generators with real-time event streaming using asyncio.Queue
+        async_generator_outputs: Dict[int, Tuple[Any, str, Optional[BaseException]]] = {}
+        event_queue: asyncio.Queue = asyncio.Queue()
+        active_generators_count: int = len(async_generator_results)
 
-            # Process function call output
-            function_call_output: str = ""
-            if isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
-                for item in function_call.result:
+        # Create background tasks for each async generator
+        async def process_async_generator(result, generator_id):
+            function_call_success, function_call_timer, function_call, function_execution_result = result
+            function_call_output = ""
+
+            try:
+                async for item in function_call.result:
                     # This function yields agent/team run events
                     if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
                         item, tuple(get_args(TeamRunOutputEvent))
@@ -1610,17 +1623,102 @@ class Model(ABC):
                         function_call_output += item.content or ""
 
                         if function_call.function.show_result:
-                            yield ModelResponse(content=item.content)
+                            await event_queue.put(ModelResponse(content=item.content))
                             continue
 
-                        # Yield the event itself to bubble it up
-                        yield item
+                        if isinstance(item, CustomEvent):
+                            function_call_output += str(item)
+
+                        # Put the event into the queue to be yielded
+                        await event_queue.put(item)
+
+                    # Yield custom events emitted by the tool
                     else:
                         function_call_output += str(item)
                         if function_call.function.show_result:
-                            yield ModelResponse(content=str(item))
-            elif isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
-                async for item in function_call.result:
+                            await event_queue.put(ModelResponse(content=str(item)))
+
+                # Store the final output for this generator
+                async_generator_outputs[generator_id] = (result, function_call_output, None)
+
+            except Exception as e:
+                # Store the exception
+                async_generator_outputs[generator_id] = (result, "", e)
+
+            # Signal that this generator is done
+            await event_queue.put(("GENERATOR_DONE", generator_id))
+
+        # Start all async generator tasks
+        generator_tasks = []
+        for i, result in enumerate(async_generator_results):
+            task = asyncio.create_task(process_async_generator(result, i))
+            generator_tasks.append(task)
+
+        # Stream events from the queue as they arrive
+        completed_generators_count = 0
+        while completed_generators_count < active_generators_count:
+            try:
+                event = await event_queue.get()
+
+                # Check if this is a completion signal
+                if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
+                    completed_generators_count += 1
+                    continue
+
+                # Yield the actual event
+                yield event
+
+            except Exception as e:
+                log_error(f"Error processing async generator event: {e}")
+                break
+
+        # Now process all results (non-async generators and completed async generators)
+        for i, original_result in enumerate(results):
+            # If result is an exception, skip processing it
+            if isinstance(original_result, BaseException):
+                log_error(f"Error during function call: {original_result}")
+                raise original_result
+
+            # Unpack result
+            function_call_success, function_call_timer, function_call, function_execution_result = original_result
+
+            # Check if this was an async generator that was already processed
+            async_function_call_output = None
+            if isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                # Find the corresponding processed result
+                async_gen_index = 0
+                for j, result in enumerate(results[: i + 1]):
+                    if not isinstance(result, BaseException):
+                        _, _, fc, _ = result
+                        if isinstance(fc.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                            if j == i:  # This is our async generator
+                                if async_gen_index in async_generator_outputs:
+                                    _, async_function_call_output, error = async_generator_outputs[async_gen_index]
+                                    if error:
+                                        log_error(f"Error in async generator: {error}")
+                                        raise error
+                                break
+                            async_gen_index += 1
+
+            updated_session_state = function_execution_result.updated_session_state
+
+            # Handle AgentRunException
+            if isinstance(function_call_success, AgentRunException):
+                a_exc = function_call_success
+                # Update additional messages from function call
+                _handle_agent_exception(a_exc, additional_input)
+                # Set function call success to False if an exception occurred
+                function_call_success = False
+
+            # Process function call output
+            function_call_output: str = ""
+
+            # Check if this was an async generator that was already processed
+            if async_function_call_output is not None:
+                function_call_output = async_function_call_output
+                # Events from async generators were already yielded in real-time above
+            elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
+                for item in function_call.result:
                     # This function yields agent/team run events
                     if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
                         item, tuple(get_args(TeamRunOutputEvent))
@@ -1637,13 +1735,8 @@ class Model(ABC):
                             yield ModelResponse(content=item.content)
                             continue
 
-                        if isinstance(item, CustomEvent):
-                            function_call_output += str(item)
-
                         # Yield the event itself to bubble it up
                         yield item
-
-                    # Yield custom events emitted by the tool
                     else:
                         function_call_output += str(item)
                         if function_call.function.show_result:
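The new async tool-execution path is a fan-in: every async tool generator pumps its items into one shared asyncio.Queue, signals completion with a ("GENERATOR_DONE", id) sentinel, and the consumer drains the queue until all producers have finished, so events from concurrent tools stream in arrival order. A self-contained sketch of the pattern (illustrative names, not the shipped code):

    import asyncio
    from typing import Any, AsyncIterator, List

    async def fan_in(generators: List[AsyncIterator[Any]]) -> AsyncIterator[Any]:
        """Yield items from many async generators as they arrive."""
        queue: asyncio.Queue = asyncio.Queue()

        async def pump(gen: AsyncIterator[Any], gen_id: int) -> None:
            try:
                async for item in gen:
                    await queue.put(item)
            finally:
                # Completion sentinel, mirroring the "GENERATOR_DONE" signal above
                await queue.put(("GENERATOR_DONE", gen_id))

        tasks = [asyncio.create_task(pump(g, i)) for i, g in enumerate(generators)]
        remaining = len(tasks)
        while remaining:
            event = await queue.get()
            if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
                remaining -= 1
                continue
            yield event

    async def demo() -> None:
        async def ticker(name: str, n: int) -> AsyncIterator[str]:
            for i in range(n):
                await asyncio.sleep(0.01)
                yield f"{name}-{i}"

        async for item in fan_in([ticker("a", 3), ticker("b", 2)]):
            print(item)  # items from both generators, interleaved by arrival

    asyncio.run(demo())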
agno/models/cerebras/cerebras.py CHANGED
@@ -136,6 +136,7 @@ class Cerebras(Model):
         self,
         tools: Optional[List[Dict[str, Any]]] = None,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
agno/models/cerebras/cerebras_openai.py CHANGED
@@ -25,6 +25,7 @@ class CerebrasOpenAI(OpenAILike):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
agno/models/dashscope/dashscope.py CHANGED
@@ -73,6 +73,7 @@ class DashScope(OpenAILike):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         params = super().get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice)
 
agno/models/google/gemini.py CHANGED
@@ -26,6 +26,7 @@ try:
     from google.genai.types import (
         Content,
         DynamicRetrievalConfig,
+        FunctionCallingConfigMode,
         GenerateContentConfig,
         GenerateContentResponse,
         GenerateContentResponseUsageMetadata,
@@ -150,6 +151,7 @@ class Gemini(Model):
         system_message: Optional[str] = None,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
     ) -> Dict[str, Any]:
         """
         Returns the request keyword arguments for the GenerativeModel client.
@@ -245,6 +247,18 @@ class Gemini(Model):
         elif tools:
             config["tools"] = [format_function_definitions(tools)]
 
+        if tool_choice is not None:
+            if isinstance(tool_choice, str) and tool_choice.lower() == "auto":
+                config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.AUTO}}
+            elif isinstance(tool_choice, str) and tool_choice.lower() == "none":
+                config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.NONE}}
+            elif isinstance(tool_choice, str) and tool_choice.lower() == "validated":
+                config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.VALIDATED}}
+            elif isinstance(tool_choice, str) and tool_choice.lower() == "any":
+                config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.ANY}}
+            else:
+                config["tool_config"] = {"function_calling_config": {"mode": tool_choice}}
+
         config = {k: v for k, v in config.items() if v is not None}
 
         if config:
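The string-to-mode mapping above is mechanical; a table-driven equivalent makes the accepted values easier to scan (a sketch assuming the google-genai types imported earlier, not the shipped implementation):

    from typing import Any, Dict, Union

    from google.genai.types import FunctionCallingConfigMode

    _MODES: Dict[str, Any] = {
        "auto": FunctionCallingConfigMode.AUTO,            # model decides whether to call tools
        "none": FunctionCallingConfigMode.NONE,            # never call tools
        "validated": FunctionCallingConfigMode.VALIDATED,  # only validated tool calls
        "any": FunctionCallingConfigMode.ANY,              # force a tool call
    }

    def tool_config_for(tool_choice: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
        """Translate an OpenAI-style tool_choice into a Gemini tool_config entry."""
        if isinstance(tool_choice, str):
            mode: Any = _MODES.get(tool_choice.lower(), tool_choice)
        else:
            mode = tool_choice  # pass provider-native values through unchanged
        return {"function_calling_config": {"mode": mode}}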
@@ -271,7 +285,9 @@ class Gemini(Model):
         Invokes the model with a list of messages and returns the response.
         """
         formatted_messages, system_message = self._format_messages(messages)
-        request_kwargs = self.get_request_params(system_message, response_format=response_format, tools=tools)
+        request_kwargs = self.get_request_params(
+            system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+        )
         try:
             if run_response and run_response.metrics:
                 run_response.metrics.set_time_to_first_token()
@@ -315,7 +331,9 @@ class Gemini(Model):
         """
         formatted_messages, system_message = self._format_messages(messages)
 
-        request_kwargs = self.get_request_params(system_message, response_format=response_format, tools=tools)
+        request_kwargs = self.get_request_params(
+            system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+        )
         try:
             if run_response and run_response.metrics:
                 run_response.metrics.set_time_to_first_token()
@@ -356,7 +374,9 @@ class Gemini(Model):
         """
         formatted_messages, system_message = self._format_messages(messages)
 
-        request_kwargs = self.get_request_params(system_message, response_format=response_format, tools=tools)
+        request_kwargs = self.get_request_params(
+            system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+        )
 
         try:
             if run_response and run_response.metrics:
@@ -400,7 +420,9 @@ class Gemini(Model):
         """
         formatted_messages, system_message = self._format_messages(messages)
 
-        request_kwargs = self.get_request_params(system_message, response_format=response_format, tools=tools)
+        request_kwargs = self.get_request_params(
+            system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+        )
 
         try:
             if run_response and run_response.metrics:
@@ -1051,9 +1073,9 @@ class Gemini(Model):
 
         metrics.input_tokens = response_usage.prompt_token_count or 0
         metrics.output_tokens = response_usage.candidates_token_count or 0
-        metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
         if response_usage.thoughts_token_count is not None:
             metrics.output_tokens += response_usage.thoughts_token_count or 0
+        metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
 
         metrics.cache_read_tokens = response_usage.cached_content_token_count or 0
 
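The reordering fixes total_tokens for reasoning models: previously the total was computed before Gemini's thinking tokens were folded into output_tokens, so thought tokens were dropped from the total. A toy check with hypothetical counts:

    prompt, candidates, thoughts = 100, 50, 30  # hypothetical usage numbers

    # 2.0.10: total computed before thoughts were added
    output = candidates
    total_before = prompt + output  # 150, thinking tokens missing
    output += thoughts

    # 2.1.0: thoughts folded into output first, then the total
    output = candidates + thoughts
    total_after = prompt + output   # 180, thinking tokens included
    assert (total_before, total_after) == (150, 180)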
agno/models/litellm/chat.py CHANGED
@@ -38,6 +38,10 @@ class LiteLLM(Model):
     max_tokens: Optional[int] = None
     temperature: float = 0.7
     top_p: float = 1.0
+    metadata: Optional[Dict[str, Any]] = None
+    extra_headers: Optional[Dict[str, Any]] = None
+    extra_query: Optional[Dict[str, Any]] = None
+    extra_body: Optional[Dict[str, Any]] = None
     request_params: Optional[Dict[str, Any]] = None
 
     client: Optional[Any] = None
@@ -148,10 +152,23 @@ class LiteLLM(Model):
             base_params["api_key"] = self.api_key
         if self.api_base:
             base_params["api_base"] = self.api_base
+        if self.extra_headers:
+            base_params["extra_headers"] = self.extra_headers
+        if self.extra_query:
+            base_params["extra_query"] = self.extra_query
         if tools:
             base_params["tools"] = tools
             base_params["tool_choice"] = "auto"
 
+        # Handle metadata via extra_body as per LiteLLM docs
+        if self.metadata:
+            if self.extra_body:
+                base_params["extra_body"] = {**self.extra_body, "metadata": self.metadata}
+            else:
+                base_params["extra_body"] = {"metadata": self.metadata}
+        elif self.extra_body:
+            base_params["extra_body"] = self.extra_body
+
         # Add additional request params if provided
         request_params: Dict[str, Any] = {k: v for k, v in base_params.items() if v is not None}
         if self.request_params:
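With the new fields, pass-through request options can be set directly on the model. A usage sketch (field names from the diff above; the values and import path are assumptions):

    from agno.models.litellm import LiteLLM  # import path assumed

    model = LiteLLM(
        id="gpt-4o",
        metadata={"trace_id": "run-123"},  # merged into extra_body, per LiteLLM docs
        extra_headers={"X-Org": "acme"},   # forwarded with every request
        extra_body={"safe_mode": True},    # hypothetical extra_body payload
    )

    # model.get_request_params() would then include:
    #   extra_headers={"X-Org": "acme"}
    #   extra_body={"safe_mode": True, "metadata": {"trace_id": "run-123"}}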
agno/models/openai/chat.py CHANGED
@@ -160,6 +160,7 @@ class OpenAIChat(Model):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        run_response: Optional[RunOutput] = None,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
@@ -370,7 +371,9 @@ class OpenAIChat(Model):
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
             messages=[self._format_message(m) for m in messages],  # type: ignore
-            **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+            **self.get_request_params(
+                response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+            ),
         )
         assistant_message.metrics.stop_timer()
 
@@ -447,7 +450,9 @@ class OpenAIChat(Model):
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
             messages=[self._format_message(m) for m in messages],  # type: ignore
-            **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+            **self.get_request_params(
+                response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+            ),
         )
         assistant_message.metrics.stop_timer()
 
@@ -524,7 +529,9 @@ class OpenAIChat(Model):
             messages=[self._format_message(m) for m in messages],  # type: ignore
             stream=True,
             stream_options={"include_usage": True},
-            **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+            **self.get_request_params(
+                response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+            ),
         ):
             yield self._parse_provider_response_delta(chunk)
 
@@ -598,7 +605,9 @@ class OpenAIChat(Model):
             messages=[self._format_message(m) for m in messages],  # type: ignore
             stream=True,
             stream_options={"include_usage": True},
-            **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
+            **self.get_request_params(
+                response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+            ),
         )
 
         async for chunk in async_stream:
agno/models/perplexity/perplexity.py CHANGED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, Optional, Type, Union
 
 from pydantic import BaseModel
 
@@ -53,8 +53,7 @@ class Perplexity(OpenAILike):
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests.
agno/models/requesty/__init__.py ADDED
@@ -0,0 +1,5 @@
+from agno.models.requesty.requesty import Requesty
+
+__all__ = [
+    "Requesty",
+]
agno/models/requesty/requesty.py ADDED
@@ -0,0 +1,49 @@
+from dataclasses import dataclass, field
+from os import getenv
+from typing import Any, Dict, List, Optional, Type, Union
+
+from pydantic import BaseModel
+
+from agno.models.openai.like import OpenAILike
+from agno.run.agent import RunOutput
+
+
+@dataclass
+class Requesty(OpenAILike):
+    """
+    A class for using models hosted on Requesty.
+
+    Attributes:
+        id (str): The model id. Defaults to "openai/gpt-4.1".
+        provider (str): The provider name. Defaults to "Requesty".
+        api_key (Optional[str]): The API key.
+        base_url (str): The base URL. Defaults to "https://router.requesty.ai/v1".
+        max_tokens (int): The maximum number of tokens. Defaults to 1024.
+    """
+
+    id: str = "openai/gpt-4.1"
+    name: str = "Requesty"
+    provider: str = "Requesty"
+
+    api_key: Optional[str] = field(default_factory=lambda: getenv("REQUESTY_API_KEY"))
+    base_url: str = "https://router.requesty.ai/v1"
+    max_tokens: int = 1024
+
+    def get_request_params(
+        self,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        run_response: Optional[RunOutput] = None,
+    ) -> Dict[str, Any]:
+        params = super().get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice)
+
+        if "extra_body" not in params:
+            params["extra_body"] = {}
+        params["extra_body"]["requesty"] = {}
+        if run_response and run_response.user_id:
+            params["extra_body"]["requesty"]["user_id"] = run_response.user_id
+        if run_response and run_response.session_id:
+            params["extra_body"]["requesty"]["trace_id"] = run_response.session_id
+
+        return params
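A minimal usage sketch for the new provider (assumes REQUESTY_API_KEY is exported and uses the 2.x Agent API from elsewhere in this package):

    from agno.agent import Agent
    from agno.models.requesty import Requesty

    agent = Agent(model=Requesty(id="openai/gpt-4.1"))
    agent.print_response("Hello from Requesty!")

    # When a run carries user_id/session_id, get_request_params() forwards them
    # to Requesty as extra_body={"requesty": {"user_id": ..., "trace_id": ...}}.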
agno/models/vllm/vllm.py CHANGED
@@ -57,6 +57,7 @@ class VLLM(OpenAILike):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         request_kwargs = super().get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
agno/models/xai/xai.py CHANGED
@@ -44,6 +44,7 @@ class xAI(OpenAILike):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs: Any,
     ) -> Dict[str, Any]:
         """
         Returns keyword arguments for API requests, including search parameters.
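Several providers (Cerebras, DashScope, Perplexity, vLLM, xAI) gain **kwargs on get_request_params in this release. The point is forward compatibility: the base class can now thread new keywords such as run_response through every override without breaking providers that ignore them. A sketch of the pattern with hypothetical class names:

    from typing import Any, Dict, Optional

    class BaseChatModel:  # hypothetical stand-in for agno's Model/OpenAILike
        def get_request_params(self, **kwargs: Any) -> Dict[str, Any]:
            return {}

    class Provider(BaseChatModel):
        # **kwargs swallows arguments this provider does not use (e.g. run_response),
        # so a widened base signature never raises TypeError here.
        def get_request_params(
            self, response_format: Optional[Dict[str, Any]] = None, **kwargs: Any
        ) -> Dict[str, Any]:
            params = super().get_request_params()
            if response_format is not None:
                params["response_format"] = response_format
            return params

    # Callers can pass new context uniformly:
    Provider().get_request_params(response_format={"type": "json_object"}, run_response=None)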