agno 2.0.10__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +608 -175
- agno/db/in_memory/in_memory_db.py +42 -29
- agno/db/postgres/postgres.py +6 -4
- agno/exceptions.py +62 -1
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +51 -0
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/ollama.py +5 -0
- agno/knowledge/embedder/openai.py +18 -54
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +5 -4
- agno/knowledge/reader/pdf_reader.py +4 -3
- agno/knowledge/reader/website_reader.py +3 -2
- agno/models/base.py +125 -32
- agno/models/cerebras/cerebras.py +1 -0
- agno/models/cerebras/cerebras_openai.py +1 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/google/gemini.py +27 -5
- agno/models/litellm/chat.py +17 -0
- agno/models/openai/chat.py +13 -4
- agno/models/perplexity/perplexity.py +2 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +49 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +1 -0
- agno/os/app.py +167 -148
- agno/os/interfaces/whatsapp/router.py +2 -0
- agno/os/mcp.py +1 -1
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +181 -45
- agno/os/routers/home.py +2 -2
- agno/os/routers/memory/memory.py +23 -1
- agno/os/routers/memory/schemas.py +1 -1
- agno/os/routers/session/session.py +20 -3
- agno/os/utils.py +172 -8
- agno/run/agent.py +120 -77
- agno/run/team.py +115 -72
- agno/run/workflow.py +5 -15
- agno/session/summary.py +9 -10
- agno/session/team.py +2 -1
- agno/team/team.py +720 -168
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +42 -2
- agno/tools/knowledge.py +3 -3
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/spider.py +2 -2
- agno/tools/workflow.py +4 -5
- agno/utils/events.py +66 -1
- agno/utils/hooks.py +57 -0
- agno/utils/media.py +11 -9
- agno/utils/print_response/agent.py +43 -5
- agno/utils/print_response/team.py +48 -12
- agno/vectordb/cassandra/cassandra.py +44 -4
- agno/vectordb/chroma/chromadb.py +79 -8
- agno/vectordb/clickhouse/clickhousedb.py +43 -6
- agno/vectordb/couchbase/couchbase.py +76 -5
- agno/vectordb/lancedb/lance_db.py +38 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +76 -4
- agno/vectordb/mongodb/mongodb.py +76 -4
- agno/vectordb/pgvector/pgvector.py +50 -6
- agno/vectordb/pineconedb/pineconedb.py +39 -2
- agno/vectordb/qdrant/qdrant.py +76 -26
- agno/vectordb/singlestore/singlestore.py +77 -4
- agno/vectordb/upstashdb/upstashdb.py +42 -2
- agno/vectordb/weaviate/weaviate.py +39 -3
- agno/workflow/types.py +1 -0
- agno/workflow/workflow.py +58 -2
- {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/METADATA +4 -3
- {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/RECORD +85 -75
- {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/WHEEL +0 -0
- {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/top_level.txt +0 -0
agno/models/base.py
CHANGED
@@ -715,6 +715,7 @@ class Model(ABC):
 assistant_message = Message(role=self.assistant_message_role)
 # Create assistant message and stream data
 stream_data = MessageData()
+model_response = ModelResponse()
 if stream_model_response:
     # Generate response
     yield from self.process_response_stream(

@@ -744,7 +745,6 @@ class Model(ABC):
     assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)

 else:
-    model_response = ModelResponse()
     self._process_model_response(
         messages=messages,
         assistant_message=assistant_message,

@@ -784,6 +784,10 @@
     self.format_function_call_results(
         messages=messages, function_call_results=function_call_results, **stream_data.extra
     )
+elif model_response and model_response.extra is not None:
+    self.format_function_call_results(
+        messages=messages, function_call_results=function_call_results, **model_response.extra
+    )
 else:
     self.format_function_call_results(messages=messages, function_call_results=function_call_results)

@@ -879,9 +883,10 @@
 # Create assistant message and stream data
 assistant_message = Message(role=self.assistant_message_role)
 stream_data = MessageData()
+model_response = ModelResponse()
 if stream_model_response:
     # Generate response
-    async for
+    async for model_response in self.aprocess_response_stream(
         messages=messages,
         assistant_message=assistant_message,
         stream_data=stream_data,

@@ -890,7 +895,7 @@
         tool_choice=tool_choice or self._tool_choice,
         run_response=run_response,
     ):
-        yield
+        yield model_response

     # Populate assistant message from stream data
     if stream_data.response_content:

@@ -907,7 +912,6 @@
     assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)

 else:
-    model_response = ModelResponse()
     await self._aprocess_model_response(
         messages=messages,
         assistant_message=assistant_message,

@@ -948,6 +952,10 @@
     self.format_function_call_results(
         messages=messages, function_call_results=function_call_results, **stream_data.extra
     )
+elif model_response and model_response.extra is not None:
+    self.format_function_call_results(
+        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+    )
 else:
     self.format_function_call_results(messages=messages, function_call_results=function_call_results)

@@ -1573,30 +1581,35 @@ class Model(ABC):
     *(self.arun_function_call(fc) for fc in function_calls_to_run), return_exceptions=True
 )

-#
+# Separate async generators from other results for concurrent processing
+async_generator_results: List[Any] = []
+non_async_generator_results: List[Any] = []
+
 for result in results:
-    # If result is an exception, skip processing it
     if isinstance(result, BaseException):
-
-
+        non_async_generator_results.append(result)
+        continue

-    # Unpack result
     function_call_success, function_call_timer, function_call, function_execution_result = result

-
+    # Check if this result contains an async generator
+    if isinstance(function_call.result, (AsyncGeneratorType, AsyncIterator)):
+        async_generator_results.append(result)
+    else:
+        non_async_generator_results.append(result)

-
-
-
-
-    _handle_agent_exception(a_exc, additional_input)
-    # Set function call success to False if an exception occurred
-    function_call_success = False
+# Process async generators with real-time event streaming using asyncio.Queue
+async_generator_outputs: Dict[int, Tuple[Any, str, Optional[BaseException]]] = {}
+event_queue: asyncio.Queue = asyncio.Queue()
+active_generators_count: int = len(async_generator_results)

-
-
-
-
+# Create background tasks for each async generator
+async def process_async_generator(result, generator_id):
+    function_call_success, function_call_timer, function_call, function_execution_result = result
+    function_call_output = ""
+
+    try:
+        async for item in function_call.result:
             # This function yields agent/team run events
             if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
                 item, tuple(get_args(TeamRunOutputEvent))

@@ -1610,17 +1623,102 @@
                 function_call_output += item.content or ""

                 if function_call.function.show_result:
-
+                    await event_queue.put(ModelResponse(content=item.content))
                 continue

-
-
+            if isinstance(item, CustomEvent):
+                function_call_output += str(item)
+
+                # Put the event into the queue to be yielded
+                await event_queue.put(item)
+
+            # Yield custom events emitted by the tool
             else:
                 function_call_output += str(item)
                 if function_call.function.show_result:
-
-
-
+                    await event_queue.put(ModelResponse(content=str(item)))
+
+        # Store the final output for this generator
+        async_generator_outputs[generator_id] = (result, function_call_output, None)
+
+    except Exception as e:
+        # Store the exception
+        async_generator_outputs[generator_id] = (result, "", e)
+
+    # Signal that this generator is done
+    await event_queue.put(("GENERATOR_DONE", generator_id))
+
+# Start all async generator tasks
+generator_tasks = []
+for i, result in enumerate(async_generator_results):
+    task = asyncio.create_task(process_async_generator(result, i))
+    generator_tasks.append(task)
+
+# Stream events from the queue as they arrive
+completed_generators_count = 0
+while completed_generators_count < active_generators_count:
+    try:
+        event = await event_queue.get()
+
+        # Check if this is a completion signal
+        if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
+            completed_generators_count += 1
+            continue
+
+        # Yield the actual event
+        yield event
+
+    except Exception as e:
+        log_error(f"Error processing async generator event: {e}")
+        break
+
+# Now process all results (non-async generators and completed async generators)
+for i, original_result in enumerate(results):
+    # If result is an exception, skip processing it
+    if isinstance(original_result, BaseException):
+        log_error(f"Error during function call: {original_result}")
+        raise original_result
+
+    # Unpack result
+    function_call_success, function_call_timer, function_call, function_execution_result = original_result
+
+    # Check if this was an async generator that was already processed
+    async_function_call_output = None
+    if isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+        # Find the corresponding processed result
+        async_gen_index = 0
+        for j, result in enumerate(results[: i + 1]):
+            if not isinstance(result, BaseException):
+                _, _, fc, _ = result
+                if isinstance(fc.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                    if j == i:  # This is our async generator
+                        if async_gen_index in async_generator_outputs:
+                            _, async_function_call_output, error = async_generator_outputs[async_gen_index]
+                            if error:
+                                log_error(f"Error in async generator: {error}")
+                                raise error
+                        break
+                    async_gen_index += 1
+
+    updated_session_state = function_execution_result.updated_session_state
+
+    # Handle AgentRunException
+    if isinstance(function_call_success, AgentRunException):
+        a_exc = function_call_success
+        # Update additional messages from function call
+        _handle_agent_exception(a_exc, additional_input)
+        # Set function call success to False if an exception occurred
+        function_call_success = False
+
+    # Process function call output
+    function_call_output: str = ""
+
+    # Check if this was an async generator that was already processed
+    if async_function_call_output is not None:
+        function_call_output = async_function_call_output
+        # Events from async generators were already yielded in real-time above
+    elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
+        for item in function_call.result:
             # This function yields agent/team run events
             if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
                 item, tuple(get_args(TeamRunOutputEvent))

@@ -1637,13 +1735,8 @@
             yield ModelResponse(content=item.content)
             continue

-            if isinstance(item, CustomEvent):
-                function_call_output += str(item)
-
             # Yield the event itself to bubble it up
             yield item
-
-            # Yield custom events emitted by the tool
             else:
                 function_call_output += str(item)
                 if function_call.function.show_result:
agno/models/cerebras/cerebras.py
CHANGED
@@ -25,6 +25,7 @@ class CerebrasOpenAI(OpenAILike):
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
     tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     """
     Returns keyword arguments for API requests.
agno/models/dashscope/dashscope.py
CHANGED

@@ -73,6 +73,7 @@ class DashScope(OpenAILike):
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
     tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     params = super().get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice)
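This release also adds **kwargs to get_request_params overrides across several providers (CerebrasOpenAI and DashScope here, VLLM and xAI below), so the overrides tolerate new keyword arguments such as run_response that the base class may start passing. A minimal sketch of the pattern, using hypothetical class names rather than the real agno hierarchy:

from typing import Any, Dict, Optional


class ModelBase:
    # Hypothetical base class; the base signature may grow over time.
    def get_request_params(self, response_format: Optional[Dict] = None, **kwargs: Any) -> Dict[str, Any]:
        return {"response_format": response_format}


class ProviderModel(ModelBase):
    # Accepting **kwargs means this override keeps working when the caller
    # starts passing new keyword arguments such as run_response.
    def get_request_params(self, response_format: Optional[Dict] = None, **kwargs: Any) -> Dict[str, Any]:
        params = super().get_request_params(response_format=response_format)
        params["provider_specific_flag"] = True
        return params


print(ProviderModel().get_request_params(response_format=None, run_response=object()))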
agno/models/google/gemini.py
CHANGED
@@ -26,6 +26,7 @@ try:
 from google.genai.types import (
     Content,
     DynamicRetrievalConfig,
+    FunctionCallingConfigMode,
     GenerateContentConfig,
     GenerateContentResponse,
     GenerateContentResponseUsageMetadata,

@@ -150,6 +151,7 @@ class Gemini(Model):
     system_message: Optional[str] = None,
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
+    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
 ) -> Dict[str, Any]:
     """
     Returns the request keyword arguments for the GenerativeModel client.

@@ -245,6 +247,18 @@
 elif tools:
     config["tools"] = [format_function_definitions(tools)]

+if tool_choice is not None:
+    if isinstance(tool_choice, str) and tool_choice.lower() == "auto":
+        config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.AUTO}}
+    elif isinstance(tool_choice, str) and tool_choice.lower() == "none":
+        config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.NONE}}
+    elif isinstance(tool_choice, str) and tool_choice.lower() == "validated":
+        config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.VALIDATED}}
+    elif isinstance(tool_choice, str) and tool_choice.lower() == "any":
+        config["tool_config"] = {"function_calling_config": {"mode": FunctionCallingConfigMode.ANY}}
+    else:
+        config["tool_config"] = {"function_calling_config": {"mode": tool_choice}}
+
 config = {k: v for k, v in config.items() if v is not None}

 if config:

@@ -271,7 +285,9 @@
 Invokes the model with a list of messages and returns the response.
 """
 formatted_messages, system_message = self._format_messages(messages)
-request_kwargs = self.get_request_params(
+request_kwargs = self.get_request_params(
+    system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+)
 try:
     if run_response and run_response.metrics:
         run_response.metrics.set_time_to_first_token()

@@ -315,7 +331,9 @@
 """
 formatted_messages, system_message = self._format_messages(messages)

-request_kwargs = self.get_request_params(
+request_kwargs = self.get_request_params(
+    system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+)
 try:
     if run_response and run_response.metrics:
         run_response.metrics.set_time_to_first_token()

@@ -356,7 +374,9 @@
 """
 formatted_messages, system_message = self._format_messages(messages)

-request_kwargs = self.get_request_params(
+request_kwargs = self.get_request_params(
+    system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+)

 try:
     if run_response and run_response.metrics:

@@ -400,7 +420,9 @@
 """
 formatted_messages, system_message = self._format_messages(messages)

-request_kwargs = self.get_request_params(
+request_kwargs = self.get_request_params(
+    system_message, response_format=response_format, tools=tools, tool_choice=tool_choice
+)

 try:
     if run_response and run_response.metrics:

@@ -1051,9 +1073,9 @@

 metrics.input_tokens = response_usage.prompt_token_count or 0
 metrics.output_tokens = response_usage.candidates_token_count or 0
-metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
 if response_usage.thoughts_token_count is not None:
     metrics.output_tokens += response_usage.thoughts_token_count or 0
+metrics.total_tokens = metrics.input_tokens + metrics.output_tokens

 metrics.cache_read_tokens = response_usage.cached_content_token_count or 0
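Gemini now accepts an OpenAI-style tool_choice and translates it into a function-calling config. The helper below mirrors that mapping as a standalone sketch; it uses plain strings for the modes so it runs without the google-genai dependency, whereas the real code uses google.genai.types.FunctionCallingConfigMode.

from typing import Any, Dict, Union


def build_tool_config(tool_choice: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
    # "auto", "none", "validated" and "any" map to the corresponding mode;
    # anything else is passed through unchanged, matching the fallback branch.
    if isinstance(tool_choice, str) and tool_choice.lower() in ("auto", "none", "validated", "any"):
        mode: Any = tool_choice.upper()
    else:
        mode = tool_choice
    return {"function_calling_config": {"mode": mode}}


print(build_tool_config("auto"))  # {'function_calling_config': {'mode': 'AUTO'}}
print(build_tool_config("any"))   # {'function_calling_config': {'mode': 'ANY'}}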
agno/models/litellm/chat.py
CHANGED
@@ -38,6 +38,10 @@ class LiteLLM(Model):
 max_tokens: Optional[int] = None
 temperature: float = 0.7
 top_p: float = 1.0
+metadata: Optional[Dict[str, Any]] = None
+extra_headers: Optional[Dict[str, Any]] = None
+extra_query: Optional[Dict[str, Any]] = None
+extra_body: Optional[Dict[str, Any]] = None
 request_params: Optional[Dict[str, Any]] = None

 client: Optional[Any] = None

@@ -148,10 +152,23 @@
     base_params["api_key"] = self.api_key
 if self.api_base:
     base_params["api_base"] = self.api_base
+if self.extra_headers:
+    base_params["extra_headers"] = self.extra_headers
+if self.extra_query:
+    base_params["extra_query"] = self.extra_query
 if tools:
     base_params["tools"] = tools
     base_params["tool_choice"] = "auto"

+# Handle metadata via extra_body as per LiteLLM docs
+if self.metadata:
+    if self.extra_body:
+        base_params["extra_body"] = {**self.extra_body, "metadata": self.metadata}
+    else:
+        base_params["extra_body"] = {"metadata": self.metadata}
+elif self.extra_body:
+    base_params["extra_body"] = self.extra_body
+
 # Add additional request params if provided
 request_params: Dict[str, Any] = {k: v for k, v in base_params.items() if v is not None}
 if self.request_params:
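LiteLLM request building now forwards extra_headers and extra_query directly and nests metadata inside extra_body, preserving any user-supplied extra_body. A minimal sketch of that merge, using only the standard library:

from typing import Any, Dict, Optional


def merge_extra_body(
    extra_body: Optional[Dict[str, Any]], metadata: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    # metadata is nested under extra_body; a user-supplied extra_body is preserved.
    if metadata:
        return {**(extra_body or {}), "metadata": metadata}
    return extra_body


print(merge_extra_body({"trace": "abc"}, {"user": "u1"}))
# {'trace': 'abc', 'metadata': {'user': 'u1'}}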
agno/models/openai/chat.py
CHANGED
@@ -160,6 +160,7 @@ class OpenAIChat(Model):
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
     tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    run_response: Optional[RunOutput] = None,
 ) -> Dict[str, Any]:
     """
     Returns keyword arguments for API requests.

@@ -370,7 +371,9 @@
 provider_response = self.get_client().chat.completions.create(
     model=self.id,
     messages=[self._format_message(m) for m in messages],  # type: ignore
-    **self.get_request_params(
+    **self.get_request_params(
+        response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+    ),
 )
 assistant_message.metrics.stop_timer()

@@ -447,7 +450,9 @@
 response = await self.get_async_client().chat.completions.create(
     model=self.id,
     messages=[self._format_message(m) for m in messages],  # type: ignore
-    **self.get_request_params(
+    **self.get_request_params(
+        response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+    ),
 )
 assistant_message.metrics.stop_timer()

@@ -524,7 +529,9 @@
     messages=[self._format_message(m) for m in messages],  # type: ignore
     stream=True,
     stream_options={"include_usage": True},
-    **self.get_request_params(
+    **self.get_request_params(
+        response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+    ),
 ):
     yield self._parse_provider_response_delta(chunk)

@@ -598,7 +605,9 @@
     messages=[self._format_message(m) for m in messages],  # type: ignore
     stream=True,
     stream_options={"include_usage": True},
-    **self.get_request_params(
+    **self.get_request_params(
+        response_format=response_format, tools=tools, tool_choice=tool_choice, run_response=run_response
+    ),
 )

 async for chunk in async_stream:
agno/models/perplexity/perplexity.py
CHANGED

@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Any, Dict,
+from typing import Any, Dict, Optional, Type, Union

 from pydantic import BaseModel

@@ -53,8 +53,7 @@ class Perplexity(OpenAILike):
 def get_request_params(
     self,
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-
-    tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     """
     Returns keyword arguments for API requests.
agno/models/requesty/requesty.py
ADDED

@@ -0,0 +1,49 @@
+from dataclasses import dataclass, field
+from os import getenv
+from typing import Any, Dict, List, Optional, Type, Union
+
+from pydantic import BaseModel
+
+from agno.models.openai.like import OpenAILike
+from agno.run.agent import RunOutput
+
+
+@dataclass
+class Requesty(OpenAILike):
+    """
+    A class for using models hosted on Requesty.
+
+    Attributes:
+        id (str): The model id. Defaults to "openai/gpt-4.1".
+        provider (str): The provider name. Defaults to "Requesty".
+        api_key (Optional[str]): The API key.
+        base_url (str): The base URL. Defaults to "https://router.requesty.ai/v1".
+        max_tokens (int): The maximum number of tokens. Defaults to 1024.
+    """
+
+    id: str = "openai/gpt-4.1"
+    name: str = "Requesty"
+    provider: str = "Requesty"
+
+    api_key: Optional[str] = field(default_factory=lambda: getenv("REQUESTY_API_KEY"))
+    base_url: str = "https://router.requesty.ai/v1"
+    max_tokens: int = 1024
+
+    def get_request_params(
+        self,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        run_response: Optional[RunOutput] = None,
+    ) -> Dict[str, Any]:
+        params = super().get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice)
+
+        if "extra_body" not in params:
+            params["extra_body"] = {}
+        params["extra_body"]["requesty"] = {}
+        if run_response and run_response.user_id:
+            params["extra_body"]["requesty"]["user_id"] = run_response.user_id
+        if run_response and run_response.session_id:
+            params["extra_body"]["requesty"]["trace_id"] = run_response.session_id
+
+        return params
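Requesty is a new OpenAI-compatible provider routed through https://router.requesty.ai/v1; it reads REQUESTY_API_KEY and attaches user_id and trace_id from the current run via extra_body. A hedged usage sketch, assuming agno.models.requesty re-exports the Requesty class shown above and that REQUESTY_API_KEY is set:

# Illustrative only; the model id and prompt are placeholders.
from agno.agent import Agent
from agno.models.requesty import Requesty

agent = Agent(model=Requesty(id="openai/gpt-4.1"))
agent.print_response("Summarize what a model router does in one sentence.")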
agno/models/vllm/vllm.py
CHANGED
@@ -57,6 +57,7 @@ class VLLM(OpenAILike):
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
     tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     request_kwargs = super().get_request_params(
         response_format=response_format, tools=tools, tool_choice=tool_choice
agno/models/xai/xai.py
CHANGED
@@ -44,6 +44,7 @@ class xAI(OpenAILike):
     response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
     tools: Optional[List[Dict[str, Any]]] = None,
     tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     """
     Returns keyword arguments for API requests, including search parameters.
|