letta-nightly 0.6.37.dev20250310103931__py3-none-any.whl → 0.6.38.dev20250312104155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +34 -12
- letta/client/client.py +1 -50
- letta/constants.py +1 -1
- letta/functions/function_sets/multi_agent.py +9 -8
- letta/functions/helpers.py +33 -6
- letta/llm_api/anthropic.py +20 -0
- letta/llm_api/google_ai_client.py +332 -0
- letta/llm_api/google_vertex_client.py +214 -0
- letta/llm_api/llm_client.py +48 -0
- letta/llm_api/llm_client_base.py +129 -0
- letta/orm/step.py +1 -0
- letta/schemas/block.py +4 -48
- letta/schemas/letta_message.py +26 -0
- letta/schemas/message.py +1 -1
- letta/schemas/step.py +1 -0
- letta/serialize_schemas/agent.py +8 -1
- letta/server/rest_api/interface.py +9 -7
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
- letta/server/rest_api/routers/v1/agents.py +12 -8
- letta/server/rest_api/routers/v1/steps.py +2 -0
- letta/server/rest_api/routers/v1/voice.py +3 -6
- letta/services/agent_manager.py +56 -3
- letta/services/helpers/agent_manager_helper.py +12 -1
- letta/services/identity_manager.py +7 -1
- letta/services/message_manager.py +40 -0
- letta/services/step_manager.py +8 -1
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/METADATA +18 -17
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/RECORD +32 -28
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -29,6 +29,7 @@ from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.interface import AgentInterface
 from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
+from letta.llm_api.llm_client import LLMClient
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.memory import summarize_messages
@@ -356,19 +357,38 @@ class Agent(BaseAgent):
         for attempt in range(1, empty_response_retry_limit + 1):
             try:
                 log_telemetry(self.logger, "_get_ai_reply create start")
-                response = create(
+                # New LLM client flow
+                llm_client = LLMClient.create(
+                    agent_id=self.agent_state.id,
                     llm_config=self.agent_state.llm_config,
-                    messages=message_sequence,
-                    user_id=self.agent_state.created_by_id,
-                    functions=allowed_functions,
-                    # functions_python=self.functions_python, do we need this?
-                    function_call=function_call,
-                    first_message=first_message,
-                    force_tool_call=force_tool_call,
-                    stream=stream,
-                    stream_interface=self.interface,
                     put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor_id=self.agent_state.created_by_id,
                 )
+
+                if llm_client and not stream:
+                    response = llm_client.send_llm_request(
+                        messages=message_sequence,
+                        tools=allowed_functions,
+                        tool_call=function_call,
+                        stream=stream,
+                        first_message=first_message,
+                        force_tool_call=force_tool_call,
+                    )
+                else:
+                    # Fallback to existing flow
+                    response = create(
+                        llm_config=self.agent_state.llm_config,
+                        messages=message_sequence,
+                        user_id=self.agent_state.created_by_id,
+                        functions=allowed_functions,
+                        # functions_python=self.functions_python, do we need this?
+                        function_call=function_call,
+                        first_message=first_message,
+                        force_tool_call=force_tool_call,
+                        stream=stream,
+                        stream_interface=self.interface,
+                        put_inner_thoughts_first=put_inner_thoughts_first,
+                    )
                 log_telemetry(self.logger, "_get_ai_reply create finish")
 
                 # These bottom two are retryable
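The gist of the change above: _get_ai_reply now asks a factory for a provider-specific client and only falls back to the legacy create(...) call when no dedicated client exists or streaming is requested. A minimal, self-contained illustration of that dispatch shape; the names here (_fake_factory, _FakeProviderClient, the provider strings) are stand-ins, not Letta APIs.

# Illustration only: factory returns a provider-specific client, or None when there is
# no dedicated client; None (or streaming) falls back to the legacy path.
from typing import Optional


class _FakeProviderClient:
    def send_llm_request(self) -> str:
        return "handled by provider-specific client"


def _fake_factory(provider: str) -> Optional[_FakeProviderClient]:
    # Stand-in for LLMClient.create: judging from the new files in this release,
    # only some providers (e.g. the Google AI / Vertex backends) get a dedicated client.
    return _FakeProviderClient() if provider in ("google_ai", "google_vertex") else None


def get_reply(provider: str, stream: bool) -> str:
    client = _fake_factory(provider)
    if client and not stream:
        return client.send_llm_request()
    return "handled by legacy create(...) path"


assert get_reply("google_ai", stream=False) == "handled by provider-specific client"
assert get_reply("openai", stream=False) == "handled by legacy create(...) path"
assert get_reply("google_ai", stream=True) == "handled by legacy create(...) path"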
@@ -632,7 +652,7 @@ class Agent(BaseAgent):
                     function_args,
                     function_response,
                     messages,
-                    [tool_return]
+                    [tool_return],
                     include_function_failed_message=True,
                 )
                 return messages, False, True  # force a heartbeat to allow agent to handle error
@@ -659,7 +679,7 @@ class Agent(BaseAgent):
                         "content": function_response,
                         "tool_call_id": tool_call_id,
                     },
-                    tool_returns=[tool_return] if
+                    tool_returns=[tool_return] if sandbox_run_result else None,
                 )
             )  # extend conversation with function response
             self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1])
@@ -909,6 +929,7 @@ class Agent(BaseAgent):
             # Log step - this must happen before messages are persisted
             step = self.step_manager.log_step(
                 actor=self.user,
+                agent_id=self.agent_state.id,
                 provider_name=self.agent_state.llm_config.model_endpoint_type,
                 model=self.agent_state.llm_config.model,
                 model_endpoint=self.agent_state.llm_config.model_endpoint,
@@ -1174,6 +1195,7 @@ class Agent(BaseAgent):
             memory_edit_timestamp=get_utc_time(),
             previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
             archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
+            recent_passages=self.agent_manager.list_passages(actor=self.user, agent_id=self.agent_state.id, ascending=False, limit=10),
         )
         num_tokens_external_memory_summary = count_tokens(external_memory_summary)
 
letta/client/client.py
CHANGED
@@ -4,7 +4,6 @@ import time
 from typing import Callable, Dict, Generator, List, Optional, Union
 
 import requests
-from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall as OpenAIToolCall
 
 import letta.utils
 from letta.constants import ADMIN_PREFIX, BASE_MEMORY_TOOLS, BASE_TOOLS, DEFAULT_HUMAN, DEFAULT_PERSONA, FUNCTION_RETURN_CHAR_LIMIT
@@ -29,7 +28,7 @@ from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, ChatMemory, CreateArchivalMemory, Memory, RecallMemorySummary
-from letta.schemas.message import Message, MessageCreate
+from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import UsageStatistics
 from letta.schemas.organization import Organization
 from letta.schemas.passage import Passage
@@ -640,30 +639,6 @@ class RESTClient(AbstractClient):
         # refresh and return agent
         return self.get_agent(agent_state.id)
 
-    def update_message(
-        self,
-        agent_id: str,
-        message_id: str,
-        role: Optional[MessageRole] = None,
-        text: Optional[str] = None,
-        name: Optional[str] = None,
-        tool_calls: Optional[List[OpenAIToolCall]] = None,
-        tool_call_id: Optional[str] = None,
-    ) -> Message:
-        request = MessageUpdate(
-            role=role,
-            content=text,
-            name=name,
-            tool_calls=tool_calls,
-            tool_call_id=tool_call_id,
-        )
-        response = requests.patch(
-            f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/messages/{message_id}", json=request.model_dump(), headers=self.headers
-        )
-        if response.status_code != 200:
-            raise ValueError(f"Failed to update message: {response.text}")
-        return Message(**response.json())
-
     def update_agent(
         self,
         agent_id: str,
@@ -2436,30 +2411,6 @@ class LocalClient(AbstractClient):
         # TODO: get full agent state
         return self.server.agent_manager.get_agent_by_id(agent_state.id, actor=self.user)
 
-    def update_message(
-        self,
-        agent_id: str,
-        message_id: str,
-        role: Optional[MessageRole] = None,
-        text: Optional[str] = None,
-        name: Optional[str] = None,
-        tool_calls: Optional[List[OpenAIToolCall]] = None,
-        tool_call_id: Optional[str] = None,
-    ) -> Message:
-        message = self.server.update_agent_message(
-            agent_id=agent_id,
-            message_id=message_id,
-            request=MessageUpdate(
-                role=role,
-                content=text,
-                name=name,
-                tool_calls=tool_calls,
-                tool_call_id=tool_call_id,
-            ),
-            actor=self.user,
-        )
-        return message
-
     def update_agent(
         self,
         agent_id: str,
letta/constants.py
CHANGED
@@ -50,7 +50,7 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # Multi agent tools
-MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "
+MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
 # Set of all built-in Letta tools
 LETTA_TOOL_SET = set(BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS)
 
letta/functions/function_sets/multi_agent.py
CHANGED

@@ -2,7 +2,7 @@ import asyncio
 from typing import TYPE_CHECKING, List
 
 from letta.functions.helpers import (
-    _send_message_to_agents_matching_all_tags_async,
+    _send_message_to_agents_matching_tags_async,
     execute_send_message_to_agent,
     fire_and_forget_send_to_agent,
 )
@@ -70,18 +70,19 @@ def send_message_to_agent_async(self: "Agent", message: str, other_agent_id: str
     return "Successfully sent message"
 
 
-def
+def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all: List[str], match_some: List[str]) -> List[str]:
     """
-    Sends a message to all agents within the same organization that match
+    Sends a message to all agents within the same organization that match the specified tag criteria. Agents must possess *all* of the tags in `match_all` and *at least one* of the tags in `match_some` to receive the message.
 
     Args:
         message (str): The content of the message to be sent to each matching agent.
-
+        match_all (List[str]): A list of tags that an agent must possess to receive the message.
+        match_some (List[str]): A list of tags where an agent must have at least one to qualify.
 
     Returns:
-        List[str]: A list of responses from the agents that matched
-        response corresponds to a single agent. Agents that do not respond will not
-
+        List[str]: A list of responses from the agents that matched the filtering criteria. Each
+        response corresponds to a single agent. Agents that do not respond will not have an entry
+        in the returned list.
     """
 
-    return asyncio.run(
+    return asyncio.run(_send_message_to_agents_matching_tags_async(self, message, match_all, match_some))
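A self-contained sketch of the tag semantics documented above: an agent qualifies when it carries every tag in match_all and at least one tag in match_some. The real filtering happens server-side in AgentManager.list_agents_matching_tags; the empty-list behavior below is an assumption, not taken from the package.

# Sketch of the documented matching rule, for illustration only.
from typing import List, Set


def agent_matches(agent_tags: Set[str], match_all: List[str], match_some: List[str]) -> bool:
    has_all = set(match_all).issubset(agent_tags)
    # Assumption: an empty match_some list imposes no "at least one" constraint.
    has_some = bool(set(match_some) & agent_tags) if match_some else True
    return has_all and has_some


assert agent_matches({"support", "eu", "tier-1"}, match_all=["support"], match_some=["eu", "us"])
assert not agent_matches({"support", "us"}, match_all=["support", "tier-1"], match_some=["us"])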
letta/functions/helpers.py
CHANGED
@@ -518,8 +518,16 @@ def fire_and_forget_send_to_agent(
     run_in_background_thread(background_task())
 
 
-async def
-
+async def _send_message_to_agents_matching_tags_async(
+    sender_agent: "Agent", message: str, match_all: List[str], match_some: List[str]
+) -> List[str]:
+    log_telemetry(
+        sender_agent.logger,
+        "_send_message_to_agents_matching_tags_async start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
     server = get_letta_server()
 
     augmented_message = (
@@ -529,9 +537,22 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
     )
 
     # Retrieve up to 100 matching agents
-    log_telemetry(
-
-
+    log_telemetry(
+        sender_agent.logger,
+        "_send_message_to_agents_matching_tags_async listing agents start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    matching_agents = server.agent_manager.list_agents_matching_tags(actor=sender_agent.user, match_all=match_all, match_some=match_some)
+
+    log_telemetry(
+        sender_agent.logger,
+        "_send_message_to_agents_matching_tags_async listing agents finish",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
 
     # Create a system message
     messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -559,7 +580,13 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
         else:
             final.append(r)
 
-    log_telemetry(
+    log_telemetry(
+        sender_agent.logger,
+        "_send_message_to_agents_matching_tags_async finish",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
     return final
 
 
letta/llm_api/anthropic.py
CHANGED
@@ -53,6 +53,11 @@ MODEL_LIST = [
         "name": "claude-3-opus-20240229",
         "context_window": 200000,
     },
+    # latest
+    {
+        "name": "claude-3-opus-latest",
+        "context_window": 200000,
+    },
     ## Sonnet
     # 3.0
     {
@@ -69,11 +74,21 @@ MODEL_LIST = [
         "name": "claude-3-5-sonnet-20241022",
         "context_window": 200000,
     },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-sonnet-latest",
+        "context_window": 200000,
+    },
     # 3.7
     {
         "name": "claude-3-7-sonnet-20250219",
         "context_window": 200000,
     },
+    # 3.7 latest
+    {
+        "name": "claude-3-7-sonnet-latest",
+        "context_window": 200000,
+    },
     ## Haiku
     # 3.0
     {
@@ -85,6 +100,11 @@ MODEL_LIST = [
         "name": "claude-3-5-haiku-20241022",
         "context_window": 200000,
     },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-haiku-latest",
+        "context_window": 200000,
+    },
 ]
 
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
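The new "-latest" aliases carry the same 200k context window as the pinned snapshots. A hypothetical lookup (not part of the diff) showing how an alias resolves against MODEL_LIST; the list is inlined and trimmed here so the snippet runs standalone.

# Hypothetical helper for illustration; not a letta API.
MODEL_LIST = [
    {"name": "claude-3-5-sonnet-20241022", "context_window": 200000},
    {"name": "claude-3-5-sonnet-latest", "context_window": 200000},
    {"name": "claude-3-7-sonnet-latest", "context_window": 200000},
]


def get_context_window(model_name: str, default: int = 200000) -> int:
    for entry in MODEL_LIST:
        if entry["name"] == model_name:
            return entry["context_window"]
    return default


assert get_context_window("claude-3-7-sonnet-latest") == 200000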
letta/llm_api/google_ai_client.py
ADDED

@@ -0,0 +1,332 @@
+import uuid
+from typing import List, Optional, Tuple
+
+from letta.constants import NON_USER_MSG_PREFIX
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.json_helpers import json_dumps
+from letta.llm_api.helpers import make_post_request
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.settings import model_settings
+from letta.utils import get_tool_call_id
+
+
+class GoogleAIClient(LLMClientBase):
+
+    def request(self, request_data: dict) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        url, headers = self.get_gemini_endpoint_and_headers(generate_content=True)
+        return make_post_request(url, headers, request_data)
+
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        tools: List[dict],
+        tool_call: Optional[str],
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for this client.
+        """
+        if tools:
+            tools = [{"type": "function", "function": f} for f in tools]
+            tools = self.convert_tools_to_google_ai_format(
+                [Tool(**t) for t in tools],
+            )
+        contents = self.add_dummy_model_messages(
+            [m.to_google_ai_dict() for m in messages],
+        )
+
+        return {
+            "contents": contents,
+            "tools": tools,
+            "generation_config": {
+                "temperature": self.llm_config.temperature,
+                "max_output_tokens": self.llm_config.max_tokens,
+            },
+        }
+
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],
+    ) -> ChatCompletionResponse:
+        """
+        Converts custom response format from llm client into an OpenAI
+        ChatCompletionsResponse object.
+
+        Example Input:
+        {
+          "candidates": [
+            {
+              "content": {
+                "parts": [
+                  {
+                    "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                  }
+                ]
+              }
+            }
+          ],
+          "usageMetadata": {
+            "promptTokenCount": 9,
+            "candidatesTokenCount": 27,
+            "totalTokenCount": 36
+          }
+        }
+        """
+        try:
+            choices = []
+            index = 0
+            for candidate in response_data["candidates"]:
+                content = candidate["content"]
+
+                role = content["role"]
+                assert role == "model", f"Unknown role in response: {role}"
+
+                parts = content["parts"]
+                # TODO support parts / multimodal
+                # TODO support parallel tool calling natively
+                # TODO Alternative here is to throw away everything else except for the first part
+                for response_message in parts:
+                    # Convert the actual message style to OpenAI style
+                    if "functionCall" in response_message and response_message["functionCall"] is not None:
+                        function_call = response_message["functionCall"]
+                        assert isinstance(function_call, dict), function_call
+                        function_name = function_call["name"]
+                        assert isinstance(function_name, str), function_name
+                        function_args = function_call["args"]
+                        assert isinstance(function_args, dict), function_args
+
+                        # NOTE: this also involves stripping the inner monologue out of the function
+                        if self.llm_config.put_inner_thoughts_in_kwargs:
+                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                            assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                            assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                        else:
+                            inner_thoughts = None
+
+                        # Google AI API doesn't generate tool call IDs
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                            tool_calls=[
+                                ToolCall(
+                                    id=get_tool_call_id(),
+                                    type="function",
+                                    function=FunctionCall(
+                                        name=function_name,
+                                        arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                    ),
+                                )
+                            ],
+                        )
+
+                    else:
+
+                        # Inner thoughts are the content by default
+                        inner_thoughts = response_message["text"]
+
+                        # Google AI API doesn't generate tool call IDs
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                        )
+
+                    # Google AI API uses different finish reason strings than OpenAI
+                    # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                    # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                    # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                    # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                    finish_reason = candidate["finishReason"]
+                    if finish_reason == "STOP":
+                        openai_finish_reason = (
+                            "function_call"
+                            if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                            else "stop"
+                        )
+                    elif finish_reason == "MAX_TOKENS":
+                        openai_finish_reason = "length"
+                    elif finish_reason == "SAFETY":
+                        openai_finish_reason = "content_filter"
+                    elif finish_reason == "RECITATION":
+                        openai_finish_reason = "content_filter"
+                    else:
+                        raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                    choices.append(
+                        Choice(
+                            finish_reason=openai_finish_reason,
+                            index=index,
+                            message=openai_response_message,
+                        )
+                    )
+                    index += 1
+
+            # if len(choices) > 1:
+            #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+            # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+            # "usageMetadata": {
+            #     "promptTokenCount": 9,
+            #     "candidatesTokenCount": 27,
+            #     "totalTokenCount": 36
+            # }
+            if "usageMetadata" in response_data:
+                usage = UsageStatistics(
+                    prompt_tokens=response_data["usageMetadata"]["promptTokenCount"],
+                    completion_tokens=response_data["usageMetadata"]["candidatesTokenCount"],
+                    total_tokens=response_data["usageMetadata"]["totalTokenCount"],
+                )
+            else:
+                # Count it ourselves
+                assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+                prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                total_tokens = prompt_tokens + completion_tokens
+                usage = UsageStatistics(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                )
+
+            response_id = str(uuid.uuid4())
+            return ChatCompletionResponse(
+                id=response_id,
+                choices=choices,
+                model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time(),
+                usage=usage,
+            )
+        except KeyError as e:
+            raise e
+
+    def get_gemini_endpoint_and_headers(
+        self,
+        key_in_header: bool = True,
+        generate_content: bool = False,
+    ) -> Tuple[str, dict]:
+        """
+        Dynamically generate the model endpoint and headers.
+        """
+
+        url = f"{self.llm_config.model_endpoint}/v1beta/models"
+
+        # Add the model
+        url += f"/{self.llm_config.model}"
+
+        # Add extension for generating content if we're hitting the LM
+        if generate_content:
+            url += ":generateContent"
+
+        # Decide if api key should be in header or not
+        # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+        if key_in_header:
+            headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
+        else:
+            url += f"?key={model_settings.gemini_api_key}"
+            headers = {"Content-Type": "application/json"}
+
+        return url, headers
+
+    def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
+        """
+        OpenAI style:
+          "tools": [{
+            "type": "function",
+            "function": {
+                "name": "find_movies",
+                "description": "find ....",
+                "parameters": {
+                  "type": "object",
+                  "properties": {
+                     PARAM: {
+                       "type": PARAM_TYPE, # eg "string"
+                       "description": PARAM_DESCRIPTION,
+                     },
+                     ...
+                  },
+                  "required": List[str],
+                }
+            }
+          }
+        ]
+
+        Google AI style:
+          "tools": [{
+            "functionDeclarations": [{
+              "name": "find_movies",
+              "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+              "parameters": {
+                "type": "OBJECT",
+                "properties": {
+                  "location": {
+                    "type": "STRING",
+                    "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+                  },
+                  "description": {
+                    "type": "STRING",
+                    "description": "Any kind of description including category or genre, title words, attributes, etc."
+                  }
+                },
+                "required": ["description"]
+              }
+            }, {
+              "name": "find_theaters",
+              ...
+        """
+        function_list = [
+            dict(
+                name=t.function.name,
+                description=t.function.description,
+                parameters=t.function.parameters,  # TODO need to unpack
+            )
+            for t in tools
+        ]
+
+        # Correct casing + add inner thoughts if needed
+        for func in function_list:
+            func["parameters"]["type"] = "OBJECT"
+            for param_name, param_fields in func["parameters"]["properties"].items():
+                param_fields["type"] = param_fields["type"].upper()
+            # Add inner thoughts
+            if self.llm_config.put_inner_thoughts_in_kwargs:
+                from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+                func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                    "type": "STRING",
+                    "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+                }
+                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+        return [{"functionDeclarations": function_list}]
+
+    def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+        """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+        In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+        so there is no natural follow-up 'model' role message.
+
+        To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+        with role == 'model' that is placed in-betweeen and function output
+        (role == 'tool') and user message (role == 'user').
+        """
+        dummy_yield_message = {
+            "role": "model",
+            "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+        }
+        messages_with_padding = []
+        for i, message in enumerate(messages):
+            messages_with_padding.append(message)
+            # Check if the current message role is 'tool' and the next message role is 'user'
+            if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+                messages_with_padding.append(dummy_yield_message)
+
+        return messages_with_padding
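A small, self-contained sketch of the padding rule implemented by GoogleAIClient.add_dummy_model_messages above: whenever a tool/function turn is immediately followed by a user turn, a filler model turn is inserted between them. The message dicts and the placeholder text are illustrative only, not taken from the package.

# Illustration of the padding rule; the real code uses NON_USER_MSG_PREFIX for the filler text.
contents = [
    {"role": "user", "parts": [{"text": "hi"}]},
    {"role": "tool", "parts": [{"functionResponse": {"name": "send_message", "response": {"status": "OK"}}}]},
    {"role": "user", "parts": [{"text": "thanks"}]},
]

padded = []
for i, message in enumerate(contents):
    padded.append(message)
    if message["role"] in ["tool", "function"] and i + 1 < len(contents) and contents[i + 1]["role"] == "user":
        padded.append({"role": "model", "parts": [{"text": "[padding] Function call returned, waiting for user response."}]})

assert [m["role"] for m in padded] == ["user", "tool", "model", "user"]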