letta-nightly 0.6.37.dev20250311104150__py3-none-any.whl → 0.6.38.dev20250312104155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (30)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +34 -12
  3. letta/client/client.py +1 -50
  4. letta/constants.py +1 -1
  5. letta/functions/function_sets/multi_agent.py +9 -8
  6. letta/functions/helpers.py +33 -6
  7. letta/llm_api/google_ai_client.py +332 -0
  8. letta/llm_api/google_vertex_client.py +214 -0
  9. letta/llm_api/llm_client.py +48 -0
  10. letta/llm_api/llm_client_base.py +129 -0
  11. letta/orm/step.py +1 -0
  12. letta/schemas/block.py +4 -48
  13. letta/schemas/letta_message.py +26 -0
  14. letta/schemas/message.py +1 -1
  15. letta/schemas/step.py +1 -0
  16. letta/serialize_schemas/agent.py +8 -1
  17. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  18. letta/server/rest_api/routers/v1/agents.py +12 -8
  19. letta/server/rest_api/routers/v1/steps.py +2 -0
  20. letta/server/rest_api/routers/v1/voice.py +3 -6
  21. letta/services/agent_manager.py +56 -3
  22. letta/services/helpers/agent_manager_helper.py +12 -1
  23. letta/services/identity_manager.py +7 -1
  24. letta/services/message_manager.py +40 -0
  25. letta/services/step_manager.py +8 -1
  26. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/METADATA +18 -17
  27. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/RECORD +30 -26
  28. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/LICENSE +0 -0
  29. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/WHEEL +0 -0
  30. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.6.37"
+ __version__ = "0.6.38"
 
  # import clients
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -29,6 +29,7 @@ from letta.helpers.json_helpers import json_dumps, json_loads
  from letta.interface import AgentInterface
  from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
  from letta.llm_api.llm_api_tools import create
+ from letta.llm_api.llm_client import LLMClient
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
  from letta.memory import summarize_messages
@@ -356,19 +357,38 @@ class Agent(BaseAgent):
          for attempt in range(1, empty_response_retry_limit + 1):
              try:
                  log_telemetry(self.logger, "_get_ai_reply create start")
-                 response = create(
+                 # New LLM client flow
+                 llm_client = LLMClient.create(
+                     agent_id=self.agent_state.id,
                      llm_config=self.agent_state.llm_config,
-                     messages=message_sequence,
-                     user_id=self.agent_state.created_by_id,
-                     functions=allowed_functions,
-                     # functions_python=self.functions_python, do we need this?
-                     function_call=function_call,
-                     first_message=first_message,
-                     force_tool_call=force_tool_call,
-                     stream=stream,
-                     stream_interface=self.interface,
                      put_inner_thoughts_first=put_inner_thoughts_first,
+                     actor_id=self.agent_state.created_by_id,
                  )
+
+                 if llm_client and not stream:
+                     response = llm_client.send_llm_request(
+                         messages=message_sequence,
+                         tools=allowed_functions,
+                         tool_call=function_call,
+                         stream=stream,
+                         first_message=first_message,
+                         force_tool_call=force_tool_call,
+                     )
+                 else:
+                     # Fallback to existing flow
+                     response = create(
+                         llm_config=self.agent_state.llm_config,
+                         messages=message_sequence,
+                         user_id=self.agent_state.created_by_id,
+                         functions=allowed_functions,
+                         # functions_python=self.functions_python, do we need this?
+                         function_call=function_call,
+                         first_message=first_message,
+                         force_tool_call=force_tool_call,
+                         stream=stream,
+                         stream_interface=self.interface,
+                         put_inner_thoughts_first=put_inner_thoughts_first,
+                     )
                  log_telemetry(self.logger, "_get_ai_reply create finish")
 
                  # These bottom two are retryable
@@ -632,7 +652,7 @@ class Agent(BaseAgent):
                  function_args,
                  function_response,
                  messages,
-                 [tool_return] if tool_return else None,
+                 [tool_return],
                  include_function_failed_message=True,
              )
              return messages, False, True  # force a heartbeat to allow agent to handle error
@@ -659,7 +679,7 @@ class Agent(BaseAgent):
                          "content": function_response,
                          "tool_call_id": tool_call_id,
                      },
-                     tool_returns=[tool_return] if tool_return else None,
+                     tool_returns=[tool_return] if sandbox_run_result else None,
                  )
              )  # extend conversation with function response
              self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1])
@@ -909,6 +929,7 @@ class Agent(BaseAgent):
          # Log step - this must happen before messages are persisted
          step = self.step_manager.log_step(
              actor=self.user,
+             agent_id=self.agent_state.id,
              provider_name=self.agent_state.llm_config.model_endpoint_type,
              model=self.agent_state.llm_config.model,
              model_endpoint=self.agent_state.llm_config.model_endpoint,
@@ -1174,6 +1195,7 @@ class Agent(BaseAgent):
              memory_edit_timestamp=get_utc_time(),
              previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
              archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
+             recent_passages=self.agent_manager.list_passages(actor=self.user, agent_id=self.agent_state.id, ascending=False, limit=10),
          )
          num_tokens_external_memory_summary = count_tokens(external_memory_summary)
 
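The hunk above routes requests through the new LLMClient factory when a provider-specific client is available and streaming is off, falling back to the legacy create() path otherwise. As a rough, hand-written sketch of that control flow (the names LLMClient.create and send_llm_request come from the hunk; the surrounding variables are assumed from context, not quoted from the package):

    llm_client = LLMClient.create(
        agent_id=agent_state.id,
        llm_config=agent_state.llm_config,
        put_inner_thoughts_first=True,
        actor_id=agent_state.created_by_id,
    )
    if llm_client and not stream:
        # e.g. the Gemini-backed client added in letta/llm_api/google_ai_client.py
        response = llm_client.send_llm_request(
            messages=message_sequence,
            tools=allowed_functions,
            tool_call=function_call,
            stream=False,
        )
    else:
        # Fall back to llm_api_tools.create(), as in the right-hand side of the hunk
        response = create(llm_config=agent_state.llm_config, messages=message_sequence, functions=allowed_functions)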
letta/client/client.py CHANGED
@@ -4,7 +4,6 @@ import time
  from typing import Callable, Dict, Generator, List, Optional, Union
 
  import requests
- from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall as OpenAIToolCall
 
  import letta.utils
  from letta.constants import ADMIN_PREFIX, BASE_MEMORY_TOOLS, BASE_TOOLS, DEFAULT_HUMAN, DEFAULT_PERSONA, FUNCTION_RETURN_CHAR_LIMIT
@@ -29,7 +28,7 @@ from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
  from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import ArchivalMemorySummary, ChatMemory, CreateArchivalMemory, Memory, RecallMemorySummary
- from letta.schemas.message import Message, MessageCreate, MessageUpdate
+ from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import UsageStatistics
  from letta.schemas.organization import Organization
  from letta.schemas.passage import Passage
@@ -640,30 +639,6 @@ class RESTClient(AbstractClient):
          # refresh and return agent
          return self.get_agent(agent_state.id)
 
-     def update_message(
-         self,
-         agent_id: str,
-         message_id: str,
-         role: Optional[MessageRole] = None,
-         text: Optional[str] = None,
-         name: Optional[str] = None,
-         tool_calls: Optional[List[OpenAIToolCall]] = None,
-         tool_call_id: Optional[str] = None,
-     ) -> Message:
-         request = MessageUpdate(
-             role=role,
-             content=text,
-             name=name,
-             tool_calls=tool_calls,
-             tool_call_id=tool_call_id,
-         )
-         response = requests.patch(
-             f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/messages/{message_id}", json=request.model_dump(), headers=self.headers
-         )
-         if response.status_code != 200:
-             raise ValueError(f"Failed to update message: {response.text}")
-         return Message(**response.json())
-
      def update_agent(
          self,
          agent_id: str,
@@ -2436,30 +2411,6 @@ class LocalClient(AbstractClient):
          # TODO: get full agent state
          return self.server.agent_manager.get_agent_by_id(agent_state.id, actor=self.user)
 
-     def update_message(
-         self,
-         agent_id: str,
-         message_id: str,
-         role: Optional[MessageRole] = None,
-         text: Optional[str] = None,
-         name: Optional[str] = None,
-         tool_calls: Optional[List[OpenAIToolCall]] = None,
-         tool_call_id: Optional[str] = None,
-     ) -> Message:
-         message = self.server.update_agent_message(
-             agent_id=agent_id,
-             message_id=message_id,
-             request=MessageUpdate(
-                 role=role,
-                 content=text,
-                 name=name,
-                 tool_calls=tool_calls,
-                 tool_call_id=tool_call_id,
-             ),
-             actor=self.user,
-         )
-         return message
-
      def update_agent(
          self,
          agent_id: str,
letta/constants.py CHANGED
@@ -50,7 +50,7 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
  # Base memory tools CAN be edited, and are added by default by the server
  BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
  # Multi agent tools
- MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
+ MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
  # Set of all built-in Letta tools
  LETTA_TOOL_SET = set(BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS)
 
letta/functions/function_sets/multi_agent.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
  from typing import TYPE_CHECKING, List
 
  from letta.functions.helpers import (
-     _send_message_to_agents_matching_all_tags_async,
+     _send_message_to_agents_matching_tags_async,
      execute_send_message_to_agent,
      fire_and_forget_send_to_agent,
  )
@@ -70,18 +70,19 @@ def send_message_to_agent_async(self: "Agent", message: str, other_agent_id: str
      return "Successfully sent message"
 
 
- def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]:
+ def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all: List[str], match_some: List[str]) -> List[str]:
      """
-     Sends a message to all agents within the same organization that match all of the specified tags. Messages are dispatched in parallel for improved performance, with retries to handle transient issues and timeouts to ensure responsiveness. This function enforces a limit of 100 agents and does not support pagination (cursor-based queries). Each agent must match all specified tags (`match_all_tags=True`) to be included.
+     Sends a message to all agents within the same organization that match the specified tag criteria. Agents must possess *all* of the tags in `match_all` and *at least one* of the tags in `match_some` to receive the message.
 
      Args:
          message (str): The content of the message to be sent to each matching agent.
-         tags (List[str]): A list of tags that an agent must possess to receive the message.
+         match_all (List[str]): A list of tags that an agent must possess to receive the message.
+         match_some (List[str]): A list of tags where an agent must have at least one to qualify.
 
      Returns:
-         List[str]: A list of responses from the agents that matched all tags. Each
-             response corresponds to a single agent. Agents that do not respond will not
-             have an entry in the returned list.
+         List[str]: A list of responses from the agents that matched the filtering criteria. Each
+             response corresponds to a single agent. Agents that do not respond will not have an entry
+             in the returned list.
      """
 
-     return asyncio.run(_send_message_to_agents_matching_all_tags_async(self, message, tags))
+     return asyncio.run(_send_message_to_agents_matching_tags_async(self, message, match_all, match_some))
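For illustration only, a call under the renamed tool's new signature might look like the following (the tag values and the `agent` variable are hypothetical; the function itself is the one shown above):

    # Reach every agent tagged "worker" that also carries at least one of the
    # "billing" or "support" tags; returns one response string per matching agent.
    replies = send_message_to_agents_matching_tags(
        agent,
        message="Please post your end-of-day status.",
        match_all=["worker"],
        match_some=["billing", "support"],
    )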
letta/functions/helpers.py CHANGED
@@ -518,8 +518,16 @@ def fire_and_forget_send_to_agent(
      run_in_background_thread(background_task())
 
 
- async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent", message: str, tags: List[str]) -> List[str]:
-     log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async start", message=message, tags=tags)
+ async def _send_message_to_agents_matching_tags_async(
+     sender_agent: "Agent", message: str, match_all: List[str], match_some: List[str]
+ ) -> List[str]:
+     log_telemetry(
+         sender_agent.logger,
+         "_send_message_to_agents_matching_tags_async start",
+         message=message,
+         match_all=match_all,
+         match_some=match_some,
+     )
      server = get_letta_server()
 
      augmented_message = (
@@ -529,9 +537,22 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
      )
 
      # Retrieve up to 100 matching agents
-     log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents start", message=message, tags=tags)
-     matching_agents = server.agent_manager.list_agents(actor=sender_agent.user, tags=tags, match_all_tags=True, limit=100)
-     log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents finish", message=message, tags=tags)
+     log_telemetry(
+         sender_agent.logger,
+         "_send_message_to_agents_matching_tags_async listing agents start",
+         message=message,
+         match_all=match_all,
+         match_some=match_some,
+     )
+     matching_agents = server.agent_manager.list_agents_matching_tags(actor=sender_agent.user, match_all=match_all, match_some=match_some)
+
+     log_telemetry(
+         sender_agent.logger,
+         "_send_message_to_agents_matching_tags_async listing agents finish",
+         message=message,
+         match_all=match_all,
+         match_some=match_some,
+     )
 
      # Create a system message
      messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -559,7 +580,13 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
          else:
              final.append(r)
 
-     log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
+     log_telemetry(
+         sender_agent.logger,
+         "_send_message_to_agents_matching_tags_async finish",
+         message=message,
+         match_all=match_all,
+         match_some=match_some,
+     )
      return final
 
 
letta/llm_api/google_ai_client.py ADDED
@@ -0,0 +1,332 @@
+ import uuid
+ from typing import List, Optional, Tuple
+
+ from letta.constants import NON_USER_MSG_PREFIX
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.helpers import make_post_request
+ from letta.llm_api.llm_client_base import LLMClientBase
+ from letta.local_llm.json_parser import clean_json_string_extra_backslash
+ from letta.local_llm.utils import count_tokens
+ from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_request import Tool
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+ from letta.settings import model_settings
+ from letta.utils import get_tool_call_id
+
+
+ class GoogleAIClient(LLMClientBase):
+
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         url, headers = self.get_gemini_endpoint_and_headers(generate_content=True)
+         return make_post_request(url, headers, request_data)
+
+     def build_request_data(
+         self,
+         messages: List[PydanticMessage],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         if tools:
+             tools = [{"type": "function", "function": f} for f in tools]
+             tools = self.convert_tools_to_google_ai_format(
+                 [Tool(**t) for t in tools],
+             )
+         contents = self.add_dummy_model_messages(
+             [m.to_google_ai_dict() for m in messages],
+         )
+
+         return {
+             "contents": contents,
+             "tools": tools,
+             "generation_config": {
+                 "temperature": self.llm_config.temperature,
+                 "max_output_tokens": self.llm_config.max_tokens,
+             },
+         }
+
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[PydanticMessage],
+     ) -> ChatCompletionResponse:
+         """
+         Converts custom response format from llm client into an OpenAI
+         ChatCompletionsResponse object.
+
+         Example Input:
+         {
+           "candidates": [
+             {
+               "content": {
+                 "parts": [
+                   {
+                     "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                   }
+                 ]
+               }
+             }
+           ],
+           "usageMetadata": {
+             "promptTokenCount": 9,
+             "candidatesTokenCount": 27,
+             "totalTokenCount": 36
+           }
+         }
+         """
+         try:
+             choices = []
+             index = 0
+             for candidate in response_data["candidates"]:
+                 content = candidate["content"]
+
+                 role = content["role"]
+                 assert role == "model", f"Unknown role in response: {role}"
+
+                 parts = content["parts"]
+                 # TODO support parts / multimodal
+                 # TODO support parallel tool calling natively
+                 # TODO Alternative here is to throw away everything else except for the first part
+                 for response_message in parts:
+                     # Convert the actual message style to OpenAI style
+                     if "functionCall" in response_message and response_message["functionCall"] is not None:
+                         function_call = response_message["functionCall"]
+                         assert isinstance(function_call, dict), function_call
+                         function_name = function_call["name"]
+                         assert isinstance(function_name, str), function_name
+                         function_args = function_call["args"]
+                         assert isinstance(function_args, dict), function_args
+
+                         # NOTE: this also involves stripping the inner monologue out of the function
+                         if self.llm_config.put_inner_thoughts_in_kwargs:
+                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                             assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                         else:
+                             inner_thoughts = None
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                             tool_calls=[
+                                 ToolCall(
+                                     id=get_tool_call_id(),
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=function_name,
+                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                     ),
+                                 )
+                             ],
+                         )
+
+                     else:
+
+                         # Inner thoughts are the content by default
+                         inner_thoughts = response_message["text"]
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                         )
+
+                     # Google AI API uses different finish reason strings than OpenAI
+                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                     finish_reason = candidate["finishReason"]
+                     if finish_reason == "STOP":
+                         openai_finish_reason = (
+                             "function_call"
+                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                             else "stop"
+                         )
+                     elif finish_reason == "MAX_TOKENS":
+                         openai_finish_reason = "length"
+                     elif finish_reason == "SAFETY":
+                         openai_finish_reason = "content_filter"
+                     elif finish_reason == "RECITATION":
+                         openai_finish_reason = "content_filter"
+                     else:
+                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                     choices.append(
+                         Choice(
+                             finish_reason=openai_finish_reason,
+                             index=index,
+                             message=openai_response_message,
+                         )
+                     )
+                     index += 1
+
+             # if len(choices) > 1:
+             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+             # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+             # "usageMetadata": {
+             #     "promptTokenCount": 9,
+             #     "candidatesTokenCount": 27,
+             #     "totalTokenCount": 36
+             # }
+             if "usageMetadata" in response_data:
+                 usage = UsageStatistics(
+                     prompt_tokens=response_data["usageMetadata"]["promptTokenCount"],
+                     completion_tokens=response_data["usageMetadata"]["candidatesTokenCount"],
+                     total_tokens=response_data["usageMetadata"]["totalTokenCount"],
+                 )
+             else:
+                 # Count it ourselves
+                 assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                 total_tokens = prompt_tokens + completion_tokens
+                 usage = UsageStatistics(
+                     prompt_tokens=prompt_tokens,
+                     completion_tokens=completion_tokens,
+                     total_tokens=total_tokens,
+                 )
+
+             response_id = str(uuid.uuid4())
+             return ChatCompletionResponse(
+                 id=response_id,
+                 choices=choices,
+                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                 created=get_utc_time(),
+                 usage=usage,
+             )
+         except KeyError as e:
+             raise e
+
+     def get_gemini_endpoint_and_headers(
+         self,
+         key_in_header: bool = True,
+         generate_content: bool = False,
+     ) -> Tuple[str, dict]:
+         """
+         Dynamically generate the model endpoint and headers.
+         """
+
+         url = f"{self.llm_config.model_endpoint}/v1beta/models"
+
+         # Add the model
+         url += f"/{self.llm_config.model}"
+
+         # Add extension for generating content if we're hitting the LM
+         if generate_content:
+             url += ":generateContent"
+
+         # Decide if api key should be in header or not
+         # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+         if key_in_header:
+             headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
+         else:
+             url += f"?key={model_settings.gemini_api_key}"
+             headers = {"Content-Type": "application/json"}
+
+         return url, headers
+
+     def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
+         """
+         OpenAI style:
+           "tools": [{
+             "type": "function",
+             "function": {
+                 "name": "find_movies",
+                 "description": "find ....",
+                 "parameters": {
+                     "type": "object",
+                     "properties": {
+                         PARAM: {
+                             "type": PARAM_TYPE,  # eg "string"
+                             "description": PARAM_DESCRIPTION,
+                         },
+                         ...
+                     },
+                     "required": List[str],
+                 }
+             }
+           }
+         ]
+
+         Google AI style:
+           "tools": [{
+             "functionDeclarations": [{
+                 "name": "find_movies",
+                 "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+                 "parameters": {
+                     "type": "OBJECT",
+                     "properties": {
+                         "location": {
+                             "type": "STRING",
+                             "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+                         },
+                         "description": {
+                             "type": "STRING",
+                             "description": "Any kind of description including category or genre, title words, attributes, etc."
+                         }
+                     },
+                     "required": ["description"]
+                 }
+             }, {
+                 "name": "find_theaters",
+                 ...
+         """
+         function_list = [
+             dict(
+                 name=t.function.name,
+                 description=t.function.description,
+                 parameters=t.function.parameters,  # TODO need to unpack
+             )
+             for t in tools
+         ]
+
+         # Correct casing + add inner thoughts if needed
+         for func in function_list:
+             func["parameters"]["type"] = "OBJECT"
+             for param_name, param_fields in func["parameters"]["properties"].items():
+                 param_fields["type"] = param_fields["type"].upper()
+             # Add inner thoughts
+             if self.llm_config.put_inner_thoughts_in_kwargs:
+                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                     "type": "STRING",
+                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+                 }
+                 func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+         return [{"functionDeclarations": function_list}]
+
+     def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+         """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+         In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+         so there is no natural follow-up 'model' role message.
+
+         To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+         with role == 'model' that is placed in-betweeen and function output
+         (role == 'tool') and user message (role == 'user').
+         """
+         dummy_yield_message = {
+             "role": "model",
+             "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+         }
+         messages_with_padding = []
+         for i, message in enumerate(messages):
+             messages_with_padding.append(message)
+             # Check if the current message role is 'tool' and the next message role is 'user'
+             if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+                 messages_with_padding.append(dummy_yield_message)
+
+         return messages_with_padding
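For orientation, the conversion performed by convert_tools_to_google_ai_format above maps an OpenAI-style tool onto a Gemini functionDeclarations entry roughly as follows (a hand-written illustration, not taken from the package; with put_inner_thoughts_in_kwargs enabled, an extra inner-thoughts string parameter is also appended to properties and required):

    # OpenAI-style input tool (illustrative values)
    openai_tool = {
        "type": "function",
        "function": {
            "name": "find_movies",
            "description": "Find movie titles currently playing in theaters.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City and state, e.g. San Francisco, CA"},
                },
                "required": ["location"],
            },
        },
    }

    # Approximate Google AI output: the schema keeps its shape, but the top-level
    # parameters type becomes "OBJECT" and each property type is upper-cased.
    google_tools = [
        {
            "functionDeclarations": [
                {
                    "name": "find_movies",
                    "description": "Find movie titles currently playing in theaters.",
                    "parameters": {
                        "type": "OBJECT",
                        "properties": {
                            "location": {"type": "STRING", "description": "City and state, e.g. San Francisco, CA"},
                        },
                        "required": ["location"],
                    },
                }
            ]
        }
    ]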