letta-nightly 0.6.37.dev20250311104150__py3-none-any.whl → 0.6.39.dev20250313104142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (58)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +83 -23
  3. letta/agents/low_latency_agent.py +3 -2
  4. letta/client/client.py +1 -50
  5. letta/constants.py +4 -1
  6. letta/functions/function_sets/base.py +1 -1
  7. letta/functions/function_sets/multi_agent.py +9 -8
  8. letta/functions/helpers.py +47 -6
  9. letta/functions/schema_generator.py +47 -0
  10. letta/helpers/mcp_helpers.py +108 -0
  11. letta/llm_api/cohere.py +1 -1
  12. letta/llm_api/google_ai_client.py +332 -0
  13. letta/llm_api/google_vertex_client.py +214 -0
  14. letta/llm_api/helpers.py +1 -2
  15. letta/llm_api/llm_api_tools.py +0 -1
  16. letta/llm_api/llm_client.py +48 -0
  17. letta/llm_api/llm_client_base.py +129 -0
  18. letta/local_llm/utils.py +30 -20
  19. letta/log.py +1 -1
  20. letta/memory.py +1 -1
  21. letta/orm/__init__.py +1 -0
  22. letta/orm/block.py +8 -0
  23. letta/orm/enums.py +2 -0
  24. letta/orm/identities_blocks.py +13 -0
  25. letta/orm/identity.py +9 -0
  26. letta/orm/sqlalchemy_base.py +4 -4
  27. letta/orm/step.py +1 -0
  28. letta/schemas/block.py +4 -48
  29. letta/schemas/identity.py +3 -0
  30. letta/schemas/letta_message.py +26 -0
  31. letta/schemas/message.py +69 -63
  32. letta/schemas/step.py +1 -0
  33. letta/schemas/tool.py +39 -2
  34. letta/serialize_schemas/agent.py +8 -1
  35. letta/server/rest_api/app.py +15 -0
  36. letta/server/rest_api/chat_completions_interface.py +2 -0
  37. letta/server/rest_api/interface.py +46 -13
  38. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  39. letta/server/rest_api/routers/v1/agents.py +14 -10
  40. letta/server/rest_api/routers/v1/blocks.py +5 -1
  41. letta/server/rest_api/routers/v1/steps.py +2 -0
  42. letta/server/rest_api/routers/v1/tools.py +71 -1
  43. letta/server/rest_api/routers/v1/voice.py +3 -6
  44. letta/server/server.py +102 -5
  45. letta/services/agent_manager.py +58 -3
  46. letta/services/block_manager.py +10 -1
  47. letta/services/helpers/agent_manager_helper.py +12 -1
  48. letta/services/identity_manager.py +61 -15
  49. letta/services/message_manager.py +40 -0
  50. letta/services/step_manager.py +8 -1
  51. letta/services/summarizer/summarizer.py +1 -1
  52. letta/services/tool_manager.py +6 -0
  53. letta/settings.py +11 -12
  54. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/METADATA +20 -18
  55. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/RECORD +58 -52
  56. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/LICENSE +0 -0
  57. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/WHEEL +0 -0
  58. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_ai_client.py ADDED
@@ -0,0 +1,332 @@
+ import uuid
+ from typing import List, Optional, Tuple
+
+ from letta.constants import NON_USER_MSG_PREFIX
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.helpers import make_post_request
+ from letta.llm_api.llm_client_base import LLMClientBase
+ from letta.local_llm.json_parser import clean_json_string_extra_backslash
+ from letta.local_llm.utils import count_tokens
+ from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_request import Tool
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+ from letta.settings import model_settings
+ from letta.utils import get_tool_call_id
+
+
+ class GoogleAIClient(LLMClientBase):
+
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs the underlying request to the LLM and returns the raw response.
+         """
+         url, headers = self.get_gemini_endpoint_and_headers(generate_content=True)
+         return make_post_request(url, headers, request_data)
+
+     def build_request_data(
+         self,
+         messages: List[PydanticMessage],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         if tools:
+             tools = [{"type": "function", "function": f} for f in tools]
+             tools = self.convert_tools_to_google_ai_format(
+                 [Tool(**t) for t in tools],
+             )
+         contents = self.add_dummy_model_messages(
+             [m.to_google_ai_dict() for m in messages],
+         )
+
+         return {
+             "contents": contents,
+             "tools": tools,
+             "generation_config": {
+                 "temperature": self.llm_config.temperature,
+                 "max_output_tokens": self.llm_config.max_tokens,
+             },
+         }
+
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[PydanticMessage],
+     ) -> ChatCompletionResponse:
+         """
+         Converts the custom response format from the LLM client into an OpenAI
+         ChatCompletionResponse object.
+
+         Example input:
+         {
+           "candidates": [
+             {
+               "content": {
+                 "parts": [
+                   {
+                     "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                   }
+                 ]
+               }
+             }
+           ],
+           "usageMetadata": {
+             "promptTokenCount": 9,
+             "candidatesTokenCount": 27,
+             "totalTokenCount": 36
+           }
+         }
+         """
+         try:
+             choices = []
+             index = 0
+             for candidate in response_data["candidates"]:
+                 content = candidate["content"]
+
+                 role = content["role"]
+                 assert role == "model", f"Unknown role in response: {role}"
+
+                 parts = content["parts"]
+                 # TODO support parts / multimodal
+                 # TODO support parallel tool calling natively
+                 # TODO Alternative here is to throw away everything else except for the first part
+                 for response_message in parts:
+                     # Convert the actual message style to OpenAI style
+                     if "functionCall" in response_message and response_message["functionCall"] is not None:
+                         function_call = response_message["functionCall"]
+                         assert isinstance(function_call, dict), function_call
+                         function_name = function_call["name"]
+                         assert isinstance(function_name, str), function_name
+                         function_args = function_call["args"]
+                         assert isinstance(function_args, dict), function_args
+
+                         # NOTE: this also involves stripping the inner monologue out of the function
+                         if self.llm_config.put_inner_thoughts_in_kwargs:
+                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                             assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                         else:
+                             inner_thoughts = None
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                             tool_calls=[
+                                 ToolCall(
+                                     id=get_tool_call_id(),
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=function_name,
+                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                     ),
+                                 )
+                             ],
+                         )
+
+                     else:
+                         # Inner thoughts are the content by default
+                         inner_thoughts = response_message["text"]
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                         )
+
+                     # Google AI API uses different finish reason strings than OpenAI
+                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                     finish_reason = candidate["finishReason"]
+                     if finish_reason == "STOP":
+                         openai_finish_reason = (
+                             "function_call"
+                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                             else "stop"
+                         )
+                     elif finish_reason == "MAX_TOKENS":
+                         openai_finish_reason = "length"
+                     elif finish_reason == "SAFETY":
+                         openai_finish_reason = "content_filter"
+                     elif finish_reason == "RECITATION":
+                         openai_finish_reason = "content_filter"
+                     else:
+                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                     choices.append(
+                         Choice(
+                             finish_reason=openai_finish_reason,
+                             index=index,
+                             message=openai_response_message,
+                         )
+                     )
+                     index += 1
+
+             # if len(choices) > 1:
+             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+             # NOTE: some of the Google AI APIs show usageMetadata in the response, but it doesn't always exist
+             # "usageMetadata": {
+             #     "promptTokenCount": 9,
+             #     "candidatesTokenCount": 27,
+             #     "totalTokenCount": 36
+             # }
+             if "usageMetadata" in response_data:
+                 usage = UsageStatistics(
+                     prompt_tokens=response_data["usageMetadata"]["promptTokenCount"],
+                     completion_tokens=response_data["usageMetadata"]["candidatesTokenCount"],
+                     total_tokens=response_data["usageMetadata"]["totalTokenCount"],
+                 )
+             else:
+                 # Count it ourselves
+                 assert input_messages is not None, "Didn't get usageMetadata from the API response, so input_messages is required"
+                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                 total_tokens = prompt_tokens + completion_tokens
+                 usage = UsageStatistics(
+                     prompt_tokens=prompt_tokens,
+                     completion_tokens=completion_tokens,
+                     total_tokens=total_tokens,
+                 )
+
+             response_id = str(uuid.uuid4())
+             return ChatCompletionResponse(
+                 id=response_id,
+                 choices=choices,
+                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                 created=get_utc_time(),
+                 usage=usage,
+             )
+         except KeyError as e:
+             raise e
+
+     def get_gemini_endpoint_and_headers(
+         self,
+         key_in_header: bool = True,
+         generate_content: bool = False,
+     ) -> Tuple[str, dict]:
+         """
+         Dynamically generate the model endpoint and headers.
+         """
+         url = f"{self.llm_config.model_endpoint}/v1beta/models"
+
+         # Add the model
+         url += f"/{self.llm_config.model}"
+
+         # Add extension for generating content if we're hitting the LM
+         if generate_content:
+             url += ":generateContent"
+
+         # Decide whether the API key goes in the header or the query string
+         # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+         if key_in_header:
+             headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
+         else:
+             url += f"?key={model_settings.gemini_api_key}"
+             headers = {"Content-Type": "application/json"}
+
+         return url, headers
+
+     def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
+         """
+         OpenAI style:
+         "tools": [{
+             "type": "function",
+             "function": {
+                 "name": "find_movies",
+                 "description": "find ....",
+                 "parameters": {
+                     "type": "object",
+                     "properties": {
+                         PARAM: {
+                             "type": PARAM_TYPE,  # eg "string"
+                             "description": PARAM_DESCRIPTION,
+                         },
+                         ...
+                     },
+                     "required": List[str],
+                 }
+             }
+         }]
+
+         Google AI style:
+         "tools": [{
+             "functionDeclarations": [{
+                 "name": "find_movies",
+                 "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+                 "parameters": {
+                     "type": "OBJECT",
+                     "properties": {
+                         "location": {
+                             "type": "STRING",
+                             "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+                         },
+                         "description": {
+                             "type": "STRING",
+                             "description": "Any kind of description including category or genre, title words, attributes, etc."
+                         }
+                     },
+                     "required": ["description"]
+                 }
+             }, {
+                 "name": "find_theaters",
+                 ...
+         """
+         function_list = [
+             dict(
+                 name=t.function.name,
+                 description=t.function.description,
+                 parameters=t.function.parameters,  # TODO need to unpack
+             )
+             for t in tools
+         ]
+
+         # Correct casing + add inner thoughts if needed
+         for func in function_list:
+             func["parameters"]["type"] = "OBJECT"
+             for param_name, param_fields in func["parameters"]["properties"].items():
+                 param_fields["type"] = param_fields["type"].upper()
+             # Add inner thoughts
+             if self.llm_config.put_inner_thoughts_in_kwargs:
+                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                     "type": "STRING",
+                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+                 }
+                 func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+         return [{"functionDeclarations": function_list}]
+
+     def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+         """The Google AI API requires that every function call return be immediately followed by a 'model' role message.
+
+         In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+         so there is no natural follow-up 'model' role message.
+
+         To satisfy this restriction, we insert a dummy 'yield' message with role == 'model'
+         between each function output (role == 'tool') and the following user message (role == 'user').
+         """
+         dummy_yield_message = {
+             "role": "model",
+             "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+         }
+         messages_with_padding = []
+         for i, message in enumerate(messages):
+             messages_with_padding.append(message)
+             # Check if the current message role is 'tool' and the next message role is 'user'
+             if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+                 messages_with_padding.append(dummy_yield_message)
+
+         return messages_with_padding
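
The dummy-message padding is the subtle part of this file. Below is a minimal, self-contained sketch of the behavior; the message history and the NON_USER_MSG_PREFIX value are illustrative (the real prefix lives in letta.constants), not taken from the diff.

# Sketch of the padding logic in add_dummy_model_messages (illustrative prefix).
NON_USER_MSG_PREFIX = "[This is an automated system message] "

def add_dummy_model_messages(messages: list[dict]) -> list[dict]:
    # Gemini requires every function/tool result to be followed by a 'model' turn,
    # so insert a dummy 'model' message between a tool result and the next user turn.
    dummy = {
        "role": "model",
        "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
    }
    padded = []
    for i, message in enumerate(messages):
        padded.append(message)
        if message["role"] in ("tool", "function") and i + 1 < len(messages) and messages[i + 1]["role"] == "user":
            padded.append(dummy)
    return padded

history = [
    {"role": "user", "parts": [{"text": "What's playing nearby?"}]},
    {"role": "tool", "parts": [{"text": "find_movies returned 2 results"}]},
    {"role": "user", "parts": [{"text": "Pick one"}]},
]
# The tool result is now followed by a 'model' turn, as the Gemini API requires.
assert [m["role"] for m in add_dummy_model_messages(history)] == ["user", "tool", "model", "user"]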
letta/llm_api/google_vertex_client.py ADDED
@@ -0,0 +1,214 @@
+ import uuid
+ from typing import List, Optional
+
+ from google import genai
+ from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ToolConfig
+
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.google_ai_client import GoogleAIClient
+ from letta.local_llm.json_parser import clean_json_string_extra_backslash
+ from letta.local_llm.utils import count_tokens
+ from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+ from letta.settings import model_settings
+ from letta.utils import get_tool_call_id
+
+
+ class GoogleVertexClient(GoogleAIClient):
+
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs the underlying request to the LLM and returns the raw response.
+         """
+         client = genai.Client(
+             vertexai=True,
+             project=model_settings.google_cloud_project,
+             location=model_settings.google_cloud_location,
+             http_options={"api_version": "v1"},
+         )
+         response = client.models.generate_content(
+             model=self.llm_config.model,
+             contents=request_data["contents"],
+             config=request_data["config"],
+         )
+         return response.model_dump()
+
+     def build_request_data(
+         self,
+         messages: List[PydanticMessage],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         request_data = super().build_request_data(messages, tools, tool_call)
+         request_data["config"] = request_data.pop("generation_config")
+         request_data["config"]["tools"] = request_data.pop("tools")
+
+         tool_config = ToolConfig(
+             function_calling_config=FunctionCallingConfig(
+                 # ANY mode forces the model to predict only function calls
+                 mode=FunctionCallingConfigMode.ANY,
+             )
+         )
+         request_data["config"]["tool_config"] = tool_config.model_dump()
+
+         return request_data
+
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[PydanticMessage],
+     ) -> ChatCompletionResponse:
+         """
+         Converts the custom response format from the LLM client into an OpenAI
+         ChatCompletionResponse object.
+
+         Example:
+         {
+           "candidates": [
+             {
+               "content": {
+                 "parts": [
+                   {
+                     "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                   }
+                 ]
+               }
+             }
+           ],
+           "usageMetadata": {
+             "promptTokenCount": 9,
+             "candidatesTokenCount": 27,
+             "totalTokenCount": 36
+           }
+         }
+         """
+         response = GenerateContentResponse(**response_data)
+         try:
+             choices = []
+             index = 0
+             for candidate in response.candidates:
+                 content = candidate.content
+
+                 role = content.role
+                 assert role == "model", f"Unknown role in response: {role}"
+
+                 parts = content.parts
+                 # TODO support parts / multimodal
+                 # TODO support parallel tool calling natively
+                 # TODO Alternative here is to throw away everything else except for the first part
+                 for response_message in parts:
+                     # Convert the actual message style to OpenAI style
+                     if response_message.function_call:
+                         function_call = response_message.function_call
+                         function_name = function_call.name
+                         function_args = function_call.args
+                         assert isinstance(function_args, dict), function_args
+
+                         # NOTE: this also involves stripping the inner monologue out of the function
+                         if self.llm_config.put_inner_thoughts_in_kwargs:
+                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                             assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                         else:
+                             inner_thoughts = None
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                             tool_calls=[
+                                 ToolCall(
+                                     id=get_tool_call_id(),
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=function_name,
+                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                     ),
+                                 )
+                             ],
+                         )
+
+                     else:
+                         # Inner thoughts are the content by default
+                         inner_thoughts = response_message.text
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                         )
+
+                     # Google AI API uses different finish reason strings than OpenAI
+                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                     finish_reason = candidate.finish_reason.value
+                     if finish_reason == "STOP":
+                         openai_finish_reason = (
+                             "function_call"
+                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                             else "stop"
+                         )
+                     elif finish_reason == "MAX_TOKENS":
+                         openai_finish_reason = "length"
+                     elif finish_reason == "SAFETY":
+                         openai_finish_reason = "content_filter"
+                     elif finish_reason == "RECITATION":
+                         openai_finish_reason = "content_filter"
+                     else:
+                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                     choices.append(
+                         Choice(
+                             finish_reason=openai_finish_reason,
+                             index=index,
+                             message=openai_response_message,
+                         )
+                     )
+                     index += 1
+
+             # if len(choices) > 1:
+             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+             # NOTE: some of the Google AI APIs show usageMetadata in the response, but it doesn't always exist
+             # "usageMetadata": {
+             #     "promptTokenCount": 9,
+             #     "candidatesTokenCount": 27,
+             #     "totalTokenCount": 36
+             # }
+             if response.usage_metadata:
+                 usage = UsageStatistics(
+                     prompt_tokens=response.usage_metadata.prompt_token_count,
+                     completion_tokens=response.usage_metadata.candidates_token_count,
+                     total_tokens=response.usage_metadata.total_token_count,
+                 )
+             else:
+                 # Count it ourselves
+                 assert input_messages is not None, "Didn't get usageMetadata from the API response, so input_messages is required"
+                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                 total_tokens = prompt_tokens + completion_tokens
+                 usage = UsageStatistics(
+                     prompt_tokens=prompt_tokens,
+                     completion_tokens=completion_tokens,
+                     total_tokens=total_tokens,
+                 )
+
+             response_id = str(uuid.uuid4())
+             return ChatCompletionResponse(
+                 id=response_id,
+                 choices=choices,
+                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                 created=get_utc_time(),
+                 usage=usage,
+             )
+         except KeyError as e:
+             raise e
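
The Vertex subclass reuses the parent's request construction and only reshapes it for the google-genai SDK, which takes a single config object instead of the REST-style top-level keys. A sketch of that reshaping, with illustrative payload values (the tool_config dict here mirrors what ToolConfig.model_dump() would roughly produce, not its exact output):

# Illustrative request in the shape GoogleAIClient.build_request_data returns.
base_request = {
    "contents": [{"role": "user", "parts": [{"text": "hi"}]}],
    "tools": [{"functionDeclarations": []}],
    "generation_config": {"temperature": 0.7, "max_output_tokens": 1024},
}

# Fold the REST-style keys into the single `config` dict the SDK expects,
# and force function calling with ANY mode (the model must emit a tool call).
request = dict(base_request)
request["config"] = request.pop("generation_config")
request["config"]["tools"] = request.pop("tools")
request["config"]["tool_config"] = {"function_calling_config": {"mode": "ANY"}}

assert "generation_config" not in request
assert request["config"]["max_output_tokens"] == 1024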
letta/llm_api/helpers.py CHANGED
@@ -86,9 +86,8 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
      # but if "type" is "object" we expected "properties", where each property has details
      # and if "type" is "array" we expect "items": <type>
      for param, details in openai_function["parameters"]["properties"].items():
-
          param_type = details["type"]
-         description = details["description"]
+         description = details.get("description", "")

          if param_type == "object":
              if "properties" not in details:
letta/llm_api/llm_api_tools.py CHANGED
@@ -596,7 +596,6 @@ def create(
          messages[0].content[
              0
          ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
-
          return get_chat_completion(
              model=llm_config.model,
              messages=messages,
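
The convert_to_structured_output change above is a small robustness fix: auto-generated JSON schemas don't always carry a per-parameter "description", and direct indexing raised KeyError. A quick illustration of the before/after behavior:

details = {"type": "string"}  # property schema with no "description" key

# before: details["description"] raises KeyError
# after: a missing description degrades to an empty string
description = details.get("description", "")
assert description == ""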
letta/llm_api/llm_client.py ADDED
@@ -0,0 +1,48 @@
+ from typing import Optional
+
+ from letta.llm_api.llm_client_base import LLMClientBase
+ from letta.schemas.llm_config import LLMConfig
+
+
+ class LLMClient:
+     """Factory class for creating LLM clients based on the model endpoint type."""
+
+     @staticmethod
+     def create(
+         agent_id: str,
+         llm_config: LLMConfig,
+         put_inner_thoughts_first: bool = True,
+         actor_id: Optional[str] = None,
+     ) -> Optional[LLMClientBase]:
+         """
+         Create an LLM client based on the model endpoint type.
+
+         Args:
+             agent_id: Unique identifier for the agent
+             llm_config: Configuration for the LLM model
+             put_inner_thoughts_first: Whether to put inner thoughts first in the response
+             actor_id: Optional actor identifier
+
+         Returns:
+             An instance of an LLMClientBase subclass, or None if the model
+             endpoint type has no native client
+         """
+         match llm_config.model_endpoint_type:
+             case "google_ai":
+                 from letta.llm_api.google_ai_client import GoogleAIClient
+
+                 return GoogleAIClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case "google_vertex":
+                 from letta.llm_api.google_vertex_client import GoogleVertexClient
+
+                 return GoogleVertexClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case _:
+                 return None
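
For orientation, a sketch of how a caller might use the new factory. The LLMConfig field values below are illustrative (exact fields may vary by version), and the None branch is an assumption: endpoint types without a native client presumably fall back to the pre-existing llm_api_tools path.

from letta.llm_api.llm_client import LLMClient
from letta.schemas.llm_config import LLMConfig

# Illustrative config; only google_ai and google_vertex get a native client here.
config = LLMConfig(
    model="gemini-1.5-pro",
    model_endpoint_type="google_ai",
    model_endpoint="https://generativelanguage.googleapis.com",
    context_window=32768,
)

client = LLMClient.create(agent_id="agent-123", llm_config=config)
if client is not None:
    # The three-step client lifecycle: build the payload, call the API,
    # then normalize the raw response into an OpenAI-style completion.
    request_data = client.build_request_data(messages=[], tools=[], tool_call=None)
    raw = client.request(request_data)
    chat_completion = client.convert_response_to_chat_completion(raw, input_messages=[])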