letta-nightly 0.6.44.dev20250326104203__py3-none-any.whl → 0.6.45.dev20250327104152__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

@@ -1,346 +0,0 @@
1
- import uuid
2
- from typing import List, Optional
3
-
4
- from letta.constants import NON_USER_MSG_PREFIX
5
- from letta.helpers.datetime_helpers import get_utc_time
6
- from letta.helpers.json_helpers import json_dumps
7
- from letta.local_llm.json_parser import clean_json_string_extra_backslash
8
- from letta.local_llm.utils import count_tokens
9
- from letta.schemas.openai.chat_completion_request import Tool
10
- from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
11
- from letta.tracing import log_event
12
- from letta.utils import get_tool_call_id
13
-
14
-
15
def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
    """Pad a message list so every function return is followed by a 'model' turn.

    Google AI API requires all function call returns to be immediately followed
    by a 'model' role message. In Letta, the 'model' will often call a function
    (e.g. send_message) that itself yields to the user, so there is no natural
    follow-up 'model' role message.

    To satisfy the Google AI API restrictions, a dummy 'yield' message with
    role == 'model' is placed in between a function output (role == 'tool' or
    'function') and a directly following user message (role == 'user').

    Args:
        messages: Message dicts, each carrying a "role" key.

    Returns:
        A new list holding the original message objects in their original
        order, plus one dummy 'model' message after each function output that
        is directly followed by a user message. The input list is not modified.

    Raises:
        KeyError: If an inspected message has no "role" key.
    """
    messages_with_padding = []
    for i, message in enumerate(messages):
        messages_with_padding.append(message)
        # Pad only when a function result is directly followed by a user turn.
        if message["role"] in ("tool", "function") and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
            # Build a fresh dict for every insertion. Re-using one dict object
            # at several positions would let an in-place edit of one padded
            # entry silently change all the others.
            messages_with_padding.append(
                {
                    "role": "model",
                    "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
                }
            )

    return messages_with_padding
34
-
35
-
36
- # TODO use pydantic model as input
37
def to_google_ai(openai_message_dict: dict) -> dict:
    """Convert one OpenAI-style chat message dict into a Google AI message dict.

    Role mapping (OpenAI -> Google AI):
        "user"      -> "user"
        "assistant" -> "model"
        "tool"      -> "function"

    The message content is carried over unchanged as the single text part.

    Args:
        openai_message_dict: Message with a "role" key and a "content" key.
            The content must not be a list (multi-part content is not
            supported yet).

    Returns:
        A dict of the form {"role": <google role>, "parts": [{"text": <content>}]}.

    Raises:
        AssertionError: If "content" is a list.
        ValueError: If the role is not one of "user", "assistant" or "tool".
        KeyError: If "role" or "content" is missing.
    """
    # TODO supports "parts" as part of multimodal support
    assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported"

    google_role_by_openai_role = {
        "user": "user",
        "assistant": "model",  # NOTE: diff
        "tool": "function",  # NOTE: diff
    }
    openai_role = openai_message_dict["role"]
    if openai_role not in google_role_by_openai_role:
        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")

    # The converted message was previously built but never returned, so every
    # caller received None.
    return {
        "role": google_role_by_openai_role[openai_role],
        "parts": [{"text": openai_message_dict["content"]}],
    }
58
-
59
-
60
- # TODO convert return type to pydantic
61
def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
    """Convert OpenAI-style tool definitions into Google AI function declarations.

    OpenAI style:
      "tools": [{
        "type": "function",
        "function": {
            "name": "find_movies",
            "description": "find ....",
            "parameters": {
              "type": "object",
              "properties": {
                 PARAM: {
                   "type": PARAM_TYPE,  # eg "string"
                   "description": PARAM_DESCRIPTION,
                 },
                 ...
              },
              "required": List[str],
            }
        }
      }
      ]

    Google AI style:
      "tools": [{
        "functionDeclarations": [{
          "name": "find_movies",
          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
          "parameters": {
            "type": "OBJECT",
            "properties": {
              "location": {
                "type": "STRING",
                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
              },
              "description": {
                "type": "STRING",
                "description": "Any kind of description including category or genre, title words, attributes, etc."
              }
            },
            "required": ["description"]
          }
        }, {
          "name": "find_theaters",
          ...

    Args:
        tools: Tool objects exposing function.name, function.description and
            function.parameters (a JSON-schema style dict with "properties").
        inner_thoughts_in_kwargs: When truthy, an extra required STRING
            parameter named INNER_THOUGHTS_KWARG is added to every function.

    Returns:
        A single-element list: [{"functionDeclarations": [<function dict>, ...]}].

    Raises:
        KeyError: If a parameters schema has no "properties", or a property
            has no "type".
    """
    import copy

    function_list = [
        dict(
            name=t.function.name,
            description=t.function.description,
            # Deep-copy the schema: the casing fix-ups and the inner-thoughts
            # injection below would otherwise rewrite the caller's Tool
            # objects, and repeated calls would keep appending to "required".
            parameters=copy.deepcopy(t.function.parameters),  # TODO need to unpack
        )
        for t in tools
    ]

    # Correct casing + add inner thoughts if needed
    for func in function_list:
        parameters = func["parameters"]
        parameters["type"] = "OBJECT"
        for param_fields in parameters["properties"].values():
            param_fields["type"] = param_fields["type"].upper()

        # Add inner thoughts
        if inner_thoughts_in_kwargs:
            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION

            parameters["properties"][INNER_THOUGHTS_KWARG] = {
                "type": "STRING",
                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
            }
            # A schema is allowed to omit "required"; create it on demand and
            # avoid listing the inner-thoughts argument twice.
            required = parameters.setdefault("required", [])
            if INNER_THOUGHTS_KWARG not in required:
                required.append(INNER_THOUGHTS_KWARG)

    return [{"functionDeclarations": function_list}]
132
-
133
-
134
def _google_finish_reason_to_openai(finish_reason: str, has_tool_calls: bool) -> str:
    """Translate a Google AI finish reason string into its OpenAI counterpart."""
    # Google AI API uses different finish reason strings than OpenAI
    # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
    #   see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
    # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
    #   see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
    if finish_reason == "STOP":
        return "function_call" if has_tool_calls else "stop"
    if finish_reason == "MAX_TOKENS":
        return "length"
    if finish_reason in ("SAFETY", "RECITATION"):
        return "content_filter"
    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")


def convert_google_ai_response_to_chatcompletion(
    response,
    model: str,  # Required since not returned
    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
    pull_inner_thoughts_from_args: Optional[bool] = True,
) -> ChatCompletionResponse:
    """Unpack a Google AI API response into a ChatCompletionResponse.

    Google AI API response format is not the same as ChatCompletion, so it
    requires unpacking. Every content part of every candidate becomes one
    Choice with an "assistant" message.
    NOTE(review): the per-part nesting was reconstructed from a listing that
    had lost its indentation - confirm against the upstream module.

    Example:
    {
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
              }
            ]
          }
        }
      ],
      "usageMetadata": {
        "promptTokenCount": 9,
        "candidatesTokenCount": 27,
        "totalTokenCount": 36
      }
    }

    Args:
        response: Response object exposing `candidates` and `usage_metadata`.
        model: Model name to report (the API does not pass it back).
        input_messages: Request messages; only needed to approximate token
            usage when the response carries no usage metadata.
        pull_inner_thoughts_from_args: When truthy, the INNER_THOUGHTS_KWARG
            argument is removed from each function call and used as the
            message content.

    Returns:
        A ChatCompletionResponse with a freshly generated UUID as its id.

    Raises:
        AssertionError: On a non-"model" role, non-dict function args, missing
            or null inner thoughts, or missing input_messages when usage has
            to be estimated.
        ValueError: On an unrecognized finish reason.
    """
    choices = []
    last_message = None  # most recent converted message, used for the token estimate
    for candidate in response.candidates:
        content = candidate.content

        role = content.role
        assert role == "model", f"Unknown role in response: {role}"

        # TODO support parts / multimodal
        # TODO support parallel tool calling natively
        # TODO Alternative here is to throw away everything else except for the first part
        for response_message in content.parts:
            # Convert the actual message style to OpenAI style
            if response_message.function_call:
                function_call = response_message.function_call
                function_name = function_call.name
                function_args = function_call.args
                assert isinstance(function_args, dict), function_args
                # Work on a copy so that stripping the inner thoughts below
                # does not modify the response object owned by the caller.
                function_args = dict(function_args)

                # NOTE: this also involves stripping the inner monologue out of the function
                if pull_inner_thoughts_from_args:
                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG

                    assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
                    assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                else:
                    inner_thoughts = None

                # Google AI API doesn't generate tool call IDs
                openai_response_message = Message(
                    role="assistant",  # NOTE: "model" -> "assistant"
                    content=inner_thoughts,
                    tool_calls=[
                        ToolCall(
                            id=get_tool_call_id(),
                            type="function",
                            function=FunctionCall(
                                name=function_name,
                                arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
                            ),
                        )
                    ],
                )
            else:
                # Inner thoughts are the content by default
                openai_response_message = Message(
                    role="assistant",  # NOTE: "model" -> "assistant"
                    content=response_message.text,
                )

            openai_finish_reason = _google_finish_reason_to_openai(
                candidate.finish_reason.value,
                has_tool_calls=bool(openai_response_message.tool_calls),
            )
            choices.append(
                Choice(
                    finish_reason=openai_finish_reason,
                    index=len(choices),
                    message=openai_response_message,
                )
            )
            last_message = openai_response_message

    # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
    #  "usageMetadata": {
    #     "promptTokenCount": 9,
    #     "candidatesTokenCount": 27,
    #     "totalTokenCount": 36
    #   }
    if response.usage_metadata:
        usage = UsageStatistics(
            prompt_tokens=response.usage_metadata.prompt_token_count,
            completion_tokens=response.usage_metadata.candidates_token_count,
            total_tokens=response.usage_metadata.total_token_count,
        )
    else:
        # Count it ourselves
        assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required"
        prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
        # A response without any parts produced no message; report zero
        # completion tokens instead of failing on an unbound variable.
        if last_message is None:
            completion_tokens = 0
        else:
            completion_tokens = count_tokens(json_dumps(last_message.model_dump()))  # NOTE: this is also approximate
        usage = UsageStatistics(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )

    return ChatCompletionResponse(
        id=str(uuid.uuid4()),
        choices=choices,
        model=model,  # NOTE: Google API doesn't pass back model in the response
        created=get_utc_time(),
        usage=usage,
    )
287
-
288
-
289
- # TODO convert 'data' type to pydantic
290
def google_vertex_chat_completions_request(
    model: str,
    project_id: str,
    region: str,
    contents: List[dict],
    config: dict,
    add_postfunc_model_messages: bool = True,
    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
    # so there's no clean way to put inner thoughts in the same message as a function call
    inner_thoughts_in_kwargs: bool = True,
) -> ChatCompletionResponse:
    """Send a generate-content request to Vertex AI and return it as a ChatCompletionResponse.

    https://ai.google.dev/docs/function_calling

    From https://ai.google.dev/api/rest#service-endpoint:
    "A service endpoint is a base URL that specifies the network address of an API service.
    One service might have multiple service endpoints.
    This service has the following service endpoint and all URIs below are relative to this service endpoint:
    https://xxx.googleapis.com

    Args:
        model: Model name passed to the API and echoed in the result.
        project_id: Google Cloud project used to build the client.
        region: Location used to build the client.
        contents: Google AI style message dicts.
        config: Generation config dict. It is not modified; a shallow copy
            with "tool_config" set is sent instead.
        add_postfunc_model_messages: When true, dummy 'model' messages are
            inserted via add_dummy_model_messages before sending.
        inner_thoughts_in_kwargs: Forwarded as pull_inner_thoughts_from_args
            when converting the response.

    Returns:
        The converted ChatCompletionResponse.

    Raises:
        Exception: Any error raised while converting the response is printed
            and re-raised unchanged.
    """
    from google import genai
    from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})

    # insert dummy model messages between function outputs and user messages
    if add_postfunc_model_messages:
        contents = add_dummy_model_messages(contents)

    tool_config = ToolConfig(
        function_calling_config=FunctionCallingConfig(
            # ANY mode forces the model to predict only function calls
            mode=FunctionCallingConfigMode.ANY,
        )
    )

    # Work on a copy of the caller's config. Previously "tool_config" was
    # written into the caller's dict, and an alias of that dict was updated
    # with a "contents" key that then travelled inside `config=` as well.
    request_config = dict(config)
    request_config["tool_config"] = tool_config.model_dump()

    # make request to client
    log_event(name="llm_request_sent", attributes={"contents": contents, "config": request_config})
    response = client.models.generate_content(
        model=model,
        contents=contents,
        config=request_config,
    )

    # convert back response
    try:
        return convert_google_ai_response_to_chatcompletion(
            response=response,
            model=model,
            input_messages=contents,
            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
        )
    except Exception as conversion_error:
        print(f"Error during response conversion: {conversion_error}")
        raise