letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (66)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +19 -2
  3. letta/client/client.py +2 -0
  4. letta/constants.py +2 -0
  5. letta/functions/schema_generator.py +6 -6
  6. letta/helpers/converters.py +153 -0
  7. letta/helpers/tool_rule_solver.py +11 -1
  8. letta/llm_api/anthropic.py +10 -5
  9. letta/llm_api/aws_bedrock.py +1 -1
  10. letta/llm_api/deepseek.py +303 -0
  11. letta/llm_api/helpers.py +20 -10
  12. letta/llm_api/llm_api_tools.py +85 -2
  13. letta/llm_api/openai.py +16 -1
  14. letta/local_llm/chat_completion_proxy.py +15 -2
  15. letta/local_llm/lmstudio/api.py +75 -1
  16. letta/orm/__init__.py +2 -0
  17. letta/orm/agent.py +11 -4
  18. letta/orm/custom_columns.py +31 -110
  19. letta/orm/identities_agents.py +13 -0
  20. letta/orm/identity.py +60 -0
  21. letta/orm/organization.py +2 -0
  22. letta/orm/sqlalchemy_base.py +4 -0
  23. letta/schemas/agent.py +11 -1
  24. letta/schemas/identity.py +67 -0
  25. letta/schemas/llm_config.py +2 -0
  26. letta/schemas/message.py +1 -1
  27. letta/schemas/openai/chat_completion_response.py +2 -0
  28. letta/schemas/providers.py +72 -1
  29. letta/schemas/tool_rule.py +9 -1
  30. letta/serialize_schemas/__init__.py +1 -0
  31. letta/serialize_schemas/agent.py +36 -0
  32. letta/serialize_schemas/base.py +12 -0
  33. letta/serialize_schemas/custom_fields.py +69 -0
  34. letta/serialize_schemas/message.py +15 -0
  35. letta/server/db.py +111 -0
  36. letta/server/rest_api/app.py +8 -0
  37. letta/server/rest_api/chat_completions_interface.py +45 -21
  38. letta/server/rest_api/interface.py +114 -9
  39. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
  40. letta/server/rest_api/routers/v1/__init__.py +2 -0
  41. letta/server/rest_api/routers/v1/agents.py +14 -3
  42. letta/server/rest_api/routers/v1/identities.py +121 -0
  43. letta/server/rest_api/utils.py +183 -4
  44. letta/server/server.py +23 -117
  45. letta/services/agent_manager.py +53 -6
  46. letta/services/block_manager.py +1 -1
  47. letta/services/identity_manager.py +156 -0
  48. letta/services/job_manager.py +1 -1
  49. letta/services/message_manager.py +1 -1
  50. letta/services/organization_manager.py +1 -1
  51. letta/services/passage_manager.py +1 -1
  52. letta/services/provider_manager.py +1 -1
  53. letta/services/sandbox_config_manager.py +1 -1
  54. letta/services/source_manager.py +1 -1
  55. letta/services/step_manager.py +1 -1
  56. letta/services/tool_manager.py +1 -1
  57. letta/services/user_manager.py +1 -1
  58. letta/settings.py +3 -0
  59. letta/streaming_interface.py +6 -2
  60. letta/tracing.py +205 -0
  61. letta/utils.py +4 -0
  62. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
  63. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
  64. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/llm_api/deepseek.py ADDED
@@ -0,0 +1,303 @@
+ import json
+ import re
+ import warnings
+ from typing import List, Optional
+
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.message import Message as _Message
+ from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage
+ from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
+ from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.schemas.openai.openai import Function, ToolCall
+ from letta.utils import get_tool_call_id
+
+
+ def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+     """
+     Merge `ToolMessage` objects into the previous message.
+     """
+     previous_message.content += (
+         f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+     )
+     return previous_message
+
+
+ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+     """
+     For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+     """
+
+     if "tool_calls" in assistant_message.dict().keys():
+         assistant_message.content = "".join(
+             [
+                 # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                 f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                 for tool_call in assistant_message.tool_calls
+             ]
+         )
+         del assistant_message.tool_calls
+     return assistant_message
+
+
+ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+     """
+     Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+     Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+     This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+     at the end.
+
+     """
+     deepseek_messages = []
+     for idx, message in enumerate(messages):
+         # First message is the system prompt, add it
+         if idx == 0 and message.role == "system":
+             deepseek_messages.append(message)
+             continue
+         if message.role == "user":
+             if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                 # User message, add it
+                 deepseek_messages.append(UserMessage(content=message.content))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "assistant":
+             if deepseek_messages[-1].role == "user":
+                 # Assistant message, remove tool calls and add them to the content
+                 deepseek_messages.append(handle_assistant_message(message))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+             # Tool message, add it to the last assistant message
+             merged_message = merge_tool_message(deepseek_messages[-1], message)
+             deepseek_messages[-1] = merged_message
+         else:
+             print(f"Skipping message: {message}")
+
+     # This needs to end on a user message, add a dummy message if the last was assistant
+     if deepseek_messages[-1].role == "assistant":
+         deepseek_messages.append(UserMessage(content=""))
+     return deepseek_messages
+
+
+ def build_deepseek_chat_completions_request(
+     llm_config: LLMConfig,
+     messages: List[_Message],
+     user_id: Optional[str],
+     functions: Optional[list],
+     function_call: Optional[str],
+     use_tool_naming: bool,
+     max_tokens: Optional[int],
+ ) -> ChatCompletionRequest:
+     # if functions and llm_config.put_inner_thoughts_in_kwargs:
+     #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+     #     # TODO(fix)
+     #     inner_thoughts_desc = (
+     #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+     #     )
+     #     functions = add_inner_thoughts_to_functions(
+     #         functions=functions,
+     #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+     #         inner_thoughts_description=inner_thoughts_desc,
+     #     )
+
+     openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages]
+
+     if llm_config.model:
+         model = llm_config.model
+     else:
+         warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+         model = None
+     if use_tool_naming:
+         if function_call is None:
+             tool_choice = None
+         elif function_call not in ["none", "auto", "required"]:
+             tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+         else:
+             tool_choice = function_call
+
+         def add_functions_to_system_message(system_message: ChatMessage):
+             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             system_message.content += f'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+         if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+             add_functions_to_system_message(
+                 openai_message_list[0]
+             )  # Inject additional instructions to the system prompt with the available functions
+
+             openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+         else:
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                 tool_choice=tool_choice,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+     else:
+         data = ChatCompletionRequest(
+             model=model,
+             messages=openai_message_list,
+             functions=functions,
+             function_call=function_call,
+             user=str(user_id),
+             max_completion_tokens=max_tokens,
+             temperature=llm_config.temperature,
+         )
+
+     return data
+
+
+ def convert_deepseek_response_to_chatcompletion(
+     response: ChatCompletionResponse,
+ ) -> ChatCompletionResponse:
+     """
+     Example response from DeepSeek:
+
+     ChatCompletion(
+         id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+         choices=[
+             Choice(
+                 finish_reason='stop',
+                 index=0,
+                 logprobs=None,
+                 message=ChatCompletionMessage(
+                     content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                     refusal=None,
+                     role='assistant',
+                     audio=None,
+                     function_call=None,
+                     tool_calls=None,
+                     reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                 )
+             )
+         ],
+         created=1738266449,
+         model='deepseek-reasoner',
+         object='chat.completion',
+         service_tier=None,
+         system_fingerprint='fp_7e73fd9a08',
+         usage=CompletionUsage(
+             completion_tokens=111,
+             prompt_tokens=1270,
+             total_tokens=1381,
+             completion_tokens_details=CompletionTokensDetails(
+                 accepted_prediction_tokens=None,
+                 audio_tokens=None,
+                 reasoning_tokens=72,
+                 rejected_prediction_tokens=None
+             ),
+             prompt_tokens_details=PromptTokensDetails(
+                 audio_tokens=None,
+                 cached_tokens=1088
+             ),
+             prompt_cache_hit_tokens=1088,
+             prompt_cache_miss_tokens=182
+         )
+     )
+     """
+
+     def convert_dict_quotes(input_dict: dict):
+         """
+         Convert a dictionary with single-quoted keys to double-quoted keys,
+         properly handling boolean values and nested structures.
+
+         Args:
+             input_dict (dict): Input dictionary with single-quoted keys
+
+         Returns:
+             str: JSON string with double-quoted keys
+         """
+         # First convert the dictionary to a JSON string to handle booleans properly
+         json_str = json.dumps(input_dict)
+
+         # Function to handle complex string replacements
+         def replace_quotes(match):
+             key = match.group(1)
+             # Escape any existing double quotes in the key
+             key = key.replace('"', '\\"')
+             return f'"{key}":'
+
+         # Replace single-quoted keys with double-quoted keys
+         # This regex looks for single-quoted keys followed by a colon
+         def strip_json_block(text):
+             # Check if text starts with ```json or similar
+             if text.strip().startswith("```"):
+                 # Split by \n to remove the first and last lines
+                 lines = text.split("\n")[1:-1]
+                 return "\n".join(lines)
+             return text
+
+         pattern = r"'([^']*)':"
+         converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+         # Parse the string back to ensure valid JSON format
+         try:
+             json.loads(converted_str)
+             return converted_str
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+     def extract_json_block(text):
+         # Find the first {
+         start = text.find("{")
+         if start == -1:
+             return text
+
+         # Track nested braces to find the matching closing brace
+         brace_count = 0
+         end = start
+
+         for i in range(start, len(text)):
+             if text[i] == "{":
+                 brace_count += 1
+             elif text[i] == "}":
+                 brace_count -= 1
+                 if brace_count == 0:
+                     end = i + 1
+                     break
+
+         return text[start:end]
+
+     content = response.choices[0].message.content
+     try:
+         content_dict = json.loads(extract_json_block(content))
+
+         if type(content_dict["arguments"]) == str:
+             content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+         tool_calls = [
+             ToolCall(
+                 id=get_tool_call_id(),
+                 type="function",
+                 function=Function(
+                     name=content_dict["name"],
+                     arguments=convert_dict_quotes(content_dict["arguments"]),
+                 ),
+             )
+         ]
+     except (json.JSONDecodeError, TypeError, KeyError) as e:
+         print(e)
+         tool_calls = response.choices[0].message.tool_calls
+         raise ValueError(f"Failed to create valid JSON {content}")
+
+     # Move the "reasoning_content" into the "content" field
+     response.choices[0].message.content = response.choices[0].message.reasoning_content
+     response.choices[0].message.tool_calls = tool_calls
+
+     # Remove the "reasoning_content" field
+     response.choices[0].message.reasoning_content = None
+
+     return response
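
The helpers above exist because the DeepSeek chat API only accepts strictly alternating user/assistant turns and, for R1, no native tool calling, so tool traffic has to be folded into message text. A rough sketch of that folding, using stand-in objects rather than letta's real message models (merge_tool_message only touches the .content, .role, and .tool_call_id attributes, so plain dataclasses suffice for illustration):

# Illustrative only: exercises the merge behavior of the new letta/llm_api/deepseek.py helpers.
from dataclasses import dataclass

from letta.llm_api.deepseek import merge_tool_message


@dataclass
class FakeMessage:  # stand-in for letta's ChatMessage/ToolMessage models
    role: str
    content: str
    tool_call_id: str = ""


assistant = FakeMessage(role="assistant", content='<ToolCall> {"name": "send_message", ...} </ToolCall>')
tool_result = FakeMessage(role="tool", content='{"status": "OK"}', tool_call_id="call_123")

merged = merge_tool_message(assistant, tool_result)
# merged.content now ends with:
#   <ToolMessage> content: {"status": "OK"}, role: tool, tool_call_id: call_123</ToolMessage>
# which is how tool results survive DeepSeek's user/assistant-only message constraint.

build_deepseek_chat_completions_request applies the same idea at request time: for deepseek-reasoner it injects the tool schemas into the system prompt via add_functions_to_system_message, maps the history with map_messages_to_deepseek_format, and sends the request without a tools field.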
letta/llm_api/helpers.py CHANGED
@@ -202,21 +202,29 @@ def add_inner_thoughts_to_functions(
      inner_thoughts_key: str,
      inner_thoughts_description: str,
      inner_thoughts_required: bool = True,
+     put_inner_thoughts_first: bool = True,
  ) -> List[dict]:
      """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
      new_functions = []
      for function_object in functions:
          new_function_object = copy.deepcopy(function_object)
-
-         # Create a new OrderedDict with inner_thoughts as the first item
          new_properties = OrderedDict()
-         new_properties[inner_thoughts_key] = {
-             "type": "string",
-             "description": inner_thoughts_description,
-         }

-         # Add the rest of the properties
-         new_properties.update(function_object["parameters"]["properties"])
+         # For chat completions, we want inner thoughts to come later
+         if put_inner_thoughts_first:
+             # Create with inner_thoughts as the first item
+             new_properties[inner_thoughts_key] = {
+                 "type": "string",
+                 "description": inner_thoughts_description,
+             }
+             # Add the rest of the properties
+             new_properties.update(function_object["parameters"]["properties"])
+         else:
+             new_properties.update(function_object["parameters"]["properties"])
+             new_properties[inner_thoughts_key] = {
+                 "type": "string",
+                 "description": inner_thoughts_description,
+             }

          # Cast OrderedDict back to a regular dict
          new_function_object["parameters"]["properties"] = dict(new_properties)
@@ -225,9 +233,11 @@ def add_inner_thoughts_to_functions(
          if inner_thoughts_required:
              required_params = new_function_object["parameters"].get("required", [])
              if inner_thoughts_key not in required_params:
-                 required_params.insert(0, inner_thoughts_key)
+                 if put_inner_thoughts_first:
+                     required_params.insert(0, inner_thoughts_key)
+                 else:
+                     required_params.append(inner_thoughts_key)
              new_function_object["parameters"]["required"] = required_params
-
          new_functions.append(new_function_object)

      return new_functions
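
The new put_inner_thoughts_first flag only controls where the injected inner_thoughts parameter lands in each tool schema and in its required list. A hedged sketch of the expected effect; the send_message schema below is invented for illustration:

# Illustrative only; the function schema is made up, the call signature matches the diff above.
from letta.llm_api.helpers import add_inner_thoughts_to_functions

functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

first = add_inner_thoughts_to_functions(
    functions, inner_thoughts_key="inner_thoughts", inner_thoughts_description="Private reasoning.", put_inner_thoughts_first=True
)
last = add_inner_thoughts_to_functions(
    functions, inner_thoughts_key="inner_thoughts", inner_thoughts_description="Private reasoning.", put_inner_thoughts_first=False
)

print(list(first[0]["parameters"]["properties"]))  # ['inner_thoughts', 'message']
print(list(last[0]["parameters"]["properties"]))   # ['message', 'inner_thoughts']
print(first[0]["parameters"]["required"])          # ['inner_thoughts', 'message']
print(last[0]["parameters"]["required"])           # ['message', 'inner_thoughts']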
letta/llm_api/llm_api_tools.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  import random
  import time
  from typing import List, Optional, Union
@@ -13,6 +14,7 @@ from letta.llm_api.anthropic import (
  )
  from letta.llm_api.aws_bedrock import has_valid_aws_credentials
  from letta.llm_api.azure_openai import azure_openai_chat_completions_request
+ from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
  from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
  from letta.llm_api.openai import (
@@ -29,8 +31,9 @@ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
  from letta.settings import ModelSettings
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+ from letta.tracing import log_event, trace_method

- LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
+ LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]


  def retry_with_exponential_backoff(
@@ -68,9 +71,28 @@ def retry_with_exponential_backoff(
                  if http_err.response.status_code in error_codes:
                      # Increment retries
                      num_retries += 1
+                     log_event(
+                         "llm_retry_attempt",
+                         {
+                             "attempt": num_retries,
+                             "delay": delay,
+                             "status_code": http_err.response.status_code,
+                             "error_type": type(http_err).__name__,
+                             "error": str(http_err),
+                         },
+                     )

                      # Check if max retries has been reached
                      if num_retries > max_retries:
+                         log_event(
+                             "llm_max_retries_exceeded",
+                             {
+                                 "max_retries": max_retries,
+                                 "status_code": http_err.response.status_code,
+                                 "error_type": type(http_err).__name__,
+                                 "error": str(http_err),
+                             },
+                         )
                          raise RateLimitExceededError("Maximum number of retries exceeded", max_retries=max_retries)

                      # Increment the delay
@@ -84,15 +106,21 @@
                      time.sleep(delay)
                  else:
                      # For other HTTP errors, re-raise the exception
+                     log_event(
+                         "llm_non_retryable_error",
+                         {"status_code": http_err.response.status_code, "error_type": type(http_err).__name__, "error": str(http_err)},
+                     )
                      raise

              # Raise exceptions for any errors not specified
              except Exception as e:
+                 log_event("llm_unexpected_error", {"error_type": type(e).__name__, "error": str(e)})
                  raise e

      return wrapper


+ @trace_method("LLM Request")
  @retry_with_exponential_backoff
  def create(
      # agent_state: AgentState,
@@ -112,6 +140,7 @@ def create(
      stream: bool = False,
      stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
      model_settings: Optional[dict] = None,  # TODO: eventually pass from server
+     put_inner_thoughts_first: bool = True,
  ) -> ChatCompletionResponse:
      """Return response to chat completion with backoff"""
      from letta.utils import printd
@@ -157,7 +186,9 @@ def create(
          else:
              function_call = "required"

-         data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
+         data = build_openai_chat_completions_request(
+             llm_config, messages, user_id, functions, function_call, use_tool_naming, put_inner_thoughts_first=put_inner_thoughts_first
+         )
          if stream:  # Client requested token streaming
              data.stream = True
              assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -453,10 +484,62 @@ def create(
              ),
          )

+     elif llm_config.model_endpoint_type == "deepseek":
+         if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
+             # only is a problem if we are *not* using an openai proxy
+             raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
+
+         data = build_deepseek_chat_completions_request(
+             llm_config,
+             messages,
+             user_id,
+             functions,
+             function_call,
+             use_tool_naming,
+             llm_config.max_tokens,
+         )
+         if stream:  # Client requested token streaming
+             data.stream = True
+             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+                 stream_interface, AgentRefreshStreamingInterface
+             ), type(stream_interface)
+             response = openai_chat_completions_process_stream(
+                 url=llm_config.model_endpoint,
+                 api_key=model_settings.deepseek_api_key,
+                 chat_completion_request=data,
+                 stream_interface=stream_interface,
+             )
+         else:  # Client did not request token streaming (expect a blocking backend response)
+             data.stream = False
+             if isinstance(stream_interface, AgentChunkStreamingInterface):
+                 stream_interface.stream_start()
+             try:
+                 response = openai_chat_completions_request(
+                     url=llm_config.model_endpoint,
+                     api_key=model_settings.deepseek_api_key,
+                     chat_completion_request=data,
+                 )
+             finally:
+                 if isinstance(stream_interface, AgentChunkStreamingInterface):
+                     stream_interface.stream_end()
+         """
+         if llm_config.put_inner_thoughts_in_kwargs:
+             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+         """
+         response = convert_deepseek_response_to_chatcompletion(response)
+         return response
+
      # local model
      else:
          if stream:
              raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+
+         if "DeepSeek-R1".lower() in llm_config.model.lower():  # TODO: move this to the llm_config.
+             messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             messages[0].content[
+                 0
+             ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
+
          return get_chat_completion(
              model=llm_config.model,
              messages=messages,
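
The new branch is keyed off llm_config.model_endpoint_type == "deepseek" and reads the API key from model_settings.deepseek_api_key. A hedged sketch of a config that would take this path; the field names follow letta's LLMConfig, while the endpoint URL and context window are illustrative values, not taken from this diff:

# Hedged sketch: an LLMConfig that would be routed through the "deepseek" branch of create().
from letta.schemas.llm_config import LLMConfig

deepseek_config = LLMConfig(
    model="deepseek-reasoner",              # R1; triggers the function-calling workaround
    model_endpoint_type="deepseek",         # new endpoint type added to LLM_API_PROVIDER_OPTIONS
    model_endpoint="https://api.deepseek.com/v1",  # assumed public base URL
    context_window=64000,                   # assumed value
)
# create(llm_config=deepseek_config, ...) then builds the request with
# build_deepseek_chat_completions_request() and post-processes the response with
# convert_deepseek_response_to_chatcompletion(), which moves reasoning_content into
# content and re-parses the JSON tool call that the model emits as plain text.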
letta/llm_api/openai.py CHANGED
@@ -94,6 +94,7 @@ def build_openai_chat_completions_request(
      functions: Optional[list],
      function_call: Optional[str],
      use_tool_naming: bool,
+     put_inner_thoughts_first: bool = True,
  ) -> ChatCompletionRequest:
      if functions and llm_config.put_inner_thoughts_in_kwargs:
          # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -105,6 +106,7 @@
              functions=functions,
              inner_thoughts_key=INNER_THOUGHTS_KWARG,
              inner_thoughts_description=inner_thoughts_desc,
+             put_inner_thoughts_first=put_inner_thoughts_first,
          )

      openai_message_list = [
@@ -166,6 +168,11 @@ def openai_chat_completions_process_stream(
      create_message_id: bool = True,
      create_message_datetime: bool = True,
      override_tool_call_id: bool = True,
+     # if we expect reasoning content in the response,
+     # then we should emit reasoning_content as "inner_thoughts"
+     # however, we don't necessarily want to put these
+     # expect_reasoning_content: bool = False,
+     expect_reasoning_content: bool = True,
  ) -> ChatCompletionResponse:
      """Process a streaming completion response, and return a ChatCompletionRequest at the end.

@@ -250,6 +257,7 @@
                      chat_completion_chunk,
                      message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                      message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                     expect_reasoning_content=expect_reasoning_content,
                  )
              elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                  stream_interface.process_refresh(chat_completion_response)
@@ -290,6 +298,13 @@
                  else:
                      accum_message.content += content_delta

+                 if expect_reasoning_content and message_delta.reasoning_content is not None:
+                     reasoning_content_delta = message_delta.reasoning_content
+                     if accum_message.reasoning_content is None:
+                         accum_message.reasoning_content = reasoning_content_delta
+                     else:
+                         accum_message.reasoning_content += reasoning_content_delta
+
              # TODO(charles) make sure this works for parallel tool calling?
              if message_delta.tool_calls is not None:
                  tool_calls_delta = message_delta.tool_calls
@@ -377,7 +392,7 @@
      chat_completion_response.usage.completion_tokens = n_chunks
      chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks

-     assert len(chat_completion_response.choices) > 0, chat_completion_response
+     assert len(chat_completion_response.choices) > 0, f"No response from provider {chat_completion_response}"

      # printd(chat_completion_response)
      return chat_completion_response
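
The streaming change mirrors the existing content accumulation for the new reasoning_content field: deltas are concatenated chunk by chunk whenever expect_reasoning_content is enabled. A stand-alone sketch of that accumulation rule (the chunk payloads below are invented):

# Illustrative only: reproduces the accumulation logic added above outside the stream loop.
chunks = [
    {"reasoning_content": "Okay, the user ", "content": None},
    {"reasoning_content": "said hello.", "content": None},
    {"reasoning_content": None, "content": '{"function": "send_message"}'},
]

accum_reasoning, accum_content = None, None
for delta in chunks:
    if delta["reasoning_content"] is not None:
        accum_reasoning = (accum_reasoning or "") + delta["reasoning_content"]
    if delta["content"] is not None:
        accum_content = (accum_content or "") + delta["content"]

print(accum_reasoning)  # Okay, the user said hello.
print(accum_content)    # {"function": "send_message"}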
letta/local_llm/chat_completion_proxy.py CHANGED
@@ -14,7 +14,7 @@ from letta.local_llm.grammars.gbnf_grammar_generator import create_dynamic_model
  from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
  from letta.local_llm.llamacpp.api import get_llamacpp_completion
  from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
- from letta.local_llm.lmstudio.api import get_lmstudio_completion
+ from letta.local_llm.lmstudio.api import get_lmstudio_completion, get_lmstudio_completion_chatcompletions
  from letta.local_llm.ollama.api import get_ollama_completion
  from letta.local_llm.utils import count_tokens, get_available_wrappers
  from letta.local_llm.vllm.api import get_vllm_completion
@@ -141,11 +141,24 @@ def get_chat_completion(
              f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
          )

+     # get the schema for the model
+
+     """
+     if functions_python is not None:
+         model_schema = generate_schema(functions)
+     else:
+         model_schema = None
+     """
+
+     # Run the LLM
      try:
+         result_reasoning = None
          if endpoint_type == "webui":
              result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
          elif endpoint_type == "webui-legacy":
              result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+         elif endpoint_type == "lmstudio-chatcompletions":
+             result, usage, result_reasoning = get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages)
          elif endpoint_type == "lmstudio":
              result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
          elif endpoint_type == "lmstudio-legacy":
@@ -214,7 +227,7 @@ def get_chat_completion(
              index=0,
              message=Message(
                  role=chat_completion_result["role"],
-                 content=chat_completion_result["content"],
+                 content=result_reasoning if result_reasoning is not None else chat_completion_result["content"],
                  tool_calls=(
                      [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
                      if "function_call" in chat_completion_result