letta-nightly 0.8.0.dev20250606104326__py3-none-any.whl → 0.8.2.dev20250606215616__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +1 -1
  3. letta/agents/letta_agent.py +49 -29
  4. letta/agents/letta_agent_batch.py +1 -2
  5. letta/agents/voice_agent.py +19 -13
  6. letta/agents/voice_sleeptime_agent.py +11 -3
  7. letta/constants.py +18 -0
  8. letta/data_sources/__init__.py +0 -0
  9. letta/data_sources/redis_client.py +282 -0
  10. letta/errors.py +0 -4
  11. letta/functions/function_sets/files.py +58 -0
  12. letta/functions/schema_generator.py +18 -1
  13. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  14. letta/helpers/datetime_helpers.py +47 -3
  15. letta/helpers/decorators.py +69 -0
  16. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  17. letta/interfaces/anthropic_streaming_interface.py +43 -24
  18. letta/interfaces/openai_streaming_interface.py +21 -19
  19. letta/llm_api/anthropic.py +1 -1
  20. letta/llm_api/anthropic_client.py +22 -14
  21. letta/llm_api/google_vertex_client.py +1 -1
  22. letta/llm_api/helpers.py +36 -30
  23. letta/llm_api/llm_api_tools.py +1 -1
  24. letta/llm_api/llm_client_base.py +29 -1
  25. letta/llm_api/openai.py +1 -1
  26. letta/llm_api/openai_client.py +6 -8
  27. letta/local_llm/chat_completion_proxy.py +1 -1
  28. letta/memory.py +1 -1
  29. letta/orm/enums.py +1 -0
  30. letta/orm/file.py +80 -3
  31. letta/orm/files_agents.py +13 -0
  32. letta/orm/sqlalchemy_base.py +34 -11
  33. letta/otel/__init__.py +0 -0
  34. letta/otel/context.py +25 -0
  35. letta/otel/events.py +0 -0
  36. letta/otel/metric_registry.py +122 -0
  37. letta/otel/metrics.py +66 -0
  38. letta/otel/resource.py +26 -0
  39. letta/{tracing.py → otel/tracing.py} +55 -78
  40. letta/plugins/README.md +22 -0
  41. letta/plugins/__init__.py +0 -0
  42. letta/plugins/defaults.py +11 -0
  43. letta/plugins/plugins.py +72 -0
  44. letta/schemas/enums.py +8 -0
  45. letta/schemas/file.py +12 -0
  46. letta/schemas/tool.py +4 -0
  47. letta/server/db.py +7 -7
  48. letta/server/rest_api/app.py +8 -6
  49. letta/server/rest_api/routers/v1/agents.py +37 -36
  50. letta/server/rest_api/routers/v1/groups.py +3 -3
  51. letta/server/rest_api/routers/v1/sources.py +26 -3
  52. letta/server/rest_api/utils.py +9 -6
  53. letta/server/server.py +18 -12
  54. letta/services/agent_manager.py +185 -193
  55. letta/services/block_manager.py +1 -1
  56. letta/services/context_window_calculator/token_counter.py +3 -2
  57. letta/services/file_processor/chunker/line_chunker.py +34 -0
  58. letta/services/file_processor/file_processor.py +40 -11
  59. letta/services/file_processor/parser/mistral_parser.py +11 -1
  60. letta/services/files_agents_manager.py +96 -7
  61. letta/services/group_manager.py +6 -6
  62. letta/services/helpers/agent_manager_helper.py +373 -3
  63. letta/services/identity_manager.py +1 -1
  64. letta/services/job_manager.py +1 -1
  65. letta/services/llm_batch_manager.py +1 -1
  66. letta/services/message_manager.py +1 -1
  67. letta/services/organization_manager.py +1 -1
  68. letta/services/passage_manager.py +1 -1
  69. letta/services/per_agent_lock_manager.py +1 -1
  70. letta/services/provider_manager.py +1 -1
  71. letta/services/sandbox_config_manager.py +1 -1
  72. letta/services/source_manager.py +178 -19
  73. letta/services/step_manager.py +2 -2
  74. letta/services/summarizer/summarizer.py +1 -1
  75. letta/services/telemetry_manager.py +1 -1
  76. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  77. letta/services/tool_executor/composio_tool_executor.py +53 -0
  78. letta/services/tool_executor/core_tool_executor.py +474 -0
  79. letta/services/tool_executor/files_tool_executor.py +131 -0
  80. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  81. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  82. letta/services/tool_executor/tool_execution_manager.py +34 -14
  83. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  84. letta/services/tool_executor/tool_executor.py +3 -802
  85. letta/services/tool_executor/tool_executor_base.py +43 -0
  86. letta/services/tool_manager.py +55 -59
  87. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  88. letta/services/tool_sandbox/local_sandbox.py +6 -3
  89. letta/services/user_manager.py +6 -3
  90. letta/settings.py +21 -1
  91. letta/utils.py +7 -2
  92. {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/METADATA +4 -2
  93. {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/RECORD +96 -74
  94. {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/entry_points.txt +0 -0
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -23,7 +23,7 @@ from anthropic.types.beta import (
 )
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
@@ -62,8 +62,7 @@ class AnthropicStreamingInterface:
         self.use_assistant_message = use_assistant_message
 
         # Premake IDs for database writes
-        self.letta_assistant_message_id = Message.generate_id()
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.anthropic_mode = None
         self.message_id = None
@@ -132,7 +131,7 @@ class AnthropicStreamingInterface:
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - provider_request_start_timestamp_ns
                     ttft_span.add_event(
-                        name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ttft_ns // 1_000_000}
+                        name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
                    )
                    first_chunk = False
 
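
Both streaming interfaces in this release route their time-to-first-token events through the new `ns_to_ms` helper instead of the inline `// 1_000_000`. The helper itself lives in `letta/helpers/datetime_helpers.py`, which this diff does not display; a minimal sketch of what it presumably does:

```python
# Hypothetical sketch: the real ns_to_ms is defined in letta/helpers/datetime_helpers.py,
# which is not shown in this diff.
def ns_to_ms(ns: int) -> int:
    """Convert a nanosecond duration to whole milliseconds (integer division)."""
    return ns // 1_000_000

assert ns_to_ms(1_500_000) == 1  # same result as the old inline `ttft_ns // 1_000_000`
```
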
@@ -152,7 +151,7 @@ class AnthropicStreamingInterface:
                 if not self.use_assistant_message:
                     # Buffer the initial tool call message instead of yielding immediately
                     tool_call_msg = ToolCallMessage(
-                        id=self.letta_tool_message_id,
+                        id=self.letta_message_id,
                         tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
                         date=datetime.now(timezone.utc).isoformat(),
                     )
@@ -165,11 +164,11 @@
                     if prev_message_type and prev_message_type != "hidden_reasoning_message":
                         message_index += 1
                     hidden_reasoning_message = HiddenReasoningMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         state="redacted",
                         hidden_reasoning=content.data,
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(hidden_reasoning_message)
                     prev_message_type = hidden_reasoning_message.message_type
@@ -206,10 +205,10 @@
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         reasoning=self.accumulated_inner_thoughts[-1],
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -233,10 +232,10 @@
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         reasoning=inner_thoughts_diff,
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -249,10 +248,28 @@
                 if len(self.tool_call_buffer) > 0:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+
+                    # Strip out the inner thoughts from the buffered tool call arguments before streaming
+                    tool_call_args = ""
                     for buffered_msg in self.tool_call_buffer:
-                        buffered_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
-                        prev_message_type = buffered_msg.message_type
-                        yield buffered_msg
+                        tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
+                    tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                    tool_call_msg = ToolCallMessage(
+                        id=self.tool_call_buffer[0].id,
+                        otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
+                        date=self.tool_call_buffer[0].date,
+                        name=self.tool_call_buffer[0].name,
+                        sender_id=self.tool_call_buffer[0].sender_id,
+                        step_id=self.tool_call_buffer[0].step_id,
+                        tool_call=ToolCallDelta(
+                            name=self.tool_call_name,
+                            tool_call_id=self.tool_call_id,
+                            arguments=tool_call_args,
+                        ),
+                    )
+                    prev_message_type = tool_call_msg.message_type
+                    yield tool_call_msg
                     self.tool_call_buffer = []
 
                 # Start detecting special case of "send_message"
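
Instead of replaying each buffered delta, the interface now concatenates the buffered argument fragments and deletes the serialized inner-thoughts kwarg with a plain string replace before emitting a single `ToolCallMessage`. A standalone sketch of that stripping step, with illustrative values in place of the real constants and buffered deltas:

```python
# Illustrative values; the real INNER_THOUGHTS_KWARG comes from letta.local_llm.constants
# and the fragments come from streamed ToolCallDelta chunks.
INNER_THOUGHTS_KWARG = "inner_thoughts"
current_inner_thoughts = "I should greet the user"

# Buffered partial-JSON fragments concatenate into the full arguments string.
fragments = ['{"inner_thoughts": "I should', ' greet the user", ', '"message": "hi"}']
tool_call_args = "".join(fragments)

# The replace removes only the key/value text, not surrounding punctuation.
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
print(tool_call_args)  # '{, "message": "hi"}'
```

Note that the replace leaves the delimiting comma behind; in this code path the result is streamed as a display delta rather than re-parsed as strict JSON.
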
@@ -266,24 +283,26 @@
                     if prev_message_type and prev_message_type != "assistant_message":
                         message_index += 1
                     assistant_msg = AssistantMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         content=[TextContent(text=send_message_diff)],
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     prev_message_type = assistant_msg.message_type
                     yield assistant_msg
                 else:
                     # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
                     tool_call_msg = ToolCallMessage(
-                        id=self.letta_tool_message_id,
-                        tool_call=ToolCallDelta(arguments=delta.partial_json),
+                        id=self.letta_message_id,
+                        tool_call=ToolCallDelta(
+                            name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
+                        ),
                         date=datetime.now(timezone.utc).isoformat(),
                     )
                     if self.inner_thoughts_complete:
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
-                        tool_call_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
+                        tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg
                     else:
@@ -301,11 +320,11 @@
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning=delta.thinking,
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -320,12 +339,12 @@
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning="",
                     date=datetime.now(timezone.utc).isoformat(),
                     signature=delta.signature,
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -360,7 +379,7 @@
         group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
     ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
         if group_type == "reasoning":
-            reasoning_text = "".join(chunk.reasoning for chunk in group)
+            reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
             is_native = any(chunk.source == "reasoner_model" for chunk in group)
             signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
             if is_native:
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -5,7 +5,7 @@ from openai import AsyncStream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import Message
@@ -32,14 +32,14 @@ class OpenAIStreamingInterface:
         self.function_args_buffer = None
         self.function_id_buffer = None
         self.last_flushed_function_name = None
+        self.last_flushed_function_id = None
 
         # Buffer to hold function arguments until inner thoughts are complete
         self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         # Premake IDs for database writes
-        self.letta_assistant_message_id = Message.generate_id()
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.message_id = None
         self.model = None
@@ -54,14 +54,14 @@ class OpenAIStreamingInterface:
         self.reasoning_messages = []
 
     def get_reasoning_content(self) -> List[TextContent]:
-        content = "".join(self.reasoning_messages)
+        content = "".join(self.reasoning_messages).strip()
         return [TextContent(text=content)]
 
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
         return ToolCall(
-            id=self.letta_tool_message_id,
+            id=self.last_flushed_function_id,
             function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
         )
 
@@ -85,7 +85,7 @@
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - provider_request_start_timestamp_ns
                     ttft_span.add_event(
-                        name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ttft_ns // 1_000_000}
+                        name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
                     )
                     first_chunk = False
 
@@ -133,11 +133,11 @@
                             message_index += 1
                         self.reasoning_messages.append(updates_inner_thoughts)
                         reasoning_message = ReasoningMessage(
-                            id=self.letta_tool_message_id,
+                            id=self.letta_message_id,
                             date=datetime.now(timezone.utc),
                             reasoning=updates_inner_thoughts,
                             # name=name,
-                            otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         )
                         prev_message_type = reasoning_message.message_type
                         yield reasoning_message
@@ -171,20 +171,22 @@
                                 message_index += 1
                             self.tool_call_name = str(self.function_name_buffer)
                             tool_call_msg = ToolCallMessage(
-                                id=self.letta_tool_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 tool_call=ToolCallDelta(
                                     name=self.function_name_buffer,
                                     arguments=None,
                                     tool_call_id=self.function_id_buffer,
                                 ),
-                                otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
 
                         # Record what the last function name we flushed was
                         self.last_flushed_function_name = self.function_name_buffer
+                        if self.last_flushed_function_id is None:
+                            self.last_flushed_function_id = self.function_id_buffer
                         # Clear the buffer
                         self.function_name_buffer = None
                         self.function_id_buffer = None
@@ -236,10 +238,10 @@
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
-                                id=self.letta_assistant_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 content=combined_chunk,
-                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
                             yield assistant_message
@@ -268,11 +270,11 @@
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
-                                id=self.letta_assistant_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 content=diff,
                                 # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
                             yield assistant_message
@@ -292,15 +294,15 @@
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
                             tool_call_msg = ToolCallMessage(
-                                id=self.letta_tool_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 tool_call=ToolCallDelta(
-                                    name=None,
+                                    name=self.function_name_buffer,
                                     arguments=combined_chunk,
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
@@ -312,7 +314,7 @@
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
                             tool_call_msg = ToolCallMessage(
-                                id=self.letta_tool_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 tool_call=ToolCallDelta(
                                     name=None,
@@ -320,7 +322,7 @@
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
letta/llm_api/anthropic.py CHANGED
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
+from letta.otel.tracing import log_event
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
@@ -45,7 +46,6 @@ from letta.services.provider_manager import ProviderManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
-from letta.tracing import log_event
 
 logger = get_logger(__name__)
 
letta/llm_api/anthropic_client.py CHANGED
@@ -27,16 +27,16 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.services.provider_manager import ProviderManager
 from letta.settings import model_settings
-from letta.tracing import trace_method
 
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
 
@@ -199,10 +199,10 @@ class AnthropicClient(LLMClientBase):
         elif llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [Tool(function=f) for f in tools]
+            tools_for_request = [OpenAITool(function=f) for f in tools]
         elif force_tool_call is not None:
             tool_choice = {"type": "tool", "name": force_tool_call}
-            tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
 
             # need to have this setting to be able to put inner thoughts in kwargs
             if not llm_config.put_inner_thoughts_in_kwargs:
@@ -216,7 +216,7 @@ class AnthropicClient(LLMClientBase):
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
                 tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
+            tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
 
         # Add tool choice
         if tool_choice:
@@ -230,7 +230,7 @@ class AnthropicClient(LLMClientBase):
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
                 inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
             )
-            tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
+            tools_for_request = [OpenAITool(function=f) for f in tools_with_inner_thoughts]
 
         if tools_for_request and len(tools_for_request) > 0:
             # TODO eventually enable parallel tool use
@@ -270,7 +270,7 @@
 
         return data
 
-    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[Tool] = None) -> int:
+    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
         client = anthropic.AsyncAnthropic()
         if messages and len(messages) == 0:
             messages = None
@@ -278,11 +278,19 @@
             anthropic_tools = convert_tools_to_anthropic_format(tools)
         else:
             anthropic_tools = None
-        result = await client.beta.messages.count_tokens(
-            model=model or "claude-3-7-sonnet-20250219",
-            messages=messages or [{"role": "user", "content": "hi"}],
-            tools=anthropic_tools or [],
-        )
+
+        try:
+            result = await client.beta.messages.count_tokens(
+                model=model or "claude-3-7-sonnet-20250219",
+                messages=messages or [{"role": "user", "content": "hi"}],
+                tools=anthropic_tools or [],
+            )
+        except:
+            import ipdb
+
+            ipdb.set_trace()
+            raise
+
         token_count = result.input_tokens
         if messages is None:
             token_count -= 8
@@ -477,7 +485,7 @@
         return chat_completion_response
 
 
-def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
+def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
     OpenAI style:
@@ -527,7 +535,7 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     for tool in tools:
         formatted_tool = {
             "name": tool.function.name,
-            "description": tool.function.description,
+            "description": tool.function.description if tool.function.description else "",
            "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
         }
         formatted_tools.append(formatted_tool)
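
The behavioral change here is the description fallback: Anthropic's tool schema expects `description` to be a string, so a tool without one now converts to `""` instead of passing `None` through. A small sketch of the resulting shape, using a hand-written OpenAI-style dict rather than the project's `OpenAITool` model:

```python
# Hand-rolled stand-in for an OpenAITool; names and values are illustrative.
openai_style_function = {
    "name": "get_weather",
    "description": None,  # tools without a description used to pass None through
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]},
}

anthropic_style_tool = {
    "name": openai_style_function["name"],
    "description": openai_style_function["description"] if openai_style_function["description"] else "",
    "input_schema": openai_style_function["parameters"] or {"type": "object", "properties": {}, "required": []},
}
print(anthropic_style_tool["description"])  # '' rather than None, as Anthropic's API expects
```
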
letta/llm_api/google_vertex_client.py CHANGED
@@ -12,12 +12,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings, settings
-from letta.tracing import trace_method
 from letta.utils import get_tool_call_id
 
 logger = get_logger(__name__)
letta/llm_api/helpers.py CHANGED
@@ -63,11 +63,11 @@ def _convert_to_structured_output_helper(property: dict) -> dict:
 
 
 def convert_to_structured_output(openai_function: dict, allow_optional: bool = False) -> dict:
-    """Convert function call objects to structured output objects
+    """Convert function call objects to structured output objects.
 
     See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
     """
-    description = openai_function["description"] if "description" in openai_function else ""
+    description = openai_function.get("description", "")
 
     structured_output = {
         "name": openai_function["name"],
@@ -81,54 +81,58 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
         },
     }
 
-    # This code needs to be able to handle nested properties
-    # For example, the param details may have "type" + "description",
-    # but if "type" is "object" we expected "properties", where each property has details
-    # and if "type" is "array" we expect "items": <type>
     for param, details in openai_function["parameters"]["properties"].items():
         param_type = details["type"]
-        description = details.get("description", "")
+        param_description = details.get("description", "")
 
         if param_type == "object":
             if "properties" not in details:
-                # Structured outputs requires the properties on dicts be specified ahead of time
-                raise ValueError(f"Property {param} of type object is missing properties")
+                raise ValueError(f"Property {param} of type object is missing 'properties'")
             structured_output["parameters"]["properties"][param] = {
                 "type": "object",
-                "description": description,
+                "description": param_description,
                 "properties": {k: _convert_to_structured_output_helper(v) for k, v in details["properties"].items()},
                 "additionalProperties": False,
                 "required": list(details["properties"].keys()),
             }
 
         elif param_type == "array":
-            structured_output["parameters"]["properties"][param] = {
-                "type": "array",
-                "description": description,
-                "items": _convert_to_structured_output_helper(details["items"]),
-            }
+            items_schema = details.get("items")
+            prefix_items_schema = details.get("prefixItems")
+
+            if prefix_items_schema:
+                # assume fixed-length tuple — safe fallback to use first type for items
+                fallback_item = prefix_items_schema[0] if isinstance(prefix_items_schema, list) else prefix_items_schema
+                structured_output["parameters"]["properties"][param] = {
+                    "type": "array",
+                    "description": param_description,
+                    "prefixItems": [_convert_to_structured_output_helper(item) for item in prefix_items_schema],
+                    "items": _convert_to_structured_output_helper(fallback_item),
+                    "minItems": details.get("minItems", len(prefix_items_schema)),
+                    "maxItems": details.get("maxItems", len(prefix_items_schema)),
+                }
+            elif items_schema:
+                structured_output["parameters"]["properties"][param] = {
+                    "type": "array",
+                    "description": param_description,
+                    "items": _convert_to_structured_output_helper(items_schema),
+                }
+            else:
+                raise ValueError(f"Array param '{param}' is missing both 'items' and 'prefixItems'")
 
         else:
-            structured_output["parameters"]["properties"][param] = {
-                "type": param_type,  # simple type
-                "description": description,
+            prop = {
+                "type": param_type,
+                "description": param_description,
             }
-
-            if "enum" in details:
-                structured_output["parameters"]["properties"][param]["enum"] = details["enum"]
+            if "enum" in details:
+                prop["enum"] = details["enum"]
+            structured_output["parameters"]["properties"][param] = prop
 
     if not allow_optional:
-        # Add all properties to required list
         structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
-
     else:
-        # See what parameters exist that aren't required
-        # Those are implied "optional" types
-        # For those types, turn each of them into a union type with "null"
-        # e.g.
-        # "type": "string" -> "type": ["string", "null"]
-        # TODO
-        raise NotImplementedError
+        raise NotImplementedError("Optional parameter handling is not implemented.")
 
     return structured_output
 
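
The rewritten array branch accepts tuple-style schemas that describe fixed-length arrays via `prefixItems`, falling back to the first prefix item for the `items` schema. Roughly what the new branch produces for a hypothetical two-element parameter (`_convert_to_structured_output_helper` output simplified to bare type schemas):

```python
# Hypothetical input schema for one parameter, e.g. a fixed-length (lat, lon) pair.
details = {
    "type": "array",
    "description": "lat/lon pair",
    "prefixItems": [{"type": "number"}, {"type": "number"}],
}

# Approximate result of the new prefixItems branch.
expected = {
    "type": "array",
    "description": "lat/lon pair",
    "prefixItems": [{"type": "number"}, {"type": "number"}],
    "items": {"type": "number"},  # fallback: the first prefix item's schema
    "minItems": 2,  # defaults to len(prefixItems) when the schema omits them
    "maxItems": 2,
}
```
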
@@ -292,6 +296,8 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
 
             except json.JSONDecodeError as e:
                 warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+                print(f"\nFailed to strip inner thoughts from kwargs: {e}")
+                print(f"\nTool call arguments: {tool_call.function.arguments}")
                 raise e
     else:
         warnings.warn(f"Did not find tool call in message: {str(message)}")
letta/llm_api/llm_api_tools.py CHANGED
@@ -26,6 +26,7 @@ from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.orm.user import User
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -35,7 +36,6 @@ from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
-from letta.tracing import log_event, trace_method
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]
 
letta/llm_api/llm_client_base.py CHANGED
@@ -1,3 +1,4 @@
+import json
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
@@ -6,13 +7,13 @@ from openai import AsyncStream, Stream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.errors import LLMError
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
-from letta.tracing import log_event, trace_method
 
 if TYPE_CHECKING:
     from letta.orm import User
@@ -186,3 +187,30 @@ class LLMClientBase:
             An LLMError subclass that represents the error in a provider-agnostic way
         """
         return LLMError(f"Unhandled LLM error: {str(e)}")
+
+    def _fix_truncated_json_response(self, response: ChatCompletionResponse) -> ChatCompletionResponse:
+        """
+        Fixes truncated JSON responses by ensuring the content is properly formatted.
+        This is a workaround for some providers that may return incomplete JSON.
+        """
+        if response.choices and response.choices[0].message and response.choices[0].message.tool_calls:
+            tool_call_args_str = response.choices[0].message.tool_calls[0].function.arguments
+            try:
+                json.loads(tool_call_args_str)
+            except json.JSONDecodeError:
+                try:
+                    json_str_end = ""
+                    quote_count = tool_call_args_str.count('"')
+                    if quote_count % 2 != 0:
+                        json_str_end = json_str_end + '"'
+
+                    open_braces = tool_call_args_str.count("{")
+                    close_braces = tool_call_args_str.count("}")
+                    missing_braces = open_braces - close_braces
+                    json_str_end += "}" * missing_braces
+                    fixed_tool_call_args_str = tool_call_args_str[: -len(json_str_end)] + json_str_end
+                    json.loads(fixed_tool_call_args_str)
+                    response.choices[0].message.tool_calls[0].function.arguments = fixed_tool_call_args_str
+                except json.JSONDecodeError:
+                    pass
+        return response
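
The new `_fix_truncated_json_response` hook repairs tool-call arguments that arrive as incomplete JSON by balancing an odd quote count and appending any missing closing braces. A standalone demo of the balancing idea (the shipped method splices `json_str_end` over the string's tail rather than appending, and only swaps the arguments in if the repaired string parses):

```python
import json

# Arguments cut off mid-object, as some providers may return them.
truncated = '{"message": "hello", "nested": {"done": true'

json_str_end = ""
if truncated.count('"') % 2 != 0:  # unterminated string literal
    json_str_end += '"'
json_str_end += "}" * (truncated.count("{") - truncated.count("}"))  # unclosed objects

repaired = truncated + json_str_end  # simplified: append the closers directly
print(json.loads(repaired))  # {'message': 'hello', 'nested': {'done': True}}
```
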