letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +16 -12
  3. letta/agents/base_agent.py +1 -1
  4. letta/agents/helpers.py +13 -2
  5. letta/agents/letta_agent.py +72 -34
  6. letta/agents/letta_agent_batch.py +1 -2
  7. letta/agents/voice_agent.py +19 -13
  8. letta/agents/voice_sleeptime_agent.py +23 -6
  9. letta/constants.py +18 -0
  10. letta/data_sources/__init__.py +0 -0
  11. letta/data_sources/redis_client.py +282 -0
  12. letta/errors.py +0 -4
  13. letta/functions/function_sets/files.py +58 -0
  14. letta/functions/schema_generator.py +18 -1
  15. letta/groups/sleeptime_multi_agent_v2.py +13 -3
  16. letta/helpers/datetime_helpers.py +47 -3
  17. letta/helpers/decorators.py +69 -0
  18. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  19. letta/interfaces/anthropic_streaming_interface.py +43 -24
  20. letta/interfaces/openai_streaming_interface.py +21 -19
  21. letta/llm_api/anthropic.py +1 -1
  22. letta/llm_api/anthropic_client.py +30 -16
  23. letta/llm_api/google_vertex_client.py +1 -1
  24. letta/llm_api/helpers.py +36 -30
  25. letta/llm_api/llm_api_tools.py +1 -1
  26. letta/llm_api/llm_client_base.py +29 -1
  27. letta/llm_api/openai.py +1 -1
  28. letta/llm_api/openai_client.py +6 -8
  29. letta/local_llm/chat_completion_proxy.py +1 -1
  30. letta/memory.py +1 -1
  31. letta/orm/enums.py +1 -0
  32. letta/orm/file.py +80 -3
  33. letta/orm/files_agents.py +13 -0
  34. letta/orm/passage.py +2 -0
  35. letta/orm/sqlalchemy_base.py +34 -11
  36. letta/otel/__init__.py +0 -0
  37. letta/otel/context.py +25 -0
  38. letta/otel/events.py +0 -0
  39. letta/otel/metric_registry.py +122 -0
  40. letta/otel/metrics.py +66 -0
  41. letta/otel/resource.py +26 -0
  42. letta/{tracing.py → otel/tracing.py} +55 -78
  43. letta/plugins/README.md +22 -0
  44. letta/plugins/__init__.py +0 -0
  45. letta/plugins/defaults.py +11 -0
  46. letta/plugins/plugins.py +72 -0
  47. letta/schemas/enums.py +8 -0
  48. letta/schemas/file.py +12 -0
  49. letta/schemas/letta_request.py +6 -0
  50. letta/schemas/passage.py +1 -0
  51. letta/schemas/tool.py +4 -0
  52. letta/server/db.py +7 -7
  53. letta/server/rest_api/app.py +8 -6
  54. letta/server/rest_api/routers/v1/agents.py +46 -37
  55. letta/server/rest_api/routers/v1/groups.py +3 -3
  56. letta/server/rest_api/routers/v1/sources.py +26 -3
  57. letta/server/rest_api/routers/v1/tools.py +7 -2
  58. letta/server/rest_api/utils.py +9 -6
  59. letta/server/server.py +25 -13
  60. letta/services/agent_manager.py +186 -194
  61. letta/services/block_manager.py +1 -1
  62. letta/services/context_window_calculator/context_window_calculator.py +1 -1
  63. letta/services/context_window_calculator/token_counter.py +3 -2
  64. letta/services/file_processor/chunker/line_chunker.py +34 -0
  65. letta/services/file_processor/file_processor.py +43 -12
  66. letta/services/file_processor/parser/mistral_parser.py +11 -1
  67. letta/services/files_agents_manager.py +96 -7
  68. letta/services/group_manager.py +6 -6
  69. letta/services/helpers/agent_manager_helper.py +404 -3
  70. letta/services/identity_manager.py +1 -1
  71. letta/services/job_manager.py +1 -1
  72. letta/services/llm_batch_manager.py +1 -1
  73. letta/services/mcp/stdio_client.py +5 -1
  74. letta/services/mcp_manager.py +4 -4
  75. letta/services/message_manager.py +1 -1
  76. letta/services/organization_manager.py +1 -1
  77. letta/services/passage_manager.py +604 -19
  78. letta/services/per_agent_lock_manager.py +1 -1
  79. letta/services/provider_manager.py +1 -1
  80. letta/services/sandbox_config_manager.py +1 -1
  81. letta/services/source_manager.py +178 -19
  82. letta/services/step_manager.py +2 -2
  83. letta/services/summarizer/summarizer.py +1 -1
  84. letta/services/telemetry_manager.py +1 -1
  85. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  86. letta/services/tool_executor/composio_tool_executor.py +53 -0
  87. letta/services/tool_executor/core_tool_executor.py +474 -0
  88. letta/services/tool_executor/files_tool_executor.py +138 -0
  89. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  90. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  91. letta/services/tool_executor/tool_execution_manager.py +34 -14
  92. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  93. letta/services/tool_executor/tool_executor.py +3 -802
  94. letta/services/tool_executor/tool_executor_base.py +43 -0
  95. letta/services/tool_manager.py +55 -59
  96. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  97. letta/services/tool_sandbox/local_sandbox.py +6 -3
  98. letta/services/user_manager.py +6 -3
  99. letta/settings.py +23 -2
  100. letta/utils.py +7 -2
  101. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
  102. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
  103. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
  104. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
  105. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,69 @@
1
+ import inspect
2
+ from functools import wraps
3
+ from typing import Callable
4
+
5
+ from letta.log import get_logger
6
+ from letta.plugins.plugins import get_experimental_checker
7
+ from letta.settings import settings
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
def experimental(feature_name: str, fallback_function: Callable, **kwargs):
    """Decorator that runs a fallback function if an experimental feature is not enabled.

    - kwargs passed to the decorator are merged with the wrapped call's kwargs
      (decorator kwargs take precedence) and are used ONLY for the experimental
      checker evaluation — they are never forwarded to the wrapped function or
      the fallback.
    - if the decorated function, fallback_function, or experimental checker
      function is async, the whole call will be async.
    """

    def decorator(f):
        experimental_checker = get_experimental_checker()
        is_f_async = inspect.iscoroutinefunction(f)
        is_fallback_async = inspect.iscoroutinefunction(fallback_function)
        is_experimental_checker_async = inspect.iscoroutinefunction(experimental_checker)

        async def call_function(func, is_async, *args, **_kwargs):
            # Uniform invoker: awaits coroutine functions, calls plain ones directly.
            if is_async:
                return await func(*args, **_kwargs)
            return func(*args, **_kwargs)

        # asynchronous wrapper if any function is async
        if any((is_f_async, is_fallback_async, is_experimental_checker_async)):

            @wraps(f)
            async def async_wrapper(*args, **_kwargs):
                result = await call_function(experimental_checker, is_experimental_checker_async, feature_name, **dict(_kwargs, **kwargs))
                if result:
                    return await call_function(f, is_f_async, *args, **_kwargs)
                else:
                    return await call_function(fallback_function, is_fallback_async, *args, **_kwargs)

            return async_wrapper

        else:

            @wraps(f)
            def wrapper(*args, **_kwargs):
                if experimental_checker(feature_name, **dict(_kwargs, **kwargs)):
                    return f(*args, **_kwargs)
                else:
                    # BUG FIX: forward the call-site kwargs (_kwargs), not the
                    # decorator's configuration kwargs, to the fallback —
                    # matching async_wrapper's behavior above.
                    return fallback_function(*args, **_kwargs)

            return wrapper

    return decorator
55
+
56
+
57
def deprecated(message: str):
    """Simple decorator that marks a method as deprecated.

    Emits a warning log on every invocation, but only when debug mode is on,
    so production logs are not flooded with deprecation notices.
    """

    def decorator(func):
        @wraps(func)
        def inner(*args, **kwargs):
            if settings.debug:
                logger.warning(f"Function {func.__name__} is deprecated: {message}.")
            return func(*args, **kwargs)

        return inner

    return decorator
@@ -1,7 +1,12 @@
1
+ # TODO (cliandy): consolidate with decorators later
2
+ from functools import wraps
3
+
4
+
1
5
  def singleton(cls):
2
6
  """Decorator to make a class a Singleton class."""
3
7
  instances = {}
4
8
 
9
+ @wraps(cls)
5
10
  def get_instance(*args, **kwargs):
6
11
  if cls not in instances:
7
12
  instances[cls] = cls(*args, **kwargs)
@@ -23,7 +23,7 @@ from anthropic.types.beta import (
23
23
  )
24
24
 
25
25
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
26
- from letta.helpers.datetime_helpers import get_utc_timestamp_ns
26
+ from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
27
27
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG
28
28
  from letta.log import get_logger
29
29
  from letta.schemas.letta_message import (
@@ -62,8 +62,7 @@ class AnthropicStreamingInterface:
62
62
  self.use_assistant_message = use_assistant_message
63
63
 
64
64
  # Premake IDs for database writes
65
- self.letta_assistant_message_id = Message.generate_id()
66
- self.letta_tool_message_id = Message.generate_id()
65
+ self.letta_message_id = Message.generate_id()
67
66
 
68
67
  self.anthropic_mode = None
69
68
  self.message_id = None
@@ -132,7 +131,7 @@ class AnthropicStreamingInterface:
132
131
  now = get_utc_timestamp_ns()
133
132
  ttft_ns = now - provider_request_start_timestamp_ns
134
133
  ttft_span.add_event(
135
- name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ttft_ns // 1_000_000}
134
+ name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
136
135
  )
137
136
  first_chunk = False
138
137
 
@@ -152,7 +151,7 @@ class AnthropicStreamingInterface:
152
151
  if not self.use_assistant_message:
153
152
  # Buffer the initial tool call message instead of yielding immediately
154
153
  tool_call_msg = ToolCallMessage(
155
- id=self.letta_tool_message_id,
154
+ id=self.letta_message_id,
156
155
  tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
157
156
  date=datetime.now(timezone.utc).isoformat(),
158
157
  )
@@ -165,11 +164,11 @@ class AnthropicStreamingInterface:
165
164
  if prev_message_type and prev_message_type != "hidden_reasoning_message":
166
165
  message_index += 1
167
166
  hidden_reasoning_message = HiddenReasoningMessage(
168
- id=self.letta_assistant_message_id,
167
+ id=self.letta_message_id,
169
168
  state="redacted",
170
169
  hidden_reasoning=content.data,
171
170
  date=datetime.now(timezone.utc).isoformat(),
172
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
171
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
173
172
  )
174
173
  self.reasoning_messages.append(hidden_reasoning_message)
175
174
  prev_message_type = hidden_reasoning_message.message_type
@@ -206,10 +205,10 @@ class AnthropicStreamingInterface:
206
205
  if prev_message_type and prev_message_type != "reasoning_message":
207
206
  message_index += 1
208
207
  reasoning_message = ReasoningMessage(
209
- id=self.letta_assistant_message_id,
208
+ id=self.letta_message_id,
210
209
  reasoning=self.accumulated_inner_thoughts[-1],
211
210
  date=datetime.now(timezone.utc).isoformat(),
212
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
211
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
213
212
  )
214
213
  self.reasoning_messages.append(reasoning_message)
215
214
  prev_message_type = reasoning_message.message_type
@@ -233,10 +232,10 @@ class AnthropicStreamingInterface:
233
232
  if prev_message_type and prev_message_type != "reasoning_message":
234
233
  message_index += 1
235
234
  reasoning_message = ReasoningMessage(
236
- id=self.letta_assistant_message_id,
235
+ id=self.letta_message_id,
237
236
  reasoning=inner_thoughts_diff,
238
237
  date=datetime.now(timezone.utc).isoformat(),
239
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
238
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
240
239
  )
241
240
  self.reasoning_messages.append(reasoning_message)
242
241
  prev_message_type = reasoning_message.message_type
@@ -249,10 +248,28 @@ class AnthropicStreamingInterface:
249
248
  if len(self.tool_call_buffer) > 0:
250
249
  if prev_message_type and prev_message_type != "tool_call_message":
251
250
  message_index += 1
251
+
252
+ # Strip out the inner thoughts from the buffered tool call arguments before streaming
253
+ tool_call_args = ""
252
254
  for buffered_msg in self.tool_call_buffer:
253
- buffered_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
254
- prev_message_type = buffered_msg.message_type
255
- yield buffered_msg
255
+ tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
256
+ tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
257
+
258
+ tool_call_msg = ToolCallMessage(
259
+ id=self.tool_call_buffer[0].id,
260
+ otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
261
+ date=self.tool_call_buffer[0].date,
262
+ name=self.tool_call_buffer[0].name,
263
+ sender_id=self.tool_call_buffer[0].sender_id,
264
+ step_id=self.tool_call_buffer[0].step_id,
265
+ tool_call=ToolCallDelta(
266
+ name=self.tool_call_name,
267
+ tool_call_id=self.tool_call_id,
268
+ arguments=tool_call_args,
269
+ ),
270
+ )
271
+ prev_message_type = tool_call_msg.message_type
272
+ yield tool_call_msg
256
273
  self.tool_call_buffer = []
257
274
 
258
275
  # Start detecting special case of "send_message"
@@ -266,24 +283,26 @@ class AnthropicStreamingInterface:
266
283
  if prev_message_type and prev_message_type != "assistant_message":
267
284
  message_index += 1
268
285
  assistant_msg = AssistantMessage(
269
- id=self.letta_assistant_message_id,
286
+ id=self.letta_message_id,
270
287
  content=[TextContent(text=send_message_diff)],
271
288
  date=datetime.now(timezone.utc).isoformat(),
272
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
289
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
273
290
  )
274
291
  prev_message_type = assistant_msg.message_type
275
292
  yield assistant_msg
276
293
  else:
277
294
  # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
278
295
  tool_call_msg = ToolCallMessage(
279
- id=self.letta_tool_message_id,
280
- tool_call=ToolCallDelta(arguments=delta.partial_json),
296
+ id=self.letta_message_id,
297
+ tool_call=ToolCallDelta(
298
+ name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
299
+ ),
281
300
  date=datetime.now(timezone.utc).isoformat(),
282
301
  )
283
302
  if self.inner_thoughts_complete:
284
303
  if prev_message_type and prev_message_type != "tool_call_message":
285
304
  message_index += 1
286
- tool_call_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
305
+ tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
287
306
  prev_message_type = tool_call_msg.message_type
288
307
  yield tool_call_msg
289
308
  else:
@@ -301,11 +320,11 @@ class AnthropicStreamingInterface:
301
320
  if prev_message_type and prev_message_type != "reasoning_message":
302
321
  message_index += 1
303
322
  reasoning_message = ReasoningMessage(
304
- id=self.letta_assistant_message_id,
323
+ id=self.letta_message_id,
305
324
  source="reasoner_model",
306
325
  reasoning=delta.thinking,
307
326
  date=datetime.now(timezone.utc).isoformat(),
308
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
327
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
309
328
  )
310
329
  self.reasoning_messages.append(reasoning_message)
311
330
  prev_message_type = reasoning_message.message_type
@@ -320,12 +339,12 @@ class AnthropicStreamingInterface:
320
339
  if prev_message_type and prev_message_type != "reasoning_message":
321
340
  message_index += 1
322
341
  reasoning_message = ReasoningMessage(
323
- id=self.letta_assistant_message_id,
342
+ id=self.letta_message_id,
324
343
  source="reasoner_model",
325
344
  reasoning="",
326
345
  date=datetime.now(timezone.utc).isoformat(),
327
346
  signature=delta.signature,
328
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
347
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
329
348
  )
330
349
  self.reasoning_messages.append(reasoning_message)
331
350
  prev_message_type = reasoning_message.message_type
@@ -360,7 +379,7 @@ class AnthropicStreamingInterface:
360
379
  group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
361
380
  ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
362
381
  if group_type == "reasoning":
363
- reasoning_text = "".join(chunk.reasoning for chunk in group)
382
+ reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
364
383
  is_native = any(chunk.source == "reasoner_model" for chunk in group)
365
384
  signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
366
385
  if is_native:
@@ -5,7 +5,7 @@ from openai import AsyncStream
5
5
  from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
6
6
 
7
7
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
8
- from letta.helpers.datetime_helpers import get_utc_timestamp_ns
8
+ from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
9
9
  from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
10
10
  from letta.schemas.letta_message_content import TextContent
11
11
  from letta.schemas.message import Message
@@ -32,14 +32,14 @@ class OpenAIStreamingInterface:
32
32
  self.function_args_buffer = None
33
33
  self.function_id_buffer = None
34
34
  self.last_flushed_function_name = None
35
+ self.last_flushed_function_id = None
35
36
 
36
37
  # Buffer to hold function arguments until inner thoughts are complete
37
38
  self.current_function_arguments = ""
38
39
  self.current_json_parse_result = {}
39
40
 
40
41
  # Premake IDs for database writes
41
- self.letta_assistant_message_id = Message.generate_id()
42
- self.letta_tool_message_id = Message.generate_id()
42
+ self.letta_message_id = Message.generate_id()
43
43
 
44
44
  self.message_id = None
45
45
  self.model = None
@@ -54,14 +54,14 @@ class OpenAIStreamingInterface:
54
54
  self.reasoning_messages = []
55
55
 
56
56
  def get_reasoning_content(self) -> List[TextContent]:
57
- content = "".join(self.reasoning_messages)
57
+ content = "".join(self.reasoning_messages).strip()
58
58
  return [TextContent(text=content)]
59
59
 
60
60
  def get_tool_call_object(self) -> ToolCall:
61
61
  """Useful for agent loop"""
62
62
  function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
63
63
  return ToolCall(
64
- id=self.letta_tool_message_id,
64
+ id=self.last_flushed_function_id,
65
65
  function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
66
66
  )
67
67
 
@@ -85,7 +85,7 @@ class OpenAIStreamingInterface:
85
85
  now = get_utc_timestamp_ns()
86
86
  ttft_ns = now - provider_request_start_timestamp_ns
87
87
  ttft_span.add_event(
88
- name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ttft_ns // 1_000_000}
88
+ name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
89
89
  )
90
90
  first_chunk = False
91
91
 
@@ -133,11 +133,11 @@ class OpenAIStreamingInterface:
133
133
  message_index += 1
134
134
  self.reasoning_messages.append(updates_inner_thoughts)
135
135
  reasoning_message = ReasoningMessage(
136
- id=self.letta_tool_message_id,
136
+ id=self.letta_message_id,
137
137
  date=datetime.now(timezone.utc),
138
138
  reasoning=updates_inner_thoughts,
139
139
  # name=name,
140
- otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
140
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
141
141
  )
142
142
  prev_message_type = reasoning_message.message_type
143
143
  yield reasoning_message
@@ -171,20 +171,22 @@ class OpenAIStreamingInterface:
171
171
  message_index += 1
172
172
  self.tool_call_name = str(self.function_name_buffer)
173
173
  tool_call_msg = ToolCallMessage(
174
- id=self.letta_tool_message_id,
174
+ id=self.letta_message_id,
175
175
  date=datetime.now(timezone.utc),
176
176
  tool_call=ToolCallDelta(
177
177
  name=self.function_name_buffer,
178
178
  arguments=None,
179
179
  tool_call_id=self.function_id_buffer,
180
180
  ),
181
- otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
181
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
182
182
  )
183
183
  prev_message_type = tool_call_msg.message_type
184
184
  yield tool_call_msg
185
185
 
186
186
  # Record what the last function name we flushed was
187
187
  self.last_flushed_function_name = self.function_name_buffer
188
+ if self.last_flushed_function_id is None:
189
+ self.last_flushed_function_id = self.function_id_buffer
188
190
  # Clear the buffer
189
191
  self.function_name_buffer = None
190
192
  self.function_id_buffer = None
@@ -236,10 +238,10 @@ class OpenAIStreamingInterface:
236
238
  if prev_message_type and prev_message_type != "assistant_message":
237
239
  message_index += 1
238
240
  assistant_message = AssistantMessage(
239
- id=self.letta_assistant_message_id,
241
+ id=self.letta_message_id,
240
242
  date=datetime.now(timezone.utc),
241
243
  content=combined_chunk,
242
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
244
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
243
245
  )
244
246
  prev_message_type = assistant_message.message_type
245
247
  yield assistant_message
@@ -268,11 +270,11 @@ class OpenAIStreamingInterface:
268
270
  if prev_message_type and prev_message_type != "assistant_message":
269
271
  message_index += 1
270
272
  assistant_message = AssistantMessage(
271
- id=self.letta_assistant_message_id,
273
+ id=self.letta_message_id,
272
274
  date=datetime.now(timezone.utc),
273
275
  content=diff,
274
276
  # name=name,
275
- otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
277
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
276
278
  )
277
279
  prev_message_type = assistant_message.message_type
278
280
  yield assistant_message
@@ -292,15 +294,15 @@ class OpenAIStreamingInterface:
292
294
  if prev_message_type and prev_message_type != "tool_call_message":
293
295
  message_index += 1
294
296
  tool_call_msg = ToolCallMessage(
295
- id=self.letta_tool_message_id,
297
+ id=self.letta_message_id,
296
298
  date=datetime.now(timezone.utc),
297
299
  tool_call=ToolCallDelta(
298
- name=None,
300
+ name=self.function_name_buffer,
299
301
  arguments=combined_chunk,
300
302
  tool_call_id=self.function_id_buffer,
301
303
  ),
302
304
  # name=name,
303
- otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
305
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
304
306
  )
305
307
  prev_message_type = tool_call_msg.message_type
306
308
  yield tool_call_msg
@@ -312,7 +314,7 @@ class OpenAIStreamingInterface:
312
314
  if prev_message_type and prev_message_type != "tool_call_message":
313
315
  message_index += 1
314
316
  tool_call_msg = ToolCallMessage(
315
- id=self.letta_tool_message_id,
317
+ id=self.letta_message_id,
316
318
  date=datetime.now(timezone.utc),
317
319
  tool_call=ToolCallDelta(
318
320
  name=None,
@@ -320,7 +322,7 @@ class OpenAIStreamingInterface:
320
322
  tool_call_id=self.function_id_buffer,
321
323
  ),
322
324
  # name=name,
323
- otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
325
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
324
326
  )
325
327
  prev_message_type = tool_call_msg.message_type
326
328
  yield tool_call_msg
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
26
26
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
27
27
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
28
28
  from letta.log import get_logger
29
+ from letta.otel.tracing import log_event
29
30
  from letta.schemas.enums import ProviderCategory
30
31
  from letta.schemas.message import Message as _Message
31
32
  from letta.schemas.message import MessageRole as _MessageRole
@@ -45,7 +46,6 @@ from letta.services.provider_manager import ProviderManager
45
46
  from letta.services.user_manager import UserManager
46
47
  from letta.settings import model_settings
47
48
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
48
- from letta.tracing import log_event
49
49
 
50
50
  logger = get_logger(__name__)
51
51
 
@@ -27,16 +27,16 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
27
27
  from letta.llm_api.llm_client_base import LLMClientBase
28
28
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
29
29
  from letta.log import get_logger
30
+ from letta.otel.tracing import trace_method
30
31
  from letta.schemas.enums import ProviderCategory
31
32
  from letta.schemas.llm_config import LLMConfig
32
33
  from letta.schemas.message import Message as PydanticMessage
33
- from letta.schemas.openai.chat_completion_request import Tool
34
+ from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
34
35
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
35
36
  from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
36
37
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
37
38
  from letta.services.provider_manager import ProviderManager
38
39
  from letta.settings import model_settings
39
- from letta.tracing import trace_method
40
40
 
41
41
  DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
42
42
 
@@ -199,10 +199,10 @@ class AnthropicClient(LLMClientBase):
199
199
  elif llm_config.enable_reasoner:
200
200
  # NOTE: reasoning models currently do not allow for `any`
201
201
  tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
202
- tools_for_request = [Tool(function=f) for f in tools]
202
+ tools_for_request = [OpenAITool(function=f) for f in tools]
203
203
  elif force_tool_call is not None:
204
204
  tool_choice = {"type": "tool", "name": force_tool_call}
205
- tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
205
+ tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
206
206
 
207
207
  # need to have this setting to be able to put inner thoughts in kwargs
208
208
  if not llm_config.put_inner_thoughts_in_kwargs:
@@ -216,7 +216,7 @@ class AnthropicClient(LLMClientBase):
216
216
  tool_choice = {"type": "any", "disable_parallel_tool_use": True}
217
217
  else:
218
218
  tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
219
- tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
219
+ tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
220
220
 
221
221
  # Add tool choice
222
222
  if tool_choice:
@@ -230,7 +230,7 @@ class AnthropicClient(LLMClientBase):
230
230
  inner_thoughts_key=INNER_THOUGHTS_KWARG,
231
231
  inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
232
232
  )
233
- tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
233
+ tools_for_request = [OpenAITool(function=f) for f in tools_with_inner_thoughts]
234
234
 
235
235
  if tools_for_request and len(tools_for_request) > 0:
236
236
  # TODO eventually enable parallel tool use
@@ -270,7 +270,7 @@ class AnthropicClient(LLMClientBase):
270
270
 
271
271
  return data
272
272
 
273
- async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[Tool] = None) -> int:
273
+ async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
274
274
  client = anthropic.AsyncAnthropic()
275
275
  if messages and len(messages) == 0:
276
276
  messages = None
@@ -278,11 +278,19 @@ class AnthropicClient(LLMClientBase):
278
278
  anthropic_tools = convert_tools_to_anthropic_format(tools)
279
279
  else:
280
280
  anthropic_tools = None
281
- result = await client.beta.messages.count_tokens(
282
- model=model or "claude-3-7-sonnet-20250219",
283
- messages=messages or [{"role": "user", "content": "hi"}],
284
- tools=anthropic_tools or [],
285
- )
281
+
282
+ try:
283
+ result = await client.beta.messages.count_tokens(
284
+ model=model or "claude-3-7-sonnet-20250219",
285
+ messages=messages or [{"role": "user", "content": "hi"}],
286
+ tools=anthropic_tools or [],
287
+ )
288
+ except:
289
+ import ipdb
290
+
291
+ ipdb.set_trace()
292
+ raise
293
+
286
294
  token_count = result.input_tokens
287
295
  if messages is None:
288
296
  token_count -= 8
@@ -419,10 +427,16 @@ class AnthropicClient(LLMClientBase):
419
427
  if content_part.type == "text":
420
428
  content = strip_xml_tags(string=content_part.text, tag="thinking")
421
429
  if content_part.type == "tool_use":
422
- # hack for tool rules
430
+ # hack for incorrect tool format
423
431
  tool_input = json.loads(json.dumps(content_part.input))
424
432
  if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
425
- arguments = str(tool_input["function"]["arguments"])
433
+ arguments = json.dumps(tool_input["function"]["arguments"], indent=2)
434
+ try:
435
+ args_json = json.loads(arguments)
436
+ if not isinstance(args_json, dict):
437
+ raise ValueError("Expected parseable json object for arguments")
438
+ except:
439
+ arguments = str(tool_input["function"]["arguments"])
426
440
  else:
427
441
  arguments = json.dumps(tool_input, indent=2)
428
442
  tool_calls = [
@@ -477,7 +491,7 @@ class AnthropicClient(LLMClientBase):
477
491
  return chat_completion_response
478
492
 
479
493
 
480
- def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
494
+ def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
481
495
  """See: https://docs.anthropic.com/claude/docs/tool-use
482
496
 
483
497
  OpenAI style:
@@ -527,7 +541,7 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
527
541
  for tool in tools:
528
542
  formatted_tool = {
529
543
  "name": tool.function.name,
530
- "description": tool.function.description,
544
+ "description": tool.function.description if tool.function.description else "",
531
545
  "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
532
546
  }
533
547
  formatted_tools.append(formatted_tool)
@@ -12,12 +12,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
12
12
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
13
13
  from letta.local_llm.utils import count_tokens
14
14
  from letta.log import get_logger
15
+ from letta.otel.tracing import trace_method
15
16
  from letta.schemas.llm_config import LLMConfig
16
17
  from letta.schemas.message import Message as PydanticMessage
17
18
  from letta.schemas.openai.chat_completion_request import Tool
18
19
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
19
20
  from letta.settings import model_settings, settings
20
- from letta.tracing import trace_method
21
21
  from letta.utils import get_tool_call_id
22
22
 
23
23
  logger = get_logger(__name__)