letta-nightly 0.8.0.dev20250606104326__py3-none-any.whl → 0.8.2.dev20250606215616__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +1 -1
- letta/agents/letta_agent.py +49 -29
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +19 -13
- letta/agents/voice_sleeptime_agent.py +11 -3
- letta/constants.py +18 -0
- letta/data_sources/__init__.py +0 -0
- letta/data_sources/redis_client.py +282 -0
- letta/errors.py +0 -4
- letta/functions/function_sets/files.py +58 -0
- letta/functions/schema_generator.py +18 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/helpers/datetime_helpers.py +47 -3
- letta/helpers/decorators.py +69 -0
- letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
- letta/interfaces/anthropic_streaming_interface.py +43 -24
- letta/interfaces/openai_streaming_interface.py +21 -19
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/anthropic_client.py +22 -14
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +36 -30
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client_base.py +29 -1
- letta/llm_api/openai.py +1 -1
- letta/llm_api/openai_client.py +6 -8
- letta/local_llm/chat_completion_proxy.py +1 -1
- letta/memory.py +1 -1
- letta/orm/enums.py +1 -0
- letta/orm/file.py +80 -3
- letta/orm/files_agents.py +13 -0
- letta/orm/sqlalchemy_base.py +34 -11
- letta/otel/__init__.py +0 -0
- letta/otel/context.py +25 -0
- letta/otel/events.py +0 -0
- letta/otel/metric_registry.py +122 -0
- letta/otel/metrics.py +66 -0
- letta/otel/resource.py +26 -0
- letta/{tracing.py → otel/tracing.py} +55 -78
- letta/plugins/README.md +22 -0
- letta/plugins/__init__.py +0 -0
- letta/plugins/defaults.py +11 -0
- letta/plugins/plugins.py +72 -0
- letta/schemas/enums.py +8 -0
- letta/schemas/file.py +12 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +7 -7
- letta/server/rest_api/app.py +8 -6
- letta/server/rest_api/routers/v1/agents.py +37 -36
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/sources.py +26 -3
- letta/server/rest_api/utils.py +9 -6
- letta/server/server.py +18 -12
- letta/services/agent_manager.py +185 -193
- letta/services/block_manager.py +1 -1
- letta/services/context_window_calculator/token_counter.py +3 -2
- letta/services/file_processor/chunker/line_chunker.py +34 -0
- letta/services/file_processor/file_processor.py +40 -11
- letta/services/file_processor/parser/mistral_parser.py +11 -1
- letta/services/files_agents_manager.py +96 -7
- letta/services/group_manager.py +6 -6
- letta/services/helpers/agent_manager_helper.py +373 -3
- letta/services/identity_manager.py +1 -1
- letta/services/job_manager.py +1 -1
- letta/services/llm_batch_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/per_agent_lock_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +178 -19
- letta/services/step_manager.py +2 -2
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/telemetry_manager.py +1 -1
- letta/services/tool_executor/builtin_tool_executor.py +117 -0
- letta/services/tool_executor/composio_tool_executor.py +53 -0
- letta/services/tool_executor/core_tool_executor.py +474 -0
- letta/services/tool_executor/files_tool_executor.py +131 -0
- letta/services/tool_executor/mcp_tool_executor.py +45 -0
- letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
- letta/services/tool_executor/tool_execution_manager.py +34 -14
- letta/services/tool_executor/tool_execution_sandbox.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -802
- letta/services/tool_executor/tool_executor_base.py +43 -0
- letta/services/tool_manager.py +55 -59
- letta/services/tool_sandbox/e2b_sandbox.py +1 -1
- letta/services/tool_sandbox/local_sandbox.py +6 -3
- letta/services/user_manager.py +6 -3
- letta/settings.py +21 -1
- letta/utils.py +7 -2
- {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/METADATA +4 -2
- {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/RECORD +96 -74
- {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.0.dev20250606104326.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/entry_points.txt +0 -0
letta/interfaces/anthropic_streaming_interface.py
CHANGED
@@ -23,7 +23,7 @@ from anthropic.types.beta import (
 )
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
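Both streaming interfaces now log time-to-first-token in milliseconds instead of raw nanoseconds. The diff does not include `ns_to_ms` itself; a minimal sketch of what such a helper in `letta/helpers/datetime_helpers.py` presumably looks like (the body below is an assumption, not the shipped code):

def ns_to_ms(ns: int) -> int:
    # Convert a nanosecond duration to whole milliseconds (assumed implementation).
    return ns // 1_000_000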
@@ -62,8 +62,7 @@ class AnthropicStreamingInterface:
         self.use_assistant_message = use_assistant_message
 
         # Premake IDs for database writes
-        self.
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.anthropic_mode = None
         self.message_id = None
@@ -132,7 +131,7 @@ class AnthropicStreamingInterface:
                 now = get_utc_timestamp_ns()
                 ttft_ns = now - provider_request_start_timestamp_ns
                 ttft_span.add_event(
-                    name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ttft_ns
+                    name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
                 )
                 first_chunk = False
 
@@ -152,7 +151,7 @@
                 if not self.use_assistant_message:
                     # Buffer the initial tool call message instead of yielding immediately
                     tool_call_msg = ToolCallMessage(
-                        id=self.
+                        id=self.letta_message_id,
                         tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
                         date=datetime.now(timezone.utc).isoformat(),
                     )
@@ -165,11 +164,11 @@
                 if prev_message_type and prev_message_type != "hidden_reasoning_message":
                     message_index += 1
                 hidden_reasoning_message = HiddenReasoningMessage(
-                    id=self.
+                    id=self.letta_message_id,
                     state="redacted",
                     hidden_reasoning=content.data,
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(hidden_reasoning_message)
                 prev_message_type = hidden_reasoning_message.message_type
@@ -206,10 +205,10 @@
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.
+                        id=self.letta_message_id,
                         reasoning=self.accumulated_inner_thoughts[-1],
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -233,10 +232,10 @@
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.
+                        id=self.letta_message_id,
                         reasoning=inner_thoughts_diff,
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -249,10 +248,28 @@
                 if len(self.tool_call_buffer) > 0:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+
+                    # Strip out the inner thoughts from the buffered tool call arguments before streaming
+                    tool_call_args = ""
                     for buffered_msg in self.tool_call_buffer:
-                        buffered_msg.
-
-
+                        tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
+                    tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                    tool_call_msg = ToolCallMessage(
+                        id=self.tool_call_buffer[0].id,
+                        otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
+                        date=self.tool_call_buffer[0].date,
+                        name=self.tool_call_buffer[0].name,
+                        sender_id=self.tool_call_buffer[0].sender_id,
+                        step_id=self.tool_call_buffer[0].step_id,
+                        tool_call=ToolCallDelta(
+                            name=self.tool_call_name,
+                            tool_call_id=self.tool_call_id,
+                            arguments=tool_call_args,
+                        ),
+                    )
+                    prev_message_type = tool_call_msg.message_type
+                    yield tool_call_msg
                     self.tool_call_buffer = []
 
             # Start detecting special case of "send_message"
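Before the buffered tool-call deltas are flushed, their argument fragments are concatenated and the inner-thoughts key/value pair is stripped so reasoning text is not streamed twice. A self-contained sketch of that stripping step, with illustrative buffer contents:

INNER_THOUGHTS_KWARG = "inner_thoughts"  # value as defined in letta.local_llm.constants

current_inner_thoughts = "The user greeted me, I should reply."
buffered_fragments = [
    '{"inner_thoughts": "The user greeted me, I should reply.", ',
    '"message": "Hello!"}',
]

# Concatenate the partial-JSON argument deltas, then drop the inner-thoughts pair
tool_call_args = "".join(buffered_fragments)
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
print(tool_call_args)  # '{, "message": "Hello!"}' (a display delta, not parsed as JSON)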
@@ -266,24 +283,26 @@
                         if prev_message_type and prev_message_type != "assistant_message":
                             message_index += 1
                         assistant_msg = AssistantMessage(
-                            id=self.
+                            id=self.letta_message_id,
                             content=[TextContent(text=send_message_diff)],
                             date=datetime.now(timezone.utc).isoformat(),
-                            otid=Message.generate_otid_from_id(self.
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         )
                         prev_message_type = assistant_msg.message_type
                         yield assistant_msg
                     else:
                         # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
                         tool_call_msg = ToolCallMessage(
-                            id=self.
-                            tool_call=ToolCallDelta(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(
+                                name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
+                            ),
                             date=datetime.now(timezone.utc).isoformat(),
                         )
                         if self.inner_thoughts_complete:
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
-                            tool_call_msg.otid = Message.generate_otid_from_id(self.
+                            tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
                         else:
@@ -301,11 +320,11 @@
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning=delta.thinking,
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -320,12 +339,12 @@
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning="",
                     date=datetime.now(timezone.utc).isoformat(),
                     signature=delta.signature,
-                    otid=Message.generate_otid_from_id(self.
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -360,7 +379,7 @@
         group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
     ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
         if group_type == "reasoning":
-            reasoning_text = "".join(chunk.reasoning for chunk in group)
+            reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
             is_native = any(chunk.source == "reasoner_model" for chunk in group)
             signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
             if is_native:
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -5,7 +5,7 @@ from openai import AsyncStream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import Message
@@ -32,14 +32,14 @@ class OpenAIStreamingInterface:
         self.function_args_buffer = None
         self.function_id_buffer = None
         self.last_flushed_function_name = None
+        self.last_flushed_function_id = None
 
         # Buffer to hold function arguments until inner thoughts are complete
         self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         # Premake IDs for database writes
-        self.
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.message_id = None
         self.model = None
@@ -54,14 +54,14 @@ class OpenAIStreamingInterface:
         self.reasoning_messages = []
 
     def get_reasoning_content(self) -> List[TextContent]:
-        content = "".join(self.reasoning_messages)
+        content = "".join(self.reasoning_messages).strip()
         return [TextContent(text=content)]
 
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
         return ToolCall(
-            id=self.
+            id=self.last_flushed_function_id,
             function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
         )
 
@@ -85,7 +85,7 @@ class OpenAIStreamingInterface:
             now = get_utc_timestamp_ns()
             ttft_ns = now - provider_request_start_timestamp_ns
             ttft_span.add_event(
-                name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ttft_ns
+                name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
             )
             first_chunk = False
 
@@ -133,11 +133,11 @@ class OpenAIStreamingInterface:
                             message_index += 1
                         self.reasoning_messages.append(updates_inner_thoughts)
                         reasoning_message = ReasoningMessage(
-                            id=self.
+                            id=self.letta_message_id,
                             date=datetime.now(timezone.utc),
                             reasoning=updates_inner_thoughts,
                             # name=name,
-                            otid=Message.generate_otid_from_id(self.
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                         )
                         prev_message_type = reasoning_message.message_type
                         yield reasoning_message
@@ -171,20 +171,22 @@ class OpenAIStreamingInterface:
                                     message_index += 1
                                 self.tool_call_name = str(self.function_name_buffer)
                                 tool_call_msg = ToolCallMessage(
-                                    id=self.
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     tool_call=ToolCallDelta(
                                         name=self.function_name_buffer,
                                         arguments=None,
                                         tool_call_id=self.function_id_buffer,
                                     ),
-                                    otid=Message.generate_otid_from_id(self.
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = tool_call_msg.message_type
                                 yield tool_call_msg
 
                             # Record what the last function name we flushed was
                             self.last_flushed_function_name = self.function_name_buffer
+                            if self.last_flushed_function_id is None:
+                                self.last_flushed_function_id = self.function_id_buffer
                             # Clear the buffer
                             self.function_name_buffer = None
                             self.function_id_buffer = None
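OpenAI typically sends the tool-call id only on the first streamed delta, with later chunks carrying `None`; pinning the first flushed id lets `get_tool_call_object()` return the provider-issued id. An illustrative sketch (simplified dicts stand in for the SDK delta types):

deltas = [
    {"id": "call_abc123", "name": "send_message", "arguments": ""},
    {"id": None, "name": None, "arguments": '{"message": '},
    {"id": None, "name": None, "arguments": '"Hello!"}'},
]

last_flushed_function_id = None
for delta in deltas:
    if last_flushed_function_id is None and delta["id"] is not None:
        last_flushed_function_id = delta["id"]  # keep the provider-issued id

assert last_flushed_function_id == "call_abc123"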
@@ -236,10 +238,10 @@ class OpenAIStreamingInterface:
                                 if prev_message_type and prev_message_type != "assistant_message":
                                     message_index += 1
                                 assistant_message = AssistantMessage(
-                                    id=self.
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     content=combined_chunk,
-                                    otid=Message.generate_otid_from_id(self.
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = assistant_message.message_type
                                 yield assistant_message
@@ -268,11 +270,11 @@ class OpenAIStreamingInterface:
                                     if prev_message_type and prev_message_type != "assistant_message":
                                         message_index += 1
                                     assistant_message = AssistantMessage(
-                                        id=self.
+                                        id=self.letta_message_id,
                                         date=datetime.now(timezone.utc),
                                         content=diff,
                                         # name=name,
-                                        otid=Message.generate_otid_from_id(self.
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                     )
                                     prev_message_type = assistant_message.message_type
                                     yield assistant_message
@@ -292,15 +294,15 @@ class OpenAIStreamingInterface:
                                 if prev_message_type and prev_message_type != "tool_call_message":
                                     message_index += 1
                                 tool_call_msg = ToolCallMessage(
-                                    id=self.
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     tool_call=ToolCallDelta(
-                                        name=
+                                        name=self.function_name_buffer,
                                         arguments=combined_chunk,
                                         tool_call_id=self.function_id_buffer,
                                     ),
                                     # name=name,
-                                    otid=Message.generate_otid_from_id(self.
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = tool_call_msg.message_type
                                 yield tool_call_msg
@@ -312,7 +314,7 @@ class OpenAIStreamingInterface:
                                 if prev_message_type and prev_message_type != "tool_call_message":
                                     message_index += 1
                                 tool_call_msg = ToolCallMessage(
-                                    id=self.
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     tool_call=ToolCallDelta(
                                         name=None,
@@ -320,7 +322,7 @@ class OpenAIStreamingInterface:
                                         tool_call_id=self.function_id_buffer,
                                     ),
                                     # name=name,
-                                    otid=Message.generate_otid_from_id(self.
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = tool_call_msg.message_type
                                 yield tool_call_msg
letta/llm_api/anthropic.py
CHANGED
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
+from letta.otel.tracing import log_event
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
@@ -45,7 +46,6 @@ from letta.services.provider_manager import ProviderManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
-from letta.tracing import log_event
 
 logger = get_logger(__name__)
 
letta/llm_api/anthropic_client.py
CHANGED
@@ -27,16 +27,16 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.services.provider_manager import ProviderManager
 from letta.settings import model_settings
-from letta.tracing import trace_method
 
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
 
@@ -199,10 +199,10 @@ class AnthropicClient(LLMClientBase):
         elif llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [
+            tools_for_request = [OpenAITool(function=f) for f in tools]
         elif force_tool_call is not None:
             tool_choice = {"type": "tool", "name": force_tool_call}
-            tools_for_request = [
+            tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
 
             # need to have this setting to be able to put inner thoughts in kwargs
             if not llm_config.put_inner_thoughts_in_kwargs:
@@ -216,7 +216,7 @@ class AnthropicClient(LLMClientBase):
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
                 tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [
+            tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
 
         # Add tool choice
         if tool_choice:
@@ -230,7 +230,7 @@ class AnthropicClient(LLMClientBase):
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
                 inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
             )
-            tools_for_request = [
+            tools_for_request = [OpenAITool(function=f) for f in tools_with_inner_thoughts]
 
         if tools_for_request and len(tools_for_request) > 0:
             # TODO eventually enable parallel tool use
@@ -270,7 +270,7 @@ class AnthropicClient(LLMClientBase):
 
         return data
 
-    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[
+    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
         client = anthropic.AsyncAnthropic()
         if messages and len(messages) == 0:
             messages = None
@@ -278,11 +278,19 @@ class AnthropicClient(LLMClientBase):
             anthropic_tools = convert_tools_to_anthropic_format(tools)
         else:
             anthropic_tools = None
-
-
-
-
-
+
+        try:
+            result = await client.beta.messages.count_tokens(
+                model=model or "claude-3-7-sonnet-20250219",
+                messages=messages or [{"role": "user", "content": "hi"}],
+                tools=anthropic_tools or [],
+            )
+        except:
+            import ipdb
+
+            ipdb.set_trace()
+            raise
+
         token_count = result.input_tokens
         if messages is None:
             token_count -= 8
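The token count here comes from Anthropic's count-tokens endpoint. A minimal standalone example of that call (assumes `ANTHROPIC_API_KEY` is set in the environment):

import asyncio

import anthropic


async def main():
    client = anthropic.AsyncAnthropic()
    result = await client.beta.messages.count_tokens(
        model="claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(result.input_tokens)


asyncio.run(main())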
@@ -477,7 +485,7 @@ class AnthropicClient(LLMClientBase):
         return chat_completion_response
 
 
-def convert_tools_to_anthropic_format(tools: List[
+def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
     OpenAI style:
@@ -527,7 +535,7 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     for tool in tools:
         formatted_tool = {
             "name": tool.function.name,
-            "description": tool.function.description,
+            "description": tool.function.description if tool.function.description else "",
             "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
         }
         formatted_tools.append(formatted_tool)
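For reference, the input and output shapes of this conversion, using a hypothetical `get_weather` tool (plain dicts shown; the real code receives `OpenAITool` schema objects):

# OpenAI style (input):
openai_function = {
    "name": "get_weather",
    "description": "Get the current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string", "description": "City name"}},
        "required": ["city"],
    },
}

# Anthropic style (output of the loop above):
anthropic_tool = {
    "name": "get_weather",
    "description": "Get the current weather for a city.",
    "input_schema": openai_function["parameters"],
}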
letta/llm_api/google_vertex_client.py
CHANGED
@@ -12,12 +12,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings, settings
-from letta.tracing import trace_method
 from letta.utils import get_tool_call_id
 
 logger = get_logger(__name__)
letta/llm_api/helpers.py
CHANGED
@@ -63,11 +63,11 @@ def _convert_to_structured_output_helper(property: dict) -> dict:
 
 
 def convert_to_structured_output(openai_function: dict, allow_optional: bool = False) -> dict:
-    """Convert function call objects to structured output objects
+    """Convert function call objects to structured output objects.
 
     See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
     """
-    description = openai_function
+    description = openai_function.get("description", "")
 
     structured_output = {
         "name": openai_function["name"],
@@ -81,54 +81,58 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = False) -> dict:
         },
     }
 
-    # This code needs to be able to handle nested properties
-    # For example, the param details may have "type" + "description",
-    # but if "type" is "object" we expected "properties", where each property has details
-    # and if "type" is "array" we expect "items": <type>
     for param, details in openai_function["parameters"]["properties"].items():
         param_type = details["type"]
-
+        param_description = details.get("description", "")
 
         if param_type == "object":
             if "properties" not in details:
-
-                raise ValueError(f"Property {param} of type object is missing properties")
+                raise ValueError(f"Property {param} of type object is missing 'properties'")
             structured_output["parameters"]["properties"][param] = {
                 "type": "object",
-                "description":
+                "description": param_description,
                 "properties": {k: _convert_to_structured_output_helper(v) for k, v in details["properties"].items()},
                 "additionalProperties": False,
                 "required": list(details["properties"].keys()),
             }
 
         elif param_type == "array":
-
-
-
-
-
+            items_schema = details.get("items")
+            prefix_items_schema = details.get("prefixItems")
+
+            if prefix_items_schema:
+                # assume fixed-length tuple — safe fallback to use first type for items
+                fallback_item = prefix_items_schema[0] if isinstance(prefix_items_schema, list) else prefix_items_schema
+                structured_output["parameters"]["properties"][param] = {
+                    "type": "array",
+                    "description": param_description,
+                    "prefixItems": [_convert_to_structured_output_helper(item) for item in prefix_items_schema],
+                    "items": _convert_to_structured_output_helper(fallback_item),
+                    "minItems": details.get("minItems", len(prefix_items_schema)),
+                    "maxItems": details.get("maxItems", len(prefix_items_schema)),
+                }
+            elif items_schema:
+                structured_output["parameters"]["properties"][param] = {
+                    "type": "array",
+                    "description": param_description,
+                    "items": _convert_to_structured_output_helper(items_schema),
+                }
+            else:
+                raise ValueError(f"Array param '{param}' is missing both 'items' and 'prefixItems'")
 
         else:
-
-            "type": param_type,
-            "description":
+            prop = {
+                "type": param_type,
+                "description": param_description,
             }
-
-
-            structured_output["parameters"]["properties"][param]
+            if "enum" in details:
+                prop["enum"] = details["enum"]
+            structured_output["parameters"]["properties"][param] = prop
 
     if not allow_optional:
-        # Add all properties to required list
         structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
-
     else:
-
-        # Those are implied "optional" types
-        # For those types, turn each of them into a union type with "null"
-        # e.g.
-        # "type": "string" -> "type": ["string", "null"]
-        # TODO
-        raise NotImplementedError
+        raise NotImplementedError("Optional parameter handling is not implemented.")
 
     return structured_output
 
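A worked example of the new array handling on a hypothetical function schema (output abridged to the relevant property; the helper is assumed to pass a simple string item schema through unchanged):

openai_function = {
    "name": "tag_items",
    "description": "Attach tags to items.",
    "parameters": {
        "type": "object",
        "properties": {
            "tags": {"type": "array", "description": "Tags to apply", "items": {"type": "string"}},
        },
    },
}

result = convert_to_structured_output(openai_function)
# result["parameters"]["properties"]["tags"] ==
#     {"type": "array", "description": "Tags to apply", "items": {"type": "string"}}
# result["parameters"]["required"] == ["tags"]  (all properties become required)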
|
@@ -292,6 +296,8 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
|
|
292
296
|
|
293
297
|
except json.JSONDecodeError as e:
|
294
298
|
warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
|
299
|
+
print(f"\nFailed to strip inner thoughts from kwargs: {e}")
|
300
|
+
print(f"\nTool call arguments: {tool_call.function.arguments}")
|
295
301
|
raise e
|
296
302
|
else:
|
297
303
|
warnings.warn(f"Did not find tool call in message: {str(message)}")
|
letta/llm_api/llm_api_tools.py
CHANGED
@@ -26,6 +26,7 @@ from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.orm.user import User
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -35,7 +36,6 @@ from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
-from letta.tracing import log_event, trace_method
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]
 
letta/llm_api/llm_client_base.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
@@ -6,13 +7,13 @@ from openai import AsyncStream, Stream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.errors import LLMError
+from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
-from letta.tracing import log_event, trace_method
 
 if TYPE_CHECKING:
     from letta.orm import User
@@ -186,3 +187,30 @@
         An LLMError subclass that represents the error in a provider-agnostic way
         """
         return LLMError(f"Unhandled LLM error: {str(e)}")
+
+    def _fix_truncated_json_response(self, response: ChatCompletionResponse) -> ChatCompletionResponse:
+        """
+        Fixes truncated JSON responses by ensuring the content is properly formatted.
+        This is a workaround for some providers that may return incomplete JSON.
+        """
+        if response.choices and response.choices[0].message and response.choices[0].message.tool_calls:
+            tool_call_args_str = response.choices[0].message.tool_calls[0].function.arguments
+            try:
+                json.loads(tool_call_args_str)
+            except json.JSONDecodeError:
+                try:
+                    json_str_end = ""
+                    quote_count = tool_call_args_str.count('"')
+                    if quote_count % 2 != 0:
+                        json_str_end = json_str_end + '"'
+
+                    open_braces = tool_call_args_str.count("{")
+                    close_braces = tool_call_args_str.count("}")
+                    missing_braces = open_braces - close_braces
+                    json_str_end += "}" * missing_braces
+                    fixed_tool_call_args_str = tool_call_args_str[: -len(json_str_end)] + json_str_end
+                    json.loads(fixed_tool_call_args_str)
+                    response.choices[0].message.tool_calls[0].function.arguments = fixed_tool_call_args_str
+                except json.JSONDecodeError:
+                    pass
+        return response
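A short standalone walk-through of the repair heuristic on a truncated argument string; note that the final step overwrites the last `len(json_str_end)` characters of the original string before re-parsing:

import json

s = '{"message": "Hi there'  # truncated mid-string value

json_str_end = ""
if s.count('"') % 2 != 0:      # odd quote count: close the open string
    json_str_end += '"'
json_str_end += "}" * (s.count("{") - s.count("}"))  # balance the braces

fixed = s[: -len(json_str_end)] + json_str_end  # trims the tail, then closes
print(json.loads(fixed))  # {'message': 'Hi the'}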