letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.3.dev20250607000559__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +16 -12
- letta/agents/base_agent.py +1 -1
- letta/agents/helpers.py +13 -2
- letta/agents/letta_agent.py +72 -34
- letta/agents/letta_agent_batch.py +1 -2
- letta/agents/voice_agent.py +19 -13
- letta/agents/voice_sleeptime_agent.py +23 -6
- letta/constants.py +18 -0
- letta/data_sources/__init__.py +0 -0
- letta/data_sources/redis_client.py +282 -0
- letta/errors.py +0 -4
- letta/functions/function_sets/files.py +58 -0
- letta/functions/schema_generator.py +18 -1
- letta/groups/sleeptime_multi_agent_v2.py +13 -3
- letta/helpers/datetime_helpers.py +47 -3
- letta/helpers/decorators.py +69 -0
- letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
- letta/interfaces/anthropic_streaming_interface.py +43 -24
- letta/interfaces/openai_streaming_interface.py +21 -19
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/anthropic_client.py +30 -16
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +36 -30
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client_base.py +29 -1
- letta/llm_api/openai.py +1 -1
- letta/llm_api/openai_client.py +6 -8
- letta/local_llm/chat_completion_proxy.py +1 -1
- letta/memory.py +1 -1
- letta/orm/enums.py +1 -0
- letta/orm/file.py +80 -3
- letta/orm/files_agents.py +13 -0
- letta/orm/passage.py +2 -0
- letta/orm/sqlalchemy_base.py +34 -11
- letta/otel/__init__.py +0 -0
- letta/otel/context.py +25 -0
- letta/otel/events.py +0 -0
- letta/otel/metric_registry.py +122 -0
- letta/otel/metrics.py +66 -0
- letta/otel/resource.py +26 -0
- letta/{tracing.py → otel/tracing.py} +55 -78
- letta/plugins/README.md +22 -0
- letta/plugins/__init__.py +0 -0
- letta/plugins/defaults.py +11 -0
- letta/plugins/plugins.py +72 -0
- letta/schemas/enums.py +8 -0
- letta/schemas/file.py +12 -0
- letta/schemas/letta_request.py +6 -0
- letta/schemas/passage.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +7 -7
- letta/server/rest_api/app.py +8 -6
- letta/server/rest_api/routers/v1/agents.py +46 -37
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/sources.py +26 -3
- letta/server/rest_api/routers/v1/tools.py +7 -2
- letta/server/rest_api/utils.py +9 -6
- letta/server/server.py +25 -13
- letta/services/agent_manager.py +186 -194
- letta/services/block_manager.py +1 -1
- letta/services/context_window_calculator/context_window_calculator.py +1 -1
- letta/services/context_window_calculator/token_counter.py +3 -2
- letta/services/file_processor/chunker/line_chunker.py +34 -0
- letta/services/file_processor/file_processor.py +43 -12
- letta/services/file_processor/parser/mistral_parser.py +11 -1
- letta/services/files_agents_manager.py +96 -7
- letta/services/group_manager.py +6 -6
- letta/services/helpers/agent_manager_helper.py +404 -3
- letta/services/identity_manager.py +1 -1
- letta/services/job_manager.py +1 -1
- letta/services/llm_batch_manager.py +1 -1
- letta/services/mcp/stdio_client.py +5 -1
- letta/services/mcp_manager.py +4 -4
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +604 -19
- letta/services/per_agent_lock_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +178 -19
- letta/services/step_manager.py +2 -2
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/telemetry_manager.py +1 -1
- letta/services/tool_executor/builtin_tool_executor.py +117 -0
- letta/services/tool_executor/composio_tool_executor.py +53 -0
- letta/services/tool_executor/core_tool_executor.py +474 -0
- letta/services/tool_executor/files_tool_executor.py +138 -0
- letta/services/tool_executor/mcp_tool_executor.py +45 -0
- letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
- letta/services/tool_executor/tool_execution_manager.py +34 -14
- letta/services/tool_executor/tool_execution_sandbox.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -802
- letta/services/tool_executor/tool_executor_base.py +43 -0
- letta/services/tool_manager.py +55 -59
- letta/services/tool_sandbox/e2b_sandbox.py +1 -1
- letta/services/tool_sandbox/local_sandbox.py +6 -3
- letta/services/user_manager.py +6 -3
- letta/settings.py +23 -2
- letta/utils.py +7 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/METADATA +4 -2
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/RECORD +105 -83
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.3.dev20250607000559.dist-info}/entry_points.txt +0 -0
letta/helpers/decorators.py
ADDED

```diff
@@ -0,0 +1,69 @@
+import inspect
+from functools import wraps
+from typing import Callable
+
+from letta.log import get_logger
+from letta.plugins.plugins import get_experimental_checker
+from letta.settings import settings
+
+logger = get_logger(__name__)
+
+
+def experimental(feature_name: str, fallback_function: Callable, **kwargs):
+    """Decorator that runs a fallback function if experimental feature is not enabled.
+
+    - kwargs from the decorator will be combined with function kwargs and overwritten only for experimental evaluation.
+    - if the decorated function, fallback_function, or experimental checker function is async, the whole call will be async
+    """
+
+    def decorator(f):
+        experimental_checker = get_experimental_checker()
+        is_f_async = inspect.iscoroutinefunction(f)
+        is_fallback_async = inspect.iscoroutinefunction(fallback_function)
+        is_experimental_checker_async = inspect.iscoroutinefunction(experimental_checker)
+
+        async def call_function(func, is_async, *args, **_kwargs):
+            if is_async:
+                return await func(*args, **_kwargs)
+            return func(*args, **_kwargs)
+
+        # asynchronous wrapper if any function is async
+        if any((is_f_async, is_fallback_async, is_experimental_checker_async)):
+
+            @wraps(f)
+            async def async_wrapper(*args, **_kwargs):
+                result = await call_function(experimental_checker, is_experimental_checker_async, feature_name, **dict(_kwargs, **kwargs))
+                if result:
+                    return await call_function(f, is_f_async, *args, **_kwargs)
+                else:
+                    return await call_function(fallback_function, is_fallback_async, *args, **_kwargs)
+
+            return async_wrapper
+
+        else:
+
+            @wraps(f)
+            def wrapper(*args, **_kwargs):
+                if experimental_checker(feature_name, **dict(_kwargs, **kwargs)):
+                    return f(*args, **_kwargs)
+                else:
+                    return fallback_function(*args, **kwargs)
+
+            return wrapper
+
+    return decorator
+
+
+def deprecated(message: str):
+    """Simple decorator that marks a method as deprecated."""
+
+    def decorator(f):
+        @wraps(f)
+        def wrapper(*args, **kwargs):
+            if settings.debug:
+                logger.warning(f"Function {f.__name__} is deprecated: {message}.")
+            return f(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
```
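Taken together, `experimental` dispatches between a decorated function and a fallback based on a runtime feature check (going async whenever any participant is async), while `deprecated` only logs in debug mode. A hypothetical usage sketch; the feature name, functions, and checker outcome below are illustrative, not from the package:

```python
from letta.helpers.decorators import deprecated, experimental


def summarize_sync(text: str) -> str:
    # Fallback path, used when the checker reports the feature as disabled.
    return text[:100]


@experimental("async_summaries", fallback_function=summarize_sync)
async def summarize(text: str) -> str:
    # Experimental path; because this function is async, callers always
    # await the wrapper regardless of which branch runs.
    return text[:100]


@deprecated("use summarize() instead")
def legacy_summarize(text: str) -> str:
    # Runs normally, logging a deprecation warning when settings.debug is on.
    return text[:100]
```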
letta/{services/helpers/noop_helper.py → helpers/singleton.py}
RENAMED

```diff
@@ -1,7 +1,12 @@
+# TODO (cliandy): consolidate with decorators later
+from functools import wraps
+
+
 def singleton(cls):
     """Decorator to make a class a Singleton class."""
     instances = {}
 
+    @wraps(cls)
     def get_instance(*args, **kwargs):
         if cls not in instances:
             instances[cls] = cls(*args, **kwargs)
```
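The new `@wraps(cls)` means the factory function that replaces the class keeps the class's metadata. A minimal behavior sketch (the sample class is hypothetical):

```python
from letta.helpers.singleton import singleton


@singleton
class ConnectionPool:
    """Holds shared connections."""

    def __init__(self):
        self.connections = []


# Every call returns the same instance, and thanks to @wraps the
# decorated name still reports the class's metadata instead of
# "get_instance".
assert ConnectionPool() is ConnectionPool()
assert ConnectionPool.__name__ == "ConnectionPool"
```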
letta/interfaces/anthropic_streaming_interface.py
CHANGED

```diff
@@ -23,7 +23,7 @@ from anthropic.types.beta import (
 )
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
@@ -62,8 +62,7 @@ class AnthropicStreamingInterface:
         self.use_assistant_message = use_assistant_message
 
         # Premake IDs for database writes
-        self.letta_assistant_message_id = Message.generate_id()
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.anthropic_mode = None
         self.message_id = None
@@ -132,7 +131,7 @@ class AnthropicStreamingInterface:
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - provider_request_start_timestamp_ns
                     ttft_span.add_event(
-                        name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ttft_ns
+                        name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
                     )
                     first_chunk = False
 
@@ -152,7 +151,7 @@ class AnthropicStreamingInterface:
                 if not self.use_assistant_message:
                     # Buffer the initial tool call message instead of yielding immediately
                     tool_call_msg = ToolCallMessage(
-                        id=self.letta_tool_message_id,
+                        id=self.letta_message_id,
                         tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
                         date=datetime.now(timezone.utc).isoformat(),
                     )
@@ -165,11 +164,11 @@ class AnthropicStreamingInterface:
                 if prev_message_type and prev_message_type != "hidden_reasoning_message":
                     message_index += 1
                 hidden_reasoning_message = HiddenReasoningMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     state="redacted",
                     hidden_reasoning=content.data,
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(hidden_reasoning_message)
                 prev_message_type = hidden_reasoning_message.message_type
@@ -206,10 +205,10 @@ class AnthropicStreamingInterface:
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         reasoning=self.accumulated_inner_thoughts[-1],
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -233,10 +232,10 @@ class AnthropicStreamingInterface:
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
                     reasoning_message = ReasoningMessage(
-                        id=self.letta_assistant_message_id,
+                        id=self.letta_message_id,
                         reasoning=inner_thoughts_diff,
                         date=datetime.now(timezone.utc).isoformat(),
-                        otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                     )
                     self.reasoning_messages.append(reasoning_message)
                     prev_message_type = reasoning_message.message_type
@@ -249,10 +248,28 @@ class AnthropicStreamingInterface:
                 if len(self.tool_call_buffer) > 0:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
+
+                    # Strip out the inner thoughts from the buffered tool call arguments before streaming
+                    tool_call_args = ""
                     for buffered_msg in self.tool_call_buffer:
-                        buffered_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
-                        prev_message_type = buffered_msg.message_type
-                        yield buffered_msg
+                        tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
+                    tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                    tool_call_msg = ToolCallMessage(
+                        id=self.tool_call_buffer[0].id,
+                        otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
+                        date=self.tool_call_buffer[0].date,
+                        name=self.tool_call_buffer[0].name,
+                        sender_id=self.tool_call_buffer[0].sender_id,
+                        step_id=self.tool_call_buffer[0].step_id,
+                        tool_call=ToolCallDelta(
+                            name=self.tool_call_name,
+                            tool_call_id=self.tool_call_id,
+                            arguments=tool_call_args,
+                        ),
+                    )
+                    prev_message_type = tool_call_msg.message_type
+                    yield tool_call_msg
                     self.tool_call_buffer = []
 
             # Start detecting special case of "send_message"
@@ -266,24 +283,26 @@ class AnthropicStreamingInterface:
                 if prev_message_type and prev_message_type != "assistant_message":
                     message_index += 1
                 assistant_msg = AssistantMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     content=[TextContent(text=send_message_diff)],
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 prev_message_type = assistant_msg.message_type
                 yield assistant_msg
             else:
                 # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
                 tool_call_msg = ToolCallMessage(
-                    id=self.letta_tool_message_id,
-                    tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                    id=self.letta_message_id,
+                    tool_call=ToolCallDelta(
+                        name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
+                    ),
                     date=datetime.now(timezone.utc).isoformat(),
                 )
                 if self.inner_thoughts_complete:
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
-                    tool_call_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
+                    tool_call_msg.otid = Message.generate_otid_from_id(self.letta_message_id, message_index)
                     prev_message_type = tool_call_msg.message_type
                     yield tool_call_msg
                 else:
@@ -301,11 +320,11 @@ class AnthropicStreamingInterface:
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning=delta.thinking,
                     date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -320,12 +339,12 @@ class AnthropicStreamingInterface:
                 if prev_message_type and prev_message_type != "reasoning_message":
                     message_index += 1
                 reasoning_message = ReasoningMessage(
-                    id=self.letta_assistant_message_id,
+                    id=self.letta_message_id,
                     source="reasoner_model",
                     reasoning="",
                     date=datetime.now(timezone.utc).isoformat(),
                     signature=delta.signature,
-                    otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                 )
                 self.reasoning_messages.append(reasoning_message)
                 prev_message_type = reasoning_message.message_type
@@ -360,7 +379,7 @@ class AnthropicStreamingInterface:
         group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
     ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
        if group_type == "reasoning":
-            reasoning_text = "".join(chunk.reasoning for chunk in group)
+            reasoning_text = "".join(chunk.reasoning for chunk in group).strip()
             is_native = any(chunk.source == "reasoner_model" for chunk in group)
             signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
             if is_native:
```
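The new buffering logic above concatenates the streamed argument fragments and removes the inner-thoughts entry with a plain string replace. A standalone sketch of that mechanic; the kwarg value and the fragments are illustrative, not taken from the package:

```python
INNER_THOUGHTS_KWARG = "inner_thoughts"  # assumed value of the letta constant

# Partial JSON fragments as they might arrive from the Anthropic stream
buffer = ['{"inner_thoughts": "user wants a recap", ', '"message": "Here is a recap."}']
current_inner_thoughts = "user wants a recap"

# Accumulate all buffered fragments, then drop the inner-thoughts entry,
# mirroring the replace the new code performs before yielding the tool call
tool_call_args = "".join(buffer)
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
print(tool_call_args)  # {, "message": "Here is a recap."}
```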
letta/interfaces/openai_streaming_interface.py
CHANGED

```diff
@@ -5,7 +5,7 @@ from openai import AsyncStream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import Message
@@ -32,14 +32,14 @@ class OpenAIStreamingInterface:
         self.function_args_buffer = None
         self.function_id_buffer = None
         self.last_flushed_function_name = None
+        self.last_flushed_function_id = None
 
         # Buffer to hold function arguments until inner thoughts are complete
         self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         # Premake IDs for database writes
-        self.letta_assistant_message_id = Message.generate_id()
-        self.letta_tool_message_id = Message.generate_id()
+        self.letta_message_id = Message.generate_id()
 
         self.message_id = None
         self.model = None
@@ -54,14 +54,14 @@ class OpenAIStreamingInterface:
         self.reasoning_messages = []
 
     def get_reasoning_content(self) -> List[TextContent]:
-        content = "".join(self.reasoning_messages)
+        content = "".join(self.reasoning_messages).strip()
         return [TextContent(text=content)]
 
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
         return ToolCall(
-            id=self.letta_tool_message_id,
+            id=self.last_flushed_function_id,
             function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
         )
 
@@ -85,7 +85,7 @@ class OpenAIStreamingInterface:
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - provider_request_start_timestamp_ns
                     ttft_span.add_event(
-                        name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ttft_ns
+                        name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
                     )
                     first_chunk = False
 
@@ -133,11 +133,11 @@ class OpenAIStreamingInterface:
                                 message_index += 1
                             self.reasoning_messages.append(updates_inner_thoughts)
                             reasoning_message = ReasoningMessage(
-                                id=self.letta_assistant_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 reasoning=updates_inner_thoughts,
                                 # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = reasoning_message.message_type
                             yield reasoning_message
@@ -171,20 +171,22 @@ class OpenAIStreamingInterface:
                                         message_index += 1
                                     self.tool_call_name = str(self.function_name_buffer)
                                     tool_call_msg = ToolCallMessage(
-                                        id=self.letta_tool_message_id,
+                                        id=self.letta_message_id,
                                         date=datetime.now(timezone.utc),
                                         tool_call=ToolCallDelta(
                                             name=self.function_name_buffer,
                                             arguments=None,
                                             tool_call_id=self.function_id_buffer,
                                         ),
-                                        otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                     )
                                     prev_message_type = tool_call_msg.message_type
                                     yield tool_call_msg
 
                                 # Record what the last function name we flushed was
                                 self.last_flushed_function_name = self.function_name_buffer
+                                if self.last_flushed_function_id is None:
+                                    self.last_flushed_function_id = self.function_id_buffer
                                 # Clear the buffer
                                 self.function_name_buffer = None
                                 self.function_id_buffer = None
@@ -236,10 +238,10 @@ class OpenAIStreamingInterface:
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
-                                id=self.letta_assistant_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 content=combined_chunk,
-                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
                             yield assistant_message
@@ -268,11 +270,11 @@ class OpenAIStreamingInterface:
                             if prev_message_type and prev_message_type != "assistant_message":
                                 message_index += 1
                             assistant_message = AssistantMessage(
-                                id=self.letta_assistant_message_id,
+                                id=self.letta_message_id,
                                 date=datetime.now(timezone.utc),
                                 content=diff,
                                 # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                             )
                             prev_message_type = assistant_message.message_type
                             yield assistant_message
@@ -292,15 +294,15 @@ class OpenAIStreamingInterface:
                                 if prev_message_type and prev_message_type != "tool_call_message":
                                     message_index += 1
                                 tool_call_msg = ToolCallMessage(
-                                    id=self.letta_tool_message_id,
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     tool_call=ToolCallDelta(
-                                        name=
+                                        name=self.function_name_buffer,
                                         arguments=combined_chunk,
                                         tool_call_id=self.function_id_buffer,
                                     ),
                                     # name=name,
-                                    otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = tool_call_msg.message_type
                                 yield tool_call_msg
@@ -312,7 +314,7 @@ class OpenAIStreamingInterface:
                                 if prev_message_type and prev_message_type != "tool_call_message":
                                     message_index += 1
                                 tool_call_msg = ToolCallMessage(
-                                    id=self.letta_tool_message_id,
+                                    id=self.letta_message_id,
                                     date=datetime.now(timezone.utc),
                                     tool_call=ToolCallDelta(
                                         name=None,
@@ -320,7 +322,7 @@ class OpenAIStreamingInterface:
                                         tool_call_id=self.function_id_buffer,
                                     ),
                                     # name=name,
-                                    otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                                 )
                                 prev_message_type = tool_call_msg.message_type
                                 yield tool_call_msg
```
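Both streaming interfaces now report time-to-first-token through `ns_to_ms` from `letta/helpers/datetime_helpers.py` (+47 lines in this release) instead of converting inline. The helper's body is not part of this diff; a minimal sketch of what it presumably does:

```python
def ns_to_ms(ns: int) -> float:
    """Convert a duration in nanoseconds to milliseconds (assumed behavior)."""
    return ns / 1_000_000


# e.g. a 1.53 s time-to-first-token recorded in nanoseconds
assert ns_to_ms(1_530_000_000) == 1530.0
```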
letta/llm_api/anthropic.py
CHANGED
```diff
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
+from letta.otel.tracing import log_event
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
@@ -45,7 +46,6 @@ from letta.services.provider_manager import ProviderManager
 from letta.services.user_manager import UserManager
 from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
-from letta.tracing import log_event
 
 logger = get_logger(__name__)
 
```
letta/llm_api/anthropic_client.py
CHANGED

```diff
@@ -27,16 +27,16 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.services.provider_manager import ProviderManager
 from letta.settings import model_settings
-from letta.tracing import trace_method
 
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
 
@@ -199,10 +199,10 @@ class AnthropicClient(LLMClientBase):
         elif llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [Tool(function=f) for f in tools]
+            tools_for_request = [OpenAITool(function=f) for f in tools]
         elif force_tool_call is not None:
             tool_choice = {"type": "tool", "name": force_tool_call}
-            tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
 
             # need to have this setting to be able to put inner thoughts in kwargs
             if not llm_config.put_inner_thoughts_in_kwargs:
@@ -216,7 +216,7 @@ class AnthropicClient(LLMClientBase):
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
                 tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
+            tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
 
         # Add tool choice
         if tool_choice:
@@ -230,7 +230,7 @@ class AnthropicClient(LLMClientBase):
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
                 inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
             )
-            tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
+            tools_for_request = [OpenAITool(function=f) for f in tools_with_inner_thoughts]
 
         if tools_for_request and len(tools_for_request) > 0:
             # TODO eventually enable parallel tool use
@@ -270,7 +270,7 @@ class AnthropicClient(LLMClientBase):
 
         return data
 
-    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[Tool] = None) -> int:
+    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
         client = anthropic.AsyncAnthropic()
         if messages and len(messages) == 0:
             messages = None
@@ -278,11 +278,19 @@ class AnthropicClient(LLMClientBase):
             anthropic_tools = convert_tools_to_anthropic_format(tools)
         else:
             anthropic_tools = None
-        result = await client.beta.messages.count_tokens(
-            model=model or "claude-3-7-sonnet-20250219",
-            messages=messages or [{"role": "user", "content": "hi"}],
-            tools=anthropic_tools or [],
-        )
+
+        try:
+            result = await client.beta.messages.count_tokens(
+                model=model or "claude-3-7-sonnet-20250219",
+                messages=messages or [{"role": "user", "content": "hi"}],
+                tools=anthropic_tools or [],
+            )
+        except:
+            import ipdb
+
+            ipdb.set_trace()
+            raise
+
         token_count = result.input_tokens
         if messages is None:
             token_count -= 8
@@ -419,10 +427,16 @@ class AnthropicClient(LLMClientBase):
                 if content_part.type == "text":
                     content = strip_xml_tags(string=content_part.text, tag="thinking")
                 if content_part.type == "tool_use":
-                    # hack for tool
+                    # hack for incorrect tool format
                     tool_input = json.loads(json.dumps(content_part.input))
                     if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
-                        arguments =
+                        arguments = json.dumps(tool_input["function"]["arguments"], indent=2)
+                        try:
+                            args_json = json.loads(arguments)
+                            if not isinstance(args_json, dict):
+                                raise ValueError("Expected parseable json object for arguments")
+                        except:
+                            arguments = str(tool_input["function"]["arguments"])
                     else:
                         arguments = json.dumps(tool_input, indent=2)
                 tool_calls = [
@@ -477,7 +491,7 @@ class AnthropicClient(LLMClientBase):
         return chat_completion_response
 
 
-def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
+def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
     OpenAI style:
@@ -527,7 +541,7 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     for tool in tools:
         formatted_tool = {
            "name": tool.function.name,
-            "description": tool.function.description,
+            "description": tool.function.description if tool.function.description else "",
            "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
        }
        formatted_tools.append(formatted_tool)
```
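For reference, the OpenAI-to-Anthropic mapping that `convert_tools_to_anthropic_format` performs, shown on a hypothetical tool definition (the function and field names are from the diff above; the sample tool is not from the package):

```python
# OpenAI-style tool definition (illustrative)
openai_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# Anthropic format produced by the loop above; after this release a missing
# description maps to "" rather than None, and a missing parameters object
# falls back to an empty schema.
anthropic_tool = {
    "name": "get_weather",
    "description": "Look up the current weather for a city.",
    "input_schema": openai_tool["function"]["parameters"],
}
```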
letta/llm_api/google_vertex_client.py
CHANGED

```diff
@@ -12,12 +12,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings, settings
-from letta.tracing import trace_method
 from letta.utils import get_tool_call_id
 
 logger = get_logger(__name__)
```