letta-nightly 0.11.7.dev20251008104128__py3-none-any.whl → 0.12.0.dev20251009203644__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/letta_agent_v3.py +33 -5
- letta/database_utils.py +161 -0
- letta/interfaces/anthropic_streaming_interface.py +21 -9
- letta/interfaces/gemini_streaming_interface.py +7 -5
- letta/interfaces/openai_streaming_interface.py +42 -30
- letta/llm_api/anthropic_client.py +36 -16
- letta/llm_api/google_vertex_client.py +1 -0
- letta/orm/__init__.py +1 -0
- letta/orm/run_metrics.py +82 -0
- letta/schemas/letta_message.py +29 -12
- letta/schemas/message.py +192 -51
- letta/schemas/run_metrics.py +21 -0
- letta/server/db.py +3 -10
- letta/server/rest_api/interface.py +85 -41
- letta/server/rest_api/routers/v1/providers.py +34 -0
- letta/server/rest_api/routers/v1/runs.py +27 -18
- letta/server/server.py +22 -0
- letta/services/context_window_calculator/token_counter.py +1 -1
- letta/services/helpers/run_manager_helper.py +5 -21
- letta/services/run_manager.py +63 -0
- letta/system.py +5 -1
- {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/RECORD +27 -24
- {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/licenses/LICENSE +0 -0
letta/__init__.py
CHANGED
letta/agents/letta_agent_v3.py
CHANGED
@@ -595,9 +595,30 @@ class LettaAgentV3(LettaAgentV2):
|
|
595
595
|
# -1. no tool call, no content
|
596
596
|
if tool_call is None and (content is None or len(content) == 0):
|
597
597
|
# Edge case is when there's also no content - basically, the LLM "no-op'd"
|
598
|
-
#
|
599
|
-
|
600
|
-
|
598
|
+
# If RequiredBeforeExitToolRule exists and not all required tools have been called,
|
599
|
+
# inject a rule-violation heartbeat to keep looping and inform the model.
|
600
|
+
uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
|
601
|
+
if uncalled:
|
602
|
+
# TODO: we may need to change this to not have a "heartbeat" prefix for v3?
|
603
|
+
heartbeat_reason = (
|
604
|
+
f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
|
605
|
+
)
|
606
|
+
from letta.server.rest_api.utils import create_heartbeat_system_message
|
607
|
+
|
608
|
+
heartbeat_msg = create_heartbeat_system_message(
|
609
|
+
agent_id=agent_state.id,
|
610
|
+
model=agent_state.llm_config.model,
|
611
|
+
function_call_success=True,
|
612
|
+
timezone=agent_state.timezone,
|
613
|
+
heartbeat_reason=heartbeat_reason,
|
614
|
+
run_id=run_id,
|
615
|
+
)
|
616
|
+
messages_to_persist = (initial_messages or []) + [heartbeat_msg]
|
617
|
+
continue_stepping, stop_reason = True, None
|
618
|
+
else:
|
619
|
+
# In this case, we actually do not want to persist the no-op message
|
620
|
+
continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
621
|
+
messages_to_persist = initial_messages or []
|
601
622
|
|
602
623
|
# 0. If there's no tool call, we can early exit
|
603
624
|
elif tool_call is None:
|
@@ -627,7 +648,8 @@ class LettaAgentV3(LettaAgentV2):
|
|
627
648
|
run_id=run_id,
|
628
649
|
is_approval_response=is_approval or is_denial,
|
629
650
|
force_set_request_heartbeat=False,
|
630
|
-
|
651
|
+
# If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
|
652
|
+
add_heartbeat_on_continue=bool(heartbeat_reason),
|
631
653
|
)
|
632
654
|
messages_to_persist = (initial_messages or []) + assistant_message
|
633
655
|
|
@@ -843,7 +865,13 @@ class LettaAgentV3(LettaAgentV2):
|
|
843
865
|
stop_reason: LettaStopReason | None = None
|
844
866
|
|
845
867
|
if tool_call_name is None:
|
846
|
-
# No tool call
|
868
|
+
# No tool call – if there are required-before-exit tools uncalled, keep stepping
|
869
|
+
# and provide explicit feedback to the model; otherwise end the loop.
|
870
|
+
uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
|
871
|
+
if uncalled and not is_final_step:
|
872
|
+
reason = f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
|
873
|
+
return True, reason, None
|
874
|
+
# No required tools remaining → end turn
|
847
875
|
return False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
848
876
|
else:
|
849
877
|
if tool_rule_violated:
|
letta/database_utils.py
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
"""
|
2
|
+
Database URI utilities for consistent database connection handling across the application.
|
3
|
+
|
4
|
+
This module provides utilities for parsing and converting database URIs to ensure
|
5
|
+
consistent behavior between the main application, alembic migrations, and other
|
6
|
+
database-related components.
|
7
|
+
"""
|
8
|
+
|
9
|
+
from typing import Optional
|
10
|
+
from urllib.parse import urlparse, urlunparse
|
11
|
+
|
12
|
+
|
13
|
+
def parse_database_uri(uri: str) -> dict[str, Optional[str]]:
    """
    Parse a database URI into its components.

    Args:
        uri: Database URI (e.g., postgresql://user:pass@host:port/db)

    Returns:
        Dictionary with the keys: scheme, driver, user, password, host, port,
        database, query, fragment. driver, user, password, host and port are
        None when the URI does not carry them; database is None when the URI
        has no path. scheme, query and fragment are empty strings when absent.

    Raises:
        ValueError: Propagated from urllib.parse when the port in the URI is
            not a valid port number.
    """
    parsed = urlparse(uri)

    # Extract driver from scheme (e.g., postgresql+asyncpg -> asyncpg).
    # str.split always returns at least one element, so index 0 is always valid.
    scheme_parts = parsed.scheme.split("+")
    base_scheme = scheme_parts[0]
    driver = scheme_parts[1] if len(scheme_parts) > 1 else None

    # Read the port once and compare against None (not truthiness) so that an
    # explicit port of 0 is reported rather than silently dropped.
    port = parsed.port

    return {
        "scheme": base_scheme,
        "driver": driver,
        "user": parsed.username,
        "password": parsed.password,
        "host": parsed.hostname,
        "port": str(port) if port is not None else None,
        "database": parsed.path.lstrip("/") if parsed.path else None,
        "query": parsed.query,
        "fragment": parsed.fragment,
    }
|
41
|
+
|
42
|
+
|
43
|
+
def build_database_uri(
    scheme: str = "postgresql",
    driver: Optional[str] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[str] = None,
    database: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
) -> str:
    """
    Build a database URI from components.

    Components are inserted verbatim; no percent-encoding is applied, so
    values that were percent-encoded in a parsed URI stay encoded.

    Args:
        scheme: Base scheme (e.g., "postgresql")
        driver: Driver name (e.g., "asyncpg", "pg8000")
        user: Username
        password: Password (only emitted when a user is also given)
        host: Hostname; a bare IPv6 address is wrapped in brackets
        port: Port number (only emitted when a host is also given)
        database: Database name
        query: Query string (without the leading "?")
        fragment: Fragment (without the leading "#")

    Returns:
        Complete database URI
    """
    # Combine scheme and driver
    full_scheme = f"{scheme}+{driver}" if driver else scheme

    # Build the userinfo part (user[:password])
    userinfo = ""
    if user:
        userinfo = f"{user}:{password}" if password else user

    # Build the host[:port] part
    hostport = ""
    if host:
        # An IPv6 literal must be bracketed, otherwise its colons are
        # indistinguishable from the port separator.
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        hostport = f"{host}:{port}" if port else host

    # Always terminate userinfo with "@", even without a host, so the user
    # name cannot be re-parsed as the host.
    netloc = f"{userinfo}@{hostport}" if userinfo else hostport

    # Build path
    path = f"/{database}" if database else ""

    # Assemble the URI by hand so the "//" authority marker is always present,
    # including when netloc is empty (e.g. "postgresql:///db").
    prefix = f"{full_scheme}:" if full_scheme else ""
    uri = f"{prefix}//{netloc}{path}"
    if query:
        uri += f"?{query}"
    if fragment:
        uri += f"#{fragment}"
    return uri
|
95
|
+
|
96
|
+
|
97
|
+
def convert_to_async_uri(uri: str) -> str:
    """
    Convert a database URI to use the asyncpg driver for async operations.

    Args:
        uri: Original database URI

    Returns:
        URI with asyncpg driver and ssl parameter adjustments
    """
    components = parse_database_uri(uri)

    # Convert to asyncpg driver
    components["driver"] = "asyncpg"

    # Rename the sslmode query parameter to ssl for asyncpg compatibility.
    # Only query parameters whose key is exactly "sslmode" are touched, so a
    # password, database name or unrelated parameter that happens to contain
    # the text "sslmode=" is left intact.
    old_prefix, new_prefix = "sslmode=", "ssl="
    query = components["query"] or ""
    components["query"] = "&".join(
        new_prefix + param[len(old_prefix) :] if param.startswith(old_prefix) else param
        for param in query.split("&")
    )

    # Build the new URI
    return build_database_uri(**components)
|
119
|
+
|
120
|
+
|
121
|
+
def convert_to_sync_uri(uri: str) -> str:
    """
    Convert a database URI to use the pg8000 driver for sync operations (alembic).

    Args:
        uri: Original database URI

    Returns:
        URI with pg8000 driver and sslmode parameter adjustments
    """
    components = parse_database_uri(uri)

    # Convert to pg8000 driver
    components["driver"] = "pg8000"

    # Rename the ssl query parameter to sslmode for pg8000 compatibility.
    # Only query parameters whose key is exactly "ssl" are touched; a blanket
    # text replacement would also rewrite keys that merely end in "ssl"
    # (e.g. "use_ssl=") and credentials containing "ssl=".
    old_prefix, new_prefix = "ssl=", "sslmode="
    query = components["query"] or ""
    components["query"] = "&".join(
        new_prefix + param[len(old_prefix) :] if param.startswith(old_prefix) else param
        for param in query.split("&")
    )

    # Build the new URI
    return build_database_uri(**components)
|
143
|
+
|
144
|
+
|
145
|
+
def get_database_uri_for_context(uri: str, context: str = "async") -> str:
    """
    Return the database URI rewritten for the given usage context.

    Args:
        uri: Original database URI
        context: "async" selects the async conversion; "sync" and "alembic"
            both select the sync conversion

    Returns:
        URI formatted for the specified context

    Raises:
        ValueError: If context is not one of "async", "sync" or "alembic"
    """
    # Guard-clause form: each recognised context returns immediately.
    if context == "async":
        return convert_to_async_uri(uri)

    if context in ("sync", "alembic"):
        return convert_to_sync_uri(uri)

    raise ValueError(f"Unknown context: {context}. Must be 'async', 'sync', or 'alembic'")
|
@@ -279,9 +279,11 @@ class AnthropicStreamingInterface:
|
|
279
279
|
if prev_message_type and prev_message_type != "tool_call_message":
|
280
280
|
message_index += 1
|
281
281
|
if self.tool_call_name not in self.requires_approval_tools:
|
282
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
|
282
283
|
tool_call_msg = ToolCallMessage(
|
283
284
|
id=self.letta_message_id,
|
284
|
-
tool_call=
|
285
|
+
tool_call=tool_call_delta,
|
286
|
+
tool_calls=tool_call_delta,
|
285
287
|
date=datetime.now(timezone.utc).isoformat(),
|
286
288
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
287
289
|
run_id=self.run_id,
|
@@ -423,15 +425,17 @@ class AnthropicStreamingInterface:
|
|
423
425
|
tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
|
424
426
|
tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
|
425
427
|
|
428
|
+
tool_call_delta = ToolCallDelta(
|
429
|
+
name=self.tool_call_name,
|
430
|
+
tool_call_id=self.tool_call_id,
|
431
|
+
arguments=tool_call_args,
|
432
|
+
)
|
426
433
|
tool_call_msg = ToolCallMessage(
|
427
434
|
id=self.tool_call_buffer[0].id,
|
428
435
|
otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
|
429
436
|
date=self.tool_call_buffer[0].date,
|
430
|
-
tool_call=
|
431
|
-
|
432
|
-
tool_call_id=self.tool_call_id,
|
433
|
-
arguments=tool_call_args,
|
434
|
-
),
|
437
|
+
tool_call=tool_call_delta,
|
438
|
+
tool_calls=tool_call_delta,
|
435
439
|
run_id=self.run_id,
|
436
440
|
)
|
437
441
|
prev_message_type = tool_call_msg.message_type
|
@@ -467,9 +471,13 @@ class AnthropicStreamingInterface:
|
|
467
471
|
run_id=self.run_id,
|
468
472
|
)
|
469
473
|
else:
|
474
|
+
tool_call_delta = ToolCallDelta(
|
475
|
+
name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
|
476
|
+
)
|
470
477
|
tool_call_msg = ToolCallMessage(
|
471
478
|
id=self.letta_message_id,
|
472
|
-
tool_call=
|
479
|
+
tool_call=tool_call_delta,
|
480
|
+
tool_calls=tool_call_delta,
|
473
481
|
date=datetime.now(timezone.utc).isoformat(),
|
474
482
|
run_id=self.run_id,
|
475
483
|
)
|
@@ -778,9 +786,11 @@ class SimpleAnthropicStreamingInterface:
|
|
778
786
|
else:
|
779
787
|
if prev_message_type and prev_message_type != "tool_call_message":
|
780
788
|
message_index += 1
|
789
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
|
781
790
|
tool_call_msg = ToolCallMessage(
|
782
791
|
id=self.letta_message_id,
|
783
|
-
tool_call=
|
792
|
+
tool_call=tool_call_delta,
|
793
|
+
tool_calls=tool_call_delta,
|
784
794
|
date=datetime.now(timezone.utc).isoformat(),
|
785
795
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
786
796
|
run_id=self.run_id,
|
@@ -860,9 +870,11 @@ class SimpleAnthropicStreamingInterface:
|
|
860
870
|
else:
|
861
871
|
if prev_message_type and prev_message_type != "tool_call_message":
|
862
872
|
message_index += 1
|
873
|
+
tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json)
|
863
874
|
tool_call_msg = ToolCallMessage(
|
864
875
|
id=self.letta_message_id,
|
865
|
-
tool_call=
|
876
|
+
tool_call=tool_call_delta,
|
877
|
+
tool_calls=tool_call_delta,
|
866
878
|
date=datetime.now(timezone.utc).isoformat(),
|
867
879
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
868
880
|
run_id=self.run_id,
|
@@ -273,15 +273,17 @@ class SimpleGeminiStreamingInterface:
|
|
273
273
|
else:
|
274
274
|
if prev_message_type and prev_message_type != "tool_call_message":
|
275
275
|
message_index += 1
|
276
|
+
tool_call_delta = ToolCallDelta(
|
277
|
+
name=name,
|
278
|
+
arguments=arguments_str,
|
279
|
+
tool_call_id=call_id,
|
280
|
+
)
|
276
281
|
yield ToolCallMessage(
|
277
282
|
id=self.letta_message_id,
|
278
283
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
279
284
|
date=datetime.now(timezone.utc),
|
280
|
-
tool_call=
|
281
|
-
|
282
|
-
arguments=arguments_str,
|
283
|
-
tool_call_id=call_id,
|
284
|
-
),
|
285
|
+
tool_call=tool_call_delta,
|
286
|
+
tool_calls=tool_call_delta,
|
285
287
|
run_id=self.run_id,
|
286
288
|
step_id=self.step_id,
|
287
289
|
)
|
@@ -336,14 +336,16 @@ class OpenAIStreamingInterface:
|
|
336
336
|
step_id=self.step_id,
|
337
337
|
)
|
338
338
|
else:
|
339
|
+
tool_call_delta = ToolCallDelta(
|
340
|
+
name=self.function_name_buffer,
|
341
|
+
arguments=None,
|
342
|
+
tool_call_id=self.function_id_buffer,
|
343
|
+
)
|
339
344
|
tool_call_msg = ToolCallMessage(
|
340
345
|
id=self.letta_message_id,
|
341
346
|
date=datetime.now(timezone.utc),
|
342
|
-
tool_call=
|
343
|
-
|
344
|
-
arguments=None,
|
345
|
-
tool_call_id=self.function_id_buffer,
|
346
|
-
),
|
347
|
+
tool_call=tool_call_delta,
|
348
|
+
tool_calls=tool_call_delta,
|
347
349
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
348
350
|
run_id=self.run_id,
|
349
351
|
step_id=self.step_id,
|
@@ -423,14 +425,16 @@ class OpenAIStreamingInterface:
|
|
423
425
|
step_id=self.step_id,
|
424
426
|
)
|
425
427
|
else:
|
428
|
+
tool_call_delta = ToolCallDelta(
|
429
|
+
name=self.function_name_buffer,
|
430
|
+
arguments=combined_chunk,
|
431
|
+
tool_call_id=self.function_id_buffer,
|
432
|
+
)
|
426
433
|
tool_call_msg = ToolCallMessage(
|
427
434
|
id=self.letta_message_id,
|
428
435
|
date=datetime.now(timezone.utc),
|
429
|
-
tool_call=
|
430
|
-
|
431
|
-
arguments=combined_chunk,
|
432
|
-
tool_call_id=self.function_id_buffer,
|
433
|
-
),
|
436
|
+
tool_call=tool_call_delta,
|
437
|
+
tool_calls=tool_call_delta,
|
434
438
|
# name=name,
|
435
439
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
436
440
|
run_id=self.run_id,
|
@@ -460,14 +464,16 @@ class OpenAIStreamingInterface:
|
|
460
464
|
step_id=self.step_id,
|
461
465
|
)
|
462
466
|
else:
|
467
|
+
tool_call_delta = ToolCallDelta(
|
468
|
+
name=None,
|
469
|
+
arguments=updates_main_json,
|
470
|
+
tool_call_id=self.function_id_buffer,
|
471
|
+
)
|
463
472
|
tool_call_msg = ToolCallMessage(
|
464
473
|
id=self.letta_message_id,
|
465
474
|
date=datetime.now(timezone.utc),
|
466
|
-
tool_call=
|
467
|
-
|
468
|
-
arguments=updates_main_json,
|
469
|
-
tool_call_id=self.function_id_buffer,
|
470
|
-
),
|
475
|
+
tool_call=tool_call_delta,
|
476
|
+
tool_calls=tool_call_delta,
|
471
477
|
# name=name,
|
472
478
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
473
479
|
run_id=self.run_id,
|
@@ -717,14 +723,16 @@ class SimpleOpenAIStreamingInterface:
|
|
717
723
|
step_id=self.step_id,
|
718
724
|
)
|
719
725
|
else:
|
726
|
+
tool_call_delta = ToolCallDelta(
|
727
|
+
name=tool_call.function.name,
|
728
|
+
arguments=tool_call.function.arguments,
|
729
|
+
tool_call_id=tool_call.id,
|
730
|
+
)
|
720
731
|
tool_call_msg = ToolCallMessage(
|
721
732
|
id=self.letta_message_id,
|
722
733
|
date=datetime.now(timezone.utc),
|
723
|
-
tool_call=
|
724
|
-
|
725
|
-
arguments=tool_call.function.arguments,
|
726
|
-
tool_call_id=tool_call.id,
|
727
|
-
),
|
734
|
+
tool_call=tool_call_delta,
|
735
|
+
tool_calls=tool_call_delta,
|
728
736
|
# name=name,
|
729
737
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
730
738
|
run_id=self.run_id,
|
@@ -945,15 +953,17 @@ class SimpleOpenAIResponsesStreamingInterface:
|
|
945
953
|
else:
|
946
954
|
if prev_message_type and prev_message_type != "tool_call_message":
|
947
955
|
message_index += 1
|
956
|
+
tool_call_delta = ToolCallDelta(
|
957
|
+
name=name,
|
958
|
+
arguments=arguments if arguments != "" else None,
|
959
|
+
tool_call_id=call_id,
|
960
|
+
)
|
948
961
|
yield ToolCallMessage(
|
949
962
|
id=self.letta_message_id,
|
950
963
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
951
964
|
date=datetime.now(timezone.utc),
|
952
|
-
tool_call=
|
953
|
-
|
954
|
-
arguments=arguments if arguments != "" else None,
|
955
|
-
tool_call_id=call_id,
|
956
|
-
),
|
965
|
+
tool_call=tool_call_delta,
|
966
|
+
tool_calls=tool_call_delta,
|
957
967
|
run_id=self.run_id,
|
958
968
|
step_id=self.step_id,
|
959
969
|
)
|
@@ -1113,15 +1123,17 @@ class SimpleOpenAIResponsesStreamingInterface:
|
|
1113
1123
|
else:
|
1114
1124
|
if prev_message_type and prev_message_type != "tool_call_message":
|
1115
1125
|
message_index += 1
|
1126
|
+
tool_call_delta = ToolCallDelta(
|
1127
|
+
name=None,
|
1128
|
+
arguments=delta,
|
1129
|
+
tool_call_id=None,
|
1130
|
+
)
|
1116
1131
|
yield ToolCallMessage(
|
1117
1132
|
id=self.letta_message_id,
|
1118
1133
|
otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
|
1119
1134
|
date=datetime.now(timezone.utc),
|
1120
|
-
tool_call=
|
1121
|
-
|
1122
|
-
arguments=delta,
|
1123
|
-
tool_call_id=None,
|
1124
|
-
),
|
1135
|
+
tool_call=tool_call_delta,
|
1136
|
+
tool_calls=tool_call_delta,
|
1125
1137
|
run_id=self.run_id,
|
1126
1138
|
step_id=self.step_id,
|
1127
1139
|
)
|
@@ -56,6 +56,9 @@ class AnthropicClient(LLMClientBase):
|
|
56
56
|
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
57
57
|
client = self._get_anthropic_client(llm_config, async_client=False)
|
58
58
|
betas: list[str] = []
|
59
|
+
# Interleaved thinking for reasoner (sync path parity)
|
60
|
+
if llm_config.enable_reasoner:
|
61
|
+
betas.append("interleaved-thinking-2025-05-14")
|
59
62
|
# 1M context beta for Sonnet 4/4.5 when enabled
|
60
63
|
try:
|
61
64
|
from letta.settings import model_settings
|
@@ -325,6 +328,7 @@ class AnthropicClient(LLMClientBase):
|
|
325
328
|
data["system"] = self._add_cache_control_to_system_message(system_content)
|
326
329
|
data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
|
327
330
|
messages=messages[1:],
|
331
|
+
current_model=llm_config.model,
|
328
332
|
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
329
333
|
put_inner_thoughts_in_kwargs=put_kwargs,
|
330
334
|
# if react, use native content + strip heartbeats
|
@@ -370,6 +374,7 @@ class AnthropicClient(LLMClientBase):
|
|
370
374
|
async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
|
371
375
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
372
376
|
|
377
|
+
# Use the default client; token counting is lightweight and does not require BYOK overrides
|
373
378
|
client = anthropic.AsyncAnthropic()
|
374
379
|
if messages and len(messages) == 0:
|
375
380
|
messages = None
|
@@ -378,23 +383,20 @@ class AnthropicClient(LLMClientBase):
|
|
378
383
|
else:
|
379
384
|
anthropic_tools = None
|
380
385
|
|
386
|
+
# Detect presence of reasoning blocks anywhere in the final assistant message.
|
387
|
+
# Interleaved thinking is not guaranteed to be the first content part.
|
381
388
|
thinking_enabled = False
|
382
389
|
if messages and len(messages) > 0:
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
and isinstance(last_assistant_message.get("content"), list)
|
394
|
-
and len(last_assistant_message["content"]) > 0
|
395
|
-
and last_assistant_message["content"][0].get("type") == "thinking"
|
396
|
-
):
|
397
|
-
thinking_enabled = True
|
390
|
+
last_assistant_message = next((m for m in reversed(messages) if m.get("role") == "assistant"), None)
|
391
|
+
if last_assistant_message:
|
392
|
+
content = last_assistant_message.get("content")
|
393
|
+
if isinstance(content, list):
|
394
|
+
for part in content:
|
395
|
+
if isinstance(part, dict) and part.get("type") in {"thinking", "redacted_thinking"}:
|
396
|
+
thinking_enabled = True
|
397
|
+
break
|
398
|
+
elif isinstance(content, str) and "<thinking>" in content:
|
399
|
+
thinking_enabled = True
|
398
400
|
|
399
401
|
try:
|
400
402
|
count_params = {
|
@@ -403,9 +405,27 @@ class AnthropicClient(LLMClientBase):
|
|
403
405
|
"tools": anthropic_tools or [],
|
404
406
|
}
|
405
407
|
|
408
|
+
betas: list[str] = []
|
406
409
|
if thinking_enabled:
|
410
|
+
# Match interleaved thinking behavior so token accounting is consistent
|
407
411
|
count_params["thinking"] = {"type": "enabled", "budget_tokens": 16000}
|
408
|
-
|
412
|
+
betas.append("interleaved-thinking-2025-05-14")
|
413
|
+
|
414
|
+
# Opt-in to 1M context if enabled for this model in settings
|
415
|
+
try:
|
416
|
+
if (
|
417
|
+
model
|
418
|
+
and model_settings.anthropic_sonnet_1m
|
419
|
+
and (model.startswith("claude-sonnet-4") or model.startswith("claude-sonnet-4-5"))
|
420
|
+
):
|
421
|
+
betas.append("context-1m-2025-08-07")
|
422
|
+
except Exception:
|
423
|
+
pass
|
424
|
+
|
425
|
+
if betas:
|
426
|
+
result = await client.beta.messages.count_tokens(**count_params, betas=betas)
|
427
|
+
else:
|
428
|
+
result = await client.beta.messages.count_tokens(**count_params)
|
409
429
|
except:
|
410
430
|
raise
|
411
431
|
|
@@ -311,6 +311,7 @@ class GoogleVertexClient(LLMClientBase):
|
|
311
311
|
contents = self.add_dummy_model_messages(
|
312
312
|
PydanticMessage.to_google_dicts_from_list(
|
313
313
|
messages,
|
314
|
+
current_model=llm_config.model,
|
314
315
|
put_inner_thoughts_in_kwargs=False if agent_type == AgentType.letta_v1_agent else True,
|
315
316
|
native_content=True if agent_type == AgentType.letta_v1_agent else False,
|
316
317
|
),
|
letta/orm/__init__.py
CHANGED
@@ -27,6 +27,7 @@ from letta.orm.prompt import Prompt
|
|
27
27
|
from letta.orm.provider import Provider
|
28
28
|
from letta.orm.provider_trace import ProviderTrace
|
29
29
|
from letta.orm.run import Run
|
30
|
+
from letta.orm.run_metrics import RunMetrics
|
30
31
|
from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
|
31
32
|
from letta.orm.source import Source
|
32
33
|
from letta.orm.sources_agents import SourcesAgents
|
letta/orm/run_metrics.py
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
from datetime import datetime, timezone
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
3
|
+
|
4
|
+
from sqlalchemy import BigInteger, ForeignKey, Integer, String
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
6
|
+
from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
|
7
|
+
|
8
|
+
from letta.orm.mixins import AgentMixin, OrganizationMixin, ProjectMixin, TemplateMixin
|
9
|
+
from letta.orm.sqlalchemy_base import SqlalchemyBase
|
10
|
+
from letta.schemas.run_metrics import RunMetrics as PydanticRunMetrics
|
11
|
+
from letta.schemas.user import User
|
12
|
+
from letta.settings import DatabaseChoice, settings
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from letta.orm.agent import Agent
|
16
|
+
from letta.orm.run import Run
|
17
|
+
from letta.orm.step import Step
|
18
|
+
|
19
|
+
|
20
|
+
class RunMetrics(SqlalchemyBase, ProjectMixin, AgentMixin, OrganizationMixin, TemplateMixin):
    """Tracks performance metrics for a run (start time, total duration, step count)."""

    __tablename__ = "run_metrics"
    __pydantic_model__ = PydanticRunMetrics

    # The primary key doubles as the foreign key to runs.id, so a run has at
    # most one metrics row, and the row is removed when the run is deleted.
    id: Mapped[str] = mapped_column(
        ForeignKey("runs.id", ondelete="CASCADE"),
        primary_key=True,
        doc="The unique identifier of the run this metric belongs to (also serves as PK)",
    )
    run_start_ns: Mapped[Optional[int]] = mapped_column(
        BigInteger,
        nullable=True,
        doc="The timestamp of the start of the run in nanoseconds",
    )
    run_ns: Mapped[Optional[int]] = mapped_column(
        BigInteger,
        nullable=True,
        doc="Total time for the run in nanoseconds",
    )
    num_steps: Mapped[Optional[int]] = mapped_column(
        Integer,
        nullable=True,
        doc="The number of steps in the run",
    )
    run: Mapped[Optional["Run"]] = relationship("Run", foreign_keys=[id])
    agent: Mapped[Optional["Agent"]] = relationship("Agent")

    def _set_sqlite_timestamps(self) -> None:
        """Fill in missing created_at/updated_at when the configured engine is SQLite.

        Shared by create() and create_async() so both paths apply the same
        back-fill. Timestamps that are already set are left untouched.
        """
        # For SQLite, explicitly set timestamps as server_default may not work
        if settings.database_engine != DatabaseChoice.SQLITE:
            return
        now = datetime.now(timezone.utc)
        if not self.created_at:
            self.created_at = now
        if not self.updated_at:
            self.updated_at = now

    def create(
        self,
        db_session: Session,
        actor: Optional[User] = None,
        no_commit: bool = False,
    ) -> "RunMetrics":
        """Override create to handle SQLite timestamp issues"""
        self._set_sqlite_timestamps()
        return super().create(db_session, actor=actor, no_commit=no_commit)

    async def create_async(
        self,
        db_session: AsyncSession,
        actor: Optional[User] = None,
        no_commit: bool = False,
        no_refresh: bool = False,
    ) -> "RunMetrics":
        """Override create_async to handle SQLite timestamp issues"""
        self._set_sqlite_timestamps()
        return await super().create_async(db_session, actor=actor, no_commit=no_commit, no_refresh=no_refresh)
|