letta-nightly 0.11.7.dev20251008104128__py3-none-any.whl → 0.12.0.dev20251009203644__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. letta/__init__.py +1 -1
  2. letta/agents/letta_agent_v3.py +33 -5
  3. letta/database_utils.py +161 -0
  4. letta/interfaces/anthropic_streaming_interface.py +21 -9
  5. letta/interfaces/gemini_streaming_interface.py +7 -5
  6. letta/interfaces/openai_streaming_interface.py +42 -30
  7. letta/llm_api/anthropic_client.py +36 -16
  8. letta/llm_api/google_vertex_client.py +1 -0
  9. letta/orm/__init__.py +1 -0
  10. letta/orm/run_metrics.py +82 -0
  11. letta/schemas/letta_message.py +29 -12
  12. letta/schemas/message.py +192 -51
  13. letta/schemas/run_metrics.py +21 -0
  14. letta/server/db.py +3 -10
  15. letta/server/rest_api/interface.py +85 -41
  16. letta/server/rest_api/routers/v1/providers.py +34 -0
  17. letta/server/rest_api/routers/v1/runs.py +27 -18
  18. letta/server/server.py +22 -0
  19. letta/services/context_window_calculator/token_counter.py +1 -1
  20. letta/services/helpers/run_manager_helper.py +5 -21
  21. letta/services/run_manager.py +63 -0
  22. letta/system.py +5 -1
  23. {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/METADATA +1 -1
  24. {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/RECORD +27 -24
  25. {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/WHEEL +0 -0
  26. {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/entry_points.txt +0 -0
  27. {letta_nightly-0.11.7.dev20251008104128.dist-info → letta_nightly-0.12.0.dev20251009203644.dist-info}/licenses/LICENSE +0 -0
letta/__init__.py CHANGED
@@ -5,7 +5,7 @@ try:
5
5
  __version__ = version("letta")
6
6
  except PackageNotFoundError:
7
7
  # Fallback for development installations
8
- __version__ = "0.11.7"
8
+ __version__ = "0.12.0"
9
9
 
10
10
  if os.environ.get("LETTA_VERSION"):
11
11
  __version__ = os.environ["LETTA_VERSION"]
@@ -595,9 +595,30 @@ class LettaAgentV3(LettaAgentV2):
595
595
  # -1. no tool call, no content
596
596
  if tool_call is None and (content is None or len(content) == 0):
597
597
  # Edge case is when there's also no content - basically, the LLM "no-op'd"
598
- # In this case, we actually do not want to persist the no-op message
599
- continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
600
- messages_to_persist = initial_messages or []
598
+ # If RequiredBeforeExitToolRule exists and not all required tools have been called,
599
+ # inject a rule-violation heartbeat to keep looping and inform the model.
600
+ uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
601
+ if uncalled:
602
+ # TODO: we may need to change this to not have a "heartbeat" prefix for v3?
603
+ heartbeat_reason = (
604
+ f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
605
+ )
606
+ from letta.server.rest_api.utils import create_heartbeat_system_message
607
+
608
+ heartbeat_msg = create_heartbeat_system_message(
609
+ agent_id=agent_state.id,
610
+ model=agent_state.llm_config.model,
611
+ function_call_success=True,
612
+ timezone=agent_state.timezone,
613
+ heartbeat_reason=heartbeat_reason,
614
+ run_id=run_id,
615
+ )
616
+ messages_to_persist = (initial_messages or []) + [heartbeat_msg]
617
+ continue_stepping, stop_reason = True, None
618
+ else:
619
+ # In this case, we actually do not want to persist the no-op message
620
+ continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
621
+ messages_to_persist = initial_messages or []
601
622
 
602
623
  # 0. If there's no tool call, we can early exit
603
624
  elif tool_call is None:
@@ -627,7 +648,8 @@ class LettaAgentV3(LettaAgentV2):
627
648
  run_id=run_id,
628
649
  is_approval_response=is_approval or is_denial,
629
650
  force_set_request_heartbeat=False,
630
- add_heartbeat_on_continue=False,
651
+ # If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
652
+ add_heartbeat_on_continue=bool(heartbeat_reason),
631
653
  )
632
654
  messages_to_persist = (initial_messages or []) + assistant_message
633
655
 
@@ -843,7 +865,13 @@ class LettaAgentV3(LettaAgentV2):
843
865
  stop_reason: LettaStopReason | None = None
844
866
 
845
867
  if tool_call_name is None:
846
- # No tool call? End loop
868
+ # No tool call: if any required-before-exit tools are still uncalled, keep stepping
869
+ # and provide explicit feedback to the model; otherwise end the loop.
870
+ uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
871
+ if uncalled and not is_final_step:
872
+ reason = f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
873
+ return True, reason, None
874
+ # No required tools remaining → end turn
847
875
  return False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
848
876
  else:
849
877
  if tool_rule_violated:
@@ -0,0 +1,161 @@
1
+ """
2
+ Database URI utilities for consistent database connection handling across the application.
3
+
4
+ This module provides utilities for parsing and converting database URIs to ensure
5
+ consistent behavior between the main application, alembic migrations, and other
6
+ database-related components.
7
+ """
8
+
9
+ from typing import Optional
10
+ from urllib.parse import urlparse, urlunparse
11
+
12
+
13
def parse_database_uri(uri: str) -> dict[str, Optional[str]]:
    """
    Parse a database URI into its components.

    Args:
        uri: Database URI (e.g., postgresql+asyncpg://user:pass@host:port/db?sslmode=require)

    Returns:
        Dictionary with the keys ``scheme``, ``driver``, ``user``, ``password``,
        ``host``, ``port``, ``database``, ``query`` and ``fragment``.
        ``driver``, ``user``, ``password``, ``host``, ``port`` and ``database``
        are ``None`` when absent from the URI; ``query`` and ``fragment`` are
        empty strings when absent.

    Raises:
        ValueError: If the URI carries a port that is not a valid integer
            (raised by ``urllib.parse`` when the port is read).
    """
    parsed = urlparse(uri)

    # Extract driver from scheme (e.g., postgresql+asyncpg -> asyncpg).
    # str.split always returns at least one element, so index 0 is safe.
    scheme_parts = parsed.scheme.split("+")
    base_scheme = scheme_parts[0]
    driver = scheme_parts[1] if len(scheme_parts) > 1 else None

    # Compare against None rather than truthiness so an explicit port 0 is kept.
    port = parsed.port

    return {
        "scheme": base_scheme,
        "driver": driver,
        "user": parsed.username,
        "password": parsed.password,
        # NOTE: urlparse lower-cases the hostname and strips IPv6 brackets.
        "host": parsed.hostname,
        "port": str(port) if port is not None else None,
        # A missing path and a bare "/" both mean "no database selected".
        "database": parsed.path.lstrip("/") or None,
        "query": parsed.query,
        "fragment": parsed.fragment,
    }
41
+
42
+
43
def build_database_uri(
    scheme: str = "postgresql",
    driver: Optional[str] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[str] = None,
    database: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
) -> str:
    """
    Build a database URI from components.

    Args:
        scheme: Base scheme (e.g., "postgresql")
        driver: Driver name (e.g., "asyncpg", "pg8000")
        user: Username (a password without a user is ignored)
        password: Password
        host: Hostname; a bare IPv6 literal is wrapped in brackets
        port: Port number (only used when a host is given)
        database: Database name
        query: Query string (without the leading "?")
        fragment: Fragment (without the leading "#")

    Returns:
        Complete database URI
    """
    # Combine scheme and driver
    full_scheme = f"{scheme}+{driver}" if driver else scheme

    # user[:password]
    userinfo = ""
    if user:
        userinfo = f"{user}:{password}" if password else user

    # host[:port]
    hostport = ""
    if host:
        # An unbracketed IPv6 literal would be ambiguous next to ":port",
        # so restore the brackets that URI parsers strip from the hostname.
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        hostport = f"{host}:{port}" if port else host

    # Always keep the "@" after credentials, even with an empty host
    # (e.g. socket-style URIs such as postgresql://user:pass@/db?host=/path);
    # otherwise the credentials would be re-read as host:port.
    netloc = f"{userinfo}@{hostport}" if userinfo else hostport

    # Build path
    path = f"/{database}" if database else ""

    # Build the URI (the fourth element, "params", is unused for database URIs)
    return urlunparse((full_scheme, netloc, path, "", query or "", fragment or ""))
95
+
96
+
97
def convert_to_async_uri(uri: str) -> str:
    """
    Convert a database URI to use the asyncpg driver for async operations.

    Args:
        uri: Original database URI

    Returns:
        URI with the asyncpg driver, and with the ``sslmode`` query parameter
        renamed to ``ssl``
    """
    components = parse_database_uri(uri)

    # Convert to asyncpg driver
    components["driver"] = "asyncpg"

    # Rename sslmode= to ssl= for asyncpg compatibility. Only parameter names
    # in the query string are touched, so credentials, the fragment, or values
    # that merely contain the text "sslmode=" are left intact.
    old_key, new_key = "sslmode=", "ssl="
    query = components.get("query") or ""
    components["query"] = "&".join(
        new_key + param[len(old_key):] if param.startswith(old_key) else param
        for param in query.split("&")
    )

    # Build the new URI
    return build_database_uri(**components)
119
+
120
+
121
def convert_to_sync_uri(uri: str) -> str:
    """
    Convert a database URI to use the pg8000 driver for sync operations (alembic).

    Args:
        uri: Original database URI

    Returns:
        URI with the pg8000 driver, and with the ``ssl`` query parameter
        renamed to ``sslmode``
    """
    components = parse_database_uri(uri)

    # Convert to pg8000 driver
    components["driver"] = "pg8000"

    # Rename ssl= to sslmode= for pg8000 compatibility. Matching on the start
    # of each query parameter avoids rewriting credentials or unrelated
    # parameters whose names merely end in "ssl" (e.g. "use_ssl=1").
    old_key, new_key = "ssl=", "sslmode="
    query = components.get("query") or ""
    components["query"] = "&".join(
        new_key + param[len(old_key):] if param.startswith(old_key) else param
        for param in query.split("&")
    )

    # Build the new URI
    return build_database_uri(**components)
143
+
144
+
145
def get_database_uri_for_context(uri: str, context: str = "async") -> str:
    """
    Return the database URI rewritten for the given usage context.

    Args:
        uri: Original database URI
        context: "async" selects the asyncpg form; "sync" and "alembic" both
            select the pg8000 form

    Returns:
        URI formatted for the specified context

    Raises:
        ValueError: If ``context`` is not one of the supported names
    """
    # Guard clauses: each recognised context returns immediately.
    if context == "async":
        return convert_to_async_uri(uri)

    if context in ("sync", "alembic"):
        return convert_to_sync_uri(uri)

    raise ValueError(f"Unknown context: {context}. Must be 'async', 'sync', or 'alembic'")
@@ -279,9 +279,11 @@ class AnthropicStreamingInterface:
279
279
  if prev_message_type and prev_message_type != "tool_call_message":
280
280
  message_index += 1
281
281
  if self.tool_call_name not in self.requires_approval_tools:
282
+ tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
282
283
  tool_call_msg = ToolCallMessage(
283
284
  id=self.letta_message_id,
284
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
285
+ tool_call=tool_call_delta,
286
+ tool_calls=tool_call_delta,
285
287
  date=datetime.now(timezone.utc).isoformat(),
286
288
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
287
289
  run_id=self.run_id,
@@ -423,15 +425,17 @@ class AnthropicStreamingInterface:
423
425
  tool_call_args += buffered_msg.tool_call.arguments if buffered_msg.tool_call.arguments else ""
424
426
  tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
425
427
 
428
+ tool_call_delta = ToolCallDelta(
429
+ name=self.tool_call_name,
430
+ tool_call_id=self.tool_call_id,
431
+ arguments=tool_call_args,
432
+ )
426
433
  tool_call_msg = ToolCallMessage(
427
434
  id=self.tool_call_buffer[0].id,
428
435
  otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
429
436
  date=self.tool_call_buffer[0].date,
430
- tool_call=ToolCallDelta(
431
- name=self.tool_call_name,
432
- tool_call_id=self.tool_call_id,
433
- arguments=tool_call_args,
434
- ),
437
+ tool_call=tool_call_delta,
438
+ tool_calls=tool_call_delta,
435
439
  run_id=self.run_id,
436
440
  )
437
441
  prev_message_type = tool_call_msg.message_type
@@ -467,9 +471,13 @@ class AnthropicStreamingInterface:
467
471
  run_id=self.run_id,
468
472
  )
469
473
  else:
474
+ tool_call_delta = ToolCallDelta(
475
+ name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json
476
+ )
470
477
  tool_call_msg = ToolCallMessage(
471
478
  id=self.letta_message_id,
472
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
479
+ tool_call=tool_call_delta,
480
+ tool_calls=tool_call_delta,
473
481
  date=datetime.now(timezone.utc).isoformat(),
474
482
  run_id=self.run_id,
475
483
  )
@@ -778,9 +786,11 @@ class SimpleAnthropicStreamingInterface:
778
786
  else:
779
787
  if prev_message_type and prev_message_type != "tool_call_message":
780
788
  message_index += 1
789
+ tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id)
781
790
  tool_call_msg = ToolCallMessage(
782
791
  id=self.letta_message_id,
783
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
792
+ tool_call=tool_call_delta,
793
+ tool_calls=tool_call_delta,
784
794
  date=datetime.now(timezone.utc).isoformat(),
785
795
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
786
796
  run_id=self.run_id,
@@ -860,9 +870,11 @@ class SimpleAnthropicStreamingInterface:
860
870
  else:
861
871
  if prev_message_type and prev_message_type != "tool_call_message":
862
872
  message_index += 1
873
+ tool_call_delta = ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json)
863
874
  tool_call_msg = ToolCallMessage(
864
875
  id=self.letta_message_id,
865
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
876
+ tool_call=tool_call_delta,
877
+ tool_calls=tool_call_delta,
866
878
  date=datetime.now(timezone.utc).isoformat(),
867
879
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
868
880
  run_id=self.run_id,
@@ -273,15 +273,17 @@ class SimpleGeminiStreamingInterface:
273
273
  else:
274
274
  if prev_message_type and prev_message_type != "tool_call_message":
275
275
  message_index += 1
276
+ tool_call_delta = ToolCallDelta(
277
+ name=name,
278
+ arguments=arguments_str,
279
+ tool_call_id=call_id,
280
+ )
276
281
  yield ToolCallMessage(
277
282
  id=self.letta_message_id,
278
283
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
279
284
  date=datetime.now(timezone.utc),
280
- tool_call=ToolCallDelta(
281
- name=name,
282
- arguments=arguments_str,
283
- tool_call_id=call_id,
284
- ),
285
+ tool_call=tool_call_delta,
286
+ tool_calls=tool_call_delta,
285
287
  run_id=self.run_id,
286
288
  step_id=self.step_id,
287
289
  )
@@ -336,14 +336,16 @@ class OpenAIStreamingInterface:
336
336
  step_id=self.step_id,
337
337
  )
338
338
  else:
339
+ tool_call_delta = ToolCallDelta(
340
+ name=self.function_name_buffer,
341
+ arguments=None,
342
+ tool_call_id=self.function_id_buffer,
343
+ )
339
344
  tool_call_msg = ToolCallMessage(
340
345
  id=self.letta_message_id,
341
346
  date=datetime.now(timezone.utc),
342
- tool_call=ToolCallDelta(
343
- name=self.function_name_buffer,
344
- arguments=None,
345
- tool_call_id=self.function_id_buffer,
346
- ),
347
+ tool_call=tool_call_delta,
348
+ tool_calls=tool_call_delta,
347
349
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
348
350
  run_id=self.run_id,
349
351
  step_id=self.step_id,
@@ -423,14 +425,16 @@ class OpenAIStreamingInterface:
423
425
  step_id=self.step_id,
424
426
  )
425
427
  else:
428
+ tool_call_delta = ToolCallDelta(
429
+ name=self.function_name_buffer,
430
+ arguments=combined_chunk,
431
+ tool_call_id=self.function_id_buffer,
432
+ )
426
433
  tool_call_msg = ToolCallMessage(
427
434
  id=self.letta_message_id,
428
435
  date=datetime.now(timezone.utc),
429
- tool_call=ToolCallDelta(
430
- name=self.function_name_buffer,
431
- arguments=combined_chunk,
432
- tool_call_id=self.function_id_buffer,
433
- ),
436
+ tool_call=tool_call_delta,
437
+ tool_calls=tool_call_delta,
434
438
  # name=name,
435
439
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
436
440
  run_id=self.run_id,
@@ -460,14 +464,16 @@ class OpenAIStreamingInterface:
460
464
  step_id=self.step_id,
461
465
  )
462
466
  else:
467
+ tool_call_delta = ToolCallDelta(
468
+ name=None,
469
+ arguments=updates_main_json,
470
+ tool_call_id=self.function_id_buffer,
471
+ )
463
472
  tool_call_msg = ToolCallMessage(
464
473
  id=self.letta_message_id,
465
474
  date=datetime.now(timezone.utc),
466
- tool_call=ToolCallDelta(
467
- name=None,
468
- arguments=updates_main_json,
469
- tool_call_id=self.function_id_buffer,
470
- ),
475
+ tool_call=tool_call_delta,
476
+ tool_calls=tool_call_delta,
471
477
  # name=name,
472
478
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
473
479
  run_id=self.run_id,
@@ -717,14 +723,16 @@ class SimpleOpenAIStreamingInterface:
717
723
  step_id=self.step_id,
718
724
  )
719
725
  else:
726
+ tool_call_delta = ToolCallDelta(
727
+ name=tool_call.function.name,
728
+ arguments=tool_call.function.arguments,
729
+ tool_call_id=tool_call.id,
730
+ )
720
731
  tool_call_msg = ToolCallMessage(
721
732
  id=self.letta_message_id,
722
733
  date=datetime.now(timezone.utc),
723
- tool_call=ToolCallDelta(
724
- name=tool_call.function.name,
725
- arguments=tool_call.function.arguments,
726
- tool_call_id=tool_call.id,
727
- ),
734
+ tool_call=tool_call_delta,
735
+ tool_calls=tool_call_delta,
728
736
  # name=name,
729
737
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
730
738
  run_id=self.run_id,
@@ -945,15 +953,17 @@ class SimpleOpenAIResponsesStreamingInterface:
945
953
  else:
946
954
  if prev_message_type and prev_message_type != "tool_call_message":
947
955
  message_index += 1
956
+ tool_call_delta = ToolCallDelta(
957
+ name=name,
958
+ arguments=arguments if arguments != "" else None,
959
+ tool_call_id=call_id,
960
+ )
948
961
  yield ToolCallMessage(
949
962
  id=self.letta_message_id,
950
963
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
951
964
  date=datetime.now(timezone.utc),
952
- tool_call=ToolCallDelta(
953
- name=name,
954
- arguments=arguments if arguments != "" else None,
955
- tool_call_id=call_id,
956
- ),
965
+ tool_call=tool_call_delta,
966
+ tool_calls=tool_call_delta,
957
967
  run_id=self.run_id,
958
968
  step_id=self.step_id,
959
969
  )
@@ -1113,15 +1123,17 @@ class SimpleOpenAIResponsesStreamingInterface:
1113
1123
  else:
1114
1124
  if prev_message_type and prev_message_type != "tool_call_message":
1115
1125
  message_index += 1
1126
+ tool_call_delta = ToolCallDelta(
1127
+ name=None,
1128
+ arguments=delta,
1129
+ tool_call_id=None,
1130
+ )
1116
1131
  yield ToolCallMessage(
1117
1132
  id=self.letta_message_id,
1118
1133
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
1119
1134
  date=datetime.now(timezone.utc),
1120
- tool_call=ToolCallDelta(
1121
- name=None,
1122
- arguments=delta,
1123
- tool_call_id=None,
1124
- ),
1135
+ tool_call=tool_call_delta,
1136
+ tool_calls=tool_call_delta,
1125
1137
  run_id=self.run_id,
1126
1138
  step_id=self.step_id,
1127
1139
  )
@@ -56,6 +56,9 @@ class AnthropicClient(LLMClientBase):
56
56
  def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
57
57
  client = self._get_anthropic_client(llm_config, async_client=False)
58
58
  betas: list[str] = []
59
+ # Interleaved thinking for reasoner (sync path parity)
60
+ if llm_config.enable_reasoner:
61
+ betas.append("interleaved-thinking-2025-05-14")
59
62
  # 1M context beta for Sonnet 4/4.5 when enabled
60
63
  try:
61
64
  from letta.settings import model_settings
@@ -325,6 +328,7 @@ class AnthropicClient(LLMClientBase):
325
328
  data["system"] = self._add_cache_control_to_system_message(system_content)
326
329
  data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
327
330
  messages=messages[1:],
331
+ current_model=llm_config.model,
328
332
  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
329
333
  put_inner_thoughts_in_kwargs=put_kwargs,
330
334
  # if react, use native content + strip heartbeats
@@ -370,6 +374,7 @@ class AnthropicClient(LLMClientBase):
370
374
  async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
371
375
  logging.getLogger("httpx").setLevel(logging.WARNING)
372
376
 
377
+ # Use the default client; token counting is lightweight and does not require BYOK overrides
373
378
  client = anthropic.AsyncAnthropic()
374
379
  if messages and len(messages) == 0:
375
380
  messages = None
@@ -378,23 +383,20 @@ class AnthropicClient(LLMClientBase):
378
383
  else:
379
384
  anthropic_tools = None
380
385
 
386
+ # Detect presence of reasoning blocks anywhere in the final assistant message.
387
+ # Interleaved thinking is not guaranteed to be the first content part.
381
388
  thinking_enabled = False
382
389
  if messages and len(messages) > 0:
383
- # Check if the last assistant message starts with a thinking block
384
- # Find the last assistant message
385
- last_assistant_message = None
386
- for message in reversed(messages):
387
- if message.get("role") == "assistant":
388
- last_assistant_message = message
389
- break
390
-
391
- if (
392
- last_assistant_message
393
- and isinstance(last_assistant_message.get("content"), list)
394
- and len(last_assistant_message["content"]) > 0
395
- and last_assistant_message["content"][0].get("type") == "thinking"
396
- ):
397
- thinking_enabled = True
390
+ last_assistant_message = next((m for m in reversed(messages) if m.get("role") == "assistant"), None)
391
+ if last_assistant_message:
392
+ content = last_assistant_message.get("content")
393
+ if isinstance(content, list):
394
+ for part in content:
395
+ if isinstance(part, dict) and part.get("type") in {"thinking", "redacted_thinking"}:
396
+ thinking_enabled = True
397
+ break
398
+ elif isinstance(content, str) and "<thinking>" in content:
399
+ thinking_enabled = True
398
400
 
399
401
  try:
400
402
  count_params = {
@@ -403,9 +405,27 @@ class AnthropicClient(LLMClientBase):
403
405
  "tools": anthropic_tools or [],
404
406
  }
405
407
 
408
+ betas: list[str] = []
406
409
  if thinking_enabled:
410
+ # Match interleaved thinking behavior so token accounting is consistent
407
411
  count_params["thinking"] = {"type": "enabled", "budget_tokens": 16000}
408
- result = await client.beta.messages.count_tokens(**count_params)
412
+ betas.append("interleaved-thinking-2025-05-14")
413
+
414
+ # Opt-in to 1M context if enabled for this model in settings
415
+ try:
416
+ if (
417
+ model
418
+ and model_settings.anthropic_sonnet_1m
419
+ and (model.startswith("claude-sonnet-4") or model.startswith("claude-sonnet-4-5"))
420
+ ):
421
+ betas.append("context-1m-2025-08-07")
422
+ except Exception:
423
+ pass
424
+
425
+ if betas:
426
+ result = await client.beta.messages.count_tokens(**count_params, betas=betas)
427
+ else:
428
+ result = await client.beta.messages.count_tokens(**count_params)
409
429
  except:
410
430
  raise
411
431
 
@@ -311,6 +311,7 @@ class GoogleVertexClient(LLMClientBase):
311
311
  contents = self.add_dummy_model_messages(
312
312
  PydanticMessage.to_google_dicts_from_list(
313
313
  messages,
314
+ current_model=llm_config.model,
314
315
  put_inner_thoughts_in_kwargs=False if agent_type == AgentType.letta_v1_agent else True,
315
316
  native_content=True if agent_type == AgentType.letta_v1_agent else False,
316
317
  ),
letta/orm/__init__.py CHANGED
@@ -27,6 +27,7 @@ from letta.orm.prompt import Prompt
27
27
  from letta.orm.provider import Provider
28
28
  from letta.orm.provider_trace import ProviderTrace
29
29
  from letta.orm.run import Run
30
+ from letta.orm.run_metrics import RunMetrics
30
31
  from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
31
32
  from letta.orm.source import Source
32
33
  from letta.orm.sources_agents import SourcesAgents
@@ -0,0 +1,82 @@
1
+ from datetime import datetime, timezone
2
+ from typing import TYPE_CHECKING, Optional
3
+
4
+ from sqlalchemy import BigInteger, ForeignKey, Integer, String
5
+ from sqlalchemy.ext.asyncio import AsyncSession
6
+ from sqlalchemy.orm import Mapped, Session, mapped_column, relationship
7
+
8
+ from letta.orm.mixins import AgentMixin, OrganizationMixin, ProjectMixin, TemplateMixin
9
+ from letta.orm.sqlalchemy_base import SqlalchemyBase
10
+ from letta.schemas.run_metrics import RunMetrics as PydanticRunMetrics
11
+ from letta.schemas.user import User
12
+ from letta.settings import DatabaseChoice, settings
13
+
14
+ if TYPE_CHECKING:
15
+ from letta.orm.agent import Agent
16
+ from letta.orm.run import Run
17
+ from letta.orm.step import Step
18
+
19
+
20
class RunMetrics(SqlalchemyBase, ProjectMixin, AgentMixin, OrganizationMixin, TemplateMixin):
    """Per-run metrics row: start timestamp, total duration, and step count for a run."""

    __tablename__ = "run_metrics"
    __pydantic_model__ = PydanticRunMetrics

    # The primary key doubles as the foreign key to the owning run, so there is
    # at most one metrics row per run and it is removed when the run is deleted.
    id: Mapped[str] = mapped_column(
        ForeignKey("runs.id", ondelete="CASCADE"),
        primary_key=True,
        doc="The unique identifier of the run this metric belongs to (also serves as PK)",
    )
    run_start_ns: Mapped[Optional[int]] = mapped_column(
        BigInteger, nullable=True, doc="The timestamp of the start of the run in nanoseconds"
    )
    run_ns: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, doc="Total time for the run in nanoseconds")
    num_steps: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="The number of steps in the run")

    run: Mapped[Optional["Run"]] = relationship("Run", foreign_keys=[id])
    agent: Mapped[Optional["Agent"]] = relationship("Agent")

    def _backfill_sqlite_timestamps(self) -> None:
        """Set created_at/updated_at explicitly on SQLite, where server_default may not work."""
        if settings.database_engine != DatabaseChoice.SQLITE:
            return
        stamp = datetime.now(timezone.utc)
        # Only fill in values that are still unset; never overwrite existing ones.
        if not self.created_at:
            self.created_at = stamp
        if not self.updated_at:
            self.updated_at = stamp

    def create(
        self,
        db_session: Session,
        actor: Optional[User] = None,
        no_commit: bool = False,
    ) -> "RunMetrics":
        """Synchronous create that first back-fills timestamps when running on SQLite."""
        self._backfill_sqlite_timestamps()
        return super().create(db_session, actor=actor, no_commit=no_commit)

    async def create_async(
        self,
        db_session: AsyncSession,
        actor: Optional[User] = None,
        no_commit: bool = False,
        no_refresh: bool = False,
    ) -> "RunMetrics":
        """Async create that first back-fills timestamps when running on SQLite."""
        self._backfill_sqlite_timestamps()
        return await super().create_async(db_session, actor=actor, no_commit=no_commit, no_refresh=no_refresh)