letta-nightly 0.8.4.dev20250618104304__py3-none-any.whl → 0.8.5.dev20250619180801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. letta/__init__.py +1 -1
  2. letta/agents/letta_agent.py +54 -20
  3. letta/agents/voice_agent.py +47 -31
  4. letta/constants.py +1 -1
  5. letta/data_sources/redis_client.py +11 -6
  6. letta/functions/function_sets/builtin.py +35 -11
  7. letta/functions/prompts.py +26 -0
  8. letta/functions/types.py +6 -0
  9. letta/interfaces/openai_chat_completions_streaming_interface.py +0 -1
  10. letta/llm_api/anthropic.py +9 -1
  11. letta/llm_api/anthropic_client.py +22 -3
  12. letta/llm_api/aws_bedrock.py +10 -6
  13. letta/llm_api/llm_api_tools.py +3 -0
  14. letta/llm_api/openai_client.py +1 -1
  15. letta/orm/agent.py +14 -1
  16. letta/orm/job.py +3 -0
  17. letta/orm/provider.py +3 -1
  18. letta/schemas/agent.py +7 -0
  19. letta/schemas/embedding_config.py +8 -0
  20. letta/schemas/enums.py +0 -1
  21. letta/schemas/job.py +1 -0
  22. letta/schemas/providers.py +13 -5
  23. letta/server/rest_api/routers/v1/agents.py +76 -35
  24. letta/server/rest_api/routers/v1/providers.py +7 -7
  25. letta/server/rest_api/routers/v1/sources.py +39 -19
  26. letta/server/rest_api/routers/v1/tools.py +96 -31
  27. letta/services/agent_manager.py +8 -2
  28. letta/services/file_processor/chunker/llama_index_chunker.py +89 -1
  29. letta/services/file_processor/embedder/openai_embedder.py +6 -1
  30. letta/services/file_processor/parser/mistral_parser.py +2 -2
  31. letta/services/helpers/agent_manager_helper.py +44 -16
  32. letta/services/job_manager.py +35 -17
  33. letta/services/mcp/base_client.py +26 -1
  34. letta/services/mcp_manager.py +33 -18
  35. letta/services/provider_manager.py +30 -0
  36. letta/services/tool_executor/builtin_tool_executor.py +335 -43
  37. letta/services/tool_manager.py +25 -1
  38. letta/services/user_manager.py +1 -1
  39. letta/settings.py +3 -0
  40. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/METADATA +4 -3
  41. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/RECORD +44 -42
  42. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/LICENSE +0 -0
  43. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/WHEEL +0 -0
  44. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,6 +1,6 @@
  import os
 
- __version__ = "0.8.4"
+ __version__ = "0.8.5"
 
  if os.environ.get("LETTA_VERSION"):
      __version__ = os.environ["LETTA_VERSION"]
letta/agents/letta_agent.py CHANGED
@@ -1,6 +1,7 @@
  import asyncio
  import json
  import uuid
+ from datetime import datetime
  from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
 
  from openai import AsyncStream
@@ -13,7 +14,7 @@ from letta.agents.helpers import _create_letta_response, _prepare_in_context_mes
  from letta.constants import DEFAULT_MAX_STEPS
  from letta.errors import ContextWindowExceededError
  from letta.helpers import ToolRulesSolver
- from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
+ from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
  from letta.helpers.tool_execution_helper import enable_strict_mode
  from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
  from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,7 +26,7 @@ from letta.orm.enums import ToolType
  from letta.otel.context import get_ctx_attributes
  from letta.otel.metric_registry import MetricRegistry
  from letta.otel.tracing import log_event, trace_method, tracer
- from letta.schemas.agent import AgentState
+ from letta.schemas.agent import AgentState, UpdateAgent
  from letta.schemas.enums import MessageRole
  from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -283,8 +284,13 @@ class LettaAgent(BaseAgent):
          # log request time
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             now_datetime = get_utc_time()
+             await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
          request_span.end()
 
          # Return back usage
@@ -410,8 +416,13 @@ class LettaAgent(BaseAgent):
          # log request time
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             now_datetime = get_utc_time()
+             await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
          request_span.end()
 
          # Extend the in context message ids
@@ -426,6 +437,16 @@ class LettaAgent(BaseAgent):
 
          return current_in_context_messages, new_in_context_messages, usage, stop_reason
 
+     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+         try:
+             await self.agent_manager.update_agent_async(
+                 agent_id=self.agent_id,
+                 agent_update=UpdateAgent(last_run_completion=completion_time, last_run_duration_ms=duration_ms),
+                 actor=self.actor,
+             )
+         except Exception as e:
+             logger.error(f"Failed to update agent's last run metrics: {e}")
+
      @trace_method
      async def step_stream(
          self,
@@ -631,8 +652,13 @@ class LettaAgent(BaseAgent):
          # log time of entire request
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             completion_time = get_utc_time()
+             await self._update_agent_last_run_metrics(completion_time, duration_ms)
+
          request_span.end()
 
          for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -913,13 +939,13 @@ class LettaAgent(BaseAgent):
          except AssertionError:
              tool_args = json.loads(tool_args)
 
+         # Get request heartbeats and coerce to bool
+         request_heartbeat = tool_args.pop("request_heartbeat", False)
          if is_final_step:
              stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
              logger.info("Agent has reached max steps.")
              request_heartbeat = False
          else:
-             # Get request heartbeats and coerce to bool
-             request_heartbeat = tool_args.pop("request_heartbeat", False)
              # Pre-emptively pop out inner_thoughts
              tool_args.pop(INNER_THOUGHTS_KWARG, "")
@@ -940,7 +966,10 @@
              tool_call_id=tool_call_id,
              request_heartbeat=request_heartbeat,
          )
-         if tool_call_name not in valid_tool_names:
+         # Check if tool rule is violated - if so, we'll force continuation
+         tool_rule_violated = tool_call_name not in valid_tool_names
+
+         if tool_rule_violated:
              base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
              violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
              if violated_rule_messages:
@@ -969,7 +998,7 @@
 
          # get the function response limit
          target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
-         return_char_limit = target_tool.return_char_limit
+         return_char_limit = target_tool.return_char_limit if target_tool else None
          function_response_string = validate_function_response(
              tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
          )
@@ -981,15 +1010,20 @@
          # 4. Register tool call with tool rule solver
          # Resolve whether or not to continue stepping
          continue_stepping = request_heartbeat
-         tool_rules_solver.register_tool_call(tool_name=tool_call_name)
-         if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
-             if continue_stepping:
-                 stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
-             continue_stepping = False
-         elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
-             continue_stepping = True
-         elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+
+         # Force continuation if tool rule was violated to give the model another chance
+         if tool_rule_violated:
              continue_stepping = True
+         else:
+             tool_rules_solver.register_tool_call(tool_name=tool_call_name)
+             if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+                 if continue_stepping:
+                     stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                 continue_stepping = False
+             elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
+                 continue_stepping = True
+             elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+                 continue_stepping = True
 
          # 5a. Persist Steps to DB
          # Following agent loop to persist this before messages
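Note: the timing hunks above all follow one pattern: measure in nanoseconds, convert once to milliseconds for the span event, and persist the new last-run metrics best-effort so a storage failure can never fail the request. A minimal standalone sketch of that flow (update_last_run is a hypothetical stand-in for agent_manager.update_agent_async with an UpdateAgent payload):

import time
from datetime import datetime, timezone

def ns_to_ms(ns: int) -> float:
    # Nanoseconds -> milliseconds, as used for the span event attribute.
    return ns / 1_000_000

def update_last_run(completion_time: datetime, duration_ms: float) -> None:
    # Hypothetical stand-in for persisting last_run_completion / last_run_duration_ms.
    print(f"last_run_completion={completion_time.isoformat()} last_run_duration_ms={duration_ms:.1f}")

request_start_ns = time.time_ns()  # analogous to get_utc_timestamp_ns()
time.sleep(0.05)                   # ... the agent step would happen here ...
duration_ms = ns_to_ms(time.time_ns() - request_start_ns)

# Best-effort: failures are logged, never raised into the request path.
try:
    update_last_run(datetime.now(timezone.utc), duration_ms)
except Exception as e:
    print(f"Failed to update agent's last run metrics: {e}")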
letta/agents/voice_agent.py CHANGED
@@ -9,14 +9,9 @@ import openai
  from letta.agents.base_agent import BaseAgent
  from letta.agents.exceptions import IncompatibleAgentType
  from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
- from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
+ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG, REQUEST_HEARTBEAT_PARAM
  from letta.helpers.datetime_helpers import get_utc_time
- from letta.helpers.tool_execution_helper import (
-     add_pre_execution_message,
-     enable_strict_mode,
-     execute_external_tool,
-     remove_request_heartbeat,
- )
+ from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
  from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
  from letta.log import get_logger
  from letta.orm.enums import ToolType
@@ -47,6 +42,7 @@ from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
  from letta.services.summarizer.enums import SummarizationMode
  from letta.services.summarizer.summarizer import Summarizer
+ from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
  from letta.settings import model_settings
 
  logger = get_logger(__name__)
@@ -124,7 +120,11 @@ class VoiceAgent(BaseAgent):
 
          user_query = input_messages[0].content[0].text
 
-         agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)
+         agent_state = await self.agent_manager.get_agent_by_id_async(
+             agent_id=self.agent_id,
+             include_relationships=["tools", "memory", "tool_exec_environment_variables", "multi_agent_group"],
+             actor=self.actor,
+         )
 
          # TODO: Refactor this so it uses our in-house clients
          # TODO: For now, piggyback off of OpenAI client for ease
@@ -332,7 +332,12 @@
 
      def _build_tool_schemas(self, agent_state: AgentState, external_tools_only=True) -> List[Tool]:
          if external_tools_only:
-             tools = [t for t in agent_state.tools if t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
+             tools = [
+                 t
+                 for t in agent_state.tools
+                 if t.tool_type
+                 in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM, ToolType.LETTA_FILES_CORE, ToolType.LETTA_BUILTIN, ToolType.EXTERNAL_MCP}
+             ]
          else:
              tools = agent_state.tools
 
@@ -401,12 +406,10 @@
 
      async def _execute_tool(self, user_query: str, tool_name: str, tool_args: dict, agent_state: AgentState) -> "ToolExecutionResult":
          """
-         Executes a tool and returns (result, success_flag).
+         Executes a tool and returns the ToolExecutionResult.
          """
          from letta.schemas.tool_execution_result import ToolExecutionResult
 
-         print("EXECUTING TOOL")
-
          # Special memory case
          if tool_name == "search_memory":
              tool_result = await self._search_memory(
@@ -420,26 +423,39 @@
                  func_return=tool_result,
                  status="success",
              )
-         else:
-             target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
-             if not target_tool:
-                 return ToolExecutionResult(
-                     func_return=f"Tool not found: {tool_name}",
-                     status="error",
-                 )
 
-             try:
-                 tool_result, _ = execute_external_tool(
-                     agent_state=agent_state,
-                     function_name=tool_name,
-                     function_args=tool_args,
-                     target_letta_tool=target_tool,
-                     actor=self.actor,
-                     allow_agent_state_modifications=False,
-                 )
-                 return ToolExecutionResult(func_return=tool_result, status="success")
-             except Exception as e:
-                 return ToolExecutionResult(func_return=f"Failed to call tool. Error: {e}", status="error")
+         # Find the target tool
+         target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+         if not target_tool:
+             return ToolExecutionResult(
+                 func_return=f"Tool {tool_name} not found",
+                 status="error",
+             )
+
+         # Use ToolExecutionManager for modern tool execution
+         sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+         tool_execution_manager = ToolExecutionManager(
+             agent_state=agent_state,
+             message_manager=self.message_manager,
+             agent_manager=self.agent_manager,
+             block_manager=self.block_manager,
+             passage_manager=self.passage_manager,
+             sandbox_env_vars=sandbox_env_vars,
+             actor=self.actor,
+         )
+
+         # Remove request heartbeat / pre_exec_message
+         tool_args.pop(PRE_EXECUTION_MESSAGE_ARG, None)
+         tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
+
+         tool_execution_result = await tool_execution_manager.execute_tool_async(
+             function_name=tool_name,
+             function_args=tool_args,
+             tool=target_tool,
+             step_id=None,  # VoiceAgent doesn't use step tracking currently
+         )
+
+         return tool_execution_result
 
      async def _search_memory(
          self,
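Note: _execute_tool now strips letta's injected control arguments before dispatching through ToolExecutionManager, so they never reach the tool function itself. A tiny sketch of that scrubbing step (the literal value of PRE_EXECUTION_MESSAGE_ARG is an assumption here; "request_heartbeat" matches the literal popped in letta_agent.py above):

PRE_EXECUTION_MESSAGE_ARG = "pre_exec_message"  # assumed value; see letta.constants
REQUEST_HEARTBEAT_PARAM = "request_heartbeat"

def scrub_control_args(tool_args: dict) -> dict:
    # Control kwargs are protocol-level, not part of the tool's real signature.
    tool_args.pop(PRE_EXECUTION_MESSAGE_ARG, None)
    tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
    return tool_args

print(scrub_control_args({"city": "Berlin", "request_heartbeat": True}))
# -> {'city': 'Berlin'}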
letta/constants.py CHANGED
@@ -65,7 +65,7 @@ DEFAULT_EMBEDDING_CHUNK_SIZE = 300
 
  # tokenizers
  EMBEDDING_TO_TOKENIZER_MAP = {
-     "text-embedding-ada-002": "cl100k_base",
+     "text-embedding-3-small": "cl100k_base",
  }
  EMBEDDING_TO_TOKENIZER_DEFAULT = "cl100k_base"
 
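Note: a map like this is normally consumed with a lookup that falls back to EMBEDDING_TO_TOKENIZER_DEFAULT. A sketch using the tiktoken package (count_tokens is illustrative, not a letta API):

import tiktoken

EMBEDDING_TO_TOKENIZER_MAP = {"text-embedding-3-small": "cl100k_base"}
EMBEDDING_TO_TOKENIZER_DEFAULT = "cl100k_base"

def count_tokens(text: str, embedding_model: str) -> int:
    # Unknown models fall back to the default cl100k_base encoding.
    encoding_name = EMBEDDING_TO_TOKENIZER_MAP.get(embedding_model, EMBEDDING_TO_TOKENIZER_DEFAULT)
    return len(tiktoken.get_encoding(encoding_name).encode(text))

print(count_tokens("hello world", "text-embedding-3-small"))  # -> 2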
letta/data_sources/redis_client.py CHANGED
@@ -290,12 +290,17 @@ async def get_redis_client() -> AsyncRedisClient:
      try:
          from letta.settings import settings
 
-         _client_instance = AsyncRedisClient(
-             host=settings.redis_host or "localhost",
-             port=settings.redis_port or 6379,
-         )
-         await _client_instance.wait_for_ready(timeout=5)
-         logger.info("Redis client initialized")
+         # If Redis settings are not configured, use noop client
+         if settings.redis_host is None or settings.redis_port is None:
+             logger.info("Redis not configured, using noop client")
+             _client_instance = NoopAsyncRedisClient()
+         else:
+             _client_instance = AsyncRedisClient(
+                 host=settings.redis_host,
+                 port=settings.redis_port,
+             )
+             await _client_instance.wait_for_ready(timeout=5)
+             logger.info("Redis client initialized")
      except Exception as e:
          logger.warning(f"Failed to initialize Redis: {e}")
          _client_instance = NoopAsyncRedisClient()
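Note: the fallback relies on the null-object pattern: NoopAsyncRedisClient exposes the same surface as the real client but does nothing, so callers never branch on whether Redis is configured. A simplified, self-contained sketch of the idea (not letta's actual classes):

import asyncio

class NoopCache:
    """Null object: same interface as a real client, but inert."""
    async def get(self, key: str):
        return None
    async def set(self, key: str, value: str) -> None:
        pass

class DictCache(NoopCache):
    """Stand-in for the real Redis-backed client."""
    def __init__(self):
        self._data = {}
    async def get(self, key: str):
        return self._data.get(key)
    async def set(self, key: str, value: str) -> None:
        self._data[key] = value

def make_cache(host, port):
    # Mirrors get_redis_client(): unset settings -> noop client, no error raised.
    return NoopCache() if host is None or port is None else DictCache()

async def main():
    cache = make_cache(None, None)
    await cache.set("k", "v")
    print(await cache.get("k"))  # None: writes silently dropped when unconfigured

asyncio.run(main())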
letta/functions/function_sets/builtin.py CHANGED
@@ -1,27 +1,51 @@
- from typing import Literal
+ from typing import List, Literal
 
+ from letta.functions.types import SearchTask
 
- async def web_search(query: str) -> str:
+
+ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
      """
-     Search the web for information.
+     Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+
      Args:
-         query (str): The query to search the web for.
+         code (str): The code to run.
+         language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
      Returns:
-         str: The search results.
+         str: The output of the code, the stdout, the stderr, and error traces (if any).
      """
 
      raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
 
 
- def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
+ async def web_search(
+     tasks: List[SearchTask],
+     limit: int = 3,
+     return_raw: bool = False,
+ ) -> str:
      """
-     Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+     Search the web with a list of query/question pairs and extract passages that answer the corresponding questions.
+
+     Examples:
+         tasks -> [
+             SearchTask(
+                 query="Tesla Q1 2025 earnings report PDF",
+                 question="What was Tesla's net profit in Q1 2025?"
+             ),
+             SearchTask(
+                 query="Letta API prebuilt tools core_memory_append",
+                 question="What does the core_memory_append tool do in Letta?"
+             )
+         ]
 
      Args:
-         code (str): The code to run.
-         language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
+         tasks (List[SearchTask]): A list of search tasks, each containing a `query` and a corresponding `question`.
+         limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 3.
+         return_raw (bool, optional): If set to True, returns the raw content of the web pages.
+             This should be False unless otherwise specified by the user. Defaults to False.
+
      Returns:
-         str: The output of the code, the stdout, the stderr, and error traces (if any).
+         str: A JSON-encoded string containing a list of search results.
+             Each result includes ranked snippets with their source URLs and relevance scores,
+             corresponding to each search task.
      """
-
      raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
letta/functions/prompts.py ADDED
@@ -0,0 +1,26 @@
+ FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert at extracting relevant information from web content.
+
+ Given a document with line numbers (format: "LINE_NUM: content"), identify passages that answer the provided question by returning line ranges:
+ - start_line: The starting line number (inclusive)
+ - end_line: The ending line number (inclusive)
+
+ SELECTION PRINCIPLES:
+ 1. Prefer comprehensive passages that include full context
+ 2. Capture complete thoughts, examples, and explanations
+ 3. When relevant content spans multiple paragraphs, include the entire section
+ 4. Favor fewer, substantial passages over many fragments
+
+ Focus on passages that can stand alone as complete, meaningful responses."""
+
+
+ def get_firecrawl_search_user_prompt(query: str, question: str, numbered_content: str) -> str:
+     """Generate the user prompt for line-number based search analysis."""
+     return f"""Search Query: {query}
+ Question to Answer: {question}
+
+ Document Content (with line numbers):
+ {numbered_content}
+
+ Identify line ranges that best answer: "{question}"
+
+ Select comprehensive passages with full context. Include entire sections when relevant."""
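Note: the system prompt expects the document rendered as "LINE_NUM: content", so a caller must number the page before building the user prompt. An illustrative helper (not part of this diff):

def number_lines(document: str) -> str:
    # Render a document in the "LINE_NUM: content" shape the prompt expects.
    return "\n".join(f"{i}: {line}" for i, line in enumerate(document.splitlines(), start=1))

page = "Tesla reported Q1 2025 results.\nNet profit fell from the prior year."
numbered = number_lines(page)
print(numbered)
# 1: Tesla reported Q1 2025 results.
# 2: Net profit fell from the prior year.
# "numbered" would then be passed as numbered_content to get_firecrawl_search_user_prompt.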
letta/functions/types.py ADDED
@@ -0,0 +1,6 @@
+ from pydantic import BaseModel, Field
+
+
+ class SearchTask(BaseModel):
+     query: str = Field(description="Search query for web search")
+     question: str = Field(description="Question to answer from search results, considering full conversation context")
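Note: combined with the new web_search signature in builtin.py, a call would look roughly like the sketch below (SearchTask is redeclared so the snippet stands alone; the real tool raises NotImplementedError outside Letta's hosted agent architecture):

from pydantic import BaseModel, Field

class SearchTask(BaseModel):
    query: str = Field(description="Search query for web search")
    question: str = Field(description="Question to answer from search results, considering full conversation context")

tasks = [
    SearchTask(
        query="Tesla Q1 2025 earnings report PDF",
        question="What was Tesla's net profit in Q1 2025?",
    ),
]
print(tasks[0].model_dump())
# The agent-side call would then be: await web_search(tasks=tasks, limit=3)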
letta/interfaces/openai_chat_completions_streaming_interface.py CHANGED
@@ -16,7 +16,6 @@ class OpenAIChatCompletionsStreamingInterface:
      """
 
      def __init__(self, stream_pre_execution_message: bool = True):
-         print("CHAT COMPLETITION INTERFACE")
          self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
          self.stream_pre_execution_message: bool = stream_pre_execution_message
 
letta/llm_api/anthropic.py CHANGED
@@ -823,12 +823,20 @@
  def anthropic_bedrock_chat_completions_request(
      data: ChatCompletionRequest,
      inner_thoughts_xml_tag: Optional[str] = "thinking",
+     provider_name: Optional[str] = None,
+     provider_category: Optional[ProviderCategory] = None,
+     user_id: Optional[str] = None,
  ) -> ChatCompletionResponse:
      """Make a chat completion request to Anthropic via AWS Bedrock."""
      data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)
 
      # Get the client
-     client = get_bedrock_client()
+     if provider_category == ProviderCategory.byok:
+         actor = UserManager().get_user_or_default(user_id=user_id)
+         access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
+         client = get_bedrock_client(access_key, secret_key, region)
+     else:
+         client = get_bedrock_client()
 
      # Make the request
      try:
letta/llm_api/anthropic_client.py CHANGED
@@ -243,7 +243,8 @@ class AnthropicClient(LLMClientBase):
          # Move 'system' to the top level
          if messages[0].role != "system":
              raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
-         data["system"] = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+         system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+         data["system"] = self._add_cache_control_to_system_message(system_content)
          data["messages"] = [
              m.to_anthropic_dict(
                  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
@@ -315,9 +316,11 @@
 
          if isinstance(e, anthropic.BadRequestError):
              logger.warning(f"[Anthropic] Bad request: {str(e)}")
-             if "prompt is too long" in str(e).lower():
-                 # If the context window is too large, we expect to receive:
+             error_str = str(e).lower()
+             if "prompt is too long" in error_str or "exceed context limit" in error_str:
+                 # If the context window is too large, we expect to receive either:
                  # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 200758 tokens > 200000 maximum'}}
+                 # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'input length and `max_tokens` exceed context limit: 173298 + 32000 > 200000, decrease input length or `max_tokens` and try again'}}
                  return ContextWindowExceededError(
                      message=f"Bad request to Anthropic (context window exceeded): {str(e)}",
                  )
@@ -490,6 +493,22 @@
 
          return chat_completion_response
 
+     def _add_cache_control_to_system_message(self, system_content):
+         """Add cache control to system message content"""
+         if isinstance(system_content, str):
+             # For string content, convert to list format with cache control
+             return [{"type": "text", "text": system_content, "cache_control": {"type": "ephemeral"}}]
+         elif isinstance(system_content, list):
+             # For list content, add cache control to the last text block
+             cached_content = system_content.copy()
+             for i in range(len(cached_content) - 1, -1, -1):
+                 if cached_content[i].get("type") == "text":
+                     cached_content[i]["cache_control"] = {"type": "ephemeral"}
+                     break
+             return cached_content
+
+         return system_content
+
 
  def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
      """See: https://docs.anthropic.com/claude/docs/tool-use
letta/llm_api/aws_bedrock.py CHANGED
@@ -1,5 +1,5 @@
  import os
- from typing import Any, Dict, List
+ from typing import Any, Dict, List, Optional
 
  from anthropic import AnthropicBedrock
 
@@ -19,7 +19,11 @@ def has_valid_aws_credentials() -> bool:
      return valid_aws_credentials
 
 
- def get_bedrock_client():
+ def get_bedrock_client(
+     access_key: Optional[str] = None,
+     secret_key: Optional[str] = None,
+     region: Optional[str] = None,
+ ):
      """
      Get a Bedrock client
      """
@@ -28,9 +32,9 @@
      logger.debug(f"Getting Bedrock client for {model_settings.aws_region}")
      sts_client = boto3.client(
          "sts",
-         aws_access_key_id=model_settings.aws_access_key,
-         aws_secret_access_key=model_settings.aws_secret_access_key,
-         region_name=model_settings.aws_region,
+         aws_access_key_id=access_key or model_settings.aws_access_key,
+         aws_secret_access_key=secret_key or model_settings.aws_secret_access_key,
+         region_name=region or model_settings.aws_region,
      )
      credentials = sts_client.get_session_token()["Credentials"]
 
@@ -38,7 +42,7 @@
          aws_access_key=credentials["AccessKeyId"],
          aws_secret_key=credentials["SecretAccessKey"],
          aws_session_token=credentials["SessionToken"],
-         aws_region=model_settings.aws_region,
+         aws_region=region or model_settings.aws_region,
      )
      return bedrock
 
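Note: the BYOK plumbing is a per-argument fallback chain: an explicitly passed credential wins, otherwise the model_settings value is used. A sketch with plain values standing in for boto3 and letta's settings object:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeModelSettings:  # stand-in for letta.settings.model_settings
    aws_access_key: str = "env-access-key"
    aws_secret_access_key: str = "env-secret-key"
    aws_region: str = "us-east-1"

model_settings = FakeModelSettings()

def resolve_credentials(access_key: Optional[str] = None,
                        secret_key: Optional[str] = None,
                        region: Optional[str] = None):
    # Same "x or default" fallback used inside get_bedrock_client().
    return (
        access_key or model_settings.aws_access_key,
        secret_key or model_settings.aws_secret_access_key,
        region or model_settings.aws_region,
    )

print(resolve_credentials(region="eu-west-1"))
# ('env-access-key', 'env-secret-key', 'eu-west-1')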
letta/llm_api/llm_api_tools.py CHANGED
@@ -569,6 +569,9 @@ def create(
                  # NOTE: max_tokens is required for Anthropic API
                  max_tokens=llm_config.max_tokens,
              ),
+             provider_name=llm_config.provider_name,
+             provider_category=llm_config.provider_category,
+             user_id=user_id,
          )
 
      elif llm_config.model_endpoint_type == "deepseek":
letta/llm_api/openai_client.py CHANGED
@@ -53,7 +53,7 @@ def accepts_developer_role(model: str) -> bool:
 
      See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
      """
-     if is_openai_reasoning_model(model):
+     if is_openai_reasoning_model(model) and not "o1-mini" in model or "o1-preview" in model:
          return True
      else:
          return False
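Note: Python's precedence (not/in bind tighter than "and", which binds tighter than "or") makes the new condition evaluate as (is_openai_reasoning_model(model) and "o1-mini" not in model) or ("o1-preview" in model). A quick check with a stand-in predicate (illustrative only; the real is_openai_reasoning_model lives in this module):

def is_openai_reasoning_model(model: str) -> bool:
    # Stand-in predicate for illustration only.
    return model.startswith("o1") or model.startswith("o3")

for model in ["o1", "o1-mini", "o1-preview", "gpt-4o"]:
    result = is_openai_reasoning_model(model) and not "o1-mini" in model or "o1-preview" in model
    print(f"{model}: {result}")
# o1: True, o1-mini: False, o1-preview: True, gpt-4o: False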
letta/orm/agent.py CHANGED
@@ -1,8 +1,9 @@
  import asyncio
  import uuid
+ from datetime import datetime
  from typing import TYPE_CHECKING, List, Optional, Set
 
- from sqlalchemy import JSON, Boolean, Index, String
+ from sqlalchemy import JSON, Boolean, DateTime, Index, Integer, String
  from sqlalchemy.ext.asyncio import AsyncAttrs
  from sqlalchemy.orm import Mapped, mapped_column, relationship
 
@@ -80,6 +81,14 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
          Boolean, doc="If set to True, memory management will move to a background agent thread."
      )
 
+     # Run metrics
+     last_run_completion: Mapped[Optional[datetime]] = mapped_column(
+         DateTime(timezone=True), nullable=True, doc="The timestamp when the agent last completed a run."
+     )
+     last_run_duration_ms: Mapped[Optional[int]] = mapped_column(
+         Integer, nullable=True, doc="The duration in milliseconds of the agent's last run."
+     )
+
      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
      tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -176,6 +185,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
              "updated_at": self.updated_at,
              "enable_sleeptime": self.enable_sleeptime,
              "response_format": self.response_format,
+             "last_run_completion": self.last_run_completion,
+             "last_run_duration_ms": self.last_run_duration_ms,
              # optional field defaults
              "tags": [],
              "tools": [],
@@ -252,6 +263,8 @@
              "updated_at": self.updated_at,
              "enable_sleeptime": self.enable_sleeptime,
              "response_format": self.response_format,
+             "last_run_completion": self.last_run_completion,
+             "last_run_duration_ms": self.last_run_duration_ms,
          }
          optional_fields = {
              "tags": [],
letta/orm/job.py CHANGED
@@ -43,6 +43,9 @@ class Job(SqlalchemyBase, UserMixin):
      callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
      callback_sent_at: Mapped[Optional[datetime]] = mapped_column(nullable=True, doc="Timestamp when the callback was last attempted.")
      callback_status_code: Mapped[Optional[int]] = mapped_column(nullable=True, doc="HTTP status code returned by the callback endpoint.")
+     callback_error: Mapped[Optional[str]] = mapped_column(
+         nullable=True, doc="Optional error message from attempting to POST the callback endpoint."
+     )
 
      # relationships
      user: Mapped["User"] = relationship("User", back_populates="jobs")
letta/orm/provider.py CHANGED
@@ -27,8 +27,10 @@ class Provider(SqlalchemyBase, OrganizationMixin):
      name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
      provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
      provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
-     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
+     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key or secret key used for requests to the provider.")
      base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
+     access_key: Mapped[str] = mapped_column(nullable=True, doc="Access key used for requests to the provider.")
+     region: Mapped[str] = mapped_column(nullable=True, doc="Region used for requests to the provider.")
 
      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="providers")