letta-nightly 0.8.4.dev20250618104304__py3-none-any.whl → 0.8.5.dev20250619180801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/letta_agent.py +54 -20
- letta/agents/voice_agent.py +47 -31
- letta/constants.py +1 -1
- letta/data_sources/redis_client.py +11 -6
- letta/functions/function_sets/builtin.py +35 -11
- letta/functions/prompts.py +26 -0
- letta/functions/types.py +6 -0
- letta/interfaces/openai_chat_completions_streaming_interface.py +0 -1
- letta/llm_api/anthropic.py +9 -1
- letta/llm_api/anthropic_client.py +22 -3
- letta/llm_api/aws_bedrock.py +10 -6
- letta/llm_api/llm_api_tools.py +3 -0
- letta/llm_api/openai_client.py +1 -1
- letta/orm/agent.py +14 -1
- letta/orm/job.py +3 -0
- letta/orm/provider.py +3 -1
- letta/schemas/agent.py +7 -0
- letta/schemas/embedding_config.py +8 -0
- letta/schemas/enums.py +0 -1
- letta/schemas/job.py +1 -0
- letta/schemas/providers.py +13 -5
- letta/server/rest_api/routers/v1/agents.py +76 -35
- letta/server/rest_api/routers/v1/providers.py +7 -7
- letta/server/rest_api/routers/v1/sources.py +39 -19
- letta/server/rest_api/routers/v1/tools.py +96 -31
- letta/services/agent_manager.py +8 -2
- letta/services/file_processor/chunker/llama_index_chunker.py +89 -1
- letta/services/file_processor/embedder/openai_embedder.py +6 -1
- letta/services/file_processor/parser/mistral_parser.py +2 -2
- letta/services/helpers/agent_manager_helper.py +44 -16
- letta/services/job_manager.py +35 -17
- letta/services/mcp/base_client.py +26 -1
- letta/services/mcp_manager.py +33 -18
- letta/services/provider_manager.py +30 -0
- letta/services/tool_executor/builtin_tool_executor.py +335 -43
- letta/services/tool_manager.py +25 -1
- letta/services/user_manager.py +1 -1
- letta/settings.py +3 -0
- {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/METADATA +4 -3
- {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/RECORD +44 -42
- {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agents/letta_agent.py
CHANGED
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import uuid
+from datetime import datetime
 from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union

 from openai import AsyncStream
@@ -13,7 +14,7 @@ from letta.agents.helpers import _create_letta_response, _prepare_in_context_mes
 from letta.constants import DEFAULT_MAX_STEPS
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
+from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,7 +26,7 @@ from letta.orm.enums import ToolType
 from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
-from letta.schemas.agent import AgentState
+from letta.schemas.agent import AgentState, UpdateAgent
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -283,8 +284,13 @@ class LettaAgent(BaseAgent):
         # log request time
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
-
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(now - request_start_timestamp_ns)})
+            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+            # update agent's last run metrics
+            now_datetime = get_utc_time()
+            await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
         request_span.end()

         # Return back usage
@@ -410,8 +416,13 @@ class LettaAgent(BaseAgent):
         # log request time
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
-
-            request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(now - request_start_timestamp_ns)})
+            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+            request_span.add_event(name="request_ms", attributes={"duration_ms": duration_ms})
+
+            # update agent's last run metrics
+            now_datetime = get_utc_time()
+            await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
         request_span.end()

         # Extend the in context message ids
@@ -426,6 +437,16 @@ class LettaAgent(BaseAgent):

         return current_in_context_messages, new_in_context_messages, usage, stop_reason

+    async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+        try:
+            await self.agent_manager.update_agent_async(
+                agent_id=self.agent_id,
+                agent_update=UpdateAgent(last_run_completion=completion_time, last_run_duration_ms=duration_ms),
+                actor=self.actor,
+            )
+        except Exception as e:
+            logger.error(f"Failed to update agent's last run metrics: {e}")
+
     @trace_method
     async def step_stream(
         self,
@@ -631,8 +652,13 @@ class LettaAgent(BaseAgent):
         # log time of entire request
         if request_start_timestamp_ns:
             now = get_utc_timestamp_ns()
-
-            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(now - request_start_timestamp_ns)})
+            duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+            # update agent's last run metrics
+            completion_time = get_utc_time()
+            await self._update_agent_last_run_metrics(completion_time, duration_ms)
+
         request_span.end()

         for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -913,13 +939,13 @@ class LettaAgent(BaseAgent):
         except AssertionError:
             tool_args = json.loads(tool_args)

+        # Get request heartbeats and coerce to bool
+        request_heartbeat = tool_args.pop("request_heartbeat", False)
         if is_final_step:
             stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
             logger.info("Agent has reached max steps.")
             request_heartbeat = False
         else:
-            # Get request heartbeats and coerce to bool
-            request_heartbeat = tool_args.pop("request_heartbeat", False)
             # Pre-emptively pop out inner_thoughts
             tool_args.pop(INNER_THOUGHTS_KWARG, "")

@@ -940,7 +966,10 @@ class LettaAgent(BaseAgent):
             tool_call_id=tool_call_id,
             request_heartbeat=request_heartbeat,
         )
-        if tool_call_name not in valid_tool_names:
+        # Check if tool rule is violated - if so, we'll force continuation
+        tool_rule_violated = tool_call_name not in valid_tool_names
+
+        if tool_rule_violated:
             base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
             violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
             if violated_rule_messages:
@@ -969,7 +998,7 @@ class LettaAgent(BaseAgent):

         # get the function response limit
         target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
-        return_char_limit = target_tool.return_char_limit
+        return_char_limit = target_tool.return_char_limit if target_tool else None
         function_response_string = validate_function_response(
             tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
         )
@@ -981,15 +1010,20 @@ class LettaAgent(BaseAgent):
         # 4. Register tool call with tool rule solver
         # Resolve whether or not to continue stepping
         continue_stepping = request_heartbeat
-
-        if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
-            if continue_stepping:
-                stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
-            continue_stepping = False
-        elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
-            continue_stepping = True
-        elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+
+        # Force continuation if tool rule was violated to give the model another chance
+        if tool_rule_violated:
             continue_stepping = True
+        else:
+            tool_rules_solver.register_tool_call(tool_name=tool_call_name)
+            if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+                if continue_stepping:
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                continue_stepping = False
+            elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
+                continue_stepping = True
+            elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+                continue_stepping = True

         # 5a. Persist Steps to DB
         # Following agent loop to persist this before messages
letta/agents/voice_agent.py
CHANGED
@@ -9,14 +9,9 @@ import openai
 from letta.agents.base_agent import BaseAgent
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
-from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
+from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG, REQUEST_HEARTBEAT_PARAM
 from letta.helpers.datetime_helpers import get_utc_time
-from letta.helpers.tool_execution_helper import (
-    add_pre_execution_message,
-    enable_strict_mode,
-    execute_external_tool,
-    remove_request_heartbeat,
-)
+from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
 from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
 from letta.log import get_logger
 from letta.orm.enums import ToolType
@@ -47,6 +42,7 @@ from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.summarizer.enums import SummarizationMode
 from letta.services.summarizer.summarizer import Summarizer
+from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.settings import model_settings

 logger = get_logger(__name__)
@@ -124,7 +120,11 @@ class VoiceAgent(BaseAgent):

         user_query = input_messages[0].content[0].text

-        agent_state = await self.agent_manager.get_agent_by_id_async(
+        agent_state = await self.agent_manager.get_agent_by_id_async(
+            agent_id=self.agent_id,
+            include_relationships=["tools", "memory", "tool_exec_environment_variables", "multi_agent_group"],
+            actor=self.actor,
+        )

         # TODO: Refactor this so it uses our in-house clients
         # TODO: For now, piggyback off of OpenAI client for ease
@@ -332,7 +332,12 @@ class VoiceAgent(BaseAgent):

     def _build_tool_schemas(self, agent_state: AgentState, external_tools_only=True) -> List[Tool]:
         if external_tools_only:
-            tools = [
+            tools = [
+                t
+                for t in agent_state.tools
+                if t.tool_type
+                in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM, ToolType.LETTA_FILES_CORE, ToolType.LETTA_BUILTIN, ToolType.EXTERNAL_MCP}
+            ]
         else:
             tools = agent_state.tools

@@ -401,12 +406,10 @@ class VoiceAgent(BaseAgent):

     async def _execute_tool(self, user_query: str, tool_name: str, tool_args: dict, agent_state: AgentState) -> "ToolExecutionResult":
         """
-        Executes a tool and returns
+        Executes a tool and returns the ToolExecutionResult.
         """
         from letta.schemas.tool_execution_result import ToolExecutionResult

-        print("EXECUTING TOOL")
-
         # Special memory case
         if tool_name == "search_memory":
             tool_result = await self._search_memory(
@@ -420,26 +423,39 @@ class VoiceAgent(BaseAgent):
                 func_return=tool_result,
                 status="success",
             )
-        else:
-            target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
-            if not target_tool:
-                return ToolExecutionResult(
-                    func_return=f"Tool not found: {tool_name}",
-                    status="error",
-                )

-
-
-
-
-
-
-
-
-
-
-
-
+        # Find the target tool
+        target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+        if not target_tool:
+            return ToolExecutionResult(
+                func_return=f"Tool {tool_name} not found",
+                status="error",
+            )
+
+        # Use ToolExecutionManager for modern tool execution
+        sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+        tool_execution_manager = ToolExecutionManager(
+            agent_state=agent_state,
+            message_manager=self.message_manager,
+            agent_manager=self.agent_manager,
+            block_manager=self.block_manager,
+            passage_manager=self.passage_manager,
+            sandbox_env_vars=sandbox_env_vars,
+            actor=self.actor,
+        )
+
+        # Remove request heartbeat / pre_exec_message
+        tool_args.pop(PRE_EXECUTION_MESSAGE_ARG, None)
+        tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
+
+        tool_execution_result = await tool_execution_manager.execute_tool_async(
+            function_name=tool_name,
+            function_args=tool_args,
+            tool=target_tool,
+            step_id=None,  # VoiceAgent doesn't use step tracking currently
+        )
+
+        return tool_execution_result

     async def _search_memory(
         self,
letta/constants.py
CHANGED
letta/data_sources/redis_client.py
CHANGED
@@ -290,12 +290,17 @@ async def get_redis_client() -> AsyncRedisClient:
         try:
             from letta.settings import settings

-
-
-
-
-
-
+            # If Redis settings are not configured, use noop client
+            if settings.redis_host is None or settings.redis_port is None:
+                logger.info("Redis not configured, using noop client")
+                _client_instance = NoopAsyncRedisClient()
+            else:
+                _client_instance = AsyncRedisClient(
+                    host=settings.redis_host,
+                    port=settings.redis_port,
+                )
+                await _client_instance.wait_for_ready(timeout=5)
+                logger.info("Redis client initialized")
         except Exception as e:
             logger.warning(f"Failed to initialize Redis: {e}")
             _client_instance = NoopAsyncRedisClient()
letta/functions/function_sets/builtin.py
CHANGED
@@ -1,27 +1,51 @@
-from typing import Literal
+from typing import List, Literal

+from letta.functions.types import SearchTask

-
+
+def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
     """
-
+    Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+
     Args:
-
+        code (str): The code to run.
+        language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
     Returns:
-        str: The
+        str: The output of the code, the stdout, the stderr, and error traces (if any).
     """

     raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")


-def
+async def web_search(
+    tasks: List[SearchTask],
+    limit: int = 3,
+    return_raw: bool = False,
+) -> str:
     """
-
+    Search the web with a list of query/question pairs and extract passages that answer the corresponding questions.
+
+    Examples:
+    tasks -> [
+        SearchTask(
+            query="Tesla Q1 2025 earnings report PDF",
+            question="What was Tesla's net profit in Q1 2025?"
+        ),
+        SearchTask(
+            query="Letta API prebuilt tools core_memory_append",
+            question="What does the core_memory_append tool do in Letta?"
+        )
+    ]

     Args:
-
-
+        tasks (List[SearchTask]): A list of search tasks, each containing a `query` and a corresponding `question`.
+        limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 3.
+        return_raw (bool, optional): If set to True, returns the raw content of the web pages.
+            This should be False unless otherwise specified by the user. Defaults to False.
+
     Returns:
-        str:
+        str: A JSON-encoded string containing a list of search results.
+            Each result includes ranked snippets with their source URLs and relevance scores,
+            corresponding to each search task.
     """
-
     raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
letta/functions/prompts.py
ADDED
@@ -0,0 +1,26 @@
+FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert at extracting relevant information from web content.
+
+Given a document with line numbers (format: "LINE_NUM: content"), identify passages that answer the provided question by returning line ranges:
+- start_line: The starting line number (inclusive)
+- end_line: The ending line number (inclusive)
+
+SELECTION PRINCIPLES:
+1. Prefer comprehensive passages that include full context
+2. Capture complete thoughts, examples, and explanations
+3. When relevant content spans multiple paragraphs, include the entire section
+4. Favor fewer, substantial passages over many fragments
+
+Focus on passages that can stand alone as complete, meaningful responses."""
+
+
+def get_firecrawl_search_user_prompt(query: str, question: str, numbered_content: str) -> str:
+    """Generate the user prompt for line-number based search analysis."""
+    return f"""Search Query: {query}
+Question to Answer: {question}
+
+Document Content (with line numbers):
+{numbered_content}
+
+Identify line ranges that best answer: "{question}"
+
+Select comprehensive passages with full context. Include entire sections when relevant."""
letta/functions/types.py
ADDED
letta/interfaces/openai_chat_completions_streaming_interface.py
CHANGED
@@ -16,7 +16,6 @@ class OpenAIChatCompletionsStreamingInterface:
     """

     def __init__(self, stream_pre_execution_message: bool = True):
-        print("CHAT COMPLETITION INTERFACE")
         self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
         self.stream_pre_execution_message: bool = stream_pre_execution_message
letta/llm_api/anthropic.py
CHANGED
@@ -823,12 +823,20 @@ def anthropic_chat_completions_request(
 def anthropic_bedrock_chat_completions_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    provider_name: Optional[str] = None,
+    provider_category: Optional[ProviderCategory] = None,
+    user_id: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """Make a chat completion request to Anthropic via AWS Bedrock."""
     data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)

     # Get the client
-    client = get_bedrock_client()
+    if provider_category == ProviderCategory.byok:
+        actor = UserManager().get_user_or_default(user_id=user_id)
+        access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
+        client = get_bedrock_client(access_key, secret_key, region)
+    else:
+        client = get_bedrock_client()

     # Make the request
     try:
letta/llm_api/anthropic_client.py
CHANGED
@@ -243,7 +243,8 @@ class AnthropicClient(LLMClientBase):
         # Move 'system' to the top level
         if messages[0].role != "system":
             raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
-        data["system"] = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+        system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+        data["system"] = self._add_cache_control_to_system_message(system_content)
         data["messages"] = [
             m.to_anthropic_dict(
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
@@ -315,9 +316,11 @@ class AnthropicClient(LLMClientBase):

         if isinstance(e, anthropic.BadRequestError):
             logger.warning(f"[Anthropic] Bad request: {str(e)}")
-            if "prompt is too long" in str(e):
-                # If the context window is too large, we expect to receive:
+            error_str = str(e).lower()
+            if "prompt is too long" in error_str or "exceed context limit" in error_str:
+                # If the context window is too large, we expect to receive either:
                 # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 200758 tokens > 200000 maximum'}}
+                # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'input length and `max_tokens` exceed context limit: 173298 + 32000 > 200000, decrease input length or `max_tokens` and try again'}}
                 return ContextWindowExceededError(
                     message=f"Bad request to Anthropic (context window exceeded): {str(e)}",
                 )
@@ -490,6 +493,22 @@ class AnthropicClient(LLMClientBase):

         return chat_completion_response

+    def _add_cache_control_to_system_message(self, system_content):
+        """Add cache control to system message content"""
+        if isinstance(system_content, str):
+            # For string content, convert to list format with cache control
+            return [{"type": "text", "text": system_content, "cache_control": {"type": "ephemeral"}}]
+        elif isinstance(system_content, list):
+            # For list content, add cache control to the last text block
+            cached_content = system_content.copy()
+            for i in range(len(cached_content) - 1, -1, -1):
+                if cached_content[i].get("type") == "text":
+                    cached_content[i]["cache_control"] = {"type": "ephemeral"}
+                    break
+            return cached_content
+
+        return system_content
+

 def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
letta/llm_api/aws_bedrock.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 from anthropic import AnthropicBedrock

@@ -19,7 +19,11 @@ def has_valid_aws_credentials() -> bool:
     return valid_aws_credentials


-def get_bedrock_client():
+def get_bedrock_client(
+    access_key: Optional[str] = None,
+    secret_key: Optional[str] = None,
+    region: Optional[str] = None,
+):
     """
     Get a Bedrock client
     """
@@ -28,9 +32,9 @@ def get_bedrock_client():
     logger.debug(f"Getting Bedrock client for {model_settings.aws_region}")
     sts_client = boto3.client(
         "sts",
-        aws_access_key_id=model_settings.aws_access_key,
-        aws_secret_access_key=model_settings.aws_secret_access_key,
-        region_name=model_settings.aws_region,
+        aws_access_key_id=access_key or model_settings.aws_access_key,
+        aws_secret_access_key=secret_key or model_settings.aws_secret_access_key,
+        region_name=region or model_settings.aws_region,
     )
     credentials = sts_client.get_session_token()["Credentials"]

@@ -38,7 +42,7 @@
         aws_access_key=credentials["AccessKeyId"],
         aws_secret_key=credentials["SecretAccessKey"],
         aws_session_token=credentials["SessionToken"],
-        aws_region=model_settings.aws_region,
+        aws_region=region or model_settings.aws_region,
     )
     return bedrock
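get_bedrock_client now resolves credentials with an or-fallback: per-request BYOK values win, and server-wide model_settings fill the gaps. A minimal sketch of just that resolution order (the settings object below is a stand-in, not Letta's model_settings):

from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeModelSettings:
    """Stand-in for letta.settings.model_settings."""

    aws_access_key: Optional[str] = "server-access-key"
    aws_secret_access_key: Optional[str] = "server-secret-key"
    aws_region: Optional[str] = "us-east-1"


model_settings = FakeModelSettings()


def resolve_credentials(access_key=None, secret_key=None, region=None):
    """Per-call (BYOK) values take precedence; server settings are the fallback."""
    return (
        access_key or model_settings.aws_access_key,
        secret_key or model_settings.aws_secret_access_key,
        region or model_settings.aws_region,
    )


print(resolve_credentials(region="eu-west-1"))
# ('server-access-key', 'server-secret-key', 'eu-west-1')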
letta/llm_api/llm_api_tools.py
CHANGED
@@ -569,6 +569,9 @@ def create(
                 # NOTE: max_tokens is required for Anthropic API
                 max_tokens=llm_config.max_tokens,
             ),
+            provider_name=llm_config.provider_name,
+            provider_category=llm_config.provider_category,
+            user_id=user_id,
         )

     elif llm_config.model_endpoint_type == "deepseek":
letta/llm_api/openai_client.py
CHANGED
@@ -53,7 +53,7 @@ def accepts_developer_role(model: str) -> bool:

     See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
     """
-    if is_openai_reasoning_model(model):
+    if is_openai_reasoning_model(model) and not "o1-mini" in model or "o1-preview" in model:
         return True
     else:
         return False
letta/orm/agent.py
CHANGED
@@ -1,8 +1,9 @@
 import asyncio
 import uuid
+from datetime import datetime
 from typing import TYPE_CHECKING, List, Optional, Set

-from sqlalchemy import JSON, Boolean, Index, String
+from sqlalchemy import JSON, Boolean, DateTime, Index, Integer, String
 from sqlalchemy.ext.asyncio import AsyncAttrs
 from sqlalchemy.orm import Mapped, mapped_column, relationship

@@ -80,6 +81,14 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
         Boolean, doc="If set to True, memory management will move to a background agent thread."
     )

+    # Run metrics
+    last_run_completion: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True, doc="The timestamp when the agent last completed a run."
+    )
+    last_run_duration_ms: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True, doc="The duration in milliseconds of the agent's last run."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -176,6 +185,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
            "updated_at": self.updated_at,
            "enable_sleeptime": self.enable_sleeptime,
            "response_format": self.response_format,
+           "last_run_completion": self.last_run_completion,
+           "last_run_duration_ms": self.last_run_duration_ms,
            # optional field defaults
            "tags": [],
            "tools": [],
@@ -252,6 +263,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
            "updated_at": self.updated_at,
            "enable_sleeptime": self.enable_sleeptime,
            "response_format": self.response_format,
+           "last_run_completion": self.last_run_completion,
+           "last_run_duration_ms": self.last_run_duration_ms,
         }
         optional_fields = {
            "tags": [],
letta/orm/job.py
CHANGED
@@ -43,6 +43,9 @@ class Job(SqlalchemyBase, UserMixin):
     callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
     callback_sent_at: Mapped[Optional[datetime]] = mapped_column(nullable=True, doc="Timestamp when the callback was last attempted.")
     callback_status_code: Mapped[Optional[int]] = mapped_column(nullable=True, doc="HTTP status code returned by the callback endpoint.")
+    callback_error: Mapped[Optional[str]] = mapped_column(
+        nullable=True, doc="Optional error message from attempting to POST the callback endpoint."
+    )

     # relationships
     user: Mapped["User"] = relationship("User", back_populates="jobs")
letta/orm/provider.py
CHANGED
@@ -27,8 +27,10 @@ class Provider(SqlalchemyBase, OrganizationMixin):
     name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
     provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
     provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
-    api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
+    api_key: Mapped[str] = mapped_column(nullable=True, doc="API key or secret key used for requests to the provider.")
     base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
+    access_key: Mapped[str] = mapped_column(nullable=True, doc="Access key used for requests to the provider.")
+    region: Mapped[str] = mapped_column(nullable=True, doc="Region used for requests to the provider.")

     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="providers")