letta-nightly 0.11.2.dev20250810104230__py3-none-any.whl → 0.11.3.dev20250812002120__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/letta_agent.py +22 -10
- letta/constants.py +7 -0
- letta/functions/function_sets/base.py +1 -1
- letta/helpers/converters.py +19 -0
- letta/helpers/json_helpers.py +1 -1
- letta/helpers/tool_rule_solver.py +48 -96
- letta/interfaces/openai_streaming_interface.py +9 -0
- letta/llm_api/anthropic_client.py +9 -2
- letta/llm_api/google_vertex_client.py +17 -4
- letta/llm_api/llm_client_base.py +4 -0
- letta/llm_api/openai_client.py +4 -1
- letta/log.py +3 -1
- letta/schemas/enums.py +4 -3
- letta/schemas/llm_config.py +35 -25
- letta/schemas/response_format.py +5 -6
- letta/schemas/tool_rule.py +8 -1
- letta/services/agent_manager.py +2 -3
- letta/services/mcp/base_client.py +6 -2
- letta/services/mcp_manager.py +11 -5
- letta/services/tool_executor/tool_execution_sandbox.py +8 -4
- letta/services/tool_manager.py +66 -42
- letta/services/tool_sandbox/e2b_sandbox.py +4 -2
- letta/services/tool_sandbox/modal_sandbox.py +4 -4
- letta/settings.py +2 -1
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/RECORD +30 -30
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agents/letta_agent.py
CHANGED
@@ -220,6 +220,7 @@ class LettaAgent(BaseAgent):
             actor=self.actor,
         )
         stop_reason = None
+        job_update_metadata = None
         usage = LettaUsageStatistics()

         # span for request
@@ -367,6 +368,7 @@ class LettaAgent(BaseAgent):
             except Exception as e:
                 # Handle any unexpected errors during step processing
                 self.logger.error(f"Error during step processing: {e}")
+                job_update_metadata = {"error": str(e)}

                 # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                 if not stop_reason:
@@ -429,7 +431,7 @@ class LettaAgent(BaseAgent):
                     self.logger.error("Invalid StepProgression value")

                 if settings.track_stop_reason:
-                    await self._log_request(request_start_timestamp_ns, request_span)
+                    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

             except Exception as e:
                 self.logger.error("Failed to update step: %s", e)
@@ -447,7 +449,7 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-        await self._log_request(request_start_timestamp_ns, request_span)
+        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        # Return back usage
        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -485,6 +487,7 @@ class LettaAgent(BaseAgent):
            request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

        stop_reason = None
+       job_update_metadata = None
        usage = LettaUsageStatistics()
        for i in range(max_steps):
            # If dry run, build request data and return it without making LLM call
@@ -622,6 +625,7 @@ class LettaAgent(BaseAgent):
            except Exception as e:
                # Handle any unexpected errors during step processing
                self.logger.error(f"Error during step processing: {e}")
+               job_update_metadata = {"error": str(e)}

                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                if not stop_reason:
@@ -680,7 +684,7 @@ class LettaAgent(BaseAgent):
                    self.logger.error("Invalid StepProgression value")

                if settings.track_stop_reason:
-                   await self._log_request(request_start_timestamp_ns, request_span)
+                   await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -698,7 +702,7 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-       await self._log_request(request_start_timestamp_ns, request_span)
+       await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        return current_in_context_messages, new_in_context_messages, stop_reason, usage

@@ -748,6 +752,7 @@ class LettaAgent(BaseAgent):
            actor=self.actor,
        )
        stop_reason = None
+       job_update_metadata = None
        usage = LettaUsageStatistics()
        first_chunk, request_span = True, None
        if request_start_timestamp_ns:
@@ -977,6 +982,7 @@ class LettaAgent(BaseAgent):
            except Exception as e:
                # Handle any unexpected errors during step processing
                self.logger.error(f"Error during step processing: {e}")
+               job_update_metadata = {"error": str(e)}

                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                if not stop_reason:
@@ -1039,7 +1045,7 @@ class LettaAgent(BaseAgent):

                # Do tracking for failure cases. Can consolidate with success conditions later.
                if settings.track_stop_reason:
-                   await self._log_request(request_start_timestamp_ns, request_span)
+                   await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -1056,20 +1062,28 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-       await self._log_request(request_start_timestamp_ns, request_span)
+       await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
            yield f"data: {finish_chunk}\n\n"

-    async def _log_request(
+    async def _log_request(
+        self, request_start_timestamp_ns: int, request_span: "Span | None", job_update_metadata: dict | None, is_error: bool
+    ):
        if request_start_timestamp_ns:
            now_ns, now = get_utc_timestamp_ns(), get_utc_time()
            duration_ns = now_ns - request_start_timestamp_ns
            if request_span:
                request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
            await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
-           if self.current_run_id:
+           if settings.track_agent_run and self.current_run_id:
                await self.job_manager.record_response_duration(self.current_run_id, duration_ns, self.actor)
+               await self.job_manager.safe_update_job_status_async(
+                   job_id=self.current_run_id,
+                   new_status=JobStatus.failed if is_error else JobStatus.completed,
+                   actor=self.actor,
+                   metadata=job_update_metadata,
+               )
        if request_span:
            request_span.end()

@@ -1507,8 +1521,6 @@ class LettaAgent(BaseAgent):
                status="error",
            )

-           print(target_tool)
-
            # TODO: This temp. Move this logic and code to executors

            if agent_step_span:
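The net effect of the letta_agent.py changes above: error details collected during the step loop are attached to the run's job record, and the terminal job status (failed vs. completed) is reported from a single logging helper. A minimal standalone sketch of that pattern; JobStore and run_with_job_tracking are hypothetical stand-ins, not letta APIs.

import time


class JobStore:
    """Hypothetical stand-in for letta's job manager."""

    def __init__(self):
        self.jobs = {}

    def update(self, job_id: str, status: str, metadata: dict | None) -> None:
        self.jobs[job_id] = {"status": status, "metadata": metadata}


def run_with_job_tracking(job_id: str, store: JobStore, step) -> None:
    start_ns = time.monotonic_ns()
    job_update_metadata = None
    is_error = False
    try:
        step()
    except Exception as e:
        # mirror the diff: capture the error so it can be attached to the job record
        job_update_metadata = {"error": str(e)}
        is_error = True
    finally:
        # single exit point, analogous in spirit to _log_request(..., is_error=...)
        duration_ms = (time.monotonic_ns() - start_ns) // 1_000_000
        store.update(
            job_id,
            "failed" if is_error else "completed",
            {"duration_ms": duration_ms, **(job_update_metadata or {})},
        )


store = JobStore()
run_with_job_tracking("run-1", store, lambda: None)
assert store.jobs["run-1"]["status"] == "completed"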
letta/constants.py
CHANGED
@@ -208,6 +208,13 @@ LLM_MAX_TOKENS = {
     "deepseek-chat": 64000,
     "deepseek-reasoner": 64000,
     ## OpenAI models: https://platform.openai.com/docs/models/overview
+    # gpt-5
+    "gpt-5": 400000,
+    "gpt-5-2025-08-07": 400000,
+    "gpt-5-mini": 400000,
+    "gpt-5-mini-2025-08-07": 400000,
+    "gpt-5-nano": 400000,
+    "gpt-5-nano-2025-08-07": 400000,
     # reasoners
     "o1": 200000,
     # "o1-pro": 200000,  # responses API only
letta/functions/function_sets/base.py
CHANGED
@@ -264,7 +264,7 @@ def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_li

     Args:
         label (str): Section of the memory to be edited, identified by its label.
-        new_str (str): The text to insert.
+        new_str (str): The text to insert. Do not include line number prefixes.
         insert_line (int): The line number after which to insert the text (0 for beginning of file). Defaults to -1 (end of the file).

     Returns:
letta/helpers/converters.py
CHANGED
@@ -395,6 +395,24 @@ def deserialize_agent_step_state(data: Optional[Dict]) -> Optional[AgentStepStat
     if not data:
         return None

+    if solver_data := data.get("tool_rules_solver"):
+        # Get existing tool_rules or reconstruct from categorized fields for backwards compatibility
+        tool_rules_data = solver_data.get("tool_rules", [])
+
+        if not tool_rules_data:
+            for field_name in (
+                "init_tool_rules",
+                "continue_tool_rules",
+                "child_based_tool_rules",
+                "parent_tool_rules",
+                "terminal_tool_rules",
+                "required_before_exit_tool_rules",
+            ):
+                if field_data := solver_data.get(field_name):
+                    tool_rules_data.extend(field_data)
+
+        solver_data["tool_rules"] = deserialize_tool_rules(tool_rules_data)
+
     return AgentStepState(**data)


@@ -418,6 +436,7 @@ def deserialize_response_format(data: Optional[Dict]) -> Optional[ResponseFormat
         return JsonSchemaResponseFormat(**data)
     if data["type"] == ResponseFormatType.json_object:
         return JsonObjectResponseFormat(**data)
+    raise ValueError(f"Unknown Response Format type: {data['type']}")


 # --------------------------
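The backwards-compatibility branch added to deserialize_agent_step_state can be restated independently of letta's schema classes. The sketch below uses plain dicts, with field names taken from the diff; merge_legacy_tool_rules is a hypothetical helper.

CATEGORIZED_FIELDS = (
    "init_tool_rules",
    "continue_tool_rules",
    "child_based_tool_rules",
    "parent_tool_rules",
    "terminal_tool_rules",
    "required_before_exit_tool_rules",
)


def merge_legacy_tool_rules(solver_data: dict) -> list[dict]:
    """Prefer the new flat 'tool_rules' list; otherwise rebuild it from the
    old categorized fields so previously-serialized step state still loads."""
    tool_rules = list(solver_data.get("tool_rules", []))
    if not tool_rules:
        for field_name in CATEGORIZED_FIELDS:
            tool_rules.extend(solver_data.get(field_name) or [])
    return tool_rules


# example: an old payload with only categorized fields still yields a flat list
old_payload = {"init_tool_rules": [{"tool_name": "send_message"}]}
assert merge_legacy_tool_rules(old_payload) == [{"tool_name": "send_message"}]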
letta/helpers/json_helpers.py
CHANGED
@@ -15,7 +15,7 @@ def json_dumps(data, indent=2) -> str:
         try:
             return obj.decode("utf-8")
         except Exception:
-
+            # TODO: this is to handle Gemini thought signatures, b64 decode this back to bytes when sending back to Gemini
             return base64.b64encode(obj).decode("utf-8")
     raise TypeError(f"Type {type(obj)} not serializable")

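For context, the surrounding json_dumps helper uses a common json.dumps default-handler pattern: bytes are returned as UTF-8 text when possible and base64-encoded otherwise. A generic, self-contained illustration of that pattern (not letta's exact helper):

import base64
import json


def default(obj):
    # decode bytes when they are valid UTF-8, otherwise fall back to base64
    if isinstance(obj, bytes):
        try:
            return obj.decode("utf-8")
        except Exception:
            return base64.b64encode(obj).decode("utf-8")
    raise TypeError(f"Type {type(obj)} not serializable")


print(json.dumps({"sig": b"\xff\xfe"}, default=default))  # {"sig": "//4="}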
letta/helpers/tool_rule_solver.py
CHANGED
@@ -1,11 +1,9 @@
-from typing import
+from typing import TypeAlias

 from pydantic import BaseModel, Field

 from letta.schemas.block import Block
-from letta.schemas.enums import ToolRuleType
 from letta.schemas.tool_rule import (
-    BaseToolRule,
     ChildToolRule,
     ConditionalToolRule,
     ContinueToolRule,
@@ -14,88 +12,61 @@ from letta.schemas.tool_rule import (
     ParentToolRule,
     RequiredBeforeExitToolRule,
     TerminalToolRule,
+    ToolRule,
 )

+ToolName: TypeAlias = str

-
-    """Custom exception for tool rule validation errors in ToolRulesSolver."""
-
-    def __init__(self, message: str):
-        super().__init__(f"ToolRuleValidationError: {message}")
+COMPILED_PROMPT_DESCRIPTION = "The following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls."


 class ToolRulesSolver(BaseModel):
-
-
+    tool_rules: list[ToolRule] | None = Field(default=None, description="Input list of tool rules")
+
+    # Categorized fields
+    init_tool_rules: list[InitToolRule] = Field(
+        default_factory=list, description="Initial tool rules to be used at the start of tool execution.", exclude=True
     )
-    continue_tool_rules:
-        default_factory=list, description="Continue tool rules to be used to continue tool execution."
+    continue_tool_rules: list[ContinueToolRule] = Field(
+        default_factory=list, description="Continue tool rules to be used to continue tool execution.", exclude=True
     )
     # TODO: This should be renamed?
     # TODO: These are tools that control the set of allowed functions in the next turn
-    child_based_tool_rules:
-        default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions."
+    child_based_tool_rules: list[ChildToolRule | ConditionalToolRule | MaxCountPerStepToolRule] = Field(
+        default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions.", exclude=True
     )
-    parent_tool_rules:
-        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set."
+    parent_tool_rules: list[ParentToolRule] = Field(
+        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set.", exclude=True
     )
-    terminal_tool_rules:
-        default_factory=list, description="Terminal tool rules that end the agent loop if called."
+    terminal_tool_rules: list[TerminalToolRule] = Field(
+        default_factory=list, description="Terminal tool rules that end the agent loop if called.", exclude=True
     )
-    required_before_exit_tool_rules:
-        default_factory=list, description="Tool rules that must be called before the agent can exit."
+    required_before_exit_tool_rules: list[RequiredBeforeExitToolRule] = Field(
+        default_factory=list, description="Tool rules that must be called before the agent can exit.", exclude=True
     )
-    tool_call_history:
-
-    def __init__(
-        self,
-        tool_rules: Optional[List[BaseToolRule]] = None,
-        init_tool_rules: Optional[List[InitToolRule]] = None,
-        continue_tool_rules: Optional[List[ContinueToolRule]] = None,
-        child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
-        parent_tool_rules: Optional[List[ParentToolRule]] = None,
-        terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
-        required_before_exit_tool_rules: Optional[List[RequiredBeforeExitToolRule]] = None,
-        tool_call_history: Optional[List[str]] = None,
-        **kwargs,
-    ):
-        super().__init__(
-            init_tool_rules=init_tool_rules or [],
-            continue_tool_rules=continue_tool_rules or [],
-            child_based_tool_rules=child_based_tool_rules or [],
-            parent_tool_rules=parent_tool_rules or [],
-            terminal_tool_rules=terminal_tool_rules or [],
-            required_before_exit_tool_rules=required_before_exit_tool_rules or [],
-            tool_call_history=tool_call_history or [],
-            **kwargs,
-        )
+    tool_call_history: list[str] = Field(default_factory=list, description="History of tool calls, updated with each tool call.")

-
-
-
-
+    def __init__(self, tool_rules: list[ToolRule] | None = None, **kwargs):
+        super().__init__(tool_rules=tool_rules, **kwargs)
+
+    def model_post_init(self, __context):
+        if self.tool_rules:
+            for rule in self.tool_rules:
+                if isinstance(rule, InitToolRule):
                     self.init_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ChildToolRule)
+                elif isinstance(rule, ChildToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ConditionalToolRule)
-                    self.validate_conditional_tool(rule)
+                elif isinstance(rule, ConditionalToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, TerminalToolRule)
+                elif isinstance(rule, TerminalToolRule):
                     self.terminal_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ContinueToolRule)
+                elif isinstance(rule, ContinueToolRule):
                     self.continue_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, MaxCountPerStepToolRule)
+                elif isinstance(rule, MaxCountPerStepToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ParentToolRule)
+                elif isinstance(rule, ParentToolRule):
                     self.parent_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, RequiredBeforeExitToolRule)
+                elif isinstance(rule, RequiredBeforeExitToolRule):
                     self.required_before_exit_tool_rules.append(rule)

     def register_tool_call(self, tool_name: str):
@@ -107,12 +78,12 @@ class ToolRulesSolver(BaseModel):
         self.tool_call_history.clear()

     def get_allowed_tool_names(
-        self, available_tools: set[
-    ) ->
+        self, available_tools: set[ToolName], error_on_empty: bool = True, last_function_response: str | None = None
+    ) -> list[ToolName]:
         """Get a list of tool names allowed based on the last tool called.

         The logic is as follows:
-        1. if there are no previous tool calls and we have InitToolRules, those are the only options for the first tool call
+        1. if there are no previous tool calls, and we have InitToolRules, those are the only options for the first tool call
         2. else we take the intersection of the Parent/Child/Conditional/MaxSteps as the options
         3. Continue/Terminal/RequiredBeforeExit rules are applied in the agent loop flow, not to restrict tools
         """
@@ -134,23 +105,23 @@ class ToolRulesSolver(BaseModel):

         return list(final_allowed_tools)

-    def is_terminal_tool(self, tool_name:
+    def is_terminal_tool(self, tool_name: ToolName) -> bool:
         """Check if the tool is defined as a terminal tool in the terminal tool rules or required-before-exit tool rules."""
         return any(rule.tool_name == tool_name for rule in self.terminal_tool_rules)

-    def has_children_tools(self, tool_name):
+    def has_children_tools(self, tool_name: ToolName):
         """Check if the tool has children tools"""
         return any(rule.tool_name == tool_name for rule in self.child_based_tool_rules)

-    def is_continue_tool(self, tool_name):
+    def is_continue_tool(self, tool_name: ToolName):
         """Check if the tool is defined as a continue tool in the tool rules."""
         return any(rule.tool_name == tool_name for rule in self.continue_tool_rules)

-    def has_required_tools_been_called(self, available_tools: set[
+    def has_required_tools_been_called(self, available_tools: set[ToolName]) -> bool:
         """Check if all required-before-exit tools have been called."""
         return len(self.get_uncalled_required_tools(available_tools=available_tools)) == 0

-    def get_uncalled_required_tools(self, available_tools: set[
+    def get_uncalled_required_tools(self, available_tools: set[ToolName]) -> list[str]:
         """Get the list of required-before-exit tools that have not been called yet."""
         if not self.required_before_exit_tool_rules:
             return []  # No required tools means no uncalled tools
@@ -161,16 +132,12 @@ class ToolRulesSolver(BaseModel):
         # Get required tools that are uncalled AND available
         return list((required_tool_names & available_tools) - called_tool_names)

-    def
-        """Get the names of tools that are required before exit."""
-        return [rule.tool_name for rule in self.required_before_exit_tool_rules]
-
-    def compile_tool_rule_prompts(self) -> Optional[Block]:
+    def compile_tool_rule_prompts(self) -> Block | None:
         """
         Compile prompt templates from all tool rules into an ephemeral Block.

         Returns:
-
+            Block | None: Compiled prompt block with tool rule constraints, or None if no templates exist.
         """
         compiled_prompts = []

@@ -191,20 +158,20 @@ class ToolRulesSolver(BaseModel):
             return Block(
                 label="tool_usage_rules",
                 value="\n".join(compiled_prompts),
-                description=
+                description=COMPILED_PROMPT_DESCRIPTION,
             )
         return None

-    def guess_rule_violation(self, tool_name:
+    def guess_rule_violation(self, tool_name: ToolName) -> list[str]:
         """
         Check if the given tool name or the previous tool in history matches any tool rule,
-        and return rendered prompt templates for matching
+        and return rendered prompt templates for matching rule violations.

         Args:
             tool_name: The name of the tool to check for rule violations

         Returns:
-
+            list of rendered prompt templates from matching tool rules
         """
         violated_rules = []

@@ -228,18 +195,3 @@ class ToolRulesSolver(BaseModel):
                 violated_rules.append(rendered_prompt)

         return violated_rules
-
-    @staticmethod
-    def validate_conditional_tool(rule: ConditionalToolRule):
-        """
-        Validate a conditional tool rule
-
-        Args:
-            rule (ConditionalToolRule): The conditional tool rule to validate
-
-        Raises:
-            ToolRuleValidationError: If the rule is invalid
-        """
-        if len(rule.child_output_mapping) == 0:
-            raise ToolRuleValidationError("Conditional tool rule must have at least one child tool.")
-        return True
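A condensed sketch of the ToolRulesSolver refactor above: one flat tool_rules input, buckets derived by isinstance in model_post_init, and the derived buckets excluded from serialization. The rule classes below are simplified stand-ins for letta's schemas, not the real ones.

from typing import Literal

from pydantic import BaseModel, Field


class InitToolRule(BaseModel):
    tool_name: str
    type: Literal["run_first"] = "run_first"


class TerminalToolRule(BaseModel):
    tool_name: str
    type: Literal["exit_loop"] = "exit_loop"


ToolRule = InitToolRule | TerminalToolRule


class MiniSolver(BaseModel):
    tool_rules: list[ToolRule] | None = Field(default=None)
    # derived buckets: rebuilt on load, excluded from serialization
    init_tool_rules: list[InitToolRule] = Field(default_factory=list, exclude=True)
    terminal_tool_rules: list[TerminalToolRule] = Field(default_factory=list, exclude=True)

    def model_post_init(self, __context) -> None:
        for rule in self.tool_rules or []:
            if isinstance(rule, InitToolRule):
                self.init_tool_rules.append(rule)
            elif isinstance(rule, TerminalToolRule):
                self.terminal_tool_rules.append(rule)


solver = MiniSolver(tool_rules=[InitToolRule(tool_name="a"), TerminalToolRule(tool_name="b")])
assert [r.tool_name for r in solver.init_tool_rules] == ["a"]
assert "init_tool_rules" not in solver.model_dump()  # only the flat list round-trips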
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -345,6 +345,15 @@ class OpenAIStreamingInterface:
                     prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                     diff = new_content.replace(prev_content, "", 1)
+
+                    # quick patch to mitigate double message streaming error
+                    # TODO: root cause this issue and remove patch
+                    if diff != "" and "\\n" not in new_content:
+                        converted_new_content = new_content.replace("\n", "\\n")
+                        converted_content_diff = converted_new_content.replace(prev_content, "", 1)
+                        if converted_content_diff == "":
+                            diff = converted_content_diff
+
                     self.current_json_parse_result = parsed_args
                     if prev_message_type and prev_message_type != "assistant_message":
                         message_index += 1
letta/llm_api/anthropic_client.py
CHANGED
@@ -182,7 +182,7 @@ class AnthropicClient(LLMClientBase):
         }

         # Extended Thinking
-        if llm_config.enable_reasoner:
+        if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             data["thinking"] = {
                 "type": "enabled",
                 "budget_tokens": llm_config.max_reasoning_tokens,
@@ -200,7 +200,7 @@ class AnthropicClient(LLMClientBase):
             # Special case for summarization path
             tools_for_request = None
             tool_choice = None
-        elif llm_config.enable_reasoner:
+        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools]
@@ -296,6 +296,13 @@ class AnthropicClient(LLMClientBase):
             token_count -= 8
         return token_count

+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return (
+            llm_config.model.startswith("claude-3-7-sonnet")
+            or llm_config.model.startswith("claude-sonnet-4")
+            or llm_config.model.startswith("claude-opus-4")
+        )
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         if isinstance(e, anthropic.APITimeoutError):
letta/llm_api/google_vertex_client.py
CHANGED
@@ -254,8 +254,11 @@ class GoogleVertexClient(LLMClientBase):
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
         if "flash" in llm_config.model:
+            # Gemini flash models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
             thinking_config = ThinkingConfig(
-                thinking_budget=
+                thinking_budget=(
+                    llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
+                ),
             )
             request_data["config"]["thinking_config"] = thinking_config.model_dump()

@@ -292,7 +295,6 @@ class GoogleVertexClient(LLMClientBase):
             }
         }
         """
-        # print(response_data)

         response = GenerateContentResponse(**response_data)
         try:
@@ -301,11 +303,11 @@ class GoogleVertexClient(LLMClientBase):
             for candidate in response.candidates:
                 content = candidate.content

-                if content.role is None or content.parts is None:
+                if content is None or content.role is None or content.parts is None:
                     # This means the response is malformed like MALFORMED_FUNCTION_CALL
                     # NOTE: must be a ValueError to trigger a retry
                     if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                        raise ValueError(f"Error in response data from LLM: {candidate.
+                        raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}...")
                     else:
                         raise ValueError(f"Error in response data from LLM: {response_data}")

@@ -494,6 +496,17 @@ class GoogleVertexClient(LLMClientBase):
                 "required": ["name", "args"],
             }

+    def get_thinking_budget(self, model: str) -> bool:
+        if model_settings.gemini_force_minimum_thinking_budget:
+            if all(substring in model for substring in ["2.5", "flash", "lite"]):
+                return 512
+            elif all(substring in model for substring in ["2.5", "flash"]):
+                return 1
+        return 0
+
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         # Fallback to base implementation
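The new get_thinking_budget logic, restated as a standalone function (force_minimum stands in for model_settings.gemini_force_minimum_thinking_budget): when the flag is set, 2.5 flash-lite models get a 512-token floor, other 2.5 flash models get 1, and everything else gets 0.

def thinking_budget(model: str, force_minimum: bool) -> int:
    if force_minimum:
        if all(s in model for s in ("2.5", "flash", "lite")):
            return 512
        elif all(s in model for s in ("2.5", "flash")):
            return 1
    return 0


assert thinking_budget("gemini-2.5-flash-lite", True) == 512
assert thinking_budget("gemini-2.5-flash", True) == 1
assert thinking_budget("gemini-2.5-pro", True) == 0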
letta/llm_api/llm_client_base.py
CHANGED
@@ -174,6 +174,10 @@ class LLMClientBase:
         """
         raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")

+    @abstractmethod
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        raise NotImplementedError
+
     @abstractmethod
     def handle_llm_error(self, e: Exception) -> Exception:
         """
letta/llm_api/openai_client.py
CHANGED
@@ -276,6 +276,9 @@ class OpenAIClient(LLMClientBase):
         response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return is_openai_reasoning_model(llm_config.model)
+
     @trace_method
     def convert_response_to_chat_completion(
         self,
@@ -298,7 +301,7 @@ class OpenAIClient(LLMClientBase):
         )

         # If we used a reasoning model, create a content part for the ommitted reasoning
-        if
+        if self.is_reasoning_model(llm_config):
             chat_completion_response.choices[0].message.omitted_reasoning_content = True

         return chat_completion_response
letta/log.py
CHANGED
@@ -61,13 +61,15 @@ DEVELOPMENT_LOGGING = {
     },
 }

+# Configure logging once at module initialization to avoid performance overhead
+dictConfig(DEVELOPMENT_LOGGING)
+

 def get_logger(name: Optional[str] = None) -> "logging.Logger":
     """returns the project logger, scoped to a child name if provided
     Args:
         name: will define a child logger
     """
-    dictConfig(DEVELOPMENT_LOGGING)
     parent_logger = logging.getLogger("Letta")
     if name:
         return parent_logger.getChild(name)
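The log.py change moves dictConfig out of get_logger, so configuration runs once at import time rather than on every call. A generic illustration of the pattern with a simplified config dict, not letta's DEVELOPMENT_LOGGING:

import logging
from logging.config import dictConfig

LOGGING = {"version": 1, "loggers": {"Letta": {"level": "INFO"}}}
dictConfig(LOGGING)  # configure once, at module import


def get_logger(name: str | None = None) -> logging.Logger:
    # callers only pay for a cheap getLogger/getChild lookup
    parent = logging.getLogger("Letta")
    return parent.getChild(name) if name else parent


assert get_logger("agent").name == "Letta.agent"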
letta/schemas/enums.py
CHANGED
@@ -1,4 +1,4 @@
-from enum import Enum
+from enum import Enum, StrEnum


 class ProviderType(str, Enum):
@@ -42,7 +42,7 @@ class OptionState(str, Enum):
     DEFAULT = "default"


-class JobStatus(str, Enum):
+class JobStatus(StrEnum):
     """
     Status of the job.
     """
@@ -63,7 +63,8 @@ class JobStatus(str, Enum):

 class AgentStepStatus(str, Enum):
     """
-    Status of
+    Status of agent step.
+    TODO (cliandy): consolidate this with job status
     """

     paused = "paused"
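Why the JobStatus switch to StrEnum matters (Python 3.11+): the member formats as its plain value, so f-strings and log output show "completed" rather than "JobStatus.completed". A minimal illustration with stand-in enums, not letta's actual classes:

from enum import Enum, StrEnum


class OldStatus(str, Enum):
    completed = "completed"


class NewStatus(StrEnum):
    completed = "completed"


# on Python 3.11+ the str-mixin enum formats with its class name, StrEnum does not
assert f"{OldStatus.completed}" == "OldStatus.completed"
assert f"{NewStatus.completed}" == "completed"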
|