letta-nightly 0.6.50.dev20250411104155__py3-none-any.whl → 0.6.52.dev20250412051016__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +23 -32
  3. letta/agents/base_agent.py +17 -6
  4. letta/agents/ephemeral_agent.py +5 -6
  5. letta/agents/ephemeral_memory_agent.py +8 -10
  6. letta/agents/helpers.py +6 -6
  7. letta/agents/letta_agent.py +9 -10
  8. letta/agents/letta_agent_batch.py +164 -0
  9. letta/agents/voice_agent.py +8 -8
  10. letta/functions/function_sets/base.py +1 -1
  11. letta/helpers/converters.py +5 -2
  12. letta/helpers/tool_rule_solver.py +12 -2
  13. letta/jobs/scheduler.py +13 -11
  14. letta/llm_api/anthropic.py +0 -1
  15. letta/llm_api/anthropic_client.py +61 -23
  16. letta/llm_api/cohere.py +1 -1
  17. letta/llm_api/google_ai_client.py +48 -13
  18. letta/llm_api/google_vertex_client.py +19 -1
  19. letta/llm_api/llm_client_base.py +13 -5
  20. letta/llm_api/openai.py +4 -3
  21. letta/llm_api/openai_client.py +18 -10
  22. letta/orm/organization.py +4 -2
  23. letta/orm/sqlalchemy_base.py +3 -0
  24. letta/schemas/enums.py +1 -0
  25. letta/schemas/group.py +30 -1
  26. letta/schemas/identity.py +10 -0
  27. letta/schemas/letta_request.py +4 -0
  28. letta/schemas/letta_response.py +9 -1
  29. letta/schemas/llm_config.py +10 -0
  30. letta/schemas/message.py +21 -12
  31. letta/schemas/openai/chat_completion_request.py +1 -0
  32. letta/schemas/tool_rule.py +14 -1
  33. letta/server/rest_api/interface.py +5 -4
  34. letta/server/rest_api/routers/v1/agents.py +20 -13
  35. letta/server/rest_api/routers/v1/groups.py +1 -1
  36. letta/server/rest_api/routers/v1/identities.py +23 -2
  37. letta/server/rest_api/utils.py +20 -22
  38. letta/server/server.py +34 -21
  39. letta/services/agent_manager.py +13 -9
  40. letta/services/block_manager.py +2 -4
  41. letta/services/identity_manager.py +21 -5
  42. letta/services/llm_batch_manager.py +21 -1
  43. letta/services/summarizer/summarizer.py +11 -4
  44. letta/services/tool_manager.py +1 -1
  45. letta/settings.py +1 -0
  46. letta/utils.py +2 -2
  47. {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/METADATA +3 -3
  48. {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/RECORD +51 -50
  49. {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/entry_points.txt +0 -0
letta/helpers/converters.py CHANGED
@@ -28,6 +28,7 @@ from letta.schemas.tool_rule import (
     ContinueToolRule,
     InitToolRule,
     MaxCountPerStepToolRule,
+    ParentToolRule,
     TerminalToolRule,
     ToolRule,
 )
@@ -89,7 +90,7 @@ def serialize_tool_rules(tool_rules: Optional[List[ToolRule]]) -> List[Dict[str,
     return data


-def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule]]:
+def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[ToolRule]:
     """Convert a list of dictionaries back into ToolRule objects."""
     if not data:
         return []
@@ -99,7 +100,7 @@ def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[Union[ChildToolRu

 def deserialize_tool_rule(
     data: Dict,
-) -> Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule, ContinueToolRule, MaxCountPerStepToolRule]:
+) -> ToolRule:
     """Deserialize a dictionary to the appropriate ToolRule subclass based on 'type'."""
     rule_type = ToolRuleType(data.get("type"))

@@ -118,6 +119,8 @@ def deserialize_tool_rule(
         return ContinueToolRule(**data)
     elif rule_type == ToolRuleType.max_count_per_step:
         return MaxCountPerStepToolRule(**data)
+    elif rule_type == ToolRuleType.parent_last_tool:
+        return ParentToolRule(**data)
     raise ValueError(f"Unknown ToolRule type: {rule_type}")

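A minimal round-trip sketch of the new dispatch branch (the "parent_last_tool" type string and the tool_name/children field names are inferred from the hunks in this diff, so treat them as assumptions):

from letta.helpers.converters import deserialize_tool_rule
from letta.schemas.tool_rule import ParentToolRule

# Hypothetical serialized rule: "plan" must run before its children become available
data = {"type": "parent_last_tool", "tool_name": "plan", "children": ["search", "send_message"]}

rule = deserialize_tool_rule(data)
assert isinstance(rule, ParentToolRule)
assert rule.children == ["search", "send_message"]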
letta/helpers/tool_rule_solver.py CHANGED
@@ -10,6 +10,7 @@ from letta.schemas.tool_rule import (
     ContinueToolRule,
     InitToolRule,
     MaxCountPerStepToolRule,
+    ParentToolRule,
     TerminalToolRule,
 )

@@ -33,6 +34,9 @@ class ToolRulesSolver(BaseModel):
     child_based_tool_rules: List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]] = Field(
         default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions."
     )
+    parent_tool_rules: List[ParentToolRule] = Field(
+        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set."
+    )
     terminal_tool_rules: List[TerminalToolRule] = Field(
         default_factory=list, description="Terminal tool rules that end the agent loop if called."
     )
@@ -44,6 +48,7 @@ class ToolRulesSolver(BaseModel):
         init_tool_rules: Optional[List[InitToolRule]] = None,
         continue_tool_rules: Optional[List[ContinueToolRule]] = None,
         child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
+        parent_tool_rules: Optional[List[ParentToolRule]] = None,
         terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
         tool_call_history: Optional[List[str]] = None,
         **kwargs,
@@ -52,6 +57,7 @@ class ToolRulesSolver(BaseModel):
             init_tool_rules=init_tool_rules or [],
             continue_tool_rules=continue_tool_rules or [],
             child_based_tool_rules=child_based_tool_rules or [],
+            parent_tool_rules=parent_tool_rules or [],
            terminal_tool_rules=terminal_tool_rules or [],
             tool_call_history=tool_call_history or [],
             **kwargs,
@@ -78,6 +84,9 @@ class ToolRulesSolver(BaseModel):
         elif rule.type == ToolRuleType.max_count_per_step:
             assert isinstance(rule, MaxCountPerStepToolRule)
             self.child_based_tool_rules.append(rule)
+        elif rule.type == ToolRuleType.parent_last_tool:
+            assert isinstance(rule, ParentToolRule)
+            self.parent_tool_rules.append(rule)

     def register_tool_call(self, tool_name: str):
         """Update the internal state to track tool call history."""
@@ -102,13 +111,14 @@ class ToolRulesSolver(BaseModel):
                 # If there are init tool rules, only return those defined in the init tool rules
                 return [rule.tool_name for rule in self.init_tool_rules]
             else:
-                # Otherwise, return all the available tools
+                # Otherwise, return all tools besides those constrained by parent tool rules
+                available_tools = available_tools - set.union(set(), *(set(rule.children) for rule in self.parent_tool_rules))
                 return list(available_tools)
         else:
             # Collect valid tools from all child-based rules
             valid_tool_sets = [
                 rule.get_valid_tools(self.tool_call_history, available_tools, last_function_response)
-                for rule in self.child_based_tool_rules
+                for rule in self.child_based_tool_rules + self.parent_tool_rules
             ]

             # Compute intersection of all valid tool sets
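The parent-rule filter above is plain set algebra; here is a standalone sketch of the expression (tool names hypothetical):

# Each parent rule gates its children behind the parent tool
parent_children = [{"search", "send_message"}, {"archive"}]
available_tools = {"plan", "search", "send_message", "archive", "core_memory_append"}

# The set() seed keeps set.union from raising TypeError when there are no parent rules
blocked = set.union(set(), *parent_children)
print(sorted(available_tools - blocked))  # ['core_memory_append', 'plan']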
letta/jobs/scheduler.py CHANGED
@@ -12,17 +12,19 @@ scheduler = AsyncIOScheduler()

 def start_cron_jobs(server: SyncServer):
     """Initialize cron jobs"""
-    scheduler.add_job(
-        poll_running_llm_batches,
-        args=[server],
-        trigger=IntervalTrigger(seconds=settings.poll_running_llm_batches_interval_seconds),
-        next_run_time=datetime.datetime.now(datetime.UTC),
-        id="poll_llm_batches",
-        name="Poll LLM API batch jobs and update status",
-        replace_existing=True,
-    )
-    scheduler.start()
+    if settings.enable_batch_job_polling:
+        scheduler.add_job(
+            poll_running_llm_batches,
+            args=[server],
+            trigger=IntervalTrigger(seconds=settings.poll_running_llm_batches_interval_seconds),
+            next_run_time=datetime.datetime.now(datetime.timezone.utc),
+            id="poll_llm_batches",
+            name="Poll LLM API batch jobs and update status",
+            replace_existing=True,
+        )
+        scheduler.start()


 def shutdown_cron_scheduler():
-    scheduler.shutdown()
+    if settings.enable_batch_job_polling:
+        scheduler.shutdown()
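Note that swapping datetime.UTC for datetime.timezone.utc is a compatibility fix rather than a behavior change: datetime.UTC is an alias added in Python 3.11, while timezone.utc has existed since Python 3.2. A quick check:

import datetime
import sys

now = datetime.datetime.now(datetime.timezone.utc)  # portable spelling
assert now.tzinfo is datetime.timezone.utc
if sys.version_info >= (3, 11):
    assert datetime.UTC is datetime.timezone.utc  # same object on newer interpreters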
letta/llm_api/anthropic.py CHANGED
@@ -691,7 +691,6 @@ def _prepare_anthropic_request(
     # Convert to Anthropic format
     msg_objs = [
         _Message.dict_to_message(
-            user_id=None,
             agent_id=None,
             openai_message_dict=m,
         )
letta/llm_api/anthropic_client.py CHANGED
@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
@@ -59,25 +60,55 @@ class AnthropicClient(LLMClientBase):
         return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])

     @trace_method
-    async def batch_async(self, requests: Dict[str, dict]) -> BetaMessageBatch:
+    async def send_llm_batch_request_async(
+        self,
+        agent_messages_mapping: Dict[str, List[PydanticMessage]],
+        agent_tools_mapping: Dict[str, List[dict]],
+        agent_llm_config_mapping: Dict[str, LLMConfig],
+    ) -> BetaMessageBatch:
         """
-        Send a batch of requests to the Anthropic API asynchronously.
+        Sends a batch request to the Anthropic API using the provided agent messages and tools mappings.

         Args:
-            requests (Dict[str, dict]): A mapping from custom_id to request parameter dicts.
+            agent_messages_mapping: A dict mapping agent_id to their list of PydanticMessages.
+            agent_tools_mapping: A dict mapping agent_id to their list of tool dicts.
+            agent_llm_config_mapping: A dict mapping agent_id to their LLM config.

         Returns:
-            List[dict]: A list of response dictionaries corresponding to each request.
+            BetaMessageBatch: The batch response from the Anthropic API.
+
+        Raises:
+            ValueError: If the sets of agent_ids in the two mappings do not match.
+            Exception: Transformed errors from the underlying API call.
         """
-        client = self._get_anthropic_client(async_client=True)
+        # Validate that both mappings use the same set of agent_ids.
+        if set(agent_messages_mapping.keys()) != set(agent_tools_mapping.keys()):
+            raise ValueError("Agent mappings for messages and tools must use the same agent_ids.")
+
+        try:
+            requests = {
+                agent_id: self.build_request_data(
+                    messages=agent_messages_mapping[agent_id],
+                    llm_config=agent_llm_config_mapping[agent_id],
+                    tools=agent_tools_mapping[agent_id],
+                )
+                for agent_id in agent_messages_mapping
+            }

-        anthropic_requests = [
-            Request(custom_id=custom_id, params=MessageCreateParamsNonStreaming(**params)) for custom_id, params in requests.items()
-        ]
+            client = self._get_anthropic_client(async_client=True)
+
+            anthropic_requests = [
+                Request(custom_id=agent_id, params=MessageCreateParamsNonStreaming(**params)) for agent_id, params in requests.items()
+            ]
+
+            batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)

-        batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)
+            return batch_response

-        return batch_response
+        except Exception as e:
+            # Enhance logging here if additional context is needed
+            logger.error("Error during send_llm_batch_request_async.", exc_info=True)
+            raise self.handle_llm_error(e)

     @trace_method
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
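A usage sketch for the new batch entrypoint (the agent id and LLMConfig values are hypothetical, the message and tool payloads are elided because they depend on agent state, and running this would hit the live Anthropic batch API):

import asyncio

from letta.llm_api.anthropic_client import AnthropicClient
from letta.schemas.llm_config import LLMConfig

async def main():
    config = LLMConfig(
        model="claude-3-5-sonnet-20241022",
        model_endpoint_type="anthropic",
        model_endpoint="https://api.anthropic.com/v1",
        context_window=200000,
        max_tokens=1024,  # build_request_data raises if max_tokens is unset
    )
    client = AnthropicClient(llm_config=config)
    messages = {"agent-1": []}  # List[PydanticMessage] per agent (elided)
    tools = {"agent-1": []}     # function-schema dicts per agent (elided)
    batch = await client.send_llm_batch_request_async(
        agent_messages_mapping=messages,
        agent_tools_mapping=tools,
        agent_llm_config_mapping={"agent-1": config},
    )
    print(batch.id, batch.processing_status)  # poll this id to collect results

asyncio.run(main())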
@@ -90,6 +121,7 @@ class AnthropicClient(LLMClientBase):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
     ) -> dict:
@@ -99,20 +131,20 @@ class AnthropicClient(LLMClientBase):
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

-        if not self.llm_config.max_tokens:
+        if not llm_config.max_tokens:
             raise ValueError("Max tokens must be set for anthropic")

         data = {
-            "model": self.llm_config.model,
-            "max_tokens": self.llm_config.max_tokens,
-            "temperature": self.llm_config.temperature,
+            "model": llm_config.model,
+            "max_tokens": llm_config.max_tokens,
+            "temperature": llm_config.temperature,
         }

         # Extended Thinking
-        if self.llm_config.enable_reasoner:
+        if llm_config.enable_reasoner:
             data["thinking"] = {
                 "type": "enabled",
-                "budget_tokens": self.llm_config.max_reasoning_tokens,
+                "budget_tokens": llm_config.max_reasoning_tokens,
             }
             # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
             data["temperature"] = 1.0
@@ -132,13 +164,13 @@ class AnthropicClient(LLMClientBase):
             tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]

             # need to have this setting to be able to put inner thoughts in kwargs
-            if not self.llm_config.put_inner_thoughts_in_kwargs:
+            if not llm_config.put_inner_thoughts_in_kwargs:
                 logger.warning(
                     f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
                 )
-                self.llm_config.put_inner_thoughts_in_kwargs = True
+                llm_config.put_inner_thoughts_in_kwargs = True
         else:
-            if self.llm_config.put_inner_thoughts_in_kwargs:
+            if llm_config.put_inner_thoughts_in_kwargs:
                 # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
@@ -151,7 +183,7 @@ class AnthropicClient(LLMClientBase):

         # Add inner thoughts kwarg
         # TODO: Can probably make this more efficient
-        if tools_for_request and len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
+        if tools_for_request and len(tools_for_request) > 0 and llm_config.put_inner_thoughts_in_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -173,7 +205,7 @@ class AnthropicClient(LLMClientBase):
         data["messages"] = [
             m.to_anthropic_dict(
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-                put_inner_thoughts_in_kwargs=bool(self.llm_config.put_inner_thoughts_in_kwargs),
+                put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
             )
             for m in messages[1:]
         ]
@@ -189,7 +221,7 @@ class AnthropicClient(LLMClientBase):
         # https://docs.anthropic.com/en/api/messages#body-messages
         # NOTE: cannot prefill with tools for opus:
         # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-        if prefix_fill and not self.llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+        if prefix_fill and not llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
             data["messages"].append(
                 # Start the thinking process for the assistant
                 {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
@@ -323,13 +355,19 @@ class AnthropicClient(LLMClientBase):
                 if content_part.type == "text":
                     content = strip_xml_tags(string=content_part.text, tag="thinking")
                 if content_part.type == "tool_use":
+                    # hack for tool rules
+                    input = json.loads(json.dumps(content_part.input))
+                    if "id" in input and input["id"].startswith("toolu_") and "function" in input:
+                        arguments = str(input["function"]["arguments"])
+                    else:
+                        arguments = json.dumps(content_part.input, indent=2)
                     tool_calls = [
                         ToolCall(
                             id=content_part.id,
                             type="function",
                             function=FunctionCall(
                                 name=content_part.name,
-                                arguments=json.dumps(content_part.input, indent=2),
+                                arguments=arguments,
                             ),
                         )
                     ]
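The json.loads(json.dumps(...)) round-trip above coerces the SDK's input mapping into plain JSON types before inspection; a standalone sketch of the unwrapping rule (payload shapes hypothetical):

import json

def unwrap_tool_input(tool_input: dict) -> str:
    # Mirrors the "hack for tool rules" branch: if the model echoed a full
    # tool_call object ({"id": "toolu_...", "function": {...}}), keep only
    # the inner arguments; otherwise serialize the input as-is.
    data = json.loads(json.dumps(tool_input))
    if "id" in data and data["id"].startswith("toolu_") and "function" in data:
        return str(data["function"]["arguments"])
    return json.dumps(tool_input, indent=2)

print(unwrap_tool_input({"query": "weather"}))
print(unwrap_tool_input({"id": "toolu_123", "function": {"arguments": '{"query": "weather"}'}}))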
letta/llm_api/cohere.py CHANGED
@@ -315,7 +315,7 @@ def cohere_chat_completions_request(
     data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

     # Convert messages to Cohere format
-    msg_objs = [Message.dict_to_message(user_id=uuid.uuid4(), agent_id=uuid.uuid4(), openai_message_dict=m) for m in data["messages"]]
+    msg_objs = [Message.dict_to_message(agent_id=uuid.uuid4(), openai_message_dict=m) for m in data["messages"]]

     # System message 0 should instead be a "preamble"
     # See: https://docs.cohere.com/reference/chat
letta/llm_api/google_ai_client.py CHANGED
@@ -1,3 +1,4 @@
+import json
 import uuid
 from typing import List, Optional, Tuple

@@ -11,12 +12,16 @@ from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.log import get_logger
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings
 from letta.utils import get_tool_call_id

+logger = get_logger(__name__)
+

 class GoogleAIClient(LLMClientBase):

@@ -24,6 +29,8 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
+        # print("[google_ai request]", json.dumps(request_data, indent=2))
+
         url, headers = get_gemini_endpoint_and_headers(
             base_url=str(self.llm_config.model_endpoint),
             model=self.llm_config.model,
@@ -36,6 +43,7 @@ class GoogleAIClient(LLMClientBase):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
     ) -> dict:
@@ -44,9 +52,10 @@ class GoogleAIClient(LLMClientBase):
         """
         if tools:
             tools = [{"type": "function", "function": f} for f in tools]
-            tools = self.convert_tools_to_google_ai_format(
-                [Tool(**t) for t in tools],
-            )
+            tool_objs = [Tool(**t) for t in tools]
+            tool_names = [t.function.name for t in tool_objs]
+            # Convert to the exact payload style Google expects
+            tools = self.convert_tools_to_google_ai_format(tool_objs)
         contents = self.add_dummy_model_messages(
             [m.to_google_ai_dict() for m in messages],
         )
@@ -55,8 +64,8 @@ class GoogleAIClient(LLMClientBase):
             "contents": contents,
             "tools": tools,
             "generation_config": {
-                "temperature": self.llm_config.temperature,
-                "max_output_tokens": self.llm_config.max_tokens,
+                "temperature": llm_config.temperature,
+                "max_output_tokens": llm_config.max_tokens,
             },
         }

@@ -65,6 +74,8 @@ class GoogleAIClient(LLMClientBase):
             function_calling_config=FunctionCallingConfig(
                 # ANY mode forces the model to predict only function calls
                 mode=FunctionCallingConfigMode.ANY,
+                # Provide the list of tools (though empty should also work, it seems not to)
+                allowed_function_names=tool_names,
             )
         )
         request_data["tool_config"] = tool_config.model_dump()
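For reference, the resulting tool_config pins Gemini to the named functions; a minimal sketch using the same types (import path assumed to match what this module already uses):

from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

tool_config = ToolConfig(
    function_calling_config=FunctionCallingConfig(
        mode=FunctionCallingConfigMode.ANY,       # force a function call on every turn
        allowed_function_names=["send_message"],  # hypothetical tool name
    )
)
print(tool_config.model_dump())  # inspect the serialized payload sent to the API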
@@ -99,6 +110,8 @@ class GoogleAIClient(LLMClientBase):
             }
         }
         """
+        # print("[google_ai response]", json.dumps(response_data, indent=2))
+
         try:
             choices = []
             index = 0
@@ -109,6 +122,17 @@ class GoogleAIClient(LLMClientBase):
                 assert role == "model", f"Unknown role in response: {role}"

                 parts = content["parts"]
+
+                # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
+                # so let's disable it for now
+
+                # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
+                # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
+                # To patch this, if we have multiple parts we can take the last one
+                if len(parts) > 1:
+                    logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+                    parts = [parts[-1]]
+
                 # TODO support parts / multimodal
                 # TODO support parallel tool calling natively
                 # TODO Alternative here is to throw away everything else except for the first part
@@ -199,10 +223,22 @@ class GoogleAIClient(LLMClientBase):
             #     "totalTokenCount": 36
             # }
             if "usageMetadata" in response_data:
+                usage_data = response_data["usageMetadata"]
+                if "promptTokenCount" not in usage_data:
+                    raise ValueError(f"promptTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+                if "totalTokenCount" not in usage_data:
+                    raise ValueError(f"totalTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+                if "candidatesTokenCount" not in usage_data:
+                    raise ValueError(f"candidatesTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+
+                prompt_tokens = usage_data["promptTokenCount"]
+                completion_tokens = usage_data["candidatesTokenCount"]
+                total_tokens = usage_data["totalTokenCount"]
+
                 usage = UsageStatistics(
-                    prompt_tokens=response_data["usageMetadata"]["promptTokenCount"],
-                    completion_tokens=response_data["usageMetadata"]["candidatesTokenCount"],
-                    total_tokens=response_data["usageMetadata"]["totalTokenCount"],
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
                 )
             else:
                 # Count it ourselves
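The stricter usage parsing fails fast with the offending payload instead of raising a bare KeyError; a condensed sketch of the same checks (sample metadata values hypothetical):

import json

def parse_usage(usage_data: dict) -> tuple:
    # Mirrors the checks above: name the missing field and dump the full payload
    for key in ("promptTokenCount", "candidatesTokenCount", "totalTokenCount"):
        if key not in usage_data:
            raise ValueError(f"{key} not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
    return usage_data["promptTokenCount"], usage_data["candidatesTokenCount"], usage_data["totalTokenCount"]

print(parse_usage({"promptTokenCount": 2493, "candidatesTokenCount": 29, "totalTokenCount": 2522}))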
@@ -282,17 +318,16 @@ class GoogleAIClient(LLMClientBase):
             for t in tools
         ]

-        # Correct casing + add inner thoughts if needed
+        # Add inner thoughts if needed
         for func in function_list:
-            func["parameters"]["type"] = "OBJECT"
-            for param_name, param_fields in func["parameters"]["properties"].items():
-                param_fields["type"] = param_fields["type"].upper()
+            # Note: Google AI API used to have weird casing requirements, but not any more
+
             # Add inner thoughts
             if self.llm_config.put_inner_thoughts_in_kwargs:
                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION

                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
-                    "type": "STRING",
+                    "type": "string",
                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
                 }
                 func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
letta/llm_api/google_vertex_client.py CHANGED
@@ -9,6 +9,7 @@ from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings
@@ -37,20 +38,24 @@ class GoogleVertexClient(GoogleAIClient):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, tools, force_tool_call)
+        request_data = super().build_request_data(messages, self.llm_config, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")

+        tool_names = [t["name"] for t in tools]
         tool_config = ToolConfig(
             function_calling_config=FunctionCallingConfig(
                 # ANY mode forces the model to predict only function calls
                 mode=FunctionCallingConfigMode.ANY,
+                # Provide the list of tools (though empty should also work, it seems not to)
+                allowed_function_names=tool_names,
             )
         )
         request_data["config"]["tool_config"] = tool_config.model_dump()
@@ -86,6 +91,8 @@ class GoogleVertexClient(GoogleAIClient):
             }
         }
         """
+        # print(response_data)
+
         response = GenerateContentResponse(**response_data)
         try:
             choices = []
@@ -97,6 +104,17 @@ class GoogleVertexClient(GoogleAIClient):
                 assert role == "model", f"Unknown role in response: {role}"

                 parts = content.parts
+
+                # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
+                # so let's disable it for now
+
+                # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
+                # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
+                # To patch this, if we have multiple parts we can take the last one
+                if len(parts) > 1:
+                    logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+                    parts = [parts[-1]]
+
                 # TODO support parts / multimodal
                 # TODO support parallel tool calling natively
                 # TODO Alternative here is to throw away everything else except for the first part
letta/llm_api/llm_client_base.py CHANGED
@@ -1,6 +1,7 @@
 from abc import abstractmethod
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union

+from anthropic.types.beta.messages import BetaMessageBatch
 from openai import AsyncStream, Stream
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

@@ -21,7 +22,6 @@ class LLMClientBase:
         self,
         llm_config: LLMConfig,
         put_inner_thoughts_first: Optional[bool] = True,
-        use_structured_output: Optional[bool] = True,
         use_tool_naming: bool = True,
     ):
         self.llm_config = llm_config
@@ -40,7 +40,7 @@ class LLMClientBase:
         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools, force_tool_call)
+        request_data = self.build_request_data(messages, self.llm_config, tools, force_tool_call)

         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -66,8 +66,7 @@ class LLMClientBase:
         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools, force_tool_call)
-        response_data = {}
+        request_data = self.build_request_data(messages, self.llm_config, tools, force_tool_call)

         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -81,10 +80,19 @@ class LLMClientBase:

         return self.convert_response_to_chat_completion(response_data, messages)

+    async def send_llm_batch_request_async(
+        self,
+        agent_messages_mapping: Dict[str, List[Message]],
+        agent_tools_mapping: Dict[str, List[dict]],
+        agent_llm_config_mapping: Dict[str, LLMConfig],
+    ) -> Union[BetaMessageBatch]:
+        raise NotImplementedError
+
     @abstractmethod
     def build_request_data(
         self,
         messages: List[Message],
+        llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
     ) -> dict:
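Since build_request_data now threads llm_config through explicitly instead of reading self.llm_config, a provider subclass looks roughly like this (hypothetical provider; payload keys are illustrative only):

from typing import List, Optional

from letta.llm_api.llm_client_base import LLMClientBase
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message

class EchoClient(LLMClientBase):
    """Hypothetical client showing the new per-call llm_config parameter."""

    def build_request_data(
        self,
        messages: List[Message],
        llm_config: LLMConfig,  # passed per call, enabling per-agent configs in batches
        tools: List[dict],
        force_tool_call: Optional[str] = None,
    ) -> dict:
        return {
            "model": llm_config.model,
            "max_tokens": llm_config.max_tokens,
            "messages": [m.model_dump() for m in messages],
            "tools": tools or [],
        }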
letta/llm_api/openai.py CHANGED
@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
             tool_choice=tool_choice,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     else:
         data = ChatCompletionRequest(
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
             function_call=function_call,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
                 tool.function = FunctionSchema(**structured_output_version)
             except ValueError as e:
                 warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
-
     return data
@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
     # except ValueError as e:
     #     warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

+    if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+        data.pop("parallel_tool_calls", None)
     return data
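Both openai.py changes work around OpenAI reasoning-model restrictions: o1/o3-mini reject parallel_tool_calls, and temperature must be 1 when reasoning is enabled. A standalone sketch that collapses the diff's two separate conditions into one helper for illustration:

def scrub_for_reasoning_models(model: str, payload: dict) -> dict:
    # Reasoning models reject these parameters; mirrors the two rules above
    if "o3-mini" in model or "o1" in model:
        payload.pop("parallel_tool_calls", None)
        payload["temperature"] = 1.0
    return payload

print(scrub_for_reasoning_models("o3-mini", {"temperature": 0.7, "parallel_tool_calls": False}))
# {'temperature': 1.0}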