letta-nightly 0.7.0.dev20250423003112__py3-none-any.whl → 0.7.2.dev20250423222439__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +113 -81
  3. letta/agents/letta_agent.py +2 -2
  4. letta/agents/letta_agent_batch.py +38 -34
  5. letta/client/client.py +10 -2
  6. letta/constants.py +4 -3
  7. letta/functions/function_sets/multi_agent.py +1 -3
  8. letta/functions/helpers.py +3 -3
  9. letta/groups/dynamic_multi_agent.py +58 -59
  10. letta/groups/round_robin_multi_agent.py +43 -49
  11. letta/groups/sleeptime_multi_agent.py +28 -18
  12. letta/groups/supervisor_multi_agent.py +21 -20
  13. letta/helpers/composio_helpers.py +1 -1
  14. letta/helpers/converters.py +29 -0
  15. letta/helpers/datetime_helpers.py +9 -0
  16. letta/helpers/message_helper.py +1 -0
  17. letta/helpers/tool_execution_helper.py +3 -3
  18. letta/jobs/llm_batch_job_polling.py +2 -1
  19. letta/llm_api/anthropic.py +10 -6
  20. letta/llm_api/anthropic_client.py +2 -2
  21. letta/llm_api/cohere.py +2 -2
  22. letta/llm_api/google_ai_client.py +2 -2
  23. letta/llm_api/google_vertex_client.py +2 -2
  24. letta/llm_api/openai.py +11 -4
  25. letta/llm_api/openai_client.py +34 -2
  26. letta/local_llm/chat_completion_proxy.py +2 -2
  27. letta/orm/agent.py +8 -1
  28. letta/orm/custom_columns.py +15 -0
  29. letta/schemas/agent.py +6 -0
  30. letta/schemas/letta_message_content.py +2 -1
  31. letta/schemas/llm_config.py +12 -2
  32. letta/schemas/message.py +18 -0
  33. letta/schemas/openai/chat_completion_response.py +52 -3
  34. letta/schemas/response_format.py +78 -0
  35. letta/schemas/tool_execution_result.py +14 -0
  36. letta/server/rest_api/chat_completions_interface.py +2 -2
  37. letta/server/rest_api/interface.py +3 -2
  38. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +1 -1
  39. letta/server/rest_api/routers/v1/agents.py +4 -4
  40. letta/server/rest_api/routers/v1/groups.py +2 -2
  41. letta/server/rest_api/routers/v1/messages.py +41 -19
  42. letta/server/server.py +24 -57
  43. letta/services/agent_manager.py +6 -1
  44. letta/services/llm_batch_manager.py +28 -26
  45. letta/services/tool_executor/tool_execution_manager.py +37 -28
  46. letta/services/tool_executor/tool_execution_sandbox.py +35 -16
  47. letta/services/tool_executor/tool_executor.py +299 -68
  48. letta/services/tool_sandbox/base.py +3 -2
  49. letta/services/tool_sandbox/e2b_sandbox.py +5 -4
  50. letta/services/tool_sandbox/local_sandbox.py +11 -6
  51. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/METADATA +1 -1
  52. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/RECORD +55 -53
  53. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/LICENSE +0 -0
  54. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/WHEEL +0 -0
  55. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/entry_points.txt +0 -0
@@ -9,7 +9,7 @@ from letta.interface import AgentInterface
  from letta.orm import User
  from letta.orm.enums import ToolType
  from letta.schemas.letta_message_content import TextContent
- from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.message import MessageCreate
  from letta.schemas.tool import Tool
  from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule
  from letta.schemas.usage import LettaUsageStatistics
@@ -37,17 +37,18 @@ class SupervisorMultiAgent(Agent):

  def step(
  self,
- messages: List[MessageCreate],
+ input_messages: List[MessageCreate],
  chaining: bool = True,
  max_chaining_steps: Optional[int] = None,
  put_inner_thoughts_first: bool = True,
  assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
  **kwargs,
  ) -> LettaUsageStatistics:
+ # Load settings
  token_streaming = self.interface.streaming_mode if hasattr(self.interface, "streaming_mode") else False
  metadata = self.interface.metadata if hasattr(self.interface, "metadata") else None

- # add multi agent tool
+ # Prepare supervisor agent
  if self.tool_manager.get_tool_by_name(tool_name="send_message_to_all_agents_in_group", actor=self.user) is None:
  multi_agent_tool = Tool(
  name=send_message_to_all_agents_in_group.__name__,
@@ -64,7 +65,6 @@ class SupervisorMultiAgent(Agent):
  )
  self.agent_state = self.agent_manager.attach_tool(agent_id=self.agent_state.id, tool_id=multi_agent_tool.id, actor=self.user)

- # override tool rules
  old_tool_rules = self.agent_state.tool_rules
  self.agent_state.tool_rules = [
  InitToolRule(
@@ -79,24 +79,25 @@ class SupervisorMultiAgent(Agent):
  ),
  ]

- supervisor_messages = [
- Message(
- agent_id=self.agent_state.id,
- role="user",
- content=[TextContent(text=message.content)],
- name=None,
- model=None,
- tool_calls=None,
- tool_call_id=None,
- group_id=self.group_id,
- otid=message.otid,
- )
- for message in messages
- ]
+ # Prepare new messages
+ new_messages = []
+ for message in input_messages:
+ if isinstance(message.content, str):
+ message.content = [TextContent(text=message.content)]
+ message.group_id = self.group_id
+ new_messages.append(message)
+
  try:
- supervisor_agent = Agent(agent_state=self.agent_state, interface=self.interface, user=self.user)
+ # Load supervisor agent
+ supervisor_agent = Agent(
+ agent_state=self.agent_state,
+ interface=self.interface,
+ user=self.user,
+ )
+
+ # Perform supervisor step
  usage_stats = supervisor_agent.step(
- messages=supervisor_messages,
+ input_messages=new_messages,
  chaining=chaining,
  max_chaining_steps=max_chaining_steps,
  stream=token_streaming,
@@ -10,7 +10,7 @@ def get_composio_api_key(actor: User, logger: Optional[Logger] = None) -> Option
  api_keys = SandboxConfigManager().list_sandbox_env_vars_by_key(key="COMPOSIO_API_KEY", actor=actor)
  if not api_keys:
  if logger:
- logger.warning(f"No API keys found for Composio. Defaulting to the environment variable...")
+ logger.debug(f"No API keys found for Composio. Defaulting to the environment variable...")
  if tool_settings.composio_api_key:
  return tool_settings.composio_api_key
  else:
@@ -22,6 +22,13 @@ from letta.schemas.letta_message_content import (
  )
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import ToolReturn
+ from letta.schemas.response_format import (
+ JsonObjectResponseFormat,
+ JsonSchemaResponseFormat,
+ ResponseFormatType,
+ ResponseFormatUnion,
+ TextResponseFormat,
+ )
  from letta.schemas.tool_rule import (
  ChildToolRule,
  ConditionalToolRule,
@@ -371,3 +378,25 @@ def deserialize_agent_step_state(data: Optional[Dict]) -> Optional[AgentStepStat
  return None

  return AgentStepState(**data)
+
+
+ # --------------------------
+ # Response Format Serialization
+ # --------------------------
+
+
+ def serialize_response_format(response_format: Optional[ResponseFormatUnion]) -> Optional[Dict[str, Any]]:
+ if not response_format:
+ return None
+ return response_format.model_dump(mode="json")
+
+
+ def deserialize_response_format(data: Optional[Dict]) -> Optional[ResponseFormatUnion]:
+ if not data:
+ return None
+ if data["type"] == ResponseFormatType.text:
+ return TextResponseFormat(**data)
+ if data["type"] == ResponseFormatType.json_schema:
+ return JsonSchemaResponseFormat(**data)
+ if data["type"] == ResponseFormatType.json_object:
+ return JsonObjectResponseFormat(**data)
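
A minimal sketch of how these two converters are meant to round-trip a response format through the new JSON column (illustrative only; the constructor fields of the ResponseFormat schemas are assumptions, since letta/schemas/response_format.py is not shown in this diff):

from letta.helpers.converters import (
    deserialize_response_format,
    serialize_response_format,
)
from letta.schemas.response_format import TextResponseFormat

# Assumption: TextResponseFormat() can be built with defaults, mirroring how
# deserialize_response_format rebuilds it from {"type": ...}.
rf = TextResponseFormat()

stored = serialize_response_format(rf)          # plain dict via model_dump(mode="json")
restored = deserialize_response_format(stored)  # dispatches on stored["type"] back to the subclass

assert isinstance(restored, TextResponseFormat)
assert serialize_response_format(None) is None  # both helpers pass None through unchanged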
@@ -66,6 +66,15 @@ def get_utc_time() -> datetime:
  return datetime.now(timezone.utc)


+ def get_utc_time_int() -> int:
+ return int(get_utc_time().timestamp())
+
+
+ def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
+ """Convert Unix timestamp in seconds to UTC datetime object"""
+ return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
+
+
  def format_datetime(dt):
  return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")

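A short standalone sketch (not part of the package) of why the `created` fields in the hunks below switch to integers: ChatCompletionResponse.created is now kept in Unix seconds, matching OpenAI's wire format, and converted back to an aware datetime only where the streaming interfaces need one:

from datetime import datetime, timezone

def get_utc_time_int() -> int:
    # Same behavior as the helper added above: current UTC time in whole seconds
    return int(datetime.now(timezone.utc).timestamp())

def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
    # Inverse direction, always returning a timezone-aware UTC datetime
    return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)

created = get_utc_time_int()  # e.g. 1713216662
assert timestamp_to_datetime(created).tzinfo == timezone.utc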
@@ -40,4 +40,5 @@ def prepare_input_message_create(
  tool_call_id=None,
  otid=message.otid,
  sender_id=message.sender_id,
+ group_id=message.group_id,
  )
@@ -160,12 +160,12 @@ def execute_external_tool(
  else:
  agent_state_copy = None

- sandbox_run_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
- function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state
+ tool_execution_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
+ function_response, updated_agent_state = tool_execution_result.func_return, tool_execution_result.agent_state
  # TODO: Bring this back
  # if allow_agent_state_modifications and updated_agent_state is not None:
  # self.update_memory_if_changed(updated_agent_state.memory)
- return function_response, sandbox_run_result
+ return function_response, tool_execution_result
  except Exception as e:
  # Need to catch error here, or else trunction wont happen
  # TODO: modify to function execution error
@@ -73,7 +73,8 @@ async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: st
  """
  updates = []
  try:
- async for item_result in server.anthropic_async_client.beta.messages.batches.results(batch_resp_id):
+ results = await server.anthropic_async_client.beta.messages.batches.results(batch_resp_id)
+ async for item_result in results:
  # Here, custom_id should be the agent_id
  item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
  updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
@@ -20,7 +20,7 @@ from anthropic.types.beta import (
  )

  from letta.errors import BedrockError, BedrockPermissionError
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
  from letta.llm_api.aws_bedrock import get_bedrock_client
  from letta.llm_api.helpers import add_inner_thoughts_to_functions
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion(
  return ChatCompletionResponse(
  id=response.id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=response.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
  'logprobs': None
  }
  ],
- 'created': datetime.datetime(2025, 1, 24, 0, 18, 55, tzinfo=TzInfo(UTC)),
+ 'created': 1713216662,
  'model': 'gpt-4o-mini-2024-07-18',
  'system_fingerprint': 'fp_bd83329f63',
  'object': 'chat.completion.chunk'
@@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
  return ChatCompletionChunkResponse(
  id=message_id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  output_tokens=completion_chunk_tokens,
  )
@@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream(
  chat_completion_response = ChatCompletionResponse(
  id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
  choices=[],
- created=dummy_message.created_at,
+ created=int(dummy_message.created_at.timestamp()),
  model=chat_completion_request.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream(
  message_type = stream_interface.process_chunk(
  chat_completion_chunk,
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
- message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+ message_date=(
+ timestamp_to_datetime(chat_completion_response.created)
+ if create_message_datetime
+ else timestamp_to_datetime(chat_completion_chunk.created)
+ ),
  # if extended_thinking is on, then reasoning_content will be flowing as chunks
  # TODO handle emitting redacted reasoning content (e.g. as concat?)
  expect_reasoning_content=extended_thinking,
@@ -22,7 +22,7 @@ from letta.errors import (
  LLMServerError,
  LLMUnprocessableEntityError,
  )
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -403,7 +403,7 @@ class AnthropicClient(LLMClientBase):
  chat_completion_response = ChatCompletionResponse(
  id=response.id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=response.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
letta/llm_api/cohere.py CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional, Union

  import requests

- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.local_llm.utils import count_tokens
  from letta.schemas.message import Message
@@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion(
  return ChatCompletionResponse(
  id=response_json["response_id"],
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -6,7 +6,7 @@ import requests
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

  from letta.constants import NON_USER_MSG_PREFIX
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.llm_api.helpers import make_post_request
  from letta.llm_api.llm_client_base import LLMClientBase
@@ -260,7 +260,7 @@ class GoogleAIClient(LLMClientBase):
  id=response_id,
  choices=choices,
  model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response
- created=get_utc_time(),
+ created=get_utc_time_int(),
  usage=usage,
  )
  except KeyError as e:
@@ -4,7 +4,7 @@ from typing import List, Optional
  from google import genai
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig

- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.llm_api.google_ai_client import GoogleAIClient
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
@@ -234,7 +234,7 @@ class GoogleVertexClient(GoogleAIClient):
  id=response_id,
  choices=choices,
  model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response
- created=get_utc_time(),
+ created=get_utc_time_int(),
  usage=usage,
  )
  except KeyError as e:
letta/llm_api/openai.py CHANGED
@@ -4,7 +4,9 @@ from typing import Generator, List, Optional, Union
  import requests
  from openai import OpenAI

+ from letta.helpers.datetime_helpers import timestamp_to_datetime
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
+ from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
@@ -135,7 +137,7 @@ def build_openai_chat_completions_request(
  tool_choice=tool_choice,
  user=str(user_id),
  max_completion_tokens=llm_config.max_tokens,
- temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+ temperature=llm_config.temperature if supports_temperature_param(model) else None,
  reasoning_effort=llm_config.reasoning_effort,
  )
  else:
@@ -237,7 +239,7 @@ def openai_chat_completions_process_stream(
  chat_completion_response = ChatCompletionResponse(
  id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
  choices=[],
- created=dummy_message.created_at, # NOTE: doesn't matter since both will do get_utc_time()
+ created=int(dummy_message.created_at.timestamp()), # NOTE: doesn't matter since both will do get_utc_time()
  model=chat_completion_request.model,
  usage=UsageStatistics(
  completion_tokens=0,
@@ -274,7 +276,11 @@ def openai_chat_completions_process_stream(
  message_type = stream_interface.process_chunk(
  chat_completion_chunk,
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
- message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+ message_date=(
+ timestamp_to_datetime(chat_completion_response.created)
+ if create_message_datetime
+ else timestamp_to_datetime(chat_completion_chunk.created)
+ ),
  expect_reasoning_content=expect_reasoning_content,
  name=name,
  message_index=message_idx,
@@ -489,6 +495,7 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
  # except ValueError as e:
  # warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

- if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+ if not supports_parallel_tool_calling(chat_completion_request.model):
  data.pop("parallel_tool_calls", None)
+
  return data
@@ -34,6 +34,33 @@ from letta.settings import model_settings
  logger = get_logger(__name__)


+ def is_openai_reasoning_model(model: str) -> bool:
+ """Utility function to check if the model is a 'reasoner'"""
+
+ # NOTE: needs to be updated with new model releases
+ return model.startswith("o1") or model.startswith("o3")
+
+
+ def supports_temperature_param(model: str) -> bool:
+ """Certain OpenAI models don't support configuring the temperature.
+
+ Example error: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_parameter'}}
+ """
+ if is_openai_reasoning_model(model):
+ return False
+ else:
+ return True
+
+
+ def supports_parallel_tool_calling(model: str) -> bool:
+ """Certain OpenAI models don't support parallel tool calls."""
+
+ if is_openai_reasoning_model(model):
+ return False
+ else:
+ return True
+
+
  class OpenAIClient(LLMClientBase):
  def _prepare_client_kwargs(self) -> dict:
  api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
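
A hedged standalone sketch of how these three predicates compose; the model-name prefixes are exactly the ones hard-coded above and, as the NOTE says, will need updating as new models ship:

def is_openai_reasoning_model(model: str) -> bool:
    # o-series ("reasoner") models, per the check added above
    return model.startswith("o1") or model.startswith("o3")

def supports_temperature_param(model: str) -> bool:
    # reasoning models reject the `temperature` parameter with a 400 error
    return not is_openai_reasoning_model(model)

def supports_parallel_tool_calling(model: str) -> bool:
    # reasoning models also reject `parallel_tool_calls`
    return not is_openai_reasoning_model(model)

assert supports_temperature_param("gpt-4o-mini")
assert not supports_parallel_tool_calling("o3-mini")
# Callers then drop the unsupported fields, e.g.:
# temperature = llm_config.temperature if supports_temperature_param(model) else None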
@@ -66,7 +93,8 @@ class OpenAIClient(LLMClientBase):
  put_inner_thoughts_first=True,
  )

- use_developer_message = llm_config.model.startswith("o1") or llm_config.model.startswith("o3") # o-series models
+ use_developer_message = is_openai_reasoning_model(llm_config.model)
+
  openai_message_list = [
  cast_message_to_subtype(
  m.to_openai_dict(
@@ -103,7 +131,7 @@ class OpenAIClient(LLMClientBase):
  tool_choice=tool_choice,
  user=str(),
  max_completion_tokens=llm_config.max_tokens,
- temperature=llm_config.temperature,
+ temperature=llm_config.temperature if supports_temperature_param(model) else None,
  )

  if "inference.memgpt.ai" in llm_config.model_endpoint:
@@ -160,6 +188,10 @@ class OpenAIClient(LLMClientBase):
  response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
  )

+ # If we used a reasoning model, create a content part for the ommitted reasoning
+ if is_openai_reasoning_model(self.llm_config.model):
+ chat_completion_response.choices[0].message.ommitted_reasoning_content = True
+
  return chat_completion_response

  def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
@@ -6,7 +6,7 @@ import requests

  from letta.constants import CLI_WARNING_PREFIX
  from letta.errors import LocalLLMConnectionError, LocalLLMError
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.local_llm.constants import DEFAULT_WRAPPER
  from letta.local_llm.function_parser import patch_function
@@ -241,7 +241,7 @@ def get_chat_completion(
  ),
  )
  ],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  # "This fingerprint represents the backend configuration that the model runs with."
  # system_fingerprint=user if user is not None else "null",
letta/orm/agent.py CHANGED
@@ -5,7 +5,7 @@ from sqlalchemy import JSON, Boolean, Index, String
  from sqlalchemy.orm import Mapped, mapped_column, relationship

  from letta.orm.block import Block
- from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn
+ from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ResponseFormatColumn, ToolRulesColumn
  from letta.orm.identity import Identity
  from letta.orm.mixins import OrganizationMixin
  from letta.orm.organization import Organization
@@ -15,6 +15,7 @@ from letta.schemas.agent import AgentType, get_prompt_template_for_agent_type
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
+ from letta.schemas.response_format import ResponseFormatUnion
  from letta.schemas.tool_rule import ToolRule

  if TYPE_CHECKING:
@@ -48,6 +49,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
  # This is dangerously flexible with the JSON type
  message_ids: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True, doc="List of message IDs in in-context memory.")

+ # Response Format
+ response_format: Mapped[Optional[ResponseFormatUnion]] = mapped_column(
+ ResponseFormatColumn, nullable=True, doc="The response format for the agent."
+ )
+
  # Metadata and configs
  metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the agent.")
  llm_config: Mapped[Optional[LLMConfig]] = mapped_column(
@@ -168,6 +174,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
  "multi_agent_group": None,
  "tool_exec_environment_variables": [],
  "enable_sleeptime": None,
+ "response_format": self.response_format,
  }

  # Optional fields: only included if requested
@@ -9,6 +9,7 @@ from letta.helpers.converters import (
  deserialize_llm_config,
  deserialize_message_content,
  deserialize_poll_batch_response,
+ deserialize_response_format,
  deserialize_tool_calls,
  deserialize_tool_returns,
  deserialize_tool_rules,
@@ -20,6 +21,7 @@ from letta.helpers.converters import (
  serialize_llm_config,
  serialize_message_content,
  serialize_poll_batch_response,
+ serialize_response_format,
  serialize_tool_calls,
  serialize_tool_returns,
  serialize_tool_rules,
@@ -168,3 +170,16 @@ class AgentStepStateColumn(TypeDecorator):

  def process_result_value(self, value, dialect):
  return deserialize_agent_step_state(value)
+
+
+ class ResponseFormatColumn(TypeDecorator):
+ """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+ impl = JSON
+ cache_ok = True
+
+ def process_bind_param(self, value, dialect):
+ return serialize_response_format(value)
+
+ def process_result_value(self, value, dialect):
+ return deserialize_response_format(value)
letta/schemas/agent.py CHANGED
@@ -14,6 +14,7 @@ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import UsageStatistics
+ from letta.schemas.response_format import ResponseFormatUnion
  from letta.schemas.source import Source
  from letta.schemas.tool import Tool
  from letta.schemas.tool_rule import ToolRule
@@ -66,6 +67,9 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
  # llm information
  llm_config: LLMConfig = Field(..., description="The LLM configuration used by the agent.")
  embedding_config: EmbeddingConfig = Field(..., description="The embedding configuration used by the agent.")
+ response_format: Optional[ResponseFormatUnion] = Field(
+ None, description="The response format used by the agent when returning from `send_message`."
+ )

  # This is an object representing the in-process state of a running `Agent`
  # Field in this object can be theoretically edited by tools, and will be persisted by the ORM
@@ -180,6 +184,7 @@ class CreateAgent(BaseModel, validate_assignment=True): #
  description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
  )
  enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+ response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")

  @field_validator("name")
  @classmethod
@@ -259,6 +264,7 @@ class UpdateAgent(BaseModel):
  None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
  )
  enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+ response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")

  class Config:
  extra = "ignore" # Ignores extra fields
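
For context, a hedged sketch of how the new field might be set on an agent update via these schemas; the default-constructed JsonObjectResponseFormat is an assumption (mirroring how deserialize_response_format rebuilds it from {"type": ...}), and only the `response_format` field itself comes from this diff:

from letta.schemas.agent import UpdateAgent
from letta.schemas.response_format import JsonObjectResponseFormat

# Assumption: JsonObjectResponseFormat() can be built with defaults.
patch = UpdateAgent(response_format=JsonObjectResponseFormat())

# The ORM round-trips this value through ResponseFormatColumn, and AgentState
# exposes it back as `agent_state.response_format`.
print(patch.model_dump(exclude_none=True))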
@@ -145,7 +145,8 @@ class OmittedReasoningContent(MessageContent):
  type: Literal[MessageContentType.omitted_reasoning] = Field(
  MessageContentType.omitted_reasoning, description="Indicates this is an omitted reasoning step."
  )
- tokens: int = Field(..., description="The reasoning token count for intermediate reasoning content.")
+ # NOTE: dropping because we don't track this kind of information for the other reasoning types
+ # tokens: int = Field(..., description="The reasoning token count for intermediate reasoning content.")


  LettaMessageContentUnion = Annotated[
@@ -81,8 +81,11 @@ class LLMConfig(BaseModel):
  @model_validator(mode="before")
  @classmethod
  def set_default_enable_reasoner(cls, values):
- if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
- values["enable_reasoner"] = True
+ # NOTE: this is really only applicable for models that can toggle reasoning on-and-off, like 3.7
+ # We can also use this field to identify if a model is a "reasoning" model (o1/o3, etc.) if we want
+ # if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
+ # values["enable_reasoner"] = True
+ # values["put_inner_thoughts_in_kwargs"] = False
  return values

  @model_validator(mode="before")
@@ -100,6 +103,13 @@ class LLMConfig(BaseModel):
  if values.get("put_inner_thoughts_in_kwargs") is None:
  values["put_inner_thoughts_in_kwargs"] = False if model in avoid_put_inner_thoughts_in_kwargs else True

+ # For the o1/o3 series from OpenAI, set to False by default
+ # We can set this flag to `true` if desired, which will enable "double-think"
+ from letta.llm_api.openai_client import is_openai_reasoning_model
+
+ if is_openai_reasoning_model(model):
+ values["put_inner_thoughts_in_kwargs"] = False
+
  return values

  @model_validator(mode="after")
letta/schemas/message.py CHANGED
@@ -31,6 +31,7 @@ from letta.schemas.letta_message import (
  )
  from letta.schemas.letta_message_content import (
  LettaMessageContentUnion,
+ OmittedReasoningContent,
  ReasoningContent,
  RedactedReasoningContent,
  TextContent,
@@ -82,6 +83,7 @@ class MessageCreate(BaseModel):
  name: Optional[str] = Field(None, description="The name of the participant.")
  otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
  sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+ group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")

  def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
  data = super().model_dump(**kwargs)
@@ -294,6 +296,18 @@ class Message(BaseMessage):
  sender_id=self.sender_id,
  )
  )
+ elif isinstance(content_part, OmittedReasoningContent):
+ # Special case for "hidden reasoning" models like o1/o3
+ # NOTE: we also have to think about how to return this during streaming
+ messages.append(
+ HiddenReasoningMessage(
+ id=self.id,
+ date=self.created_at,
+ state="omitted",
+ name=self.name,
+ otid=otid,
+ )
+ )
  else:
  warnings.warn(f"Unrecognized content part in assistant message: {content_part}")

@@ -463,6 +477,10 @@ class Message(BaseMessage):
  data=openai_message_dict["redacted_reasoning_content"] if "redacted_reasoning_content" in openai_message_dict else None,
  ),
  )
+ if "omitted_reasoning_content" in openai_message_dict and openai_message_dict["omitted_reasoning_content"]:
+ content.append(
+ OmittedReasoningContent(),
+ )

  # If we're going from deprecated function form
  if openai_message_dict["role"] == "function":