letta-nightly 0.7.0.dev20250423003112__py3-none-any.whl → 0.7.2.dev20250423222439__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +113 -81
- letta/agents/letta_agent.py +2 -2
- letta/agents/letta_agent_batch.py +38 -34
- letta/client/client.py +10 -2
- letta/constants.py +4 -3
- letta/functions/function_sets/multi_agent.py +1 -3
- letta/functions/helpers.py +3 -3
- letta/groups/dynamic_multi_agent.py +58 -59
- letta/groups/round_robin_multi_agent.py +43 -49
- letta/groups/sleeptime_multi_agent.py +28 -18
- letta/groups/supervisor_multi_agent.py +21 -20
- letta/helpers/composio_helpers.py +1 -1
- letta/helpers/converters.py +29 -0
- letta/helpers/datetime_helpers.py +9 -0
- letta/helpers/message_helper.py +1 -0
- letta/helpers/tool_execution_helper.py +3 -3
- letta/jobs/llm_batch_job_polling.py +2 -1
- letta/llm_api/anthropic.py +10 -6
- letta/llm_api/anthropic_client.py +2 -2
- letta/llm_api/cohere.py +2 -2
- letta/llm_api/google_ai_client.py +2 -2
- letta/llm_api/google_vertex_client.py +2 -2
- letta/llm_api/openai.py +11 -4
- letta/llm_api/openai_client.py +34 -2
- letta/local_llm/chat_completion_proxy.py +2 -2
- letta/orm/agent.py +8 -1
- letta/orm/custom_columns.py +15 -0
- letta/schemas/agent.py +6 -0
- letta/schemas/letta_message_content.py +2 -1
- letta/schemas/llm_config.py +12 -2
- letta/schemas/message.py +18 -0
- letta/schemas/openai/chat_completion_response.py +52 -3
- letta/schemas/response_format.py +78 -0
- letta/schemas/tool_execution_result.py +14 -0
- letta/server/rest_api/chat_completions_interface.py +2 -2
- letta/server/rest_api/interface.py +3 -2
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +1 -1
- letta/server/rest_api/routers/v1/agents.py +4 -4
- letta/server/rest_api/routers/v1/groups.py +2 -2
- letta/server/rest_api/routers/v1/messages.py +41 -19
- letta/server/server.py +24 -57
- letta/services/agent_manager.py +6 -1
- letta/services/llm_batch_manager.py +28 -26
- letta/services/tool_executor/tool_execution_manager.py +37 -28
- letta/services/tool_executor/tool_execution_sandbox.py +35 -16
- letta/services/tool_executor/tool_executor.py +299 -68
- letta/services/tool_sandbox/base.py +3 -2
- letta/services/tool_sandbox/e2b_sandbox.py +5 -4
- letta/services/tool_sandbox/local_sandbox.py +11 -6
- {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/METADATA +1 -1
- {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/RECORD +55 -53
- {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/entry_points.txt +0 -0
letta/groups/supervisor_multi_agent.py
CHANGED
@@ -9,7 +9,7 @@ from letta.interface import AgentInterface
 from letta.orm import User
 from letta.orm.enums import ToolType
 from letta.schemas.letta_message_content import TextContent
-from letta.schemas.message import
+from letta.schemas.message import MessageCreate
 from letta.schemas.tool import Tool
 from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule
 from letta.schemas.usage import LettaUsageStatistics
@@ -37,17 +37,18 @@ class SupervisorMultiAgent(Agent):
 
     def step(
         self,
-
+        input_messages: List[MessageCreate],
         chaining: bool = True,
         max_chaining_steps: Optional[int] = None,
         put_inner_thoughts_first: bool = True,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         **kwargs,
     ) -> LettaUsageStatistics:
+        # Load settings
         token_streaming = self.interface.streaming_mode if hasattr(self.interface, "streaming_mode") else False
         metadata = self.interface.metadata if hasattr(self.interface, "metadata") else None
 
-        #
+        # Prepare supervisor agent
        if self.tool_manager.get_tool_by_name(tool_name="send_message_to_all_agents_in_group", actor=self.user) is None:
            multi_agent_tool = Tool(
                name=send_message_to_all_agents_in_group.__name__,
@@ -64,7 +65,6 @@ class SupervisorMultiAgent(Agent):
            )
            self.agent_state = self.agent_manager.attach_tool(agent_id=self.agent_state.id, tool_id=multi_agent_tool.id, actor=self.user)
 
-        # override tool rules
        old_tool_rules = self.agent_state.tool_rules
        self.agent_state.tool_rules = [
            InitToolRule(
@@ -79,24 +79,25 @@ class SupervisorMultiAgent(Agent):
            ),
        ]
 
-
-
-
-
-                content=[TextContent(text=message.content)]
-
-
-
-                tool_call_id=None,
-                group_id=self.group_id,
-                otid=message.otid,
-            )
-            for message in messages
-        ]
+        # Prepare new messages
+        new_messages = []
+        for message in input_messages:
+            if isinstance(message.content, str):
+                message.content = [TextContent(text=message.content)]
+            message.group_id = self.group_id
+            new_messages.append(message)
+
        try:
-
+            # Load supervisor agent
+            supervisor_agent = Agent(
+                agent_state=self.agent_state,
+                interface=self.interface,
+                user=self.user,
+            )
+
+            # Perform supervisor step
            usage_stats = supervisor_agent.step(
-
+                input_messages=new_messages,
                chaining=chaining,
                max_chaining_steps=max_chaining_steps,
                stream=token_streaming,
letta/helpers/composio_helpers.py
CHANGED
@@ -10,7 +10,7 @@ def get_composio_api_key(actor: User, logger: Optional[Logger] = None) -> Option
     api_keys = SandboxConfigManager().list_sandbox_env_vars_by_key(key="COMPOSIO_API_KEY", actor=actor)
     if not api_keys:
         if logger:
-            logger.
+            logger.debug(f"No API keys found for Composio. Defaulting to the environment variable...")
        if tool_settings.composio_api_key:
            return tool_settings.composio_api_key
        else:
letta/helpers/converters.py
CHANGED
@@ -22,6 +22,13 @@ from letta.schemas.letta_message_content import (
 )
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import ToolReturn
+from letta.schemas.response_format import (
+    JsonObjectResponseFormat,
+    JsonSchemaResponseFormat,
+    ResponseFormatType,
+    ResponseFormatUnion,
+    TextResponseFormat,
+)
 from letta.schemas.tool_rule import (
     ChildToolRule,
     ConditionalToolRule,
@@ -371,3 +378,25 @@ def deserialize_agent_step_state(data: Optional[Dict]) -> Optional[AgentStepStat
         return None
 
     return AgentStepState(**data)
+
+
+# --------------------------
+# Response Format Serialization
+# --------------------------
+
+
+def serialize_response_format(response_format: Optional[ResponseFormatUnion]) -> Optional[Dict[str, Any]]:
+    if not response_format:
+        return None
+    return response_format.model_dump(mode="json")
+
+
+def deserialize_response_format(data: Optional[Dict]) -> Optional[ResponseFormatUnion]:
+    if not data:
+        return None
+    if data["type"] == ResponseFormatType.text:
+        return TextResponseFormat(**data)
+    if data["type"] == ResponseFormatType.json_schema:
+        return JsonSchemaResponseFormat(**data)
+    if data["type"] == ResponseFormatType.json_object:
+        return JsonObjectResponseFormat(**data)
letta/helpers/datetime_helpers.py
CHANGED
@@ -66,6 +66,15 @@ def get_utc_time() -> datetime:
     return datetime.now(timezone.utc)
 
 
+def get_utc_time_int() -> int:
+    return int(get_utc_time().timestamp())
+
+
+def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
+    """Convert Unix timestamp in seconds to UTC datetime object"""
+    return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
+
+
 def format_datetime(dt):
     return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")
 
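
The new pair converts between integer epoch seconds (what ChatCompletionResponse.created now expects) and aware UTC datetimes; a round trip only loses sub-second precision.

from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime

ts = get_utc_time_int()          # e.g. 1713216662, seconds since the Unix epoch
dt = timestamp_to_datetime(ts)   # timezone-aware datetime in UTC
assert dt.tzinfo is not None and int(dt.timestamp()) == ts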
letta/helpers/message_helper.py
CHANGED
@@ -160,12 +160,12 @@ def execute_external_tool(
         else:
             agent_state_copy = None
 
-
-        function_response, updated_agent_state =
+        tool_execution_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
+        function_response, updated_agent_state = tool_execution_result.func_return, tool_execution_result.agent_state
         # TODO: Bring this back
         # if allow_agent_state_modifications and updated_agent_state is not None:
         #     self.update_memory_if_changed(updated_agent_state.memory)
-        return function_response,
+        return function_response, tool_execution_result
     except Exception as e:
         # Need to catch error here, or else trunction wont happen
         # TODO: modify to function execution error
letta/jobs/llm_batch_job_polling.py
CHANGED
@@ -73,7 +73,8 @@ async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: st
     """
     updates = []
     try:
-
+        results = await server.anthropic_async_client.beta.messages.batches.results(batch_resp_id)
+        async for item_result in results:
             # Here, custom_id should be the agent_id
             item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
             updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
letta/llm_api/anthropic.py
CHANGED
@@ -20,7 +20,7 @@ from anthropic.types.beta import (
 )
 
 from letta.errors import BedrockError, BedrockPermissionError
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
 from letta.llm_api.aws_bedrock import get_bedrock_client
 from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response.id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=response.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
                 'logprobs': None
             }
         ],
-        'created':
+        'created': 1713216662,
        'model': 'gpt-4o-mini-2024-07-18',
        'system_fingerprint': 'fp_bd83329f63',
        'object': 'chat.completion.chunk'
@@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
     return ChatCompletionChunkResponse(
         id=message_id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         output_tokens=completion_chunk_tokens,
     )
@@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream(
     chat_completion_response = ChatCompletionResponse(
         id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
         choices=[],
-        created=dummy_message.created_at,
+        created=int(dummy_message.created_at.timestamp()),
         model=chat_completion_request.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream(
             message_type = stream_interface.process_chunk(
                 chat_completion_chunk,
                 message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
-                message_date=
+                message_date=(
+                    timestamp_to_datetime(chat_completion_response.created)
+                    if create_message_datetime
+                    else timestamp_to_datetime(chat_completion_chunk.created)
+                ),
                 # if extended_thinking is on, then reasoning_content will be flowing as chunks
                 # TODO handle emitting redacted reasoning content (e.g. as concat?)
                 expect_reasoning_content=extended_thinking,
letta/llm_api/anthropic_client.py
CHANGED
@@ -22,7 +22,7 @@ from letta.errors import (
     LLMServerError,
     LLMUnprocessableEntityError,
 )
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -403,7 +403,7 @@ class AnthropicClient(LLMClientBase):
         chat_completion_response = ChatCompletionResponse(
             id=response.id,
             choices=[choice],
-            created=
+            created=get_utc_time_int(),
             model=response.model,
             usage=UsageStatistics(
                 prompt_tokens=prompt_tokens,
letta/llm_api/cohere.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional, Union
 
 import requests
 
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.local_llm.utils import count_tokens
 from letta.schemas.message import Message
@@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response_json["response_id"],
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
letta/llm_api/google_ai_client.py
CHANGED
@@ -6,7 +6,7 @@ import requests
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
 
 from letta.constants import NON_USER_MSG_PREFIX
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -260,7 +260,7 @@ class GoogleAIClient(LLMClientBase):
                 id=response_id,
                 choices=choices,
                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
-                created=
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
letta/llm_api/google_vertex_client.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional
 from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
@@ -234,7 +234,7 @@ class GoogleVertexClient(GoogleAIClient):
                 id=response_id,
                 choices=choices,
                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
-                created=
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
letta/llm_api/openai.py
CHANGED
@@ -4,7 +4,9 @@ from typing import Generator, List, Optional, Union
 import requests
 from openai import OpenAI
 
+from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
+from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -135,7 +137,7 @@ def build_openai_chat_completions_request(
             tool_choice=tool_choice,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=
+            temperature=llm_config.temperature if supports_temperature_param(model) else None,
             reasoning_effort=llm_config.reasoning_effort,
         )
     else:
@@ -237,7 +239,7 @@ def openai_chat_completions_process_stream(
     chat_completion_response = ChatCompletionResponse(
         id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
         choices=[],
-        created=dummy_message.created_at,  # NOTE: doesn't matter since both will do get_utc_time()
+        created=int(dummy_message.created_at.timestamp()),  # NOTE: doesn't matter since both will do get_utc_time()
        model=chat_completion_request.model,
        usage=UsageStatistics(
            completion_tokens=0,
@@ -274,7 +276,11 @@ def openai_chat_completions_process_stream(
             message_type = stream_interface.process_chunk(
                 chat_completion_chunk,
                 message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
-                message_date=
+                message_date=(
+                    timestamp_to_datetime(chat_completion_response.created)
+                    if create_message_datetime
+                    else timestamp_to_datetime(chat_completion_chunk.created)
+                ),
                 expect_reasoning_content=expect_reasoning_content,
                 name=name,
                 message_index=message_idx,
@@ -489,6 +495,7 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
     # except ValueError as e:
     #     warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
 
-    if
+    if not supports_parallel_tool_calling(chat_completion_request.model):
         data.pop("parallel_tool_calls", None)
+
     return data
letta/llm_api/openai_client.py
CHANGED
@@ -34,6 +34,33 @@ from letta.settings import model_settings
 logger = get_logger(__name__)
 
 
+def is_openai_reasoning_model(model: str) -> bool:
+    """Utility function to check if the model is a 'reasoner'"""
+
+    # NOTE: needs to be updated with new model releases
+    return model.startswith("o1") or model.startswith("o3")
+
+
+def supports_temperature_param(model: str) -> bool:
+    """Certain OpenAI models don't support configuring the temperature.
+
+    Example error: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_parameter'}}
+    """
+    if is_openai_reasoning_model(model):
+        return False
+    else:
+        return True
+
+
+def supports_parallel_tool_calling(model: str) -> bool:
+    """Certain OpenAI models don't support parallel tool calls."""
+
+    if is_openai_reasoning_model(model):
+        return False
+    else:
+        return True
+
+
 class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self) -> dict:
         api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
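
These predicates gate the temperature, parallel-tool-calls, and developer-message handling in the hunks below; given the prefix check above they behave like this:

from letta.llm_api.openai_client import (
    is_openai_reasoning_model,
    supports_parallel_tool_calling,
    supports_temperature_param,
)

assert is_openai_reasoning_model("o3-mini")            # "o3" prefix counts as a reasoner
assert not supports_temperature_param("o1")            # temperature is dropped for reasoners
assert supports_parallel_tool_calling("gpt-4o-mini")   # non-reasoners keep parallel tool calls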
@@ -66,7 +93,8 @@ class OpenAIClient(LLMClientBase):
                 put_inner_thoughts_first=True,
             )
 
-        use_developer_message =
+        use_developer_message = is_openai_reasoning_model(llm_config.model)
+
         openai_message_list = [
             cast_message_to_subtype(
                 m.to_openai_dict(
@@ -103,7 +131,7 @@ class OpenAIClient(LLMClientBase):
             tool_choice=tool_choice,
             user=str(),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=llm_config.temperature if supports_temperature_param(model) else None,
         )
 
         if "inference.memgpt.ai" in llm_config.model_endpoint:
@@ -160,6 +188,10 @@ class OpenAIClient(LLMClientBase):
                 response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
             )
 
+        # If we used a reasoning model, create a content part for the ommitted reasoning
+        if is_openai_reasoning_model(self.llm_config.model):
+            chat_completion_response.choices[0].message.ommitted_reasoning_content = True
+
         return chat_completion_response
 
     def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
letta/local_llm/chat_completion_proxy.py
CHANGED
@@ -6,7 +6,7 @@ import requests
 
 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LocalLLMConnectionError, LocalLLMError
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.local_llm.constants import DEFAULT_WRAPPER
 from letta.local_llm.function_parser import patch_function
@@ -241,7 +241,7 @@ def get_chat_completion(
                 ),
             )
         ],
-        created=
+        created=get_utc_time_int(),
         model=model,
         # "This fingerprint represents the backend configuration that the model runs with."
        # system_fingerprint=user if user is not None else "null",
letta/orm/agent.py
CHANGED
@@ -5,7 +5,7 @@ from sqlalchemy import JSON, Boolean, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from letta.orm.block import Block
-from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn
+from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ResponseFormatColumn, ToolRulesColumn
 from letta.orm.identity import Identity
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.organization import Organization
@@ -15,6 +15,7 @@ from letta.schemas.agent import AgentType, get_prompt_template_for_agent_type
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
+from letta.schemas.response_format import ResponseFormatUnion
 from letta.schemas.tool_rule import ToolRule
 
 if TYPE_CHECKING:
@@ -48,6 +49,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # This is dangerously flexible with the JSON type
     message_ids: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True, doc="List of message IDs in in-context memory.")
 
+    # Response Format
+    response_format: Mapped[Optional[ResponseFormatUnion]] = mapped_column(
+        ResponseFormatColumn, nullable=True, doc="The response format for the agent."
+    )
+
     # Metadata and configs
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the agent.")
     llm_config: Mapped[Optional[LLMConfig]] = mapped_column(
@@ -168,6 +174,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "multi_agent_group": None,
             "tool_exec_environment_variables": [],
             "enable_sleeptime": None,
+            "response_format": self.response_format,
         }
 
         # Optional fields: only included if requested
letta/orm/custom_columns.py
CHANGED
@@ -9,6 +9,7 @@ from letta.helpers.converters import (
     deserialize_llm_config,
     deserialize_message_content,
     deserialize_poll_batch_response,
+    deserialize_response_format,
     deserialize_tool_calls,
     deserialize_tool_returns,
     deserialize_tool_rules,
@@ -20,6 +21,7 @@ from letta.helpers.converters import (
     serialize_llm_config,
     serialize_message_content,
     serialize_poll_batch_response,
+    serialize_response_format,
     serialize_tool_calls,
     serialize_tool_returns,
     serialize_tool_rules,
@@ -168,3 +170,16 @@ class AgentStepStateColumn(TypeDecorator):
 
     def process_result_value(self, value, dialect):
         return deserialize_agent_step_state(value)
+
+
+class ResponseFormatColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, value, dialect):
+        return serialize_response_format(value)
+
+    def process_result_value(self, value, dialect):
+        return deserialize_response_format(value)
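
The column simply delegates to the converters shown earlier, so a bind/result round trip looks like this sketch (the dialect argument is unused by these methods, so None is passed here for illustration):

from letta.orm.custom_columns import ResponseFormatColumn
from letta.schemas.response_format import TextResponseFormat

col = ResponseFormatColumn()
stored = col.process_bind_param(TextResponseFormat(), dialect=None)  # plain dict for the JSON column
loaded = col.process_result_value(stored, dialect=None)              # typed ResponseFormat again
assert isinstance(loaded, TextResponseFormat)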
letta/schemas/agent.py
CHANGED
@@ -14,6 +14,7 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import UsageStatistics
+from letta.schemas.response_format import ResponseFormatUnion
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
 from letta.schemas.tool_rule import ToolRule
@@ -66,6 +67,9 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
     # llm information
     llm_config: LLMConfig = Field(..., description="The LLM configuration used by the agent.")
     embedding_config: EmbeddingConfig = Field(..., description="The embedding configuration used by the agent.")
+    response_format: Optional[ResponseFormatUnion] = Field(
+        None, description="The response format used by the agent when returning from `send_message`."
+    )
 
     # This is an object representing the in-process state of a running `Agent`
     # Field in this object can be theoretically edited by tools, and will be persisted by the ORM
@@ -180,6 +184,7 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
         description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
     )
     enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")
 
     @field_validator("name")
     @classmethod
@@ -259,6 +264,7 @@ class UpdateAgent(BaseModel):
         None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
     )
     enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")
 
     class Config:
         extra = "ignore"  # Ignores extra fields
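
On the API side this surfaces as an optional field on both the create and update schemas; a hypothetical request payload using the new field (other agent-creation fields omitted, and the exact type values taken from ResponseFormatType) might look like:

# Hypothetical JSON body for creating an agent that should answer in JSON objects.
payload = {
    "name": "structured-output-agent",
    "response_format": {"type": "json_object"},
}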
letta/schemas/letta_message_content.py
CHANGED
@@ -145,7 +145,8 @@ class OmittedReasoningContent(MessageContent):
     type: Literal[MessageContentType.omitted_reasoning] = Field(
         MessageContentType.omitted_reasoning, description="Indicates this is an omitted reasoning step."
     )
-
+    # NOTE: dropping because we don't track this kind of information for the other reasoning types
+    # tokens: int = Field(..., description="The reasoning token count for intermediate reasoning content.")
 
 
 LettaMessageContentUnion = Annotated[
letta/schemas/llm_config.py
CHANGED
@@ -81,8 +81,11 @@ class LLMConfig(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def set_default_enable_reasoner(cls, values):
-
-
+        # NOTE: this is really only applicable for models that can toggle reasoning on-and-off, like 3.7
+        # We can also use this field to identify if a model is a "reasoning" model (o1/o3, etc.) if we want
+        # if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
+        #     values["enable_reasoner"] = True
+        #     values["put_inner_thoughts_in_kwargs"] = False
         return values
 
     @model_validator(mode="before")
@@ -100,6 +103,13 @@ class LLMConfig(BaseModel):
         if values.get("put_inner_thoughts_in_kwargs") is None:
             values["put_inner_thoughts_in_kwargs"] = False if model in avoid_put_inner_thoughts_in_kwargs else True
 
+        # For the o1/o3 series from OpenAI, set to False by default
+        # We can set this flag to `true` if desired, which will enable "double-think"
+        from letta.llm_api.openai_client import is_openai_reasoning_model
+
+        if is_openai_reasoning_model(model):
+            values["put_inner_thoughts_in_kwargs"] = False
+
         return values
 
     @model_validator(mode="after")
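
A hedged sketch of the validator's effect, assuming model, model_endpoint_type, and context_window are the only required LLMConfig fields: an o-series model now defaults to keeping inner thoughts out of tool-call kwargs.

from letta.schemas.llm_config import LLMConfig

config = LLMConfig(model="o3-mini", model_endpoint_type="openai", context_window=200000)
assert config.put_inner_thoughts_in_kwargs is False  # forced off for o1/o3-style reasoners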
letta/schemas/message.py
CHANGED
@@ -31,6 +31,7 @@ from letta.schemas.letta_message import (
 )
 from letta.schemas.letta_message_content import (
     LettaMessageContentUnion,
+    OmittedReasoningContent,
     ReasoningContent,
     RedactedReasoningContent,
     TextContent,
@@ -82,6 +83,7 @@ class MessageCreate(BaseModel):
     name: Optional[str] = Field(None, description="The name of the participant.")
     otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
     sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+    group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
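
The new optional field lets callers (and the multi-agent groups shown earlier) tag a message with the group it was sent in; a minimal sketch, assuming role and content are the only required MessageCreate fields:

from letta.schemas.message import MessageCreate

msg = MessageCreate(role="user", content="Status update, please.", group_id="group-123")
assert msg.group_id == "group-123"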
@@ -294,6 +296,18 @@ class Message(BaseMessage):
                         sender_id=self.sender_id,
                     )
                 )
+            elif isinstance(content_part, OmittedReasoningContent):
+                # Special case for "hidden reasoning" models like o1/o3
+                # NOTE: we also have to think about how to return this during streaming
+                messages.append(
+                    HiddenReasoningMessage(
+                        id=self.id,
+                        date=self.created_at,
+                        state="omitted",
+                        name=self.name,
+                        otid=otid,
+                    )
+                )
             else:
                 warnings.warn(f"Unrecognized content part in assistant message: {content_part}")
 
@@ -463,6 +477,10 @@ class Message(BaseMessage):
                     data=openai_message_dict["redacted_reasoning_content"] if "redacted_reasoning_content" in openai_message_dict else None,
                 ),
             )
+        if "omitted_reasoning_content" in openai_message_dict and openai_message_dict["omitted_reasoning_content"]:
+            content.append(
+                OmittedReasoningContent(),
+            )
 
         # If we're going from deprecated function form
         if openai_message_dict["role"] == "function":