letta-nightly 0.11.4.dev20250826104242__py3-none-any.whl → 0.11.6.dev20250827050912__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +9 -3
- letta/agents/base_agent.py +2 -2
- letta/agents/letta_agent.py +56 -45
- letta/agents/voice_agent.py +2 -2
- letta/data_sources/redis_client.py +146 -1
- letta/errors.py +4 -0
- letta/functions/function_sets/files.py +2 -2
- letta/functions/mcp_client/types.py +30 -6
- letta/functions/schema_generator.py +46 -1
- letta/functions/schema_validator.py +17 -2
- letta/functions/types.py +1 -1
- letta/helpers/tool_execution_helper.py +0 -2
- letta/llm_api/anthropic_client.py +27 -5
- letta/llm_api/deepseek_client.py +97 -0
- letta/llm_api/groq_client.py +79 -0
- letta/llm_api/helpers.py +0 -1
- letta/llm_api/llm_api_tools.py +2 -113
- letta/llm_api/llm_client.py +21 -0
- letta/llm_api/llm_client_base.py +11 -9
- letta/llm_api/openai_client.py +3 -0
- letta/llm_api/xai_client.py +85 -0
- letta/prompts/prompt_generator.py +190 -0
- letta/schemas/agent_file.py +17 -2
- letta/schemas/file.py +24 -1
- letta/schemas/job.py +2 -0
- letta/schemas/letta_message.py +2 -0
- letta/schemas/letta_request.py +22 -0
- letta/schemas/message.py +10 -1
- letta/schemas/providers/bedrock.py +1 -0
- letta/schemas/response_format.py +2 -2
- letta/server/generate_openapi_schema.sh +4 -4
- letta/server/rest_api/redis_stream_manager.py +300 -0
- letta/server/rest_api/routers/v1/agents.py +129 -7
- letta/server/rest_api/routers/v1/folders.py +15 -5
- letta/server/rest_api/routers/v1/runs.py +101 -11
- letta/server/rest_api/routers/v1/sources.py +21 -53
- letta/server/rest_api/routers/v1/telemetry.py +14 -4
- letta/server/rest_api/routers/v1/tools.py +2 -2
- letta/server/rest_api/streaming_response.py +3 -24
- letta/server/server.py +0 -1
- letta/services/agent_manager.py +2 -2
- letta/services/agent_serialization_manager.py +129 -32
- letta/services/file_manager.py +111 -6
- letta/services/file_processor/file_processor.py +5 -2
- letta/services/files_agents_manager.py +60 -0
- letta/services/helpers/agent_manager_helper.py +6 -207
- letta/services/helpers/tool_parser_helper.py +6 -3
- letta/services/llm_batch_manager.py +1 -1
- letta/services/mcp/base_client.py +7 -1
- letta/services/mcp/sse_client.py +7 -2
- letta/services/mcp/stdio_client.py +5 -0
- letta/services/mcp/streamable_http_client.py +11 -2
- letta/services/mcp_manager.py +31 -30
- letta/services/source_manager.py +26 -1
- letta/services/summarizer/summarizer.py +21 -10
- letta/services/tool_executor/files_tool_executor.py +13 -9
- letta/services/tool_executor/mcp_tool_executor.py +3 -0
- letta/services/tool_executor/tool_execution_manager.py +13 -0
- letta/services/tool_executor/tool_execution_sandbox.py +0 -1
- letta/services/tool_manager.py +43 -20
- letta/services/tool_sandbox/local_sandbox.py +0 -2
- letta/settings.py +1 -0
- letta/utils.py +37 -0
- {letta_nightly-0.11.4.dev20250826104242.dist-info → letta_nightly-0.11.6.dev20250827050912.dist-info}/METADATA +116 -102
- {letta_nightly-0.11.4.dev20250826104242.dist-info → letta_nightly-0.11.6.dev20250827050912.dist-info}/RECORD +128 -127
- {letta_nightly-0.11.4.dev20250826104242.dist-info → letta_nightly-0.11.6.dev20250827050912.dist-info}/WHEEL +1 -1
- letta_nightly-0.11.6.dev20250827050912.dist-info/entry_points.txt +2 -0
- letta/functions/mcp_client/__init__.py +0 -0
- letta/functions/mcp_client/base_client.py +0 -156
- letta/functions/mcp_client/sse_client.py +0 -51
- letta/functions/mcp_client/stdio_client.py +0 -109
- letta_nightly-0.11.4.dev20250826104242.dist-info/entry_points.txt +0 -3
- {letta_nightly-0.11.4.dev20250826104242.dist-info → letta_nightly-0.11.6.dev20250827050912.dist-info/licenses}/LICENSE +0 -0
letta/llm_api/anthropic_client.py CHANGED

```diff
@@ -287,12 +287,34 @@ class AnthropicClient(LLMClientBase):
         else:
             anthropic_tools = None

+        thinking_enabled = False
+        if messages and len(messages) > 0:
+            # Check if the last assistant message starts with a thinking block
+            # Find the last assistant message
+            last_assistant_message = None
+            for message in reversed(messages):
+                if message.get("role") == "assistant":
+                    last_assistant_message = message
+                    break
+
+            if (
+                last_assistant_message
+                and isinstance(last_assistant_message.get("content"), list)
+                and len(last_assistant_message["content"]) > 0
+                and last_assistant_message["content"][0].get("type") == "thinking"
+            ):
+                thinking_enabled = True
+
         try:
-            result = await client.beta.messages.count_tokens(
-                model=model or "claude-3-7-sonnet-20250219",
-                messages=messages or [{"role": "user", "content": "hi"}],
-                tools=anthropic_tools or [],
-            )
+            count_params = {
+                "model": model or "claude-3-7-sonnet-20250219",
+                "messages": messages or [{"role": "user", "content": "hi"}],
+                "tools": anthropic_tools or [],
+            }
+
+            if thinking_enabled:
+                count_params["thinking"] = {"type": "enabled", "budget_tokens": 16000}
+            result = await client.beta.messages.count_tokens(**count_params)
         except:
             raise

```
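The hunk above makes AnthropicClient's token counting thinking-aware: when the last assistant message begins with a `thinking` content block, the count request also enables extended thinking, presumably because the API rejects thinking blocks in assistant turns when thinking is off. A minimal standalone sketch of the same pattern, assuming the `anthropic` Python SDK with an `ANTHROPIC_API_KEY` in the environment; the helper name `count_with_thinking` is illustrative, not part of letta:

```python
import anthropic

client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment


async def count_with_thinking(messages: list[dict], tools: list[dict] | None) -> int:
    """Count tokens, enabling extended thinking when the last assistant turn starts with a thinking block."""
    last_assistant = next((m for m in reversed(messages) if m.get("role") == "assistant"), None)
    thinking_enabled = bool(
        last_assistant
        and isinstance(last_assistant.get("content"), list)
        and last_assistant["content"]
        and last_assistant["content"][0].get("type") == "thinking"
    )

    params = {
        "model": "claude-3-7-sonnet-20250219",
        "messages": messages,
        "tools": tools or [],
    }
    if thinking_enabled:
        params["thinking"] = {"type": "enabled", "budget_tokens": 16000}

    result = await client.beta.messages.count_tokens(**params)
    return result.input_tokens
```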
letta/llm_api/deepseek_client.py ADDED

```python
import os
from typing import List, Optional

from openai import AsyncOpenAI, AsyncStream, OpenAI
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format
from letta.llm_api.openai_client import OpenAIClient
from letta.otel.tracing import trace_method
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
from letta.settings import model_settings


class DeepseekClient(OpenAIClient):

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        return False

    @trace_method
    def build_request_data(
        self,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
    ) -> dict:
        # Override put_inner_thoughts_in_kwargs to False for DeepSeek
        llm_config.put_inner_thoughts_in_kwargs = False

        data = super().build_request_data(messages, llm_config, tools, force_tool_call)

        def add_functions_to_system_message(system_message: ChatMessage):
            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
            system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
            add_functions_to_system_message(
                data["messages"][0]
            )  # Inject additional instructions to the system prompt with the available functions

        data["messages"] = map_messages_to_deepseek_format(data["messages"])

        return data

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying synchronous request to OpenAI API and returns raw response dict.
        """
        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
        """
        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
        """
        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
        """
        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
            **request_data, stream=True, stream_options={"include_usage": True}
        )
        return response_stream

    @trace_method
    def convert_response_to_chat_completion(
        self,
        response_data: dict,
        input_messages: List[PydanticMessage],  # Included for consistency, maybe used later
        llm_config: LLMConfig,
    ) -> ChatCompletionResponse:
        """
        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
        Handles potential extraction of inner thoughts if they were added via kwargs.
        """
        response = ChatCompletionResponse(**response_data)
        return convert_deepseek_response_to_chatcompletion(response)
```
letta/llm_api/groq_client.py ADDED

```python
import os
from typing import List, Optional

from openai import AsyncOpenAI, AsyncStream, OpenAI
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

from letta.llm_api.openai_client import OpenAIClient
from letta.otel.tracing import trace_method
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings


class GroqClient(OpenAIClient):

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        return True

    @trace_method
    def build_request_data(
        self,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
    ) -> dict:
        data = super().build_request_data(messages, llm_config, tools, force_tool_call)

        # Groq validation - these fields are not supported and will cause 400 errors
        # https://console.groq.com/docs/openai
        if "top_logprobs" in data:
            del data["top_logprobs"]
        if "logit_bias" in data:
            del data["logit_bias"]
        data["logprobs"] = False
        data["n"] = 1

        return data

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying synchronous request to Groq API and returns raw response dict.
        """
        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying asynchronous request to Groq API and returns raw response dict.
        """
        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
        """Request embeddings given texts and embedding config"""
        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=embedding_config.embedding_endpoint)
        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

        # TODO: add total usage
        return [r.embedding for r in response.data]

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
        raise NotImplementedError("Streaming not supported for Groq.")
```
letta/llm_api/helpers.py CHANGED

```diff
@@ -133,7 +133,6 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
             structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
         else:
             raise NotImplementedError("Optional parameter handling is not implemented.")
-
     return structured_output


```
letta/llm_api/llm_api_tools.py CHANGED

```diff
@@ -8,7 +8,7 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
 from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
-from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
+from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (
     build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
@@ -16,14 +16,13 @@ from letta.llm_api.openai import (
     prepare_openai_payload,
 )
 from letta.local_llm.chat_completion_proxy import get_chat_completion
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.orm.user import User
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
@@ -246,116 +245,6 @@ def create(

         return response

-    elif llm_config.model_endpoint_type == "xai":
-        api_key = model_settings.xai_api_key
-
-        if function_call is None and functions is not None and len(functions) > 0:
-            # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
-            function_call = "required"
-
-        data = build_openai_chat_completions_request(
-            llm_config,
-            messages,
-            user_id,
-            functions,
-            function_call,
-            use_tool_naming,
-            put_inner_thoughts_first=put_inner_thoughts_first,
-            use_structured_output=False,  # NOTE: not supported atm for xAI
-        )
-
-        # Specific bug for the mini models (as of Apr 14, 2025)
-        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
-        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
-        if "grok-3-mini-" in llm_config.model:
-            data.presence_penalty = None
-            data.frequency_penalty = None
-
-        if stream:  # Client requested token streaming
-            data.stream = True
-            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
-                stream_interface, AgentRefreshStreamingInterface
-            ), type(stream_interface)
-            response = openai_chat_completions_process_stream(
-                url=llm_config.model_endpoint,
-                api_key=api_key,
-                chat_completion_request=data,
-                stream_interface=stream_interface,
-                name=name,
-                # TODO turn on to support reasoning content from xAI reasoners:
-                # https://docs.x.ai/docs/guides/reasoning#reasoning
-                expect_reasoning_content=False,
-            )
-        else:  # Client did not request token streaming (expect a blocking backend response)
-            data.stream = False
-            if isinstance(stream_interface, AgentChunkStreamingInterface):
-                stream_interface.stream_start()
-            try:
-                response = openai_chat_completions_request(
-                    url=llm_config.model_endpoint,
-                    api_key=api_key,
-                    chat_completion_request=data,
-                )
-            finally:
-                if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.stream_end()
-
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-
-        return response
-
-    elif llm_config.model_endpoint_type == "groq":
-        if stream:
-            raise NotImplementedError("Streaming not yet implemented for Groq.")
-
-        if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
-            raise LettaConfigurationError(message="Groq key is missing from letta config file", missing_fields=["groq_api_key"])
-
-        # force to true for groq, since they don't support 'content' is non-null
-        if llm_config.put_inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
-        data = ChatCompletionRequest(
-            model=llm_config.model,
-            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs) for m in messages],
-            tools=tools,
-            tool_choice=function_call,
-            user=str(user_id),
-        )
-
-        # https://console.groq.com/docs/openai
-        # "The following fields are currently not supported and will result in a 400 error (yikes) if they are supplied:"
-        assert data.top_logprobs is None
-        assert data.logit_bias is None
-        assert data.logprobs == False
-        assert data.n == 1
-        # They mention that none of the messages can have names, but it seems to not error out (for now)
-
-        data.stream = False
-        if isinstance(stream_interface, AgentChunkStreamingInterface):
-            stream_interface.stream_start()
-        try:
-            # groq uses the openai chat completions API, so this component should be reusable
-            response = openai_chat_completions_request(
-                url=llm_config.model_endpoint,
-                api_key=model_settings.groq_api_key,
-                chat_completion_request=data,
-            )
-        finally:
-            if isinstance(stream_interface, AgentChunkStreamingInterface):
-                stream_interface.stream_end()
-
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-
-        return response
-
     elif llm_config.model_endpoint_type == "deepseek":
         if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
             # only is a problem if we are *not* using an openai proxy
```
letta/llm_api/llm_client.py CHANGED

```diff
@@ -79,5 +79,26 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
+            case ProviderType.xai:
+                from letta.llm_api.xai_client import XAIClient
+
+                return XAIClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
+            case ProviderType.groq:
+                from letta.llm_api.groq_client import GroqClient
+
+                return GroqClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
+            case ProviderType.deepseek:
+                from letta.llm_api.deepseek_client import DeepseekClient
+
+                return DeepseekClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
             case _:
                 return None
```
letta/llm_api/llm_client_base.py CHANGED

```diff
@@ -15,6 +15,7 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
+from letta.settings import settings

 if TYPE_CHECKING:
     from letta.orm import User
@@ -90,15 +91,16 @@ class LLMClientBase:
         try:
             log_event(name="llm_request_sent", attributes=request_data)
             response_data = await self.request_async(request_data, llm_config)
-            await telemetry_manager.create_provider_trace_async(
-                actor=self.actor,
-                provider_trace_create=ProviderTraceCreate(
-                    request_json=request_data,
-                    response_json=response_data,
-                    step_id=step_id,
-                    organization_id=self.actor.organization_id,
-                ),
-            )
+            if settings.track_provider_trace and telemetry_manager:
+                await telemetry_manager.create_provider_trace_async(
+                    actor=self.actor,
+                    provider_trace_create=ProviderTraceCreate(
+                        request_json=request_data,
+                        response_json=response_data,
+                        step_id=step_id,
+                        organization_id=self.actor.organization_id,
+                    ),
+                )

             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
```
letta/llm_api/openai_client.py CHANGED

```diff
@@ -146,6 +146,9 @@ class OpenAIClient(LLMClientBase):
     def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
         return requires_auto_tool_choice(llm_config)

+    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
+        return supports_structured_output(llm_config)
+
     @trace_method
     def build_request_data(
         self,
```
letta/llm_api/xai_client.py ADDED

```python
import os
from typing import List, Optional

from openai import AsyncOpenAI, AsyncStream, OpenAI
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

from letta.llm_api.openai_client import OpenAIClient
from letta.otel.tracing import trace_method
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings


class XAIClient(OpenAIClient):

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        return False

    @trace_method
    def build_request_data(
        self,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
    ) -> dict:
        data = super().build_request_data(messages, llm_config, tools, force_tool_call)

        # Specific bug for the mini models (as of Apr 14, 2025)
        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
        if "grok-3-mini-" in llm_config.model:
            data.pop("presence_penalty", None)
            data.pop("frequency_penalty", None)

        return data

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying synchronous request to OpenAI API and returns raw response dict.
        """
        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
        """
        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)

        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
        """
        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
        """
        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
            **request_data, stream=True, stream_options={"include_usage": True}
        )
        return response_stream

    @trace_method
    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
        """Request embeddings given texts and embedding config"""
        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
        client = AsyncOpenAI(api_key=api_key, base_url=embedding_config.embedding_endpoint)
        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

        # TODO: add total usage
        return [r.embedding for r in response.data]
```
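XAIClient inherits the OpenAI-compatible request path, so streaming goes through the same AsyncStream interface; the only provider-specific tweak is dropping presence and frequency penalties for grok-3-mini models. A rough streaming sketch under assumptions: the LLMConfig field values (model name, endpoint, context window) are placeholders, `actor` and `request_data` come from the caller, and error handling is omitted:

```python
from letta.llm_api.xai_client import XAIClient
from letta.schemas.llm_config import LLMConfig


async def stream_grok(actor, request_data: dict) -> None:
    # Placeholder config; only model_endpoint_type="xai" and the grok-3-mini guard are implied by the diff.
    config = LLMConfig(
        model="grok-3-mini-beta",
        model_endpoint_type="xai",
        model_endpoint="https://api.x.ai/v1",
        context_window=131072,
    )
    client = XAIClient(put_inner_thoughts_first=True, actor=actor)

    stream = await client.stream_async(request_data, config)
    async for chunk in stream:  # chunks are openai ChatCompletionChunk objects
        # The final usage chunk has no choices, so guard before reading the delta.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
```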