letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl

This diff reflects the changes between two publicly released versions of the package as published to a supported public registry, and is provided for informational purposes only.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +9 -3
  3. letta/agents/base_agent.py +2 -2
  4. letta/agents/letta_agent.py +56 -45
  5. letta/agents/voice_agent.py +2 -2
  6. letta/data_sources/redis_client.py +146 -1
  7. letta/errors.py +4 -0
  8. letta/functions/function_sets/files.py +2 -2
  9. letta/functions/mcp_client/types.py +30 -6
  10. letta/functions/schema_generator.py +46 -1
  11. letta/functions/schema_validator.py +17 -2
  12. letta/functions/types.py +1 -1
  13. letta/helpers/tool_execution_helper.py +0 -2
  14. letta/llm_api/anthropic_client.py +27 -5
  15. letta/llm_api/deepseek_client.py +97 -0
  16. letta/llm_api/groq_client.py +79 -0
  17. letta/llm_api/helpers.py +0 -1
  18. letta/llm_api/llm_api_tools.py +2 -113
  19. letta/llm_api/llm_client.py +21 -0
  20. letta/llm_api/llm_client_base.py +11 -9
  21. letta/llm_api/openai_client.py +3 -0
  22. letta/llm_api/xai_client.py +85 -0
  23. letta/prompts/prompt_generator.py +190 -0
  24. letta/schemas/agent_file.py +17 -2
  25. letta/schemas/file.py +24 -1
  26. letta/schemas/job.py +2 -0
  27. letta/schemas/letta_message.py +2 -0
  28. letta/schemas/letta_request.py +22 -0
  29. letta/schemas/message.py +10 -1
  30. letta/schemas/providers/bedrock.py +1 -0
  31. letta/server/rest_api/redis_stream_manager.py +300 -0
  32. letta/server/rest_api/routers/v1/agents.py +129 -7
  33. letta/server/rest_api/routers/v1/folders.py +15 -5
  34. letta/server/rest_api/routers/v1/runs.py +101 -11
  35. letta/server/rest_api/routers/v1/sources.py +21 -53
  36. letta/server/rest_api/routers/v1/telemetry.py +14 -4
  37. letta/server/rest_api/routers/v1/tools.py +2 -2
  38. letta/server/rest_api/streaming_response.py +3 -24
  39. letta/server/server.py +0 -1
  40. letta/services/agent_manager.py +2 -2
  41. letta/services/agent_serialization_manager.py +129 -32
  42. letta/services/file_manager.py +111 -6
  43. letta/services/file_processor/file_processor.py +5 -2
  44. letta/services/files_agents_manager.py +60 -0
  45. letta/services/helpers/agent_manager_helper.py +4 -205
  46. letta/services/helpers/tool_parser_helper.py +6 -3
  47. letta/services/mcp/base_client.py +7 -1
  48. letta/services/mcp/sse_client.py +7 -2
  49. letta/services/mcp/stdio_client.py +5 -0
  50. letta/services/mcp/streamable_http_client.py +11 -2
  51. letta/services/mcp_manager.py +31 -30
  52. letta/services/source_manager.py +26 -1
  53. letta/services/summarizer/summarizer.py +21 -10
  54. letta/services/tool_executor/files_tool_executor.py +13 -9
  55. letta/services/tool_executor/mcp_tool_executor.py +3 -0
  56. letta/services/tool_executor/tool_execution_manager.py +13 -0
  57. letta/services/tool_manager.py +43 -20
  58. letta/settings.py +1 -0
  59. letta/utils.py +37 -0
  60. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
  61. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
  62. letta/functions/mcp_client/__init__.py +0 -0
  63. letta/functions/mcp_client/base_client.py +0 -156
  64. letta/functions/mcp_client/sse_client.py +0 -51
  65. letta/functions/mcp_client/stdio_client.py +0 -109
  66. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic_client.py CHANGED
@@ -287,12 +287,34 @@ class AnthropicClient(LLMClientBase):
         else:
             anthropic_tools = None
 
+        thinking_enabled = False
+        if messages and len(messages) > 0:
+            # Check if the last assistant message starts with a thinking block
+            # Find the last assistant message
+            last_assistant_message = None
+            for message in reversed(messages):
+                if message.get("role") == "assistant":
+                    last_assistant_message = message
+                    break
+
+            if (
+                last_assistant_message
+                and isinstance(last_assistant_message.get("content"), list)
+                and len(last_assistant_message["content"]) > 0
+                and last_assistant_message["content"][0].get("type") == "thinking"
+            ):
+                thinking_enabled = True
+
         try:
-            result = await client.beta.messages.count_tokens(
-                model=model or "claude-3-7-sonnet-20250219",
-                messages=messages or [{"role": "user", "content": "hi"}],
-                tools=anthropic_tools or [],
-            )
+            count_params = {
+                "model": model or "claude-3-7-sonnet-20250219",
+                "messages": messages or [{"role": "user", "content": "hi"}],
+                "tools": anthropic_tools or [],
+            }
+
+            if thinking_enabled:
+                count_params["thinking"] = {"type": "enabled", "budget_tokens": 16000}
+            result = await client.beta.messages.count_tokens(**count_params)
         except:
             raise
 
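A note on the hunk above: the `thinking` parameter is only added to the token-count request when the most recent assistant message begins with a thinking content block, and the 16000-token budget is the value hard-coded in the diff. A minimal, self-contained sketch of that detection, assuming plain Anthropic-format message dicts:

def thinking_params_if_needed(messages: list) -> dict | None:
    """Return a `thinking` params dict when the last assistant message starts with a thinking block."""
    last_assistant = next((m for m in reversed(messages) if m.get("role") == "assistant"), None)
    content = last_assistant.get("content") if last_assistant else None
    if isinstance(content, list) and content and content[0].get("type") == "thinking":
        return {"type": "enabled", "budget_tokens": 16000}
    return None

# Hypothetical usage mirroring count_tokens() above:
messages = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": [{"type": "thinking", "thinking": "...", "signature": "..."}]},
]
assert thinking_params_if_needed(messages) == {"type": "enabled", "budget_tokens": 16000}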
letta/llm_api/deepseek_client.py ADDED
@@ -0,0 +1,97 @@
+import os
+from typing import List, Optional
+
+from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format
+from letta.llm_api.openai_client import OpenAIClient
+from letta.otel.tracing import trace_method
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.settings import model_settings
+
+
+class DeepseekClient(OpenAIClient):
+
+    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
+        return False
+
+    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
+        return False
+
+    @trace_method
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+        tools: Optional[List[dict]] = None,
+        force_tool_call: Optional[str] = None,
+    ) -> dict:
+        # Override put_inner_thoughts_in_kwargs to False for DeepSeek
+        llm_config.put_inner_thoughts_in_kwargs = False
+
+        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+
+        def add_functions_to_system_message(system_message: ChatMessage):
+            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+            add_functions_to_system_message(
+                data["messages"][0]
+            )  # Inject additional instructions to the system prompt with the available functions
+
+        data["messages"] = map_messages_to_deepseek_format(data["messages"])
+
+        return data
+
+    @trace_method
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying synchronous request to OpenAI API and returns raw response dict.
+        """
+        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
+        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
+        """
+        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
+        return response_stream
+
+    @trace_method
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],  # Included for consistency, maybe used later
+        llm_config: LLMConfig,
+    ) -> ChatCompletionResponse:
+        """
+        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
+        Handles potential extraction of inner thoughts if they were added via kwargs.
+        """
+        response = ChatCompletionResponse(**response_data)
+        return convert_deepseek_response_to_chatcompletion(response)
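One caveat on the file above: the inner `add_functions_to_system_message` helper references `ChatMessage`, `json`, and `functions`, none of which are imported or defined in the file as shown. A self-contained sketch of the same deepseek-reasoner prompt-injection idea, assuming plain OpenAI-style message dicts instead of letta's message objects:

import json

def inject_functions_into_system_prompt(system_message: dict, functions: list) -> None:
    """Since deepseek-reasoner (R1) lacks native function calling, append the tool schemas
    and calling instructions to the system prompt, as the diff above does."""
    system_message["content"] += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
    system_message["content"] += (
        'Select best function to call simply respond with a single json block with the fields '
        '"name" and "arguments". Use double quotes around the arguments.'
    )

# Hypothetical OpenAI-style payload:
data = {"messages": [{"role": "system", "content": "You are a helpful agent."}]}
inject_functions_into_system_prompt(data["messages"][0], [{"name": "send_message", "parameters": {"type": "object"}}])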
letta/llm_api/groq_client.py ADDED
@@ -0,0 +1,79 @@
+import os
+from typing import List, Optional
+
+from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from letta.llm_api.openai_client import OpenAIClient
+from letta.otel.tracing import trace_method
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as PydanticMessage
+from letta.settings import model_settings
+
+
+class GroqClient(OpenAIClient):
+
+    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
+        return False
+
+    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
+        return True
+
+    @trace_method
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+        tools: Optional[List[dict]] = None,
+        force_tool_call: Optional[str] = None,
+    ) -> dict:
+        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+
+        # Groq validation - these fields are not supported and will cause 400 errors
+        # https://console.groq.com/docs/openai
+        if "top_logprobs" in data:
+            del data["top_logprobs"]
+        if "logit_bias" in data:
+            del data["logit_bias"]
+        data["logprobs"] = False
+        data["n"] = 1
+
+        return data
+
+    @trace_method
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying synchronous request to Groq API and returns raw response dict.
+        """
+        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
+        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying asynchronous request to Groq API and returns raw response dict.
+        """
+        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
+        """Request embeddings given texts and embedding config"""
+        api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=embedding_config.embedding_endpoint)
+        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)
+
+        # TODO: add total usage
+        return [r.embedding for r in response.data]
+
+    @trace_method
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
+        raise NotImplementedError("Streaming not supported for Groq.")
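Where the removed llm_api_tools.py path asserted that unsupported fields were absent (see the deleted hunk further below), GroqClient now strips them before sending. A standalone sketch of that sanitization on a plain OpenAI-style request dict; the payload below is hypothetical:

def sanitize_for_groq(request_data: dict) -> dict:
    """Mirror of GroqClient.build_request_data above: drop fields the Groq OpenAI-compatible
    endpoint rejects with a 400, and pin logprobs/n to supported values."""
    request_data.pop("top_logprobs", None)
    request_data.pop("logit_bias", None)
    request_data["logprobs"] = False
    request_data["n"] = 1
    return request_data

# Hypothetical payload:
payload = {"model": "llama-3.3-70b-versatile", "messages": [{"role": "user", "content": "hi"}], "logit_bias": {"50256": -100}}
payload = sanitize_for_groq(payload)
assert "logit_bias" not in payload and payload["n"] == 1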
letta/llm_api/helpers.py CHANGED
@@ -133,7 +133,6 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
         structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
     else:
         raise NotImplementedError("Optional parameter handling is not implemented.")
-
     return structured_output
 
 
letta/llm_api/llm_api_tools.py CHANGED
@@ -8,7 +8,7 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
 from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
-from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
+from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (
     build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
@@ -16,14 +16,13 @@ from letta.llm_api.openai import (
     prepare_openai_payload,
 )
 from letta.local_llm.chat_completion_proxy import get_chat_completion
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.orm.user import User
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
@@ -246,116 +245,6 @@ def create(
 
         return response
 
-    elif llm_config.model_endpoint_type == "xai":
-        api_key = model_settings.xai_api_key
-
-        if function_call is None and functions is not None and len(functions) > 0:
-            # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
-            function_call = "required"
-
-        data = build_openai_chat_completions_request(
-            llm_config,
-            messages,
-            user_id,
-            functions,
-            function_call,
-            use_tool_naming,
-            put_inner_thoughts_first=put_inner_thoughts_first,
-            use_structured_output=False,  # NOTE: not supported atm for xAI
-        )
-
-        # Specific bug for the mini models (as of Apr 14, 2025)
-        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
-        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
-        if "grok-3-mini-" in llm_config.model:
-            data.presence_penalty = None
-            data.frequency_penalty = None
-
-        if stream:  # Client requested token streaming
-            data.stream = True
-            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
-                stream_interface, AgentRefreshStreamingInterface
-            ), type(stream_interface)
-            response = openai_chat_completions_process_stream(
-                url=llm_config.model_endpoint,
-                api_key=api_key,
-                chat_completion_request=data,
-                stream_interface=stream_interface,
-                name=name,
-                # TODO turn on to support reasoning content from xAI reasoners:
-                # https://docs.x.ai/docs/guides/reasoning#reasoning
-                expect_reasoning_content=False,
-            )
-        else:  # Client did not request token streaming (expect a blocking backend response)
-            data.stream = False
-            if isinstance(stream_interface, AgentChunkStreamingInterface):
-                stream_interface.stream_start()
-            try:
-                response = openai_chat_completions_request(
-                    url=llm_config.model_endpoint,
-                    api_key=api_key,
-                    chat_completion_request=data,
-                )
-            finally:
-                if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.stream_end()
-
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-
-        return response
-
-    elif llm_config.model_endpoint_type == "groq":
-        if stream:
-            raise NotImplementedError("Streaming not yet implemented for Groq.")
-
-        if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
-            raise LettaConfigurationError(message="Groq key is missing from letta config file", missing_fields=["groq_api_key"])
-
-        # force to true for groq, since they don't support 'content' is non-null
-        if llm_config.put_inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
-        data = ChatCompletionRequest(
-            model=llm_config.model,
-            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs) for m in messages],
-            tools=tools,
-            tool_choice=function_call,
-            user=str(user_id),
-        )
-
-        # https://console.groq.com/docs/openai
-        # "The following fields are currently not supported and will result in a 400 error (yikes) if they are supplied:"
-        assert data.top_logprobs is None
-        assert data.logit_bias is None
-        assert data.logprobs == False
-        assert data.n == 1
-        # They mention that none of the messages can have names, but it seems to not error out (for now)
-
-        data.stream = False
-        if isinstance(stream_interface, AgentChunkStreamingInterface):
-            stream_interface.stream_start()
-        try:
-            # groq uses the openai chat completions API, so this component should be reusable
-            response = openai_chat_completions_request(
-                url=llm_config.model_endpoint,
-                api_key=model_settings.groq_api_key,
-                chat_completion_request=data,
-            )
-        finally:
-            if isinstance(stream_interface, AgentChunkStreamingInterface):
-                stream_interface.stream_end()
-
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-
-        return response
-
     elif llm_config.model_endpoint_type == "deepseek":
         if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
             # only is a problem if we are *not* using an openai proxy
letta/llm_api/llm_client.py CHANGED
@@ -79,5 +79,26 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
+            case ProviderType.xai:
+                from letta.llm_api.xai_client import XAIClient
+
+                return XAIClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
+            case ProviderType.groq:
+                from letta.llm_api.groq_client import GroqClient
+
+                return GroqClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
+            case ProviderType.deepseek:
+                from letta.llm_api.deepseek_client import DeepseekClient
+
+                return DeepseekClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
             case _:
                 return None
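With the dispatch above, xAI, Groq, and DeepSeek each get a dedicated OpenAI-compatible client class. A hedged sketch of driving one of them directly, using only the constructor kwargs and methods visible in this diff; the LLMConfig values and the hand-built payload are illustrative, and GROQ_API_KEY is assumed to be set in the environment:

import asyncio

from letta.llm_api.groq_client import GroqClient
from letta.schemas.llm_config import LLMConfig

async def main() -> None:
    # Constructor kwargs mirror the factory cases above; actor=None is an assumption.
    client = GroqClient(put_inner_thoughts_first=True, actor=None)
    llm_config = LLMConfig(
        model="llama-3.3-70b-versatile",                  # illustrative model name
        model_endpoint_type="groq",
        model_endpoint="https://api.groq.com/openai/v1",  # illustrative base URL
        context_window=8192,
    )
    # Hand-built OpenAI-style payload; in letta this would normally come from build_request_data().
    request_data = {"model": llm_config.model, "messages": [{"role": "user", "content": "hi"}]}
    response = await client.request_async(request_data, llm_config)
    print(response["choices"][0]["message"])

asyncio.run(main())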
letta/llm_api/llm_client_base.py CHANGED
@@ -15,6 +15,7 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.services.telemetry_manager import TelemetryManager
+from letta.settings import settings
 
 if TYPE_CHECKING:
     from letta.orm import User
@@ -90,15 +91,16 @@ class LLMClientBase:
         try:
             log_event(name="llm_request_sent", attributes=request_data)
             response_data = await self.request_async(request_data, llm_config)
-            await telemetry_manager.create_provider_trace_async(
-                actor=self.actor,
-                provider_trace_create=ProviderTraceCreate(
-                    request_json=request_data,
-                    response_json=response_data,
-                    step_id=step_id,
-                    organization_id=self.actor.organization_id,
-                ),
-            )
+            if settings.track_provider_trace and telemetry_manager:
+                await telemetry_manager.create_provider_trace_async(
+                    actor=self.actor,
+                    provider_trace_create=ProviderTraceCreate(
+                        request_json=request_data,
+                        response_json=response_data,
+                        step_id=step_id,
+                        organization_id=self.actor.organization_id,
+                    ),
+                )
 
             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
letta/llm_api/openai_client.py CHANGED
@@ -146,6 +146,9 @@ class OpenAIClient(LLMClientBase):
     def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
         return requires_auto_tool_choice(llm_config)
 
+    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
+        return supports_structured_output(llm_config)
+
     @trace_method
     def build_request_data(
         self,
letta/llm_api/xai_client.py ADDED
@@ -0,0 +1,85 @@
+import os
+from typing import List, Optional
+
+from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from letta.llm_api.openai_client import OpenAIClient
+from letta.otel.tracing import trace_method
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as PydanticMessage
+from letta.settings import model_settings
+
+
+class XAIClient(OpenAIClient):
+
+    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
+        return False
+
+    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
+        return False
+
+    @trace_method
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+        tools: Optional[List[dict]] = None,
+        force_tool_call: Optional[str] = None,
+    ) -> dict:
+        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+
+        # Specific bug for the mini models (as of Apr 14, 2025)
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
+        if "grok-3-mini-" in llm_config.model:
+            data.pop("presence_penalty", None)
+            data.pop("frequency_penalty", None)
+
+        return data
+
+    @trace_method
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying synchronous request to OpenAI API and returns raw response dict.
+        """
+        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
+        client = OpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
+        """
+        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    @trace_method
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
+        return response_stream
+
+    @trace_method
+    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
+        """Request embeddings given texts and embedding config"""
+        api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
+        client = AsyncOpenAI(api_key=api_key, base_url=embedding_config.embedding_endpoint)
+        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)
+
+        # TODO: add total usage
+        return [r.embedding for r in response.data]
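Compared with the deleted llm_api_tools.py path, which set `data.presence_penalty = None` on a request object, the new client builds a plain dict, so the unsupported grok-3-mini fields are removed outright. A small sketch of that behavior; the payload is hypothetical:

def strip_unsupported_grok_mini_params(request_data: dict, model: str) -> dict:
    """Mirror of XAIClient.build_request_data above: grok-3-mini models reject
    presence_penalty / frequency_penalty, so drop them from the request dict."""
    if "grok-3-mini-" in model:
        request_data.pop("presence_penalty", None)
        request_data.pop("frequency_penalty", None)
    return request_data

# Hypothetical payload:
payload = {"model": "grok-3-mini-beta", "messages": [{"role": "user", "content": "hi"}], "presence_penalty": 0.5}
payload = strip_unsupported_grok_mini_params(payload, payload["model"])
assert "presence_penalty" not in payload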