letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241008104105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of letta-nightly has been flagged as potentially problematic.

@@ -1,25 +1,25 @@
-import copy
-import json
 import os
 import random
 import time
-import warnings
 from typing import List, Optional, Union
 
 import requests
 
-from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
-from letta.llm_api.azure_openai import (
-    MODEL_TO_AZURE_ENGINE,
-    azure_openai_chat_completions_request,
-)
+from letta.llm_api.azure_openai import azure_openai_chat_completions_request
 from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    derive_inner_thoughts_in_kwargs,
+    unpack_all_inner_thoughts_from_kwargs,
+)
 from letta.llm_api.openai import (
+    build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
 )
@@ -37,144 +37,15 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
-from letta.utils import json_dumps
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
 
-# TODO update to use better types
-def add_inner_thoughts_to_functions(
-    functions: List[dict],
-    inner_thoughts_key: str,
-    inner_thoughts_description: str,
-    inner_thoughts_required: bool = True,
-    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
-) -> List[dict]:
-    """Add an inner_thoughts kwarg to every function in the provided list"""
-    # return copies
-    new_functions = []
-
-    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
-    for function_object in functions:
-        function_params = function_object["parameters"]["properties"]
-        required_params = list(function_object["parameters"]["required"])
-
-        # if the inner thoughts arg doesn't exist, add it
-        if inner_thoughts_key not in function_params:
-            function_params[inner_thoughts_key] = {
-                "type": "string",
-                "description": inner_thoughts_description,
-            }
-
-        # make sure it's tagged as required
-        new_function_object = copy.deepcopy(function_object)
-        if inner_thoughts_required and inner_thoughts_key not in required_params:
-            required_params.append(inner_thoughts_key)
-            new_function_object["parameters"]["required"] = required_params
-
-        new_functions.append(new_function_object)
-
-    # return a list of copies
-    return new_functions
-
-
-def unpack_all_inner_thoughts_from_kwargs(
-    response: ChatCompletionResponse,
-    inner_thoughts_key: str,
-) -> ChatCompletionResponse:
-    """Strip the inner thoughts out of the tool call and put it in the message content"""
-    if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
-
-    new_choices = []
-    for choice in response.choices:
-        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
-
-    # return an updated copy
-    new_response = response.model_copy(deep=True)
-    new_response.choices = new_choices
-    return new_response
-
-
-def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
-    message = choice.message
-    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
-        if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
-        # TODO support multiple tool calls
-        tool_call = message.tool_calls[0]
-
-        try:
-            # Sadly we need to parse the JSON since args are in string format
-            func_args = dict(json.loads(tool_call.function.arguments))
-            if inner_thoughts_key in func_args:
-                # extract the inner thoughts
-                inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                # replace the kwargs
-                new_choice = choice.model_copy(deep=True)
-                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                # also replace the message content
-                if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                new_choice.message.content = inner_thoughts
-
-                return new_choice
-            else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-        except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-            raise e
-
-
-def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
-    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
-    from letta.utils import printd
-
-    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-
-    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
-    if match_string in str(exception):
-        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
-        return True
-
-    # Based on python requests + OpenAI REST API (/v1)
-    elif isinstance(exception, requests.exceptions.HTTPError):
-        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
-            try:
-                error_details = exception.response.json()
-                if "error" not in error_details:
-                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
-                    return False
-                else:
-                    error_details = error_details["error"]
-
-                    # Check for the specific error code
-                    if error_details.get("code") == "context_length_exceeded":
-                        printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
-                        return True
-                    # Soft-check for "maximum context length" inside of the message
-                    elif error_details.get("message") and "maximum context length" in error_details.get("message"):
-                        printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
-                        return True
-                    else:
-                        printd(f"HTTPError occurred, but unknown error message: {error_details}")
-                        return False
-            except ValueError:
-                # JSON decoding failed
-                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
-
-    # Generic fail
-    else:
-        return False
-
-
 def retry_with_exponential_backoff(
     func,
     initial_delay: float = 1,
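
Note: the helpers deleted above are moved to the new letta.llm_api.helpers module (three of them are re-imported at the top of this file), not removed. A minimal sketch of what add_inner_thoughts_to_functions does to an OpenAI-style function schema; the send_message schema and the description string here are illustrative, not taken from the package:

    from letta.llm_api.helpers import add_inner_thoughts_to_functions

    # A toy OpenAI-style function schema (illustrative)
    send_message = {
        "name": "send_message",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }

    patched = add_inner_thoughts_to_functions(
        functions=[send_message],
        inner_thoughts_key="inner_thoughts",
        inner_thoughts_description="Private reasoning, not shown to the user.",
    )
    # Per the (moved) implementation above: "inner_thoughts" is added to
    # "properties" and appended to the returned copy's "required" list.
    assert "inner_thoughts" in patched[0]["parameters"]["properties"]
    assert "inner_thoughts" in patched[0]["parameters"]["required"]
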
@@ -248,7 +119,8 @@ def create(
     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
-    inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
+    max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
@@ -267,59 +139,14 @@ def create(
 
     # openai
     if llm_config.model_endpoint_type == "openai":
-
-        if inner_thoughts_in_kwargs == OptionState.DEFAULT:
-            # model that are known to not use `content` fields on tool calls
-            inner_thoughts_in_kwargs = (
-                "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model
-            )
-        else:
-            inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
-
-        if not isinstance(inner_thoughts_in_kwargs, bool):
-            warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-            inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-        if inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        openai_message_list = [
-            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
-        ]
-
-        # TODO do the same for Azure?
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
-        if use_tool_naming:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        # https://platform.openai.com/docs/guides/text-generation/json-mode
-        # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
-        if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
-            data.response_format = {"type": "json_object"}
 
-        if "inference.memgpt.ai" in llm_config.model_endpoint:
-            # override user id for inference.memgpt.ai
-            import uuid
-
-            data.user = str(uuid.UUID(int=0))
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
+        )
 
         if stream:  # Client requested token streaming
             data.stream = True
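
Note: derive_inner_thoughts_in_kwargs is new in letta.llm_api.helpers and its body is not shown in this diff. Judging from the inline logic it replaces (deleted above), it presumably collapses the OptionState tri-state into a bool, roughly:

    from letta.schemas.enums import OptionState  # assumed location of OptionState

    def derive_inner_thoughts_in_kwargs_sketch(option: OptionState, model: str) -> bool:
        # Reconstruction of the deleted inline logic above, not the actual helper body
        if option == OptionState.DEFAULT:
            # models known to not use `content` fields on tool calls
            return "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
        return option == OptionState.YES
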
@@ -356,35 +183,32 @@ def create(
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
-        azure_deployment = (
-            model_settings.azure_deployment if model_settings.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
+        if model_settings.azure_api_key is None:
+            raise ValueError(f"Azure API key is missing. Did you set AZURE_API_KEY in your env?")
+
+        if model_settings.azure_base_url is None:
+            raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")
+
+        # Set the llm config model_endpoint from model_settings
+        # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
+        llm_config.model_endpoint = model_settings.azure_base_url
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
+        chat_completion_request = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
         )
-        if use_tool_naming:
-            data = dict(
-                # NOTE: don't pass model to Azure calls, that is the deployment_id
-                # model=agent_config.model,
-                messages=[m.to_openai_dict() for m in messages],
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = dict(
-                # NOTE: don't pass model to Azure calls, that is the deployment_id
-                # model=agent_config.model,
-                messages=[m.to_openai_dict() for m in messages],
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        return azure_openai_chat_completions_request(
-            resource_name=model_settings.azure_endpoint,
-            deployment_id=azure_deployment,
-            api_version=model_settings.azure_version,
-            api_key=model_settings.azure_key,
-            data=data,
+
+        response = azure_openai_chat_completions_request(
+            model_settings=model_settings,
+            llm_config=llm_config,
+            api_key=model_settings.azure_api_key,
+            chat_completion_request=chat_completion_request,
         )
 
+        if inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     elif llm_config.model_endpoint_type == "google_ai":
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
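
Note: the Azure branch no longer uses per-field settings (azure_endpoint, azure_deployment, azure_version, azure_key) or the MODEL_TO_AZURE_ENGINE lookup; it now fails fast unless two values are present. The env var names below come from the error messages above; how model_settings is populated from the environment is not shown in this diff:

    import os

    # Expected configuration for the refactored Azure code path (hedged sketch)
    os.environ["AZURE_API_KEY"] = "<your-azure-openai-key>"          # -> model_settings.azure_api_key
    os.environ["AZURE_BASE_URL"] = "https://<org>.openai.azure.com"  # -> model_settings.azure_base_url
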
@@ -517,7 +341,7 @@ def create(
             stream_inferface.stream_end()
 
         if inner_thoughts_in_kwargs:
-            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
         return response
 
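
Note: only the all-choices wrapper is used now; the per-choice unpack_inner_thoughts_from_kwargs presumably stays an implementation detail of letta.llm_api.helpers. The data-level effect on one tool call, per the deleted helper bodies above, can be reproduced with plain json:

    import json

    # What the unpack step does to one tool call (behavior per the deleted helpers above)
    arguments = '{"inner_thoughts": "User greeted me.", "message": "Hi!"}'
    func_args = json.loads(arguments)
    content = func_args.pop("inner_thoughts")  # becomes the assistant message content
    arguments = json.dumps(func_args)          # tool call keeps only the real kwargs

    assert content == "User greeted me."
    assert json.loads(arguments) == {"message": "Hi!"}
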
letta/llm_api/openai.py CHANGED
@@ -1,5 +1,6 @@
 import json
-from typing import Generator, Optional, Union
+import warnings
+from typing import Generator, List, Optional, Union
 
 import httpx
 import requests
@@ -8,10 +9,19 @@ from httpx_sse._exceptions import SSEError
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.llm_api.helpers import add_inner_thoughts_to_functions
+from letta.local_llm.constants import (
+    INNER_THOUGHTS_KWARG,
+    INNER_THOUGHTS_KWARG_DESCRIPTION,
+)
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
+from letta.schemas.openai.chat_completion_request import (
+    ChatCompletionRequest,
+    cast_message_to_subtype,
+)
 from letta.schemas.openai.chat_completion_response import (
     ChatCompletionChunkResponse,
     ChatCompletionResponse,
@@ -81,6 +91,64 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional
         raise e
 
 
+def build_openai_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: str,
+    use_tool_naming: bool,
+    inner_thoughts_in_kwargs: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    if inner_thoughts_in_kwargs:
+        functions = add_inner_thoughts_to_functions(
+            functions=functions,
+            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+        )
+
+    openai_message_list = [
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+    ]
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+
+    if use_tool_naming:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+            tool_choice=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    # https://platform.openai.com/docs/guides/text-generation/json-mode
+    # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
+    if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+        data.response_format = {"type": "json_object"}
+
+    if "inference.memgpt.ai" in llm_config.model_endpoint:
+        # override user id for inference.memgpt.ai
+        import uuid
+
+        data.user = str(uuid.UUID(int=0))
+
+    return data
+
+
 def openai_chat_completions_process_stream(
     url: str,
     api_key: str,
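
Note: build_openai_chat_completions_request consolidates the request-building previously duplicated inline in create(); both the "openai" and "azure" branches now call it. A hedged usage sketch (field values are illustrative; check the schemas in this version for exact validation rules):

    from letta.llm_api.openai import build_openai_chat_completions_request
    from letta.schemas.llm_config import LLMConfig

    llm_config = LLMConfig(
        model="gpt-4o-mini",
        model_endpoint_type="openai",
        model_endpoint="https://api.openai.com/v1",
        context_window=128000,
    )
    data = build_openai_chat_completions_request(
        llm_config=llm_config,
        messages=[],                     # normally List[Message] from the agent's context
        user_id="user-00000000",
        functions=None,
        function_call="auto",
        use_tool_naming=True,            # emit "tools"/"tool_choice" instead of legacy "functions"
        inner_thoughts_in_kwargs=False,  # True would inject the inner_thoughts kwarg into each function
        max_tokens=None,
    )
    assert data.response_format is not None  # JSON mode auto-enabled for gpt-4o-* models
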
letta/providers.py CHANGED
@@ -220,7 +220,11 @@ class GoogleAIProvider(Provider):
 
 
 class AzureProvider(Provider):
-    pass
+    name: str = "azure"
+    base_url: str = Field(
+        ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
+    )
+    api_key: str = Field(..., description="API key for the Azure API.")
 
 
 class VLLMProvider(OpenAIProvider):
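
Note: AzureProvider goes from a stub to a concrete provider; both new fields use Field(...), i.e. they are required at construction time. An illustrative instantiation:

    import os

    from letta.providers import AzureProvider

    # Both fields are required; construction raises a pydantic ValidationError
    # if either is missing.
    provider = AzureProvider(
        base_url=os.environ["AZURE_BASE_URL"],  # e.g. https://<org>.openai.azure.com
        api_key=os.environ["AZURE_API_KEY"],
    )
    print(provider.name)  # "azure"
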
letta/schemas/llm_config.py CHANGED

@@ -11,7 +11,7 @@ class LLMConfig(BaseModel):
         model (str): The name of the LLM model.
         model_endpoint_type (str): The endpoint type for the model.
         model_endpoint (str): The endpoint for the model.
-        model_wrapper (str): The wrapper for the model.
+        model_wrapper (str): The wrapper for the model. This is used to wrap additional text around the input/output of the model. This is useful for text-to-text completions, such as the Completions API in OpenAI.
         context_window (int): The context window size for the model.
     """
 
@@ -34,7 +34,10 @@
         "vllm",
         "hugging-face",
     ] = Field(..., description="The endpoint type for the model.")
-    model_endpoint: str = Field(..., description="The endpoint for the model.")
+    model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
+    api_version: Optional[str] = Field(
+        None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview."
+    )
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
 
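
Note: making model_endpoint optional matches the Azure flow above, where create() overwrites it from model_settings.azure_base_url at request time. A hedged sketch of an Azure-flavored config under the new schema (values illustrative):

    from letta.schemas.llm_config import LLMConfig

    azure_config = LLMConfig(
        model="gpt-4o-mini",               # the Azure deployment/model name (illustrative)
        model_endpoint_type="azure",
        model_endpoint=None,               # filled in from AZURE_BASE_URL by create()
        api_version="2023-03-15-preview",  # new field, used by the Azure backend
        context_window=128000,
    )
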
@@ -26,7 +26,6 @@ class CreateToolResponse(BaseModel):
 
 
 def setup_tools_index_router(server: SyncServer, interface: QueuingInterface):
-    # get_current_user_with_server = partial(partial(get_current_user, server), password)
 
     @router.delete("/tools/{tool_name}", tags=["tools"])
     async def delete_tool(
@@ -5,8 +5,7 @@ from pathlib import Path
 from typing import Optional
 
 import uvicorn
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
+from fastapi import FastAPI
 from starlette.middleware.cors import CORSMiddleware
 
 from letta.server.constants import REST_DEFAULT_PORT
@@ -84,21 +83,6 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )
 
-    @app.middleware("http")
-    async def set_current_user_middleware(request: Request, call_next):
-        user_id = request.headers.get("user_id")
-        if user_id:
-            try:
-                server.set_current_user(user_id)
-            except ValueError as e:
-                # Return an HTTP 401 Unauthorized response
-                # raise HTTPException(status_code=401, detail=str(e))
-                return JSONResponse(status_code=401, content={"detail": str(e)})
-        else:
-            server.set_current_user(None)
-        response = await call_next(request)
-        return response
-
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
     # this gives undocumented routes for "latest" and bare api calls.
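
Note: removing this middleware also removes per-request mutable state on the server object (set_current_user). The routers below replace it with an explicit, per-route header parameter. A self-contained toy version of the pattern (letta names are replaced with stand-ins to keep it runnable):

    from fastapi import FastAPI, Header

    app = FastAPI()
    DEFAULT_USER = "user-default"

    @app.get("/whoami")
    def whoami(user_id: str = Header(None)):
        # FastAPI maps the `user_id` parameter to the `user-id` request header
        # (underscores are converted to hyphens by default).
        # Stand-in for: actor = server.get_user_or_default(user_id=user_id)
        actor = user_id if user_id is not None else DEFAULT_USER
        return {"actor": actor}
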
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List
 
-from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
 
 from letta.constants import DEFAULT_PRESET
 from letta.schemas.agent import CreateAgent
@@ -43,11 +43,12 @@ router = APIRouter(prefix="/v1/threads", tags=["threads"])
 def create_thread(
     request: CreateThreadRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     # TODO: use requests.description and requests.metadata fields
     # TODO: handle requests.file_ids and requests.tools
     # TODO: eventually allow request to override embedding/llm model
-    actor = server.get_current_user()
+    actor = server.get_user_or_default(user_id=user_id)
 
     print("Create thread/agent", request)
     # create a letta agent
@@ -67,8 +68,9 @@ def create_thread(
 def retrieve_thread(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.get_current_user()
+    actor = server.get_user_or_default(user_id=user_id)
     agent = server.get_agent(user_id=actor.id, agent_id=thread_id)
     assert agent is not None
     return OpenAIThread(
@@ -100,8 +102,9 @@ def create_message(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     request: CreateMessageRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.get_current_user()
+    actor = server.get_user_or_default(user_id=user_id)
     agent_id = thread_id
     # create message object
     message = Message(
@@ -143,8 +146,9 @@ def list_messages(
     after: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     before: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.get_current_user()
+    actor = server.get_user_or_default(user_id)
     after_uuid = after if before else None
     before_uuid = before if before else None
     agent_id = thread_id
@@ -239,7 +243,6 @@ def create_run(
     request: CreateRunRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
 ):
-    server.get_current_user()
 
     # TODO: add request.instructions as a message?
     agent_id = thread_id
@@ -1,7 +1,7 @@
 import json
 from typing import TYPE_CHECKING
 
-from fastapi import APIRouter, Body, Depends, HTTPException
+from fastapi import APIRouter, Body, Depends, Header, HTTPException
 
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import FunctionCall, LettaMessage
@@ -30,12 +30,14 @@ router = APIRouter(prefix="/v1/chat/completions", tags=["chat_completions"])
 async def create_chat_completion(
     completion_request: ChatCompletionRequest = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     """Send a message to a Letta agent via a /chat/completions completion_request
     The bearer token will be used to identify the user.
     The 'user' field in the completion_request should be set to the agent ID.
     """
-    actor = server.get_current_user()
+    actor = server.get_user_or_default(user_id=user_id)
+
     agent_id = completion_request.user
     if agent_id is None:
         raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
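
Note: the net effect for API clients is that the acting user is now chosen per request via a header rather than via server-side state set by middleware. A hedged client-side sketch (URL, port, and ids are illustrative):

    import requests

    resp = requests.post(
        "http://localhost:8283/v1/chat/completions",  # default letta REST port; adjust as needed
        headers={"user-id": "user-123"},  # matches user_id: str = Header(None) above
        json={
            "model": "gpt-4o-mini",
            "user": "agent-456",  # this route reads the agent id from the 'user' field
            "messages": [{"role": "user", "content": "hi"}],
        },
    )
    print(resp.status_code)
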