letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241009104130__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.
Files changed (35)
  1. letta/agent.py +36 -10
  2. letta/client/client.py +8 -1
  3. letta/credentials.py +3 -3
  4. letta/errors.py +1 -1
  5. letta/functions/schema_generator.py +1 -1
  6. letta/llm_api/anthropic.py +3 -24
  7. letta/llm_api/azure_openai.py +53 -108
  8. letta/llm_api/azure_openai_constants.py +10 -0
  9. letta/llm_api/google_ai.py +39 -64
  10. letta/llm_api/helpers.py +208 -0
  11. letta/llm_api/llm_api_tools.py +43 -218
  12. letta/llm_api/openai.py +74 -50
  13. letta/main.py +1 -1
  14. letta/metadata.py +2 -0
  15. letta/providers.py +144 -31
  16. letta/schemas/agent.py +14 -0
  17. letta/schemas/llm_config.py +2 -2
  18. letta/schemas/openai/chat_completion_response.py +3 -0
  19. letta/schemas/tool.py +3 -3
  20. letta/server/rest_api/admin/tools.py +0 -1
  21. letta/server/rest_api/app.py +1 -17
  22. letta/server/rest_api/routers/openai/assistants/threads.py +10 -7
  23. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +5 -3
  24. letta/server/rest_api/routers/v1/agents.py +23 -13
  25. letta/server/rest_api/routers/v1/blocks.py +5 -3
  26. letta/server/rest_api/routers/v1/jobs.py +5 -3
  27. letta/server/rest_api/routers/v1/sources.py +25 -13
  28. letta/server/rest_api/routers/v1/tools.py +12 -7
  29. letta/server/server.py +33 -37
  30. letta/settings.py +5 -113
  31. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/METADATA +1 -1
  32. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/RECORD +35 -33
  33. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_ai.py

@@ -1,9 +1,10 @@
  import uuid
- from typing import List, Optional
+ from typing import List, Optional, Tuple

  import requests

  from letta.constants import NON_USER_MSG_PREFIX
+ from letta.llm_api.helpers import make_post_request
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
  from letta.local_llm.utils import count_tokens
  from letta.schemas.openai.chat_completion_request import Tool
@@ -15,27 +16,41 @@ from letta.schemas.openai.chat_completion_response import (
      ToolCall,
      UsageStatistics,
  )
- from letta.utils import get_tool_call_id, get_utc_time
+ from letta.utils import get_tool_call_id, get_utc_time, json_dumps

- # from letta.data_types import ToolCall

+ def get_gemini_endpoint_and_headers(
+     base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
+ ) -> Tuple[str, dict]:
+     """
+     Dynamically generate the model endpoint and headers.
+     """
+     url = f"{base_url}/v1beta/models"

- SUPPORTED_MODELS = [
-     "gemini-pro",
- ]
-
+     # Add the model
+     if model is not None:
+         url += f"/{model}"

- def google_ai_get_model_details(service_endpoint: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
-     from letta.utils import printd
+     # Add extension for generating content if we're hitting the LM
+     if generate_content:
+         url += ":generateContent"

+     # Decide if api key should be in header or not
      # Two ways to pass the key: https://ai.google.dev/tutorials/setup
      if key_in_header:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}"
          headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
      else:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}?key={api_key}"
+         url += f"?key={api_key}"
          headers = {"Content-Type": "application/json"}

+     return url, headers
+
+
+ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+     from letta.utils import printd
+
+     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
      try:
          response = requests.get(url, headers=headers)
          printd(f"response = {response}")
@@ -66,25 +81,17 @@ def google_ai_get_model_details(service_endpoint: str, api_key: str, model: str,
          raise e


- def google_ai_get_model_context_window(service_endpoint: str, api_key: str, model: str, key_in_header: bool = True) -> int:
-     model_details = google_ai_get_model_details(
-         service_endpoint=service_endpoint, api_key=api_key, model=model, key_in_header=key_in_header
-     )
+ def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+     model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
      # TODO should this be:
      # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
      return int(model_details["inputTokenLimit"])


- def google_ai_get_model_list(service_endpoint: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+ def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
      from letta.utils import printd

-     # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-     if key_in_header:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models"
-         headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
-     else:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models?key={api_key}"
-         headers = {"Content-Type": "application/json"}
+     url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)

      try:
          response = requests.get(url, headers=headers)
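
The signature change from service_endpoint to base_url also changes the calling convention: callers now pass a full URL rather than a googleapis subdomain. A hedged sketch of the new convention (values illustrative):

    from letta.llm_api.google_ai import google_ai_get_model_context_window

    # Assumes a valid Google AI Studio key; the base_url value is an example input.
    context_window = google_ai_get_model_context_window(
        base_url="https://generativelanguage.googleapis.com",
        api_key="YOUR_API_KEY",
        model="gemini-pro",
    )
    # Returns int(model_details["inputTokenLimit"]) for the requested model.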
@@ -396,7 +403,7 @@ def convert_google_ai_response_to_chatcompletion(

  # TODO convert 'data' type to pydantic
  def google_ai_chat_completions_request(
-     service_endpoint: str,
+     base_url: str,
      model: str,
      api_key: str,
      data: dict,
@@ -414,55 +421,23 @@ def google_ai_chat_completions_request(
      This service has the following service endpoint and all URIs below are relative to this service endpoint:
      https://xxx.googleapis.com
      """
-     from letta.utils import printd

-     assert service_endpoint is not None, "Missing service_endpoint when calling Google AI"
      assert api_key is not None, "Missing api_key when calling Google AI"
-     assert model in SUPPORTED_MODELS, f"Model '{model}' not in supported models: {', '.join(SUPPORTED_MODELS)}"

-     # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-     if key_in_header:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent"
-         headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
-     else:
-         url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
-         headers = {"Content-Type": "application/json"}
+     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header, generate_content=True)

      # data["contents"][-1]["role"] = "model"
      if add_postfunc_model_messages:
          data["contents"] = add_dummy_model_messages(data["contents"])

-     printd(f"Sending request to {url}")
+     response_json = make_post_request(url, headers, data)
      try:
-         response = requests.post(url, headers=headers, json=data)
-         printd(f"response = {response}")
-         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-         response = response.json()  # convert to dict from string
-         printd(f"response.json = {response}")
-
-         # Convert Google AI response to ChatCompletion style
          return convert_google_ai_response_to_chatcompletion(
-             response_json=response,
-             model=model,
+             response_json=response_json,
+             model=data.get("model"),
              input_messages=data["contents"],
-             pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+             pull_inner_thoughts_from_args=data.get("inner_thoughts_in_kwargs", False),
          )
-
-     except requests.exceptions.HTTPError as http_err:
-         # Handle HTTP errors (e.g., response 4XX, 5XX)
-         printd(f"Got HTTPError, exception={http_err}, payload={data}")
-         # Print the HTTP status code
-         print(f"HTTP Error: {http_err.response.status_code}")
-         # Print the response content (error message from server)
-         print(f"Message: {http_err.response.text}")
-         raise http_err
-
-     except requests.exceptions.RequestException as req_err:
-         # Handle other requests-related errors (e.g., connection error)
-         printd(f"Got RequestException, exception={req_err}")
-         raise req_err
-
-     except Exception as e:
-         # Handle other potential errors
-         printd(f"Got unknown Exception, exception={e}")
-         raise e
+     except Exception as conversion_error:
+         print(f"Error during response conversion: {conversion_error}")
+         raise conversion_error
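
Two behavioral details of the rewritten request path are worth noting. First, make_post_request is now called outside the try block, so HTTP and connection errors are handled (and re-raised) inside the helper, while the remaining try/except only guards the response conversion. Second, model and pull_inner_thoughts_from_args are now read back out of the request payload via data.get(...) rather than from the function's own parameters, so both only take effect if the caller puts those keys in data. A sketch of the payload shape this implies (illustrative; the contents entry follows the Gemini REST format):

    data = {
        "model": "gemini-pro",
        "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
        "inner_thoughts_in_kwargs": True,  # read back out via data.get(...)
    }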
letta/llm_api/helpers.py (new file)

@@ -0,0 +1,208 @@
+ import copy
+ import json
+ import warnings
+ from typing import Any, List, Union
+
+ import requests
+
+ from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+ from letta.schemas.enums import OptionState
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+ from letta.utils import json_dumps, printd
+
+
+ def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
+     printd(f"Sending request to {url}")
+     try:
+         # Make the POST request
+         response = requests.post(url, headers=headers, json=data)
+         printd(f"Response status code: {response.status_code}")
+
+         # Raise for 4XX/5XX HTTP errors
+         response.raise_for_status()
+
+         # Ensure the content is JSON before parsing
+         if response.headers.get("Content-Type") == "application/json":
+             response_data = response.json()  # Convert to dict from JSON
+             printd(f"Response JSON: {response_data}")
+         else:
+             error_message = f"Unexpected content type returned: {response.headers.get('Content-Type')}"
+             printd(error_message)
+             raise ValueError(error_message)
+
+         # Process the response using the callback function
+         return response_data
+
+     except requests.exceptions.HTTPError as http_err:
+         # HTTP errors (4XX, 5XX)
+         error_message = f"HTTP error occurred: {http_err}"
+         if http_err.response is not None:
+             error_message += f" | Status code: {http_err.response.status_code}, Message: {http_err.response.text}"
+         printd(error_message)
+         raise requests.exceptions.HTTPError(error_message) from http_err
+
+     except requests.exceptions.Timeout as timeout_err:
+         # Handle timeout errors
+         error_message = f"Request timed out: {timeout_err}"
+         printd(error_message)
+         raise requests.exceptions.Timeout(error_message) from timeout_err
+
+     except requests.exceptions.RequestException as req_err:
+         # Non-HTTP errors (e.g., connection, SSL errors)
+         error_message = f"Request failed: {req_err}"
+         printd(error_message)
+         raise requests.exceptions.RequestException(error_message) from req_err
+
+     except ValueError as val_err:
+         # Handle content-type or non-JSON response issues
+         error_message = f"ValueError: {val_err}"
+         printd(error_message)
+         raise ValueError(error_message) from val_err
+
+     except Exception as e:
+         # Catch any other unknown exceptions
+         error_message = f"An unexpected error occurred: {e}"
+         printd(error_message)
+         raise Exception(error_message) from e
+
+
+ # TODO update to use better types
+ def add_inner_thoughts_to_functions(
+     functions: List[dict],
+     inner_thoughts_key: str,
+     inner_thoughts_description: str,
+     inner_thoughts_required: bool = True,
+     # inner_thoughts_to_front: bool = True,  TODO support sorting somewhere, probably in the to_dict?
+ ) -> List[dict]:
+     """Add an inner_thoughts kwarg to every function in the provided list"""
+     # return copies
+     new_functions = []
+
+     # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
+     for function_object in functions:
+         function_params = function_object["parameters"]["properties"]
+         required_params = list(function_object["parameters"]["required"])
+
+         # if the inner thoughts arg doesn't exist, add it
+         if inner_thoughts_key not in function_params:
+             function_params[inner_thoughts_key] = {
+                 "type": "string",
+                 "description": inner_thoughts_description,
+             }
+
+         # make sure it's tagged as required
+         new_function_object = copy.deepcopy(function_object)
+         if inner_thoughts_required and inner_thoughts_key not in required_params:
+             required_params.append(inner_thoughts_key)
+             new_function_object["parameters"]["required"] = required_params
+
+         new_functions.append(new_function_object)
+
+     # return a list of copies
+     return new_functions
+
+
+ def unpack_all_inner_thoughts_from_kwargs(
+     response: ChatCompletionResponse,
+     inner_thoughts_key: str,
+ ) -> ChatCompletionResponse:
+     """Strip the inner thoughts out of the tool call and put it in the message content"""
+     if len(response.choices) == 0:
+         raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+
+     new_choices = []
+     for choice in response.choices:
+         new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
+
+     # return an updated copy
+     new_response = response.model_copy(deep=True)
+     new_response.choices = new_choices
+     return new_response
+
+
+ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+     message = choice.message
+     if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+         if len(message.tool_calls) > 1:
+             warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+         # TODO support multiple tool calls
+         tool_call = message.tool_calls[0]
+
+         try:
+             # Sadly we need to parse the JSON since args are in string format
+             func_args = dict(json.loads(tool_call.function.arguments))
+             if inner_thoughts_key in func_args:
+                 # extract the inner thoughts
+                 inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                 # replace the kwargs
+                 new_choice = choice.model_copy(deep=True)
+                 new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                 # also replace the message content
+                 if new_choice.message.content is not None:
+                     warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                 new_choice.message.content = inner_thoughts
+
+                 return new_choice
+             else:
+                 warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+         except json.JSONDecodeError as e:
+             warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+             raise e
+
+
+ def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
+     """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
+     from letta.utils import printd
+
+     match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+
+     # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
+     if match_string in str(exception):
+         printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
+         return True
+
+     # Based on python requests + OpenAI REST API (/v1)
+     elif isinstance(exception, requests.exceptions.HTTPError):
+         if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
+             try:
+                 error_details = exception.response.json()
+                 if "error" not in error_details:
+                     printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
+                     return False
+                 else:
+                     error_details = error_details["error"]
+
+                 # Check for the specific error code
+                 if error_details.get("code") == "context_length_exceeded":
+                     printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
+                     return True
+                 # Soft-check for "maximum context length" inside of the message
+                 elif error_details.get("message") and "maximum context length" in error_details.get("message"):
+                     printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
+                     return True
+                 else:
+                     printd(f"HTTPError occurred, but unknown error message: {error_details}")
+                     return False
+             except ValueError:
+                 # JSON decoding failed
+                 printd(f"HTTPError occurred ({exception}), but no JSON error message.")
+
+     # Generic fail
+     else:
+         return False
+
+
+ def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
+     if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
+         # model that are known to not use `content` fields on tool calls
+         inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
+     else:
+         inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
+
+     if not isinstance(inner_thoughts_in_kwargs, bool):
+         warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
+         inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
+
+     return inner_thoughts_in_kwargs
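
Taken together, add_inner_thoughts_to_functions and unpack_inner_thoughts_from_kwargs form a round trip: the inner-thoughts parameter is injected into each tool schema before the request, then stripped out of the returned tool-call arguments and moved into the message content. A minimal sketch of the injection half (the tool schema below is invented for illustration):

    from letta.llm_api.helpers import add_inner_thoughts_to_functions

    # Hypothetical OpenAI-style function schema, not taken from the diff.
    functions = [
        {
            "name": "send_message",
            "parameters": {
                "type": "object",
                "properties": {"message": {"type": "string"}},
                "required": ["message"],
            },
        }
    ]

    augmented = add_inner_thoughts_to_functions(
        functions,
        inner_thoughts_key="inner_thoughts",
        inner_thoughts_description="Private reasoning, not shown to the user.",
    )
    # augmented[0]["parameters"]["properties"] now contains "inner_thoughts",
    # and "inner_thoughts" is appended to its "required" list.

One subtlety visible in the diff: the new property is written into function_object["parameters"]["properties"] before the deepcopy, so the input schemas are mutated in place even though the returned list contains copies.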