letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff reflects the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
The diff reproduced below is for letta/llm_api/llm_api_tools.py (the new file with +485 lines in the list above).

@@ -0,0 +1,485 @@
+ import copy
+ import json
+ import os
+ import random
+ import time
+ import warnings
+ from typing import List, Optional, Union
+
+ import requests
+
+ from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+ from letta.credentials import LettaCredentials
+ from letta.llm_api.anthropic import anthropic_chat_completions_request
+ from letta.llm_api.azure_openai import (
+     MODEL_TO_AZURE_ENGINE,
+     azure_openai_chat_completions_request,
+ )
+ from letta.llm_api.cohere import cohere_chat_completions_request
+ from letta.llm_api.google_ai import (
+     convert_tools_to_google_ai_format,
+     google_ai_chat_completions_request,
+ )
+ from letta.llm_api.openai import (
+     openai_chat_completions_process_stream,
+     openai_chat_completions_request,
+ )
+ from letta.local_llm.chat_completion_proxy import get_chat_completion
+ from letta.local_llm.constants import (
+     INNER_THOUGHTS_KWARG,
+     INNER_THOUGHTS_KWARG_DESCRIPTION,
+ )
+ from letta.schemas.enums import OptionState
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.message import Message
+ from letta.schemas.openai.chat_completion_request import (
+     ChatCompletionRequest,
+     Tool,
+     cast_message_to_subtype,
+ )
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.streaming_interface import (
+     AgentChunkStreamingInterface,
+     AgentRefreshStreamingInterface,
+ )
+ from letta.utils import json_dumps
+
+ LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local"]
+
+
+ # TODO update to use better types
+ def add_inner_thoughts_to_functions(
+     functions: List[dict],
+     inner_thoughts_key: str,
+     inner_thoughts_description: str,
+     inner_thoughts_required: bool = True,
+     # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
+ ) -> List[dict]:
+     """Add an inner_thoughts kwarg to every function in the provided list"""
+     # return copies
+     new_functions = []
+
+     # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
+     for function_object in functions:
+         function_params = function_object["parameters"]["properties"]
+         required_params = list(function_object["parameters"]["required"])
+
+         # if the inner thoughts arg doesn't exist, add it
+         if inner_thoughts_key not in function_params:
+             function_params[inner_thoughts_key] = {
+                 "type": "string",
+                 "description": inner_thoughts_description,
+             }
+
+         # make sure it's tagged as required
+         new_function_object = copy.deepcopy(function_object)
+         if inner_thoughts_required and inner_thoughts_key not in required_params:
+             required_params.append(inner_thoughts_key)
+             new_function_object["parameters"]["required"] = required_params
+
+         new_functions.append(new_function_object)
+
+     # return a list of copies
+     return new_functions
+
+
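The helper above rewrites OpenAI-style function schemas so that every function forces an extra string argument carrying the model's inner monologue. A minimal sketch of the effect, using a hypothetical schema (the send_message schema and the key name below are illustrative, not taken from the package):

    from letta.llm_api.llm_api_tools import add_inner_thoughts_to_functions

    # Hypothetical OpenAI-style function schema, for illustration only
    functions = [
        {
            "name": "send_message",
            "parameters": {
                "type": "object",
                "properties": {"message": {"type": "string"}},
                "required": ["message"],
            },
        }
    ]

    patched = add_inner_thoughts_to_functions(
        functions=functions,
        inner_thoughts_key="inner_thoughts",  # assumed key; create() passes INNER_THOUGHTS_KWARG
        inner_thoughts_description="Private reasoning that precedes the visible arguments.",
    )

    # The patched schema now demands the extra kwarg from the model
    assert "inner_thoughts" in patched[0]["parameters"]["properties"]
    assert "inner_thoughts" in patched[0]["parameters"]["required"]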
+ def unpack_inner_thoughts_from_kwargs(
+     response: ChatCompletionResponse,
+     inner_thoughts_key: str,
+ ) -> ChatCompletionResponse:
+     """Strip the inner thoughts out of the tool call and put it in the message content"""
+     if len(response.choices) == 0:
+         raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+
+     new_choices = []
+     for choice in response.choices:
+         msg = choice.message
+         if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) >= 1:
+             if len(msg.tool_calls) > 1:
+                 warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(msg.tool_calls)}) is not supported")
+             # TODO support multiple tool calls
+             tool_call = msg.tool_calls[0]
+
+             try:
+                 # Sadly we need to parse the JSON since args are in string format
+                 func_args = dict(json.loads(tool_call.function.arguments))
+                 if inner_thoughts_key in func_args:
+                     # extract the inner thoughts
+                     inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                     # replace the kwargs
+                     new_choice = choice.model_copy(deep=True)
+                     new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                     # also replace the message content
+                     if new_choice.message.content is not None:
+                         warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                     new_choice.message.content = inner_thoughts
+
+                     # save copy
+                     new_choices.append(new_choice)
+                 else:
+                     warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+             except json.JSONDecodeError as e:
+                 warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+                 raise e
+
+     # return an updated copy
+     new_response = response.model_copy(deep=True)
+     new_response.choices = new_choices
+     return new_response
+
+
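unpack_inner_thoughts_from_kwargs is the inverse: once the model answers, the inner-thoughts kwarg is popped out of the tool call's JSON arguments and promoted to message.content. The core transformation, reduced to plain dicts so it runs standalone (the argument payload is made up for the example):

    import json

    # Illustrative tool-call arguments, as the model returns them (a JSON string)
    raw_arguments = json.dumps(
        {
            "inner_thoughts": "The user seems sad; respond warmly.",
            "message": "I'm here for you.",
        }
    )

    # What the function does per choice, minus the pydantic copying:
    func_args = dict(json.loads(raw_arguments))
    content = func_args.pop("inner_thoughts")  # becomes message.content
    arguments = json.dumps(func_args)  # the module uses letta.utils.json_dumps here

    assert content == "The user seems sad; respond warmly."
    assert json.loads(arguments) == {"message": "I'm here for you."}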
+ def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
+     """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
+     from letta.utils import printd
+
+     match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+
+     # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
+     if match_string in str(exception):
+         printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
+         return True
+
+     # Based on python requests + OpenAI REST API (/v1)
+     elif isinstance(exception, requests.exceptions.HTTPError):
+         if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
+             try:
+                 error_details = exception.response.json()
+                 if "error" not in error_details:
+                     printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
+                     return False
+                 else:
+                     error_details = error_details["error"]
+
+                     # Check for the specific error code
+                     if error_details.get("code") == "context_length_exceeded":
+                         printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
+                         return True
+                     # Soft-check for "maximum context length" inside of the message
+                     elif error_details.get("message") and "maximum context length" in error_details.get("message"):
+                         printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
+                         return True
+                     else:
+                         printd(f"HTTPError occurred, but unknown error message: {error_details}")
+                         return False
+             except ValueError:
+                 # JSON decoding failed
+                 printd(f"HTTPError occurred ({exception}), but no JSON error message.")
+
+     # Generic fail
+     else:
+         return False
+
+
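A typical caller wraps an LLM request and uses this check to decide between trimming the context and re-raising. A self-contained way to see it fire is to stub an OpenAI-shaped 400 response (the stub below is test scaffolding, not part of the package):

    import requests
    from letta.llm_api.llm_api_tools import is_context_overflow_error

    def make_overflow_error() -> requests.exceptions.HTTPError:
        """Fake a 400 response shaped like OpenAI's context-length error."""
        resp = requests.Response()
        resp.status_code = 400
        resp.headers["Content-Type"] = "application/json"
        resp._content = (  # test-style shortcut for setting the response body
            b'{"error": {"code": "context_length_exceeded",'
            b' "message": "maximum context length exceeded"}}'
        )
        return requests.exceptions.HTTPError(response=resp)

    assert is_context_overflow_error(make_overflow_error()) is True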
+ def retry_with_exponential_backoff(
+     func,
+     initial_delay: float = 1,
+     exponential_base: float = 2,
+     jitter: bool = True,
+     max_retries: int = 20,
+     # List of OpenAI error codes: https://github.com/openai/openai-python/blob/17ac6779958b2b74999c634c4ea4c7b74906027a/src/openai/_client.py#L227-L250
+     # 429 = rate limit
+     error_codes: tuple = (429,),
+ ):
+     """Retry a function with exponential backoff."""
+
+     def wrapper(*args, **kwargs):
+         pass
+
+         # Initialize variables
+         num_retries = 0
+         delay = initial_delay
+
+         # Loop until a successful response or max_retries is hit or an exception is raised
+         while True:
+             try:
+                 return func(*args, **kwargs)
+
+             except requests.exceptions.HTTPError as http_err:
+                 # Retry on specified errors
+                 if http_err.response.status_code in error_codes:
+                     # Increment retries
+                     num_retries += 1
+
+                     # Check if max retries has been reached
+                     if num_retries > max_retries:
+                         raise Exception(f"Maximum number of retries ({max_retries}) exceeded.")
+
+                     # Increment the delay
+                     delay *= exponential_base * (1 + jitter * random.random())
+
+                     # Sleep for the delay
+                     # printd(f"Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying...")
+                     print(
+                         f"{CLI_WARNING_PREFIX}Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying..."
+                     )
+                     time.sleep(delay)
+                 else:
+                     # For other HTTP errors, re-raise the exception
+                     raise
+
+             # Raise exceptions for any errors not specified
+             except Exception as e:
+                 raise e
+
+     return wrapper
+
+
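Because the decorator is applied bare (no arguments), it uses the defaults: retry only on HTTP 429, up to 20 times, multiplying the delay by exponential_base plus jitter on each attempt. A runnable sketch with a deliberately flaky stub (note that the retries really sleep):

    import requests
    from letta.llm_api.llm_api_tools import retry_with_exponential_backoff

    attempts = {"n": 0}

    @retry_with_exponential_backoff
    def flaky_request():
        attempts["n"] += 1
        if attempts["n"] < 3:
            resp = requests.Response()
            resp.status_code = 429  # rate limited -> triggers a backoff retry
            raise requests.exceptions.HTTPError(response=resp)
        return "ok"

    print(flaky_request())  # fails twice, sleeps in between, then prints "ok"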
+ @retry_with_exponential_backoff
+ def create(
+     # agent_state: AgentState,
+     llm_config: LLMConfig,
+     messages: List[Message],
+     user_id: Optional[str] = None,  # optional UUID to associate request with
+     functions: Optional[list] = None,
+     functions_python: Optional[list] = None,
+     function_call: str = "auto",
+     # hint
+     first_message: bool = False,
+     # use tool naming?
+     # if false, will use deprecated 'functions' style
+     use_tool_naming: bool = True,
+     # streaming?
+     stream: bool = False,
+     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
+     # TODO move to llm_config?
+     # if unspecified (None), default to something we've tested
+     inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+ ) -> ChatCompletionResponse:
+     """Return response to chat completion with backoff"""
+     from letta.utils import printd
+
+     printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")
+
+     # TODO eventually refactor so that credentials are passed through
+
+     credentials = LettaCredentials.load()
+
+     if function_call and not functions:
+         printd("unsetting function_call because functions is None")
+         function_call = None
+
+     # openai
+     if llm_config.model_endpoint_type == "openai":
+
+         if inner_thoughts_in_kwargs == OptionState.DEFAULT:
+             # models that are known to not use `content` fields on tool calls
+             inner_thoughts_in_kwargs = (
+                 "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model
+             )
+         else:
+             inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
+
+         if not isinstance(inner_thoughts_in_kwargs, bool):
+             warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
+             inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
+         if inner_thoughts_in_kwargs:
+             functions = add_inner_thoughts_to_functions(
+                 functions=functions,
+                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                 inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+             )
+
+         openai_message_list = [
+             cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+         ]
+
+         # TODO do the same for Azure?
+         if credentials.openai_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
+             # only a problem if we are *not* using an openai proxy
+             raise ValueError(f"OpenAI key is missing from letta config file")
+         if use_tool_naming:
+             data = ChatCompletionRequest(
+                 model=llm_config.model,
+                 messages=openai_message_list,
+                 tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                 tool_choice=function_call,
+                 user=str(user_id),
+             )
+         else:
+             data = ChatCompletionRequest(
+                 model=llm_config.model,
+                 messages=openai_message_list,
+                 functions=functions,
+                 function_call=function_call,
+                 user=str(user_id),
+             )
+         # https://platform.openai.com/docs/guides/text-generation/json-mode
+         # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
+         if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+             data.response_format = {"type": "json_object"}
+
+         if "inference.memgpt.ai" in llm_config.model_endpoint:
+             # override user id for inference.memgpt.ai
+             import uuid
+             data.user = str(uuid.UUID(int=0))
+
+         if stream:  # Client requested token streaming
+             data.stream = True
+             assert isinstance(stream_inferface, AgentChunkStreamingInterface) or isinstance(
+                 stream_inferface, AgentRefreshStreamingInterface
+             ), type(stream_inferface)
+             response = openai_chat_completions_process_stream(
+                 url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
+                 api_key=credentials.openai_key,
+                 chat_completion_request=data,
+                 stream_inferface=stream_inferface,
+             )
+         else:  # Client did not request token streaming (expect a blocking backend response)
+             data.stream = False
+             if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                 stream_inferface.stream_start()
+             try:
+                 response = openai_chat_completions_request(
+                     url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
+                     api_key=credentials.openai_key,
+                     chat_completion_request=data,
+                 )
+             finally:
+                 if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                     stream_inferface.stream_end()
+
+         if inner_thoughts_in_kwargs:
+             response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+         return response
+
+     # azure
+     elif llm_config.model_endpoint_type == "azure":
+         if stream:
+             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+
+         azure_deployment = (
+             credentials.azure_deployment if credentials.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
+         )
+         if use_tool_naming:
+             data = dict(
+                 # NOTE: don't pass model to Azure calls, that is the deployment_id
+                 # model=agent_config.model,
+                 messages=[m.to_openai_dict() for m in messages],
+                 tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                 tool_choice=function_call,
+                 user=str(user_id),
+             )
+         else:
+             data = dict(
+                 # NOTE: don't pass model to Azure calls, that is the deployment_id
+                 # model=agent_config.model,
+                 messages=[m.to_openai_dict() for m in messages],
+                 functions=functions,
+                 function_call=function_call,
+                 user=str(user_id),
+             )
+         return azure_openai_chat_completions_request(
+             resource_name=credentials.azure_endpoint,
+             deployment_id=azure_deployment,
+             api_version=credentials.azure_version,
+             api_key=credentials.azure_key,
+             data=data,
+         )
+
+     elif llm_config.model_endpoint_type == "google_ai":
+         if stream:
+             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+         if not use_tool_naming:
+             raise NotImplementedError("Only tool calling supported on Google AI API requests")
+
+         # NOTE: until Google AI supports CoT / text alongside function calls,
+         # we need to put it in a kwarg (unless we want to split the message into two)
+         google_ai_inner_thoughts_in_kwarg = True
+
+         if functions is not None:
+             tools = [{"type": "function", "function": f} for f in functions]
+             tools = [Tool(**t) for t in tools]
+             tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg)
+         else:
+             tools = None
+
+         return google_ai_chat_completions_request(
+             inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg,
+             service_endpoint=credentials.google_ai_service_endpoint,
+             model=llm_config.model,
+             api_key=credentials.google_ai_key,
+             # see structure of payload here: https://ai.google.dev/docs/function_calling
+             data=dict(
+                 contents=[m.to_google_ai_dict() for m in messages],
+                 tools=tools,
+             ),
+         )
+
+     elif llm_config.model_endpoint_type == "anthropic":
+         if stream:
+             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+         if not use_tool_naming:
+             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
+
+         if functions is not None:
+             tools = [{"type": "function", "function": f} for f in functions]
+             tools = [Tool(**t) for t in tools]
+         else:
+             tools = None
+
+         return anthropic_chat_completions_request(
+             url=llm_config.model_endpoint,
+             api_key=credentials.anthropic_key,
+             data=ChatCompletionRequest(
+                 model=llm_config.model,
+                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
+                 tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                 # tool_choice=function_call,
+                 # user=str(user_id),
+                 # NOTE: max_tokens is required for Anthropic API
+                 max_tokens=1024,  # TODO make dynamic
+             ),
+         )
+
+     elif llm_config.model_endpoint_type == "cohere":
+         if stream:
+             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+         if not use_tool_naming:
+             raise NotImplementedError("Only tool calling supported on Cohere API requests")
+
+         if functions is not None:
+             tools = [{"type": "function", "function": f} for f in functions]
+             tools = [Tool(**t) for t in tools]
+         else:
+             tools = None
+
+         return cohere_chat_completions_request(
+             # url=llm_config.model_endpoint,
+             url="https://api.cohere.ai/v1",  # TODO
+             api_key=os.getenv("COHERE_API_KEY"),  # TODO remove
+             chat_completion_request=ChatCompletionRequest(
+                 model="command-r-plus",  # TODO
+                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
+                 tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                 tool_choice=function_call,
+                 # user=str(user_id),
+                 # NOTE: max_tokens is required for Anthropic API
+                 # max_tokens=1024, # TODO make dynamic
+             ),
+         )
+
+     # local model
+     else:
+         if stream:
+             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+         return get_chat_completion(
+             model=llm_config.model,
+             messages=messages,
+             functions=functions,
+             functions_python=functions_python,
+             function_call=function_call,
+             context_window=llm_config.context_window,
+             endpoint=llm_config.model_endpoint,
+             endpoint_type=llm_config.model_endpoint_type,
+             wrapper=llm_config.model_wrapper,
+             user=str(user_id),
+             # hint
+             first_message=first_message,
+             # auth-related
+             auth_type=credentials.openllm_auth_type,
+             auth_key=credentials.openllm_key,
+         )
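Putting it together, a hedged sketch of calling create() directly against OpenAI; the LLMConfig values and the Message constructor shape are assumptions for illustration (in the package both normally come from an agent's state, and an OpenAI key must already be stored in the Letta credentials):

    from letta.llm_api.llm_api_tools import create
    from letta.schemas.llm_config import LLMConfig
    from letta.schemas.message import Message

    # Illustrative config; real values depend on your provider setup
    llm_config = LLMConfig(
        model="gpt-4o-mini",
        model_endpoint_type="openai",
        model_endpoint="https://api.openai.com/v1",
        context_window=8192,
    )

    messages = [Message(role="user", text="Hello!")]  # assumed constructor shape

    response = create(llm_config=llm_config, messages=messages)
    print(response.choices[0].message.content)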