letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/llm_api/openai.py
@@ -0,0 +1,470 @@
+ import json
+ from typing import Generator, Optional, Union
+
+ import httpx
+ import requests
+ from httpx_sse import connect_sse
+ from httpx_sse._exceptions import SSEError
+
+ from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+ from letta.errors import LLMError
+ from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+ from letta.schemas.message import Message as _Message
+ from letta.schemas.message import MessageRole as _MessageRole
+ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
+ from letta.schemas.openai.chat_completion_response import (
+     ChatCompletionChunkResponse,
+     ChatCompletionResponse,
+     Choice,
+     FunctionCall,
+     Message,
+     ToolCall,
+     UsageStatistics,
+ )
+ from letta.schemas.openai.embedding_response import EmbeddingResponse
+ from letta.streaming_interface import (
+     AgentChunkStreamingInterface,
+     AgentRefreshStreamingInterface,
+ )
+ from letta.utils import smart_urljoin
+
+ OPENAI_SSE_DONE = "[DONE]"
+
+
+ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional[bool] = False) -> dict:
+     """https://platform.openai.com/docs/api-reference/models/list"""
+     from letta.utils import printd
+
+     # In some cases we may want to double-check the URL and do basic correction, e.g.:
+     # in the Letta config the address for vLLM is stored without a /v1 suffix for simplicity,
+     # but if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit
+     if fix_url:
+         if not url.endswith("/v1"):
+             url = smart_urljoin(url, "v1")
+
+     url = smart_urljoin(url, "models")
+
+     headers = {"Content-Type": "application/json"}
+     if api_key is not None:
+         headers["Authorization"] = f"Bearer {api_key}"
+
+     printd(f"Sending request to {url}")
+     try:
+         response = requests.get(url, headers=headers)
+         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+         response = response.json()  # convert to dict from string
+         printd(f"response = {response}")
+         return response
+     except requests.exceptions.HTTPError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         try:
+             response = response.json()
+         except:
+             pass
+         printd(f"Got HTTPError, exception={http_err}, response={response}")
+         raise http_err
+     except requests.exceptions.RequestException as req_err:
+         # Handle other requests-related errors (e.g., connection error)
+         try:
+             response = response.json()
+         except:
+             pass
+         printd(f"Got RequestException, exception={req_err}, response={response}")
+         raise req_err
+     except Exception as e:
+         # Handle other potential errors
+         try:
+             response = response.json()
+         except:
+             pass
+         printd(f"Got unknown Exception, exception={e}, response={response}")
+         raise e
+
+
+ def openai_chat_completions_process_stream(
+     url: str,
+     api_key: str,
+     chat_completion_request: ChatCompletionRequest,
+     stream_inferface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
+     create_message_id: bool = True,
+     create_message_datetime: bool = True,
+ ) -> ChatCompletionResponse:
+     """Process a streaming completion response, and return a ChatCompletionResponse at the end.
+
+     To "stream" the response in Letta, we want to call a streaming-compatible interface function
+     on the chunks received from the OpenAI-compatible server POST SSE response.
+     """
+     assert chat_completion_request.stream == True
+     assert stream_inferface is not None, "Required"
+
+     # Count the prompt tokens
+     # TODO move to post-request?
+     chat_history = [m.model_dump(exclude_none=True) for m in chat_completion_request.messages]
+     # print(chat_history)
+
+     prompt_tokens = num_tokens_from_messages(
+         messages=chat_history,
+         model=chat_completion_request.model,
+     )
+     # We also need to add the cost of including the functions list to the input prompt
+     if chat_completion_request.tools is not None:
+         assert chat_completion_request.functions is None
+         prompt_tokens += num_tokens_from_functions(
+             functions=[t.function.model_dump() for t in chat_completion_request.tools],
+             model=chat_completion_request.model,
+         )
+     elif chat_completion_request.functions is not None:
+         assert chat_completion_request.tools is None
+         prompt_tokens += num_tokens_from_functions(
+             functions=[f.model_dump() for f in chat_completion_request.functions],
+             model=chat_completion_request.model,
+         )
+
+     # Create a dummy Message object to get an ID and date
+     # TODO(sarah): add message ID generation function
+     dummy_message = _Message(
+         role=_MessageRole.assistant,
+         text="",
+         user_id="",
+         agent_id="",
+         model="",
+         name=None,
+         tool_calls=None,
+         tool_call_id=None,
+     )
+
+     TEMP_STREAM_RESPONSE_ID = "temp_id"
+     TEMP_STREAM_FINISH_REASON = "temp_null"
+     TEMP_STREAM_TOOL_CALL_ID = "temp_id"
+     chat_completion_response = ChatCompletionResponse(
+         id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
+         choices=[],
+         created=dummy_message.created_at,  # NOTE: doesn't matter since both will do get_utc_time()
+         model=chat_completion_request.model,
+         usage=UsageStatistics(
+             completion_tokens=0,
+             prompt_tokens=prompt_tokens,
+             total_tokens=prompt_tokens,
+         ),
+     )
+
+     if stream_inferface:
+         stream_inferface.stream_start()
+
+     n_chunks = 0  # approx == n_tokens
+     try:
+         for chunk_idx, chat_completion_chunk in enumerate(
+             openai_chat_completions_request_stream(url=url, api_key=api_key, chat_completion_request=chat_completion_request)
+         ):
+             assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
+
+             if stream_inferface:
+                 if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                     stream_inferface.process_chunk(
+                         chat_completion_chunk,
+                         message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
+                         message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                     )
+                 elif isinstance(stream_inferface, AgentRefreshStreamingInterface):
+                     stream_inferface.process_refresh(chat_completion_response)
+                 else:
+                     raise TypeError(stream_inferface)
+
+             if chunk_idx == 0:
+                 # initialize the choice objects which we will increment with the deltas
+                 num_choices = len(chat_completion_chunk.choices)
+                 assert num_choices > 0
+                 chat_completion_response.choices = [
+                     Choice(
+                         finish_reason=TEMP_STREAM_FINISH_REASON,  # NOTE: needs to be overwritten
+                         index=i,
+                         message=Message(
+                             role="assistant",
+                         ),
+                     )
+                     for i in range(len(chat_completion_chunk.choices))
+                 ]
+
+             # add the choice delta
+             assert len(chat_completion_chunk.choices) == len(chat_completion_response.choices), chat_completion_chunk
+             for chunk_choice in chat_completion_chunk.choices:
+                 if chunk_choice.finish_reason is not None:
+                     chat_completion_response.choices[chunk_choice.index].finish_reason = chunk_choice.finish_reason
+
+                 if chunk_choice.logprobs is not None:
+                     chat_completion_response.choices[chunk_choice.index].logprobs = chunk_choice.logprobs
+
+                 accum_message = chat_completion_response.choices[chunk_choice.index].message
+                 message_delta = chunk_choice.delta
+
+                 if message_delta.content is not None:
+                     content_delta = message_delta.content
+                     if accum_message.content is None:
+                         accum_message.content = content_delta
+                     else:
+                         accum_message.content += content_delta
+
+                 if message_delta.tool_calls is not None:
+                     tool_calls_delta = message_delta.tool_calls
+
+                     # If this is the first tool call showing up in a chunk, initialize the list with it
+                     if accum_message.tool_calls is None:
+                         accum_message.tool_calls = [
+                             ToolCall(id=TEMP_STREAM_TOOL_CALL_ID, function=FunctionCall(name="", arguments=""))
+                             for _ in range(len(tool_calls_delta))
+                         ]
+
+                     for tool_call_delta in tool_calls_delta:
+                         if tool_call_delta.id is not None:
+                             # TODO assert that we're not overwriting?
+                             # TODO += instead of =?
+                             accum_message.tool_calls[tool_call_delta.index].id = tool_call_delta.id
+                         if tool_call_delta.function is not None:
+                             if tool_call_delta.function.name is not None:
+                                 # TODO assert that we're not overwriting?
+                                 # TODO += instead of =?
+                                 accum_message.tool_calls[tool_call_delta.index].function.name = tool_call_delta.function.name
+                             if tool_call_delta.function.arguments is not None:
+                                 accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments
+
+                 if message_delta.function_call is not None:
+                     raise NotImplementedError(f"Old function_call style not supported with stream=True")
+
+             # overwrite response fields based on latest chunk
+             if not create_message_id:
+                 chat_completion_response.id = chat_completion_chunk.id
+             if not create_message_datetime:
+                 chat_completion_response.created = chat_completion_chunk.created
+             chat_completion_response.model = chat_completion_chunk.model
+             chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
+
+             # increment chunk counter
+             n_chunks += 1
+
+     except Exception as e:
+         if stream_inferface:
+             stream_inferface.stream_end()
+         print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
+         raise e
+     finally:
+         if stream_inferface:
+             stream_inferface.stream_end()
+
+     # make sure we didn't leave temp stuff in
+     assert all([c.finish_reason != TEMP_STREAM_FINISH_REASON for c in chat_completion_response.choices])
+     assert all(
+         [
+             all([tc.id != TEMP_STREAM_TOOL_CALL_ID for tc in c.message.tool_calls]) if c.message.tool_calls else True
+             for c in chat_completion_response.choices
+         ]
+     )
+     if not create_message_id:
+         assert chat_completion_response.id != dummy_message.id
+
+     # compute token usage before returning
+     # TODO try actually computing the #tokens instead of assuming the chunks is the same
+     chat_completion_response.usage.completion_tokens = n_chunks
+     chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
+
+     # printd(chat_completion_response)
+     return chat_completion_response
+
+
+ def _sse_post(url: str, data: dict, headers: dict) -> Generator[ChatCompletionChunkResponse, None, None]:
+
+     with httpx.Client() as client:
+         with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
+
+             # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
+             if not event_source.response.is_success:
+                 # handle errors
+                 from letta.utils import printd
+
+                 printd("Caught error before iterating SSE request:", vars(event_source.response))
+                 printd(event_source.response.read())
+
+                 try:
+                     response_bytes = event_source.response.read()
+                     response_dict = json.loads(response_bytes.decode("utf-8"))
+                     error_message = response_dict["error"]["message"]
+                     # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
+                     if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
+                         raise LLMError(error_message)
+                 except LLMError:
+                     raise
+                 except:
+                     print(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
+                     event_source.response.raise_for_status()
+
+             try:
+                 for sse in event_source.iter_sse():
+                     # printd(sse.event, sse.data, sse.id, sse.retry)
+                     if sse.data == OPENAI_SSE_DONE:
+                         # print("finished")
+                         break
+                     else:
+                         chunk_data = json.loads(sse.data)
+                         # print("chunk_data::", chunk_data)
+                         chunk_object = ChatCompletionChunkResponse(**chunk_data)
+                         # print("chunk_object::", chunk_object)
+                         # id=chunk_data["id"],
+                         # choices=[ChunkChoice],
+                         # model=chunk_data["model"],
+                         # system_fingerprint=chunk_data["system_fingerprint"]
+                         # )
+                         yield chunk_object
+
+             except SSEError as e:
+                 print("Caught an error while iterating the SSE stream:", str(e))
+                 if "application/json" in str(e):  # Check if the error is because of JSON response
+                     # TODO figure out a better way to catch the error other than re-trying with a POST
+                     response = client.post(url=url, json=data, headers=headers)  # Make the request again to get the JSON response
+                     if response.headers["Content-Type"].startswith("application/json"):
+                         error_details = response.json()  # Parse the JSON to get the error message
+                         print("Request:", vars(response.request))
+                         print("POST Error:", error_details)
+                         print("Original SSE Error:", str(e))
+                     else:
+                         print("Failed to retrieve JSON error message via retry.")
+                 else:
+                     print("SSEError not related to 'application/json' content type.")
+
+                 # Optionally re-raise the exception if you need to propagate it
+                 raise e
+
+             except Exception as e:
+                 if event_source.response.request is not None:
+                     print("HTTP Request:", vars(event_source.response.request))
+                 if event_source.response is not None:
+                     print("HTTP Status:", event_source.response.status_code)
+                     print("HTTP Headers:", event_source.response.headers)
+                     # print("HTTP Body:", event_source.response.text)
+                 print("Exception message:", str(e))
+                 raise e
+
+
+ def openai_chat_completions_request_stream(
+     url: str,
+     api_key: str,
+     chat_completion_request: ChatCompletionRequest,
+ ) -> Generator[ChatCompletionChunkResponse, None, None]:
+     from letta.utils import printd
+
+     url = smart_urljoin(url, "chat/completions")
+     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+     data = chat_completion_request.model_dump(exclude_none=True)
+
+     printd("Request:\n", json.dumps(data, indent=2))
+
+     # If functions == None, strip from the payload
+     if "functions" in data and data["functions"] is None:
+         data.pop("functions")
+         data.pop("function_call", None)  # extra safe, should exist always (default="auto")
+
+     if "tools" in data and data["tools"] is None:
+         data.pop("tools")
+         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
+
+     printd(f"Sending request to {url}")
+     try:
+         return _sse_post(url=url, data=data, headers=headers)
+     except requests.exceptions.HTTPError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         printd(f"Got HTTPError, exception={http_err}, payload={data}")
+         raise http_err
+     except requests.exceptions.RequestException as req_err:
+         # Handle other requests-related errors (e.g., connection error)
+         printd(f"Got RequestException, exception={req_err}")
+         raise req_err
+     except Exception as e:
+         # Handle other potential errors
+         printd(f"Got unknown Exception, exception={e}")
+         raise e
+
+
+ def openai_chat_completions_request(
+     url: str,
+     api_key: str,
+     chat_completion_request: ChatCompletionRequest,
+ ) -> ChatCompletionResponse:
+     """Send a ChatCompletion request to an OpenAI-compatible server
+
+     If request.stream == True, will yield ChatCompletionChunkResponses
+     If request.stream == False, will return a ChatCompletionResponse
+
+     https://platform.openai.com/docs/guides/text-generation?lang=curl
+     """
+     from letta.utils import printd
+
+     url = smart_urljoin(url, "chat/completions")
+     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+     data = chat_completion_request.model_dump(exclude_none=True)
+
+     # add check, otherwise this will cause the error: "Invalid value for 'parallel_tool_calls': 'parallel_tool_calls' is only allowed when 'tools' are specified."
+     if chat_completion_request.tools is not None:
+         data["parallel_tool_calls"] = False
+
+     printd("Request:\n", json.dumps(data, indent=2))
+
+     # If functions == None, strip from the payload
+     if "functions" in data and data["functions"] is None:
+         data.pop("functions")
+         data.pop("function_call", None)  # extra safe, should exist always (default="auto")
+
+     if "tools" in data and data["tools"] is None:
+         data.pop("tools")
+         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
+
+     printd(f"Sending request to {url}")
+     try:
+         response = requests.post(url, headers=headers, json=data)
+         printd(f"response = {response}, response.text = {response.text}")
+         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+
+         response = response.json()  # convert to dict from string
+         printd(f"response.json = {response}")
+
+         response = ChatCompletionResponse(**response)  # convert to 'dot-dict' style, which is the openai python client default
+         return response
+     except requests.exceptions.HTTPError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         printd(f"Got HTTPError, exception={http_err}, payload={data}")
+         raise http_err
+     except requests.exceptions.RequestException as req_err:
+         # Handle other requests-related errors (e.g., connection error)
+         printd(f"Got RequestException, exception={req_err}")
+         raise req_err
+     except Exception as e:
+         # Handle other potential errors
+         printd(f"Got unknown Exception, exception={e}")
+         raise e
+
+
+ def openai_embeddings_request(url: str, api_key: str, data: dict) -> EmbeddingResponse:
+     """https://platform.openai.com/docs/api-reference/embeddings/create"""
+     from letta.utils import printd
+
+     url = smart_urljoin(url, "embeddings")
+     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+
+     printd(f"Sending request to {url}")
+     try:
+         response = requests.post(url, headers=headers, json=data)
+         printd(f"response = {response}")
+         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+         response = response.json()  # convert to dict from string
+         printd(f"response.json = {response}")
+         response = EmbeddingResponse(**response)  # convert to 'dot-dict' style, which is the openai python client default
+         return response
+     except requests.exceptions.HTTPError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         printd(f"Got HTTPError, exception={http_err}, payload={data}")
+         raise http_err
+     except requests.exceptions.RequestException as req_err:
+         # Handle other requests-related errors (e.g., connection error)
+         printd(f"Got RequestException, exception={req_err}")
+         raise req_err
+     except Exception as e:
+         # Handle other potential errors
+         printd(f"Got unknown Exception, exception={e}")
+         raise e
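
For orientation, a minimal usage sketch of the request helpers above (an editor's illustration, not part of the packaged diff). It assumes an OpenAI-style endpoint and key, and that ChatCompletionRequest coerces plain dicts into message objects; the model name, key, and message are placeholders:

    # Hypothetical sketch; endpoint, key, model, and message are placeholders.
    from letta.llm_api.openai import openai_chat_completions_request
    from letta.schemas.openai.chat_completion_request import ChatCompletionRequest

    base_url = "https://api.openai.com/v1"  # the helper appends "chat/completions"
    request = ChatCompletionRequest(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
    )
    response = openai_chat_completions_request(
        url=base_url,
        api_key="sk-...",  # placeholder
        chat_completion_request=request,
    )
    print(response.choices[0].message.content)

    # For streaming, set stream=True and pass a streaming interface (an
    # AgentChunkStreamingInterface or AgentRefreshStreamingInterface
    # implementation) to openai_chat_completions_process_stream, which
    # accumulates the SSE chunks back into a ChatCompletionResponse.
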
letta/local_llm/README.md
@@ -0,0 +1,3 @@
+ # Letta + local LLMs
+
+ See [https://letta.readme.io/docs/local_llm](https://letta.readme.io/docs/local_llm) for documentation on running Letta with custom LLM backends.