letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/server/rest_api/interface.py
@@ -0,0 +1,726 @@
+ import asyncio
+ import json
+ import queue
+ from collections import deque
+ from datetime import datetime
+ from typing import AsyncGenerator, Literal, Optional, Union
+
+ from letta.interface import AgentInterface
+ from letta.schemas.enums import MessageStreamStatus
+ from letta.schemas.letta_message import (
+     AssistantMessage,
+     FunctionCall,
+     FunctionCallDelta,
+     FunctionCallMessage,
+     FunctionReturn,
+     InternalMonologue,
+     LegacyFunctionCallMessage,
+     LegacyLettaMessage,
+     LettaMessage,
+ )
+ from letta.schemas.message import Message
+ from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
+ from letta.streaming_interface import AgentChunkStreamingInterface
+ from letta.utils import is_utc_datetime
+
+
+ class QueuingInterface(AgentInterface):
+     """Messages are queued inside an internal buffer and manually flushed"""
+
+     def __init__(self, debug=True):
+         self.buffer = queue.Queue()
+         self.debug = debug
+
+     def _queue_push(self, message_api: Union[str, dict], message_obj: Union[Message, None]):
+         """Wrapper around self.buffer.put() that ensures the pushed types are safe
+
+         Data will be in the format: {
+             "message_api": ...
+             "message_obj": ...
+         }
+         """
+
+         # Check the string first
+
+         if isinstance(message_api, str):
+             # check that it's the stop word
+             if message_api == "STOP":
+                 assert message_obj is None
+                 self.buffer.put(
+                     {
+                         "message_api": message_api,
+                         "message_obj": None,
+                     }
+                 )
+             else:
+                 raise ValueError(f"Unrecognized string pushed to buffer: {message_api}")
+
+         elif isinstance(message_api, dict):
+             # check if it's the error message style
+             if len(message_api.keys()) == 1 and "internal_error" in message_api:
+                 assert message_obj is None
+                 self.buffer.put(
+                     {
+                         "message_api": message_api,
+                         "message_obj": None,
+                     }
+                 )
+             else:
+                 assert message_obj is not None, message_api
+                 self.buffer.put(
+                     {
+                         "message_api": message_api,
+                         "message_obj": message_obj,
+                     }
+                 )
+
+         else:
+             raise ValueError(f"Unrecognized type pushed to buffer: {type(message_api)}")
+
+     def to_list(self, style: Literal["obj", "api"] = "obj"):
+         """Convert queue to a list (empties it out at the same time)"""
+         items = []
+         while not self.buffer.empty():
+             try:
+                 item_to_push = self.buffer.get_nowait()
+                 if style == "obj":
+                     if item_to_push["message_obj"] is not None:
+                         items.append(item_to_push["message_obj"])
+                 elif style == "api":
+                     items.append(item_to_push["message_api"])
+                 else:
+                     raise ValueError(style)
+             except queue.Empty:
+                 break
+         if len(items) > 1 and items[-1] == "STOP":
+             items.pop()
+
+         # If the style is "obj", then we need to deduplicate any messages
+         # Filter down items for duplicates based on item.id
+         if style == "obj":
+             seen_ids = set()
+             unique_items = []
+             for item in reversed(items):
+                 if item.id not in seen_ids:
+                     seen_ids.add(item.id)
+                     unique_items.append(item)
+             items = list(reversed(unique_items))
+
+         return items
+
+     def clear(self):
+         """Clear all messages from the queue."""
+         with self.buffer.mutex:
+             # Empty the queue
+             self.buffer.queue.clear()
+
+     async def message_generator(self, style: Literal["obj", "api"] = "obj"):
+         while True:
+             if not self.buffer.empty():
+                 message = self.buffer.get()
+                 message_obj = message["message_obj"]
+                 message_api = message["message_api"]
+
+                 if message_api == "STOP":
+                     break
+
+                 # yield the message in the requested style
+                 if style == "obj":
+                     yield message_obj
+                 elif style == "api":
+                     yield message_api
+                 else:
+                     raise ValueError(style)
+
+             else:
+                 await asyncio.sleep(0.1)  # Small sleep to prevent a busy loop
+
+     def step_yield(self):
+         """Enqueue a special stop message"""
+         self._queue_push(message_api="STOP", message_obj=None)
+
+     @staticmethod
+     def step_complete():
+         pass
+
+     def error(self, error: str):
+         """Enqueue an error message, followed by a special stop message"""
+         self._queue_push(message_api={"internal_error": error}, message_obj=None)
+         self._queue_push(message_api="STOP", message_obj=None)
+
+     def user_message(self, msg: str, msg_obj: Optional[Message] = None):
+         """Handle reception of a user message"""
+         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
+         if self.debug:
+             print(msg)
+             print(vars(msg_obj))
+             print(msg_obj.created_at.isoformat())
+
+     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """Handle the agent's internal monologue"""
+         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
+         if self.debug:
+             print(msg)
+             print(vars(msg_obj))
+             print(msg_obj.created_at.isoformat())
+
+         new_message = {"internal_monologue": msg}
+
+         # add extra metadata
+         if msg_obj is not None:
+             new_message["id"] = str(msg_obj.id)
+             assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
+             new_message["date"] = msg_obj.created_at.isoformat()
+
+         self._queue_push(message_api=new_message, message_obj=msg_obj)
+
+     def assistant_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """Handle the agent sending a message"""
+         # assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
+
+         if self.debug:
+             print(msg)
+             if msg_obj is not None:
+                 print(vars(msg_obj))
+                 print(msg_obj.created_at.isoformat())
+
+         new_message = {"assistant_message": msg}
+
+         # add extra metadata
+         if msg_obj is not None:
+             new_message["id"] = str(msg_obj.id)
+             assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
+             new_message["date"] = msg_obj.created_at.isoformat()
+         else:
+             # FIXME this is a total hack
+             assert self.buffer.qsize() > 1, "Tried to reach back to grab function call data, but couldn't find a buffer message."
+             # TODO also should not be accessing protected member here
+             new_message["id"] = self.buffer.queue[-1]["message_api"]["id"]
+             new_message["date"] = self.buffer.queue[-1]["message_api"]["date"]
+             msg_obj = self.buffer.queue[-1]["message_obj"]
+
+         self._queue_push(message_api=new_message, message_obj=msg_obj)
+
+     def function_message(self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False) -> None:
+         """Handle the agent calling a function"""
+         # TODO handle 'function' messages that indicate the start of a function call
+         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
+
+         if self.debug:
+             print(msg)
+             print(vars(msg_obj))
+             print(msg_obj.created_at.isoformat())
+
+         if msg.startswith("Running "):
+             msg = msg.replace("Running ", "")
+             new_message = {"function_call": msg}
+
+         elif msg.startswith("Ran "):
+             if not include_ran_messages:
+                 return
+             msg = msg.replace("Ran ", "Function call returned: ")
+             new_message = {"function_call": msg}
+
+         elif msg.startswith("Success: "):
+             msg = msg.replace("Success: ", "")
+             new_message = {"function_return": msg, "status": "success"}
+
+         elif msg.startswith("Error: "):
+             msg = msg.replace("Error: ", "")
+             new_message = {"function_return": msg, "status": "error"}
+
+         else:
+             # NOTE: generic, should not happen
+             new_message = {"function_message": msg}
+
+         # add extra metadata
+         if msg_obj is not None:
+             new_message["id"] = str(msg_obj.id)
+             assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
+             new_message["date"] = msg_obj.created_at.isoformat()
+
+         self._queue_push(message_api=new_message, message_obj=msg_obj)
+
+
+ class FunctionArgumentsStreamHandler:
+     """State machine that processes a stream of JSON chunks from function-call arguments and extracts the value of `json_key` as plaintext"""
+
+     def __init__(self, json_key="message"):
+         self.json_key = json_key
+         self.reset()
+
+     def reset(self):
+         self.in_message = False
+         self.key_buffer = ""
+         self.accumulating = False
+         self.message_started = False
+
+     def process_json_chunk(self, chunk: str) -> Optional[str]:
+         """Process a chunk from the function arguments and return the plaintext version"""
+
+         # Use strip to handle only leading and trailing whitespace in control structures
+         if self.accumulating:
+             clean_chunk = chunk.strip()
+             if self.json_key in self.key_buffer:
+                 if ":" in clean_chunk:
+                     self.in_message = True
+                     self.accumulating = False
+                 return None
+             self.key_buffer += clean_chunk
+             return None
+
+         if self.in_message:
+             if chunk.strip() == '"' and self.message_started:
+                 self.in_message = False
+                 self.message_started = False
+                 return None
+             if not self.message_started and chunk.strip() == '"':
+                 self.message_started = True
+                 return None
+             if self.message_started:
+                 if chunk.strip().endswith('"'):
+                     self.in_message = False
+                     return chunk.rstrip('"\n')
+                 return chunk
+
+         if chunk.strip() == "{":
+             self.key_buffer = ""
+             self.accumulating = True
+             return None
+         if chunk.strip() == "}":
+             self.in_message = False
+             self.message_started = False
+             return None
+         return None
+
+
+ class StreamingServerInterface(AgentChunkStreamingInterface):
+     """Maintain a generator that is a proxy for self.process_chunk()
+
+     Usage:
+     - The main POST SSE code that launches the streaming request
+       will call .process_chunk with each incoming stream chunk (as a handler)
+
+     NOTE: this interface is SINGLE THREADED, and meant to be used
+     with a single agent. A multi-agent implementation of this interface
+     should maintain multiple generators and index them with the request ID
+     """
+
+     def __init__(self, multi_step=True):
+         # If streaming mode, ignores base interface calls like .assistant_message, etc
+         self.streaming_mode = False
+         # NOTE: flag for supporting the legacy 'stream' flag where send_message is treated specially
+         self.nonstreaming_legacy_mode = False
+         # If chat completion mode, creates a "chatcompletion-style" stream, but with concepts remapped
+         self.streaming_chat_completion_mode = False
+         self.streaming_chat_completion_mode_function_name = None  # NOTE: sadly need to track state during stream
+         # If chat completion mode, we need a special stream reader to
+         # turn the function arguments to send_message into a normal text stream
+         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler()
+
+         self._chunks = deque()
+         self._event = asyncio.Event()  # Use an event to notify when chunks are available
+         self._active = True  # This should be set to False to stop the generator
+
+         # if multi_step = True, the stream ends when the agent yields
+         # if multi_step = False, the stream ends when the step ends
+         self.multi_step = multi_step
+         self.multi_step_indicator = MessageStreamStatus.done_step
+         self.multi_step_gen_indicator = MessageStreamStatus.done_generation
+
+         # extra prints
+         self.debug = False
+         self.timeout = 30
+
+     async def _create_generator(self) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
+         """An asynchronous generator that yields chunks as they become available."""
+         while self._active:
+             try:
+                 # Wait until there is an item in the deque or the stream is deactivated
+                 await asyncio.wait_for(self._event.wait(), timeout=self.timeout)
+             except asyncio.TimeoutError:
+                 break  # Exit the loop if we timeout
+
+             while self._chunks:
+                 yield self._chunks.popleft()
+
+             # Reset the event until a new item is pushed
+             self._event.clear()
+
+     def get_generator(self) -> AsyncGenerator:
+         """Get the generator that yields processed chunks."""
+         if not self._active:
+             # If the stream is not active, don't return a generator that would produce values
+             raise StopIteration("The stream has not been started or has been ended.")
+         return self._create_generator()
+
+     def _push_to_buffer(
+         self,
+         item: Union[
+             # signal on SSE stream status [DONE_GEN], [DONE_STEP], [DONE]
+             MessageStreamStatus,
+             # the non-streaming message types
+             LettaMessage,
+             LegacyLettaMessage,
+             # the streaming message types
+             ChatCompletionChunkResponse,
+         ],
+     ):
+         """Add an item to the deque"""
+         assert self._active, "Generator is inactive"
+         assert (
+             isinstance(item, LettaMessage) or isinstance(item, LegacyLettaMessage) or isinstance(item, MessageStreamStatus)
+         ), f"Wrong type: {type(item)}"
+
+         self._chunks.append(item)
+         self._event.set()  # Signal that new data is available
+
+     def stream_start(self):
+         """Initialize streaming by activating the generator and clearing any old chunks."""
+         self.streaming_chat_completion_mode_function_name = None
+
+         if not self._active:
+             self._active = True
+             self._chunks.clear()
+             self._event.clear()
+
+     def stream_end(self):
+         """Clean up the stream by deactivating and clearing chunks."""
+         self.streaming_chat_completion_mode_function_name = None
+
+         if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
+             self._push_to_buffer(self.multi_step_gen_indicator)
+
+     def step_complete(self):
+         """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
+         if not self.multi_step:
+             # end the stream
+             self._active = False
+             self._event.set()  # Unblock the generator if it's waiting to allow it to complete
+         elif not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
+             # signal that a new step has started in the stream
+             self._push_to_buffer(self.multi_step_indicator)
+
+     def step_yield(self):
+         """If multi_step, this is the true 'stream_end' function."""
+         # end the stream
+         self._active = False
+         self._event.set()  # Unblock the generator if it's waiting to allow it to complete
+
+     @staticmethod
+     def clear():
+         return
+
+     def _process_chunk_to_letta_style(
+         self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime
+     ) -> Optional[Union[InternalMonologue, FunctionCallMessage]]:
+         """Convert an OpenAI-style streaming chunk into a letta-style message object (see process_chunk for example data)."""
+         choice = chunk.choices[0]
+         message_delta = choice.delta
+
+         # inner thoughts
+         if message_delta.content is not None:
+             processed_chunk = InternalMonologue(
+                 id=message_id,
+                 date=message_date,
+                 internal_monologue=message_delta.content,
+             )
+         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
+             tool_call = message_delta.tool_calls[0]
+
+             tool_call_delta = {}
+             if tool_call.id:
+                 tool_call_delta["id"] = tool_call.id
+             if tool_call.function:
+                 if tool_call.function.arguments:
+                     tool_call_delta["arguments"] = tool_call.function.arguments
+                 if tool_call.function.name:
+                     tool_call_delta["name"] = tool_call.function.name
+
+             processed_chunk = FunctionCallMessage(
+                 id=message_id,
+                 date=message_date,
+                 function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
+             )
+         elif choice.finish_reason is not None:
+             # skip if there's a finish
+             return None
+         else:
+             raise ValueError(f"Couldn't find delta in chunk: {chunk}")
+
+         return processed_chunk
+
+     def _process_chunk_to_openai_style(self, chunk: ChatCompletionChunkResponse) -> Optional[dict]:
+         """Chunks should look like OpenAI, but be remapped from letta-style concepts.
+
+         inner_thoughts are silenced:
+         - means that 'content' -> /dev/null
+         send_message is a "message":
+         - means that the tool call to "send_message" should map to 'content'
+
+         TODO handle occurrence of multi-step function calling
+         TODO handle partial stream of "name" in tool call
+         """
+         proxy_chunk = chunk.model_copy(deep=True)
+
+         choice = chunk.choices[0]
+         message_delta = choice.delta
+
+         # inner thoughts
+         if message_delta.content is not None:
+             # skip inner monologue
+             return None
+
+         # tool call
+         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
+             tool_call = message_delta.tool_calls[0]
+
+             if tool_call.function:
+
+                 # Track the function name while streaming
+                 # If we were previously on a 'send_message', we need to 'toggle' into 'content' mode
+                 if tool_call.function.name:
+                     if self.streaming_chat_completion_mode_function_name is None:
+                         self.streaming_chat_completion_mode_function_name = tool_call.function.name
+                     else:
+                         self.streaming_chat_completion_mode_function_name += tool_call.function.name
+
+                 if tool_call.function.name == "send_message":
+                     # early exit to turn into content mode
+                     self.streaming_chat_completion_json_reader.reset()
+                     return None
+
+                 if tool_call.function.arguments:
+                     if self.streaming_chat_completion_mode_function_name == "send_message":
+                         cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
+                         if cleaned_func_args is None:
+                             return None
+                         else:
+                             # Wipe tool call
+                             proxy_chunk.choices[0].delta.tool_calls = None
+                             # Replace with 'content'
+                             proxy_chunk.choices[0].delta.content = cleaned_func_args
+
+         processed_chunk = proxy_chunk.model_dump(exclude_none=True)
+
+         return processed_chunk
+
+     def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime):
+         """Process a streaming chunk from an OpenAI-compatible server.
+
+         Example data from the equivalent non-streaming response looks like:
+
+         data: {"function_call": "send_message({'message': \"Ah, the age-old question, Chad. The meaning of life is as subjective as the life itself. 42, as the supercomputer 'Deep Thought' calculated in 'The Hitchhiker's Guide to the Galaxy', is indeed an answer, but maybe not the one we're after. Among other things, perhaps life is about learning, experiencing and connecting. What are your thoughts, Chad? What gives your life meaning?\"})", "date": "2024-02-29T06:07:48.844733+00:00"}
+
+         data: {"assistant_message": "Ah, the age-old question, Chad. The meaning of life is as subjective as the life itself. 42, as the supercomputer 'Deep Thought' calculated in 'The Hitchhiker's Guide to the Galaxy', is indeed an answer, but maybe not the one we're after. Among other things, perhaps life is about learning, experiencing and connecting. What are your thoughts, Chad? What gives your life meaning?", "date": "2024-02-29T06:07:49.846280+00:00"}
+
+         data: {"function_return": "None", "status": "success", "date": "2024-02-29T06:07:50.847262+00:00"}
+         """
+         # Example where we just pass through the raw stream from the underlying OpenAI SSE stream
+         # processed_chunk = chunk.model_dump_json(exclude_none=True)
+
+         if self.streaming_chat_completion_mode:
+             # processed_chunk = self._process_chunk_to_openai_style(chunk)
+             raise NotImplementedError("OpenAI proxy streaming temporarily disabled")
+         else:
+             processed_chunk = self._process_chunk_to_letta_style(chunk=chunk, message_id=message_id, message_date=message_date)
+
+         if processed_chunk is None:
+             return
+
+         self._push_to_buffer(processed_chunk)
+
+     def user_message(self, msg: str, msg_obj: Optional[Message] = None):
+         """Letta receives a user message"""
+         return
+
+     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
+         """Letta generates some internal monologue"""
+         if not self.streaming_mode:
+             # create a fake "chunk" of a stream
+             processed_chunk = InternalMonologue(
+                 id=msg_obj.id,
+                 date=msg_obj.created_at,
+                 internal_monologue=msg,
+             )
+
+             self._push_to_buffer(processed_chunk)
+
+         return
+
+     def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
+         """Letta uses send_message"""
+         # NOTE: the send_message payload is surfaced via function_message() (legacy mode)
+         # or process_chunk() (streaming mode), so there is nothing to push here
+         return
+
+     def function_message(self, msg: str, msg_obj: Optional[Message] = None):
+         """Letta calls a function"""
+
+         # TODO handle 'function' messages that indicate the start of a function call
+         assert msg_obj is not None, "StreamingServerInterface requires msg_obj references for metadata"
+
+         if msg.startswith("Running "):
+             if not self.streaming_mode:
+                 # create a fake "chunk" of a stream
+                 function_call = msg_obj.tool_calls[0]
+
+                 if self.nonstreaming_legacy_mode:
+                     # Special case where we want to send two chunks - one first for the function call, then for send_message
+
+                     # Should be in the following legacy style:
+                     # data: {
+                     #     "function_call": "send_message({'message': 'Chad, ... ask?'})",
+                     #     "id": "771748ee-120a-453a-960d-746570b22ee5",
+                     #     "date": "2024-06-22T23:04:32.141923+00:00"
+                     # }
+                     try:
+                         func_args = json.loads(function_call.function.arguments)
+                     except (TypeError, json.JSONDecodeError):
+                         func_args = function_call.function.arguments
+
+                     processed_chunk = LegacyFunctionCallMessage(
+                         id=msg_obj.id,
+                         date=msg_obj.created_at,
+                         function_call=f"{function_call.function.name}({func_args})",
+                     )
+                     self._push_to_buffer(processed_chunk)
+
+                     if function_call.function.name == "send_message":
+                         try:
+                             processed_chunk = AssistantMessage(
+                                 id=msg_obj.id,
+                                 date=msg_obj.created_at,
+                                 assistant_message=func_args["message"],
+                             )
+                             self._push_to_buffer(processed_chunk)
+                         except Exception as e:
+                             print(f"Failed to parse function message: {e}")
+
+                 else:
+                     processed_chunk = FunctionCallMessage(
+                         id=msg_obj.id,
+                         date=msg_obj.created_at,
+                         function_call=FunctionCall(
+                             name=function_call.function.name,
+                             arguments=function_call.function.arguments,
+                         ),
+                     )
+                     self._push_to_buffer(processed_chunk)
+
+                 return
+             else:
+                 return
+
+         elif msg.startswith("Ran "):
+             return
+
+         elif msg.startswith("Success: "):
+             msg = msg.replace("Success: ", "")
+             new_message = FunctionReturn(
+                 id=msg_obj.id,
+                 date=msg_obj.created_at,
+                 function_return=msg,
+                 status="success",
+             )
+
+         elif msg.startswith("Error: "):
+             msg = msg.replace("Error: ", "")
+             new_message = FunctionReturn(
+                 id=msg_obj.id,
+                 date=msg_obj.created_at,
+                 function_return=msg,
+                 status="error",
+             )
+
+         else:
+             # NOTE: generic, should not happen
+             raise ValueError(msg)
+
+         self._push_to_buffer(new_message)
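
Usage sketch: QueuingInterface. A minimal, hedged example of draining the queue asynchronously, assuming this hunk corresponds to letta/server/rest_api/interface.py (per the +726 entry in the file list) and exercising only the error/STOP path, which needs no Message objects:

import asyncio

from letta.server.rest_api.interface import QueuingInterface

interface = QueuingInterface(debug=False)
interface.error("something went wrong")  # enqueues {"internal_error": ...}, then the "STOP" sentinel


async def drain() -> list:
    # message_generator yields items until it dequeues the "STOP" sentinel
    return [item async for item in interface.message_generator(style="api")]


print(asyncio.run(drain()))  # [{'internal_error': 'something went wrong'}]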
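Usage sketch: FunctionArgumentsStreamHandler. The state machine only starts emitting plaintext after it has seen "{", accumulated the target key, and consumed the ":" and the opening quote, so the chunk boundaries below are deliberately chosen to match what process_json_chunk expects (a sketch, not a guarantee of how a given model tokenizes its arguments):

from letta.server.rest_api.interface import FunctionArgumentsStreamHandler

reader = FunctionArgumentsStreamHandler(json_key="message")
chunks = ['{', '"message', '":', ' "', 'Hello', ' world', '"', '}']

# Chunks that are part of JSON scaffolding come back as None; keep only the text
text = "".join(c for c in (reader.process_json_chunk(chunk) for chunk in chunks) if c is not None)
print(text)  # Hello world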
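Usage sketch: StreamingServerInterface. In the real server the agent loop calls process_chunk() with ChatCompletionChunkResponse objects while the SSE endpoint consumes get_generator(); the hedged sketch below skips real LLM chunks and only exercises the step/generation status signals, which is enough to watch the producer/consumer handshake end cleanly:

import asyncio

from letta.server.rest_api.interface import StreamingServerInterface


async def main():
    interface = StreamingServerInterface(multi_step=True)

    async def consume():
        # In the real server, this generator feeds the SSE streaming response
        async for item in interface.get_generator():
            print(item)

    async def produce():
        interface.stream_start()
        # A real agent step would call interface.process_chunk(...) here
        interface.step_complete()  # pushes MessageStreamStatus.done_step
        interface.stream_end()     # pushes MessageStreamStatus.done_generation
        interface.step_yield()     # deactivates the generator, ending consume()

    await asyncio.gather(consume(), produce())


asyncio.run(main())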