letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/llm_chat_completion_wrappers/zephyr.py
@@ -0,0 +1,345 @@
+from letta.utils import json_dumps, json_loads
+
+from ...errors import LLMJSONParsingError
+from ..json_parser import clean_json
+from .wrapper_base import LLMChatCompletionWrapper
+
+
+class ZephyrMistralWrapper(LLMChatCompletionWrapper):
+    """
+    Wrapper for Zephyr Alpha and Beta, Mistral 7B:
+    https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
+    https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
+    Note: this wrapper formats a prompt that only generates JSON, no inner thoughts
+    """
+
+    def __init__(
+        self,
+        simplify_json_content=True,
+        clean_function_args=True,
+        include_assistant_prefix=True,
+        include_opening_brace_in_prefix=True,
+        include_section_separators=False,
+    ):
+        self.simplify_json_content = simplify_json_content
+        self.clean_func_args = clean_function_args
+        self.include_assistant_prefix = include_assistant_prefix
+        self.include_opening_brace_in_prefix = include_opening_brace_in_prefix
+        self.include_section_separators = include_section_separators
+
+    def chat_completion_to_prompt(self, messages, functions, function_documentation=None):
+        """
+        Zephyr prompt format:
+            <|system|>
+            </s>
+            <|user|>
+            {prompt}</s>
+            <|assistant|>
+        (source: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF#prompt-template-zephyr)
+        """
+        prompt = ""
+
+        IM_END_TOKEN = "</s>"
+
+        # System instructions go first
+        assert messages[0]["role"] == "system"
+        prompt += "<|system|>"
+        prompt += f"\n{messages[0]['content']}"
+
+        # Next is the functions preamble
+        def create_function_description(schema):
+            # airoboros style
+            func_str = ""
+            func_str += f"{schema['name']}:"
+            func_str += f"\n  description: {schema['description']}"
+            func_str += "\n  params:"
+            for param_k, param_v in schema["parameters"]["properties"].items():
+                # TODO we're ignoring type
+                func_str += f"\n    {param_k}: {param_v['description']}"
+            # TODO we're ignoring schema['parameters']['required']
+            return func_str
+
+        # prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+        prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
+        if function_documentation is not None:
+            prompt += f"\n{function_documentation}"
+        else:
+            for function_dict in functions:
+                prompt += f"\n{create_function_description(function_dict)}"
+
+        # Put functions INSIDE system message (TODO experiment with this)
+        prompt += IM_END_TOKEN
+
+        def create_function_call(function_call):
+            airo_func_call = {
+                "function": function_call["name"],
+                "params": json_loads(function_call["arguments"]),
+            }
+            return json_dumps(airo_func_call, indent=2)
+
+        for message in messages[1:]:
+            assert message["role"] in ["user", "assistant", "function", "tool"], message
+
+            if message["role"] == "user":
+                if self.simplify_json_content:
+                    try:
+                        content_json = json_loads(message["content"])
+                        content_simple = content_json["message"]
+                        prompt += f"\n<|user|>\n{content_simple}{IM_END_TOKEN}"
+                    except Exception:
+                        prompt += f"\n<|user|>\n{message['content']}{IM_END_TOKEN}"
+            elif message["role"] == "assistant":
+                prompt += "\n<|assistant|>"
+                if message["content"] is not None:
+                    prompt += f"\n{message['content']}"
+                # need to add the function call if there was one
+                if "function_call" in message and message["function_call"]:
+                    prompt += f"\n{create_function_call(message['function_call'])}"
+                prompt += IM_END_TOKEN
+            elif message["role"] in ["function", "tool"]:
+                # TODO find a good way to add this
+                # prompt += f"\nASSISTANT: (function return) {message['content']}"
+                prompt += "\n<|assistant|>"
+                prompt += f"\nFUNCTION RETURN: {message['content']}"
+                continue
+            else:
+                raise ValueError(message)
+
+        # Add a sep for the response
+        # if self.include_section_separators:
+        #     prompt += "\n### RESPONSE"
+
+        if self.include_assistant_prefix:
+            prompt += "\n<|assistant|>"
+            if self.include_opening_brace_in_prefix:
+                prompt += "\n{"
+
+        return prompt
+
+    def clean_function_args(self, function_name, function_args):
+        """Some basic Letta-specific cleaning of function args"""
+        cleaned_function_name = function_name
+        cleaned_function_args = function_args.copy() if function_args is not None else {}
+
+        if function_name == "send_message":
+            # strip request_heartbeat
+            cleaned_function_args.pop("request_heartbeat", None)
+
+        # TODO more cleaning to fix errors LLM makes
+        return cleaned_function_name, cleaned_function_args
+
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn raw LLM output into a ChatCompletion style response with:
+        "message" = {
+            "role": "assistant",
+            "content": ...,
+            "function_call": {
+                "name": ...,
+                "arguments": {
+                    "arg1": val1,
+                    ...
+                }
+            }
+        }
+        """
+        if self.include_opening_brace_in_prefix and raw_llm_output[0] != "{":
+            raw_llm_output = "{" + raw_llm_output
+
+        try:
+            function_json_output = clean_json(raw_llm_output)
+        except Exception as e:
+            raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output} - error\n{str(e)}")
+        try:
+            function_name = function_json_output["function"]
+            function_parameters = function_json_output["params"]
+        except KeyError as e:
+            raise LLMJSONParsingError(f"Received valid JSON from LLM, but JSON was missing fields: {str(e)}")
+
+        if self.clean_func_args:
+            function_name, function_parameters = self.clean_function_args(function_name, function_parameters)
+
+        message = {
+            "role": "assistant",
+            "content": None,
+            "function_call": {
+                "name": function_name,
+                "arguments": json_dumps(function_parameters),
+            },
+        }
+        return message
+
+
+class ZephyrMistralInnerMonologueWrapper(ZephyrMistralWrapper):
+    """
+    Wrapper for Zephyr Alpha and Beta, Mistral 7B:
+    https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
+    https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
+    Still expects only JSON outputs from the model, but adds inner monologue as a field.
+    Note: this wrapper formats a prompt with inner thoughts included.
+    """
+
+    def __init__(
+        self,
+        simplify_json_content=True,
+        clean_function_args=True,
+        include_assistant_prefix=True,
+        include_opening_brace_in_prefix=True,
+        include_section_separators=True,
+    ):
+        self.simplify_json_content = simplify_json_content
+        self.clean_func_args = clean_function_args
+        self.include_assistant_prefix = include_assistant_prefix
+        self.include_opening_brace_in_prefix = include_opening_brace_in_prefix
+        self.include_section_separators = include_section_separators
+
+    def chat_completion_to_prompt(self, messages, functions, function_documentation=None):
+        prompt = ""
+
+        IM_END_TOKEN = "</s>"
+
+        # System instructions go first
+        assert messages[0]["role"] == "system"
+        prompt += messages[0]["content"]
+
+        # Next is the functions preamble
+        def create_function_description(schema, add_inner_thoughts=True):
+            # airoboros style
+            func_str = ""
+            func_str += f"{schema['name']}:"
+            func_str += f"\n  description: {schema['description']}"
+            func_str += "\n  params:"
+            if add_inner_thoughts:
+                func_str += "\n    inner_thoughts: Deep inner monologue private to you only."
+            for param_k, param_v in schema["parameters"]["properties"].items():
+                # TODO we're ignoring type
+                func_str += f"\n    {param_k}: {param_v['description']}"
+            # TODO we're ignoring schema['parameters']['required']
+            return func_str
+
+        # prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+        prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
+        if function_documentation is not None:
+            prompt += f"\n{function_documentation}"
+        else:
+            for function_dict in functions:
+                prompt += f"\n{create_function_description(function_dict)}"
+
+        def create_function_call(function_call, inner_thoughts=None):
+            airo_func_call = {
+                "function": function_call["name"],
+                "params": {
+                    "inner_thoughts": inner_thoughts,
+                    **json_loads(function_call["arguments"]),
+                },
+            }
+            return json_dumps(airo_func_call, indent=2)
+
+        # Add a sep for the conversation
+        if self.include_section_separators:
+            prompt += "\n<|user|>"
+
+        # Last are the user/assistant messages
+        for message in messages[1:]:
+            assert message["role"] in ["user", "assistant", "function", "tool"], message
+
+            if message["role"] == "user":
+                if self.simplify_json_content:
+                    try:
+                        content_json = json_loads(message["content"])
+                        content_simple = content_json["message"]
+                        prompt += f"\n<|user|>\n{content_simple}{IM_END_TOKEN}"
+                    except Exception:
+                        prompt += f"\n<|user|>\n{message['content']}{IM_END_TOKEN}"
+            elif message["role"] == "assistant":
+                prompt += "\n<|assistant|>"
+                # need to add the function call if there was one
+                inner_thoughts = message["content"]
+                if "function_call" in message and message["function_call"]:
+                    prompt += f"\n{create_function_call(message['function_call'], inner_thoughts=inner_thoughts)}"
+            elif message["role"] in ["function", "tool"]:
+                # TODO find a good way to add this
+                # prompt += f"\nASSISTANT: (function return) {message['content']}"
+                prompt += f"\nFUNCTION RETURN: {message['content']}"
+                continue
+            else:
+                raise ValueError(message)
+
+        # Add a sep for the response
+        # if self.include_section_separators:
+        #     prompt += "\n### RESPONSE"
+
+        if self.include_assistant_prefix:
+            prompt += "\n<|assistant|>"
+            if self.include_opening_brace_in_prefix:
+                prompt += "\n{"
+
+        return prompt
+
+    def clean_function_args(self, function_name, function_args):
+        """Some basic Letta-specific cleaning of function args"""
+        cleaned_function_name = function_name
+        cleaned_function_args = function_args.copy() if function_args is not None else {}
+
+        if function_name == "send_message":
+            # strip request_heartbeat
+            cleaned_function_args.pop("request_heartbeat", None)
+
+        inner_thoughts = None
+        if "inner_thoughts" in function_args:
+            inner_thoughts = cleaned_function_args.pop("inner_thoughts")
+
+        # TODO more cleaning to fix errors LLM makes
+        return inner_thoughts, cleaned_function_name, cleaned_function_args
+
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn raw LLM output into a ChatCompletion style response with:
+        "message" = {
+            "role": "assistant",
+            "content": ...,
+            "function_call": {
+                "name": ...,
+                "arguments": {
+                    "arg1": val1,
+                    ...
+                }
+            }
+        }
+        """
+        if self.include_opening_brace_in_prefix and raw_llm_output[0] != "{":
+            raw_llm_output = "{" + raw_llm_output
+
+        try:
+            function_json_output = clean_json(raw_llm_output)
+        except Exception as e:
+            raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output} - error\n{str(e)}")
+        try:
+            function_name = function_json_output["function"]
+            function_parameters = function_json_output["params"]
+        except KeyError as e:
+            raise LLMJSONParsingError(f"Received valid JSON from LLM, but JSON was missing fields: {str(e)}")
+
+        if self.clean_func_args:
+            (
+                inner_thoughts,
+                function_name,
+                function_parameters,
+            ) = self.clean_function_args(function_name, function_parameters)
+
+        message = {
+            "role": "assistant",
+            "content": inner_thoughts,
+            "function_call": {
+                "name": function_name,
+                "arguments": json_dumps(function_parameters),
+            },
+        }
+        return message
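
For orientation, here is a minimal round-trip sketch of how these wrappers are meant to be driven: serialize ChatCompletion-style messages into a Zephyr prompt, then parse the raw model output back into a message dict. The message list, function schema, and raw output below are hypothetical illustrations; only the wrapper class itself comes from the diff above.

from letta.local_llm.llm_chat_completion_wrappers.zephyr import ZephyrMistralInnerMonologueWrapper

wrapper = ZephyrMistralInnerMonologueWrapper()

# Hypothetical ChatCompletion-style inputs (not taken from the package)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": '{"message": "How is the weather today?"}'},
]
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {"properties": {"message": {"description": "The message text."}}},
    }
]

# Serialize into the <|system|>/<|user|>/<|assistant|> Zephyr format
prompt = wrapper.chat_completion_to_prompt(messages, functions)

# Pretend the local model produced this JSON body (the wrapper re-adds the opening brace)
raw_llm_output = '"function": "send_message", "params": {"inner_thoughts": "Be friendly.", "message": "Sunny."}}'
response = wrapper.output_to_chat_completion_response(raw_llm_output)
print(response["content"])        # inner thoughts -> "Be friendly."
print(response["function_call"])  # name + JSON-encoded arguments without inner_thoughts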
letta/local_llm/lmstudio/api.py
@@ -0,0 +1,100 @@
+from urllib.parse import urljoin
+
+from letta.local_llm.settings.settings import get_completions_settings
+from letta.local_llm.utils import post_json_auth_request
+from letta.utils import count_tokens
+
+LMSTUDIO_API_CHAT_SUFFIX = "/v1/chat/completions"
+LMSTUDIO_API_COMPLETIONS_SUFFIX = "/v1/completions"
+
+
+def get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions"):
+    """Based on the example for using LM Studio as a backend from https://github.com/lmstudio-ai/examples/tree/main/Hello%2C%20world%20-%20OpenAI%20python%20client"""
+    from letta.utils import printd
+
+    prompt_tokens = count_tokens(prompt)
+    if prompt_tokens > context_window:
+        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+    settings = get_completions_settings()
+    settings.update(
+        {
+            "input_prefix": "",
+            "input_suffix": "",
+            # This controls how LM Studio handles context overflow
+            # In Letta we handle this ourselves, so this should be disabled
+            # "context_overflow_policy": 0,
+            "lmstudio": {"context_overflow_policy": 0},  # 0 = stop at limit
+            "stream": False,
+            "model": "local model",
+        }
+    )
+
+    # Uses the ChatCompletions API style
+    # Seems to work better, probably because it applies some extra settings under the hood
+    if api == "chat":
+        URI = urljoin(endpoint.strip("/") + "/", LMSTUDIO_API_CHAT_SUFFIX.strip("/"))
+
+        # Settings for the generation, including the prompt + stop tokens, max length, etc.
+        request = settings
+        request["max_tokens"] = context_window
+
+        # Put the entire completion string inside the first message
+        message_structure = [{"role": "user", "content": prompt}]
+        request["messages"] = message_structure
+
+    # Uses basic string completions (string in, string out)
+    # Does not work as well as ChatCompletions for some reason
+    elif api == "completions":
+        URI = urljoin(endpoint.strip("/") + "/", LMSTUDIO_API_COMPLETIONS_SUFFIX.strip("/"))
+
+        # Settings for the generation, including the prompt + stop tokens, max length, etc.
+        request = settings
+        request["max_tokens"] = context_window
+
+        # Standard completions format, formatted string goes in prompt
+        request["prompt"] = prompt
+
+    else:
+        raise ValueError(api)
+
+    if not endpoint.startswith(("http://", "https://")):
+        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+    try:
+        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+        if response.status_code == 200:
+            result_full = response.json()
+            printd(f"JSON API response:\n{result_full}")
+            if api == "chat":
+                result = result_full["choices"][0]["message"]["content"]
+                usage = result_full.get("usage", None)
+            elif api == "completions":
+                result = result_full["choices"][0]["text"]
+                usage = result_full.get("usage", None)
+        else:
+            # Example error: msg={"error":"Context length exceeded. Tokens in context: 8000, Context length: 8000"}
+            if "context length" in str(response.text).lower():
+                # "exceeds context length" is what appears in the LM Studio error message
+                # raise an alternate exception that matches OpenAI's message, which is "maximum context length"
+                raise Exception(f"Request exceeds maximum context length (code={response.status_code}, msg={response.text}, URI={URI})")
+            else:
+                raise Exception(
+                    f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                    + f" Make sure that the LM Studio local inference server is running and reachable at {URI}."
+                )
+    except Exception:
+        # TODO handle gracefully
+        raise
+
+    # Pass usage statistics back to main thread
+    # These are used to compute memory warning messages
+    completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+    usage = {
+        "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    return result, usage
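
A hypothetical invocation against a locally running LM Studio server; the endpoint, prompt, and context window below are placeholder assumptions (LM Studio's local server commonly listens on http://localhost:1234):

from letta.local_llm.lmstudio.api import get_lmstudio_completion

result, usage = get_lmstudio_completion(
    endpoint="http://localhost:1234",  # placeholder: wherever your LM Studio server listens
    auth_type=None,
    auth_key=None,
    prompt="<|system|>\nYou are a helpful assistant.</s>\n<|user|>\nHi!</s>\n<|assistant|>",
    context_window=8192,
    api="chat",  # or "completions" for the string-in/string-out endpoint
)
print(result)                      # completion text
print(usage["completion_tokens"])  # may be None if the server omitted usage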
letta/local_llm/lmstudio/settings.py
@@ -0,0 +1,29 @@
+SIMPLE = {
+    "stop": [
+        "\nUSER:",
+        "\nASSISTANT:",
+        "\nFUNCTION RETURN:",
+        "\nUSER",
+        "\nASSISTANT",
+        "\nFUNCTION RETURN",
+        "\nFUNCTION",
+        "\nFUNC",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>",
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+    ],
+    # This controls the maximum number of tokens that the model can generate
+    # Cap this at the model context length (assuming 8k for Mistral 7B)
+    # "max_tokens": 8000,
+    # "max_tokens": LLM_MAX_TOKENS,
+    # This controls how LM Studio handles context overflow
+    # In Letta we handle this ourselves, so this should be commented out
+    # "lmstudio": {"context_overflow_policy": 2},
+    "stream": False,
+    "model": "local model",
+}
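
These SIMPLE defaults and the per-request overrides in api.py above compose by plain dict layering. A small sketch of that layering (in api.py the base dict actually comes from get_completions_settings(); using SIMPLE directly here is a stand-in assumption):

from copy import deepcopy

from letta.local_llm.lmstudio.settings import SIMPLE

settings = deepcopy(SIMPLE)  # stand-in for get_completions_settings()
settings.update(
    {
        "lmstudio": {"context_overflow_policy": 0},  # 0 = stop at limit
        "max_tokens": 8192,  # api.py caps this at the model context window
    }
)
assert "\nUSER:" in settings["stop"]  # the stop sequences survive the overlay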
letta/local_llm/ollama/api.py
@@ -0,0 +1,88 @@
+from urllib.parse import urljoin
+
+from letta.errors import LocalLLMError
+from letta.local_llm.settings.settings import get_completions_settings
+from letta.local_llm.utils import post_json_auth_request
+from letta.utils import count_tokens
+
+OLLAMA_API_SUFFIX = "/api/generate"
+
+
+def get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window, grammar=None):
+    """See https://github.com/jmorganca/ollama/blob/main/docs/api.md for instructions on how to run the LLM web server"""
+    from letta.utils import printd
+
+    prompt_tokens = count_tokens(prompt)
+    if prompt_tokens > context_window:
+        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+    if model is None:
+        raise LocalLLMError(
+            "Error: model name not specified. Set model in your config to the model you want to run (e.g. 'dolphin2.2-mistral')"
+        )
+
+    # Settings for the generation, including the prompt + stop tokens, max length, etc.
+    # https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
+    settings = get_completions_settings()
+    settings.update(
+        {
+            # Ollama's specific name for context length
+            "num_ctx": context_window,
+        }
+    )
+
+    # https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
+    request = {
+        ## base parameters
+        "model": model,
+        "prompt": prompt,
+        # "images": [],  # TODO eventually support
+        ## advanced parameters
+        # "format": "json",  # TODO eventually support
+        "stream": False,
+        "options": settings,
+        "raw": True,  # no prompt formatting
+        # "raw mode does not support template, system, or context"
+        # "system": "",  # no prompt formatting
+        # "template": "{{ .Prompt }}",  # no prompt formatting
+        # "context": None,  # no memory via prompt formatting
+    }
+
+    # Set grammar
+    if grammar is not None:
+        # request["grammar_string"] = load_grammar_file(grammar)
+        raise NotImplementedError("Ollama does not support grammars")
+
+    if not endpoint.startswith(("http://", "https://")):
+        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+    try:
+        URI = urljoin(endpoint.strip("/") + "/", OLLAMA_API_SUFFIX.strip("/"))
+        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+        if response.status_code == 200:
+            # https://github.com/jmorganca/ollama/blob/main/docs/api.md
+            result_full = response.json()
+            printd(f"JSON API response:\n{result_full}")
+            result = result_full["response"]
+        else:
+            raise Exception(
+                f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                + f" Make sure that the ollama API server is running and reachable at {URI}."
+            )
+    except Exception:
+        # TODO handle gracefully
+        raise
+
+    # Pass usage statistics back to main thread
+    # These are used to compute memory warning messages
+    # https://github.com/jmorganca/ollama/blob/main/docs/api.md#response
+    completion_tokens = result_full.get("eval_count", None)
+    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+    usage = {
+        "prompt_tokens": prompt_tokens,  # can also grab from "prompt_eval_count"
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    return result, usage
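
A hypothetical invocation against a local Ollama server; the model name echoes the example given in the error message above, and the endpoint is Ollama's conventional default address (both are placeholder assumptions):

from letta.local_llm.ollama.api import get_ollama_completion

result, usage = get_ollama_completion(
    endpoint="http://localhost:11434",  # Ollama's default listen address
    auth_type=None,
    auth_key=None,
    model="dolphin2.2-mistral",  # placeholder model; must already be pulled in Ollama
    prompt="You are a helpful assistant.\nUSER: Hi!\nASSISTANT:",
    context_window=8192,
)
print(result)                 # raw completion text from /api/generate
print(usage["total_tokens"])  # locally counted prompt tokens + Ollama's eval_count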
letta/local_llm/ollama/settings.py
@@ -0,0 +1,32 @@
+# see https://github.com/jmorganca/ollama/blob/main/docs/api.md
+# and https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
+SIMPLE = {
+    "options": {
+        "stop": [
+            "\nUSER:",
+            "\nASSISTANT:",
+            "\nFUNCTION RETURN:",
+            "\nUSER",
+            "\nASSISTANT",
+            "\nFUNCTION RETURN",
+            "\nFUNCTION",
+            "\nFUNC",
+            "<|im_start|>",
+            "<|im_end|>",
+            "<|im_sep|>",
+            # '\n' +
+            # '</s>',
+            # '<|',
+            # '\n#',
+            # '\n\n\n',
+        ],
+        # "num_ctx": LLM_MAX_TOKENS,
+    },
+    "stream": False,
+    # turn off Ollama's own prompt formatting
+    "system": "",
+    "template": "{{ .Prompt }}",
+    # "system": None,
+    # "template": None,
+    "context": None,
+}
letta/local_llm/settings/__init__.py
File without changes
letta/local_llm/settings/deterministic_mirostat.py
@@ -0,0 +1,45 @@
+from letta.local_llm.settings.simple import settings as simple_settings
+
+settings = {
+    "max_new_tokens": 250,
+    "do_sample": False,
+    "temperature": 0,
+    "top_p": 0,
+    "typical_p": 1,
+    "repetition_penalty": 1.18,
+    "repetition_penalty_range": 0,
+    "encoder_repetition_penalty": 1,
+    "top_k": 1,
+    "min_length": 0,
+    "no_repeat_ngram_size": 0,
+    "num_beams": 1,
+    "penalty_alpha": 0,
+    "length_penalty": 1,
+    "early_stopping": False,
+    "guidance_scale": 1,
+    "negative_prompt": "",
+    "seed": -1,
+    "add_bos_token": True,
+    # NOTE: important - these are the BASE stopping strings, and should be combined with {{user}}/{{char}}-based stopping strings
+    "stopping_strings": [
+        *simple_settings["stop"],  # unpack so this stays a flat list of strings
+        # '### Response (JSON only, engaging, natural, authentic, descriptive, creative):',
+        # "</s>",
+        # "<|",
+        # "\n#",
+        # "\n*{{user}} ",
+        # "\n\n\n",
+        # "\n{",
+        # ",\n{",
+    ],
+    "truncation_length": 4096,
+    "ban_eos_token": False,
+    "skip_special_tokens": True,
+    "top_a": 0,
+    "tfs": 1,
+    "epsilon_cutoff": 0,
+    "eta_cutoff": 0,
+    "mirostat_mode": 2,
+    "mirostat_tau": 4,
+    "mirostat_eta": 0.1,
+}
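
As a sanity check on the "deterministic" naming, the greedy-decoding fields can be verified directly. Nothing below goes beyond the dict defined above; the mirostat comment reflects text-generation-webui's parameter semantics and is stated as an assumption:

from letta.local_llm.settings.deterministic_mirostat import settings

# Sampling disabled, temperature zeroed, and top-k collapsed to the argmax token;
# mirostat_mode=2 additionally requests Mirostat 2.0 perplexity control where supported.
assert settings["do_sample"] is False
assert settings["temperature"] == 0
assert settings["top_k"] == 1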