letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/chat_completion_proxy.py
@@ -0,0 +1,279 @@
+ """Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
+
+ import uuid
+
+ import requests
+
+ from letta.constants import CLI_WARNING_PREFIX
+ from letta.errors import LocalLLMConnectionError, LocalLLMError
+ from letta.local_llm.constants import DEFAULT_WRAPPER
+ from letta.local_llm.function_parser import patch_function
+ from letta.local_llm.grammars.gbnf_grammar_generator import (
+     create_dynamic_model_from_function,
+     generate_gbnf_grammar_and_documentation,
+ )
+ from letta.local_llm.groq.api import get_groq_completion
+ from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
+ from letta.local_llm.llamacpp.api import get_llamacpp_completion
+ from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
+ from letta.local_llm.lmstudio.api import get_lmstudio_completion
+ from letta.local_llm.ollama.api import get_ollama_completion
+ from letta.local_llm.utils import count_tokens, get_available_wrappers
+ from letta.local_llm.vllm.api import get_vllm_completion
+ from letta.local_llm.webui.api import get_webui_completion
+ from letta.local_llm.webui.legacy_api import (
+     get_webui_completion as get_webui_completion_legacy,
+ )
+ from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
+ from letta.schemas.openai.chat_completion_response import (
+     ChatCompletionResponse,
+     Choice,
+     Message,
+     ToolCall,
+     UsageStatistics,
+ )
+ from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+ has_shown_warning = False
+ grammar_supported_backends = ["koboldcpp", "llamacpp", "webui", "webui-legacy"]
+
+
+ def get_chat_completion(
+     model,
+     # no model required (except for Ollama), since the model is fixed to whatever you set in your own backend
+     messages,
+     functions=None,
+     functions_python=None,
+     function_call="auto",
+     context_window=None,
+     user=None,
+     # required
+     wrapper=None,
+     endpoint=None,
+     endpoint_type=None,
+     # optional cleanup
+     function_correction=True,
+     # extra hints to allow for additional prompt formatting hacks
+     # TODO this could alternatively be supported via passing function_call="send_message" into the wrapper
+     first_message=False,
+     # optional auth headers
+     auth_type=None,
+     auth_key=None,
+ ) -> ChatCompletionResponse:
+     from letta.utils import printd
+
+     assert context_window is not None, "Local LLM calls need the context length to be explicitly set"
+     assert endpoint is not None, "Local LLM calls need the endpoint (eg http://localendpoint:1234) to be explicitly set"
+     assert endpoint_type is not None, "Local LLM calls need the endpoint type (eg webui) to be explicitly set"
+     global has_shown_warning
+     grammar = None
+
+     # TODO: eventually just process Message object
+     if not isinstance(messages[0], dict):
+         messages = [m.to_openai_dict() for m in messages]
+
+     if function_call is not None and function_call != "auto":
+         raise ValueError(f"function_call == {function_call} not supported (auto or None only)")
+
+     available_wrappers = get_available_wrappers()
+     documentation = None
+
+     # Special case for if the call we're making is coming from the summarizer
+     if messages[0]["role"] == "system" and messages[0]["content"].strip() == SUMMARIZE_SYSTEM_MESSAGE.strip():
+         llm_wrapper = simple_summary_wrapper.SimpleSummaryWrapper()
+
+     # Select a default prompt formatter
+     elif wrapper is None:
+         # Warn the user that we're using the fallback
+         if not has_shown_warning:
+             print(
+                 f"{CLI_WARNING_PREFIX}no wrapper specified for local LLM, using the default wrapper (you can remove this warning by specifying the wrapper with --model-wrapper)"
+             )
+             has_shown_warning = True
+
+         llm_wrapper = DEFAULT_WRAPPER()
+
+     # User provided an incorrect prompt formatter
+     elif wrapper not in available_wrappers:
+ raise ValueError(f"Could not find requested wrapper '{wrapper} in available wrappers list:\n{', '.join(available_wrappers)}")
+
+     # User provided a correct prompt formatter
+     else:
+         llm_wrapper = available_wrappers[wrapper]
+
+     # If the wrapper uses grammar, generate the grammar using the grammar generating function
+     # TODO move this to a flag
+     if wrapper is not None and "grammar" in wrapper:
+         # When using grammars, we don't want to do any extras output tricks like appending a response prefix
+         setattr(llm_wrapper, "assistant_prefix_extra_first_message", "")
+         setattr(llm_wrapper, "assistant_prefix_extra", "")
+
+         # TODO find a better way to do this than string matching (eg an attribute)
+         if "noforce" in wrapper:
+             # "noforce" means that the prompt formatter expects inner thoughts as a top-level parameter
+             # this is closer to the OpenAI style since it allows for messages w/o any function calls
+             # however, with bad LLMs it makes it easier for the LLM to "forget" to call any of the functions
+             grammar, documentation = generate_grammar_and_documentation(
+                 functions_python=functions_python,
+                 add_inner_thoughts_top_level=True,
+                 add_inner_thoughts_param_level=False,
+                 allow_only_inner_thoughts=True,
+             )
+         else:
+             # otherwise, the other prompt formatters will insert inner thoughts as a function call parameter (by default)
+             # this means that every response from the LLM will be required to call a function
+             grammar, documentation = generate_grammar_and_documentation(
+                 functions_python=functions_python,
+                 add_inner_thoughts_top_level=False,
+                 add_inner_thoughts_param_level=True,
+                 allow_only_inner_thoughts=False,
+             )
+         printd(grammar)
+
+     if grammar is not None and endpoint_type not in grammar_supported_backends:
+         print(
+             f"{CLI_WARNING_PREFIX}grammars are currently not supported when using {endpoint_type} as the Letta local LLM backend (supported: {', '.join(grammar_supported_backends)})"
+         )
+         grammar = None
+
+     # First step: turn the message sequence into a prompt that the model expects
+     try:
+         # if hasattr(llm_wrapper, "supports_first_message"):
+         if hasattr(llm_wrapper, "supports_first_message") and llm_wrapper.supports_first_message:
+             prompt = llm_wrapper.chat_completion_to_prompt(
+                 messages=messages, functions=functions, first_message=first_message, function_documentation=documentation
+             )
+         else:
+             prompt = llm_wrapper.chat_completion_to_prompt(messages=messages, functions=functions, function_documentation=documentation)
+
+         printd(prompt)
+     except Exception as e:
+         print(e)
+         raise LocalLLMError(
+             f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
+         )
+
+     try:
+         if endpoint_type == "webui":
+             result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+         elif endpoint_type == "webui-legacy":
+             result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+         elif endpoint_type == "lmstudio":
+             result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
+         elif endpoint_type == "lmstudio-legacy":
+             result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="chat")
+         elif endpoint_type == "llamacpp":
+             result, usage = get_llamacpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+         elif endpoint_type == "koboldcpp":
+             result, usage = get_koboldcpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+         elif endpoint_type == "ollama":
+             result, usage = get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
+         elif endpoint_type == "vllm":
+             result, usage = get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user)
+         elif endpoint_type == "groq":
+             result, usage = get_groq_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
+         else:
+             raise LocalLLMError(
+                 f"Invalid endpoint type {endpoint_type}, please set variable depending on your backend (webui, lmstudio, llamacpp, koboldcpp)"
+             )
+     except requests.exceptions.ConnectionError as e:
+         raise LocalLLMConnectionError(f"Unable to connect to endpoint {endpoint}")
+
+     if result is None or result == "":
+         raise LocalLLMError(f"Got back an empty response string from {endpoint}")
+     printd(f"Raw LLM output:\n====\n{result}\n====")
+
+     try:
+         if hasattr(llm_wrapper, "supports_first_message") and llm_wrapper.supports_first_message:
+             chat_completion_result = llm_wrapper.output_to_chat_completion_response(result, first_message=first_message)
+         else:
+             chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
+         printd(json_dumps(chat_completion_result, indent=2))
+     except Exception as e:
+         raise LocalLLMError(f"Failed to parse JSON from local LLM response - error: {str(e)}")
+
+     # Run through some manual function correction (optional)
+     if function_correction:
+         chat_completion_result = patch_function(message_history=messages, new_message=chat_completion_result)
+
+     # Fill in potential missing usage information (used for tracking token use)
+     if not ("prompt_tokens" in usage and "completion_tokens" in usage and "total_tokens" in usage):
+         raise LocalLLMError(f"usage dict in response was missing fields ({usage})")
+
+     if usage["prompt_tokens"] is None:
+         printd(f"usage dict was missing prompt_tokens, computing on-the-fly...")
+         usage["prompt_tokens"] = count_tokens(prompt)
+
+     # NOTE: we should compute on-the-fly anyways since we might have to correct for errors during JSON parsing
+     usage["completion_tokens"] = count_tokens(json_dumps(chat_completion_result))
+     """
+     if usage["completion_tokens"] is None:
+         printd(f"usage dict was missing completion_tokens, computing on-the-fly...")
+         # chat_completion_result is dict with 'role' and 'content'
+         # token counter wants a string
+         usage["completion_tokens"] = count_tokens(json_dumps(chat_completion_result))
+     """
+
+     # NOTE: this is the token count that matters most
+     if usage["total_tokens"] is None:
+         printd(f"usage dict was missing total_tokens, computing on-the-fly...")
+         usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
+
+     # unpack with response.choices[0].message.content
+     response = ChatCompletionResponse(
+         id=str(uuid.uuid4()),  # TODO something better?
+         choices=[
+             Choice(
+                 finish_reason="stop",
+                 index=0,
+                 message=Message(
+                     role=chat_completion_result["role"],
+                     content=chat_completion_result["content"],
+                     tool_calls=(
+                         [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
+                         if "function_call" in chat_completion_result
+                         else []
+                     ),
+                 ),
+             )
+         ],
+         created=get_utc_time(),
+         model=model,
+         # "This fingerprint represents the backend configuration that the model runs with."
+         # system_fingerprint=user if user is not None else "null",
+         system_fingerprint=None,
+         object="chat.completion",
+         usage=UsageStatistics(**usage),
+     )
+     printd(response)
+     return response
+
+
+ def generate_grammar_and_documentation(
+     functions_python: dict,
+     add_inner_thoughts_top_level: bool,
+     add_inner_thoughts_param_level: bool,
+     allow_only_inner_thoughts: bool,
+ ):
+     from letta.utils import printd
+
+     assert not (
+         add_inner_thoughts_top_level and add_inner_thoughts_param_level
+     ), "Can only place inner thoughts in one location in the grammar generator"
+
+     grammar_function_models = []
+     # create_dynamic_model_from_function will add inner thoughts to the function parameters if add_inner_thoughts is True.
+     # generate_gbnf_grammar_and_documentation will add inner thoughts to the outer object of the function parameters if add_inner_thoughts is True.
+     for key, func in functions_python.items():
+         grammar_function_models.append(create_dynamic_model_from_function(func, add_inner_thoughts=add_inner_thoughts_param_level))
+     grammar, documentation = generate_gbnf_grammar_and_documentation(
+         grammar_function_models,
+         outer_object_name="function",
+         outer_object_content="params",
+         model_prefix="function",
+         fields_prefix="params",
+         add_inner_thoughts=add_inner_thoughts_top_level,
+         allow_only_inner_thoughts=allow_only_inner_thoughts,
+     )
+     printd(grammar)
+     return grammar, documentation
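For orientation, a minimal usage sketch of the proxy above. It assumes a koboldcpp server on its default port (see DEFAULT_ENDPOINTS in letta/local_llm/constants.py below) and the "chatml" wrapper name; the message and function payloads are illustrative stand-ins, not values shipped with the package.

from letta.local_llm.chat_completion_proxy import get_chat_completion

# Illustrative payloads (hypothetical, not taken from the package)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": '{"type": "user_message", "message": "hello"}'},
]
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

response = get_chat_completion(
    model=None,                         # most local backends ignore this (Ollama/vLLM/Groq need it)
    messages=messages,
    functions=functions,
    context_window=8192,                # must be set explicitly (asserted above)
    endpoint="http://localhost:5001",   # assumed koboldcpp default endpoint
    endpoint_type="koboldcpp",
    wrapper="chatml",                   # assumed to be a key returned by get_available_wrappers()
)
print(response.choices[0].message.content)  # unpack as noted in the code above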
letta/local_llm/constants.py
@@ -0,0 +1,31 @@
+ # import letta.local_llm.llm_chat_completion_wrappers.airoboros as airoboros
+ from letta.local_llm.llm_chat_completion_wrappers.chatml import (
+     ChatMLInnerMonologueWrapper,
+ )
+
+ DEFAULT_ENDPOINTS = {
+     # Local
+     "koboldcpp": "http://localhost:5001",
+     "llamacpp": "http://localhost:8080",
+     "lmstudio": "http://localhost:1234",
+     "lmstudio-legacy": "http://localhost:1234",
+     "ollama": "http://localhost:11434",
+     "webui-legacy": "http://localhost:5000",
+     "webui": "http://localhost:5000",
+     "vllm": "http://localhost:8000",
+     # APIs
+     "openai": "https://api.openai.com",
+     "anthropic": "https://api.anthropic.com",
+     "groq": "https://api.groq.com/openai",
+ }
+
+ DEFAULT_OLLAMA_MODEL = "dolphin2.2-mistral:7b-q6_K"
+
+ # DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper
+ # DEFAULT_WRAPPER_NAME = "airoboros-l2-70b-2.1"
+
+ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
+ DEFAULT_WRAPPER_NAME = "chatml"
+
+ INNER_THOUGHTS_KWARG = "inner_thoughts"
+ INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
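A brief sketch of how these defaults might be consumed when a caller omits the endpoint or model; the helper resolve_local_llm_defaults is hypothetical and not part of the package.

from letta.local_llm.constants import (
    DEFAULT_ENDPOINTS,
    DEFAULT_OLLAMA_MODEL,
    DEFAULT_WRAPPER,
    DEFAULT_WRAPPER_NAME,
)

def resolve_local_llm_defaults(endpoint_type, endpoint=None, model=None):
    """Hypothetical helper: fill in endpoint/model/wrapper defaults for a backend type."""
    endpoint = endpoint or DEFAULT_ENDPOINTS.get(endpoint_type)
    if endpoint is None:
        raise ValueError(f"No default endpoint known for backend '{endpoint_type}'")
    # Only Ollama needs an explicit model name; other local backends serve whatever model is loaded.
    if endpoint_type == "ollama" and model is None:
        model = DEFAULT_OLLAMA_MODEL
    return endpoint, model, DEFAULT_WRAPPER(), DEFAULT_WRAPPER_NAME

# e.g. resolve_local_llm_defaults("lmstudio")
# -> ("http://localhost:1234", None, ChatMLInnerMonologueWrapper(), "chatml")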
letta/local_llm/function_parser.py
@@ -0,0 +1,68 @@
+ import copy
+ import json
+
+ from letta.utils import json_dumps, json_loads
+
+ NO_HEARTBEAT_FUNCS = ["send_message", "pause_heartbeats"]
+
+
+ def insert_heartbeat(message):
+     # message_copy = message.copy()
+     message_copy = copy.deepcopy(message)
+
+     if message_copy.get("function_call"):
+         # function_name = message.get("function_call").get("name")
+         params = message_copy.get("function_call").get("arguments")
+         params = json_loads(params)
+         params["request_heartbeat"] = True
+         message_copy["function_call"]["arguments"] = json_dumps(params)
+
+     elif message_copy.get("tool_calls"):
+         # function_name = message.get("tool_calls")[0].get("function").get("name")
+         params = message_copy.get("tool_calls")[0].get("function").get("arguments")
+         params = json_loads(params)
+         params["request_heartbeat"] = True
+         message_copy["tool_calls"][0]["function"]["arguments"] = json_dumps(params)
+
+     return message_copy
+
+
+ def heartbeat_correction(message_history, new_message):
+     """Add heartbeats where we think the agent forgot to add them themselves
+
+     If the last message in the stack is a user message and the new message is an assistant func call, fix the heartbeat
+
+     See: https://github.com/cpacker/Letta/issues/601
+     """
+     if len(message_history) < 1:
+         return None
+
+     last_message_was_user = False
+     if message_history[-1]["role"] == "user":
+         try:
+             content = json_loads(message_history[-1]["content"])
+         except json.JSONDecodeError:
+             return None
+         # Check if it's a user message or system message
+         if content["type"] == "user_message":
+             last_message_was_user = True
+
+     new_message_is_heartbeat_function = False
+     if new_message["role"] == "assistant":
+         if new_message.get("function_call") or new_message.get("tool_calls"):
+             if new_message.get("function_call"):
+                 function_name = new_message.get("function_call").get("name")
+             elif new_message.get("tool_calls"):
+                 function_name = new_message.get("tool_calls")[0].get("function").get("name")
+             if function_name not in NO_HEARTBEAT_FUNCS:
+                 new_message_is_heartbeat_function = True
+
+     if last_message_was_user and new_message_is_heartbeat_function:
+         return insert_heartbeat(new_message)
+     else:
+         return None
+
+
+ def patch_function(message_history, new_message):
+     corrected_output = heartbeat_correction(message_history=message_history, new_message=new_message)
+     return corrected_output if corrected_output is not None else new_message
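A small illustrative example of the heartbeat correction above; the message dicts (and the get_time function) are made-up stand-ins for the OpenAI-style payloads the parser expects.

from letta.local_llm.function_parser import patch_function

history = [
    {"role": "user", "content": '{"type": "user_message", "message": "What time is it?"}'},
]
new_message = {
    "role": "assistant",
    "content": "Checking the clock.",
    "function_call": {
        "name": "get_time",                  # hypothetical function, not in NO_HEARTBEAT_FUNCS
        "arguments": '{"timezone": "UTC"}',
    },
}

patched = patch_function(message_history=history, new_message=new_message)
# patched["function_call"]["arguments"] now also contains "request_heartbeat": true;
# a send_message call (or a non-user last message) would be returned unchanged.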