letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/settings/settings.py ADDED
@@ -0,0 +1,72 @@
+ import json
+ import os
+
+ from letta.constants import LETTA_DIR
+ from letta.local_llm.settings.deterministic_mirostat import (
+     settings as det_miro_settings,
+ )
+ from letta.local_llm.settings.simple import settings as simple_settings
+
+ DEFAULT = "simple"
+ SETTINGS_FOLDER_NAME = "settings"
+ COMPLETION_SETTINGS_FILE_NAME = "completions_api_settings.json"
+
+
+ def get_completions_settings(defaults="simple") -> dict:
+     """Pull from the home directory settings if they exist, otherwise default"""
+     from letta.utils import printd
+
+     # Load up some default base settings
+     printd(f"Loading default settings from '{defaults}'")
+     if defaults == "simple":
+         # simple = basic stop strings
+         settings = simple_settings
+     elif defaults == "deterministic_mirostat":
+         settings = det_miro_settings
+     elif defaults is None:
+         settings = dict()
+     else:
+         raise ValueError(defaults)
+
+     # Check if settings_dir folder exists (if not, create it)
+     settings_dir = os.path.join(LETTA_DIR, SETTINGS_FOLDER_NAME)
+     if not os.path.exists(settings_dir):
+         printd(f"Settings folder '{settings_dir}' doesn't exist, creating it...")
+         try:
+             os.makedirs(settings_dir)
+         except Exception as e:
+             print(f"Error: failed to create settings folder '{settings_dir}'.\n{e}")
+             return settings
+
+     # Then, check if settings_dir/completions_api_settings.json file exists
+     settings_file = os.path.join(settings_dir, COMPLETION_SETTINGS_FILE_NAME)
+
+     if os.path.isfile(settings_file):
+         # Load into a dict called "settings"
+         printd(f"Found completion settings file '{settings_file}', loading it...")
+         try:
+             with open(settings_file, "r", encoding="utf-8") as file:
+                 user_settings = json.load(file)
+                 if len(user_settings) > 0:
+                     printd(f"Updating base settings with the following user settings:\n{json.dumps(user_settings, indent=2)}")
+                     settings.update(user_settings)
+                 else:
+                     printd(f"'{settings_file}' was empty, ignoring...")
+         except json.JSONDecodeError as e:
+             print(f"Error: failed to load user settings file '{settings_file}', invalid json.\n{e}")
+         except Exception as e:
+             print(f"Error: failed to load user settings file.\n{e}")
+
+     else:
+         printd(f"No completion settings file '{settings_file}', skipping...")
+         # Create the file settings_file to make it easy for the user to edit
+         try:
+             with open(settings_file, "w", encoding="utf-8") as file:
+                 # We don't want to dump existing default settings in case we modify
+                 # the default settings in the future
+                 # json.dump(settings, file, indent=4)
+                 json.dump({}, file, indent=4)
+         except Exception as e:
+             print(f"Error: failed to create empty settings file '{settings_file}'.\n{e}")
+
+     return settings
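
Usage note: the loader above merges any JSON it finds at `<LETTA_DIR>/settings/completions_api_settings.json` over the chosen defaults. A minimal sketch of writing an override and having it picked up on the next call; the `temperature` and `top_p` keys are illustrative assumptions, not settings that every local backend necessarily honors:

    import json
    import os

    from letta.constants import LETTA_DIR
    from letta.local_llm.settings.settings import get_completions_settings

    # Write a user override file that get_completions_settings() will merge over the defaults
    override_path = os.path.join(LETTA_DIR, "settings", "completions_api_settings.json")
    os.makedirs(os.path.dirname(override_path), exist_ok=True)
    with open(override_path, "w", encoding="utf-8") as f:
        json.dump({"temperature": 0.7, "top_p": 0.9}, f, indent=4)  # illustrative keys

    settings = get_completions_settings()  # defaults updated with the override keys
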
letta/local_llm/settings/simple.py ADDED
@@ -0,0 +1,28 @@
+ settings = {
+     # "stopping_strings": [
+     "stop": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # airoboros specific
+         "\n### ",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         "\n#",
+         # "\n\n\n",
+         # prevent chaining function calls / multi json objects / run-on generations
+         # NOTE: this requires the ability to patch the extra '}}' back into the prompt
+         " }\n}\n",
+     ],
+     # most lm frontends default to 0.7-0.8 these days
+     # "temperature": 0.8,
+ }
letta/local_llm/utils.py ADDED
@@ -0,0 +1,265 @@
+ import os
+ import warnings
+ from typing import List
+
+ import requests
+ import tiktoken
+
+ import letta.local_llm.llm_chat_completion_wrappers.airoboros as airoboros
+ import letta.local_llm.llm_chat_completion_wrappers.chatml as chatml
+ import letta.local_llm.llm_chat_completion_wrappers.configurable_wrapper as configurable_wrapper
+ import letta.local_llm.llm_chat_completion_wrappers.dolphin as dolphin
+ import letta.local_llm.llm_chat_completion_wrappers.llama3 as llama3
+ import letta.local_llm.llm_chat_completion_wrappers.zephyr as zephyr
+
+
+ def post_json_auth_request(uri, json_payload, auth_type, auth_key):
+     """Send a POST request with a JSON payload and optional authentication"""
+
+     # By default most local LLM inference servers do not have authorization enabled
+     if auth_type is None:
+         response = requests.post(uri, json=json_payload)
+
+     # Used by OpenAI, together.ai, Mistral AI
+     elif auth_type == "bearer_token":
+         if auth_key is None:
+             raise ValueError(f"auth_type is {auth_type}, but auth_key is null")
+         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {auth_key}"}
+         response = requests.post(uri, json=json_payload, headers=headers)
+
+     # Used by OpenAI Azure
+     elif auth_type == "api_key":
+         if auth_key is None:
+             raise ValueError(f"auth_type is {auth_type}, but auth_key is null")
+         headers = {"Content-Type": "application/json", "api-key": f"{auth_key}"}
+         response = requests.post(uri, json=json_payload, headers=headers)
+
+     else:
+         raise ValueError(f"Unsupported authentication type: {auth_type}")
+
+     return response
+
+
+ # deprecated for Box
+ class DotDict(dict):
+     """Allow dot access on properties similar to OpenAI response object"""
+
+     def __getattr__(self, attr):
+         return self.get(attr)
+
+     def __setattr__(self, key, value):
+         self[key] = value
+
+     # following methods necessary for pickling
+     def __getstate__(self):
+         return vars(self)
+
+     def __setstate__(self, state):
+         vars(self).update(state)
+
+
+ def load_grammar_file(grammar):
+     # Set grammar
+     grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammars", f"{grammar}.gbnf")
+
+     # Check if the file exists
+     if not os.path.isfile(grammar_file):
+         # If the file doesn't exist, raise a FileNotFoundError
+         raise FileNotFoundError(f"The grammar file {grammar_file} does not exist.")
+
+     with open(grammar_file, "r", encoding="utf-8") as file:
+         grammar_str = file.read()
+
+     return grammar_str
+
+
+ # TODO: support tokenizers/tokenizer apis available in local models
+ def count_tokens(s: str, model: str = "gpt-4") -> int:
+     encoding = tiktoken.encoding_for_model(model)
+     return len(encoding.encode(s))
+
+
+ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
+     """Return the number of tokens used by a list of functions.
+
+     Copied from https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/11
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+     num_tokens = 0
+     for function in functions:
+         function_tokens = len(encoding.encode(function["name"]))
+         function_tokens += len(encoding.encode(function["description"]))
+
+         if "parameters" in function:
+             parameters = function["parameters"]
+             if "properties" in parameters:
+                 for propertiesKey in parameters["properties"]:
+                     function_tokens += len(encoding.encode(propertiesKey))
+                     v = parameters["properties"][propertiesKey]
+                     for field in v:
+                         if field == "type":
+                             function_tokens += 2
+                             function_tokens += len(encoding.encode(v["type"]))
+                         elif field == "description":
+                             function_tokens += 2
+                             function_tokens += len(encoding.encode(v["description"]))
+                         elif field == "enum":
+                             function_tokens -= 3
+                             for o in v["enum"]:
+                                 function_tokens += 3
+                                 function_tokens += len(encoding.encode(o))
+                         else:
+                             print(f"Warning: not supported field {field}")
+                 function_tokens += 11
+
+         num_tokens += function_tokens
+
+     num_tokens += 12
+     return num_tokens
+
+
+ def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):
+     """Based on above code (num_tokens_from_functions).
+
+     Example to encode:
+     [{
+         'id': '8b6707cf-2352-4804-93db-0423f',
+         'type': 'function',
+         'function': {
+             'name': 'send_message',
+             'arguments': '{\n "message": "More human than human is our motto."\n}'
+         }
+     }]
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         # print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+     num_tokens = 0
+     for tool_call in tool_calls:
+         function_tokens = len(encoding.encode(tool_call["id"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["type"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["function"]["name"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["function"]["arguments"]))
+
+         num_tokens += function_tokens
+
+     # TODO adjust?
+     num_tokens += 12
+     return num_tokens
+
+
+ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int:
+     """Return the number of tokens used by a list of messages.
+
+     From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+
+     For counting tokens in function calling RESPONSES, see:
+     https://hmarr.com/blog/counting-openai-tokens/, https://github.com/hmarr/openai-chat-tokens
+
+     For counting tokens in function calling REQUESTS, see:
+     https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/11
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         # print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+     if model in {
+         "gpt-3.5-turbo-0613",
+         "gpt-3.5-turbo-16k-0613",
+         "gpt-4-0314",
+         "gpt-4-32k-0314",
+         "gpt-4-0613",
+         "gpt-4-32k-0613",
+     }:
+         tokens_per_message = 3
+         tokens_per_name = 1
+     elif model == "gpt-3.5-turbo-0301":
+         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+         tokens_per_name = -1  # if there's a name, the role is omitted
+     elif "gpt-3.5-turbo" in model:
+         # print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+         return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
+     elif "gpt-4" in model:
+         # print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+         return num_tokens_from_messages(messages, model="gpt-4-0613")
+     else:
+         warnings.warn(
+             f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+         )
+         return num_tokens_from_messages(messages, model="gpt-4-0613")
+         # raise NotImplementedError(
+         #     f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+         # )
+     num_tokens = 0
+     for message in messages:
+         num_tokens += tokens_per_message
+         for key, value in message.items():
+             try:
+
+                 if isinstance(value, list) and key == "tool_calls":
+                     num_tokens += num_tokens_from_tool_calls(tool_calls=value, model=model)
+                     # special case for tool calling (list)
+                     # num_tokens += len(encoding.encode(value["name"]))
+                     # num_tokens += len(encoding.encode(value["arguments"]))
+
+                 else:
+                     num_tokens += len(encoding.encode(value))
+
+                 if key == "name":
+                     num_tokens += tokens_per_name
+
+             except TypeError as e:
+                 print(f"tiktoken encoding failed on: {value}")
+                 raise e
+
+     num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+     return num_tokens
+
+
+ def get_available_wrappers() -> dict:
+     return {
+         "llama3": llama3.LLaMA3InnerMonologueWrapper(),
+         "llama3-grammar": llama3.LLaMA3InnerMonologueWrapper(),
+         "llama3-hints-grammar": llama3.LLaMA3InnerMonologueWrapper(assistant_prefix_hint=True),
+         "experimental-wrapper-neural-chat-grammar-noforce": configurable_wrapper.ConfigurableJSONWrapper(
+             post_prompt="### Assistant:",
+             sys_prompt_start="### System:\n",
+             sys_prompt_end="\n",
+             user_prompt_start="### User:\n",
+             user_prompt_end="\n",
+             assistant_prompt_start="### Assistant:\n",
+             assistant_prompt_end="\n",
+             tool_prompt_start="### User:\n",
+             tool_prompt_end="\n",
+             strip_prompt=True,
+         ),
+         # New chatml-based wrappers
+         "chatml": chatml.ChatMLInnerMonologueWrapper(),
+         "chatml-grammar": chatml.ChatMLInnerMonologueWrapper(),
+         "chatml-noforce": chatml.ChatMLOuterInnerMonologueWrapper(),
+         "chatml-noforce-grammar": chatml.ChatMLOuterInnerMonologueWrapper(),
+         # "chatml-noforce-sysm": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True),
+         "chatml-noforce-roles": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True, allow_function_role=True),
+         "chatml-noforce-roles-grammar": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True, allow_function_role=True),
+         # With extra hints
+         "chatml-hints": chatml.ChatMLInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-hints-grammar": chatml.ChatMLInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-noforce-hints": chatml.ChatMLOuterInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-noforce-hints-grammar": chatml.ChatMLOuterInnerMonologueWrapper(assistant_prefix_hint=True),
+         # Legacy wrappers
+         "airoboros-l2-70b-2.1": airoboros.Airoboros21InnerMonologueWrapper(),
+         "airoboros-l2-70b-2.1-grammar": airoboros.Airoboros21InnerMonologueWrapper(assistant_prefix_extra=None),
+         "dolphin-2.1-mistral-7b": dolphin.Dolphin21MistralWrapper(),
+         "dolphin-2.1-mistral-7b-grammar": dolphin.Dolphin21MistralWrapper(include_opening_brace_in_prefix=False),
+         "zephyr-7B": zephyr.ZephyrMistralInnerMonologueWrapper(),
+         "zephyr-7B-grammar": zephyr.ZephyrMistralInnerMonologueWrapper(include_opening_brace_in_prefix=False),
+     }
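
Usage note: the token-counting helpers above are used to budget prompts against a model's context window, and `get_available_wrappers()` is the registry of prompt-format wrappers for local models. A minimal sketch of calling them; the message payload and the 8192-token window are illustrative assumptions:

    from letta.local_llm.utils import count_tokens, get_available_wrappers, num_tokens_from_messages

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    # Remaining budget after accounting for the chat messages (example window size)
    remaining = 8192 - num_tokens_from_messages(messages, model="gpt-4")
    single = count_tokens("Hello!", model="gpt-4")

    # Pick a prompt-format wrapper by name from the registry above
    wrapper = get_available_wrappers()["chatml"]
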
letta/local_llm/vllm/api.py ADDED
@@ -0,0 +1,63 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/v1/completions"
+
+
+ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user, grammar=None):
+     """https://github.com/vllm-project/vllm/blob/main/examples/api_client.py"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+     request["max_tokens"] = 3000  # int(context_window - prompt_tokens)
+     request["stream"] = False
+     request["user"] = user
+
+     # currently hardcoded, since we are only supporting one model with the hosted endpoint
+     request["model"] = model
+
+     # Set grammar
+     if grammar is not None:
+         raise NotImplementedError
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["choices"][0]["text"]
+             usage = result_full.get("usage", None)
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the vLLM server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
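
Usage note: a minimal sketch of how `get_vllm_completion` might be called against a locally running vLLM server exposing the OpenAI-style completions route; the endpoint URL, model name, prompt text, and context window below are placeholders, not values shipped with the package:

    from letta.local_llm.vllm.api import get_vllm_completion

    # Placeholder endpoint/model for a local vLLM server with no auth enabled
    text, usage = get_vllm_completion(
        endpoint="http://localhost:8000",
        auth_type=None,
        auth_key=None,
        model="my-local-model",
        prompt="### User:\nHello\n### Assistant:\n",
        context_window=8192,
        user="example-user",
    )
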
letta/local_llm/webui/api.py ADDED
@@ -0,0 +1,60 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/v1/completions"
+
+
+ def get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """Compatibility for the new OpenAI API: https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+     request["truncation_length"] = context_window
+     request["max_tokens"] = int(context_window - prompt_tokens)
+     request["max_new_tokens"] = int(context_window - prompt_tokens)  # safety backup to "max_tokens", shouldn't matter
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar_string"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Endpoint value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["choices"][0]["text"]
+             usage = result_full.get("usage", None)
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the web UI server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/webui/legacy_api.py ADDED
@@ -0,0 +1,58 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/api/v1/generate"
+
+
+ def get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """See https://github.com/oobabooga/text-generation-webui for instructions on how to run the LLM web server"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["stopping_strings"] = request["stop"]  # alias
+     request["max_new_tokens"] = 3072  # random hack?
+     request["prompt"] = prompt
+     request["truncation_length"] = context_window  # assuming mistral 7b
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar_string"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["results"][0]["text"]
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the web UI server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # TODO correct for legacy
+     completion_tokens = None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/webui/legacy_settings.py ADDED
@@ -0,0 +1,23 @@
+ SIMPLE = {
+     "stopping_strings": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     "max_new_tokens": 3072,
+     # "truncation_length": 4096, # assuming llama2 models
+     # "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
+ }
letta/local_llm/webui/settings.py ADDED
@@ -0,0 +1,24 @@
+ SIMPLE = {
+     # "stopping_strings": [
+     "stop": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     # "max_tokens": 3072,
+     # "truncation_length": 4096, # assuming llama2 models
+     # "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
+ }
letta/log.py ADDED
@@ -0,0 +1,76 @@
+ import logging
+ from logging.config import dictConfig
+ from pathlib import Path
+ from sys import stdout
+ from typing import Optional
+
+ from letta.settings import settings
+
+ selected_log_level = logging.DEBUG if settings.debug else logging.INFO
+
+
+ def _setup_logfile() -> "Path":
+     """ensure the logger filepath is in place
+
+     Returns: the logfile Path
+     """
+     logfile = Path(settings.letta_dir / "logs" / "Letta.log")
+     logfile.parent.mkdir(parents=True, exist_ok=True)
+     logfile.touch(exist_ok=True)
+     return logfile
+
+
+ # TODO: production logging should be much less invasive
+ DEVELOPMENT_LOGGING = {
+     "version": 1,
+     "disable_existing_loggers": True,
+     "formatters": {
+         "standard": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
+         "no_datetime": {
+             "format": "%(name)s - %(levelname)s - %(message)s",
+         },
+     },
+     "handlers": {
+         "console": {
+             "level": selected_log_level,
+             "class": "logging.StreamHandler",
+             "stream": stdout,
+             "formatter": "no_datetime",
+         },
+         "file": {
+             "level": "DEBUG",
+             "class": "logging.handlers.RotatingFileHandler",
+             "filename": _setup_logfile(),
+             "maxBytes": 1024**2 * 10,
+             "backupCount": 3,
+             "formatter": "standard",
+         },
+     },
+     "loggers": {
+         "Letta": {
+             "level": logging.DEBUG if settings.debug else logging.INFO,
+             "handlers": [
+                 "console",
+                 "file",
+             ],
+             "propagate": False,
+         },
+         "uvicorn": {
+             "level": "INFO",
+             "handlers": ["console"],
+             "propagate": False,
+         },
+     },
+ }
+
+
+ def get_logger(name: Optional[str] = None) -> "logging.Logger":
+     """returns the project logger, scoped to a child name if provided
+     Args:
+         name: will define a child logger
+     """
+     dictConfig(DEVELOPMENT_LOGGING)
+     parent_logger = logging.getLogger("Letta")
+     if name:
+         return parent_logger.getChild(name)
+     return parent_logger
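
Usage note: modules in the package obtain their logger through `get_logger`, which configures the "Letta" parent logger (console plus a rotating logfile under the Letta directory). A minimal sketch; the log messages are illustrative:

    from letta.log import get_logger

    logger = get_logger(__name__)  # child of the "Letta" logger
    logger.info("server starting")  # goes to console and the rotating logfile
    logger.debug("detail message")  # always written to the logfile; shown on console only when settings.debug is set
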