letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +24 -0
- letta/__main__.py +3 -0
- letta/agent.py +1427 -0
- letta/agent_store/chroma.py +295 -0
- letta/agent_store/db.py +546 -0
- letta/agent_store/lancedb.py +177 -0
- letta/agent_store/milvus.py +198 -0
- letta/agent_store/qdrant.py +201 -0
- letta/agent_store/storage.py +188 -0
- letta/benchmark/benchmark.py +96 -0
- letta/benchmark/constants.py +14 -0
- letta/cli/cli.py +689 -0
- letta/cli/cli_config.py +1282 -0
- letta/cli/cli_load.py +166 -0
- letta/client/__init__.py +0 -0
- letta/client/admin.py +171 -0
- letta/client/client.py +2360 -0
- letta/client/streaming.py +90 -0
- letta/client/utils.py +61 -0
- letta/config.py +484 -0
- letta/configs/anthropic.json +13 -0
- letta/configs/letta_hosted.json +11 -0
- letta/configs/openai.json +12 -0
- letta/constants.py +134 -0
- letta/credentials.py +140 -0
- letta/data_sources/connectors.py +247 -0
- letta/embeddings.py +218 -0
- letta/errors.py +26 -0
- letta/functions/__init__.py +0 -0
- letta/functions/function_sets/base.py +174 -0
- letta/functions/function_sets/extras.py +132 -0
- letta/functions/functions.py +105 -0
- letta/functions/schema_generator.py +205 -0
- letta/humans/__init__.py +0 -0
- letta/humans/examples/basic.txt +1 -0
- letta/humans/examples/cs_phd.txt +9 -0
- letta/interface.py +314 -0
- letta/llm_api/__init__.py +0 -0
- letta/llm_api/anthropic.py +383 -0
- letta/llm_api/azure_openai.py +155 -0
- letta/llm_api/cohere.py +396 -0
- letta/llm_api/google_ai.py +468 -0
- letta/llm_api/llm_api_tools.py +485 -0
- letta/llm_api/openai.py +470 -0
- letta/local_llm/README.md +3 -0
- letta/local_llm/__init__.py +0 -0
- letta/local_llm/chat_completion_proxy.py +279 -0
- letta/local_llm/constants.py +31 -0
- letta/local_llm/function_parser.py +68 -0
- letta/local_llm/grammars/__init__.py +0 -0
- letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
- letta/local_llm/grammars/json.gbnf +26 -0
- letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
- letta/local_llm/groq/api.py +97 -0
- letta/local_llm/json_parser.py +202 -0
- letta/local_llm/koboldcpp/api.py +62 -0
- letta/local_llm/koboldcpp/settings.py +23 -0
- letta/local_llm/llamacpp/api.py +58 -0
- letta/local_llm/llamacpp/settings.py +22 -0
- letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
- letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
- letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
- letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
- letta/local_llm/lmstudio/api.py +100 -0
- letta/local_llm/lmstudio/settings.py +29 -0
- letta/local_llm/ollama/api.py +88 -0
- letta/local_llm/ollama/settings.py +32 -0
- letta/local_llm/settings/__init__.py +0 -0
- letta/local_llm/settings/deterministic_mirostat.py +45 -0
- letta/local_llm/settings/settings.py +72 -0
- letta/local_llm/settings/simple.py +28 -0
- letta/local_llm/utils.py +265 -0
- letta/local_llm/vllm/api.py +63 -0
- letta/local_llm/webui/api.py +60 -0
- letta/local_llm/webui/legacy_api.py +58 -0
- letta/local_llm/webui/legacy_settings.py +23 -0
- letta/local_llm/webui/settings.py +24 -0
- letta/log.py +76 -0
- letta/main.py +437 -0
- letta/memory.py +440 -0
- letta/metadata.py +884 -0
- letta/openai_backcompat/__init__.py +0 -0
- letta/openai_backcompat/openai_object.py +437 -0
- letta/persistence_manager.py +148 -0
- letta/personas/__init__.py +0 -0
- letta/personas/examples/anna_pa.txt +13 -0
- letta/personas/examples/google_search_persona.txt +15 -0
- letta/personas/examples/memgpt_doc.txt +6 -0
- letta/personas/examples/memgpt_starter.txt +4 -0
- letta/personas/examples/sam.txt +14 -0
- letta/personas/examples/sam_pov.txt +14 -0
- letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
- letta/personas/examples/sqldb/test.db +0 -0
- letta/prompts/__init__.py +0 -0
- letta/prompts/gpt_summarize.py +14 -0
- letta/prompts/gpt_system.py +26 -0
- letta/prompts/system/memgpt_base.txt +49 -0
- letta/prompts/system/memgpt_chat.txt +58 -0
- letta/prompts/system/memgpt_chat_compressed.txt +13 -0
- letta/prompts/system/memgpt_chat_fstring.txt +51 -0
- letta/prompts/system/memgpt_doc.txt +50 -0
- letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
- letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
- letta/prompts/system/memgpt_modified_chat.txt +23 -0
- letta/pytest.ini +0 -0
- letta/schemas/agent.py +117 -0
- letta/schemas/api_key.py +21 -0
- letta/schemas/block.py +135 -0
- letta/schemas/document.py +21 -0
- letta/schemas/embedding_config.py +54 -0
- letta/schemas/enums.py +35 -0
- letta/schemas/job.py +38 -0
- letta/schemas/letta_base.py +80 -0
- letta/schemas/letta_message.py +175 -0
- letta/schemas/letta_request.py +23 -0
- letta/schemas/letta_response.py +28 -0
- letta/schemas/llm_config.py +54 -0
- letta/schemas/memory.py +224 -0
- letta/schemas/message.py +727 -0
- letta/schemas/openai/chat_completion_request.py +123 -0
- letta/schemas/openai/chat_completion_response.py +136 -0
- letta/schemas/openai/chat_completions.py +123 -0
- letta/schemas/openai/embedding_response.py +11 -0
- letta/schemas/openai/openai.py +157 -0
- letta/schemas/organization.py +20 -0
- letta/schemas/passage.py +80 -0
- letta/schemas/source.py +62 -0
- letta/schemas/tool.py +143 -0
- letta/schemas/usage.py +18 -0
- letta/schemas/user.py +33 -0
- letta/server/__init__.py +0 -0
- letta/server/constants.py +6 -0
- letta/server/rest_api/__init__.py +0 -0
- letta/server/rest_api/admin/__init__.py +0 -0
- letta/server/rest_api/admin/agents.py +21 -0
- letta/server/rest_api/admin/tools.py +83 -0
- letta/server/rest_api/admin/users.py +98 -0
- letta/server/rest_api/app.py +193 -0
- letta/server/rest_api/auth/__init__.py +0 -0
- letta/server/rest_api/auth/index.py +43 -0
- letta/server/rest_api/auth_token.py +22 -0
- letta/server/rest_api/interface.py +726 -0
- letta/server/rest_api/routers/__init__.py +0 -0
- letta/server/rest_api/routers/openai/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
- letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
- letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
- letta/server/rest_api/routers/v1/__init__.py +15 -0
- letta/server/rest_api/routers/v1/agents.py +543 -0
- letta/server/rest_api/routers/v1/blocks.py +73 -0
- letta/server/rest_api/routers/v1/jobs.py +46 -0
- letta/server/rest_api/routers/v1/llms.py +28 -0
- letta/server/rest_api/routers/v1/organizations.py +61 -0
- letta/server/rest_api/routers/v1/sources.py +199 -0
- letta/server/rest_api/routers/v1/tools.py +103 -0
- letta/server/rest_api/routers/v1/users.py +109 -0
- letta/server/rest_api/static_files.py +74 -0
- letta/server/rest_api/utils.py +69 -0
- letta/server/server.py +1995 -0
- letta/server/startup.sh +8 -0
- letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
- letta/server/static_files/assets/index-156816da.css +1 -0
- letta/server/static_files/assets/index-486e3228.js +274 -0
- letta/server/static_files/favicon.ico +0 -0
- letta/server/static_files/index.html +39 -0
- letta/server/static_files/memgpt_logo_transparent.png +0 -0
- letta/server/utils.py +46 -0
- letta/server/ws_api/__init__.py +0 -0
- letta/server/ws_api/example_client.py +104 -0
- letta/server/ws_api/interface.py +108 -0
- letta/server/ws_api/protocol.py +100 -0
- letta/server/ws_api/server.py +145 -0
- letta/settings.py +165 -0
- letta/streaming_interface.py +396 -0
- letta/system.py +207 -0
- letta/utils.py +1065 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/llm_chat_completion_wrappers/zephyr.py
@@ -0,0 +1,345 @@
from letta.utils import json_dumps, json_loads

from ...errors import LLMJSONParsingError
from ..json_parser import clean_json
from .wrapper_base import LLMChatCompletionWrapper


class ZephyrMistralWrapper(LLMChatCompletionWrapper):
    """
    Wrapper for Zephyr Alpha and Beta, Mistral 7B:
    https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
    https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
    Note: this wrapper formats a prompt that only generates JSON, no inner thoughts
    """

    def __init__(
        self,
        simplify_json_content=True,
        clean_function_args=True,
        include_assistant_prefix=True,
        include_opening_brace_in_prefix=True,
        include_section_separators=False,
    ):
        self.simplify_json_content = simplify_json_content
        self.clean_func_args = clean_function_args
        self.include_assistant_prefix = include_assistant_prefix
        self.include_opening_brance_in_prefix = include_opening_brace_in_prefix
        self.include_section_separators = include_section_separators

    def chat_completion_to_prompt(self, messages, functions, function_documentation=None):
        """
        Zephyr prompt format:
        <|system|>
        </s>
        <|user|>
        {prompt}</s>
        <|assistant|>
        (source: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF#prompt-template-zephyr)
        """

        prompt = ""

        IM_END_TOKEN = "</s>"

        # System instructions go first
        assert messages[0]["role"] == "system"
        prompt += f"<|system|>"
        prompt += f"\n{messages[0]['content']}"

        # Next is the functions preamble
        def create_function_description(schema):
            # airorobos style
            func_str = ""
            func_str += f"{schema['name']}:"
            func_str += f"\n description: {schema['description']}"
            func_str += f"\n params:"
            for param_k, param_v in schema["parameters"]["properties"].items():
                # TODO we're ignoring type
                func_str += f"\n {param_k}: {param_v['description']}"
            # TODO we're ignoring schema['parameters']['required']
            return func_str

        # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
        prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
        prompt += f"\nAvailable functions:"
        if function_documentation is not None:
            prompt += f"\n{function_documentation}"
        else:
            for function_dict in functions:
                prompt += f"\n{create_function_description(function_dict)}"

        # Put functions INSIDE system message (TODO experiment with this)
        prompt += IM_END_TOKEN

        def create_function_call(function_call):
            airo_func_call = {
                "function": function_call["name"],
                "params": json_loads(function_call["arguments"]),
            }
            return json_dumps(airo_func_call, indent=2)

        for message in messages[1:]:
            assert message["role"] in ["user", "assistant", "function", "tool"], message

            if message["role"] == "user":
                if self.simplify_json_content:
                    try:
                        content_json = json_loads(message["content"])
                        content_simple = content_json["message"]
                        prompt += f"\n<|user|>\n{content_simple}{IM_END_TOKEN}"
                        # prompt += f"\nUSER: {content_simple}"
                    except:
                        prompt += f"\n<|user|>\n{message['content']}{IM_END_TOKEN}"
                        # prompt += f"\nUSER: {message['content']}"
            elif message["role"] == "assistant":
                prompt += f"\n<|assistant|>"
                if message["content"] is not None:
                    prompt += f"\n{message['content']}"
                    # prompt += f"\nASSISTANT: {message['content']}"
                # need to add the function call if there was one
                if "function_call" in message and message["function_call"]:
                    prompt += f"\n{create_function_call(message['function_call'])}"
                prompt += f"{IM_END_TOKEN}"
            elif message["role"] in ["function", "tool"]:
                # TODO find a good way to add this
                # prompt += f"\nASSISTANT: (function return) {message['content']}"
                prompt += f"\n<|assistant|>"
                prompt += f"\nFUNCTION RETURN: {message['content']}"
                # prompt += f"\nFUNCTION RETURN: {message['content']}"
                continue
            else:
                raise ValueError(message)

        # Add a sep for the response
        # if self.include_section_separators:
        #     prompt += "\n### RESPONSE"

        if self.include_assistant_prefix:
            # prompt += f"\nASSISTANT:"
            prompt += f"\n<|assistant|>"
            if self.include_opening_brance_in_prefix:
                prompt += "\n{"

        return prompt

    def clean_function_args(self, function_name, function_args):
        """Some basic Letta-specific cleaning of function args"""
        cleaned_function_name = function_name
        cleaned_function_args = function_args.copy() if function_args is not None else {}

        if function_name == "send_message":
            # strip request_heartbeat
            cleaned_function_args.pop("request_heartbeat", None)

        # TODO more cleaning to fix errors LLM makes
        return cleaned_function_name, cleaned_function_args

    def output_to_chat_completion_response(self, raw_llm_output):
        """Turn raw LLM output into a ChatCompletion style response with:
        "message" = {
            "role": "assistant",
            "content": ...,
            "function_call": {
                "name": ...
                "arguments": {
                    "arg1": val1,
                    ...
                }
            }
        }
        """
        if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{":
            raw_llm_output = "{" + raw_llm_output

        try:
            function_json_output = clean_json(raw_llm_output)
        except Exception as e:
            raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output} - error\n{str(e)}")
        try:
            function_name = function_json_output["function"]
            function_parameters = function_json_output["params"]
        except KeyError as e:
            raise LLMJSONParsingError(f"Received valid JSON from LLM, but JSON was missing fields: {str(e)}")

        if self.clean_func_args:
            function_name, function_parameters = self.clean_function_args(function_name, function_parameters)

        message = {
            "role": "assistant",
            "content": None,
            "function_call": {
                "name": function_name,
                "arguments": json_dumps(function_parameters),
            },
        }
        return message


class ZephyrMistralInnerMonologueWrapper(ZephyrMistralWrapper):
    """Still expect only JSON outputs from model, but add inner monologue as a field"""

    """
    Wrapper for Zephyr Alpha and Beta, Mistral 7B:
    https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
    https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
    Note: this wrapper formats a prompt with inner thoughts included
    """

    def __init__(
        self,
        simplify_json_content=True,
        clean_function_args=True,
        include_assistant_prefix=True,
        include_opening_brace_in_prefix=True,
        include_section_separators=True,
    ):
        self.simplify_json_content = simplify_json_content
        self.clean_func_args = clean_function_args
        self.include_assistant_prefix = include_assistant_prefix
        self.include_opening_brance_in_prefix = include_opening_brace_in_prefix
        self.include_section_separators = include_section_separators

    def chat_completion_to_prompt(self, messages, functions, function_documentation=None):
        prompt = ""

        IM_END_TOKEN = "</s>"

        # System insturctions go first
        assert messages[0]["role"] == "system"
        prompt += messages[0]["content"]

        # Next is the functions preamble
        def create_function_description(schema, add_inner_thoughts=True):
            # airorobos style
            func_str = ""
            func_str += f"{schema['name']}:"
            func_str += f"\n description: {schema['description']}"
            func_str += f"\n params:"
            if add_inner_thoughts:
                func_str += f"\n inner_thoughts: Deep inner monologue private to you only."
            for param_k, param_v in schema["parameters"]["properties"].items():
                # TODO we're ignoring type
                func_str += f"\n {param_k}: {param_v['description']}"
            # TODO we're ignoring schema['parameters']['required']
            return func_str

        # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
        prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
        prompt += f"\nAvailable functions:"
        if function_documentation is not None:
            prompt += f"\n{function_documentation}"
        else:
            for function_dict in functions:
                prompt += f"\n{create_function_description(function_dict)}"

        def create_function_call(function_call, inner_thoughts=None):
            airo_func_call = {
                "function": function_call["name"],
                "params": {
                    "inner_thoughts": inner_thoughts,
                    **json_loads(function_call["arguments"]),
                },
            }
            return json_dumps(airo_func_call, indent=2)

        # Add a sep for the conversation
        if self.include_section_separators:
            prompt += "\n<|user|>"

        # Last are the user/assistant messages
        for message in messages[1:]:
            assert message["role"] in ["user", "assistant", "function", "tool"], message

            if message["role"] == "user":
                if self.simplify_json_content:
                    try:
                        content_json = json_loads(message["content"])
                        content_simple = content_json["message"]
                        prompt += f"\n<|user|>\n{content_simple}{IM_END_TOKEN}"
                    except:
                        prompt += f"\n<|user|>\n{message['content']}{IM_END_TOKEN}"
            elif message["role"] == "assistant":
                prompt += f"\n<|assistant|>"
                # need to add the function call if there was one
                inner_thoughts = message["content"]
                if "function_call" in message and message["function_call"]:
                    prompt += f"\n{create_function_call(message['function_call'], inner_thoughts=inner_thoughts)}"
            elif message["role"] in ["function", "tool"]:
                # TODO find a good way to add this
                # prompt += f"\nASSISTANT: (function return) {message['content']}"
                prompt += f"\nFUNCTION RETURN: {message['content']}"
                continue
            else:
                raise ValueError(message)

        # Add a sep for the response
        # if self.include_section_separators:
        #     prompt += "\n### RESPONSE"

        if self.include_assistant_prefix:
            prompt += f"\n<|assistant|>"
            if self.include_opening_brance_in_prefix:
                prompt += "\n{"

        return prompt

    def clean_function_args(self, function_name, function_args):
        """Some basic Letta-specific cleaning of function args"""
        cleaned_function_name = function_name
        cleaned_function_args = function_args.copy() if function_args is not None else {}

        if function_name == "send_message":
            # strip request_heartbeat
            cleaned_function_args.pop("request_heartbeat", None)

        inner_thoughts = None
        if "inner_thoughts" in function_args:
            inner_thoughts = cleaned_function_args.pop("inner_thoughts")

        # TODO more cleaning to fix errors LLM makes
        return inner_thoughts, cleaned_function_name, cleaned_function_args

    def output_to_chat_completion_response(self, raw_llm_output):
        """Turn raw LLM output into a ChatCompletion style response with:
        "message" = {
            "role": "assistant",
            "content": ...,
            "function_call": {
                "name": ...
                "arguments": {
                    "arg1": val1,
                    ...
                }
            }
        }
        """
        if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{":
            raw_llm_output = "{" + raw_llm_output

        try:
            function_json_output = clean_json(raw_llm_output)
        except Exception as e:
            raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output} - error\n{str(e)}")
        try:
            function_name = function_json_output["function"]
            function_parameters = function_json_output["params"]
        except KeyError as e:
            raise LLMJSONParsingError(f"Received valid JSON from LLM, but JSON was missing fields: {str(e)}")

        if self.clean_func_args:
            (
                inner_thoughts,
                function_name,
                function_parameters,
            ) = self.clean_function_args(function_name, function_parameters)

        message = {
            "role": "assistant",
            "content": inner_thoughts,
            "function_call": {
                "name": function_name,
                "arguments": json_dumps(function_parameters),
            },
        }
        return message
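The wrapper above converts an OpenAI-style message list plus function schemas into a single Zephyr-formatted prompt string, then parses the model's JSON reply back into an assistant message carrying a function_call. Below is a minimal round-trip sketch (not part of the package); the example messages, function schema, and simulated model output are made up for illustration, and clean_json is assumed to return the parsed JSON object.

# Minimal round-trip sketch: prompt construction, then output parsing.
from letta.local_llm.llm_chat_completion_wrappers.zephyr import ZephyrMistralInnerMonologueWrapper

wrapper = ZephyrMistralInnerMonologueWrapper()

# Illustrative inputs (not taken from the package).
messages = [
    {"role": "system", "content": "You are a helpful agent."},
    {"role": "user", "content": '{"type": "user_message", "message": "hi there"}'},
]
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {"properties": {"message": {"description": "The message text."}}},
    }
]

# Build the Zephyr-formatted prompt; it ends with "<|assistant|>\n{" so the
# model is nudged to continue the JSON object directly.
prompt = wrapper.chat_completion_to_prompt(messages, functions)

# Pretend this is what the local model generated after the opening brace.
raw_llm_output = '"function": "send_message", "params": {"inner_thoughts": "Greet them back.", "message": "Hello!"}}'

# Parse it back into an OpenAI-style assistant message with a function_call.
assistant_message = wrapper.output_to_chat_completion_response(raw_llm_output)
print(assistant_message["function_call"]["name"])  # send_message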
letta/local_llm/lmstudio/api.py
@@ -0,0 +1,100 @@
from urllib.parse import urljoin

from letta.local_llm.settings.settings import get_completions_settings
from letta.local_llm.utils import post_json_auth_request
from letta.utils import count_tokens

LMSTUDIO_API_CHAT_SUFFIX = "/v1/chat/completions"
LMSTUDIO_API_COMPLETIONS_SUFFIX = "/v1/completions"


def get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions"):
    """Based on the example for using LM Studio as a backend from https://github.com/lmstudio-ai/examples/tree/main/Hello%2C%20world%20-%20OpenAI%20python%20client"""
    from letta.utils import printd

    prompt_tokens = count_tokens(prompt)
    if prompt_tokens > context_window:
        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")

    settings = get_completions_settings()
    settings.update(
        {
            "input_prefix": "",
            "input_suffix": "",
            # This controls how LM studio handles context overflow
            # In Letta we handle this ourselves, so this should be disabled
            # "context_overflow_policy": 0,
            "lmstudio": {"context_overflow_policy": 0},  # 0 = stop at limit
            "stream": False,
            "model": "local model",
        }
    )

    # Uses the ChatCompletions API style
    # Seems to work better, probably because it's applying some extra settings under-the-hood?
    if api == "chat":
        URI = urljoin(endpoint.strip("/") + "/", LMSTUDIO_API_CHAT_SUFFIX.strip("/"))

        # Settings for the generation, includes the prompt + stop tokens, max length, etc
        request = settings
        request["max_tokens"] = context_window

        # Put the entire completion string inside the first message
        message_structure = [{"role": "user", "content": prompt}]
        request["messages"] = message_structure

    # Uses basic string completions (string in, string out)
    # Does not work as well as ChatCompletions for some reason
    elif api == "completions":
        URI = urljoin(endpoint.strip("/") + "/", LMSTUDIO_API_COMPLETIONS_SUFFIX.strip("/"))

        # Settings for the generation, includes the prompt + stop tokens, max length, etc
        request = settings
        request["max_tokens"] = context_window

        # Standard completions format, formatted string goes in prompt
        request["prompt"] = prompt

    else:
        raise ValueError(api)

    if not endpoint.startswith(("http://", "https://")):
        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")

    try:
        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
        if response.status_code == 200:
            result_full = response.json()
            printd(f"JSON API response:\n{result_full}")
            if api == "chat":
                result = result_full["choices"][0]["message"]["content"]
                usage = result_full.get("usage", None)
            elif api == "completions":
                result = result_full["choices"][0]["text"]
                usage = result_full.get("usage", None)
        else:
            # Example error: msg={"error":"Context length exceeded. Tokens in context: 8000, Context length: 8000"}
            if "context length" in str(response.text).lower():
                # "exceeds context length" is what appears in the LM Studio error message
                # raise an alternate exception that matches OpenAI's message, which is "maximum context length"
                raise Exception(f"Request exceeds maximum context length (code={response.status_code}, msg={response.text}, URI={URI})")
            else:
                raise Exception(
                    f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
                    + f" Make sure that the LM Studio local inference server is running and reachable at {URI}."
                )
    except:
        # TODO handle gracefully
        raise

    # Pass usage statistics back to main thread
    # These are used to compute memory warning messages
    completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
    usage = {
        "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }

    return result, usage
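get_lmstudio_completion() wraps a single blocking request to a local LM Studio server: it budget-checks the prompt, merges the completions settings preset, posts to either the chat or completions route, and returns the raw text plus a usage dict. A minimal calling sketch follows; the endpoint address, prompt string, and context window are placeholder assumptions, not values from the package.

from letta.local_llm.lmstudio.api import get_lmstudio_completion

result, usage = get_lmstudio_completion(
    endpoint="http://localhost:1234",  # assumed local LM Studio server address
    auth_type=None,                    # no auth for a local server
    auth_key=None,
    prompt="<|system|>\nYou are a helpful agent.</s>\n<|assistant|>\n{",  # placeholder prompt
    context_window=8192,               # assumed context length for the loaded model
    api="chat",                        # or "completions" for the raw string-in/string-out route
)
print(result)                 # raw model text
print(usage["total_tokens"])  # prompt + completion estimate (may be None)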
letta/local_llm/lmstudio/settings.py
@@ -0,0 +1,29 @@
SIMPLE = {
    "stop": [
        "\nUSER:",
        "\nASSISTANT:",
        "\nFUNCTION RETURN:",
        "\nUSER",
        "\nASSISTANT",
        "\nFUNCTION RETURN",
        "\nFUNCTION",
        "\nFUNC",
        "<|im_start|>",
        "<|im_end|>",
        "<|im_sep|>",
        # '\n' +
        # '</s>',
        # '<|',
        # '\n#',
        # '\n\n\n',
    ],
    # This controls the maximum number of tokens that the model can generate
    # Cap this at the model context length (assuming 8k for Mistral 7B)
    # "max_tokens": 8000,
    # "max_tokens": LLM_MAX_TOKENS,
    # This controls how LM studio handles context overflow
    # In Letta we handle this ourselves, so this should be commented out
    # "lmstudio": {"context_overflow_policy": 2},
    "stream": False,
    "model": "local model",
}
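The SIMPLE preset above is the LM Studio flavor of the shared completions settings: stop sequences plus a couple of fixed request fields. Below is a small sketch of how such a preset gets layered with per-request keys, mirroring the settings.update(...) pattern in lmstudio/api.py; the assumption here is that get_completions_settings() hands back a dict shaped like SIMPLE, and the values are placeholders.

import copy

from letta.local_llm.lmstudio.settings import SIMPLE

request = copy.deepcopy(SIMPLE)  # avoid mutating the shared preset
request.update(
    {
        "max_tokens": 8192,  # assumed cap, set from the model's context window
        "prompt": "<|system|>\n...</s>\n<|assistant|>\n{",  # placeholder formatted prompt
    }
)
# `request` now roughly matches the payload that get_lmstudio_completion()
# sends to the /v1/completions route.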
letta/local_llm/ollama/api.py
@@ -0,0 +1,88 @@
from urllib.parse import urljoin

from letta.errors import LocalLLMError
from letta.local_llm.settings.settings import get_completions_settings
from letta.local_llm.utils import post_json_auth_request
from letta.utils import count_tokens

OLLAMA_API_SUFFIX = "/api/generate"


def get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window, grammar=None):
    """See https://github.com/jmorganca/ollama/blob/main/docs/api.md for instructions on how to run the LLM web server"""
    from letta.utils import printd

    prompt_tokens = count_tokens(prompt)
    if prompt_tokens > context_window:
        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")

    if model is None:
        raise LocalLLMError(
            f"Error: model name not specified. Set model in your config to the model you want to run (e.g. 'dolphin2.2-mistral')"
        )

    # Settings for the generation, includes the prompt + stop tokens, max length, etc
    # https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
    settings = get_completions_settings()
    settings.update(
        {
            # specific naming for context length
            "num_ctx": context_window,
        }
    )

    # https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
    request = {
        ## base parameters
        "model": model,
        "prompt": prompt,
        # "images": [],  # TODO eventually support
        ## advanced parameters
        # "format": "json",  # TODO eventually support
        "stream": False,
        "options": settings,
        "raw": True,  # no prompt formatting
        # "raw mode does not support template, system, or context"
        # "system": "",  # no prompt formatting
        # "template": "{{ .Prompt }}",  # no prompt formatting
        # "context": None,  # no memory via prompt formatting
    }

    # Set grammar
    if grammar is not None:
        # request["grammar_string"] = load_grammar_file(grammar)
        raise NotImplementedError(f"Ollama does not support grammars")

    if not endpoint.startswith(("http://", "https://")):
        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")

    try:
        URI = urljoin(endpoint.strip("/") + "/", OLLAMA_API_SUFFIX.strip("/"))
        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
        if response.status_code == 200:
            # https://github.com/jmorganca/ollama/blob/main/docs/api.md
            result_full = response.json()
            printd(f"JSON API response:\n{result_full}")
            result = result_full["response"]
        else:
            raise Exception(
                f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
                + f" Make sure that the ollama API server is running and reachable at {URI}."
            )

    except:
        # TODO handle gracefully
        raise

    # Pass usage statistics back to main thread
    # These are used to compute memory warning messages
    # https://github.com/jmorganca/ollama/blob/main/docs/api.md#response
    completion_tokens = result_full.get("eval_count", None)
    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
    usage = {
        "prompt_tokens": prompt_tokens,  # can also grab from "prompt_eval_count"
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }

    return result, usage
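get_ollama_completion() follows the same pattern against Ollama's /api/generate route, with raw mode enabled so Letta's own prompt formatting is passed through untouched. A minimal calling sketch follows; the endpoint and prompt are placeholders, and the model name is the example from the error message above (the model must already be pulled in Ollama).

from letta.local_llm.ollama.api import get_ollama_completion

result, usage = get_ollama_completion(
    endpoint="http://localhost:11434",  # Ollama's default listen address (assumed here)
    auth_type=None,
    auth_key=None,
    model="dolphin2.2-mistral",         # example model name, reused from the hint above
    prompt="<|system|>\nYou are a helpful agent.</s>\n<|assistant|>\n{",  # placeholder prompt
    context_window=8192,
)
print(result)                  # completion text, taken from result_full["response"]
print(usage["prompt_tokens"])  # counted locally with count_tokens()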
letta/local_llm/ollama/settings.py
@@ -0,0 +1,32 @@
# see https://github.com/jmorganca/ollama/blob/main/docs/api.md
# and https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
SIMPLE = {
    "options": {
        "stop": [
            "\nUSER:",
            "\nASSISTANT:",
            "\nFUNCTION RETURN:",
            "\nUSER",
            "\nASSISTANT",
            "\nFUNCTION RETURN",
            "\nFUNCTION",
            "\nFUNC",
            "<|im_start|>",
            "<|im_end|>",
            "<|im_sep|>",
            # '\n' +
            # '</s>',
            # '<|',
            # '\n#',
            # '\n\n\n',
        ],
        # "num_ctx": LLM_MAX_TOKENS,
    },
    "stream": False,
    # turn off Ollama's own prompt formatting
    "system": "",
    "template": "{{ .Prompt }}",
    # "system": None,
    # "template": None,
    "context": None,
}
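For reference, this is roughly what the final JSON body POSTed to /api/generate looks like once the preset above is folded into the request built by get_ollama_completion(): generation parameters travel inside "options", while model, prompt, stream, and raw sit at the top level. The dict below is hand-written for illustration (model name, prompt, and the trimmed stop list are placeholders).

example_request = {
    "model": "dolphin2.2-mistral",  # placeholder model name
    "prompt": "<|system|>\n...",    # placeholder formatted prompt
    "stream": False,
    "raw": True,  # disable Ollama's own prompt templating
    "options": {
        "stop": ["\nUSER:", "\nASSISTANT:", "\nFUNCTION RETURN:", "<|im_end|>"],  # trimmed list
        "num_ctx": 8192,  # set per request from context_window
    },
}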
letta/local_llm/settings/__init__.py
File without changes
letta/local_llm/settings/deterministic_mirostat.py
@@ -0,0 +1,45 @@
from letta.local_llm.settings.simple import settings as simple_settings

settings = {
    "max_new_tokens": 250,
    "do_sample": False,
    "temperature": 0,
    "top_p": 0,
    "typical_p": 1,
    "repetition_penalty": 1.18,
    "repetition_penalty_range": 0,
    "encoder_repetition_penalty": 1,
    "top_k": 1,
    "min_length": 0,
    "no_repeat_ngram_size": 0,
    "num_beams": 1,
    "penalty_alpha": 0,
    "length_penalty": 1,
    "early_stopping": False,
    "guidance_scale": 1,
    "negative_prompt": "",
    "seed": -1,
    "add_bos_token": True,
    # NOTE: important - these are the BASE stopping strings, and should be combined with {{user}}/{{char}}-based stopping strings
    "stopping_strings": [
        simple_settings["stop"]
        # '### Response (JSON only, engaging, natural, authentic, descriptive, creative):',
        # "</s>",
        # "<|",
        # "\n#",
        # "\n*{{user}} ",
        # "\n\n\n",
        # "\n{",
        # ",\n{",
    ],
    "truncation_length": 4096,
    "ban_eos_token": False,
    "skip_special_tokens": True,
    "top_a": 0,
    "tfs": 1,
    "epsilon_cutoff": 0,
    "eta_cutoff": 0,
    "mirostat_mode": 2,
    "mirostat_tau": 4,
    "mirostat_eta": 0.1,
}
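The NOTE in the preset above says these are only the BASE stopping strings and are meant to be combined with user/persona-specific ones before a request goes out. Below is a minimal sketch of that combination step (not code from the package); the display names are placeholder assumptions.

import copy

from letta.local_llm.settings.deterministic_mirostat import settings as mirostat_settings

user_name, char_name = "You", "Letta"  # placeholder display names

request_settings = copy.deepcopy(mirostat_settings)
request_settings["stopping_strings"] = [
    *request_settings["stopping_strings"],
    f"\n{user_name}:",  # stop if the model starts speaking as the user
    f"\n{char_name}:",  # or re-opens its own turn
]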