letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +24 -0
- letta/__main__.py +3 -0
- letta/agent.py +1427 -0
- letta/agent_store/chroma.py +295 -0
- letta/agent_store/db.py +546 -0
- letta/agent_store/lancedb.py +177 -0
- letta/agent_store/milvus.py +198 -0
- letta/agent_store/qdrant.py +201 -0
- letta/agent_store/storage.py +188 -0
- letta/benchmark/benchmark.py +96 -0
- letta/benchmark/constants.py +14 -0
- letta/cli/cli.py +689 -0
- letta/cli/cli_config.py +1282 -0
- letta/cli/cli_load.py +166 -0
- letta/client/__init__.py +0 -0
- letta/client/admin.py +171 -0
- letta/client/client.py +2360 -0
- letta/client/streaming.py +90 -0
- letta/client/utils.py +61 -0
- letta/config.py +484 -0
- letta/configs/anthropic.json +13 -0
- letta/configs/letta_hosted.json +11 -0
- letta/configs/openai.json +12 -0
- letta/constants.py +134 -0
- letta/credentials.py +140 -0
- letta/data_sources/connectors.py +247 -0
- letta/embeddings.py +218 -0
- letta/errors.py +26 -0
- letta/functions/__init__.py +0 -0
- letta/functions/function_sets/base.py +174 -0
- letta/functions/function_sets/extras.py +132 -0
- letta/functions/functions.py +105 -0
- letta/functions/schema_generator.py +205 -0
- letta/humans/__init__.py +0 -0
- letta/humans/examples/basic.txt +1 -0
- letta/humans/examples/cs_phd.txt +9 -0
- letta/interface.py +314 -0
- letta/llm_api/__init__.py +0 -0
- letta/llm_api/anthropic.py +383 -0
- letta/llm_api/azure_openai.py +155 -0
- letta/llm_api/cohere.py +396 -0
- letta/llm_api/google_ai.py +468 -0
- letta/llm_api/llm_api_tools.py +485 -0
- letta/llm_api/openai.py +470 -0
- letta/local_llm/README.md +3 -0
- letta/local_llm/__init__.py +0 -0
- letta/local_llm/chat_completion_proxy.py +279 -0
- letta/local_llm/constants.py +31 -0
- letta/local_llm/function_parser.py +68 -0
- letta/local_llm/grammars/__init__.py +0 -0
- letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
- letta/local_llm/grammars/json.gbnf +26 -0
- letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
- letta/local_llm/groq/api.py +97 -0
- letta/local_llm/json_parser.py +202 -0
- letta/local_llm/koboldcpp/api.py +62 -0
- letta/local_llm/koboldcpp/settings.py +23 -0
- letta/local_llm/llamacpp/api.py +58 -0
- letta/local_llm/llamacpp/settings.py +22 -0
- letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
- letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
- letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
- letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
- letta/local_llm/lmstudio/api.py +100 -0
- letta/local_llm/lmstudio/settings.py +29 -0
- letta/local_llm/ollama/api.py +88 -0
- letta/local_llm/ollama/settings.py +32 -0
- letta/local_llm/settings/__init__.py +0 -0
- letta/local_llm/settings/deterministic_mirostat.py +45 -0
- letta/local_llm/settings/settings.py +72 -0
- letta/local_llm/settings/simple.py +28 -0
- letta/local_llm/utils.py +265 -0
- letta/local_llm/vllm/api.py +63 -0
- letta/local_llm/webui/api.py +60 -0
- letta/local_llm/webui/legacy_api.py +58 -0
- letta/local_llm/webui/legacy_settings.py +23 -0
- letta/local_llm/webui/settings.py +24 -0
- letta/log.py +76 -0
- letta/main.py +437 -0
- letta/memory.py +440 -0
- letta/metadata.py +884 -0
- letta/openai_backcompat/__init__.py +0 -0
- letta/openai_backcompat/openai_object.py +437 -0
- letta/persistence_manager.py +148 -0
- letta/personas/__init__.py +0 -0
- letta/personas/examples/anna_pa.txt +13 -0
- letta/personas/examples/google_search_persona.txt +15 -0
- letta/personas/examples/memgpt_doc.txt +6 -0
- letta/personas/examples/memgpt_starter.txt +4 -0
- letta/personas/examples/sam.txt +14 -0
- letta/personas/examples/sam_pov.txt +14 -0
- letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
- letta/personas/examples/sqldb/test.db +0 -0
- letta/prompts/__init__.py +0 -0
- letta/prompts/gpt_summarize.py +14 -0
- letta/prompts/gpt_system.py +26 -0
- letta/prompts/system/memgpt_base.txt +49 -0
- letta/prompts/system/memgpt_chat.txt +58 -0
- letta/prompts/system/memgpt_chat_compressed.txt +13 -0
- letta/prompts/system/memgpt_chat_fstring.txt +51 -0
- letta/prompts/system/memgpt_doc.txt +50 -0
- letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
- letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
- letta/prompts/system/memgpt_modified_chat.txt +23 -0
- letta/pytest.ini +0 -0
- letta/schemas/agent.py +117 -0
- letta/schemas/api_key.py +21 -0
- letta/schemas/block.py +135 -0
- letta/schemas/document.py +21 -0
- letta/schemas/embedding_config.py +54 -0
- letta/schemas/enums.py +35 -0
- letta/schemas/job.py +38 -0
- letta/schemas/letta_base.py +80 -0
- letta/schemas/letta_message.py +175 -0
- letta/schemas/letta_request.py +23 -0
- letta/schemas/letta_response.py +28 -0
- letta/schemas/llm_config.py +54 -0
- letta/schemas/memory.py +224 -0
- letta/schemas/message.py +727 -0
- letta/schemas/openai/chat_completion_request.py +123 -0
- letta/schemas/openai/chat_completion_response.py +136 -0
- letta/schemas/openai/chat_completions.py +123 -0
- letta/schemas/openai/embedding_response.py +11 -0
- letta/schemas/openai/openai.py +157 -0
- letta/schemas/organization.py +20 -0
- letta/schemas/passage.py +80 -0
- letta/schemas/source.py +62 -0
- letta/schemas/tool.py +143 -0
- letta/schemas/usage.py +18 -0
- letta/schemas/user.py +33 -0
- letta/server/__init__.py +0 -0
- letta/server/constants.py +6 -0
- letta/server/rest_api/__init__.py +0 -0
- letta/server/rest_api/admin/__init__.py +0 -0
- letta/server/rest_api/admin/agents.py +21 -0
- letta/server/rest_api/admin/tools.py +83 -0
- letta/server/rest_api/admin/users.py +98 -0
- letta/server/rest_api/app.py +193 -0
- letta/server/rest_api/auth/__init__.py +0 -0
- letta/server/rest_api/auth/index.py +43 -0
- letta/server/rest_api/auth_token.py +22 -0
- letta/server/rest_api/interface.py +726 -0
- letta/server/rest_api/routers/__init__.py +0 -0
- letta/server/rest_api/routers/openai/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
- letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
- letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
- letta/server/rest_api/routers/v1/__init__.py +15 -0
- letta/server/rest_api/routers/v1/agents.py +543 -0
- letta/server/rest_api/routers/v1/blocks.py +73 -0
- letta/server/rest_api/routers/v1/jobs.py +46 -0
- letta/server/rest_api/routers/v1/llms.py +28 -0
- letta/server/rest_api/routers/v1/organizations.py +61 -0
- letta/server/rest_api/routers/v1/sources.py +199 -0
- letta/server/rest_api/routers/v1/tools.py +103 -0
- letta/server/rest_api/routers/v1/users.py +109 -0
- letta/server/rest_api/static_files.py +74 -0
- letta/server/rest_api/utils.py +69 -0
- letta/server/server.py +1995 -0
- letta/server/startup.sh +8 -0
- letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
- letta/server/static_files/assets/index-156816da.css +1 -0
- letta/server/static_files/assets/index-486e3228.js +274 -0
- letta/server/static_files/favicon.ico +0 -0
- letta/server/static_files/index.html +39 -0
- letta/server/static_files/memgpt_logo_transparent.png +0 -0
- letta/server/utils.py +46 -0
- letta/server/ws_api/__init__.py +0 -0
- letta/server/ws_api/example_client.py +104 -0
- letta/server/ws_api/interface.py +108 -0
- letta/server/ws_api/protocol.py +100 -0
- letta/server/ws_api/server.py +145 -0
- letta/settings.py +165 -0
- letta/streaming_interface.py +396 -0
- letta/system.py +207 -0
- letta/utils.py +1065 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/chat_completion_proxy.py
@@ -0,0 +1,279 @@
+"""Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
+
+import uuid
+
+import requests
+
+from letta.constants import CLI_WARNING_PREFIX
+from letta.errors import LocalLLMConnectionError, LocalLLMError
+from letta.local_llm.constants import DEFAULT_WRAPPER
+from letta.local_llm.function_parser import patch_function
+from letta.local_llm.grammars.gbnf_grammar_generator import (
+    create_dynamic_model_from_function,
+    generate_gbnf_grammar_and_documentation,
+)
+from letta.local_llm.groq.api import get_groq_completion
+from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
+from letta.local_llm.llamacpp.api import get_llamacpp_completion
+from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
+from letta.local_llm.lmstudio.api import get_lmstudio_completion
+from letta.local_llm.ollama.api import get_ollama_completion
+from letta.local_llm.utils import count_tokens, get_available_wrappers
+from letta.local_llm.vllm.api import get_vllm_completion
+from letta.local_llm.webui.api import get_webui_completion
+from letta.local_llm.webui.legacy_api import (
+    get_webui_completion as get_webui_completion_legacy,
+)
+from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
+from letta.schemas.openai.chat_completion_response import (
+    ChatCompletionResponse,
+    Choice,
+    Message,
+    ToolCall,
+    UsageStatistics,
+)
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+has_shown_warning = False
+grammar_supported_backends = ["koboldcpp", "llamacpp", "webui", "webui-legacy"]
+
+
+def get_chat_completion(
+    model,
+    # no model required (except for Ollama), since the model is fixed to whatever you set in your own backend
+    messages,
+    functions=None,
+    functions_python=None,
+    function_call="auto",
+    context_window=None,
+    user=None,
+    # required
+    wrapper=None,
+    endpoint=None,
+    endpoint_type=None,
+    # optional cleanup
+    function_correction=True,
+    # extra hints to allow for additional prompt formatting hacks
+    # TODO this could alternatively be supported via passing function_call="send_message" into the wrapper
+    first_message=False,
+    # optional auth headers
+    auth_type=None,
+    auth_key=None,
+) -> ChatCompletionResponse:
+    from letta.utils import printd
+
+    assert context_window is not None, "Local LLM calls need the context length to be explicitly set"
+    assert endpoint is not None, "Local LLM calls need the endpoint (eg http://localendpoint:1234) to be explicitly set"
+    assert endpoint_type is not None, "Local LLM calls need the endpoint type (eg webui) to be explicitly set"
+    global has_shown_warning
+    grammar = None
+
+    # TODO: eventually just process Message object
+    if not isinstance(messages[0], dict):
+        messages = [m.to_openai_dict() for m in messages]
+
+    if function_call is not None and function_call != "auto":
+        raise ValueError(f"function_call == {function_call} not supported (auto or None only)")
+
+    available_wrappers = get_available_wrappers()
+    documentation = None
+
+    # Special case for if the call we're making is coming from the summarizer
+    if messages[0]["role"] == "system" and messages[0]["content"].strip() == SUMMARIZE_SYSTEM_MESSAGE.strip():
+        llm_wrapper = simple_summary_wrapper.SimpleSummaryWrapper()
+
+    # Select a default prompt formatter
+    elif wrapper is None:
+        # Warn the user that we're using the fallback
+        if not has_shown_warning:
+            print(
+                f"{CLI_WARNING_PREFIX}no wrapper specified for local LLM, using the default wrapper (you can remove this warning by specifying the wrapper with --model-wrapper)"
+            )
+            has_shown_warning = True
+
+        llm_wrapper = DEFAULT_WRAPPER()
+
+    # User provided an incorrect prompt formatter
+    elif wrapper not in available_wrappers:
+        raise ValueError(f"Could not find requested wrapper '{wrapper}' in available wrappers list:\n{', '.join(available_wrappers)}")
+
+    # User provided a correct prompt formatter
+    else:
+        llm_wrapper = available_wrappers[wrapper]
+
+    # If the wrapper uses grammar, generate the grammar using the grammar generating function
+    # TODO move this to a flag
+    if wrapper is not None and "grammar" in wrapper:
+        # When using grammars, we don't want to do any extra output tricks like appending a response prefix
+        setattr(llm_wrapper, "assistant_prefix_extra_first_message", "")
+        setattr(llm_wrapper, "assistant_prefix_extra", "")
+
+        # TODO find a better way to do this than string matching (eg an attribute)
+        if "noforce" in wrapper:
+            # "noforce" means that the prompt formatter expects inner thoughts as a top-level parameter
+            # this is closer to the OpenAI style since it allows for messages w/o any function calls
+            # however, with bad LLMs it makes it easier for the LLM to "forget" to call any of the functions
+            grammar, documentation = generate_grammar_and_documentation(
+                functions_python=functions_python,
+                add_inner_thoughts_top_level=True,
+                add_inner_thoughts_param_level=False,
+                allow_only_inner_thoughts=True,
+            )
+        else:
+            # otherwise, the other prompt formatters will insert inner thoughts as a function call parameter (by default)
+            # this means that every response from the LLM will be required to call a function
+            grammar, documentation = generate_grammar_and_documentation(
+                functions_python=functions_python,
+                add_inner_thoughts_top_level=False,
+                add_inner_thoughts_param_level=True,
+                allow_only_inner_thoughts=False,
+            )
+        printd(grammar)
+
+    if grammar is not None and endpoint_type not in grammar_supported_backends:
+        print(
+            f"{CLI_WARNING_PREFIX}grammars are currently not supported when using {endpoint_type} as the Letta local LLM backend (supported: {', '.join(grammar_supported_backends)})"
+        )
+        grammar = None
+
+    # First step: turn the message sequence into a prompt that the model expects
+    try:
+        # if hasattr(llm_wrapper, "supports_first_message"):
+        if hasattr(llm_wrapper, "supports_first_message") and llm_wrapper.supports_first_message:
+            prompt = llm_wrapper.chat_completion_to_prompt(
+                messages=messages, functions=functions, first_message=first_message, function_documentation=documentation
+            )
+        else:
+            prompt = llm_wrapper.chat_completion_to_prompt(messages=messages, functions=functions, function_documentation=documentation)
+
+        printd(prompt)
+    except Exception as e:
+        print(e)
+        raise LocalLLMError(
+            f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
+        )
+
+    try:
+        if endpoint_type == "webui":
+            result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "webui-legacy":
+            result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "lmstudio":
+            result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
+        elif endpoint_type == "lmstudio-legacy":
+            result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="chat")
+        elif endpoint_type == "llamacpp":
+            result, usage = get_llamacpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "koboldcpp":
+            result, usage = get_koboldcpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "ollama":
+            result, usage = get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
+        elif endpoint_type == "vllm":
+            result, usage = get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user)
+        elif endpoint_type == "groq":
+            result, usage = get_groq_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
+        else:
+            raise LocalLLMError(
+                f"Invalid endpoint type {endpoint_type}, please set variable depending on your backend (webui, lmstudio, llamacpp, koboldcpp)"
+            )
+    except requests.exceptions.ConnectionError as e:
+        raise LocalLLMConnectionError(f"Unable to connect to endpoint {endpoint}")
+
+    if result is None or result == "":
+        raise LocalLLMError(f"Got back an empty response string from {endpoint}")
+    printd(f"Raw LLM output:\n====\n{result}\n====")
+
+    try:
+        if hasattr(llm_wrapper, "supports_first_message") and llm_wrapper.supports_first_message:
+            chat_completion_result = llm_wrapper.output_to_chat_completion_response(result, first_message=first_message)
+        else:
+            chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
+        printd(json_dumps(chat_completion_result, indent=2))
+    except Exception as e:
+        raise LocalLLMError(f"Failed to parse JSON from local LLM response - error: {str(e)}")
+
+    # Run through some manual function correction (optional)
+    if function_correction:
+        chat_completion_result = patch_function(message_history=messages, new_message=chat_completion_result)
+
+    # Fill in potential missing usage information (used for tracking token use)
+    if not ("prompt_tokens" in usage and "completion_tokens" in usage and "total_tokens" in usage):
+        raise LocalLLMError(f"usage dict in response was missing fields ({usage})")
+
+    if usage["prompt_tokens"] is None:
+        printd(f"usage dict was missing prompt_tokens, computing on-the-fly...")
+        usage["prompt_tokens"] = count_tokens(prompt)
+
+    # NOTE: we should compute on-the-fly anyways since we might have to correct for errors during JSON parsing
+    usage["completion_tokens"] = count_tokens(json_dumps(chat_completion_result))
+    """
+    if usage["completion_tokens"] is None:
+        printd(f"usage dict was missing completion_tokens, computing on-the-fly...")
+        # chat_completion_result is dict with 'role' and 'content'
+        # token counter wants a string
+        usage["completion_tokens"] = count_tokens(json_dumps(chat_completion_result))
+    """
+
+    # NOTE: this is the token count that matters most
+    if usage["total_tokens"] is None:
+        printd(f"usage dict was missing total_tokens, computing on-the-fly...")
+        usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
+
+    # unpack with response.choices[0].message.content
+    response = ChatCompletionResponse(
+        id=str(uuid.uuid4()),  # TODO something better?
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    role=chat_completion_result["role"],
+                    content=chat_completion_result["content"],
+                    tool_calls=(
+                        [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
+                        if "function_call" in chat_completion_result
+                        else []
+                    ),
+                ),
+            )
+        ],
+        created=get_utc_time(),
+        model=model,
+        # "This fingerprint represents the backend configuration that the model runs with."
+        # system_fingerprint=user if user is not None else "null",
+        system_fingerprint=None,
+        object="chat.completion",
+        usage=UsageStatistics(**usage),
+    )
+    printd(response)
+    return response
+
+
+def generate_grammar_and_documentation(
+    functions_python: dict,
+    add_inner_thoughts_top_level: bool,
+    add_inner_thoughts_param_level: bool,
+    allow_only_inner_thoughts: bool,
+):
+    from letta.utils import printd
+
+    assert not (
+        add_inner_thoughts_top_level and add_inner_thoughts_param_level
+    ), "Can only place inner thoughts in one location in the grammar generator"
+
+    grammar_function_models = []
+    # create_dynamic_model_from_function will add inner thoughts to the function parameters if add_inner_thoughts is True.
+    # generate_gbnf_grammar_and_documentation will add inner thoughts to the outer object of the function parameters if add_inner_thoughts is True.
+    for key, func in functions_python.items():
+        grammar_function_models.append(create_dynamic_model_from_function(func, add_inner_thoughts=add_inner_thoughts_param_level))
+    grammar, documentation = generate_gbnf_grammar_and_documentation(
+        grammar_function_models,
+        outer_object_name="function",
+        outer_object_content="params",
+        model_prefix="function",
+        fields_prefix="params",
+        add_inner_thoughts=add_inner_thoughts_top_level,
+        allow_only_inner_thoughts=allow_only_inner_thoughts,
+    )
+    printd(grammar)
+    return grammar, documentation
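For orientation, the following is a minimal usage sketch rather than code from the package: it assumes a llama.cpp server at http://localhost:8080, an 8192-token context window, and a hand-written send_message function schema, and it calls the get_chat_completion shim defined above.

# Minimal sketch of calling the shim above; the endpoint, context window, and
# function schema are illustrative assumptions, not values shipped in the package.
from letta.local_llm.chat_completion_proxy import get_chat_completion

functions = [
    {   # OpenAI-style function schema (illustrative)
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string", "description": "Message contents."}},
            "required": ["message"],
        },
    }
]
messages = [{"role": "user", "content": "Hello!"}]  # plain OpenAI-style dicts are accepted as-is

response = get_chat_completion(
    model=None,                        # only the ollama/vllm/groq branches use the model name
    messages=messages,
    functions=functions,
    context_window=8192,               # must be set explicitly (asserted at the top of the function)
    wrapper=None,                      # falls back to DEFAULT_WRAPPER with a one-time CLI warning
    endpoint="http://localhost:8080",  # assumed llama.cpp address
    endpoint_type="llamacpp",
)
print(response.choices[0].message.content)  # unpack pattern noted in the source comment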
letta/local_llm/constants.py
@@ -0,0 +1,31 @@
+# import letta.local_llm.llm_chat_completion_wrappers.airoboros as airoboros
+from letta.local_llm.llm_chat_completion_wrappers.chatml import (
+    ChatMLInnerMonologueWrapper,
+)
+
+DEFAULT_ENDPOINTS = {
+    # Local
+    "koboldcpp": "http://localhost:5001",
+    "llamacpp": "http://localhost:8080",
+    "lmstudio": "http://localhost:1234",
+    "lmstudio-legacy": "http://localhost:1234",
+    "ollama": "http://localhost:11434",
+    "webui-legacy": "http://localhost:5000",
+    "webui": "http://localhost:5000",
+    "vllm": "http://localhost:8000",
+    # APIs
+    "openai": "https://api.openai.com",
+    "anthropic": "https://api.anthropic.com",
+    "groq": "https://api.groq.com/openai",
+}
+
+DEFAULT_OLLAMA_MODEL = "dolphin2.2-mistral:7b-q6_K"
+
+# DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper
+# DEFAULT_WRAPPER_NAME = "airoboros-l2-70b-2.1"
+
+DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
+DEFAULT_WRAPPER_NAME = "chatml"
+
+INNER_THOUGHTS_KWARG = "inner_thoughts"
+INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
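As a brief illustration (not from the package), these defaults could be consumed like so when wiring up a local backend; choosing "ollama" here is only an example.

from letta.local_llm.constants import (
    DEFAULT_ENDPOINTS,
    DEFAULT_OLLAMA_MODEL,
    DEFAULT_WRAPPER,
)

endpoint_type = "ollama"                     # example backend choice
endpoint = DEFAULT_ENDPOINTS[endpoint_type]  # "http://localhost:11434"
model = DEFAULT_OLLAMA_MODEL                 # only meaningful for the ollama backend
wrapper = DEFAULT_WRAPPER()                  # no-arg construction, as in chat_completion_proxy.py
print(endpoint, model, type(wrapper).__name__)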
letta/local_llm/function_parser.py
@@ -0,0 +1,68 @@
+import copy
+import json
+
+from letta.utils import json_dumps, json_loads
+
+NO_HEARTBEAT_FUNCS = ["send_message", "pause_heartbeats"]
+
+
+def insert_heartbeat(message):
+    # message_copy = message.copy()
+    message_copy = copy.deepcopy(message)
+
+    if message_copy.get("function_call"):
+        # function_name = message.get("function_call").get("name")
+        params = message_copy.get("function_call").get("arguments")
+        params = json_loads(params)
+        params["request_heartbeat"] = True
+        message_copy["function_call"]["arguments"] = json_dumps(params)
+
+    elif message_copy.get("tool_calls"):
+        # function_name = message.get("tool_calls")[0].get("function").get("name")
+        params = message_copy.get("tool_calls")[0].get("function").get("arguments")
+        params = json_loads(params)
+        params["request_heartbeat"] = True
+        message_copy["tool_calls"][0]["function"]["arguments"] = json_dumps(params)
+
+    return message_copy
+
+
+def heartbeat_correction(message_history, new_message):
+    """Add heartbeats where we think the agent forgot to add them themselves
+
+    If the last message in the stack is a user message and the new message is an assistant func call, fix the heartbeat
+
+    See: https://github.com/cpacker/Letta/issues/601
+    """
+    if len(message_history) < 1:
+        return None
+
+    last_message_was_user = False
+    if message_history[-1]["role"] == "user":
+        try:
+            content = json_loads(message_history[-1]["content"])
+        except json.JSONDecodeError:
+            return None
+        # Check if it's a user message or system message
+        if content["type"] == "user_message":
+            last_message_was_user = True
+
+    new_message_is_heartbeat_function = False
+    if new_message["role"] == "assistant":
+        if new_message.get("function_call") or new_message.get("tool_calls"):
+            if new_message.get("function_call"):
+                function_name = new_message.get("function_call").get("name")
+            elif new_message.get("tool_calls"):
+                function_name = new_message.get("tool_calls")[0].get("function").get("name")
+            if function_name not in NO_HEARTBEAT_FUNCS:
+                new_message_is_heartbeat_function = True
+
+    if last_message_was_user and new_message_is_heartbeat_function:
+        return insert_heartbeat(new_message)
+    else:
+        return None
+
+
+def patch_function(message_history, new_message):
+    corrected_output = heartbeat_correction(message_history=message_history, new_message=new_message)
+    return corrected_output if corrected_output is not None else new_message
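To make the correction logic concrete, here is a small illustrative sketch (not part of the package); the message payloads and the core_memory_append function name are assumptions chosen to trigger the heartbeat patch.

from letta.local_llm.function_parser import patch_function
from letta.utils import json_dumps

# Last message in history is a packed user message, so a heartbeat is expected.
history = [{"role": "user", "content": json_dumps({"type": "user_message", "message": "Remember that I like math."})}]

# Assistant replied with a function call but forgot request_heartbeat.
new_message = {
    "role": "assistant",
    "content": "Noting that down.",
    "function_call": {
        "name": "core_memory_append",  # assumed function name, not in NO_HEARTBEAT_FUNCS
        "arguments": json_dumps({"name": "human", "content": "likes math"}),
    },
}

patched = patch_function(message_history=history, new_message=new_message)
print(patched["function_call"]["arguments"])  # now includes "request_heartbeat": true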