letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +24 -0
- letta/__main__.py +3 -0
- letta/agent.py +1427 -0
- letta/agent_store/chroma.py +295 -0
- letta/agent_store/db.py +546 -0
- letta/agent_store/lancedb.py +177 -0
- letta/agent_store/milvus.py +198 -0
- letta/agent_store/qdrant.py +201 -0
- letta/agent_store/storage.py +188 -0
- letta/benchmark/benchmark.py +96 -0
- letta/benchmark/constants.py +14 -0
- letta/cli/cli.py +689 -0
- letta/cli/cli_config.py +1282 -0
- letta/cli/cli_load.py +166 -0
- letta/client/__init__.py +0 -0
- letta/client/admin.py +171 -0
- letta/client/client.py +2360 -0
- letta/client/streaming.py +90 -0
- letta/client/utils.py +61 -0
- letta/config.py +484 -0
- letta/configs/anthropic.json +13 -0
- letta/configs/letta_hosted.json +11 -0
- letta/configs/openai.json +12 -0
- letta/constants.py +134 -0
- letta/credentials.py +140 -0
- letta/data_sources/connectors.py +247 -0
- letta/embeddings.py +218 -0
- letta/errors.py +26 -0
- letta/functions/__init__.py +0 -0
- letta/functions/function_sets/base.py +174 -0
- letta/functions/function_sets/extras.py +132 -0
- letta/functions/functions.py +105 -0
- letta/functions/schema_generator.py +205 -0
- letta/humans/__init__.py +0 -0
- letta/humans/examples/basic.txt +1 -0
- letta/humans/examples/cs_phd.txt +9 -0
- letta/interface.py +314 -0
- letta/llm_api/__init__.py +0 -0
- letta/llm_api/anthropic.py +383 -0
- letta/llm_api/azure_openai.py +155 -0
- letta/llm_api/cohere.py +396 -0
- letta/llm_api/google_ai.py +468 -0
- letta/llm_api/llm_api_tools.py +485 -0
- letta/llm_api/openai.py +470 -0
- letta/local_llm/README.md +3 -0
- letta/local_llm/__init__.py +0 -0
- letta/local_llm/chat_completion_proxy.py +279 -0
- letta/local_llm/constants.py +31 -0
- letta/local_llm/function_parser.py +68 -0
- letta/local_llm/grammars/__init__.py +0 -0
- letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
- letta/local_llm/grammars/json.gbnf +26 -0
- letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
- letta/local_llm/groq/api.py +97 -0
- letta/local_llm/json_parser.py +202 -0
- letta/local_llm/koboldcpp/api.py +62 -0
- letta/local_llm/koboldcpp/settings.py +23 -0
- letta/local_llm/llamacpp/api.py +58 -0
- letta/local_llm/llamacpp/settings.py +22 -0
- letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
- letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
- letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
- letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
- letta/local_llm/lmstudio/api.py +100 -0
- letta/local_llm/lmstudio/settings.py +29 -0
- letta/local_llm/ollama/api.py +88 -0
- letta/local_llm/ollama/settings.py +32 -0
- letta/local_llm/settings/__init__.py +0 -0
- letta/local_llm/settings/deterministic_mirostat.py +45 -0
- letta/local_llm/settings/settings.py +72 -0
- letta/local_llm/settings/simple.py +28 -0
- letta/local_llm/utils.py +265 -0
- letta/local_llm/vllm/api.py +63 -0
- letta/local_llm/webui/api.py +60 -0
- letta/local_llm/webui/legacy_api.py +58 -0
- letta/local_llm/webui/legacy_settings.py +23 -0
- letta/local_llm/webui/settings.py +24 -0
- letta/log.py +76 -0
- letta/main.py +437 -0
- letta/memory.py +440 -0
- letta/metadata.py +884 -0
- letta/openai_backcompat/__init__.py +0 -0
- letta/openai_backcompat/openai_object.py +437 -0
- letta/persistence_manager.py +148 -0
- letta/personas/__init__.py +0 -0
- letta/personas/examples/anna_pa.txt +13 -0
- letta/personas/examples/google_search_persona.txt +15 -0
- letta/personas/examples/memgpt_doc.txt +6 -0
- letta/personas/examples/memgpt_starter.txt +4 -0
- letta/personas/examples/sam.txt +14 -0
- letta/personas/examples/sam_pov.txt +14 -0
- letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
- letta/personas/examples/sqldb/test.db +0 -0
- letta/prompts/__init__.py +0 -0
- letta/prompts/gpt_summarize.py +14 -0
- letta/prompts/gpt_system.py +26 -0
- letta/prompts/system/memgpt_base.txt +49 -0
- letta/prompts/system/memgpt_chat.txt +58 -0
- letta/prompts/system/memgpt_chat_compressed.txt +13 -0
- letta/prompts/system/memgpt_chat_fstring.txt +51 -0
- letta/prompts/system/memgpt_doc.txt +50 -0
- letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
- letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
- letta/prompts/system/memgpt_modified_chat.txt +23 -0
- letta/pytest.ini +0 -0
- letta/schemas/agent.py +117 -0
- letta/schemas/api_key.py +21 -0
- letta/schemas/block.py +135 -0
- letta/schemas/document.py +21 -0
- letta/schemas/embedding_config.py +54 -0
- letta/schemas/enums.py +35 -0
- letta/schemas/job.py +38 -0
- letta/schemas/letta_base.py +80 -0
- letta/schemas/letta_message.py +175 -0
- letta/schemas/letta_request.py +23 -0
- letta/schemas/letta_response.py +28 -0
- letta/schemas/llm_config.py +54 -0
- letta/schemas/memory.py +224 -0
- letta/schemas/message.py +727 -0
- letta/schemas/openai/chat_completion_request.py +123 -0
- letta/schemas/openai/chat_completion_response.py +136 -0
- letta/schemas/openai/chat_completions.py +123 -0
- letta/schemas/openai/embedding_response.py +11 -0
- letta/schemas/openai/openai.py +157 -0
- letta/schemas/organization.py +20 -0
- letta/schemas/passage.py +80 -0
- letta/schemas/source.py +62 -0
- letta/schemas/tool.py +143 -0
- letta/schemas/usage.py +18 -0
- letta/schemas/user.py +33 -0
- letta/server/__init__.py +0 -0
- letta/server/constants.py +6 -0
- letta/server/rest_api/__init__.py +0 -0
- letta/server/rest_api/admin/__init__.py +0 -0
- letta/server/rest_api/admin/agents.py +21 -0
- letta/server/rest_api/admin/tools.py +83 -0
- letta/server/rest_api/admin/users.py +98 -0
- letta/server/rest_api/app.py +193 -0
- letta/server/rest_api/auth/__init__.py +0 -0
- letta/server/rest_api/auth/index.py +43 -0
- letta/server/rest_api/auth_token.py +22 -0
- letta/server/rest_api/interface.py +726 -0
- letta/server/rest_api/routers/__init__.py +0 -0
- letta/server/rest_api/routers/openai/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
- letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
- letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
- letta/server/rest_api/routers/v1/__init__.py +15 -0
- letta/server/rest_api/routers/v1/agents.py +543 -0
- letta/server/rest_api/routers/v1/blocks.py +73 -0
- letta/server/rest_api/routers/v1/jobs.py +46 -0
- letta/server/rest_api/routers/v1/llms.py +28 -0
- letta/server/rest_api/routers/v1/organizations.py +61 -0
- letta/server/rest_api/routers/v1/sources.py +199 -0
- letta/server/rest_api/routers/v1/tools.py +103 -0
- letta/server/rest_api/routers/v1/users.py +109 -0
- letta/server/rest_api/static_files.py +74 -0
- letta/server/rest_api/utils.py +69 -0
- letta/server/server.py +1995 -0
- letta/server/startup.sh +8 -0
- letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
- letta/server/static_files/assets/index-156816da.css +1 -0
- letta/server/static_files/assets/index-486e3228.js +274 -0
- letta/server/static_files/favicon.ico +0 -0
- letta/server/static_files/index.html +39 -0
- letta/server/static_files/memgpt_logo_transparent.png +0 -0
- letta/server/utils.py +46 -0
- letta/server/ws_api/__init__.py +0 -0
- letta/server/ws_api/example_client.py +104 -0
- letta/server/ws_api/interface.py +108 -0
- letta/server/ws_api/protocol.py +100 -0
- letta/server/ws_api/server.py +145 -0
- letta/settings.py +165 -0
- letta/streaming_interface.py +396 -0
- letta/system.py +207 -0
- letta/utils.py +1065 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0

letta/local_llm/grammars/json.gbnf
@@ -0,0 +1,26 @@
+# https://github.com/ggerganov/llama.cpp/blob/master/grammars/json.gbnf
+root   ::= object
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
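
Note: since root ::= object, this grammar derives exactly the object subset of JSON: anything it accepts also parses with Python's json module, while valid JSON that is not a top-level object is rejected. A minimal plain-Python illustration (not shipped in the package):

    import json

    # Derivable from root: a JSON object, so json.loads agrees with the grammar.
    accepted = '{"name": "letta", "tags": ["agent", "memory"], "version": 0.1}'
    assert json.loads(accepted)["name"] == "letta"

    # Valid JSON, but NOT derivable from root ::= object (top level must be an object),
    # so a grammar-constrained sampler could never emit it.
    rejected = "[1, 2, 3]"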

letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf
@@ -0,0 +1,32 @@
+root ::= Function
+Function ::= SendMessage | PauseHeartbeats | CoreMemoryAppend | CoreMemoryReplace | ConversationSearch | ConversationSearchDate | ArchivalMemoryInsert | ArchivalMemorySearch
+SendMessage ::= "{" ws "\"function\":" ws "\"send_message\"," ws "\"params\":" ws SendMessageParams "}"
+PauseHeartbeats ::= "{" ws "\"function\":" ws "\"pause_heartbeats\"," ws "\"params\":" ws PauseHeartbeatsParams "}"
+CoreMemoryAppend ::= "{" ws "\"function\":" ws "\"core_memory_append\"," ws "\"params\":" ws CoreMemoryAppendParams "}"
+CoreMemoryReplace ::= "{" ws "\"function\":" ws "\"core_memory_replace\"," ws "\"params\":" ws CoreMemoryReplaceParams "}"
+ConversationSearch ::= "{" ws "\"function\":" ws "\"conversation_search\"," ws "\"params\":" ws ConversationSearchParams "}"
+ConversationSearchDate ::= "{" ws "\"function\":" ws "\"conversation_search_date\"," ws "\"params\":" ws ConversationSearchDateParams "}"
+ArchivalMemoryInsert ::= "{" ws "\"function\":" ws "\"archival_memory_insert\"," ws "\"params\":" ws ArchivalMemoryInsertParams "}"
+ArchivalMemorySearch ::= "{" ws "\"function\":" ws "\"archival_memory_search\"," ws "\"params\":" ws ArchivalMemorySearchParams "}"
+SendMessageParams ::= "{" ws InnerThoughtsParam "," ws "\"message\":" ws string ws "}"
+PauseHeartbeatsParams ::= "{" ws InnerThoughtsParam "," ws "\"minutes\":" ws number ws "}"
+CoreMemoryAppendParams ::= "{" ws InnerThoughtsParam "," ws "\"name\":" ws namestring "," ws "\"content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+CoreMemoryReplaceParams ::= "{" ws InnerThoughtsParam "," ws "\"name\":" ws namestring "," ws "\"old_content\":" ws string "," ws "\"new_content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+ConversationSearchParams ::= "{" ws InnerThoughtsParam "," ws "\"query\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+ConversationSearchDateParams ::= "{" ws InnerThoughtsParam "," ws "\"start_date\":" ws string ws "," ws "\"end_date\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+ArchivalMemoryInsertParams ::= "{" ws InnerThoughtsParam "," ws "\"content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+ArchivalMemorySearchParams ::= "{" ws InnerThoughtsParam "," ws "\"query\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+InnerThoughtsParam ::= "\"inner_thoughts\":" ws string
+RequestHeartbeatParam ::= "\"request_heartbeat\":" ws boolean
+namestring ::= "\"human\"" | "\"persona\""
+boolean ::= "true" | "false"
+number ::= [0-9]+
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
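
Note: every string this grammar derives is one of the eight Letta function calls, encoded as a single JSON object. For instance, the SendMessage production only admits objects shaped like the sample below (a plain-Python check, not shipped code; note from the SendMessageParams rule that send_message takes no request_heartbeat):

    import json

    # One terminal string derivable from SendMessage: params carry inner_thoughts
    # plus the call's own arguments.
    sample = '{"function": "send_message", "params": {"inner_thoughts": "Greet the user.", "message": "Hello!"}}'
    call = json.loads(sample)
    assert call["function"] == "send_message" and "message" in call["params"]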

letta/local_llm/groq/api.py
@@ -0,0 +1,97 @@
+from typing import Tuple
+from urllib.parse import urljoin
+
+from letta.local_llm.settings.settings import get_completions_settings
+from letta.local_llm.utils import post_json_auth_request
+from letta.utils import count_tokens
+
+API_CHAT_SUFFIX = "/v1/chat/completions"
+# LMSTUDIO_API_COMPLETIONS_SUFFIX = "/v1/completions"
+
+
+def get_groq_completion(endpoint: str, auth_type: str, auth_key: str, model: str, prompt: str, context_window: int) -> Tuple[str, dict]:
+    """TODO no support for function calling OR raw completions, so we need to route the request into /chat/completions instead"""
+    from letta.utils import printd
+
+    prompt_tokens = count_tokens(prompt)
+    if prompt_tokens > context_window:
+        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+    settings = get_completions_settings()
+    settings.update(
+        {
+            # see https://console.groq.com/docs/text-chat, supports:
+            # "temperature": ,
+            # "max_tokens": ,
+            # "top_p",
+            # "stream",
+            # "stop",
+            # Groq only allows 4 stop tokens
+            "stop": [
+                "\nUSER",
+                "\nASSISTANT",
+                "\nFUNCTION",
+                # "\nFUNCTION RETURN",
+                # "<|im_start|>",
+                # "<|im_end|>",
+                # "<|im_sep|>",
+                # # airoboros specific
+                # "\n### ",
+                # # '\n' +
+                # # '</s>',
+                # # '<|',
+                # "\n#",
+                # # "\n\n\n",
+                # # prevent chaining function calls / multi json objects / run-on generations
+                # # NOTE: this requires the ability to patch the extra '}}' back into the prompt
+                " }\n}\n",
+            ]
+        }
+    )
+
+    URI = urljoin(endpoint.strip("/") + "/", API_CHAT_SUFFIX.strip("/"))
+
+    # Settings for the generation, includes the prompt + stop tokens, max length, etc
+    request = settings
+    request["model"] = model
+    request["max_tokens"] = context_window
+    # NOTE: Hack for chat/completion-only endpoints: put the entire completion string inside the first message
+    message_structure = [{"role": "user", "content": prompt}]
+    request["messages"] = message_structure
+
+    if not endpoint.startswith(("http://", "https://")):
+        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+    try:
+        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+        if response.status_code == 200:
+            result_full = response.json()
+            printd(f"JSON API response:\n{result_full}")
+            result = result_full["choices"][0]["message"]["content"]
+            usage = result_full.get("usage", None)
+        else:
+            # Example error: msg={"error":"Context length exceeded. Tokens in context: 8000, Context length: 8000"}
+            if "context length" in str(response.text).lower():
+                # "exceeds context length" is what appears in the LM Studio error message
+                # raise an alternate exception that matches OpenAI's message, which is "maximum context length"
+                raise Exception(f"Request exceeds maximum context length (code={response.status_code}, msg={response.text}, URI={URI})")
+            else:
+                raise Exception(
+                    f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                    + f" Make sure that the inference server is running and reachable at {URI}."
+                )
+    except:
+        # TODO handle gracefully
+        raise
+
+    # Pass usage statistics back to main thread
+    # These are used to compute memory warning messages
+    completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+    usage = {
+        "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    return result, usage
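
Note: a call sketch for the function above. The endpoint, model name, key, and the auth_type string are placeholders/assumptions, not values shipped in this diff; the function itself appends /v1/chat/completions to the base URL:

    from letta.local_llm.groq.api import get_groq_completion

    text, usage = get_groq_completion(
        endpoint="https://api.groq.com/openai",  # base URL only; the chat suffix is appended internally
        auth_type="bearer_token",                # assumed scheme understood by post_json_auth_request
        auth_key="<GROQ_API_KEY>",               # placeholder
        model="llama3-70b-8192",                 # placeholder model id
        prompt="You are a helpful agent.\nUSER: hi\nASSISTANT:",
        context_window=8192,
    )
    print(text, usage["total_tokens"])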

letta/local_llm/json_parser.py
@@ -0,0 +1,202 @@
+import json
+import re
+
+from letta.errors import LLMJSONParsingError
+from letta.utils import json_loads
+
+
+def clean_json_string_extra_backslash(s):
+    """Clean extra backslashes out from stringified JSON
+
+    NOTE: Google AI Gemini API likes to include these
+    """
+    # Strip slashes that are used to escape single quotes and other backslashes
+    # Use json.loads to parse it correctly
+    while "\\\\" in s:
+        s = s.replace("\\\\", "\\")
+    return s
+
+
+def replace_escaped_underscores(string: str):
+    r"""Handles the case of escaped underscores, e.g.:
+
+    {
+      "function":"send\_message",
+      "params": {
+        "inner\_thoughts": "User is asking for information about themselves. Retrieving data from core memory.",
+        "message": "I know that you are Chad. Is there something specific you would like to know or talk about regarding yourself?"
+    """
+    return string.replace(r"\_", "_")
+
+
+def extract_first_json(string: str):
+    """Handles the case of two JSON objects back-to-back"""
+    from letta.utils import printd
+
+    depth = 0
+    start_index = None
+
+    for i, char in enumerate(string):
+        if char == "{":
+            if depth == 0:
+                start_index = i
+            depth += 1
+        elif char == "}":
+            depth -= 1
+            if depth == 0 and start_index is not None:
+                try:
+                    return json_loads(string[start_index : i + 1])
+                except json.JSONDecodeError as e:
+                    raise LLMJSONParsingError(f"Matched closing bracket, but decode failed with error: {str(e)}")
+    printd("No valid JSON object found.")
+    raise LLMJSONParsingError("Couldn't find starting bracket")
+
+
+def add_missing_heartbeat(llm_json):
+    """Manually insert heartbeat requests into messages that should have them
+
+    Use the following heuristic:
+      - if (function call is not send_message && prev message['role'] == user): insert heartbeat
+
+    Basically, if Letta is calling a function (not send_message) immediately after the user sending a message,
+    it probably is a retriever or insertion call, in which case we likely want to eventually reply with send_message
+
+    "message" = {
+      "role": "assistant",
+      "content": ...,
+      "function_call": {
+        "name": ...
+        "arguments": {
+          "arg1": val1,
+          ...
+        }
+      }
+    }
+    """
+    raise NotImplementedError
+
+
+def clean_and_interpret_send_message_json(json_string):
+    # If normal parsing fails, attempt to clean and extract manually
+    cleaned_json_string = re.sub(r"[^\x00-\x7F]+", "", json_string)  # Remove non-ASCII characters
+    function_match = re.search(r'"function":\s*"send_message"', cleaned_json_string)
+    inner_thoughts_match = re.search(r'"inner_thoughts":\s*"([^"]+)"', cleaned_json_string)
+    message_match = re.search(r'"message":\s*"([^"]+)"', cleaned_json_string)
+
+    if function_match and inner_thoughts_match and message_match:
+        return {
+            "function": "send_message",
+            "params": {
+                "inner_thoughts": inner_thoughts_match.group(1),
+                "message": message_match.group(1),
+            },
+        }
+    else:
+        raise LLMJSONParsingError(f"Couldn't manually extract send_message pattern from:\n{json_string}")
+
+
+def repair_json_string(json_string):
+    """
+    This function repairs a JSON string where line feeds were accidentally added
+    within string literals. The line feeds are replaced with the escaped line
+    feed sequence '\\n'.
+    """
+    new_string = ""
+    in_string = False
+    escape = False
+
+    for char in json_string:
+        if char == '"' and not escape:
+            in_string = not in_string
+        if char == "\\" and not escape:
+            escape = True
+        else:
+            escape = False
+        if char == "\n" and in_string:
+            new_string += "\\n"
+        else:
+            new_string += char
+
+    return new_string
+
+
+def repair_even_worse_json(json_string):
+    """
+    This function repairs a malformed JSON string where string literals are broken up and
+    not properly enclosed in quotes. It aims to consolidate everything between 'message': and
+    the two ending curly braces into one string for the 'message' field.
+    """
+    # State flags
+    in_message = False
+    in_string = False
+    escape = False
+    message_content = []
+
+    # Storage for the new JSON
+    new_json_parts = []
+
+    # Iterating through each character
+    for char in json_string:
+        if char == '"' and not escape:
+            in_string = not in_string
+            if not in_message:
+                # If we encounter a quote and are not in message, append normally
+                new_json_parts.append(char)
+        elif char == "\\" and not escape:
+            escape = True
+            new_json_parts.append(char)
+        else:
+            if escape:
+                escape = False
+            if in_message:
+                if char == "}":
+                    # Append the consolidated message and the closing characters then reset the flag
+                    new_json_parts.append('"{}"'.format("".join(message_content).replace("\n", " ")))
+                    new_json_parts.append(char)
+                    in_message = False
+                elif in_string or char.isalnum() or char.isspace() or char in ".',;:!":
+                    # Collect the message content, excluding structural characters
+                    message_content.append(char)
+            else:
+                # If we're not in message mode, append character to the output as is
+                new_json_parts.append(char)
+                if '"message":' in "".join(new_json_parts[-10:]):
+                    # If we detect "message": pattern, switch to message mode
+                    in_message = True
+                    message_content = []
+
+    # Joining everything to form the new JSON
+    repaired_json = "".join(new_json_parts)
+    return repaired_json
+
+
+def clean_json(raw_llm_output, messages=None, functions=None):
+    from letta.utils import printd
+
+    strategies = [
+        lambda output: json_loads(output),
+        lambda output: json_loads(output + "}"),
+        lambda output: json_loads(output + "}}"),
+        lambda output: json_loads(output + '"}}'),
+        # with strip and strip comma
+        lambda output: json_loads(output.strip().rstrip(",") + "}"),
+        lambda output: json_loads(output.strip().rstrip(",") + "}}"),
+        lambda output: json_loads(output.strip().rstrip(",") + '"}}'),
+        # more complex patchers
+        lambda output: json_loads(repair_json_string(output)),
+        lambda output: json_loads(repair_even_worse_json(output)),
+        lambda output: extract_first_json(output + "}}"),
+        lambda output: clean_and_interpret_send_message_json(output),
+        # replace underscores
+        lambda output: json_loads(replace_escaped_underscores(output)),
+        lambda output: extract_first_json(replace_escaped_underscores(output) + "}}"),
+    ]
+
+    for strategy in strategies:
+        try:
+            printd(f"Trying strategy: {strategy.__name__}")
+            return strategy(raw_llm_output)
+        except (json.JSONDecodeError, LLMJSONParsingError) as e:
+            printd(f"Strategy {strategy.__name__} failed with error: {e}")
+
+    raise LLMJSONParsingError(f"Failed to decode valid Letta JSON from LLM output:\n=====\n{raw_llm_output}\n=====")
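
Note: a sketch of the repair pipeline in action, assuming the package is importable. The raw string below is missing its two closing braces, which the output + "}}" strategy patches back on:

    from letta.local_llm.json_parser import clean_json

    # Truncated LLM output: the final "}}" was cut off by a stop token.
    raw = '{"function": "send_message", "params": {"inner_thoughts": "Say hi.", "message": "Hello!"'
    parsed = clean_json(raw)  # plain json_loads fails, raw + "}" fails, raw + "}}" succeeds
    assert parsed["params"]["message"] == "Hello!"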

letta/local_llm/koboldcpp/api.py
@@ -0,0 +1,62 @@
+from urllib.parse import urljoin
+
+from letta.local_llm.settings.settings import get_completions_settings
+from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+KOBOLDCPP_API_SUFFIX = "/api/v1/generate"
+
+
+def get_koboldcpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+    """See https://lite.koboldai.net/koboldcpp_api for API spec"""
+    from letta.utils import printd
+
+    prompt_tokens = count_tokens(prompt)
+    if prompt_tokens > context_window:
+        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+    # Settings for the generation, includes the prompt + stop tokens, max length, etc
+    settings = get_completions_settings()
+    request = settings
+    request["prompt"] = prompt
+    request["max_context_length"] = context_window
+    request["max_length"] = 400  # if we don't set this, it'll default to 100 which is quite short
+
+    # Set grammar
+    if grammar is not None:
+        request["grammar"] = grammar
+
+    if not endpoint.startswith(("http://", "https://")):
+        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+    try:
+        # NOTE: llama.cpp server returns the following when it's out of context
+        # curl: (52) Empty reply from server
+        URI = urljoin(endpoint.strip("/") + "/", KOBOLDCPP_API_SUFFIX.strip("/"))
+        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+        if response.status_code == 200:
+            result_full = response.json()
+            printd(f"JSON API response:\n{result_full}")
+            result = result_full["results"][0]["text"]
+        else:
+            raise Exception(
+                f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                + f" Make sure that the koboldcpp server is running and reachable at {URI}."
+            )
+
+    except:
+        # TODO handle gracefully
+        raise
+
+    # Pass usage statistics back to main thread
+    # These are used to compute memory warning messages
+    # KoboldCpp doesn't return anything?
+    # https://lite.koboldai.net/koboldcpp_api#/v1/post_v1_generate
+    completion_tokens = None
+    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+    usage = {
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    return result, usage
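
Note: usage sketch, assuming a local KoboldCpp server on its default port (5001) with no auth configured, so auth_type/auth_key are passed as None:

    from letta.local_llm.koboldcpp.api import get_koboldcpp_completion

    result, usage = get_koboldcpp_completion(
        endpoint="http://localhost:5001",  # assumed local server address
        auth_type=None,
        auth_key=None,
        prompt="USER: hi\nASSISTANT:",
        context_window=8192,
    )
    print(result, usage["prompt_tokens"])  # completion_tokens/total_tokens come back as None here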

letta/local_llm/koboldcpp/settings.py
@@ -0,0 +1,23 @@
+# see https://lite.koboldai.net/koboldcpp_api#/v1/post_v1_generate
+SIMPLE = {
+    "stop_sequence": [
+        "\nUSER:",
+        "\nASSISTANT:",
+        "\nFUNCTION RETURN:",
+        "\nUSER",
+        "\nASSISTANT",
+        "\nFUNCTION RETURN",
+        "\nFUNCTION",
+        "\nFUNC",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>",
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+    ],
+    # "max_context_length": LLM_MAX_TOKENS,
+    "max_length": 512,
+}

letta/local_llm/llamacpp/api.py
@@ -0,0 +1,58 @@
+from urllib.parse import urljoin
+
+from letta.local_llm.settings.settings import get_completions_settings
+from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+LLAMACPP_API_SUFFIX = "/completion"
+
+
+def get_llamacpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+    """See https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md for instructions on how to run the LLM web server"""
+    from letta.utils import printd
+
+    prompt_tokens = count_tokens(prompt)
+    if prompt_tokens > context_window:
+        raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+    # Settings for the generation, includes the prompt + stop tokens, max length, etc
+    settings = get_completions_settings()
+    request = settings
+    request["prompt"] = prompt
+
+    # Set grammar
+    if grammar is not None:
+        request["grammar"] = grammar
+
+    if not endpoint.startswith(("http://", "https://")):
+        raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+    try:
+        # NOTE: llama.cpp server returns the following when it's out of context
+        # curl: (52) Empty reply from server
+        URI = urljoin(endpoint.strip("/") + "/", LLAMACPP_API_SUFFIX.strip("/"))
+        response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+        if response.status_code == 200:
+            result_full = response.json()
+            printd(f"JSON API response:\n{result_full}")
+            result = result_full["content"]
+        else:
+            raise Exception(
+                f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                + f" Make sure that the llama.cpp server is running and reachable at {URI}."
+            )
+
+    except:
+        # TODO handle gracefully
+        raise
+
+    # Pass usage statistics back to main thread
+    # These are used to compute memory warning messages
+    completion_tokens = result_full.get("tokens_predicted", None)
+    total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+    usage = {
+        "prompt_tokens": prompt_tokens,  # can grab from "tokens_evaluated", but it's usually wrong (set to 0)
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    return result, usage
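
Note: this wrapper is where the GBNF grammars above plug in; passing grammar constrains the llama.cpp server's sampler so the reply is guaranteed to parse. A sketch (the server URL and the grammar file path relative to the package root are assumptions):

    from letta.local_llm.llamacpp.api import get_llamacpp_completion
    from letta.utils import json_loads

    with open("letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf") as f:
        fn_grammar = f.read()

    result, usage = get_llamacpp_completion(
        endpoint="http://localhost:8080",  # assumed llama.cpp server address
        auth_type=None,
        auth_key=None,
        prompt="USER: hello\nASSISTANT:",
        context_window=8192,
        grammar=fn_grammar,
    )
    call = json_loads(result)  # grammar guarantees a {"function": ..., "params": ...} object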

letta/local_llm/llamacpp/settings.py
@@ -0,0 +1,22 @@
+# see https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#api-endpoints for options
+SIMPLE = {
+    "stop": [
+        "\nUSER:",
+        "\nASSISTANT:",
+        "\nFUNCTION RETURN:",
+        "\nUSER",
+        "\nASSISTANT",
+        "\nFUNCTION RETURN",
+        "\nFUNCTION",
+        "\nFUNC",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>",
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+    ],
+    # "n_predict": 3072,
+}

letta/local_llm/llm_chat_completion_wrappers/__init__.py
File without changes