letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/grammars/json.gbnf
@@ -0,0 +1,26 @@
+ # https://github.com/ggerganov/llama.cpp/blob/master/grammars/json.gbnf
+ root ::= object
+ value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+ object ::=
+   "{" ws (
+     string ":" ws value
+     ("," ws string ":" ws value)*
+   )? "}" ws
+
+ array ::=
+   "[" ws (
+     value
+     ("," ws value)*
+   )? "]" ws
+
+ string ::=
+   "\"" (
+     [^"\\] |
+     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+   )* "\"" ws
+
+ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+ # Optional space: by convention, applied in this grammar after literal chars when allowed
+ ws ::= ([ \t\n] ws)?
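
Note: this is the stock JSON grammar from llama.cpp. Later files in this diff (letta/local_llm/llamacpp/api.py and letta/local_llm/koboldcpp/api.py) forward such a grammar in the "grammar" field of the completion request. A minimal sketch of that flow, assuming a llama.cpp server on localhost and that auth can be skipped (auth_type=None, auth_key=None):

# Sketch only, not part of the package: constrain a llama.cpp completion
# to valid JSON using the grammar file added in this release.
from letta.local_llm.llamacpp.api import get_llamacpp_completion

with open("letta/local_llm/grammars/json.gbnf") as f:
    json_grammar = f.read()

text, usage = get_llamacpp_completion(
    endpoint="http://localhost:8080",  # assumed local server address
    auth_type=None,
    auth_key=None,
    prompt="Describe today's weather as a JSON object: ",
    context_window=8192,
    grammar=json_grammar,
)
print(text)  # constrained by the grammar, so it parses as JSON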
letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf
@@ -0,0 +1,32 @@
+ root ::= Function
+ Function ::= SendMessage | PauseHeartbeats | CoreMemoryAppend | CoreMemoryReplace | ConversationSearch | ConversationSearchDate | ArchivalMemoryInsert | ArchivalMemorySearch
+ SendMessage ::= "{" ws "\"function\":" ws "\"send_message\"," ws "\"params\":" ws SendMessageParams "}"
+ PauseHeartbeats ::= "{" ws "\"function\":" ws "\"pause_heartbeats\"," ws "\"params\":" ws PauseHeartbeatsParams "}"
+ CoreMemoryAppend ::= "{" ws "\"function\":" ws "\"core_memory_append\"," ws "\"params\":" ws CoreMemoryAppendParams "}"
+ CoreMemoryReplace ::= "{" ws "\"function\":" ws "\"core_memory_replace\"," ws "\"params\":" ws CoreMemoryReplaceParams "}"
+ ConversationSearch ::= "{" ws "\"function\":" ws "\"conversation_search\"," ws "\"params\":" ws ConversationSearchParams "}"
+ ConversationSearchDate ::= "{" ws "\"function\":" ws "\"conversation_search_date\"," ws "\"params\":" ws ConversationSearchDateParams "}"
+ ArchivalMemoryInsert ::= "{" ws "\"function\":" ws "\"archival_memory_insert\"," ws "\"params\":" ws ArchivalMemoryInsertParams "}"
+ ArchivalMemorySearch ::= "{" ws "\"function\":" ws "\"archival_memory_search\"," ws "\"params\":" ws ArchivalMemorySearchParams "}"
+ SendMessageParams ::= "{" ws InnerThoughtsParam "," ws "\"message\":" ws string ws "}"
+ PauseHeartbeatsParams ::= "{" ws InnerThoughtsParam "," ws "\"minutes\":" ws number ws "}"
+ CoreMemoryAppendParams ::= "{" ws InnerThoughtsParam "," ws "\"name\":" ws namestring "," ws "\"content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+ CoreMemoryReplaceParams ::= "{" ws InnerThoughtsParam "," ws "\"name\":" ws namestring "," ws "\"old_content\":" ws string "," ws "\"new_content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+ ConversationSearchParams ::= "{" ws InnerThoughtsParam "," ws "\"query\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+ ConversationSearchDateParams ::= "{" ws InnerThoughtsParam "," ws "\"start_date\":" ws string ws "," ws "\"end_date\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+ ArchivalMemoryInsertParams ::= "{" ws InnerThoughtsParam "," ws "\"content\":" ws string ws "," ws RequestHeartbeatParam ws "}"
+ ArchivalMemorySearchParams ::= "{" ws InnerThoughtsParam "," ws "\"query\":" ws string ws "," ws "\"page\":" ws number ws "," ws RequestHeartbeatParam ws "}"
+ InnerThoughtsParam ::= "\"inner_thoughts\":" ws string
+ RequestHeartbeatParam ::= "\"request_heartbeat\":" ws boolean
+ namestring ::= "\"human\"" | "\"persona\""
+ boolean ::= "true" | "false"
+ number ::= [0-9]+
+
+ string ::=
+   "\"" (
+     [^"\\] |
+     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+   )* "\"" ws
+
+ # Optional space: by convention, applied in this grammar after literal chars when allowed
+ ws ::= ([ \t\n] ws)?
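
For reference, this grammar admits exactly one JSON shape per function. Below is an illustrative string it accepts (invented for this note, not from the package), checked with the standard json module:

# Illustrative only: one sentence of the language defined by the grammar
# above. A model sampled under this grammar can emit this shape and
# nothing structurally different.
import json

sample = (
    '{"function": "send_message", "params": '
    '{"inner_thoughts": "User greeted me; respond warmly.", '
    '"message": "Hello! How can I help you today?"}}'
)
parsed = json.loads(sample)
assert parsed["function"] == "send_message"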
letta/local_llm/groq/api.py
@@ -0,0 +1,97 @@
+ from typing import Tuple
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import post_json_auth_request
+ from letta.utils import count_tokens
+
+ API_CHAT_SUFFIX = "/v1/chat/completions"
+ # LMSTUDIO_API_COMPLETIONS_SUFFIX = "/v1/completions"
+
+
+ def get_groq_completion(endpoint: str, auth_type: str, auth_key: str, model: str, prompt: str, context_window: int) -> Tuple[str, dict]:
+     """TODO no support for function calling OR raw completions, so we need to route the request into /chat/completions instead"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     settings = get_completions_settings()
+     settings.update(
+         {
+             # see https://console.groq.com/docs/text-chat, supports:
+             # "temperature": ,
+             # "max_tokens": ,
+             # "top_p",
+             # "stream",
+             # "stop",
+             # Groq only allows 4 stop tokens
+             "stop": [
+                 "\nUSER",
+                 "\nASSISTANT",
+                 "\nFUNCTION",
+                 # "\nFUNCTION RETURN",
+                 # "<|im_start|>",
+                 # "<|im_end|>",
+                 # "<|im_sep|>",
+                 # # airoboros specific
+                 # "\n### ",
+                 # # '\n' +
+                 # # '</s>',
+                 # # '<|',
+                 # "\n#",
+                 # # "\n\n\n",
+                 # # prevent chaining function calls / multi json objects / run-on generations
+                 # # NOTE: this requires the ability to patch the extra '}}' back into the prompt
+                 " }\n}\n",
+             ]
+         }
+     )
+
+     URI = urljoin(endpoint.strip("/") + "/", API_CHAT_SUFFIX.strip("/"))
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     request = settings
+     request["model"] = model
+     request["max_tokens"] = context_window
+     # NOTE: Hack for chat/completion-only endpoints: put the entire completion string inside the first message
+     message_structure = [{"role": "user", "content": prompt}]
+     request["messages"] = message_structure
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["choices"][0]["message"]["content"]
+             usage = result_full.get("usage", None)
+         else:
+             # Example error: msg={"error":"Context length exceeded. Tokens in context: 8000, Context length: 8000"}
+             if "context length" in str(response.text).lower():
+                 # "exceeds context length" is what appears in the LM Studio error message
+                 # raise an alternate exception that matches OpenAI's message, which is "maximum context length"
+                 raise Exception(f"Request exceeds maximum context length (code={response.status_code}, msg={response.text}, URI={URI})")
+             else:
+                 raise Exception(
+                     f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                     + f" Make sure that the inference server is running and reachable at {URI}."
+                 )
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
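
A hypothetical call to the function above (not part of the diff); the endpoint, model name, and key are placeholders, and "bearer" is assumed to be an auth_type accepted by post_json_auth_request:

# Hypothetical usage sketch. Placeholders: the Groq base URL, the model
# name, and the API key; the "bearer" auth_type is an assumption.
from letta.local_llm.groq.api import get_groq_completion

text, usage = get_groq_completion(
    endpoint="https://api.groq.com/openai",  # /v1/chat/completions is appended by the function
    auth_type="bearer",
    auth_key="gsk_...",
    model="llama3-8b-8192",
    prompt="You are a helpful assistant.\nUSER: hello\nASSISTANT:",
    context_window=8192,
)
print(text)
print(usage)  # {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}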
letta/local_llm/json_parser.py
@@ -0,0 +1,202 @@
+ import json
+ import re
+
+ from letta.errors import LLMJSONParsingError
+ from letta.utils import json_loads
+
+
+ def clean_json_string_extra_backslash(s):
+     """Clean extra backslashes out from stringified JSON
+
+     NOTE: Google AI Gemini API likes to include these
+     """
+     # Strip slashes that are used to escape single quotes and other backslashes
+     # Use json.loads to parse it correctly
+     while "\\\\" in s:
+         s = s.replace("\\\\", "\\")
+     return s
+
+
+ def replace_escaped_underscores(string: str):
+     r"""Handles the case of escaped underscores, e.g.:
+
+     {
+       "function":"send\_message",
+       "params": {
+         "inner\_thoughts": "User is asking for information about themselves. Retrieving data from core memory.",
+         "message": "I know that you are Chad. Is there something specific you would like to know or talk about regarding yourself?"
+     """
+     return string.replace(r"\_", "_")
+
+
+ def extract_first_json(string: str):
+     """Handles the case of two JSON objects back-to-back"""
+     from letta.utils import printd
+
+     depth = 0
+     start_index = None
+
+     for i, char in enumerate(string):
+         if char == "{":
+             if depth == 0:
+                 start_index = i
+             depth += 1
+         elif char == "}":
+             depth -= 1
+             if depth == 0 and start_index is not None:
+                 try:
+                     return json_loads(string[start_index : i + 1])
+                 except json.JSONDecodeError as e:
+                     raise LLMJSONParsingError(f"Matched closing bracket, but decode failed with error: {str(e)}")
+     printd("No valid JSON object found.")
+     raise LLMJSONParsingError("Couldn't find starting bracket")
+
+
+ def add_missing_heartbeat(llm_json):
+     """Manually insert heartbeat requests into messages that should have them
+
+     Use the following heuristic:
+     - if (function call is not send_message && prev message['role'] == user): insert heartbeat
+
+     Basically, if Letta is calling a function (not send_message) immediately after the user sending a message,
+     it probably is a retriever or insertion call, in which case we likely want to eventually reply with send_message
+
+     "message" = {
+         "role": "assistant",
+         "content": ...,
+         "function_call": {
+             "name": ...
+             "arguments": {
+                 "arg1": val1,
+                 ...
+             }
+         }
+     }
+     """
+     raise NotImplementedError
+
+
+ def clean_and_interpret_send_message_json(json_string):
+     # If normal parsing fails, attempt to clean and extract manually
+     cleaned_json_string = re.sub(r"[^\x00-\x7F]+", "", json_string)  # Remove non-ASCII characters
+     function_match = re.search(r'"function":\s*"send_message"', cleaned_json_string)
+     inner_thoughts_match = re.search(r'"inner_thoughts":\s*"([^"]+)"', cleaned_json_string)
+     message_match = re.search(r'"message":\s*"([^"]+)"', cleaned_json_string)
+
+     if function_match and inner_thoughts_match and message_match:
+         return {
+             "function": "send_message",
+             "params": {
+                 "inner_thoughts": inner_thoughts_match.group(1),
+                 "message": message_match.group(1),
+             },
+         }
+     else:
+         raise LLMJSONParsingError(f"Couldn't manually extract send_message pattern from:\n{json_string}")
+
+
+ def repair_json_string(json_string):
+     """
+     This function repairs a JSON string where line feeds were accidentally added
+     within string literals. The line feeds are replaced with the escaped line
+     feed sequence '\\n'.
+     """
+     new_string = ""
+     in_string = False
+     escape = False
+
+     for char in json_string:
+         if char == '"' and not escape:
+             in_string = not in_string
+         if char == "\\" and not escape:
+             escape = True
+         else:
+             escape = False
+         if char == "\n" and in_string:
+             new_string += "\\n"
+         else:
+             new_string += char
+
+     return new_string
+
+
+ def repair_even_worse_json(json_string):
+     """
+     This function repairs a malformed JSON string where string literals are broken up and
+     not properly enclosed in quotes. It aims to consolidate everything between 'message': and
+     the two ending curly braces into one string for the 'message' field.
+     """
+     # State flags
+     in_message = False
+     in_string = False
+     escape = False
+     message_content = []
+
+     # Storage for the new JSON
+     new_json_parts = []
+
+     # Iterating through each character
+     for char in json_string:
+         if char == '"' and not escape:
+             in_string = not in_string
+             if not in_message:
+                 # If we encounter a quote and are not in message, append normally
+                 new_json_parts.append(char)
+         elif char == "\\" and not escape:
+             escape = True
+             new_json_parts.append(char)
+         else:
+             if escape:
+                 escape = False
+             if in_message:
+                 if char == "}":
+                     # Append the consolidated message and the closing characters then reset the flag
+                     new_json_parts.append('"{}"'.format("".join(message_content).replace("\n", " ")))
+                     new_json_parts.append(char)
+                     in_message = False
+                 elif in_string or char.isalnum() or char.isspace() or char in ".',;:!":
+                     # Collect the message content, excluding structural characters
+                     message_content.append(char)
+             else:
+                 # If we're not in message mode, append character to the output as is
+                 new_json_parts.append(char)
+                 if '"message":' in "".join(new_json_parts[-10:]):
+                     # If we detect "message": pattern, switch to message mode
+                     in_message = True
+                     message_content = []
+
+     # Joining everything to form the new JSON
+     repaired_json = "".join(new_json_parts)
+     return repaired_json
+
+
+ def clean_json(raw_llm_output, messages=None, functions=None):
+     from letta.utils import printd
+
+     strategies = [
+         lambda output: json_loads(output),
+         lambda output: json_loads(output + "}"),
+         lambda output: json_loads(output + "}}"),
+         lambda output: json_loads(output + '"}}'),
+         # with strip and strip comma
+         lambda output: json_loads(output.strip().rstrip(",") + "}"),
+         lambda output: json_loads(output.strip().rstrip(",") + "}}"),
+         lambda output: json_loads(output.strip().rstrip(",") + '"}}'),
+         # more complex patchers
+         lambda output: json_loads(repair_json_string(output)),
+         lambda output: json_loads(repair_even_worse_json(output)),
+         lambda output: extract_first_json(output + "}}"),
+         lambda output: clean_and_interpret_send_message_json(output),
+         # replace underscores
+         lambda output: json_loads(replace_escaped_underscores(output)),
+         lambda output: extract_first_json(replace_escaped_underscores(output) + "}}"),
+     ]
+
+     for strategy in strategies:
+         try:
+             printd(f"Trying strategy: {strategy.__name__}")
+             return strategy(raw_llm_output)
+         except (json.JSONDecodeError, LLMJSONParsingError) as e:
+             printd(f"Strategy {strategy.__name__} failed with error: {e}")
+
+     raise LLMJSONParsingError(f"Failed to decode valid Letta JSON from LLM output:\n=====\n{raw_llm_output}\n=====")
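
A quick illustration of the fallback ladder in clean_json (a sketch, with an invented input): the output below was cut off before its two closing braces, so the plain json_loads strategy fails and the strategy that appends "}}" is the first to succeed:

# Sketch: clean_json recovering a truncated function-call payload.
from letta.local_llm.json_parser import clean_json

truncated = (
    '{"function": "send_message", "params": '
    '{"inner_thoughts": "hi", "message": "Hello!"'
)
print(clean_json(truncated))
# {'function': 'send_message', 'params': {'inner_thoughts': 'hi', 'message': 'Hello!'}}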
letta/local_llm/koboldcpp/api.py
@@ -0,0 +1,62 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ KOBOLDCPP_API_SUFFIX = "/api/v1/generate"
+
+
+ def get_koboldcpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """See https://lite.koboldai.net/koboldcpp_api for API spec"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+     request["max_context_length"] = context_window
+     request["max_length"] = 400  # if we don't set this, it'll default to 100 which is quite short
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         # NOTE: llama.cpp server returns the following when it's out of context
+         # curl: (52) Empty reply from server
+         URI = urljoin(endpoint.strip("/") + "/", KOBOLDCPP_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["results"][0]["text"]
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the koboldcpp server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     # KoboldCpp doesn't return anything?
+     # https://lite.koboldai.net/koboldcpp_api#/v1/post_v1_generate
+     completion_tokens = None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/koboldcpp/settings.py
@@ -0,0 +1,23 @@
+ # see https://lite.koboldai.net/koboldcpp_api#/v1/post_v1_generate
+ SIMPLE = {
+     "stop_sequence": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     # "max_context_length": LLM_MAX_TOKENS,
+     "max_length": 512,
+ }
letta/local_llm/llamacpp/api.py
@@ -0,0 +1,58 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ LLAMACPP_API_SUFFIX = "/completion"
+
+
+ def get_llamacpp_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """See https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md for instructions on how to run the LLM web server"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         # NOTE: llama.cpp server returns the following when it's out of context
+         # curl: (52) Empty reply from server
+         URI = urljoin(endpoint.strip("/") + "/", LLAMACPP_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["content"]
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the llama.cpp server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = result_full.get("tokens_predicted", None)
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from "tokens_evaluated", but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/llamacpp/settings.py
@@ -0,0 +1,22 @@
+ # see https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#api-endpoints for options
+ SIMPLE = {
+     "stop": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     # "n_predict": 3072,
+ }