letta-nightly 0.8.17.dev20250723104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- letta/__init__.py +5 -3
- letta/agent.py +3 -2
- letta/agents/base_agent.py +4 -1
- letta/agents/voice_agent.py +1 -0
- letta/constants.py +4 -2
- letta/functions/schema_generator.py +2 -1
- letta/groups/dynamic_multi_agent.py +1 -0
- letta/helpers/converters.py +13 -5
- letta/helpers/json_helpers.py +6 -1
- letta/llm_api/anthropic.py +2 -2
- letta/llm_api/aws_bedrock.py +24 -94
- letta/llm_api/deepseek.py +1 -1
- letta/llm_api/google_ai_client.py +0 -38
- letta/llm_api/google_constants.py +6 -3
- letta/llm_api/helpers.py +1 -1
- letta/llm_api/llm_api_tools.py +4 -7
- letta/llm_api/mistral.py +12 -37
- letta/llm_api/openai.py +17 -17
- letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
- letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
- letta/local_llm/constants.py +2 -23
- letta/local_llm/json_parser.py +11 -1
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
- letta/local_llm/ollama/api.py +2 -2
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +33 -2
- letta/orm/files_agents.py +13 -10
- letta/orm/mixins.py +8 -0
- letta/orm/prompt.py +13 -0
- letta/orm/sqlite_functions.py +61 -17
- letta/otel/db_pool_monitoring.py +13 -12
- letta/schemas/agent.py +69 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -0
- letta/schemas/embedding_config.py +15 -3
- letta/schemas/enums.py +2 -0
- letta/schemas/file.py +1 -1
- letta/schemas/folder.py +74 -0
- letta/schemas/memory.py +12 -6
- letta/schemas/prompt.py +9 -0
- letta/schemas/providers/__init__.py +47 -0
- letta/schemas/providers/anthropic.py +78 -0
- letta/schemas/providers/azure.py +80 -0
- letta/schemas/providers/base.py +201 -0
- letta/schemas/providers/bedrock.py +78 -0
- letta/schemas/providers/cerebras.py +79 -0
- letta/schemas/providers/cohere.py +18 -0
- letta/schemas/providers/deepseek.py +63 -0
- letta/schemas/providers/google_gemini.py +102 -0
- letta/schemas/providers/google_vertex.py +54 -0
- letta/schemas/providers/groq.py +35 -0
- letta/schemas/providers/letta.py +39 -0
- letta/schemas/providers/lmstudio.py +97 -0
- letta/schemas/providers/mistral.py +41 -0
- letta/schemas/providers/ollama.py +151 -0
- letta/schemas/providers/openai.py +241 -0
- letta/schemas/providers/together.py +85 -0
- letta/schemas/providers/vllm.py +57 -0
- letta/schemas/providers/xai.py +66 -0
- letta/server/db.py +0 -5
- letta/server/rest_api/app.py +4 -3
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +152 -4
- letta/server/rest_api/routers/v1/folders.py +490 -0
- letta/server/rest_api/routers/v1/providers.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +21 -26
- letta/server/rest_api/routers/v1/tools.py +90 -15
- letta/server/server.py +50 -95
- letta/services/agent_manager.py +420 -81
- letta/services/agent_serialization_manager.py +707 -0
- letta/services/block_manager.py +132 -11
- letta/services/file_manager.py +104 -29
- letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
- letta/services/file_processor/file_processor.py +75 -24
- letta/services/file_processor/parser/markitdown_parser.py +95 -0
- letta/services/files_agents_manager.py +57 -17
- letta/services/group_manager.py +7 -0
- letta/services/helpers/agent_manager_helper.py +25 -15
- letta/services/provider_manager.py +2 -2
- letta/services/source_manager.py +35 -16
- letta/services/tool_executor/files_tool_executor.py +12 -5
- letta/services/tool_manager.py +12 -0
- letta/services/tool_sandbox/e2b_sandbox.py +52 -48
- letta/settings.py +9 -6
- letta/streaming_utils.py +2 -1
- letta/utils.py +34 -1
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/llm_api/openai.py
CHANGED
@@ -59,11 +59,15 @@ def openai_check_valid_api_key(base_url: str, api_key: Union[str, None]) -> None
 
 def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool = False, extra_params: Optional[dict] = None) -> dict:
     """https://platform.openai.com/docs/api-reference/models/list"""
-    from letta.utils import printd
 
     # In some cases we may want to double-check the URL and do basic correction, eg:
     # In Letta config the address for vLLM is w/o a /v1 suffix for simplicity
     # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit
+
+    import warnings
+
+    warnings.warn("The synchronous version of openai_get_model_list function is deprecated. Use the async one instead.", DeprecationWarning)
+
     if fix_url:
         if not url.endswith("/v1"):
             url = smart_urljoin(url, "v1")
@@ -74,14 +78,14 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
 
-
+    logger.debug(f"Sending request to {url}")
     response = None
     try:
         # TODO add query param "tool" to be true
         response = requests.get(url, headers=headers, params=extra_params)
         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
         response = response.json()  # convert to dict from string
-
+        logger.debug(f"response = {response}")
         return response
     except requests.exceptions.HTTPError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
@@ -90,7 +94,7 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
             response = response.json()
         except:
             pass
-
+        logger.debug(f"Got HTTPError, exception={http_err}, response={response}")
         raise http_err
     except requests.exceptions.RequestException as req_err:
         # Handle other requests-related errors (e.g., connection error)
@@ -99,7 +103,7 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
             response = response.json()
         except:
             pass
-
+        logger.debug(f"Got RequestException, exception={req_err}, response={response}")
         raise req_err
     except Exception as e:
         # Handle other potential errors
@@ -108,7 +112,7 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
             response = response.json()
         except:
             pass
-
+        logger.debug(f"Got unknown Exception, exception={e}, response={response}")
         raise e
 
 
@@ -120,7 +124,6 @@ async def openai_get_model_list_async(
     client: Optional["httpx.AsyncClient"] = None,
 ) -> dict:
     """https://platform.openai.com/docs/api-reference/models/list"""
-    from letta.utils import printd
 
     # In some cases we may want to double-check the URL and do basic correction
     if fix_url and not url.endswith("/v1"):
@@ -132,7 +135,7 @@ async def openai_get_model_list_async(
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
 
-
+    logger.debug(f"Sending request to {url}")
 
     # Use provided client or create a new one
     close_client = False
@@ -144,24 +147,23 @@
         response = await client.get(url, headers=headers, params=extra_params)
         response.raise_for_status()
         result = response.json()
-
+        logger.debug(f"response = {result}")
         return result
     except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
-        error_response = None
         try:
             error_response = http_err.response.json()
         except:
             error_response = {"status_code": http_err.response.status_code, "text": http_err.response.text}
-
+        logger.debug(f"Got HTTPError, exception={http_err}, response={error_response}")
         raise http_err
     except httpx.RequestError as req_err:
         # Handle other httpx-related errors (e.g., connection error)
-
+        logger.debug(f"Got RequestException, exception={req_err}")
         raise req_err
     except Exception as e:
         # Handle other potential errors
-
+        logger.debug(f"Got unknown Exception, exception={e}")
         raise e
     finally:
         if close_client:
@@ -480,7 +482,7 @@ def openai_chat_completions_process_stream(
                 )
 
                 if message_delta.function_call is not None:
-                    raise NotImplementedError(
+                    raise NotImplementedError("Old function_call style not support with stream=True")
 
                 # overwrite response fields based on latest chunk
                 if not create_message_id:
@@ -503,7 +505,7 @@ def openai_chat_completions_process_stream(
         logger.error(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
         raise e
     finally:
-        logger.info(
+        logger.info("Finally ending streaming interface.")
         if stream_interface:
             stream_interface.stream_end()
 
@@ -525,7 +527,6 @@ def openai_chat_completions_process_stream(
 
     assert len(chat_completion_response.choices) > 0, f"No response from provider {chat_completion_response}"
 
-    # printd(chat_completion_response)
     log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
     return chat_completion_response
 
@@ -536,7 +537,6 @@ def openai_chat_completions_request_stream(
     chat_completion_request: ChatCompletionRequest,
     fix_url: bool = False,
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
-
     # In some cases we may want to double-check the URL and do basic correction, eg:
     # In Letta config the address for vLLM is w/o a /v1 suffix for simplicity
     # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit
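Since the synchronous openai_get_model_list now emits a DeprecationWarning, callers are steered toward openai_get_model_list_async. A minimal calling sketch, assuming the async helper mirrors the synchronous parameters (url, api_key, fix_url, extra_params) plus the optional httpx.AsyncClient shown above; the base URL is a placeholder:

import asyncio

from letta.llm_api.openai import openai_get_model_list_async


async def list_models() -> dict:
    # fix_url=True appends the /v1 suffix when it is missing, per the URL-correction
    # comments in the diff above; the URL here is a hypothetical local endpoint.
    return await openai_get_model_list_async("http://localhost:8000", api_key=None, fix_url=True)


models = asyncio.run(list_models())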
letta/llm_api/sample_response_jsons/aws_bedrock.json
ADDED
@@ -0,0 +1,38 @@
+{
+  "id": "msg_123",
+  "type": "message",
+  "role": "assistant",
+  "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+  "content": [
+    {
+      "type": "text",
+      "text": "I see the Firefox icon. Let me click on it and then navigate to a weather website."
+    },
+    {
+      "type": "tool_use",
+      "id": "toolu_123",
+      "name": "computer",
+      "input": {
+        "action": "mouse_move",
+        "coordinate": [
+          708,
+          736
+        ]
+      }
+    },
+    {
+      "type": "tool_use",
+      "id": "toolu_234",
+      "name": "computer",
+      "input": {
+        "action": "left_click"
+      }
+    }
+  ],
+  "stop_reason": "tool_use",
+  "stop_sequence": null,
+  "usage": {
+    "input_tokens": 3391,
+    "output_tokens": 132
+  }
+}
letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "text-embedding-nomic-embed-text-v1.5",
+      "object": "model",
+      "type": "embeddings",
+      "publisher": "nomic-ai",
+      "arch": "nomic-bert",
+      "compatibility_type": "gguf",
+      "quantization": "Q4_0",
+      "state": "not-loaded",
+      "max_context_length": 2048
+    },
+    ...
letta/llm_api/sample_response_jsons/lmstudio_model_list.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "qwen2-vl-7b-instruct",
+      "object": "model",
+      "type": "vlm",
+      "publisher": "mlx-community",
+      "arch": "qwen2_vl",
+      "compatibility_type": "mlx",
+      "quantization": "4bit",
+      "state": "not-loaded",
+      "max_context_length": 32768
+    },
+    ...,
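The two LM Studio samples above illustrate the /v1/models-style payload that the new provider code parses. A small illustrative sketch (not part of the package) for pulling context lengths out of such a response; field names follow the samples:

def get_context_windows(model_list_response: dict) -> dict:
    # Maps model id -> max_context_length for entries shaped like the LM Studio samples above.
    return {
        entry["id"]: entry.get("max_context_length", 0)
        for entry in model_list_response.get("data", [])
    }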
letta/local_llm/constants.py
CHANGED
@@ -1,32 +1,11 @@
-# import letta.local_llm.llm_chat_completion_wrappers.airoboros as airoboros
 from letta.local_llm.llm_chat_completion_wrappers.chatml import ChatMLInnerMonologueWrapper
 
-DEFAULT_ENDPOINTS = {
-    # Local
-    "koboldcpp": "http://localhost:5001",
-    "llamacpp": "http://localhost:8080",
-    "lmstudio": "http://localhost:1234",
-    "lmstudio-legacy": "http://localhost:1234",
-    "ollama": "http://localhost:11434",
-    "webui-legacy": "http://localhost:5000",
-    "webui": "http://localhost:5000",
-    "vllm": "http://localhost:8000",
-    # APIs
-    "openai": "https://api.openai.com",
-    "anthropic": "https://api.anthropic.com",
-    "groq": "https://api.groq.com/openai",
-}
-
-DEFAULT_OLLAMA_MODEL = "dolphin2.2-mistral:7b-q6_K"
-
-# DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper
-# DEFAULT_WRAPPER_NAME = "airoboros-l2-70b-2.1"
-
 DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
 DEFAULT_WRAPPER_NAME = "chatml"
 
-INNER_THOUGHTS_KWARG = "
+INNER_THOUGHTS_KWARG = "thinking"
 INNER_THOUGHTS_KWARG_VERTEX = "thinking"
+VALID_INNER_THOUGHTS_KWARGS = ("thinking", "inner_thoughts")
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
 INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
letta/local_llm/json_parser.py
CHANGED
@@ -1,5 +1,6 @@
 import json
 import re
+import warnings
 
 from letta.errors import LLMJSONParsingError
 from letta.helpers.json_helpers import json_loads
@@ -77,10 +78,19 @@ def add_missing_heartbeat(llm_json):
 
 
 def clean_and_interpret_send_message_json(json_string):
+    from letta.local_llm.constants import INNER_THOUGHTS_KWARG, VALID_INNER_THOUGHTS_KWARGS
+    from letta.settings import model_settings
+
+    kwarg = model_settings.inner_thoughts_kwarg
+    if kwarg not in VALID_INNER_THOUGHTS_KWARGS:
+        warnings.warn(f"INNER_THOUGHTS_KWARG is not valid: {kwarg}")
+        kwarg = INNER_THOUGHTS_KWARG
+
     # If normal parsing fails, attempt to clean and extract manually
     cleaned_json_string = re.sub(r"[^\x00-\x7F]+", "", json_string)  # Remove non-ASCII characters
     function_match = re.search(r'"function":\s*"send_message"', cleaned_json_string)
-
+
+    inner_thoughts_match = re.search(rf'"{kwarg}":\s*"([^"]+)"', cleaned_json_string)
     message_match = re.search(r'"message":\s*"([^"]+)"', cleaned_json_string)
 
     if function_match and inner_thoughts_match and message_match:
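The constants.py and json_parser.py changes above make the inner-thoughts keyword configurable but constrained to a known set. A minimal sketch of the same fallback rule, factored into a standalone helper for illustration (the helper name is hypothetical; the constants and setting are the ones referenced in the diff):

import warnings

from letta.local_llm.constants import INNER_THOUGHTS_KWARG, VALID_INNER_THOUGHTS_KWARGS


def resolve_inner_thoughts_kwarg(configured: str) -> str:
    # Mirrors the new validation in clean_and_interpret_send_message_json:
    # fall back to the default kwarg when the configured name is not allowed.
    if configured not in VALID_INNER_THOUGHTS_KWARGS:
        warnings.warn(f"INNER_THOUGHTS_KWARG is not valid: {configured}")
        return INNER_THOUGHTS_KWARG
    return configured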
letta/local_llm/llm_chat_completion_wrappers/airoboros.py
CHANGED
@@ -75,7 +75,7 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         func_str = ""
         func_str += f"{schema['name']}:"
         func_str += f"\n description: {schema['description']}"
-        func_str +=
+        func_str += "\n params:"
         for param_k, param_v in schema["parameters"]["properties"].items():
             # TODO we're ignoring type
             func_str += f"\n {param_k}: {param_v['description']}"
@@ -83,8 +83,8 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         return func_str
 
         # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
-        prompt +=
-        prompt +=
+        prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
         if function_documentation is not None:
             prompt += f"\n{function_documentation}"
         else:
@@ -150,7 +150,7 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         prompt += "\n### RESPONSE"
 
         if self.include_assistant_prefix:
-            prompt +=
+            prompt += "\nASSISTANT:"
         if self.include_opening_brance_in_prefix:
             prompt += "\n{"
 
@@ -282,9 +282,9 @@ class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper):
         func_str = ""
         func_str += f"{schema['name']}:"
         func_str += f"\n description: {schema['description']}"
-        func_str +=
+        func_str += "\n params:"
         if add_inner_thoughts:
-            func_str +=
+            func_str += "\n inner_thoughts: Deep inner monologue private to you only."
         for param_k, param_v in schema["parameters"]["properties"].items():
             # TODO we're ignoring type
             func_str += f"\n {param_k}: {param_v['description']}"
@@ -292,8 +292,8 @@ class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper):
         return func_str
 
         # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
-        prompt +=
-        prompt +=
+        prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
         if function_documentation is not None:
             prompt += f"\n{function_documentation}"
         else:
@@ -375,7 +375,7 @@ class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper):
         prompt += "\n### RESPONSE"
 
         if self.include_assistant_prefix:
-            prompt +=
+            prompt += "\nASSISTANT:"
         if self.assistant_prefix_extra:
             prompt += self.assistant_prefix_extra
 
letta/local_llm/llm_chat_completion_wrappers/chatml.py
CHANGED
@@ -71,7 +71,7 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
         func_str = ""
         func_str += f"{schema['name']}:"
         func_str += f"\n description: {schema['description']}"
-        func_str +=
+        func_str += "\n params:"
         if add_inner_thoughts:
             from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 
@@ -87,8 +87,8 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
         prompt = ""
 
         # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
-        prompt +=
-        prompt +=
+        prompt += "Please select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
         for function_dict in functions:
             prompt += f"\n{self._compile_function_description(function_dict)}"
 
@@ -101,8 +101,8 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
         prompt += system_message
         prompt += "\n"
         if function_documentation is not None:
-            prompt +=
-            prompt +=
+            prompt += "Please select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+            prompt += "\nAvailable functions:\n"
             prompt += function_documentation
         else:
             prompt += self._compile_function_block(functions)
@@ -230,7 +230,6 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
                 prompt += f"\n<|im_start|>{role_str}\n{msg_str.strip()}<|im_end|>"
 
             elif message["role"] == "system":
-
                 role_str = "system"
                 msg_str = self._compile_system_message(
                     system_message=message["content"], functions=functions, function_documentation=function_documentation
@@ -255,7 +254,7 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
                 raise ValueError(message)
 
         if self.include_assistant_prefix:
-            prompt +=
+            prompt += "\n<|im_start|>assistant"
             if self.assistant_prefix_hint:
                 prompt += f"\n{FIRST_PREFIX_HINT if first_message else PREFIX_HINT}"
             if self.supports_first_message and first_message:
@@ -386,7 +385,7 @@ class ChatMLOuterInnerMonologueWrapper(ChatMLInnerMonologueWrapper):
                 "You must always include inner thoughts, but you do not always have to call a function.",
             ]
         )
-        prompt +=
+        prompt += "\nAvailable functions:"
         for function_dict in functions:
             prompt += f"\n{self._compile_function_description(function_dict, add_inner_thoughts=False)}"
 
letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py
CHANGED
@@ -91,9 +91,9 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
         func_str = ""
         func_str += f"{schema['name']}:"
         func_str += f"\n description: {schema['description']}"
-        func_str +=
+        func_str += "\n params:"
         if add_inner_thoughts:
-            func_str +=
+            func_str += "\n inner_thoughts: Deep inner monologue private to you only."
         for param_k, param_v in schema["parameters"]["properties"].items():
             # TODO we're ignoring type
             func_str += f"\n {param_k}: {param_v['description']}"
@@ -105,8 +105,8 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
         prompt = ""
 
         # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
-        prompt +=
-        prompt +=
+        prompt += "Please select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
         for function_dict in functions:
             prompt += f"\n{self._compile_function_description(function_dict)}"
 
@@ -117,8 +117,8 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
         prompt = system_message
         prompt += "\n"
         if function_documentation is not None:
-            prompt +=
-            prompt +=
+            prompt += "Please select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+            prompt += "\nAvailable functions:"
             prompt += function_documentation
         else:
             prompt += self._compile_function_block(functions)
letta/local_llm/llm_chat_completion_wrappers/dolphin.py
CHANGED
@@ -85,7 +85,7 @@ class Dolphin21MistralWrapper(LLMChatCompletionWrapper):
         func_str = ""
         func_str += f"{schema['name']}:"
         func_str += f"\n description: {schema['description']}"
-        func_str +=
+        func_str += "\n params:"
         for param_k, param_v in schema["parameters"]["properties"].items():
             # TODO we're ignoring type
             func_str += f"\n {param_k}: {param_v['description']}"
@@ -93,8 +93,8 @@ class Dolphin21MistralWrapper(LLMChatCompletionWrapper):
         return func_str
 
         # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
-        prompt +=
-        prompt +=
+        prompt += "\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
+        prompt += "\nAvailable functions:"
         if function_documentation is not None:
             prompt += f"\n{function_documentation}"
         else:
letta/local_llm/ollama/api.py
CHANGED
@@ -18,7 +18,7 @@ def get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_
 
     if model is None:
         raise LocalLLMError(
-
+            "Error: model name not specified. Set model in your config to the model you want to run (e.g. 'dolphin2.2-mistral')"
         )
 
     # Settings for the generation, includes the prompt + stop tokens, max length, etc
@@ -51,7 +51,7 @@ def get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_
     # Set grammar
     if grammar is not None:
         # request["grammar_string"] = load_grammar_file(grammar)
-        raise NotImplementedError(
+        raise NotImplementedError("Ollama does not support grammars")
 
     if not endpoint.startswith(("http://", "https://")):
         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
letta/orm/__init__.py
CHANGED
@@ -20,6 +20,7 @@ from letta.orm.mcp_server import MCPServer
 from letta.orm.message import Message
 from letta.orm.organization import Organization
 from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
+from letta.orm.prompt import Prompt
 from letta.orm.provider import Provider
 from letta.orm.provider_trace import ProviderTrace
 from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
letta/orm/agent.py
CHANGED
@@ -20,6 +20,7 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
 from letta.schemas.response_format import ResponseFormatUnion
 from letta.schemas.tool_rule import ToolRule
+from letta.utils import calculate_file_defaults_based_on_context_window
 
 if TYPE_CHECKING:
     from letta.orm.agents_tags import AgentsTags
@@ -92,6 +93,14 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
     # timezone
     timezone: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The timezone of the agent (for the context window).")
 
+    # file related controls
+    max_files_open: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True, doc="Maximum number of files that can be open at once for this agent."
+    )
+    per_file_view_window_char_limit: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True, doc="The per-file view window character limit for this agent."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents", lazy="raise")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -146,6 +155,15 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
         lazy="selectin",
     )
 
+    def _get_per_file_view_window_char_limit(self) -> int:
+        """Get the per_file_view_window_char_limit, calculating defaults if None."""
+        if self.per_file_view_window_char_limit is not None:
+            return self.per_file_view_window_char_limit
+
+        context_window = self.llm_config.context_window if self.llm_config and self.llm_config.context_window else None
+        _, default_char_limit = calculate_file_defaults_based_on_context_window(context_window)
+        return default_char_limit
+
     def to_pydantic(self, include_relationships: Optional[Set[str]] = None) -> PydanticAgentState:
         """
         Converts the SQLAlchemy Agent model into its Pydantic counterpart.
@@ -191,6 +209,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
             "last_run_completion": self.last_run_completion,
             "last_run_duration_ms": self.last_run_duration_ms,
             "timezone": self.timezone,
+            "max_files_open": self.max_files_open,
+            "per_file_view_window_char_limit": self.per_file_view_window_char_limit,
             # optional field defaults
             "tags": [],
             "tools": [],
@@ -208,7 +228,12 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
             "sources": lambda: [s.to_pydantic() for s in self.sources],
             "memory": lambda: Memory(
                 blocks=[b.to_pydantic() for b in self.core_memory],
-                file_blocks=[
+                file_blocks=[
+                    block
+                    for b in self.file_agents
+                    if (block := b.to_pydantic_block(per_file_view_window_char_limit=self._get_per_file_view_window_char_limit()))
+                    is not None
+                ],
                 prompt_template=get_prompt_template_for_agent_type(self.agent_type),
             ),
             "identity_ids": lambda: [i.id for i in self.identities],
@@ -271,6 +296,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
             "response_format": self.response_format,
             "last_run_completion": self.last_run_completion,
             "last_run_duration_ms": self.last_run_duration_ms,
+            "max_files_open": self.max_files_open,
+            "per_file_view_window_char_limit": self.per_file_view_window_char_limit,
         }
         optional_fields = {
             "tags": [],
@@ -314,7 +341,11 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
             state["sources"] = [s.to_pydantic() for s in sources]
             state["memory"] = Memory(
                 blocks=[m.to_pydantic() for m in memory],
-                file_blocks=[
+                file_blocks=[
+                    block
+                    for b in file_agents
+                    if (block := b.to_pydantic_block(per_file_view_window_char_limit=self._get_per_file_view_window_char_limit())) is not None
+                ],
                 prompt_template=get_prompt_template_for_agent_type(self.agent_type),
             )
             state["identity_ids"] = [i.id for i in identities]
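The new file-related columns on Agent are nullable, with defaults derived from the model's context window via calculate_file_defaults_based_on_context_window. A sketch of that fallback, assuming the helper returns (max_files_open, per_file_view_window_char_limit) as the unpacking in _get_per_file_view_window_char_limit above suggests:

from typing import Optional

from letta.utils import calculate_file_defaults_based_on_context_window


def effective_char_limit(explicit_limit: Optional[int], context_window: Optional[int]) -> int:
    # Prefer the value stored on the agent row; otherwise derive a default
    # from the context window, as the ORM helper above does.
    if explicit_limit is not None:
        return explicit_limit
    _, default_char_limit = calculate_file_defaults_based_on_context_window(context_window)
    return default_char_limit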
letta/orm/files_agents.py
CHANGED
@@ -5,10 +5,10 @@ from typing import TYPE_CHECKING, Optional
 from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
-from letta.constants import
+from letta.constants import FILE_IS_TRUNCATED_WARNING
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
-from letta.schemas.block import
+from letta.schemas.block import FileBlock as PydanticFileBlock
 from letta.schemas.file import FileAgent as PydanticFileAgent
 
 if TYPE_CHECKING:
@@ -59,7 +59,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
         String,
         ForeignKey("sources.id", ondelete="CASCADE"),
         nullable=False,
-        doc="ID of the source
+        doc="ID of the source",
     )
 
     file_name: Mapped[str] = mapped_column(
@@ -86,19 +86,22 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
     )
 
     # TODO: This is temporary as we figure out if we want FileBlock as a first class citizen
-    def to_pydantic_block(self) ->
+    def to_pydantic_block(self, per_file_view_window_char_limit: int) -> PydanticFileBlock:
         visible_content = self.visible_content if self.visible_content and self.is_open else ""
 
         # Truncate content and add warnings here when converting from FileAgent to Block
-        if len(visible_content) >
+        if len(visible_content) > per_file_view_window_char_limit:
             truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
-            visible_content = visible_content[:
+            visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)]
             visible_content += truncated_warning
 
-        return
+        return PydanticFileBlock(
             value=visible_content,
-            label=self.file_name,
+            label=self.file_name,
             read_only=True,
-
-
+            file_id=self.file_id,
+            source_id=self.source_id,
+            is_open=self.is_open,
+            last_accessed_at=self.last_accessed_at,
+            limit=per_file_view_window_char_limit,
         )
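to_pydantic_block now takes the per-file character limit as a parameter instead of relying on a global constant. The truncation rule, extracted into a standalone sketch for clarity (the function name is illustrative):

from letta.constants import FILE_IS_TRUNCATED_WARNING


def truncate_visible_content(visible_content: str, per_file_view_window_char_limit: int) -> str:
    # Content plus the trailing warning stays within the per-file limit.
    if len(visible_content) > per_file_view_window_char_limit:
        truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
        visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)]
        visible_content += truncated_warning
    return visible_content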
letta/orm/mixins.py
CHANGED
@@ -62,3 +62,11 @@ class SandboxConfigMixin(Base):
     __abstract__ = True
 
     sandbox_config_id: Mapped[str] = mapped_column(String, ForeignKey("sandbox_configs.id"))
+
+
+class ProjectMixin(Base):
+    """Mixin for models that belong to a project."""
+
+    __abstract__ = True
+
+    project_id: Mapped[str] = mapped_column(String, nullable=True, doc="The associated project id.")
letta/orm/prompt.py
ADDED
@@ -0,0 +1,13 @@
+from sqlalchemy.orm import Mapped, mapped_column
+
+from letta.orm.mixins import ProjectMixin
+from letta.orm.sqlalchemy_base import SqlalchemyBase
+from letta.schemas.prompt import Prompt as PydanticPrompt
+
+
+class Prompt(SqlalchemyBase, ProjectMixin):
+    __pydantic_model__ = PydanticPrompt
+    __tablename__ = "prompts"
+
+    id: Mapped[str] = mapped_column(primary_key=True, doc="Unique passage identifier")
+    prompt: Mapped[str] = mapped_column(doc="The string contents of the prompt.")
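The new prompts table pairs this ORM model with letta/schemas/prompt.py, which is also added in this release. A usage sketch, assuming a standard SQLAlchemy session; the id and project_id values are placeholders:

from letta.orm.prompt import Prompt

new_prompt = Prompt(
    id="prompt-00000000-0000-0000-0000-000000000000",  # placeholder identifier
    prompt="You are a helpful assistant.",
    project_id="project-123",  # placeholder project id provided by ProjectMixin
)
# session.add(new_prompt); session.commit()  # with an application-managed session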