letta-nightly 0.5.0.dev20241017104103__py3-none-any.whl → 0.5.0.dev20241019104023__py3-none-any.whl
This diff reflects the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- letta/agent.py +29 -14
- letta/cli/cli.py +0 -2
- letta/client/client.py +41 -6
- letta/constants.py +1 -1
- letta/functions/helpers.py +3 -3
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/helpers.py +0 -15
- letta/llm_api/llm_api_tools.py +35 -47
- letta/llm_api/openai.py +18 -8
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +1 -1
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +1 -1
- letta/local_llm/utils.py +22 -6
- letta/main.py +0 -4
- letta/metadata.py +19 -6
- letta/o1_agent.py +87 -0
- letta/personas/examples/o1_persona.txt +5 -0
- letta/prompts/system/memgpt_modified_o1.txt +31 -0
- letta/schemas/agent.py +30 -2
- letta/schemas/llm_config.py +24 -1
- letta/schemas/memory.py +4 -0
- letta/schemas/openai/chat_completion_request.py +2 -2
- letta/schemas/tool.py +34 -2
- letta/server/rest_api/app.py +1 -0
- letta/server/rest_api/routers/v1/agents.py +14 -6
- letta/server/rest_api/routers/v1/tools.py +9 -6
- letta/server/server.py +63 -22
- letta/settings.py +3 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/METADATA +2 -2
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/RECORD +32 -29
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/LICENSE +0 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/WHEEL +0 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
@@ -23,16 +23,19 @@ from letta.errors import LLMError
 from letta.interface import AgentInterface
 from letta.llm_api.helpers import is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
-from letta.local_llm.utils import num_tokens_from_messages
+from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
 from letta.schemas.agent import AgentState, AgentStepResponse
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import MessageRole, OptionState
+from letta.schemas.enums import MessageRole
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, UpdateMessage
+from letta.schemas.openai.chat_completion_request import (
+    Tool as ChatCompletionRequestTool,
+)
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import (
     Message as ChatCompletionMessage,
@@ -463,15 +466,14 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
             response = create(
                 # agent_state=self.agent_state,
                 llm_config=self.agent_state.llm_config,
-                user_id=self.agent_state.user_id,
                 messages=message_sequence,
+                user_id=self.agent_state.user_id,
                 functions=self.functions,
                 functions_python=self.functions_python,
                 function_call=function_call,
@@ -480,8 +482,6 @@ class Agent(BaseAgent):
                 # streaming
                 stream=stream,
                 stream_interface=self.interface,
-                # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )

             if len(response.choices) == 0 or response.choices[0] is None:
@@ -822,7 +822,6 @@ class Agent(BaseAgent):
         first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
         skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
@@ -861,10 +860,7 @@ class Agent(BaseAgent):
                 counter = 0
                 while True:
                     response = self._get_ai_reply(
-                        message_sequence=input_message_sequence,
-                        first_message=True,  # passed through to the prompt formatter
-                        stream=stream,
-                        inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
+                        message_sequence=input_message_sequence, first_message=True, stream=stream  # passed through to the prompt formatter
                     )
                     if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                         break
@@ -877,7 +873,6 @@ class Agent(BaseAgent):
                 response = self._get_ai_reply(
                     message_sequence=input_message_sequence,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )

             # Step 3: check if LLM wanted to call a function
@@ -954,7 +949,6 @@ class Agent(BaseAgent):
                 first_message_retry_limit=first_message_retry_limit,
                 skip_verify=skip_verify,
                 stream=stream,
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )

@@ -1467,6 +1461,24 @@ class Agent(BaseAgent):
         )
         num_tokens_external_memory_summary = count_tokens(external_memory_summary)

+        # tokens taken up by function definitions
+        if self.functions:
+            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in self.functions]
+            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=self.functions, model=self.model)
+        else:
+            available_functions_definitions = []
+            num_tokens_available_functions_definitions = 0
+
+        num_tokens_used_total = (
+            num_tokens_system  # system prompt
+            + num_tokens_available_functions_definitions  # function definitions
+            + num_tokens_core_memory  # core memory
+            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
+            + num_tokens_summary_memory  # summary of ongoing conversation
+            + num_tokens_messages  # tokens taken by messages
+        )
+        assert isinstance(num_tokens_used_total, int)
+
         return ContextWindowOverview(
             # context window breakdown (in messages)
             num_messages=len(self._messages),
@@ -1475,7 +1487,7 @@ class Agent(BaseAgent):
             num_tokens_external_memory_summary=num_tokens_external_memory_summary,
             # top-level information
             context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=
+            context_window_size_current=num_tokens_used_total,
             # context window breakdown (in tokens)
             num_tokens_system=num_tokens_system,
             system_prompt=system_prompt,
@@ -1485,6 +1497,9 @@ class Agent(BaseAgent):
             summary_memory=summary_memory,
             num_tokens_messages=num_tokens_messages,
             messages=self._messages,
+            # related to functions
+            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
+            functions_definitions=available_functions_definitions,
         )
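The new block above treats tool schemas as first-class context-window content: every serialized function definition consumes prompt budget just like a message. A rough, runnable sketch of that cost, as a simplified stand-in for letta's num_tokens_from_functions (the example schema is invented):

import json

import tiktoken

def estimate_function_def_tokens(functions: list, model: str = "gpt-4") -> int:
    # Simplified: encode the serialized JSON schema directly. The real
    # num_tokens_from_functions walks the schema field by field, so exact
    # counts will differ slightly.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return sum(len(encoding.encode(json.dumps(f))) for f in functions)

example_schema = {  # invented example, not a letta built-in
    "name": "get_weather",
    "description": "Look up current weather for a city",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
}
print(estimate_function_def_tokens([example_schema]))  # small positive int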
letta/cli/cli.py
CHANGED
@@ -49,7 +49,6 @@ def server(
     ade: Annotated[bool, typer.Option(help="Allows remote access")] = False,
 ):
     """Launch a Letta server process"""
-
     if type == ServerChoice.rest_api:
         pass

@@ -321,7 +320,6 @@ def run(
         ms=ms,
         no_verify=no_verify,
         stream=stream,
-        inner_thoughts_in_kwargs=no_content,
     )  # TODO: add back no_verify
letta/client/client.py
CHANGED
@@ -96,6 +96,9 @@ class AbstractClient(object):
     ):
         raise NotImplementedError

+    def get_tools_from_agent(self, agent_id: str):
+        raise NotImplementedError
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         raise NotImplementedError

@@ -197,7 +200,7 @@ class AbstractClient(object):
     ) -> Tool:
         raise NotImplementedError

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         raise NotImplementedError

     def get_tool(self, id: str) -> Tool:
@@ -480,6 +483,21 @@ class RESTClient(AbstractClient):
             raise ValueError(f"Failed to update agent: {response.text}")
         return AgentState(**response.json())

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools to an existing agent
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A List of Tool objs
+        """
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/tools", headers=self.headers)
+        if response.status_code != 200:
+            raise ValueError(f"Failed to get tools from agents: {response.text}")
+        return [Tool(**tool) for tool in response.json()]
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -1364,14 +1382,19 @@ class RESTClient(AbstractClient):
         # raise ValueError(f"Failed to create tool: {response.text}")
         # return ToolModel(**response.json())

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-
+        params = {}
+        if cursor:
+            params["cursor"] = str(cursor)
+        if limit:
+            params["limit"] = limit
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/tools", params=params, headers=self.headers)
         if response.status_code != 200:
             raise ValueError(f"Failed to list tools: {response.text}")
         return [Tool(**tool) for tool in response.json()]

@@ -1692,6 +1715,19 @@ class LocalClient(AbstractClient):
         )
         return agent_state

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools from an existing agent.
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A list of Tool objs
+        """
+        self.interface.clear()
+        return self.server.get_tools_from_agent(agent_id=agent_id, user_id=self.user_id)
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -2250,15 +2286,14 @@ class LocalClient(AbstractClient):
             ToolUpdate(id=id, source_type=source_type, source_code=source_code, tags=tags, name=name), self.user_id
         )

-    def list_tools(self):
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-
-        return tools
+        return self.server.list_tools(cursor=cursor, limit=limit, user_id=self.user_id)

     def get_tool(self, id: str) -> Optional[Tool]:
         """
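Taken together, the client changes add a read path for an agent's tools and route LocalClient.list_tools through the server (the old body returned an undefined tools variable). A usage sketch against a running server, assuming letta's create_client factory:

from letta import create_client  # assumes the letta client factory

client = create_client()

# Page through tools with the new cursor/limit parameters.
first_page = client.list_tools(limit=10)
print([t.name for t in first_page])

# List the tools attached to a specific agent.
agent = client.create_agent(name="demo-agent")  # illustrative agent
for tool in client.get_tools_from_agent(agent_id=agent.id):
    print(tool.name)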
letta/constants.py
CHANGED
@@ -139,7 +139,7 @@ CORE_MEMORY_PERSONA_CHAR_LIMIT = 2000
 CORE_MEMORY_HUMAN_CHAR_LIMIT = 2000

 # Function return limits
-FUNCTION_RETURN_CHAR_LIMIT =
+FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words

 MAX_PAUSE_HEARTBEATS = 360  # in min
letta/functions/helpers.py
CHANGED
@@ -5,10 +5,10 @@ from pydantic import BaseModel

 def generate_composio_tool_wrapper(action: "ActionType") -> tuple[str, str]:
     # Instantiate the object
-    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{action
+    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{str(action)}])[0]"

     # Generate func name
-    func_name = f"run_{action.name}"
+    func_name = f"run_{action.name.lower()}"

     wrapper_function_str = f"""
 def {func_name}(**kwargs):
@@ -19,7 +19,7 @@ def {func_name}(**kwargs):

     composio_toolset = ComposioToolSet()
     tool = {tool_instantiation_str}
-    tool.func(**kwargs)
+    return tool.func(**kwargs)['data']
 """

     # Compile safety check
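Three fixes land in the Composio wrapper generator: the instantiation string now interpolates str(action) and indexes the first returned tool, the generated function name is lowercased, and the wrapper returns tool.func(**kwargs)['data'] instead of discarding the result. A runnable sketch of the naming change, with a stand-in for Composio's ActionType enum (the action name is invented):

from enum import Enum

class Action(Enum):  # stand-in for composio's Action enum
    GITHUB_STAR_REPO = "github_star_repo"  # invented action name

action = Action.GITHUB_STAR_REPO
func_name = f"run_{action.name.lower()}"    # new: lowercased
assert func_name == "run_github_star_repo"  # previously "run_GITHUB_STAR_REPO"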
letta/llm_api/anthropic.py
CHANGED
@@ -53,7 +53,7 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
     return MODEL_LIST


-def convert_tools_to_anthropic_format(tools: List[Tool]
+def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use

     OpenAI style:
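The conversion helper gains an explicit List[dict] return annotation. For orientation, the transformation it names reshapes OpenAI-style tool schemas into Anthropic's flat tool format; a simplified sketch, not the function's actual body:

def to_anthropic_format_sketch(openai_tools: list) -> list:
    # OpenAI nests the schema under "function"; Anthropic's tool-use API
    # expects a flat dict with "input_schema" in place of "parameters".
    return [
        {
            "name": t["function"]["name"],
            "description": t["function"]["description"],
            "input_schema": t["function"].get("parameters", {"type": "object", "properties": {}}),
        }
        for t in openai_tools
    ]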
letta/llm_api/helpers.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any, List, Union
 import requests

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.schemas.enums import OptionState
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.utils import json_dumps, printd

@@ -200,17 +199,3 @@ def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
     # Generic fail
     else:
         return False
-
-
-def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
-    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
-        # model that are known to not use `content` fields on tool calls
-        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
-    else:
-        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
-
-    if not isinstance(inner_thoughts_in_kwargs, bool):
-        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-
-    return inner_thoughts_in_kwargs
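The deleted helper was the last piece of the per-call OptionState plumbing: call sites now read a put_inner_thoughts_in_kwargs flag straight off LLMConfig (see the llm_api_tools.py hunks below; llm_config.py also changes in this release, +24 -1). A hedged sketch of config-level defaulting that reproduces the deleted heuristic (the field name comes from the diff; the validator is illustrative, not lifted from llm_config.py):

from typing import Optional

from pydantic import BaseModel, model_validator

class LLMConfigSketch(BaseModel):
    model: str
    put_inner_thoughts_in_kwargs: Optional[bool] = None

    @model_validator(mode="after")
    def _default_inner_thoughts(self):
        if self.put_inner_thoughts_in_kwargs is None:
            # mirrors the deleted heuristic: models known not to emit
            # `content` alongside tool calls get inner thoughts as a kwarg
            self.put_inner_thoughts_in_kwargs = any(
                name in self.model for name in ("gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo")
            )
        return self

assert LLMConfigSketch(model="gpt-4o-mini").put_inner_thoughts_in_kwargs is True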
letta/llm_api/llm_api_tools.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 import random
 import time
 from typing import List, Optional, Union
@@ -8,14 +7,12 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
-from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
 from letta.llm_api.helpers import (
     add_inner_thoughts_to_functions,
-    derive_inner_thoughts_in_kwargs,
     unpack_all_inner_thoughts_from_kwargs,
 )
 from letta.llm_api.openai import (
@@ -28,7 +25,6 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
-from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import (
@@ -120,9 +116,6 @@ def create(
     # streaming?
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
-    # TODO move to llm_config?
-    # if unspecified (None), default to something we've tested
-    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
@@ -146,10 +139,7 @@ def create(
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")

-
-        data = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
-        )
+        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)

         if stream:  # Client requested token streaming
             data.stream = True
@@ -176,7 +166,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -198,9 +188,8 @@ def create(
         # Set the llm config model_endpoint from model_settings
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
-        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
         chat_completion_request = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming,
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
         )

         response = azure_openai_chat_completions_request(
@@ -210,7 +199,7 @@ def create(
             chat_completion_request=chat_completion_request,
         )

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -224,7 +213,7 @@ def create(
         if functions is not None:
             tools = [{"type": "function", "function": f} for f in functions]
             tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
         else:
             tools = None

@@ -237,7 +226,7 @@ def create(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
             ),
-            inner_thoughts_in_kwargs=
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

     elif llm_config.model_endpoint_type == "anthropic":
@@ -260,32 +249,32 @@ def create(
             ),
         )

-    elif llm_config.model_endpoint_type == "cohere":
-    [25 further removed lines of the old Cohere branch; their content is not preserved in the source diff]
+    # elif llm_config.model_endpoint_type == "cohere":
+    #     if stream:
+    #         raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+    #     if not use_tool_naming:
+    #         raise NotImplementedError("Only tool calling supported on Cohere API requests")
+    #
+    #     if functions is not None:
+    #         tools = [{"type": "function", "function": f} for f in functions]
+    #         tools = [Tool(**t) for t in tools]
+    #     else:
+    #         tools = None
+    #
+    #     return cohere_chat_completions_request(
+    #         # url=llm_config.model_endpoint,
+    #         url="https://api.cohere.ai/v1",  # TODO
+    #         api_key=os.getenv("COHERE_API_KEY"),  # TODO remove
+    #         chat_completion_request=ChatCompletionRequest(
+    #             model="command-r-plus",  # TODO
+    #             messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
+    #             tools=tools,
+    #             tool_choice=function_call,
+    #             # user=str(user_id),
+    #             # NOTE: max_tokens is required for Anthropic API
+    #             # max_tokens=1024,  # TODO make dynamic
+    #         ),
+    #     )

     elif llm_config.model_endpoint_type == "groq":
         if stream:
@@ -295,8 +284,7 @@ def create(
             raise ValueError(f"Groq key is missing from letta config file")

         # force to true for groq, since they don't support 'content' is non-null
-
-        if inner_thoughts_in_kwargs:
+        if llm_config.put_inner_thoughts_in_kwargs:
             functions = add_inner_thoughts_to_functions(
                 functions=functions,
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -306,7 +294,7 @@ def create(
         tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
         data = ChatCompletionRequest(
             model=llm_config.model,
-            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs) for m in messages],
             tools=tools,
             tool_choice=function_call,
             user=str(user_id),
@@ -335,7 +323,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
letta/llm_api/openai.py
CHANGED
@@ -18,8 +18,13 @@ from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_request import (
-
+    FunctionCall as ToolFunctionChoiceFunctionCall,
+)
+from letta.schemas.openai.chat_completion_request import (
+    Tool,
+    ToolFunctionChoice,
     cast_message_to_subtype,
 )
 from letta.schemas.openai.chat_completion_response import (
@@ -100,15 +105,14 @@ def openai_get_model_list(

 def build_openai_chat_completions_request(
     llm_config: LLMConfig,
-    messages: List[
+    messages: List[_Message],
     user_id: Optional[str],
     functions: Optional[list],
-    function_call: str,
+    function_call: Optional[str],
     use_tool_naming: bool,
-    inner_thoughts_in_kwargs: bool,
     max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
-    if
+    if llm_config.put_inner_thoughts_in_kwargs:
         functions = add_inner_thoughts_to_functions(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -116,7 +120,7 @@ def build_openai_chat_completions_request(
         )

     openai_message_list = [
-        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)) for m in messages
     ]
     if llm_config.model:
         model = llm_config.model
@@ -125,11 +129,17 @@ def build_openai_chat_completions_request(
         model = None

     if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
         data = ChatCompletionRequest(
             model=model,
             messages=openai_message_list,
-            tools=[
-            tool_choice=
+            tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+            tool_choice=tool_choice,
             user=str(user_id),
             max_tokens=max_tokens,
         )
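The new tool_choice branch above maps the incoming function_call value onto OpenAI's tool_choice field: None passes through, the "none"/"auto"/"required" literals pass through unchanged, and any other string is treated as the name of one specific function to force. On the wire that amounts to the following (sketch; the real code builds ToolFunctionChoice pydantic models rather than raw dicts):

def map_tool_choice(function_call):
    if function_call is None:
        return None
    if function_call not in ("none", "auto", "required"):
        # a concrete name forces that one tool
        return {"type": "function", "function": {"name": function_call}}
    return function_call

assert map_tool_choice(None) is None
assert map_tool_choice("auto") == "auto"
assert map_tool_choice("send_message") == {"type": "function", "function": {"name": "send_message"}}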
letta/local_llm/llm_chat_completion_wrappers/chatml.py
CHANGED
@@ -188,7 +188,7 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
             try:
                 # indent the function replies
                 function_return_dict = json_loads(message["content"])
-                function_return_str = json_dumps(function_return_dict, indent=
+                function_return_str = json_dumps(function_return_dict, indent=0)
             except:
                 function_return_str = message["content"]

letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py
CHANGED
@@ -183,7 +183,7 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
             try:
                 # indent the function replies
                 function_return_dict = json_loads(message["content"])
-                function_return_str = json_dumps(function_return_dict, indent=
+                function_return_str = json_dumps(function_return_dict, indent=0)
             except:
                 function_return_str = message["content"]
letta/local_llm/utils.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import List
+from typing import List, Union

 import requests
 import tiktoken
@@ -11,6 +11,7 @@ import letta.local_llm.llm_chat_completion_wrappers.configurable_wrapper as configurable_wrapper
 import letta.local_llm.llm_chat_completion_wrappers.dolphin as dolphin
 import letta.local_llm.llm_chat_completion_wrappers.llama3 as llama3
 import letta.local_llm.llm_chat_completion_wrappers.zephyr as zephyr
+from letta.schemas.openai.chat_completion_request import Tool, ToolCall


 def post_json_auth_request(uri, json_payload, auth_type, auth_key):
@@ -123,7 +124,7 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
     return num_tokens


-def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):
+def num_tokens_from_tool_calls(tool_calls: Union[List[dict], List[ToolCall]], model: str = "gpt-4"):
     """Based on above code (num_tokens_from_functions).

     Example to encode:
@@ -144,10 +145,25 @@ def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):

     num_tokens = 0
     for tool_call in tool_calls:
-    [4 removed lines; their content is not preserved in the source diff]
+        if isinstance(tool_call, dict):
+            tool_call_id = tool_call["id"]
+            tool_call_type = tool_call["type"]
+            tool_call_function = tool_call["function"]
+            tool_call_function_name = tool_call_function["name"]
+            tool_call_function_arguments = tool_call_function["arguments"]
+        elif isinstance(tool_call, Tool):
+            tool_call_id = tool_call.id
+            tool_call_type = tool_call.type
+            tool_call_function = tool_call.function
+            tool_call_function_name = tool_call_function.name
+            tool_call_function_arguments = tool_call_function.arguments
+        else:
+            raise ValueError(f"Unknown tool call type: {type(tool_call)}")
+
+        function_tokens = len(encoding.encode(tool_call_id))
+        function_tokens += 2 + len(encoding.encode(tool_call_type))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_name))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_arguments))

         num_tokens += function_tokens
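num_tokens_from_tool_calls now accepts pydantic objects alongside raw dicts, extracting the same five fields from either shape (note the runtime check in the hunk is isinstance(tool_call, Tool) even though the annotation says List[ToolCall]). A usage sketch with the dict shape and an invented payload:

from letta.local_llm.utils import num_tokens_from_tool_calls

tool_call = {
    "id": "call_abc123",  # invented example values
    "type": "function",
    "function": {"name": "send_message", "arguments": '{"message": "hi"}'},
}
print(num_tokens_from_tool_calls(tool_calls=[tool_call], model="gpt-4"))
# -> a small int; the exact value depends on the tokenizer for `model`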
letta/main.py
CHANGED
@@ -20,7 +20,6 @@ from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
 from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.metadata import MetadataStore
-from letta.schemas.enums import OptionState

 # from letta.interface import CLIInterface as interface  # for printing to terminal
 from letta.streaming_interface import AgentRefreshStreamingInterface
@@ -64,7 +63,6 @@ def run_agent_loop(
     no_verify: bool = False,
     strip_ui: bool = False,
     stream: bool = False,
-    inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
 ):
     if isinstance(letta_agent.interface, AgentRefreshStreamingInterface):
         # letta_agent.interface.toggle_streaming(on=stream)
@@ -369,7 +367,6 @@ def run_agent_loop(
                     first_message=False,
                     skip_verify=no_verify,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                     ms=ms,
                 )
             else:
@@ -378,7 +375,6 @@ def run_agent_loop(
                     first_message=False,
                     skip_verify=no_verify,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                     ms=ms,
                 )
                 new_messages = step_response.messages