letta-nightly 0.5.0.dev20241017104103__py3-none-any.whl → 0.5.0.dev20241019104023__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (32)
  1. letta/agent.py +29 -14
  2. letta/cli/cli.py +0 -2
  3. letta/client/client.py +41 -6
  4. letta/constants.py +1 -1
  5. letta/functions/helpers.py +3 -3
  6. letta/llm_api/anthropic.py +1 -1
  7. letta/llm_api/helpers.py +0 -15
  8. letta/llm_api/llm_api_tools.py +35 -47
  9. letta/llm_api/openai.py +18 -8
  10. letta/local_llm/llm_chat_completion_wrappers/chatml.py +1 -1
  11. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +1 -1
  12. letta/local_llm/utils.py +22 -6
  13. letta/main.py +0 -4
  14. letta/metadata.py +19 -6
  15. letta/o1_agent.py +87 -0
  16. letta/personas/examples/o1_persona.txt +5 -0
  17. letta/prompts/system/memgpt_modified_o1.txt +31 -0
  18. letta/schemas/agent.py +30 -2
  19. letta/schemas/llm_config.py +24 -1
  20. letta/schemas/memory.py +4 -0
  21. letta/schemas/openai/chat_completion_request.py +2 -2
  22. letta/schemas/tool.py +34 -2
  23. letta/server/rest_api/app.py +1 -0
  24. letta/server/rest_api/routers/v1/agents.py +14 -6
  25. letta/server/rest_api/routers/v1/tools.py +9 -6
  26. letta/server/server.py +63 -22
  27. letta/settings.py +3 -0
  28. {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/METADATA +2 -2
  29. {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/RECORD +32 -29
  30. {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/LICENSE +0 -0
  31. {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/WHEEL +0 -0
  32. {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/entry_points.txt +0 -0
letta/agent.py CHANGED
@@ -23,16 +23,19 @@ from letta.errors import LLMError
 from letta.interface import AgentInterface
 from letta.llm_api.helpers import is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
-from letta.local_llm.utils import num_tokens_from_messages
+from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
 from letta.schemas.agent import AgentState, AgentStepResponse
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import MessageRole, OptionState
+from letta.schemas.enums import MessageRole
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, UpdateMessage
+from letta.schemas.openai.chat_completion_request import (
+    Tool as ChatCompletionRequestTool,
+)
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import (
     Message as ChatCompletionMessage,
@@ -463,15 +466,14 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
             response = create(
                 # agent_state=self.agent_state,
                 llm_config=self.agent_state.llm_config,
-                user_id=self.agent_state.user_id,
                 messages=message_sequence,
+                user_id=self.agent_state.user_id,
                 functions=self.functions,
                 functions_python=self.functions_python,
                 function_call=function_call,
@@ -480,8 +482,6 @@ class Agent(BaseAgent):
                 # streaming
                 stream=stream,
                 stream_interface=self.interface,
-                # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )

             if len(response.choices) == 0 or response.choices[0] is None:
@@ -822,7 +822,6 @@ class Agent(BaseAgent):
         first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
         skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
@@ -861,10 +860,7 @@ class Agent(BaseAgent):
             counter = 0
             while True:
                 response = self._get_ai_reply(
-                    message_sequence=input_message_sequence,
-                    first_message=True,  # passed through to the prompt formatter
-                    stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
+                    message_sequence=input_message_sequence, first_message=True, stream=stream  # passed through to the prompt formatter
                 )
                 if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                     break
@@ -877,7 +873,6 @@ class Agent(BaseAgent):
             response = self._get_ai_reply(
                 message_sequence=input_message_sequence,
                 stream=stream,
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )

             # Step 3: check if LLM wanted to call a function
@@ -954,7 +949,6 @@ class Agent(BaseAgent):
             first_message_retry_limit=first_message_retry_limit,
             skip_verify=skip_verify,
             stream=stream,
-            inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             ms=ms,
         )

@@ -1467,6 +1461,24 @@ class Agent(BaseAgent):
         )
         num_tokens_external_memory_summary = count_tokens(external_memory_summary)

+        # tokens taken up by function definitions
+        if self.functions:
+            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in self.functions]
+            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=self.functions, model=self.model)
+        else:
+            available_functions_definitions = []
+            num_tokens_available_functions_definitions = 0
+
+        num_tokens_used_total = (
+            num_tokens_system  # system prompt
+            + num_tokens_available_functions_definitions  # function definitions
+            + num_tokens_core_memory  # core memory
+            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
+            + num_tokens_summary_memory  # summary of ongoing conversation
+            + num_tokens_messages  # tokens taken by messages
+        )
+        assert isinstance(num_tokens_used_total, int)
+
         return ContextWindowOverview(
             # context window breakdown (in messages)
             num_messages=len(self._messages),
@@ -1475,7 +1487,7 @@ class Agent(BaseAgent):
             num_tokens_external_memory_summary=num_tokens_external_memory_summary,
             # top-level information
             context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_system + num_tokens_core_memory + num_tokens_summary_memory + num_tokens_messages,
+            context_window_size_current=num_tokens_used_total,
             # context window breakdown (in tokens)
             num_tokens_system=num_tokens_system,
             system_prompt=system_prompt,
@@ -1485,6 +1497,9 @@ class Agent(BaseAgent):
             summary_memory=summary_memory,
             num_tokens_messages=num_tokens_messages,
             messages=self._messages,
+            # related to functions
+            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
+            functions_definitions=available_functions_definitions,
         )
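The context-window accounting above now counts tool/function definitions, which previously went untallied even though they are serialized into the prompt. A minimal, self-contained sketch of why such definitions cost tokens, using tiktoken directly rather than letta's num_tokens_from_functions (the function schema below is a made-up example):

    import json

    import tiktoken

    # One OpenAI-style function definition (hypothetical example schema)
    functions = [
        {
            "name": "send_message",
            "description": "Send a message to the user.",
            "parameters": {
                "type": "object",
                "properties": {"message": {"type": "string"}},
                "required": ["message"],
            },
        }
    ]

    encoding = tiktoken.encoding_for_model("gpt-4")
    # Rough estimate: tokenize the JSON-serialized definitions. letta's
    # num_tokens_from_functions does a finer field-by-field count.
    num_tokens = sum(len(encoding.encode(json.dumps(f))) for f in functions)
    print(f"~{num_tokens} tokens consumed by function definitions")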
letta/cli/cli.py CHANGED
@@ -49,7 +49,6 @@ def server(
     ade: Annotated[bool, typer.Option(help="Allows remote access")] = False,
 ):
     """Launch a Letta server process"""
-
     if type == ServerChoice.rest_api:
         pass

@@ -321,7 +320,6 @@ def run(
         ms=ms,
         no_verify=no_verify,
         stream=stream,
-        inner_thoughts_in_kwargs=no_content,
     )  # TODO: add back no_verify
letta/client/client.py CHANGED
@@ -96,6 +96,9 @@ class AbstractClient(object):
     ):
         raise NotImplementedError

+    def get_tools_from_agent(self, agent_id: str):
+        raise NotImplementedError
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         raise NotImplementedError

@@ -197,7 +200,7 @@ class AbstractClient(object):
     ) -> Tool:
         raise NotImplementedError

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         raise NotImplementedError

     def get_tool(self, id: str) -> Tool:
@@ -480,6 +483,21 @@ class RESTClient(AbstractClient):
             raise ValueError(f"Failed to update agent: {response.text}")
         return AgentState(**response.json())

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools to an existing agent
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A List of Tool objs
+        """
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/tools", headers=self.headers)
+        if response.status_code != 200:
+            raise ValueError(f"Failed to get tools from agents: {response.text}")
+        return [Tool(**tool) for tool in response.json()]
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -1364,14 +1382,19 @@ class RESTClient(AbstractClient):
     #     raise ValueError(f"Failed to create tool: {response.text}")
     # return ToolModel(**response.json())

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-        response = requests.get(f"{self.base_url}/{self.api_prefix}/tools", headers=self.headers)
+        params = {}
+        if cursor:
+            params["cursor"] = str(cursor)
+        if limit:
+            params["limit"] = limit
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/tools", params=params, headers=self.headers)
         if response.status_code != 200:
             raise ValueError(f"Failed to list tools: {response.text}")
         return [Tool(**tool) for tool in response.json()]
@@ -1692,6 +1715,19 @@ class LocalClient(AbstractClient):
         )
         return agent_state

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools from an existing agent.
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A list of Tool objs
+        """
+        self.interface.clear()
+        return self.server.get_tools_from_agent(agent_id=agent_id, user_id=self.user_id)
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -2250,15 +2286,14 @@ class LocalClient(AbstractClient):
             ToolUpdate(id=id, source_type=source_type, source_code=source_code, tags=tags, name=name), self.user_id
         )

-    def list_tools(self):
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-        tools = self.server.list_tools(user_id=self.user_id)
-        return tools
+        return self.server.list_tools(cursor=cursor, limit=limit, user_id=self.user_id)

     def get_tool(self, id: str) -> Optional[Tool]:
         """
letta/constants.py CHANGED
@@ -139,7 +139,7 @@ CORE_MEMORY_PERSONA_CHAR_LIMIT = 2000
 CORE_MEMORY_HUMAN_CHAR_LIMIT = 2000

 # Function return limits
-FUNCTION_RETURN_CHAR_LIMIT = 3000  # ~300 words
+FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words

 MAX_PAUSE_HEARTBEATS = 360  # in min
letta/functions/helpers.py CHANGED
@@ -5,10 +5,10 @@ from pydantic import BaseModel

 def generate_composio_tool_wrapper(action: "ActionType") -> tuple[str, str]:
     # Instantiate the object
-    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{action.name}])[0]"
+    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{str(action)}])[0]"

     # Generate func name
-    func_name = f"run_{action.name}"
+    func_name = f"run_{action.name.lower()}"

     wrapper_function_str = f"""
 def {func_name}(**kwargs):
@@ -19,7 +19,7 @@ def {func_name}(**kwargs):

     composio_toolset = ComposioToolSet()
     tool = {tool_instantiation_str}
-    tool.func(**kwargs)
+    return tool.func(**kwargs)['data']
 """

     # Compile safety check
letta/llm_api/anthropic.py CHANGED
@@ -53,7 +53,7 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
     return MODEL_LIST


-def convert_tools_to_anthropic_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
+def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use

     OpenAI style:
letta/llm_api/helpers.py CHANGED
@@ -6,7 +6,6 @@ from typing import Any, List, Union
 import requests

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.schemas.enums import OptionState
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.utils import json_dumps, printd

@@ -200,17 +199,3 @@ def is_context_overflow_error(exception: Union[requests.exceptions.RequestExcept
     # Generic fail
     else:
         return False
-
-
-def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
-    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
-        # model that are known to not use `content` fields on tool calls
-        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
-    else:
-        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
-
-    if not isinstance(inner_thoughts_in_kwargs, bool):
-        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-
-    return inner_thoughts_in_kwargs
letta/llm_api/llm_api_tools.py CHANGED
@@ -1,4 +1,3 @@
-import os
 import random
 import time
 from typing import List, Optional, Union
@@ -8,14 +7,12 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
-from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
 from letta.llm_api.helpers import (
     add_inner_thoughts_to_functions,
-    derive_inner_thoughts_in_kwargs,
     unpack_all_inner_thoughts_from_kwargs,
 )
 from letta.llm_api.openai import (
@@ -28,7 +25,6 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
-from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import (
@@ -120,9 +116,6 @@ def create(
     # streaming?
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
-    # TODO move to llm_config?
-    # if unspecified (None), default to something we've tested
-    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
@@ -146,10 +139,7 @@ def create(
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")

-        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model)
-        data = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
-        )
+        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)

         if stream:  # Client requested token streaming
             data.stream = True
@@ -176,7 +166,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if inner_thoughts_in_kwargs:
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -198,9 +188,8 @@ def create(
         # Set the llm config model_endpoint from model_settings
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
-        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
         chat_completion_request = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
         )

         response = azure_openai_chat_completions_request(
@@ -210,7 +199,7 @@ def create(
             chat_completion_request=chat_completion_request,
         )

-        if inner_thoughts_in_kwargs:
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -224,7 +213,7 @@ def create(
         if functions is not None:
             tools = [{"type": "function", "function": f} for f in functions]
             tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=True)
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
         else:
             tools = None

@@ -237,7 +226,7 @@ def create(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
             ),
-            inner_thoughts_in_kwargs=True,
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

     elif llm_config.model_endpoint_type == "anthropic":
@@ -260,32 +249,32 @@ def create(
             ),
         )

-    elif llm_config.model_endpoint_type == "cohere":
-        if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
-        if not use_tool_naming:
-            raise NotImplementedError("Only tool calling supported on Cohere API requests")
-
-        if functions is not None:
-            tools = [{"type": "function", "function": f} for f in functions]
-            tools = [Tool(**t) for t in tools]
-        else:
-            tools = None
-
-        return cohere_chat_completions_request(
-            # url=llm_config.model_endpoint,
-            url="https://api.cohere.ai/v1",  # TODO
-            api_key=os.getenv("COHERE_API_KEY"),  # TODO remove
-            chat_completion_request=ChatCompletionRequest(
-                model="command-r-plus",  # TODO
-                messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                tools=tools,
-                tool_choice=function_call,
-                # user=str(user_id),
-                # NOTE: max_tokens is required for Anthropic API
-                # max_tokens=1024, # TODO make dynamic
-            ),
-        )
+    # elif llm_config.model_endpoint_type == "cohere":
+    #     if stream:
+    #         raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+    #     if not use_tool_naming:
+    #         raise NotImplementedError("Only tool calling supported on Cohere API requests")
+    #
+    #     if functions is not None:
+    #         tools = [{"type": "function", "function": f} for f in functions]
+    #         tools = [Tool(**t) for t in tools]
+    #     else:
+    #         tools = None
+    #
+    #     return cohere_chat_completions_request(
+    #         # url=llm_config.model_endpoint,
+    #         url="https://api.cohere.ai/v1",  # TODO
+    #         api_key=os.getenv("COHERE_API_KEY"),  # TODO remove
+    #         chat_completion_request=ChatCompletionRequest(
+    #             model="command-r-plus",  # TODO
+    #             messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
+    #             tools=tools,
+    #             tool_choice=function_call,
+    #             # user=str(user_id),
+    #             # NOTE: max_tokens is required for Anthropic API
+    #             # max_tokens=1024, # TODO make dynamic
+    #         ),
+    #     )

     elif llm_config.model_endpoint_type == "groq":
         if stream:
@@ -295,8 +284,7 @@ def create(
            raise ValueError(f"Groq key is missing from letta config file")

        # force to true for groq, since they don't support 'content' is non-null
-       inner_thoughts_in_kwargs = True
-       if inner_thoughts_in_kwargs:
+       if llm_config.put_inner_thoughts_in_kwargs:
            functions = add_inner_thoughts_to_functions(
                functions=functions,
                inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -306,7 +294,7 @@ def create(
         tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
         data = ChatCompletionRequest(
             model=llm_config.model,
-            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs) for m in messages],
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs) for m in messages],
             tools=tools,
             tool_choice=function_call,
             user=str(user_id),
@@ -335,7 +323,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if inner_thoughts_in_kwargs:
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
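The recurring theme in this file: the per-call inner_thoughts_in_kwargs_option: OptionState argument is gone, and each provider branch now reads llm_config.put_inner_thoughts_in_kwargs instead. A sketch of the new configuration surface (field values below are illustrative; the field itself is added to LLMConfig in letta/schemas/llm_config.py, +24 -1 in this diff):

    from letta.schemas.llm_config import LLMConfig

    # put_inner_thoughts_in_kwargs is now carried by the config object,
    # so create() no longer needs it threaded through as an argument.
    config = LLMConfig(
        model="gpt-4o-mini",
        model_endpoint_type="openai",
        model_endpoint="https://api.openai.com/v1",
        context_window=128000,
        put_inner_thoughts_in_kwargs=True,
    )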
letta/llm_api/openai.py CHANGED
@@ -18,8 +18,13 @@ from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_request import (
-    ChatCompletionRequest,
+    FunctionCall as ToolFunctionChoiceFunctionCall,
+)
+from letta.schemas.openai.chat_completion_request import (
+    Tool,
+    ToolFunctionChoice,
     cast_message_to_subtype,
 )
 from letta.schemas.openai.chat_completion_response import (
@@ -100,15 +105,14 @@ def openai_get_model_list(

 def build_openai_chat_completions_request(
     llm_config: LLMConfig,
-    messages: List[Message],
+    messages: List[_Message],
     user_id: Optional[str],
     functions: Optional[list],
-    function_call: str,
+    function_call: Optional[str],
     use_tool_naming: bool,
-    inner_thoughts_in_kwargs: bool,
     max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
-    if inner_thoughts_in_kwargs:
+    if llm_config.put_inner_thoughts_in_kwargs:
         functions = add_inner_thoughts_to_functions(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -116,7 +120,7 @@ def build_openai_chat_completions_request(
         )

     openai_message_list = [
-        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)) for m in messages
     ]
     if llm_config.model:
         model = llm_config.model
@@ -125,11 +129,17 @@ def build_openai_chat_completions_request(
         model = None

     if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
         data = ChatCompletionRequest(
             model=model,
             messages=openai_message_list,
-            tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-            tool_choice=function_call,
+            tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+            tool_choice=tool_choice,
             user=str(user_id),
             max_tokens=max_tokens,
         )
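The new branch above normalizes function_call into a valid tool_choice: None passes through, the OpenAI keywords "none"/"auto"/"required" pass through, and any other string is treated as a specific function name to force. The same logic, extracted as a standalone sketch with plain dicts standing in for the pydantic ToolFunctionChoice models:

    from typing import Optional, Union

    def normalize_tool_choice(function_call: Optional[str]) -> Union[None, str, dict]:
        if function_call is None:
            return None
        if function_call in ("none", "auto", "required"):
            return function_call  # OpenAI keywords pass through unchanged
        # anything else is interpreted as a function name to force-call
        return {"type": "function", "function": {"name": function_call}}

    assert normalize_tool_choice("auto") == "auto"
    assert normalize_tool_choice("send_message") == {
        "type": "function",
        "function": {"name": "send_message"},
    }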
letta/local_llm/llm_chat_completion_wrappers/chatml.py CHANGED
@@ -188,7 +188,7 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
         try:
             # indent the function replies
             function_return_dict = json_loads(message["content"])
-            function_return_str = json_dumps(function_return_dict, indent=self.json_indent)
+            function_return_str = json_dumps(function_return_dict, indent=0)
         except:
             function_return_str = message["content"]
letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py CHANGED
@@ -183,7 +183,7 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
         try:
             # indent the function replies
             function_return_dict = json_loads(message["content"])
-            function_return_str = json_dumps(function_return_dict, indent=self.json_indent)
+            function_return_str = json_dumps(function_return_dict, indent=0)
         except:
             function_return_str = message["content"]
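Both wrapper changes above (here and in chatml.py) pin the indent for function replies to 0 instead of self.json_indent. With Python's json module, indent=0 still inserts newlines but no leading spaces, so the output stays line-structured while spending fewer tokens than indent=2. A stdlib illustration (letta's json_dumps is assumed to wrap json.dumps):

    import json

    payload = {"status": "OK", "message": "hello"}
    print(json.dumps(payload, indent=2))  # newlines + 2-space indentation
    print(json.dumps(payload, indent=0))  # newlines, zero indentation
    print(json.dumps(payload))            # everything on a single line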
letta/local_llm/utils.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import List
+from typing import List, Union

 import requests
 import tiktoken
@@ -11,6 +11,7 @@ import letta.local_llm.llm_chat_completion_wrappers.configurable_wrapper as conf
 import letta.local_llm.llm_chat_completion_wrappers.dolphin as dolphin
 import letta.local_llm.llm_chat_completion_wrappers.llama3 as llama3
 import letta.local_llm.llm_chat_completion_wrappers.zephyr as zephyr
+from letta.schemas.openai.chat_completion_request import Tool, ToolCall


 def post_json_auth_request(uri, json_payload, auth_type, auth_key):
@@ -123,7 +124,7 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
     return num_tokens


-def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):
+def num_tokens_from_tool_calls(tool_calls: Union[List[dict], List[ToolCall]], model: str = "gpt-4"):
     """Based on above code (num_tokens_from_functions).

     Example to encode:
@@ -144,10 +145,25 @@ def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):

     num_tokens = 0
     for tool_call in tool_calls:
-        function_tokens = len(encoding.encode(tool_call["id"]))
-        function_tokens += 2 + len(encoding.encode(tool_call["type"]))
-        function_tokens += 2 + len(encoding.encode(tool_call["function"]["name"]))
-        function_tokens += 2 + len(encoding.encode(tool_call["function"]["arguments"]))
+        if isinstance(tool_call, dict):
+            tool_call_id = tool_call["id"]
+            tool_call_type = tool_call["type"]
+            tool_call_function = tool_call["function"]
+            tool_call_function_name = tool_call_function["name"]
+            tool_call_function_arguments = tool_call_function["arguments"]
+        elif isinstance(tool_call, Tool):
+            tool_call_id = tool_call.id
+            tool_call_type = tool_call.type
+            tool_call_function = tool_call.function
+            tool_call_function_name = tool_call_function.name
+            tool_call_function_arguments = tool_call_function.arguments
+        else:
+            raise ValueError(f"Unknown tool call type: {type(tool_call)}")
+
+        function_tokens = len(encoding.encode(tool_call_id))
+        function_tokens += 2 + len(encoding.encode(tool_call_type))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_name))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_arguments))

         num_tokens += function_tokens
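After this change, num_tokens_from_tool_calls accepts either plain dicts (the OpenAI wire format) or the pydantic objects, branching on isinstance. A hedged usage sketch with the dict form (the tool-call payload is a made-up example):

    from letta.local_llm.utils import num_tokens_from_tool_calls

    tool_calls = [
        {
            "id": "call_abc123",
            "type": "function",
            "function": {"name": "get_weather", "arguments": '{"city": "Berlin"}'},
        }
    ]
    print(num_tokens_from_tool_calls(tool_calls, model="gpt-4"))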
letta/main.py CHANGED
@@ -20,7 +20,6 @@ from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
 from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.metadata import MetadataStore
-from letta.schemas.enums import OptionState

 # from letta.interface import CLIInterface as interface # for printing to terminal
 from letta.streaming_interface import AgentRefreshStreamingInterface
@@ -64,7 +63,6 @@ def run_agent_loop(
     no_verify: bool = False,
     strip_ui: bool = False,
     stream: bool = False,
-    inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
 ):
     if isinstance(letta_agent.interface, AgentRefreshStreamingInterface):
         # letta_agent.interface.toggle_streaming(on=stream)
@@ -369,7 +367,6 @@ def run_agent_loop(
                 first_message=False,
                 skip_verify=no_verify,
                 stream=stream,
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                 ms=ms,
             )
         else:
@@ -378,7 +375,6 @@ def run_agent_loop(
                 first_message=False,
                 skip_verify=no_verify,
                 stream=stream,
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                 ms=ms,
             )
         new_messages = step_response.messages