letta-nightly 0.6.13.dev20250122185528__py3-none-any.whl → 0.6.14.dev20250123104106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. letta/__init__.py +2 -2
  2. letta/agent.py +69 -100
  3. letta/chat_only_agent.py +1 -1
  4. letta/client/client.py +153 -137
  5. letta/constants.py +1 -8
  6. letta/data_sources/connectors.py +1 -1
  7. letta/functions/helpers.py +29 -4
  8. letta/functions/schema_generator.py +55 -0
  9. letta/llm_api/helpers.py +51 -1
  10. letta/memory.py +9 -7
  11. letta/orm/agent.py +2 -2
  12. letta/orm/block.py +3 -1
  13. letta/orm/custom_columns.py +5 -4
  14. letta/orm/enums.py +1 -0
  15. letta/orm/message.py +2 -2
  16. letta/orm/sqlalchemy_base.py +5 -0
  17. letta/schemas/agent.py +3 -3
  18. letta/schemas/block.py +2 -2
  19. letta/schemas/environment_variables.py +1 -1
  20. letta/schemas/job.py +1 -1
  21. letta/schemas/letta_base.py +6 -0
  22. letta/schemas/letta_message.py +6 -6
  23. letta/schemas/memory.py +3 -2
  24. letta/schemas/message.py +21 -13
  25. letta/schemas/passage.py +1 -1
  26. letta/schemas/source.py +4 -4
  27. letta/schemas/tool.py +38 -43
  28. letta/server/rest_api/app.py +1 -16
  29. letta/server/rest_api/routers/v1/agents.py +101 -84
  30. letta/server/rest_api/routers/v1/blocks.py +8 -46
  31. letta/server/rest_api/routers/v1/jobs.py +4 -4
  32. letta/server/rest_api/routers/v1/providers.py +2 -2
  33. letta/server/rest_api/routers/v1/runs.py +6 -6
  34. letta/server/rest_api/routers/v1/sources.py +8 -38
  35. letta/server/rest_api/routers/v1/tags.py +1 -1
  36. letta/server/rest_api/routers/v1/tools.py +6 -7
  37. letta/server/server.py +3 -3
  38. letta/services/agent_manager.py +43 -9
  39. letta/services/block_manager.py +3 -3
  40. letta/services/job_manager.py +5 -3
  41. letta/services/organization_manager.py +1 -1
  42. letta/services/passage_manager.py +3 -3
  43. letta/services/provider_manager.py +2 -2
  44. letta/services/sandbox_config_manager.py +2 -2
  45. letta/services/source_manager.py +3 -3
  46. letta/services/tool_execution_sandbox.py +3 -1
  47. letta/services/tool_manager.py +8 -3
  48. letta/services/user_manager.py +2 -2
  49. letta/settings.py +29 -0
  50. letta/system.py +2 -2
  51. {letta_nightly-0.6.13.dev20250122185528.dist-info → letta_nightly-0.6.14.dev20250123104106.dist-info}/METADATA +1 -1
  52. {letta_nightly-0.6.13.dev20250122185528.dist-info → letta_nightly-0.6.14.dev20250123104106.dist-info}/RECORD +55 -61
  53. letta/server/rest_api/routers/openai/__init__.py +0 -0
  54. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  55. letta/server/rest_api/routers/openai/assistants/assistants.py +0 -115
  56. letta/server/rest_api/routers/openai/assistants/schemas.py +0 -115
  57. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  58. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -120
  59. {letta_nightly-0.6.13.dev20250122185528.dist-info → letta_nightly-0.6.14.dev20250123104106.dist-info}/LICENSE +0 -0
  60. {letta_nightly-0.6.13.dev20250122185528.dist-info → letta_nightly-0.6.14.dev20250123104106.dist-info}/WHEEL +0 -0
  61. {letta_nightly-0.6.13.dev20250122185528.dist-info → letta_nightly-0.6.14.dev20250123104106.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,5 @@
-__version__ = "0.6.13"
+
+__version__ = "0.6.14"
 
 
 # import clients
@@ -15,7 +16,6 @@ from letta.schemas.letta_message import LettaMessage
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary
 from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_response import UsageStatistics
 from letta.schemas.organization import Organization
 from letta.schemas.passage import Passage
 from letta.schemas.source import Source
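
Note for downstream users: this hunk removes only the top-level re-export of UsageStatistics; the class itself still lives at its defining module (assuming no replacement re-export was added elsewhere in this release). A likely one-line migration for code that imported it from the package root:

    # Before (0.6.13): re-exported at the package root
    # from letta import UsageStatistics

    # After (0.6.14): import from the defining module, which this diff leaves intact
    from letta.schemas.openai.chat_completion_response import UsageStatistics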
letta/agent.py CHANGED
@@ -5,6 +5,8 @@ import warnings
 from abc import ABC, abstractmethod
 from typing import List, Optional, Tuple, Union
 
+from openai.types.beta.function_tool import FunctionTool as OpenAITool
+
 from letta.constants import (
     CLI_WARNING_PREFIX,
     ERROR_MESSAGE_PREFIX,
@@ -13,9 +15,6 @@ from letta.constants import (
     LETTA_CORE_TOOL_MODULE_NAME,
     LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
     LLM_MAX_TOKENS,
-    MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST,
-    MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC,
-    MESSAGE_SUMMARY_WARNING_FRAC,
     REQ_HEARTBEAT_MESSAGE,
 )
 from letta.errors import ContextWindowExceededError
@@ -23,7 +22,7 @@ from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_fun
 from letta.functions.functions import get_function_from_module
 from letta.helpers import ToolRulesSolver
 from letta.interface import AgentInterface
-from letta.llm_api.helpers import is_context_overflow_error
+from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -36,7 +35,6 @@ from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_request import Tool as ChatCompletionRequestTool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import Message as ChatCompletionMessage
 from letta.schemas.openai.chat_completion_response import UsageStatistics
@@ -52,6 +50,7 @@ from letta.services.passage_manager import PassageManager
 from letta.services.provider_manager import ProviderManager
 from letta.services.step_manager import StepManager
 from letta.services.tool_execution_sandbox import ToolExecutionSandbox
+from letta.settings import summarizer_settings
 from letta.streaming_interface import StreamingRefreshCLIInterface
 from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
 from letta.utils import (
@@ -66,6 +65,8 @@ from letta.utils import (
     validate_function_response,
 )
 
+logger = get_logger(__name__)
+
 
 class BaseAgent(ABC):
     """
@@ -635,7 +636,7 @@ class Agent(BaseAgent):
                 self.logger.info(f"Hit max chaining steps, stopping after {counter} steps")
                 break
             # Chain handlers
-            elif token_warning:
+            elif token_warning and summarizer_settings.send_memory_warning_message:
                 assert self.agent_state.created_by_id is not None
                 next_input_message = Message.dict_to_message(
                     agent_id=self.agent_state.id,
@@ -686,6 +687,7 @@ class Agent(BaseAgent):
         stream: bool = False,  # TODO move to config?
         step_count: Optional[int] = None,
         metadata: Optional[dict] = None,
+        summarize_attempt_count: int = 0,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
 
@@ -753,9 +755,9 @@
                     LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
                 )
 
-            if current_total_tokens > MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window):
-                self.logger.warning(
-                    f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window)}"
+            if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window):
+                printd(
+                    f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
                 )
 
             # Only deliver the alert if we haven't already (this period)
@@ -764,8 +766,8 @@
                     self.agent_alerted_about_memory_pressure = True  # it's up to the outer loop to handle this
 
             else:
-                self.logger.warning(
-                    f"last response total_tokens ({current_total_tokens}) < {MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window)}"
+                printd(
+                    f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
                 )
 
             # Log step - this must happen before messages are persisted
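
The thresholds above now come from summarizer_settings, defined in the new 29-line block added to letta/settings.py (listed in the files-changed table but not shown in this diff). A minimal sketch consistent with the three fields referenced in this file (memory_warning_threshold, send_memory_warning_message, and max_summarizer_retries, which appears in the overflow-retry hunk below); the class name, env prefix, and default values are guesses, assuming pydantic-settings as used elsewhere in letta:

    from pydantic_settings import BaseSettings, SettingsConfigDict

    class SummarizerSettings(BaseSettings):
        # Hypothetical reconstruction; only the field names are confirmed by this diff
        model_config = SettingsConfigDict(env_prefix="letta_summarizer_")

        # Fraction of the context window that triggers a memory-pressure warning
        memory_warning_threshold: float = 0.75
        # Whether to deliver that warning to the agent as an extra input message
        send_memory_warning_message: bool = False
        # How many times inner_step() may re-run the summarizer before giving up
        max_summarizer_retries: int = 3

    summarizer_settings = SummarizerSettings()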
@@ -807,28 +809,46 @@
             )
 
         except Exception as e:
-            self.logger.error(f"step() failed\nmessages = {messages}\nerror = {e}")
+            logger.error(f"step() failed\nmessages = {messages}\nerror = {e}")
 
             # If we got a context alert, try trimming the messages length, then try again
             if is_context_overflow_error(e):
-                self.logger.warning(
-                    f"context window exceeded with limit {self.agent_state.llm_config.context_window}, running summarizer to trim messages"
-                )
-                # A separate API call to run a summarizer
-                self.summarize_messages_inplace()
+                in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
 
-                # Try step again
-                return self.inner_step(
-                    messages=messages,
-                    first_message=first_message,
-                    first_message_retry_limit=first_message_retry_limit,
-                    skip_verify=skip_verify,
-                    stream=stream,
-                    metadata=metadata,
-                )
+                if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
+                    logger.warning(
+                        f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
+                    )
+                    # A separate API call to run a summarizer
+                    self.summarize_messages_inplace()
+
+                    # Try step again
+                    return self.inner_step(
+                        messages=messages,
+                        first_message=first_message,
+                        first_message_retry_limit=first_message_retry_limit,
+                        skip_verify=skip_verify,
+                        stream=stream,
+                        metadata=metadata,
+                        summarize_attempt_count=summarize_attempt_count + 1,
+                    )
+                else:
+                    err_msg = f"Ran summarizer {summarize_attempt_count - 1} times for agent id={self.agent_state.id}, but messages are still overflowing the context window."
+                    token_counts = (get_token_counts_for_messages(in_context_messages),)
+                    logger.error(err_msg)
+                    logger.error(f"num_in_context_messages: {len(self.agent_state.message_ids)}")
+                    logger.error(f"token_counts: {token_counts}")
+                    raise ContextWindowExceededError(
+                        err_msg,
+                        details={
+                            "num_in_context_messages": len(self.agent_state.message_ids),
+                            "in_context_messages_text": [m.text for m in in_context_messages],
+                            "token_counts": token_counts,
+                        },
+                    )
 
             else:
-                self.logger.error(f"step() failed with an unrecognized exception: '{str(e)}'")
+                logger.error(f"step() failed with an unrecognized exception: '{str(e)}'")
                 raise e
 
     def step_user_message(self, user_message_str: str, **kwargs) -> AgentStepResponse:
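
The new overflow handler retries inner_step with summarize_attempt_count + 1 until the count exceeds max_summarizer_retries, then raises ContextWindowExceededError with diagnostics. It leans on get_token_counts_for_messages, one of the helpers added to letta/llm_api/helpers.py (+51 -1) whose body this diff does not show. A plausible minimal sketch, assuming the same count_tokens utility the deleted inline code used (its letta.utils location is an assumption):

    from typing import List

    from letta.schemas.message import Message
    from letta.utils import count_tokens  # assumed location of count_tokens


    def get_token_counts_for_messages(messages: List[Message]) -> List[int]:
        # One token count per in-context message, in order (index 0 is the system message)
        return [count_tokens(str(msg.to_openai_dict())) for msg in messages]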
@@ -865,109 +885,54 @@
 
         return self.inner_step(messages=[user_message], **kwargs)
 
-    def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True, disallow_tool_as_first=True):
+    def summarize_messages_inplace(self):
         in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
         in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+        in_context_messages_openai_no_system = in_context_messages_openai[1:]
+        token_counts = get_token_counts_for_messages(in_context_messages)
+        logger.info(f"System message token count={token_counts[0]}")
+        logger.info(f"token_counts_no_system={token_counts[1:]}")
 
         if in_context_messages_openai[0]["role"] != "system":
             raise RuntimeError(f"in_context_messages_openai[0] should be system (instead got {in_context_messages_openai[0]})")
 
-        # Start at index 1 (past the system message),
-        # and collect messages for summarization until we reach the desired truncation token fraction (eg 50%)
-        # Do not allow truncation of the last N messages, since these are needed for in-context examples of function calling
-        token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai]
-        message_buffer_token_count = sum(token_counts[1:])  # no system message
-        desired_token_count_to_summarize = int(message_buffer_token_count * MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC)
-        candidate_messages_to_summarize = in_context_messages_openai[1:]
-        token_counts = token_counts[1:]
-
-        if preserve_last_N_messages:
-            candidate_messages_to_summarize = candidate_messages_to_summarize[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST]
-            token_counts = token_counts[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST]
-
-        printd(f"MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC={MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC}")
-        printd(f"MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST={MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST}")
-        printd(f"token_counts={token_counts}")
-        printd(f"message_buffer_token_count={message_buffer_token_count}")
-        printd(f"desired_token_count_to_summarize={desired_token_count_to_summarize}")
-        printd(f"len(candidate_messages_to_summarize)={len(candidate_messages_to_summarize)}")
-
         # If at this point there's nothing to summarize, throw an error
-        if len(candidate_messages_to_summarize) == 0:
+        if len(in_context_messages_openai_no_system) == 0:
             raise ContextWindowExceededError(
                 "Not enough messages to compress for summarization",
                 details={
-                    "num_candidate_messages": len(candidate_messages_to_summarize),
+                    "num_candidate_messages": len(in_context_messages_openai_no_system),
                     "num_total_messages": len(in_context_messages_openai),
-                    "preserve_N": MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST,
                 },
             )
 
-        # Walk down the message buffer (front-to-back) until we hit the target token count
-        tokens_so_far = 0
-        cutoff = 0
-        for i, msg in enumerate(candidate_messages_to_summarize):
-            cutoff = i
-            tokens_so_far += token_counts[i]
-            if tokens_so_far > desired_token_count_to_summarize:
-                break
-        # Account for system message
-        cutoff += 1
-
-        # Try to make an assistant message come after the cutoff
-        try:
-            printd(f"Selected cutoff {cutoff} was a 'user', shifting one...")
-            if in_context_messages_openai[cutoff]["role"] == "user":
-                new_cutoff = cutoff + 1
-                if in_context_messages_openai[new_cutoff]["role"] == "user":
-                    printd(f"Shifted cutoff {new_cutoff} is still a 'user', ignoring...")
-                cutoff = new_cutoff
-        except IndexError:
-            pass
-
-        # Make sure the cutoff isn't on a 'tool' or 'function'
-        if disallow_tool_as_first:
-            while in_context_messages_openai[cutoff]["role"] in ["tool", "function"] and cutoff < len(in_context_messages_openai):
-                printd(f"Selected cutoff {cutoff} was a 'tool', shifting one...")
-                cutoff += 1
-
+        cutoff = calculate_summarizer_cutoff(in_context_messages=in_context_messages, token_counts=token_counts, logger=logger)
         message_sequence_to_summarize = in_context_messages[1:cutoff]  # do NOT get rid of the system message
-        if len(message_sequence_to_summarize) <= 1:
-            # This prevents a potential infinite loop of summarizing the same message over and over
-            raise ContextWindowExceededError(
-                "Not enough messages to compress for summarization after determining cutoff",
-                details={
-                    "num_candidate_messages": len(message_sequence_to_summarize),
-                    "num_total_messages": len(in_context_messages_openai),
-                    "preserve_N": MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST,
-                },
-            )
-        else:
-            printd(f"Attempting to summarize {len(message_sequence_to_summarize)} messages [1:{cutoff}] of {len(in_context_messages)}")
+        logger.info(f"Attempting to summarize {len(message_sequence_to_summarize)} messages of {len(in_context_messages)}")
 
         # We can't do summarize logic properly if context_window is undefined
         if self.agent_state.llm_config.context_window is None:
             # Fallback if for some reason context_window is missing, just set to the default
-            print(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
-            print(f"{self.agent_state}")
+            logger.warning(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
             self.agent_state.llm_config.context_window = (
                 LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
             )
 
         summary = summarize_messages(agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize)
-        printd(f"Got summary: {summary}")
+        logger.info(f"Got summary: {summary}")
 
         # Metadata that's useful for the agent to see
         all_time_message_count = self.message_manager.size(agent_id=self.agent_state.id, actor=self.user)
-        remaining_message_count = len(in_context_messages_openai[cutoff:])
+        remaining_message_count = 1 + len(in_context_messages) - cutoff  # System + remaining
         hidden_message_count = all_time_message_count - remaining_message_count
         summary_message_count = len(message_sequence_to_summarize)
         summary_message = package_summarize_message(summary, summary_message_count, hidden_message_count, all_time_message_count)
-        printd(f"Packaged into message: {summary_message}")
+        logger.info(f"Packaged into message: {summary_message}")
 
         prior_len = len(in_context_messages_openai)
-        self.agent_state = self.agent_manager.trim_older_in_context_messages(cutoff, agent_id=self.agent_state.id, actor=self.user)
+        self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(agent_id=self.agent_state.id, actor=self.user)
         packed_summary_message = {"role": "user", "content": summary_message}
+        # Prepend the summary
         self.agent_state = self.agent_manager.prepend_to_in_context_messages(
             messages=[
                 Message.dict_to_message(
@@ -983,8 +948,12 @@
 
         # reset alert
         self.agent_alerted_about_memory_pressure = False
+        curr_in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
 
-        printd(f"Ran summarizer, messages length {prior_len} -> {len(in_context_messages_openai)}")
+        logger.info(f"Ran summarizer, messages length {prior_len} -> {len(curr_in_context_messages)}")
+        logger.info(
+            f"Summarizer brought down total token count from {sum(token_counts)} -> {sum(get_token_counts_for_messages(curr_in_context_messages))}"
+        )
 
     def add_function(self, function_name: str) -> str:
         # TODO: refactor
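
The cutoff-selection logic deleted above (walk forward until a target token fraction is consumed, then avoid cutting on a tool/function message) moved behind calculate_summarizer_cutoff in letta/llm_api/helpers.py, whose new body is not part of this diff. A sketch of plausible behavior reconstructed from the deleted inline version; the fraction constant and exact shifting rules are assumptions (the old code read the fraction from the now-removed MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC constant):

    import logging
    from typing import List

    from letta.schemas.message import Message

    # Assumed value; earlier releases shipped MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC = 0.75
    TRUNCATION_FRAC = 0.75


    def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts: List[int], logger: logging.Logger) -> int:
        """Pick an index so that messages [1:cutoff] get summarized (index 0 is the system message)."""
        target = int(sum(token_counts[1:]) * TRUNCATION_FRAC)

        # Walk the buffer front-to-back until the target token count is consumed
        tokens_so_far, cutoff = 0, 1
        for i in range(1, len(in_context_messages)):
            tokens_so_far += token_counts[i]
            cutoff = i + 1
            if tokens_so_far > target:
                break

        # Never let a 'tool' response lead the remaining window, since it would be
        # orphaned from the assistant message that invoked it
        while cutoff < len(in_context_messages) and in_context_messages[cutoff].role in ("tool", "function"):
            logger.info(f"Cutoff {cutoff} lands on a tool response, shifting by one...")
            cutoff += 1

        return cutoff

Whatever the helper's internals, one behavioral change is visible in the hunk itself: the rewrite now calls trim_all_in_context_messages_except_system and prepends the summary message, rather than trimming only the messages before the cutoff as the old trim_older_in_context_messages call did.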
@@ -1055,7 +1024,7 @@
         # tokens taken up by function definitions
         agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
         if agent_state_tool_jsons:
-            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in agent_state_tool_jsons]
+            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
             num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
         else:
             available_functions_definitions = []
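
OpenAITool here is the FunctionTool model from the openai package (imported at the top of this file), replacing letta's own ChatCompletionRequestTool wrapper; both are pydantic models with the same {type, function} shape, so call sites construct them identically. A small standalone illustration — the schema below is a made-up example, not taken from this diff:

    from openai.types.beta.function_tool import FunctionTool as OpenAITool

    # A function definition in the shape letta stores as Tool.json_schema
    json_schema = {
        "name": "send_message",
        "description": "Send a message to the human user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }

    # Pydantic validates the dict into a FunctionDefinition on construction
    tool = OpenAITool(type="function", function=json_schema)
    print(tool.function.name)  # -> send_message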
@@ -1122,7 +1091,7 @@ def save_agent(agent: Agent):
         embedding_config=agent_state.embedding_config,
         message_ids=agent_state.message_ids,
         description=agent_state.description,
-        metadata_=agent_state.metadata_,
+        metadata=agent_state.metadata,
         # TODO: Add this back in later
         # tool_exec_environment_variables=agent_state.get_agent_env_vars_as_dict(),
     )
letta/chat_only_agent.py CHANGED
@@ -87,7 +87,7 @@ class ChatOnlyAgent(Agent):
             memory=offline_memory,
             llm_config=LLMConfig.default_config("gpt-4"),
             embedding_config=EmbeddingConfig.default_config("text-embedding-ada-002"),
-            tool_ids=self.agent_state.metadata_.get("offline_memory_tools", []),
+            tool_ids=self.agent_state.metadata.get("offline_memory_tools", []),
             include_base_tools=False,
         )
         self.offline_memory_agent.memory.update_block_value(label="conversation_block", value=recent_convo)
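
The metadata_ → metadata rename seen in this file and in save_agent above ripples through most of the schema and ORM files in the changed-files list; external code that still reads the old attribute will hit an AttributeError on 0.6.14. A version-tolerant accessor for downstream callers, as an illustrative sketch (the helper name is invented):

    def get_agent_metadata(agent_state) -> dict:
        # 0.6.14+ exposes `metadata`; 0.6.13 and earlier exposed `metadata_`
        md = getattr(agent_state, "metadata", None)
        if md is None:
            md = getattr(agent_state, "metadata_", None)
        return md or {}

    # Usage:
    # tool_ids = get_agent_metadata(agent_state).get("offline_memory_tools", [])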