letta-nightly 0.13.0.dev20251030104218__py3-none-any.whl → 0.13.1.dev20251031234110__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (101)
  1. letta/__init__.py +1 -1
  2. letta/adapters/simple_llm_stream_adapter.py +1 -0
  3. letta/agents/letta_agent_v2.py +8 -0
  4. letta/agents/letta_agent_v3.py +120 -27
  5. letta/agents/temporal/activities/__init__.py +25 -0
  6. letta/agents/temporal/activities/create_messages.py +26 -0
  7. letta/agents/temporal/activities/create_step.py +57 -0
  8. letta/agents/temporal/activities/example_activity.py +9 -0
  9. letta/agents/temporal/activities/execute_tool.py +130 -0
  10. letta/agents/temporal/activities/llm_request.py +114 -0
  11. letta/agents/temporal/activities/prepare_messages.py +27 -0
  12. letta/agents/temporal/activities/refresh_context.py +160 -0
  13. letta/agents/temporal/activities/summarize_conversation_history.py +77 -0
  14. letta/agents/temporal/activities/update_message_ids.py +25 -0
  15. letta/agents/temporal/activities/update_run.py +43 -0
  16. letta/agents/temporal/constants.py +59 -0
  17. letta/agents/temporal/temporal_agent_workflow.py +704 -0
  18. letta/agents/temporal/types.py +275 -0
  19. letta/constants.py +8 -0
  20. letta/errors.py +4 -0
  21. letta/functions/function_sets/base.py +0 -11
  22. letta/groups/helpers.py +7 -1
  23. letta/groups/sleeptime_multi_agent_v4.py +4 -3
  24. letta/interfaces/anthropic_streaming_interface.py +0 -1
  25. letta/interfaces/openai_streaming_interface.py +103 -100
  26. letta/llm_api/anthropic_client.py +57 -12
  27. letta/llm_api/bedrock_client.py +1 -0
  28. letta/llm_api/deepseek_client.py +3 -2
  29. letta/llm_api/google_vertex_client.py +1 -0
  30. letta/llm_api/groq_client.py +1 -0
  31. letta/llm_api/llm_client_base.py +15 -1
  32. letta/llm_api/openai.py +2 -2
  33. letta/llm_api/openai_client.py +17 -3
  34. letta/llm_api/xai_client.py +1 -0
  35. letta/orm/organization.py +4 -0
  36. letta/orm/sqlalchemy_base.py +7 -0
  37. letta/otel/tracing.py +131 -4
  38. letta/schemas/agent_file.py +10 -10
  39. letta/schemas/block.py +22 -3
  40. letta/schemas/enums.py +21 -0
  41. letta/schemas/environment_variables.py +3 -2
  42. letta/schemas/group.py +3 -3
  43. letta/schemas/letta_response.py +36 -4
  44. letta/schemas/llm_batch_job.py +3 -3
  45. letta/schemas/llm_config.py +27 -3
  46. letta/schemas/mcp.py +3 -2
  47. letta/schemas/mcp_server.py +3 -2
  48. letta/schemas/message.py +167 -49
  49. letta/schemas/organization.py +2 -1
  50. letta/schemas/passage.py +2 -1
  51. letta/schemas/provider_trace.py +2 -1
  52. letta/schemas/providers/openrouter.py +1 -2
  53. letta/schemas/run_metrics.py +2 -1
  54. letta/schemas/sandbox_config.py +3 -1
  55. letta/schemas/step_metrics.py +2 -1
  56. letta/schemas/tool_rule.py +2 -2
  57. letta/schemas/user.py +2 -1
  58. letta/server/rest_api/app.py +5 -1
  59. letta/server/rest_api/routers/v1/__init__.py +4 -0
  60. letta/server/rest_api/routers/v1/agents.py +71 -9
  61. letta/server/rest_api/routers/v1/blocks.py +7 -7
  62. letta/server/rest_api/routers/v1/groups.py +40 -0
  63. letta/server/rest_api/routers/v1/identities.py +2 -2
  64. letta/server/rest_api/routers/v1/internal_agents.py +31 -0
  65. letta/server/rest_api/routers/v1/internal_blocks.py +177 -0
  66. letta/server/rest_api/routers/v1/internal_runs.py +25 -1
  67. letta/server/rest_api/routers/v1/runs.py +2 -22
  68. letta/server/rest_api/routers/v1/tools.py +10 -0
  69. letta/server/server.py +5 -2
  70. letta/services/agent_manager.py +4 -4
  71. letta/services/archive_manager.py +16 -0
  72. letta/services/group_manager.py +44 -0
  73. letta/services/helpers/run_manager_helper.py +2 -2
  74. letta/services/lettuce/lettuce_client.py +148 -0
  75. letta/services/mcp/base_client.py +9 -3
  76. letta/services/run_manager.py +148 -37
  77. letta/services/source_manager.py +91 -3
  78. letta/services/step_manager.py +2 -3
  79. letta/services/streaming_service.py +52 -13
  80. letta/services/summarizer/summarizer.py +28 -2
  81. letta/services/tool_executor/builtin_tool_executor.py +1 -1
  82. letta/services/tool_executor/core_tool_executor.py +2 -117
  83. letta/services/tool_schema_generator.py +2 -2
  84. letta/validators.py +21 -0
  85. {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/METADATA +1 -1
  86. {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/RECORD +89 -84
  87. letta/agent.py +0 -1758
  88. letta/cli/cli_load.py +0 -16
  89. letta/client/__init__.py +0 -0
  90. letta/client/streaming.py +0 -95
  91. letta/client/utils.py +0 -78
  92. letta/functions/async_composio_toolset.py +0 -109
  93. letta/functions/composio_helpers.py +0 -96
  94. letta/helpers/composio_helpers.py +0 -38
  95. letta/orm/job_messages.py +0 -33
  96. letta/schemas/providers.py +0 -1617
  97. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -132
  98. letta/services/tool_executor/composio_tool_executor.py +0 -57
  99. {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/WHEEL +0 -0
  100. {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/entry_points.txt +0 -0
  101. {letta_nightly-0.13.0.dev20251030104218.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/licenses/LICENSE +0 -0
letta/agent.py DELETED
@@ -1,1758 +0,0 @@
1
- import asyncio
2
- import json
3
- import time
4
- import traceback
5
- import warnings
6
- from abc import ABC, abstractmethod
7
- from typing import Dict, List, Optional, Tuple, Union
8
-
9
- from openai.types.beta.function_tool import FunctionTool as OpenAITool
10
-
11
- from letta.agents.helpers import generate_step_id
12
- from letta.constants import (
13
- CLI_WARNING_PREFIX,
14
- COMPOSIO_ENTITY_ENV_VAR_KEY,
15
- ERROR_MESSAGE_PREFIX,
16
- FIRST_MESSAGE_ATTEMPTS,
17
- FUNC_FAILED_HEARTBEAT_MESSAGE,
18
- LETTA_CORE_TOOL_MODULE_NAME,
19
- LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
20
- LLM_MAX_TOKENS,
21
- READ_ONLY_BLOCK_EDIT_ERROR,
22
- REQ_HEARTBEAT_MESSAGE,
23
- SEND_MESSAGE_TOOL_NAME,
24
- )
25
- from letta.errors import ContextWindowExceededError
26
- from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source
27
- from letta.functions.composio_helpers import execute_composio_action, generate_composio_action_from_func_name
28
- from letta.functions.functions import get_function_from_module
29
- from letta.helpers import ToolRulesSolver
30
- from letta.helpers.composio_helpers import get_composio_api_key
31
- from letta.helpers.datetime_helpers import get_utc_time
32
- from letta.helpers.json_helpers import json_dumps, json_loads
33
- from letta.helpers.message_helper import convert_message_creates_to_messages
34
- from letta.interface import AgentInterface
35
- from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
36
- from letta.llm_api.llm_api_tools import create
37
- from letta.llm_api.llm_client import LLMClient
38
- from letta.local_llm.constants import INNER_THOUGHTS_KWARG
39
- from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
40
- from letta.log import get_logger
41
- from letta.memory import summarize_messages
42
- from letta.orm import User
43
- from letta.otel.tracing import log_event, trace_method
44
- from letta.prompts.prompt_generator import PromptGenerator
45
- from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent
46
- from letta.schemas.block import BlockUpdate
47
- from letta.schemas.embedding_config import EmbeddingConfig
48
- from letta.schemas.enums import MessageRole, ProviderType, StepStatus, ToolType
49
- from letta.schemas.letta_message_content import ImageContent, TextContent
50
- from letta.schemas.memory import ContextWindowOverview, Memory
51
- from letta.schemas.message import Message, MessageCreate, ToolReturn
52
- from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Message as ChatCompletionMessage, UsageStatistics
53
- from letta.schemas.response_format import ResponseFormatType
54
- from letta.schemas.tool import Tool
55
- from letta.schemas.tool_execution_result import ToolExecutionResult
56
- from letta.schemas.tool_rule import TerminalToolRule
57
- from letta.schemas.usage import LettaUsageStatistics
58
- from letta.services.agent_manager import AgentManager
59
- from letta.services.block_manager import BlockManager
60
- from letta.services.helpers.agent_manager_helper import check_supports_structured_output
61
- from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
62
- from letta.services.job_manager import JobManager
63
- from letta.services.mcp.base_client import AsyncBaseMCPClient
64
- from letta.services.message_manager import MessageManager
65
- from letta.services.passage_manager import PassageManager
66
- from letta.services.provider_manager import ProviderManager
67
- from letta.services.step_manager import StepManager
68
- from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
69
- from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
70
- from letta.services.tool_manager import ToolManager
71
- from letta.settings import model_settings, settings, summarizer_settings
72
- from letta.streaming_interface import StreamingRefreshCLIInterface
73
- from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
74
- from letta.utils import count_tokens, get_friendly_error_msg, get_tool_call_id, log_telemetry, parse_json, validate_function_response
75
-
76
- logger = get_logger(__name__)
77
-
78
-
79
- class BaseAgent(ABC):
80
- """
81
- Abstract class for all agents.
82
- Only one interface is required: step.
83
- """
84
-
85
- @abstractmethod
86
- def step(
87
- self,
88
- input_messages: List[MessageCreate],
89
- ) -> LettaUsageStatistics:
90
- """
91
- Top-level event message handler for the agent.
92
- """
93
- raise NotImplementedError
94
-
95
-
96
- class Agent(BaseAgent):
97
- def __init__(
98
- self,
99
- interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
100
- agent_state: AgentState, # in-memory representation of the agent state (read from multiple tables)
101
- user: User,
102
- # extras
103
- first_message_verify_mono: bool = True, # TODO move to config?
104
- # MCP sessions, state held in-memory in the server
105
- mcp_clients: Optional[Dict[str, AsyncBaseMCPClient]] = None,
106
- save_last_response: bool = False,
107
- ):
108
- assert isinstance(agent_state.memory, Memory), f"Memory object is not of type Memory: {type(agent_state.memory)}"
109
- # Hold a copy of the state that was used to init the agent
110
- self.agent_state = agent_state
111
- assert isinstance(self.agent_state.memory, Memory), f"Memory object is not of type Memory: {type(self.agent_state.memory)}"
112
-
113
- self.user = user
114
-
115
- # initialize a tool rules solver
116
- self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
117
-
118
- # gpt-4, gpt-3.5-turbo, ...
119
- self.model = self.agent_state.llm_config.model
120
- self.supports_structured_output = check_supports_structured_output(model=self.model, tool_rules=agent_state.tool_rules)
121
-
122
- # if there are tool rules, print out a warning
123
- if not self.supports_structured_output and agent_state.tool_rules:
124
- for rule in agent_state.tool_rules:
125
- if not isinstance(rule, TerminalToolRule):
126
- warnings.warn("Tool rules only work reliably for model backends that support structured outputs (e.g. OpenAI gpt-4o).")
127
- break
128
-
129
- # state managers
130
- self.block_manager = BlockManager()
131
-
132
- # Interface must implement:
133
- # - internal_monologue
134
- # - assistant_message
135
- # - function_message
136
- # ...
137
- # Different interfaces can handle events differently
138
- # e.g., print in CLI vs send a discord message with a discord bot
139
- self.interface = interface
140
-
141
- # Create the persistence manager object based on the AgentState info
142
- self.message_manager = MessageManager()
143
- self.passage_manager = PassageManager()
144
- self.provider_manager = ProviderManager()
145
- self.agent_manager = AgentManager()
146
- self.job_manager = JobManager()
147
- self.step_manager = StepManager()
148
- self.telemetry_manager = TelemetryManager() if settings.llm_api_logging else NoopTelemetryManager()
149
-
150
- # State needed for heartbeat pausing
151
-
152
- self.first_message_verify_mono = first_message_verify_mono
153
-
154
- # Controls if the convo memory pressure warning is triggered
155
- # When an alert is sent in the message queue, set this to True (to avoid repeat alerts)
156
- # When the summarizer is run, set this back to False (to reset)
157
- self.agent_alerted_about_memory_pressure = False
158
-
159
- # Load last function response from message history
160
- self.last_function_response = self.load_last_function_response()
161
-
162
- # Save last responses in memory
163
- self.save_last_response = save_last_response
164
- self.last_response_messages = []
165
-
166
- # Logger that the Agent specifically can use, will also report the agent_state ID with the logs
167
- self.logger = get_logger(agent_state.id)
168
-
169
- # MCPClient, state/sessions managed by the server
170
- # TODO: This is temporary, as a bridge
171
- self.mcp_clients = None
172
- # TODO: no longer supported
173
- # if mcp_clients:
174
- # self.mcp_clients = {client_id: client.to_sync_client() for client_id, client in mcp_clients.items()}
175
-
176
- def load_last_function_response(self):
177
- """Load the last function response from message history"""
178
- in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
179
- for i in range(len(in_context_messages) - 1, -1, -1):
180
- msg = in_context_messages[i]
181
- if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
182
- text_content = msg.content[0].text
183
- try:
184
- response_json = json.loads(text_content)
185
- if response_json.get("message"):
186
- return response_json["message"]
187
- except (json.JSONDecodeError, KeyError):
188
- raise ValueError(f"Invalid JSON format in message: {text_content}")
189
- return None
190
-
191
- def ensure_read_only_block_not_modified(self, new_memory: Memory) -> None:
192
- """
193
- Throw an error if a read-only block has been modified
194
- """
195
- for label in self.agent_state.memory.list_block_labels():
196
- if self.agent_state.memory.get_block(label).read_only:
197
- if new_memory.get_block(label).value != self.agent_state.memory.get_block(label).value:
198
- raise ValueError(READ_ONLY_BLOCK_EDIT_ERROR)
199
-
200
- def update_memory_if_changed(self, new_memory: Memory) -> bool:
201
- """
202
- Update internal memory object and system prompt if there have been modifications.
203
-
204
- Args:
205
- new_memory (Memory): the new memory object to compare to the current memory object
206
-
207
- Returns:
208
- modified (bool): whether the memory was updated
209
- """
210
- system_message = self.message_manager.get_message_by_id(message_id=self.agent_state.message_ids[0], actor=self.user)
211
- if new_memory.compile() not in system_message.content[0].text:
212
- # update the blocks (LRW) in the DB
213
- for label in self.agent_state.memory.list_block_labels():
214
- updated_value = new_memory.get_block(label).value
215
- if updated_value != self.agent_state.memory.get_block(label).value:
216
- # update the block if it's changed
217
- block_id = self.agent_state.memory.get_block(label).id
218
- self.block_manager.update_block(block_id=block_id, block_update=BlockUpdate(value=updated_value), actor=self.user)
219
-
220
- # refresh memory from DB (using block ids)
221
- self.agent_state.memory = Memory(
222
- blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
223
- file_blocks=self.agent_state.memory.file_blocks,
224
- agent_type=self.agent_state.agent_type,
225
- )
226
-
227
- # NOTE: don't do this since re-buildin the memory is handled at the start of the step
228
- # rebuild memory - this records the last edited timestamp of the memory
229
- # TODO: pass in update timestamp from block edit time
230
- self.agent_state = self.agent_manager.rebuild_system_prompt(agent_id=self.agent_state.id, actor=self.user)
231
-
232
- return True
233
-
234
- return False
235
-
236
- def _handle_function_error_response(
237
- self,
238
- error_msg: str,
239
- tool_call_id: str,
240
- function_name: str,
241
- function_args: dict,
242
- function_response: str,
243
- messages: List[Message],
244
- tool_returns: Optional[List[ToolReturn]] = None,
245
- include_function_failed_message: bool = False,
246
- group_id: Optional[str] = None,
247
- ) -> List[Message]:
248
- """
249
- Handle error from function call response
250
- """
251
- # Update tool rules
252
- self.last_function_response = function_response
253
- self.tool_rules_solver.register_tool_call(function_name)
254
-
255
- # Extend conversation with function response
256
- function_response = package_function_response(False, error_msg, self.agent_state.timezone)
257
- new_message = Message(
258
- agent_id=self.agent_state.id,
259
- # Base info OpenAI-style
260
- model=self.model,
261
- role="tool",
262
- name=function_name, # NOTE: when role is 'tool', the 'name' is the function name, not agent name
263
- content=[TextContent(text=function_response)],
264
- tool_call_id=tool_call_id,
265
- # Letta extras
266
- tool_returns=tool_returns,
267
- group_id=group_id,
268
- )
269
- messages.append(new_message)
270
- self.interface.function_message(f"Error: {error_msg}", msg_obj=new_message, chunk_index=0)
271
- if include_function_failed_message:
272
- self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=new_message)
273
-
274
- # Return updated messages
275
- return messages
276
-
277
- def _runtime_override_tool_json_schema(
278
- self,
279
- functions_list: List[Dict | None],
280
- ) -> List[Dict | None]:
281
- """Override the tool JSON schema at runtime for a particular tool if conditions are met."""
282
-
283
- # Currently just injects `send_message` with a `response_format` if provided to the agent.
284
- if self.agent_state.response_format and self.agent_state.response_format.type != ResponseFormatType.text:
285
- for func in functions_list:
286
- if func["name"] == SEND_MESSAGE_TOOL_NAME:
287
- if self.agent_state.response_format.type == ResponseFormatType.json_schema:
288
- func["parameters"]["properties"]["message"] = self.agent_state.response_format.json_schema["schema"]
289
- if self.agent_state.response_format.type == ResponseFormatType.json_object:
290
- func["parameters"]["properties"]["message"] = {
291
- "type": "object",
292
- "description": "Message contents. All unicode (including emojis) are supported.",
293
- "additionalProperties": True,
294
- "properties": {},
295
- }
296
- break
297
- return functions_list
298
-
299
- @trace_method
300
- def _get_ai_reply(
301
- self,
302
- message_sequence: List[Message],
303
- function_call: Optional[str] = None,
304
- first_message: bool = False,
305
- stream: bool = False, # TODO move to config?
306
- empty_response_retry_limit: int = 3,
307
- backoff_factor: float = 0.5, # delay multiplier for exponential backoff
308
- max_delay: float = 10.0, # max delay between retries
309
- step_count: Optional[int] = None,
310
- last_function_failed: bool = False,
311
- put_inner_thoughts_first: bool = True,
312
- step_id: Optional[str] = None,
313
- ) -> ChatCompletionResponse | None:
314
- """Get response from LLM API with robust retry mechanism."""
315
- log_telemetry(self.logger, "_get_ai_reply start")
316
- available_tools = set([t.name for t in self.agent_state.tools])
317
- agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
318
-
319
- # Get allowed tools or allow all if none are allowed
320
- allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(
321
- available_tools=available_tools, last_function_response=self.last_function_response
322
- ) or list(available_tools)
323
-
324
- # Don't allow a tool to be called if it failed last time
325
- if last_function_failed and self.tool_rules_solver.tool_call_history:
326
- allowed_tool_names = [f for f in allowed_tool_names if f != self.tool_rules_solver.tool_call_history[-1]]
327
- if not allowed_tool_names:
328
- return None
329
-
330
- allowed_functions = [func for func in agent_state_tool_jsons if func["name"] in allowed_tool_names]
331
- # Extract terminal tool names from tool rules
332
- terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
333
- allowed_functions = runtime_override_tool_json_schema(
334
- tool_list=allowed_functions,
335
- response_format=self.agent_state.response_format,
336
- request_heartbeat=True,
337
- terminal_tools=terminal_tool_names,
338
- )
339
-
340
- # For the first message, force the initial tool if one is specified
341
- force_tool_call = None
342
- if (
343
- step_count is not None
344
- and step_count == 0
345
- and not self.supports_structured_output
346
- and len(self.tool_rules_solver.init_tool_rules) > 0
347
- ):
348
- # TODO: This just seems wrong? What if there are more than 1 init tool rules?
349
- force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name
350
- # Force a tool call if exactly one tool is specified
351
- elif step_count is not None and step_count > 0 and len(allowed_tool_names) == 1:
352
- force_tool_call = allowed_tool_names[0]
353
-
354
- for attempt in range(1, empty_response_retry_limit + 1):
355
- try:
356
- log_telemetry(self.logger, "_get_ai_reply create start")
357
- # New LLM client flow
358
- llm_client = LLMClient.create(
359
- provider_type=self.agent_state.llm_config.model_endpoint_type,
360
- put_inner_thoughts_first=put_inner_thoughts_first,
361
- actor=self.user,
362
- )
363
-
364
- if llm_client and not stream:
365
- response = llm_client.send_llm_request(
366
- messages=message_sequence,
367
- llm_config=self.agent_state.llm_config,
368
- tools=allowed_functions,
369
- force_tool_call=force_tool_call,
370
- telemetry_manager=self.telemetry_manager,
371
- step_id=step_id,
372
- )
373
- else:
374
- # Fallback to existing flow
375
- for message in message_sequence:
376
- if isinstance(message.content, list):
377
-
378
- def get_fallback_text_content(content):
379
- if isinstance(content, ImageContent):
380
- return TextContent(text="[Image Here]")
381
- return content
382
-
383
- message.content = [get_fallback_text_content(content) for content in message.content]
384
-
385
- response = create(
386
- llm_config=self.agent_state.llm_config,
387
- messages=message_sequence,
388
- user_id=self.agent_state.created_by_id,
389
- functions=allowed_functions,
390
- # functions_python=self.functions_python, do we need this?
391
- function_call=function_call,
392
- first_message=first_message,
393
- force_tool_call=force_tool_call,
394
- stream=stream,
395
- stream_interface=self.interface,
396
- put_inner_thoughts_first=put_inner_thoughts_first,
397
- name=self.agent_state.name,
398
- telemetry_manager=self.telemetry_manager,
399
- step_id=step_id,
400
- actor=self.user,
401
- )
402
- log_telemetry(self.logger, "_get_ai_reply create finish")
403
-
404
- # These bottom two are retryable
405
- if len(response.choices) == 0 or response.choices[0] is None:
406
- raise ValueError(f"API call returned an empty message: {response}")
407
-
408
- if response.choices[0].finish_reason not in ["stop", "function_call", "tool_calls"]:
409
- if response.choices[0].finish_reason == "length":
410
- # This is not retryable, hence RuntimeError v.s. ValueError
411
- raise RuntimeError("Finish reason was length (maximum context length)")
412
- else:
413
- raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
414
- log_telemetry(self.logger, "_handle_ai_response finish")
415
-
416
- except ValueError as ve:
417
- if attempt >= empty_response_retry_limit:
418
- warnings.warn(f"Retry limit reached. Final error: {ve}")
419
- log_telemetry(self.logger, "_handle_ai_response finish ValueError")
420
- raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
421
- else:
422
- delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
423
- warnings.warn(f"Attempt {attempt} failed: {ve}. Retrying in {delay} seconds...")
424
- time.sleep(delay)
425
- continue
426
-
427
- except Exception as e:
428
- # For non-retryable errors, exit immediately
429
- log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
430
- raise e
431
-
432
- # check if we are going over the context window: this allows for articifial constraints
433
- if response.usage.total_tokens > self.agent_state.llm_config.context_window:
434
- # trigger summarization
435
- log_telemetry(self.logger, "_get_ai_reply summarize_messages_inplace")
436
- self.summarize_messages_inplace()
437
-
438
- # return the response
439
- return response
440
-
441
- log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
442
- raise Exception("Retries exhausted and no valid response received.")
443
-
444
- @trace_method
445
- def _handle_ai_response(
446
- self,
447
- response_message: ChatCompletionMessage, # TODO should we eventually move the Message creation outside of this function?
448
- override_tool_call_id: bool = False,
449
- # If we are streaming, we needed to create a Message ID ahead of time,
450
- # and now we want to use it in the creation of the Message object
451
- # TODO figure out a cleaner way to do this
452
- response_message_id: Optional[str] = None,
453
- group_id: Optional[str] = None,
454
- ) -> Tuple[List[Message], bool, bool]:
455
- """Handles parsing and function execution"""
456
- log_telemetry(self.logger, "_handle_ai_response start")
457
- # Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
458
- if response_message_id is not None:
459
- assert response_message_id.startswith("message-"), response_message_id
460
-
461
- messages = [] # append these to the history when done
462
- function_name = None
463
- function_args = {}
464
- chunk_index = 0
465
-
466
- # Step 2: check if LLM wanted to call a function
467
- if response_message.function_call or (response_message.tool_calls is not None and len(response_message.tool_calls) > 0):
468
- if response_message.function_call:
469
- raise DeprecationWarning(response_message)
470
- if response_message.tool_calls is not None and len(response_message.tool_calls) > 1:
471
- # raise NotImplementedError(f">1 tool call not supported")
472
- # TODO eventually support sequential tool calling
473
- self.logger.warning(f">1 tool call not supported, using index=0 only\n{response_message.tool_calls}")
474
- response_message.tool_calls = [response_message.tool_calls[0]]
475
- assert response_message.tool_calls is not None and len(response_message.tool_calls) > 0
476
-
477
- # generate UUID for tool call
478
- if override_tool_call_id or response_message.function_call:
479
- warnings.warn("Overriding the tool call can result in inconsistent tool call IDs during streaming")
480
- tool_call_id = get_tool_call_id() # needs to be a string for JSON
481
- response_message.tool_calls[0].id = tool_call_id
482
- else:
483
- tool_call_id = response_message.tool_calls[0].id
484
- assert tool_call_id is not None # should be defined
485
-
486
- # only necessary to add the tool_call_id to a function call (antipattern)
487
- # response_message_dict = response_message.model_dump()
488
- # response_message_dict["tool_call_id"] = tool_call_id
489
-
490
- # role: assistant (requesting tool call, set tool call ID)
491
- messages.append(
492
- # NOTE: we're recreating the message here
493
- # TODO should probably just overwrite the fields?
494
- Message.dict_to_message(
495
- id=response_message_id,
496
- agent_id=self.agent_state.id,
497
- model=self.model,
498
- openai_message_dict=response_message.model_dump(),
499
- name=self.agent_state.name,
500
- group_id=group_id,
501
- )
502
- ) # extend conversation with assistant's reply
503
- self.logger.debug(f"Function call message: {messages[-1]}")
504
-
505
- nonnull_content = False
506
- if response_message.content or response_message.reasoning_content or response_message.redacted_reasoning_content:
507
- # The content if then internal monologue, not chat
508
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
509
- chunk_index += 1
510
- # Flag to avoid printing a duplicate if inner thoughts get popped from the function call
511
- nonnull_content = True
512
-
513
- # Step 3: call the function
514
- # Note: the JSON response may not always be valid; be sure to handle errors
515
- function_call = (
516
- response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
517
- )
518
- function_name = function_call.name
519
- self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
520
-
521
- # Failure case 1: function name is wrong (not in agent_state.tools)
522
- target_letta_tool = None
523
- for t in self.agent_state.tools:
524
- if t.name == function_name:
525
- # This force refreshes the target_letta_tool from the database
526
- # We only do this on name match to confirm that the agent state contains a specific tool with the right name
527
- target_letta_tool = ToolManager().get_tool_by_name(tool_name=function_name, actor=self.user)
528
- break
529
-
530
- if not target_letta_tool:
531
- error_msg = f"No function named {function_name}"
532
- function_response = "None" # more like "never ran?"
533
- messages = self._handle_function_error_response(
534
- error_msg, tool_call_id, function_name, function_args, function_response, messages, group_id=group_id
535
- )
536
- return messages, False, True # force a heartbeat to allow agent to handle error
537
-
538
- # Failure case 2: function name is OK, but function args are bad JSON
539
- try:
540
- raw_function_args = function_call.arguments
541
- function_args = parse_json(raw_function_args)
542
- if not isinstance(function_args, dict):
543
- raise ValueError(f"Function arguments are not a dictionary: {function_args} (raw={raw_function_args})")
544
- except Exception as e:
545
- print(e)
546
- error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
547
- function_response = "None" # more like "never ran?"
548
- messages = self._handle_function_error_response(
549
- error_msg, tool_call_id, function_name, function_args, function_response, messages, group_id=group_id
550
- )
551
- return messages, False, True # force a heartbeat to allow agent to handle error
552
-
553
- # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
554
- if INNER_THOUGHTS_KWARG in function_args:
555
- response_message.content = function_args.pop(INNER_THOUGHTS_KWARG)
556
- # The content if then internal monologue, not chat
557
- if response_message.content and not nonnull_content:
558
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
559
- chunk_index += 1
560
-
561
- # (Still parsing function args)
562
- # Handle requests for immediate heartbeat
563
- heartbeat_request = function_args.pop("request_heartbeat", None)
564
-
565
- # Edge case: heartbeat_request is returned as a stringified boolean, we will attempt to parse:
566
- if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
567
- heartbeat_request = True
568
-
569
- if heartbeat_request is None:
570
- heartbeat_request = False
571
-
572
- if not isinstance(heartbeat_request, bool):
573
- self.logger.warning(
574
- f"{CLI_WARNING_PREFIX}'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}"
575
- )
576
- heartbeat_request = False
577
-
578
- # Failure case 3: function failed during execution
579
- # NOTE: the msg_obj associated with the "Running " message is the prior assistant message, not the function/tool role message
580
- # this is because the function/tool role message is only created once the function/tool has executed/returned
581
-
582
- # handle cases where we return a json message
583
- if "message" in function_args:
584
- function_args["message"] = str(function_args.get("message", ""))
585
- self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
586
- chunk_index = 0 # reset chunk index after assistant message
587
- try:
588
- # handle tool execution (sandbox) and state updates
589
- log_telemetry(
590
- self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
591
- )
592
- log_event(
593
- "tool_call_initiated",
594
- attributes={
595
- "function_name": function_name,
596
- "target_letta_tool": target_letta_tool.model_dump(),
597
- **{f"function_args.{k}": v for k, v in function_args.items()},
598
- },
599
- )
600
-
601
- tool_execution_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
602
- function_response = tool_execution_result.func_return
603
-
604
- log_event(
605
- "tool_call_ended",
606
- attributes={
607
- "function_response": function_response,
608
- "tool_execution_result": tool_execution_result.model_dump(),
609
- },
610
- )
611
- log_telemetry(
612
- self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
613
- )
614
-
615
- if tool_execution_result and tool_execution_result.status == "error":
616
- tool_return = ToolReturn(
617
- status=tool_execution_result.status, stdout=tool_execution_result.stdout, stderr=tool_execution_result.stderr
618
- )
619
- messages = self._handle_function_error_response(
620
- function_response,
621
- tool_call_id,
622
- function_name,
623
- function_args,
624
- function_response,
625
- messages,
626
- [tool_return],
627
- group_id=group_id,
628
- )
629
- return messages, False, True # force a heartbeat to allow agent to handle error
630
-
631
- # handle trunction
632
- if function_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
633
- # with certain functions we rely on the paging mechanism to handle overflow
634
- truncate = False
635
- else:
636
- # but by default, we add a truncation safeguard to prevent bad functions from
637
- # overflow the agent context window
638
- truncate = True
639
-
640
- # get the function response limit
641
- return_char_limit = target_letta_tool.return_char_limit
642
- function_response_string = validate_function_response(
643
- function_response, return_char_limit=return_char_limit, truncate=truncate
644
- )
645
- function_args.pop("self", None)
646
- function_response = package_function_response(True, function_response_string, self.agent_state.timezone)
647
- function_failed = False
648
- except Exception as e:
649
- function_args.pop("self", None)
650
- # error_msg = f"Error calling function {function_name} with args {function_args}: {str(e)}"
651
- # Less detailed - don't provide full args, idea is that it should be in recent context so no need (just adds noise)
652
- error_msg = get_friendly_error_msg(function_name=function_name, exception_name=type(e).__name__, exception_message=str(e))
653
- error_msg_user = f"{error_msg}\n{traceback.format_exc()}"
654
- self.logger.error(error_msg_user)
655
- messages = self._handle_function_error_response(
656
- error_msg,
657
- tool_call_id,
658
- function_name,
659
- function_args,
660
- function_response,
661
- messages,
662
- [ToolReturn(status="error", stderr=[error_msg_user])],
663
- include_function_failed_message=True,
664
- group_id=group_id,
665
- )
666
- return messages, False, True # force a heartbeat to allow agent to handle error
667
-
668
- # Step 4: check if function response is an error
669
- if function_response_string.startswith(ERROR_MESSAGE_PREFIX):
670
- error_msg = function_response_string
671
- tool_return = ToolReturn(
672
- status=tool_execution_result.status,
673
- stdout=tool_execution_result.stdout,
674
- stderr=tool_execution_result.stderr,
675
- )
676
- messages = self._handle_function_error_response(
677
- error_msg,
678
- tool_call_id,
679
- function_name,
680
- function_args,
681
- function_response,
682
- messages,
683
- [tool_return],
684
- include_function_failed_message=True,
685
- group_id=group_id,
686
- )
687
- return messages, False, True # force a heartbeat to allow agent to handle error
688
-
689
- # If no failures happened along the way: ...
690
- # Step 5: send the info on the function call and function response to GPT
691
- tool_return = ToolReturn(
692
- status=tool_execution_result.status,
693
- stdout=tool_execution_result.stdout,
694
- stderr=tool_execution_result.stderr,
695
- )
696
- messages.append(
697
- Message(
698
- agent_id=self.agent_state.id,
699
- # Base info OpenAI-style
700
- model=self.model,
701
- role="tool",
702
- name=function_name, # NOTE: when role is 'tool', the 'name' is the function name, not agent name
703
- content=[TextContent(text=function_response)],
704
- tool_call_id=tool_call_id,
705
- # Letta extras
706
- tool_returns=[tool_return],
707
- group_id=group_id,
708
- )
709
- ) # extend conversation with function response
710
- self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
711
- self.interface.function_message(f"Success: {function_response_string}", msg_obj=messages[-1], chunk_index=chunk_index)
712
- chunk_index += 1
713
- self.last_function_response = function_response
714
-
715
- else:
716
- # Standard non-function reply
717
- messages.append(
718
- Message.dict_to_message(
719
- id=response_message_id,
720
- agent_id=self.agent_state.id,
721
- model=self.model,
722
- openai_message_dict=response_message.model_dump(),
723
- name=self.agent_state.name,
724
- group_id=group_id,
725
- )
726
- ) # extend conversation with assistant's reply
727
- self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
728
- chunk_index += 1
729
- heartbeat_request = False
730
- function_failed = False
731
-
732
- # rebuild memory
733
- # TODO: @charles please check this
734
- self.agent_state = self.agent_manager.rebuild_system_prompt(agent_id=self.agent_state.id, actor=self.user)
735
-
736
- # Update ToolRulesSolver state with last called function
737
- self.tool_rules_solver.register_tool_call(function_name)
738
- # Update heartbeat request according to provided tool rules
739
- if self.tool_rules_solver.has_children_tools(function_name):
740
- heartbeat_request = True
741
- elif self.tool_rules_solver.is_terminal_tool(function_name):
742
- heartbeat_request = False
743
-
744
- # if continue tool rule, then must request a heartbeat
745
- # TODO: dont even include heartbeats in the args
746
- if self.tool_rules_solver.is_continue_tool(function_name):
747
- heartbeat_request = True
748
-
749
- log_telemetry(self.logger, "_handle_ai_response finish")
750
- return messages, heartbeat_request, function_failed
751
-
752
- @trace_method
753
- def step(
754
- self,
755
- input_messages: List[MessageCreate],
756
- # additional args
757
- chaining: bool = True,
758
- max_chaining_steps: Optional[int] = None,
759
- put_inner_thoughts_first: bool = True,
760
- **kwargs,
761
- ) -> LettaUsageStatistics:
762
- """Run Agent.step in a loop, handling chaining via heartbeat requests and function failures"""
763
- # Defensively clear the tool rules solver history
764
- # Usually this would be extraneous as Agent loop is re-loaded on every message send
765
- # But just to be safe
766
- self.tool_rules_solver.clear_tool_history()
767
-
768
- # Convert MessageCreate objects to Message objects
769
- next_input_messages = convert_message_creates_to_messages(input_messages, self.agent_state.id, self.agent_state.timezone)
770
- counter = 0
771
- total_usage = UsageStatistics()
772
- step_count = 0
773
- function_failed = False
774
- steps_messages = []
775
- while True:
776
- kwargs["first_message"] = False
777
- kwargs["step_count"] = step_count
778
- kwargs["last_function_failed"] = function_failed
779
- step_response = self.inner_step(
780
- messages=next_input_messages,
781
- put_inner_thoughts_first=put_inner_thoughts_first,
782
- **kwargs,
783
- )
784
-
785
- heartbeat_request = step_response.heartbeat_request
786
- function_failed = step_response.function_failed
787
- token_warning = step_response.in_context_memory_warning
788
- usage = step_response.usage
789
- steps_messages.append(step_response.messages)
790
-
791
- step_count += 1
792
- total_usage += usage
793
- counter += 1
794
- self.interface.step_complete()
795
-
796
- # logger.debug("Saving agent state")
797
- # save updated state
798
- save_agent(self)
799
-
800
- # Chain stops
801
- if not chaining:
802
- self.logger.info("No chaining, stopping after one step")
803
- break
804
- elif max_chaining_steps is not None and counter > max_chaining_steps:
805
- self.logger.info(f"Hit max chaining steps, stopping after {counter} steps")
806
- break
807
- # Chain handlers
808
- elif token_warning and summarizer_settings.send_memory_warning_message:
809
- assert self.agent_state.created_by_id is not None
810
- next_input_messages = [
811
- Message.dict_to_message(
812
- agent_id=self.agent_state.id,
813
- model=self.model,
814
- openai_message_dict={
815
- "role": "user", # TODO: change to system?
816
- "content": get_token_limit_warning(),
817
- },
818
- ),
819
- ]
820
- continue # always chain
821
- elif function_failed:
822
- assert self.agent_state.created_by_id is not None
823
- next_input_messages = [
824
- Message.dict_to_message(
825
- agent_id=self.agent_state.id,
826
- model=self.model,
827
- openai_message_dict={
828
- "role": "user", # TODO: change to system?
829
- "content": get_heartbeat(self.agent_state.timezone, FUNC_FAILED_HEARTBEAT_MESSAGE),
830
- },
831
- )
832
- ]
833
- continue # always chain
834
- elif heartbeat_request:
835
- assert self.agent_state.created_by_id is not None
836
- next_input_messages = [
837
- Message.dict_to_message(
838
- agent_id=self.agent_state.id,
839
- model=self.model,
840
- openai_message_dict={
841
- "role": "user", # TODO: change to system?
842
- "content": get_heartbeat(self.agent_state.timezone, REQ_HEARTBEAT_MESSAGE),
843
- },
844
- )
845
- ]
846
- continue # always chain
847
- # Letta no-op / yield
848
- else:
849
- break
850
-
851
- if self.agent_state.message_buffer_autoclear:
852
- self.logger.info("Autoclearing message buffer")
853
- self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
854
-
855
- return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count, steps_messages=steps_messages)
856
-
857
- def inner_step(
858
- self,
859
- messages: List[Message],
860
- first_message: bool = False,
861
- first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
862
- skip_verify: bool = False,
863
- stream: bool = False, # TODO move to config?
864
- step_count: Optional[int] = None,
865
- metadata: Optional[dict] = None,
866
- summarize_attempt_count: int = 0,
867
- last_function_failed: bool = False,
868
- put_inner_thoughts_first: bool = True,
869
- ) -> AgentStepResponse:
870
- """Runs a single step in the agent loop (generates at most one LLM call)"""
871
- try:
872
- # Extract job_id from metadata if present
873
- job_id = metadata.get("job_id") if metadata else None
874
-
875
- # Declare step_id for the given step to be used as the step is processing.
876
- step_id = generate_step_id()
877
-
878
- # Step 0: update core memory
879
- # only pulling latest block data if shared memory is being used
880
- current_persisted_memory = Memory(
881
- blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
882
- file_blocks=self.agent_state.memory.file_blocks,
883
- agent_type=self.agent_state.agent_type,
884
- ) # read blocks from DB
885
- self.update_memory_if_changed(current_persisted_memory)
886
-
887
- # Step 1: add user message
888
- if not all(isinstance(m, Message) for m in messages):
889
- raise ValueError(f"messages should be a list of Message, got {[type(m) for m in messages]}")
890
-
891
- in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
892
- input_message_sequence = in_context_messages + messages
893
-
894
- if (
895
- len(input_message_sequence) > 1
896
- and input_message_sequence[-1].role != "user"
897
- and input_message_sequence[-1].group_id is None
898
- ):
899
- self.logger.warning(f"{CLI_WARNING_PREFIX}Attempting to run ChatCompletion without user as the last message in the queue")
900
-
901
- # Step 2: send the conversation and available functions to the LLM
902
- response = self._get_ai_reply(
903
- message_sequence=input_message_sequence,
904
- first_message=first_message,
905
- stream=stream,
906
- step_count=step_count,
907
- last_function_failed=last_function_failed,
908
- put_inner_thoughts_first=put_inner_thoughts_first,
909
- step_id=step_id,
910
- )
911
- if not response:
912
- # EDGE CASE: Function call failed AND there's no tools left for agent to call -> return early
913
- return AgentStepResponse(
914
- messages=input_message_sequence,
915
- heartbeat_request=False,
916
- function_failed=False, # NOTE: this is different from other function fails. We force to return early
917
- in_context_memory_warning=False,
918
- usage=UsageStatistics(),
919
- )
920
-
921
- # Step 3: check if LLM wanted to call a function
922
- # (if yes) Step 4: call the function
923
- # (if yes) Step 5: send the info on the function call and function response to LLM
924
- response_message = response.choices[0].message
925
-
926
- response_message.model_copy() # TODO why are we copying here?
927
- all_response_messages, heartbeat_request, function_failed = self._handle_ai_response(
928
- response_message,
929
- # TODO this is kind of hacky, find a better way to handle this
930
- # the only time we set up message creation ahead of time is when streaming is on
931
- response_message_id=response.id if stream else None,
932
- group_id=input_message_sequence[-1].group_id,
933
- )
934
-
935
- # Step 6: extend the message history
936
- if len(messages) > 0:
937
- all_new_messages = messages + all_response_messages
938
- else:
939
- all_new_messages = all_response_messages
940
-
941
- if self.save_last_response:
942
- self.last_response_messages = all_response_messages
943
-
944
- # Check the memory pressure and potentially issue a memory pressure warning
945
- current_total_tokens = response.usage.total_tokens
946
- active_memory_warning = False
947
-
948
- # We can't do summarize logic properly if context_window is undefined
949
- if self.agent_state.llm_config.context_window is None:
950
- # Fallback if for some reason context_window is missing, just set to the default
951
- print(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
952
- print(f"{self.agent_state}")
953
- self.agent_state.llm_config.context_window = (
954
- LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
955
- )
956
-
957
- if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window):
958
- logger.warning(
959
- f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
960
- )
961
-
962
- log_event(
963
- name="memory_pressure_warning",
964
- attributes={
965
- "current_total_tokens": current_total_tokens,
966
- "context_window_limit": self.agent_state.llm_config.context_window,
967
- },
968
- )
969
- # Only deliver the alert if we haven't already (this period)
970
- if not self.agent_alerted_about_memory_pressure:
971
- active_memory_warning = True
972
- self.agent_alerted_about_memory_pressure = True # it's up to the outer loop to handle this
973
-
974
- else:
975
- logger.info(
976
- f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
977
- )
978
-
979
- # Log step - this must happen before messages are persisted
980
- step = self.step_manager.log_step(
981
- actor=self.user,
982
- agent_id=self.agent_state.id,
983
- provider_name=self.agent_state.llm_config.model_endpoint_type,
984
- provider_category=self.agent_state.llm_config.provider_category or "base",
985
- model=self.agent_state.llm_config.model,
986
- model_endpoint=self.agent_state.llm_config.model_endpoint,
987
- context_window_limit=self.agent_state.llm_config.context_window,
988
- usage=response.usage,
989
- provider_id=self.provider_manager.get_provider_id_from_name(
990
- self.agent_state.llm_config.provider_name,
991
- actor=self.user,
992
- ),
993
- job_id=job_id,
994
- step_id=step_id,
995
- project_id=self.agent_state.project_id,
996
- status=StepStatus.SUCCESS, # Set to SUCCESS since we're logging after successful completion
997
- )
998
- for message in all_new_messages:
999
- message.step_id = step.id
1000
-
1001
- # Persisting into Messages
1002
- self.agent_state = self.agent_manager.append_to_in_context_messages(
1003
- all_new_messages, agent_id=self.agent_state.id, actor=self.user
1004
- )
1005
- if job_id:
1006
- for message in all_new_messages:
1007
- if message.role != "user":
1008
- self.job_manager.add_message_to_job(
1009
- job_id=job_id,
1010
- message_id=message.id,
1011
- actor=self.user,
1012
- )
1013
-
1014
- return AgentStepResponse(
1015
- messages=all_new_messages,
1016
- heartbeat_request=heartbeat_request,
1017
- function_failed=function_failed,
1018
- in_context_memory_warning=active_memory_warning,
1019
- usage=response.usage,
1020
- )
1021
-
1022
- except Exception as e:
1023
- logger.error(f"step() failed\nmessages = {messages}\nerror = {e}")
1024
-
1025
- # If we got a context alert, try trimming the messages length, then try again
1026
- if is_context_overflow_error(e):
1027
- in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
1028
-
1029
- # TODO: this is a patch to resolve immediate issues, should be removed once the summarizer is fixes
1030
- if self.agent_state.message_buffer_autoclear:
1031
- # no calling the summarizer in this case
1032
- logger.error(
1033
- f"step() failed with an exception that looks like a context window overflow, but message buffer is set to autoclear, so skipping: '{str(e)}'"
1034
- )
1035
- raise e
1036
-
1037
- if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
1038
- logger.warning(
1039
- f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
1040
- )
1041
- # A separate API call to run a summarizer
1042
- self.summarize_messages_inplace()
1043
-
1044
- # Try step again
1045
- return self.inner_step(
1046
- messages=messages,
1047
- first_message=first_message,
1048
- first_message_retry_limit=first_message_retry_limit,
1049
- skip_verify=skip_verify,
1050
- stream=stream,
1051
- metadata=metadata,
1052
- summarize_attempt_count=summarize_attempt_count + 1,
1053
- )
1054
- else:
1055
- err_msg = f"Ran summarizer {summarize_attempt_count - 1} times for agent id={self.agent_state.id}, but messages are still overflowing the context window."
1056
- token_counts = (get_token_counts_for_messages(in_context_messages),)
1057
- logger.error(err_msg)
1058
- logger.error(f"num_in_context_messages: {len(self.agent_state.message_ids)}")
1059
- logger.error(f"token_counts: {token_counts}")
1060
- raise ContextWindowExceededError(
1061
- err_msg,
1062
- details={
1063
- "num_in_context_messages": len(self.agent_state.message_ids),
1064
- "in_context_messages_text": [m.content for m in in_context_messages],
1065
- "token_counts": token_counts,
1066
- },
1067
- )
1068
-
1069
- else:
1070
- logger.error(f"step() failed with an unrecognized exception: '{str(e)}'")
1071
- traceback.print_exc()
1072
- raise e
1073
-
1074
- def step_user_message(self, user_message_str: str, **kwargs) -> AgentStepResponse:
1075
- """Takes a basic user message string, turns it into a stringified JSON with extra metadata, then sends it to the agent
1076
-
1077
- Example:
1078
- -> user_message_str = 'hi'
1079
- -> {'message': 'hi', 'type': 'user_message', ...}
1080
- -> json.dumps(...)
1081
- -> agent.step(messages=[Message(role='user', text=...)])
1082
- """
1083
- # Wrap with metadata, dumps to JSON
1084
- assert user_message_str and isinstance(user_message_str, str), (
1085
- f"user_message_str should be a non-empty string, got {type(user_message_str)}"
1086
- )
1087
- user_message_json_str = package_user_message(user_message_str, self.agent_state.timezone)
1088
-
1089
- # Validate JSON via save/load
1090
- user_message = validate_json(user_message_json_str)
1091
- cleaned_user_message_text, name = strip_name_field_from_user_message(user_message)
1092
-
1093
- # Turn into a dict
1094
- openai_message_dict = {"role": "user", "content": cleaned_user_message_text, "name": name}
1095
-
1096
- # Create the associated Message object (in the database)
1097
- assert self.agent_state.created_by_id is not None, "User ID is not set"
1098
- user_message = Message.dict_to_message(
1099
- agent_id=self.agent_state.id,
1100
- model=self.model,
1101
- openai_message_dict=openai_message_dict,
1102
- # created_at=timestamp,
1103
- )
1104
-
1105
- return self.inner_step(messages=[user_message], **kwargs)
1106
-
1107
-    def summarize_messages_inplace(self):
-        in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-        in_context_messages_openai_no_system = in_context_messages_openai[1:]
-        token_counts = get_token_counts_for_messages(in_context_messages)
-        logger.info(f"System message token count={token_counts[0]}")
-        logger.info(f"token_counts_no_system={token_counts[1:]}")
-
-        if in_context_messages_openai[0]["role"] != "system":
-            raise RuntimeError(f"in_context_messages_openai[0] should be system (instead got {in_context_messages_openai[0]})")
-
-        # If at this point there's nothing to summarize, raise an error
-        if len(in_context_messages_openai_no_system) == 0:
-            raise ContextWindowExceededError(
-                "Not enough messages to compress for summarization",
-                details={
-                    "num_candidate_messages": len(in_context_messages_openai_no_system),
-                    "num_total_messages": len(in_context_messages_openai),
-                },
-            )
-
-        cutoff = calculate_summarizer_cutoff(in_context_messages=in_context_messages, token_counts=token_counts, logger=logger)
-        message_sequence_to_summarize = in_context_messages[1:cutoff]  # do NOT get rid of the system message
-        logger.info(f"Attempting to summarize {len(message_sequence_to_summarize)} messages of {len(in_context_messages)}")
-
-        # We can't do summarize logic properly if context_window is undefined
-        if self.agent_state.llm_config.context_window is None:
-            # Fallback if for some reason context_window is missing: just set it to the default
-            logger.warning(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
-            self.agent_state.llm_config.context_window = (
-                LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
-            )
-
-        summary = summarize_messages(
-            agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize, actor=self.user
-        )
-        logger.info(f"Got summary: {summary}")
-
-        # Metadata that's useful for the agent to see
-        all_time_message_count = self.message_manager.size(agent_id=self.agent_state.id, actor=self.user)
-        remaining_message_count = 1 + len(in_context_messages) - cutoff  # system + remaining
-        hidden_message_count = all_time_message_count - remaining_message_count
-        summary_message_count = len(message_sequence_to_summarize)
-        summary_message = package_summarize_message(
-            summary, summary_message_count, hidden_message_count, all_time_message_count, self.agent_state.timezone
-        )
-        logger.info(f"Packaged into message: {summary_message}")
-
-        prior_len = len(in_context_messages_openai)
-        self.agent_state = self.agent_manager.trim_older_in_context_messages(num=cutoff, agent_id=self.agent_state.id, actor=self.user)
-        packed_summary_message = {"role": "user", "content": summary_message}
-        # Prepend the summary
-        self.agent_state = self.agent_manager.prepend_to_in_context_messages(
-            messages=[
-                Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict=packed_summary_message,
-                )
-            ],
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        # Reset the memory-pressure alert
-        self.agent_alerted_about_memory_pressure = False
-        curr_in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-
-        current_token_count = sum(get_token_counts_for_messages(curr_in_context_messages))
-        logger.info(f"Ran summarizer, messages length {prior_len} -> {len(curr_in_context_messages)}")
-        logger.info(f"Summarizer brought down total token count from {sum(token_counts)} -> {current_token_count}")
-        log_event(
-            name="summarization",
-            attributes={
-                "prior_length": prior_len,
-                "current_length": len(curr_in_context_messages),
-                "prior_token_count": sum(token_counts),
-                "current_token_count": current_token_count,
-                "context_window_limit": self.agent_state.llm_config.context_window,
-            },
-        )
-
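
The cutoff heuristic lives in `calculate_summarizer_cutoff`, which is not part of this diff. A simplified sketch of the idea, assuming a plain token budget (the real function also respects message boundaries such as tool-call pairs and logs its decision):

```python
def pick_cutoff(token_counts: list[int], budget: int) -> int:
    # token_counts[0] is the system message, which is never summarized.
    # Walk forward from the oldest non-system message, evicting until
    # the remaining context fits within the budget.
    total = sum(token_counts)
    cutoff = 1
    while total > budget and cutoff < len(token_counts) - 1:
        total -= token_counts[cutoff]
        cutoff += 1
    return cutoff

counts = [800, 300, 250, 400, 120]  # system + four messages
assert pick_cutoff(counts, budget=1400) == 3  # messages[1:3] get summarized
```
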
-    def add_function(self, function_name: str) -> str:
-        # TODO: refactor
-        raise NotImplementedError
-
-    def remove_function(self, function_name: str) -> str:
-        # TODO: refactor
-        raise NotImplementedError
-
-    def migrate_embedding(self, embedding_config: EmbeddingConfig):
-        """Migrate the agent to a new embedding"""
-        # TODO: archival memory
-
-        # TODO: recall memory
-        raise NotImplementedError()
-
-    def get_context_window(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-
-        system_prompt = self.agent_state.system  # TODO: is this the current system or the initial system?
-        num_tokens_system = count_tokens(system_prompt)
-        core_memory = self.agent_state.memory.compile()
-        num_tokens_core_memory = count_tokens(core_memory)
-
-        # Grab the in-context messages
-        # conversion of messages to OpenAI dict format, which is passed to the token counter
-        in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO: remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory = count_tokens(text_content)
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
-                if len(in_context_messages_openai) > 2
-                else 0
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory = 0
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
-                if len(in_context_messages_openai) > 1
-                else 0
-            )
-
-        agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
-        message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
-        external_memory_summary = PromptGenerator.compile_memory_metadata_block(
-            memory_edit_timestamp=get_utc_time(),
-            timezone=self.agent_state.timezone,
-            previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
-            archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
-        )
-        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
-
-        # tokens taken up by function definitions
-        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
-        if agent_state_tool_jsons:
-            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions = 0
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=agent_manager_passage_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
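
The synchronous path relies on a `count_tokens` text counter plus `num_tokens_from_messages`; assuming the counter is tiktoken-backed (the async sibling below is explicitly named after tiktoken), the component accounting reduces to encode-and-sum. A self-contained sketch with illustrative component strings (requires `pip install tiktoken`):

```python
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    return len(enc.encode(text))

# Illustrative components only; the real breakdown also counts tool
# JSON schemas and per-message chat formatting overhead.
components = {
    "system_prompt": "You are a helpful assistant.",
    "core_memory": "<persona>I am Sam.</persona>",
    "summary_memory": "The following is a summary of the previous 12 messages: ...",
}
breakdown = {name: count_tokens(text) for name, text in components.items()}
num_tokens_used_total = sum(breakdown.values())
print(breakdown, "total:", num_tokens_used_total)
```
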
-    async def get_context_window_async(self) -> ContextWindowOverview:
-        if settings.environment == "PRODUCTION" and model_settings.anthropic_api_key:
-            return await self.get_context_window_from_anthropic_async()
-        return await self.get_context_window_from_tiktoken_async()
-
-    async def get_context_window_from_tiktoken_async(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-        # Grab the in-context messages
-        in_context_messages = await self.message_manager.get_messages_by_ids_async(
-            message_ids=self.agent_state.message_ids, actor=self.user
-        )
-
-        # conversion of messages to OpenAI dict format, which is passed to the token counter
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-
-        # Extract system, memory and external summary
-        if (
-            len(in_context_messages) > 0
-            and in_context_messages[0].role == MessageRole.system
-            and in_context_messages[0].content
-            and len(in_context_messages[0].content) == 1
-            and isinstance(in_context_messages[0].content[0], TextContent)
-        ):
-            system_message = in_context_messages[0].content[0].text
-
-            external_memory_marker_pos = system_message.find("###")
-            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
-            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
-                system_prompt = system_message[:external_memory_marker_pos].strip()
-                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
-                core_memory = system_message[core_memory_marker_pos:].strip()
-            else:
-                # if no markers are found, put everything in the system message
-                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
-                system_prompt = system_message
-                external_memory_summary = ""
-                core_memory = ""
-        else:
-            # if there is no system message, fall back on the agent's system prompt
-            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
-            system_prompt = self.agent_state.system
-            external_memory_summary = ""
-            core_memory = ""
-
-        num_tokens_system = count_tokens(system_prompt)
-        num_tokens_core_memory = count_tokens(core_memory)
-        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO: remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory = count_tokens(text_content)
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
-                if len(in_context_messages_openai) > 2
-                else 0
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory = 0
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
-                if len(in_context_messages_openai) > 1
-                else 0
-            )
-
-        # tokens taken up by function definitions
-        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
-        if agent_state_tool_jsons:
-            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions = 0
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        passage_manager_size = await self.passage_manager.agent_passage_size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-        message_manager_size = await self.message_manager.size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=passage_manager_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
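
The marker-based split of the compiled system message is easy to get wrong; a standalone sketch of the same parse (the helper name is hypothetical, the markers are the ones the method relies on):

```python
def split_system_message(system_message: str) -> tuple[str, str, str]:
    # Mirror the positional-marker parse above: the external-memory
    # metadata block starts at the first "###", and core memory starts
    # at the first "<" after it. If either marker is missing, treat the
    # whole string as the system prompt.
    meta_pos = system_message.find("###")
    core_pos = system_message.find("<", meta_pos) if meta_pos != -1 else -1
    if meta_pos == -1 or core_pos == -1:
        return system_message, "", ""
    return (
        system_message[:meta_pos].strip(),
        system_message[meta_pos:core_pos].strip(),
        system_message[core_pos:].strip(),
    )

prompt, meta, core = split_system_message(
    "Base instructions.\n### Memory metadata\n<persona>I am Sam.</persona>"
)
assert prompt == "Base instructions." and core.startswith("<persona>")
```
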
-    async def get_context_window_from_anthropic_async(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-        anthropic_client = LLMClient.create(provider_type=ProviderType.anthropic, actor=self.user)
-        model = self.agent_state.llm_config.model if self.agent_state.llm_config.model_endpoint_type == "anthropic" else None
-
-        # Grab the in-context messages
-        in_context_messages = await self.message_manager.get_messages_by_ids_async(
-            message_ids=self.agent_state.message_ids, actor=self.user
-        )
-
-        # conversion of messages to Anthropic dict format, which is passed to the token counter
-        in_context_messages_anthropic = Message.to_anthropic_dicts_from_list(in_context_messages)
-
-        # Extract system, memory and external summary
-        if (
-            len(in_context_messages) > 0
-            and in_context_messages[0].role == MessageRole.system
-            and in_context_messages[0].content
-            and len(in_context_messages[0].content) == 1
-            and isinstance(in_context_messages[0].content[0], TextContent)
-        ):
-            system_message = in_context_messages[0].content[0].text
-
-            external_memory_marker_pos = system_message.find("###")
-            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
-            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
-                system_prompt = system_message[:external_memory_marker_pos].strip()
-                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
-                core_memory = system_message[core_memory_marker_pos:].strip()
-            else:
-                # if no markers are found, put everything in the system message
-                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
-                system_prompt = system_message
-                external_memory_summary = ""
-                core_memory = ""
-        else:
-            # if there is no system message, fall back on the agent's system prompt
-            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
-            system_prompt = self.agent_state.system
-            external_memory_summary = ""
-            core_memory = ""
-
-        num_tokens_system_coroutine = anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": system_prompt}])
-        num_tokens_core_memory_coroutine = (
-            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": core_memory}])
-            if core_memory
-            else asyncio.sleep(0, result=0)
-        )
-        num_tokens_external_memory_summary_coroutine = (
-            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": external_memory_summary}])
-            if external_memory_summary
-            else asyncio.sleep(0, result=0)
-        )
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO: remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory_coroutine = anthropic_client.count_tokens(
-                model=model, messages=[{"role": "user", "content": summary_memory}]
-            )
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages_coroutine = (
-                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[2:])
-                if len(in_context_messages_anthropic) > 2
-                else asyncio.sleep(0, result=0)
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory_coroutine = asyncio.sleep(0, result=0)
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages_coroutine = (
-                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[1:])
-                if len(in_context_messages_anthropic) > 1
-                else asyncio.sleep(0, result=0)
-            )
-
-        # tokens taken up by function definitions
-        if self.agent_state.tools and len(self.agent_state.tools) > 0:
-            available_functions_definitions = [OpenAITool(type="function", function=f.json_schema) for f in self.agent_state.tools]
-            num_tokens_available_functions_definitions_coroutine = anthropic_client.count_tokens(
-                model=model,
-                tools=available_functions_definitions,
-            )
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions_coroutine = asyncio.sleep(0, result=0)
-
-        (
-            num_tokens_system,
-            num_tokens_core_memory,
-            num_tokens_external_memory_summary,
-            num_tokens_summary_memory,
-            num_tokens_messages,
-            num_tokens_available_functions_definitions,
-        ) = await asyncio.gather(
-            num_tokens_system_coroutine,
-            num_tokens_core_memory_coroutine,
-            num_tokens_external_memory_summary_coroutine,
-            num_tokens_summary_memory_coroutine,
-            num_tokens_messages_coroutine,
-            num_tokens_available_functions_definitions_coroutine,
-        )
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        passage_manager_size = await self.passage_manager.agent_passage_size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-        message_manager_size = await self.message_manager.size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=passage_manager_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
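
The `asyncio.sleep(0, result=0)` pattern above is worth calling out: it manufactures an awaitable that resolves immediately to zero, so optional components can be passed to `asyncio.gather` without special-casing. A minimal demonstration:

```python
import asyncio

async def count_tokens_remote(text: str) -> int:
    await asyncio.sleep(0)  # stand-in for a provider token-counting API call
    return len(text.split())

async def main() -> None:
    summary = ""  # an empty component needs no API round-trip
    results = await asyncio.gather(
        count_tokens_remote("the system prompt"),
        # asyncio.sleep(0, result=0) is an awaitable that immediately
        # yields 0, so empty components slot uniformly into gather()
        count_tokens_remote(summary) if summary else asyncio.sleep(0, result=0),
    )
    assert results == [3, 0]

asyncio.run(main())
```
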
-    def count_tokens(self) -> int:
-        """Count the tokens in the current context window"""
-        context_window_breakdown = self.get_context_window()
-        return context_window_breakdown.context_window_size_current
-
-    # TODO: Refactor into a separate class vs. the large if/elses here
-    def execute_tool_and_persist_state(self, function_name: str, function_args: dict, target_letta_tool: Tool) -> ToolExecutionResult:
-        """
-        Execute tool modifications and persist the state of the agent.
-        Note: only some agent state modifications will be persisted, such as data in the AgentState ORM and block data
-        """
-        # TODO: add agent manager here
-        orig_memory_str = self.agent_state.memory.compile()
-
-        # TODO: need to have an AgentState object that actually has full access to the block data
-        # this is because the sandbox tools need to be able to access block.value to edit this data
-        try:
-            if target_letta_tool.tool_type == ToolType.LETTA_CORE:
-                # base tools are allowed to access the `Agent` object and run on the database
-                callable_func = get_function_from_module(LETTA_CORE_TOOL_MODULE_NAME, function_name)
-                function_args["self"] = self  # need to attach self to the args since it's dynamically linked
-                function_response = callable_func(**function_args)
-            elif target_letta_tool.tool_type == ToolType.LETTA_MULTI_AGENT_CORE:
-                callable_func = get_function_from_module(LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name)
-                function_args["self"] = self  # need to attach self to the args since it's dynamically linked
-                function_response = callable_func(**function_args)
-            elif target_letta_tool.tool_type == ToolType.LETTA_MEMORY_CORE or target_letta_tool.tool_type == ToolType.LETTA_SLEEPTIME_CORE:
-                callable_func = get_function_from_module(LETTA_CORE_TOOL_MODULE_NAME, function_name)
-                agent_state_copy = self.agent_state.__deepcopy__()
-                function_args["agent_state"] = agent_state_copy  # need to attach the state copy to the args since it's dynamically linked
-                function_response = callable_func(**function_args)
-                self.ensure_read_only_block_not_modified(
-                    new_memory=agent_state_copy.memory
-                )  # memory editing tools cannot edit read-only blocks
-                self.update_memory_if_changed(agent_state_copy.memory)
-            elif target_letta_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
-                action_name = generate_composio_action_from_func_name(target_letta_tool.name)
-                # Get the entity ID from the agent_state
-                entity_id = None
-                for env_var in self.agent_state.secrets:
-                    if env_var.key == COMPOSIO_ENTITY_ENV_VAR_KEY:
-                        entity_id = env_var.value
-                # Get the composio_api_key
-                composio_api_key = get_composio_api_key(actor=self.user, logger=self.logger)
-                function_response = execute_composio_action(
-                    action_name=action_name, args=function_args, api_key=composio_api_key, entity_id=entity_id
-                )
-            elif target_letta_tool.tool_type == ToolType.EXTERNAL_MCP:
-                # Get the server name from the tool tag
-                # TODO: make a property instead?
-                server_name = target_letta_tool.tags[0].split(":")[1]
-
-                # Get the MCPClient from the server's handle
-                # TODO: these don't get raised properly
-                if not self.mcp_clients:
-                    raise ValueError("No MCP client available to use")
-                if server_name not in self.mcp_clients:
-                    raise ValueError(f"Unknown MCP server name: {server_name}")
-                mcp_client = self.mcp_clients[server_name]
-
-                # Check that the tool exists
-                available_tools = mcp_client.list_tools()
-                available_tool_names = [t.name for t in available_tools]
-                if function_name not in available_tool_names:
-                    raise ValueError(
-                        f"{function_name} is not available in MCP server {server_name}. Please check your `~/.letta/mcp_config.json` file."
-                    )
-
-                function_response, is_error = mcp_client.execute_tool(tool_name=function_name, tool_args=function_args)
-                return ToolExecutionResult(
-                    status="error" if is_error else "success",
-                    func_return=function_response,
-                )
-            else:
-                try:
-                    # Parse the source code to extract function annotations
-                    annotations = get_function_annotations_from_source(target_letta_tool.source_code, function_name)
-                    # Coerce the function arguments to the correct types based on the annotations
-                    function_args = coerce_dict_args_by_annotations(function_args, annotations)
-                except ValueError as e:
-                    self.logger.debug(f"Error coercing function arguments: {e}")
-
-                # execute the tool in a sandbox
-                # TODO: allow agent_state to specify which sandbox to execute tools in
-                # TODO: this is only temporary; can remove after we publish a pip package with this object
-                agent_state_copy = self.agent_state.__deepcopy__()
-                agent_state_copy.tools = []
-                agent_state_copy.tool_rules = []
-
-                tool_execution_result = ToolExecutionSandbox(function_name, function_args, self.user, tool_object=target_letta_tool).run(
-                    agent_state=agent_state_copy
-                )
-                assert orig_memory_str == self.agent_state.memory.compile(), "Memory should not be modified in a sandbox tool"
-                if tool_execution_result.agent_state is not None:
-                    self.update_memory_if_changed(tool_execution_result.agent_state.memory)
-                return tool_execution_result
-        except Exception as e:
-            # Need to catch the error here, or else truncation won't happen
-            # TODO: modify to a function execution error
-            function_response = get_friendly_error_msg(
-                function_name=function_name, exception_name=type(e).__name__, exception_message=str(e)
-            )
-            return ToolExecutionResult(
-                status="error",
-                func_return=function_response,
-                stderr=[traceback.format_exc()],
-            )
-
-        return ToolExecutionResult(
-            status="success",
-            func_return=function_response,
-        )
-
-
-def save_agent(agent: Agent):
-    """Save agent to metadata store"""
-    agent_state = agent.agent_state
-    assert isinstance(agent_state.memory, Memory), f"Memory is not a Memory object: {type(agent_state.memory)}"
-
-    # TODO: move this to agent manager
-    # TODO: completely strip out metadata
-    # convert to the persisted model
-    agent_manager = AgentManager()
-    update_agent = UpdateAgent(
-        name=agent_state.name,
-        tool_ids=[t.id for t in agent_state.tools],
-        source_ids=[s.id for s in agent_state.sources],
-        block_ids=[b.id for b in agent_state.memory.blocks],
-        tags=agent_state.tags,
-        system=agent_state.system,
-        tool_rules=agent_state.tool_rules,
-        llm_config=agent_state.llm_config,
-        embedding_config=agent_state.embedding_config,
-        message_ids=agent_state.message_ids,
-        description=agent_state.description,
-        metadata=agent_state.metadata,
-        # TODO: add this back in later
-        # tool_exec_environment_variables=agent_state.get_agent_env_vars_as_dict(),
-    )
-    agent_manager.update_agent(agent_id=agent_state.id, agent_update=update_agent, actor=agent.user)
-
-
-def strip_name_field_from_user_message(user_message_text: str) -> Tuple[str, Optional[str]]:
-    """If 'name' exists in the JSON string, remove it and return the cleaned text + name value"""
-    try:
-        user_message_json = dict(json_loads(user_message_text))
-        # Special handling for AutoGen messages: treat 'name' as a special field,
-        # and if it exists in the input message, elevate it to the 'message' level
-        name = user_message_json.pop("name", None)
-        clean_message = json_dumps(user_message_json)
-        return clean_message, name
-
-    except Exception as e:
-        print(f"{CLI_WARNING_PREFIX}handling of 'name' field failed with: {e}")
-        raise e
-
-
-def validate_json(user_message_text: str) -> str:
-    """Make sure that the user input message is valid JSON"""
-    try:
-        user_message_json = dict(json_loads(user_message_text))
-        user_message_json_val = json_dumps(user_message_json)
-        return user_message_json_val
-    except Exception as e:
-        print(f"{CLI_WARNING_PREFIX}couldn't parse user input message as JSON: {e}")
-        raise e
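
These two helpers implement the save/load validation used by `step_user_message` earlier in the file; `json_loads`/`json_dumps` are Letta's JSON wrappers, so a standard-library equivalent behaves like:

```python
import json
from typing import Optional, Tuple

def validate_json(text: str) -> str:
    # Round-trip through json to normalize the payload and fail fast on bad input.
    return json.dumps(dict(json.loads(text)))

def strip_name_field(text: str) -> Tuple[str, Optional[str]]:
    # Pop the AutoGen-style 'name' key out of the message payload.
    payload = dict(json.loads(text))
    name = payload.pop("name", None)
    return json.dumps(payload), name

raw = '{"type": "user_message", "message": "hi", "name": "alice"}'
clean, name = strip_name_field(validate_json(raw))
assert name == "alice" and "name" not in json.loads(clean)
```
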