letta-nightly 0.13.0.dev20251031104146__py3-none-any.whl → 0.13.1.dev20251101010313__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/adapters/simple_llm_stream_adapter.py +1 -0
- letta/agents/letta_agent_v2.py +8 -0
- letta/agents/letta_agent_v3.py +127 -27
- letta/agents/temporal/activities/__init__.py +25 -0
- letta/agents/temporal/activities/create_messages.py +26 -0
- letta/agents/temporal/activities/create_step.py +57 -0
- letta/agents/temporal/activities/example_activity.py +9 -0
- letta/agents/temporal/activities/execute_tool.py +130 -0
- letta/agents/temporal/activities/llm_request.py +114 -0
- letta/agents/temporal/activities/prepare_messages.py +27 -0
- letta/agents/temporal/activities/refresh_context.py +160 -0
- letta/agents/temporal/activities/summarize_conversation_history.py +77 -0
- letta/agents/temporal/activities/update_message_ids.py +25 -0
- letta/agents/temporal/activities/update_run.py +43 -0
- letta/agents/temporal/constants.py +59 -0
- letta/agents/temporal/temporal_agent_workflow.py +704 -0
- letta/agents/temporal/types.py +275 -0
- letta/constants.py +11 -0
- letta/errors.py +4 -0
- letta/functions/function_sets/base.py +0 -11
- letta/groups/helpers.py +7 -1
- letta/groups/sleeptime_multi_agent_v4.py +4 -3
- letta/interfaces/anthropic_streaming_interface.py +0 -1
- letta/interfaces/openai_streaming_interface.py +103 -100
- letta/llm_api/anthropic_client.py +57 -12
- letta/llm_api/bedrock_client.py +1 -0
- letta/llm_api/deepseek_client.py +3 -2
- letta/llm_api/google_vertex_client.py +5 -4
- letta/llm_api/groq_client.py +1 -0
- letta/llm_api/llm_client_base.py +15 -1
- letta/llm_api/openai.py +2 -2
- letta/llm_api/openai_client.py +17 -3
- letta/llm_api/xai_client.py +1 -0
- letta/orm/agent.py +3 -0
- letta/orm/organization.py +4 -0
- letta/orm/sqlalchemy_base.py +7 -0
- letta/otel/tracing.py +131 -4
- letta/schemas/agent.py +108 -40
- letta/schemas/agent_file.py +10 -10
- letta/schemas/block.py +22 -3
- letta/schemas/enums.py +21 -0
- letta/schemas/environment_variables.py +3 -2
- letta/schemas/group.py +3 -3
- letta/schemas/letta_response.py +36 -4
- letta/schemas/llm_batch_job.py +3 -3
- letta/schemas/llm_config.py +123 -4
- letta/schemas/mcp.py +3 -2
- letta/schemas/mcp_server.py +3 -2
- letta/schemas/message.py +167 -49
- letta/schemas/model.py +265 -0
- letta/schemas/organization.py +2 -1
- letta/schemas/passage.py +2 -1
- letta/schemas/provider_trace.py +2 -1
- letta/schemas/providers/openrouter.py +1 -2
- letta/schemas/run_metrics.py +2 -1
- letta/schemas/sandbox_config.py +3 -1
- letta/schemas/step_metrics.py +2 -1
- letta/schemas/tool_rule.py +2 -2
- letta/schemas/user.py +2 -1
- letta/server/rest_api/app.py +5 -1
- letta/server/rest_api/routers/v1/__init__.py +4 -0
- letta/server/rest_api/routers/v1/agents.py +71 -9
- letta/server/rest_api/routers/v1/blocks.py +7 -7
- letta/server/rest_api/routers/v1/groups.py +40 -0
- letta/server/rest_api/routers/v1/identities.py +2 -2
- letta/server/rest_api/routers/v1/internal_agents.py +31 -0
- letta/server/rest_api/routers/v1/internal_blocks.py +177 -0
- letta/server/rest_api/routers/v1/internal_runs.py +25 -1
- letta/server/rest_api/routers/v1/runs.py +2 -22
- letta/server/rest_api/routers/v1/tools.py +12 -1
- letta/server/server.py +20 -4
- letta/services/agent_manager.py +4 -4
- letta/services/archive_manager.py +16 -0
- letta/services/group_manager.py +44 -0
- letta/services/helpers/run_manager_helper.py +2 -2
- letta/services/lettuce/lettuce_client.py +148 -0
- letta/services/mcp/base_client.py +9 -3
- letta/services/run_manager.py +148 -37
- letta/services/source_manager.py +91 -3
- letta/services/step_manager.py +2 -3
- letta/services/streaming_service.py +52 -13
- letta/services/summarizer/summarizer.py +28 -2
- letta/services/tool_executor/builtin_tool_executor.py +1 -1
- letta/services/tool_executor/core_tool_executor.py +2 -117
- letta/services/tool_sandbox/e2b_sandbox.py +4 -1
- letta/services/tool_schema_generator.py +2 -2
- letta/validators.py +21 -0
- {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251101010313.dist-info}/METADATA +1 -1
- {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251101010313.dist-info}/RECORD +93 -87
- letta/agent.py +0 -1758
- letta/cli/cli_load.py +0 -16
- letta/client/__init__.py +0 -0
- letta/client/streaming.py +0 -95
- letta/client/utils.py +0 -78
- letta/functions/async_composio_toolset.py +0 -109
- letta/functions/composio_helpers.py +0 -96
- letta/helpers/composio_helpers.py +0 -38
- letta/orm/job_messages.py +0 -33
- letta/schemas/providers.py +0 -1617
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -132
- letta/services/tool_executor/composio_tool_executor.py +0 -57
- {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251101010313.dist-info}/WHEEL +0 -0
- {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251101010313.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251101010313.dist-info}/licenses/LICENSE +0 -0
letta/agent.py
DELETED
|
@@ -1,1758 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import json
|
|
3
|
-
import time
|
|
4
|
-
import traceback
|
|
5
|
-
import warnings
|
|
6
|
-
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
8
|
-
|
|
9
|
-
from openai.types.beta.function_tool import FunctionTool as OpenAITool
|
|
10
|
-
|
|
11
|
-
from letta.agents.helpers import generate_step_id
|
|
12
|
-
from letta.constants import (
|
|
13
|
-
CLI_WARNING_PREFIX,
|
|
14
|
-
COMPOSIO_ENTITY_ENV_VAR_KEY,
|
|
15
|
-
ERROR_MESSAGE_PREFIX,
|
|
16
|
-
FIRST_MESSAGE_ATTEMPTS,
|
|
17
|
-
FUNC_FAILED_HEARTBEAT_MESSAGE,
|
|
18
|
-
LETTA_CORE_TOOL_MODULE_NAME,
|
|
19
|
-
LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
|
|
20
|
-
LLM_MAX_TOKENS,
|
|
21
|
-
READ_ONLY_BLOCK_EDIT_ERROR,
|
|
22
|
-
REQ_HEARTBEAT_MESSAGE,
|
|
23
|
-
SEND_MESSAGE_TOOL_NAME,
|
|
24
|
-
)
|
|
25
|
-
from letta.errors import ContextWindowExceededError
|
|
26
|
-
from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source
|
|
27
|
-
from letta.functions.composio_helpers import execute_composio_action, generate_composio_action_from_func_name
|
|
28
|
-
from letta.functions.functions import get_function_from_module
|
|
29
|
-
from letta.helpers import ToolRulesSolver
|
|
30
|
-
from letta.helpers.composio_helpers import get_composio_api_key
|
|
31
|
-
from letta.helpers.datetime_helpers import get_utc_time
|
|
32
|
-
from letta.helpers.json_helpers import json_dumps, json_loads
|
|
33
|
-
from letta.helpers.message_helper import convert_message_creates_to_messages
|
|
34
|
-
from letta.interface import AgentInterface
|
|
35
|
-
from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
|
|
36
|
-
from letta.llm_api.llm_api_tools import create
|
|
37
|
-
from letta.llm_api.llm_client import LLMClient
|
|
38
|
-
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
|
39
|
-
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
|
40
|
-
from letta.log import get_logger
|
|
41
|
-
from letta.memory import summarize_messages
|
|
42
|
-
from letta.orm import User
|
|
43
|
-
from letta.otel.tracing import log_event, trace_method
|
|
44
|
-
from letta.prompts.prompt_generator import PromptGenerator
|
|
45
|
-
from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent
|
|
46
|
-
from letta.schemas.block import BlockUpdate
|
|
47
|
-
from letta.schemas.embedding_config import EmbeddingConfig
|
|
48
|
-
from letta.schemas.enums import MessageRole, ProviderType, StepStatus, ToolType
|
|
49
|
-
from letta.schemas.letta_message_content import ImageContent, TextContent
|
|
50
|
-
from letta.schemas.memory import ContextWindowOverview, Memory
|
|
51
|
-
from letta.schemas.message import Message, MessageCreate, ToolReturn
|
|
52
|
-
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Message as ChatCompletionMessage, UsageStatistics
|
|
53
|
-
from letta.schemas.response_format import ResponseFormatType
|
|
54
|
-
from letta.schemas.tool import Tool
|
|
55
|
-
from letta.schemas.tool_execution_result import ToolExecutionResult
|
|
56
|
-
from letta.schemas.tool_rule import TerminalToolRule
|
|
57
|
-
from letta.schemas.usage import LettaUsageStatistics
|
|
58
|
-
from letta.services.agent_manager import AgentManager
|
|
59
|
-
from letta.services.block_manager import BlockManager
|
|
60
|
-
from letta.services.helpers.agent_manager_helper import check_supports_structured_output
|
|
61
|
-
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
|
|
62
|
-
from letta.services.job_manager import JobManager
|
|
63
|
-
from letta.services.mcp.base_client import AsyncBaseMCPClient
|
|
64
|
-
from letta.services.message_manager import MessageManager
|
|
65
|
-
from letta.services.passage_manager import PassageManager
|
|
66
|
-
from letta.services.provider_manager import ProviderManager
|
|
67
|
-
from letta.services.step_manager import StepManager
|
|
68
|
-
from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
|
|
69
|
-
from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
|
|
70
|
-
from letta.services.tool_manager import ToolManager
|
|
71
|
-
from letta.settings import model_settings, settings, summarizer_settings
|
|
72
|
-
from letta.streaming_interface import StreamingRefreshCLIInterface
|
|
73
|
-
from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
|
|
74
|
-
from letta.utils import count_tokens, get_friendly_error_msg, get_tool_call_id, log_telemetry, parse_json, validate_function_response
|
|
75
|
-
|
|
76
|
-
logger = get_logger(__name__)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class BaseAgent(ABC):
|
|
80
|
-
"""
|
|
81
|
-
Abstract class for all agents.
|
|
82
|
-
Only one interface is required: step.
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
@abstractmethod
|
|
86
|
-
def step(
|
|
87
|
-
self,
|
|
88
|
-
input_messages: List[MessageCreate],
|
|
89
|
-
) -> LettaUsageStatistics:
|
|
90
|
-
"""
|
|
91
|
-
Top-level event message handler for the agent.
|
|
92
|
-
"""
|
|
93
|
-
raise NotImplementedError
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class Agent(BaseAgent):
|
|
97
|
-
def __init__(
|
|
98
|
-
self,
|
|
99
|
-
interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
|
|
100
|
-
agent_state: AgentState, # in-memory representation of the agent state (read from multiple tables)
|
|
101
|
-
user: User,
|
|
102
|
-
# extras
|
|
103
|
-
first_message_verify_mono: bool = True, # TODO move to config?
|
|
104
|
-
# MCP sessions, state held in-memory in the server
|
|
105
|
-
mcp_clients: Optional[Dict[str, AsyncBaseMCPClient]] = None,
|
|
106
|
-
save_last_response: bool = False,
|
|
107
|
-
):
|
|
108
|
-
assert isinstance(agent_state.memory, Memory), f"Memory object is not of type Memory: {type(agent_state.memory)}"
|
|
109
|
-
# Hold a copy of the state that was used to init the agent
|
|
110
|
-
self.agent_state = agent_state
|
|
111
|
-
assert isinstance(self.agent_state.memory, Memory), f"Memory object is not of type Memory: {type(self.agent_state.memory)}"
|
|
112
|
-
|
|
113
|
-
self.user = user
|
|
114
|
-
|
|
115
|
-
# initialize a tool rules solver
|
|
116
|
-
self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
|
|
117
|
-
|
|
118
|
-
# gpt-4, gpt-3.5-turbo, ...
|
|
119
|
-
self.model = self.agent_state.llm_config.model
|
|
120
|
-
self.supports_structured_output = check_supports_structured_output(model=self.model, tool_rules=agent_state.tool_rules)
|
|
121
|
-
|
|
122
|
-
# if there are tool rules, print out a warning
|
|
123
|
-
if not self.supports_structured_output and agent_state.tool_rules:
|
|
124
|
-
for rule in agent_state.tool_rules:
|
|
125
|
-
if not isinstance(rule, TerminalToolRule):
|
|
126
|
-
warnings.warn("Tool rules only work reliably for model backends that support structured outputs (e.g. OpenAI gpt-4o).")
|
|
127
|
-
break
|
|
128
|
-
|
|
129
|
-
# state managers
|
|
130
|
-
self.block_manager = BlockManager()
|
|
131
|
-
|
|
132
|
-
# Interface must implement:
|
|
133
|
-
# - internal_monologue
|
|
134
|
-
# - assistant_message
|
|
135
|
-
# - function_message
|
|
136
|
-
# ...
|
|
137
|
-
# Different interfaces can handle events differently
|
|
138
|
-
# e.g., print in CLI vs send a discord message with a discord bot
|
|
139
|
-
self.interface = interface
|
|
140
|
-
|
|
141
|
-
# Create the persistence manager object based on the AgentState info
|
|
142
|
-
self.message_manager = MessageManager()
|
|
143
|
-
self.passage_manager = PassageManager()
|
|
144
|
-
self.provider_manager = ProviderManager()
|
|
145
|
-
self.agent_manager = AgentManager()
|
|
146
|
-
self.job_manager = JobManager()
|
|
147
|
-
self.step_manager = StepManager()
|
|
148
|
-
self.telemetry_manager = TelemetryManager() if settings.llm_api_logging else NoopTelemetryManager()
|
|
149
|
-
|
|
150
|
-
# State needed for heartbeat pausing
|
|
151
|
-
|
|
152
|
-
self.first_message_verify_mono = first_message_verify_mono
|
|
153
|
-
|
|
154
|
-
# Controls if the convo memory pressure warning is triggered
|
|
155
|
-
# When an alert is sent in the message queue, set this to True (to avoid repeat alerts)
|
|
156
|
-
# When the summarizer is run, set this back to False (to reset)
|
|
157
|
-
self.agent_alerted_about_memory_pressure = False
|
|
158
|
-
|
|
159
|
-
# Load last function response from message history
|
|
160
|
-
self.last_function_response = self.load_last_function_response()
|
|
161
|
-
|
|
162
|
-
# Save last responses in memory
|
|
163
|
-
self.save_last_response = save_last_response
|
|
164
|
-
self.last_response_messages = []
|
|
165
|
-
|
|
166
|
-
# Logger that the Agent specifically can use, will also report the agent_state ID with the logs
|
|
167
|
-
self.logger = get_logger(agent_state.id)
|
|
168
|
-
|
|
169
|
-
# MCPClient, state/sessions managed by the server
|
|
170
|
-
# TODO: This is temporary, as a bridge
|
|
171
|
-
self.mcp_clients = None
|
|
172
|
-
# TODO: no longer supported
|
|
173
|
-
# if mcp_clients:
|
|
174
|
-
# self.mcp_clients = {client_id: client.to_sync_client() for client_id, client in mcp_clients.items()}
|
|
175
|
-
|
|
176
|
-
def load_last_function_response(self):
|
|
177
|
-
"""Load the last function response from message history"""
|
|
178
|
-
in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
|
|
179
|
-
for i in range(len(in_context_messages) - 1, -1, -1):
|
|
180
|
-
msg = in_context_messages[i]
|
|
181
|
-
if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
|
|
182
|
-
text_content = msg.content[0].text
|
|
183
|
-
try:
|
|
184
|
-
response_json = json.loads(text_content)
|
|
185
|
-
if response_json.get("message"):
|
|
186
|
-
return response_json["message"]
|
|
187
|
-
except (json.JSONDecodeError, KeyError):
|
|
188
|
-
raise ValueError(f"Invalid JSON format in message: {text_content}")
|
|
189
|
-
return None
|
|
190
|
-
|
|
191
|
-
def ensure_read_only_block_not_modified(self, new_memory: Memory) -> None:
|
|
192
|
-
"""
|
|
193
|
-
Throw an error if a read-only block has been modified
|
|
194
|
-
"""
|
|
195
|
-
for label in self.agent_state.memory.list_block_labels():
|
|
196
|
-
if self.agent_state.memory.get_block(label).read_only:
|
|
197
|
-
if new_memory.get_block(label).value != self.agent_state.memory.get_block(label).value:
|
|
198
|
-
raise ValueError(READ_ONLY_BLOCK_EDIT_ERROR)
|
|
199
|
-
|
|
200
|
-
def update_memory_if_changed(self, new_memory: Memory) -> bool:
|
|
201
|
-
"""
|
|
202
|
-
Update internal memory object and system prompt if there have been modifications.
|
|
203
|
-
|
|
204
|
-
Args:
|
|
205
|
-
new_memory (Memory): the new memory object to compare to the current memory object
|
|
206
|
-
|
|
207
|
-
Returns:
|
|
208
|
-
modified (bool): whether the memory was updated
|
|
209
|
-
"""
|
|
210
|
-
system_message = self.message_manager.get_message_by_id(message_id=self.agent_state.message_ids[0], actor=self.user)
|
|
211
|
-
if new_memory.compile() not in system_message.content[0].text:
|
|
212
|
-
# update the blocks (LRW) in the DB
|
|
213
|
-
for label in self.agent_state.memory.list_block_labels():
|
|
214
|
-
updated_value = new_memory.get_block(label).value
|
|
215
|
-
if updated_value != self.agent_state.memory.get_block(label).value:
|
|
216
|
-
# update the block if it's changed
|
|
217
|
-
block_id = self.agent_state.memory.get_block(label).id
|
|
218
|
-
self.block_manager.update_block(block_id=block_id, block_update=BlockUpdate(value=updated_value), actor=self.user)
|
|
219
|
-
|
|
220
|
-
# refresh memory from DB (using block ids)
|
|
221
|
-
self.agent_state.memory = Memory(
|
|
222
|
-
blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
|
|
223
|
-
file_blocks=self.agent_state.memory.file_blocks,
|
|
224
|
-
agent_type=self.agent_state.agent_type,
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
# NOTE: don't do this since re-buildin the memory is handled at the start of the step
|
|
228
|
-
# rebuild memory - this records the last edited timestamp of the memory
|
|
229
|
-
# TODO: pass in update timestamp from block edit time
|
|
230
|
-
self.agent_state = self.agent_manager.rebuild_system_prompt(agent_id=self.agent_state.id, actor=self.user)
|
|
231
|
-
|
|
232
|
-
return True
|
|
233
|
-
|
|
234
|
-
return False
|
|
235
|
-
|
|
236
|
-
def _handle_function_error_response(
|
|
237
|
-
self,
|
|
238
|
-
error_msg: str,
|
|
239
|
-
tool_call_id: str,
|
|
240
|
-
function_name: str,
|
|
241
|
-
function_args: dict,
|
|
242
|
-
function_response: str,
|
|
243
|
-
messages: List[Message],
|
|
244
|
-
tool_returns: Optional[List[ToolReturn]] = None,
|
|
245
|
-
include_function_failed_message: bool = False,
|
|
246
|
-
group_id: Optional[str] = None,
|
|
247
|
-
) -> List[Message]:
|
|
248
|
-
"""
|
|
249
|
-
Handle error from function call response
|
|
250
|
-
"""
|
|
251
|
-
# Update tool rules
|
|
252
|
-
self.last_function_response = function_response
|
|
253
|
-
self.tool_rules_solver.register_tool_call(function_name)
|
|
254
|
-
|
|
255
|
-
# Extend conversation with function response
|
|
256
|
-
function_response = package_function_response(False, error_msg, self.agent_state.timezone)
|
|
257
|
-
new_message = Message(
|
|
258
|
-
agent_id=self.agent_state.id,
|
|
259
|
-
# Base info OpenAI-style
|
|
260
|
-
model=self.model,
|
|
261
|
-
role="tool",
|
|
262
|
-
name=function_name, # NOTE: when role is 'tool', the 'name' is the function name, not agent name
|
|
263
|
-
content=[TextContent(text=function_response)],
|
|
264
|
-
tool_call_id=tool_call_id,
|
|
265
|
-
# Letta extras
|
|
266
|
-
tool_returns=tool_returns,
|
|
267
|
-
group_id=group_id,
|
|
268
|
-
)
|
|
269
|
-
messages.append(new_message)
|
|
270
|
-
self.interface.function_message(f"Error: {error_msg}", msg_obj=new_message, chunk_index=0)
|
|
271
|
-
if include_function_failed_message:
|
|
272
|
-
self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=new_message)
|
|
273
|
-
|
|
274
|
-
# Return updated messages
|
|
275
|
-
return messages
|
|
276
|
-
|
|
277
|
-
def _runtime_override_tool_json_schema(
|
|
278
|
-
self,
|
|
279
|
-
functions_list: List[Dict | None],
|
|
280
|
-
) -> List[Dict | None]:
|
|
281
|
-
"""Override the tool JSON schema at runtime for a particular tool if conditions are met."""
|
|
282
|
-
|
|
283
|
-
# Currently just injects `send_message` with a `response_format` if provided to the agent.
|
|
284
|
-
if self.agent_state.response_format and self.agent_state.response_format.type != ResponseFormatType.text:
|
|
285
|
-
for func in functions_list:
|
|
286
|
-
if func["name"] == SEND_MESSAGE_TOOL_NAME:
|
|
287
|
-
if self.agent_state.response_format.type == ResponseFormatType.json_schema:
|
|
288
|
-
func["parameters"]["properties"]["message"] = self.agent_state.response_format.json_schema["schema"]
|
|
289
|
-
if self.agent_state.response_format.type == ResponseFormatType.json_object:
|
|
290
|
-
func["parameters"]["properties"]["message"] = {
|
|
291
|
-
"type": "object",
|
|
292
|
-
"description": "Message contents. All unicode (including emojis) are supported.",
|
|
293
|
-
"additionalProperties": True,
|
|
294
|
-
"properties": {},
|
|
295
|
-
}
|
|
296
|
-
break
|
|
297
|
-
return functions_list
|
|
298
|
-
|
|
299
|
-
@trace_method
|
|
300
|
-
def _get_ai_reply(
|
|
301
|
-
self,
|
|
302
|
-
message_sequence: List[Message],
|
|
303
|
-
function_call: Optional[str] = None,
|
|
304
|
-
first_message: bool = False,
|
|
305
|
-
stream: bool = False, # TODO move to config?
|
|
306
|
-
empty_response_retry_limit: int = 3,
|
|
307
|
-
backoff_factor: float = 0.5, # delay multiplier for exponential backoff
|
|
308
|
-
max_delay: float = 10.0, # max delay between retries
|
|
309
|
-
step_count: Optional[int] = None,
|
|
310
|
-
last_function_failed: bool = False,
|
|
311
|
-
put_inner_thoughts_first: bool = True,
|
|
312
|
-
step_id: Optional[str] = None,
|
|
313
|
-
) -> ChatCompletionResponse | None:
|
|
314
|
-
"""Get response from LLM API with robust retry mechanism."""
|
|
315
|
-
log_telemetry(self.logger, "_get_ai_reply start")
|
|
316
|
-
available_tools = set([t.name for t in self.agent_state.tools])
|
|
317
|
-
agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
|
|
318
|
-
|
|
319
|
-
# Get allowed tools or allow all if none are allowed
|
|
320
|
-
allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(
|
|
321
|
-
available_tools=available_tools, last_function_response=self.last_function_response
|
|
322
|
-
) or list(available_tools)
|
|
323
|
-
|
|
324
|
-
# Don't allow a tool to be called if it failed last time
|
|
325
|
-
if last_function_failed and self.tool_rules_solver.tool_call_history:
|
|
326
|
-
allowed_tool_names = [f for f in allowed_tool_names if f != self.tool_rules_solver.tool_call_history[-1]]
|
|
327
|
-
if not allowed_tool_names:
|
|
328
|
-
return None
|
|
329
|
-
|
|
330
|
-
allowed_functions = [func for func in agent_state_tool_jsons if func["name"] in allowed_tool_names]
|
|
331
|
-
# Extract terminal tool names from tool rules
|
|
332
|
-
terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
|
|
333
|
-
allowed_functions = runtime_override_tool_json_schema(
|
|
334
|
-
tool_list=allowed_functions,
|
|
335
|
-
response_format=self.agent_state.response_format,
|
|
336
|
-
request_heartbeat=True,
|
|
337
|
-
terminal_tools=terminal_tool_names,
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
# For the first message, force the initial tool if one is specified
|
|
341
|
-
force_tool_call = None
|
|
342
|
-
if (
|
|
343
|
-
step_count is not None
|
|
344
|
-
and step_count == 0
|
|
345
|
-
and not self.supports_structured_output
|
|
346
|
-
and len(self.tool_rules_solver.init_tool_rules) > 0
|
|
347
|
-
):
|
|
348
|
-
# TODO: This just seems wrong? What if there are more than 1 init tool rules?
|
|
349
|
-
force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name
|
|
350
|
-
# Force a tool call if exactly one tool is specified
|
|
351
|
-
elif step_count is not None and step_count > 0 and len(allowed_tool_names) == 1:
|
|
352
|
-
force_tool_call = allowed_tool_names[0]
|
|
353
|
-
|
|
354
|
-
for attempt in range(1, empty_response_retry_limit + 1):
|
|
355
|
-
try:
|
|
356
|
-
log_telemetry(self.logger, "_get_ai_reply create start")
|
|
357
|
-
# New LLM client flow
|
|
358
|
-
llm_client = LLMClient.create(
|
|
359
|
-
provider_type=self.agent_state.llm_config.model_endpoint_type,
|
|
360
|
-
put_inner_thoughts_first=put_inner_thoughts_first,
|
|
361
|
-
actor=self.user,
|
|
362
|
-
)
|
|
363
|
-
|
|
364
|
-
if llm_client and not stream:
|
|
365
|
-
response = llm_client.send_llm_request(
|
|
366
|
-
messages=message_sequence,
|
|
367
|
-
llm_config=self.agent_state.llm_config,
|
|
368
|
-
tools=allowed_functions,
|
|
369
|
-
force_tool_call=force_tool_call,
|
|
370
|
-
telemetry_manager=self.telemetry_manager,
|
|
371
|
-
step_id=step_id,
|
|
372
|
-
)
|
|
373
|
-
else:
|
|
374
|
-
# Fallback to existing flow
|
|
375
|
-
for message in message_sequence:
|
|
376
|
-
if isinstance(message.content, list):
|
|
377
|
-
|
|
378
|
-
def get_fallback_text_content(content):
|
|
379
|
-
if isinstance(content, ImageContent):
|
|
380
|
-
return TextContent(text="[Image Here]")
|
|
381
|
-
return content
|
|
382
|
-
|
|
383
|
-
message.content = [get_fallback_text_content(content) for content in message.content]
|
|
384
|
-
|
|
385
|
-
response = create(
|
|
386
|
-
llm_config=self.agent_state.llm_config,
|
|
387
|
-
messages=message_sequence,
|
|
388
|
-
user_id=self.agent_state.created_by_id,
|
|
389
|
-
functions=allowed_functions,
|
|
390
|
-
# functions_python=self.functions_python, do we need this?
|
|
391
|
-
function_call=function_call,
|
|
392
|
-
first_message=first_message,
|
|
393
|
-
force_tool_call=force_tool_call,
|
|
394
|
-
stream=stream,
|
|
395
|
-
stream_interface=self.interface,
|
|
396
|
-
put_inner_thoughts_first=put_inner_thoughts_first,
|
|
397
|
-
name=self.agent_state.name,
|
|
398
|
-
telemetry_manager=self.telemetry_manager,
|
|
399
|
-
step_id=step_id,
|
|
400
|
-
actor=self.user,
|
|
401
|
-
)
|
|
402
|
-
log_telemetry(self.logger, "_get_ai_reply create finish")
|
|
403
|
-
|
|
404
|
-
# These bottom two are retryable
|
|
405
|
-
if len(response.choices) == 0 or response.choices[0] is None:
|
|
406
|
-
raise ValueError(f"API call returned an empty message: {response}")
|
|
407
|
-
|
|
408
|
-
if response.choices[0].finish_reason not in ["stop", "function_call", "tool_calls"]:
|
|
409
|
-
if response.choices[0].finish_reason == "length":
|
|
410
|
-
# This is not retryable, hence RuntimeError v.s. ValueError
|
|
411
|
-
raise RuntimeError("Finish reason was length (maximum context length)")
|
|
412
|
-
else:
|
|
413
|
-
raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
|
|
414
|
-
log_telemetry(self.logger, "_handle_ai_response finish")
|
|
415
|
-
|
|
416
|
-
except ValueError as ve:
|
|
417
|
-
if attempt >= empty_response_retry_limit:
|
|
418
|
-
warnings.warn(f"Retry limit reached. Final error: {ve}")
|
|
419
|
-
log_telemetry(self.logger, "_handle_ai_response finish ValueError")
|
|
420
|
-
raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
|
|
421
|
-
else:
|
|
422
|
-
delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
|
|
423
|
-
warnings.warn(f"Attempt {attempt} failed: {ve}. Retrying in {delay} seconds...")
|
|
424
|
-
time.sleep(delay)
|
|
425
|
-
continue
|
|
426
|
-
|
|
427
|
-
except Exception as e:
|
|
428
|
-
# For non-retryable errors, exit immediately
|
|
429
|
-
log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
|
|
430
|
-
raise e
|
|
431
|
-
|
|
432
|
-
# check if we are going over the context window: this allows for articifial constraints
|
|
433
|
-
if response.usage.total_tokens > self.agent_state.llm_config.context_window:
|
|
434
|
-
# trigger summarization
|
|
435
|
-
log_telemetry(self.logger, "_get_ai_reply summarize_messages_inplace")
|
|
436
|
-
self.summarize_messages_inplace()
|
|
437
|
-
|
|
438
|
-
# return the response
|
|
439
|
-
return response
|
|
440
|
-
|
|
441
|
-
log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
|
|
442
|
-
raise Exception("Retries exhausted and no valid response received.")
|
|
443
|
-
|
|
444
|
-
@trace_method
|
|
445
|
-
def _handle_ai_response(
|
|
446
|
-
self,
|
|
447
|
-
response_message: ChatCompletionMessage, # TODO should we eventually move the Message creation outside of this function?
|
|
448
|
-
override_tool_call_id: bool = False,
|
|
449
|
-
# If we are streaming, we needed to create a Message ID ahead of time,
|
|
450
|
-
# and now we want to use it in the creation of the Message object
|
|
451
|
-
# TODO figure out a cleaner way to do this
|
|
452
|
-
response_message_id: Optional[str] = None,
|
|
453
|
-
group_id: Optional[str] = None,
|
|
454
|
-
) -> Tuple[List[Message], bool, bool]:
|
|
455
|
-
"""Handles parsing and function execution"""
|
|
456
|
-
log_telemetry(self.logger, "_handle_ai_response start")
|
|
457
|
-
# Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
|
|
458
|
-
if response_message_id is not None:
|
|
459
|
-
assert response_message_id.startswith("message-"), response_message_id
|
|
460
|
-
|
|
461
|
-
messages = [] # append these to the history when done
|
|
462
|
-
function_name = None
|
|
463
|
-
function_args = {}
|
|
464
|
-
chunk_index = 0
|
|
465
|
-
|
|
466
|
-
# Step 2: check if LLM wanted to call a function
|
|
467
|
-
if response_message.function_call or (response_message.tool_calls is not None and len(response_message.tool_calls) > 0):
|
|
468
|
-
if response_message.function_call:
|
|
469
|
-
raise DeprecationWarning(response_message)
|
|
470
|
-
if response_message.tool_calls is not None and len(response_message.tool_calls) > 1:
|
|
471
|
-
# raise NotImplementedError(f">1 tool call not supported")
|
|
472
|
-
# TODO eventually support sequential tool calling
|
|
473
|
-
self.logger.warning(f">1 tool call not supported, using index=0 only\n{response_message.tool_calls}")
|
|
474
|
-
response_message.tool_calls = [response_message.tool_calls[0]]
|
|
475
|
-
assert response_message.tool_calls is not None and len(response_message.tool_calls) > 0
|
|
476
|
-
|
|
477
|
-
# generate UUID for tool call
|
|
478
|
-
if override_tool_call_id or response_message.function_call:
|
|
479
|
-
warnings.warn("Overriding the tool call can result in inconsistent tool call IDs during streaming")
|
|
480
|
-
tool_call_id = get_tool_call_id() # needs to be a string for JSON
|
|
481
|
-
response_message.tool_calls[0].id = tool_call_id
|
|
482
|
-
else:
|
|
483
|
-
tool_call_id = response_message.tool_calls[0].id
|
|
484
|
-
assert tool_call_id is not None # should be defined
|
|
485
|
-
|
|
486
|
-
# only necessary to add the tool_call_id to a function call (antipattern)
|
|
487
|
-
# response_message_dict = response_message.model_dump()
|
|
488
|
-
# response_message_dict["tool_call_id"] = tool_call_id
|
|
489
|
-
|
|
490
|
-
# role: assistant (requesting tool call, set tool call ID)
|
|
491
|
-
messages.append(
|
|
492
|
-
# NOTE: we're recreating the message here
|
|
493
|
-
# TODO should probably just overwrite the fields?
|
|
494
|
-
Message.dict_to_message(
|
|
495
|
-
id=response_message_id,
|
|
496
|
-
agent_id=self.agent_state.id,
|
|
497
|
-
model=self.model,
|
|
498
|
-
openai_message_dict=response_message.model_dump(),
|
|
499
|
-
name=self.agent_state.name,
|
|
500
|
-
group_id=group_id,
|
|
501
|
-
)
|
|
502
|
-
) # extend conversation with assistant's reply
|
|
503
|
-
self.logger.debug(f"Function call message: {messages[-1]}")
|
|
504
|
-
|
|
505
|
-
nonnull_content = False
|
|
506
|
-
if response_message.content or response_message.reasoning_content or response_message.redacted_reasoning_content:
|
|
507
|
-
# The content if then internal monologue, not chat
|
|
508
|
-
self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
|
|
509
|
-
chunk_index += 1
|
|
510
|
-
# Flag to avoid printing a duplicate if inner thoughts get popped from the function call
|
|
511
|
-
nonnull_content = True
|
|
512
|
-
|
|
513
|
-
# Step 3: call the function
|
|
514
|
-
# Note: the JSON response may not always be valid; be sure to handle errors
|
|
515
|
-
function_call = (
|
|
516
|
-
response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
|
|
517
|
-
)
|
|
518
|
-
function_name = function_call.name
|
|
519
|
-
self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
|
|
520
|
-
|
|
521
|
-
# Failure case 1: function name is wrong (not in agent_state.tools)
|
|
522
|
-
target_letta_tool = None
|
|
523
|
-
for t in self.agent_state.tools:
|
|
524
|
-
if t.name == function_name:
|
|
525
|
-
# This force refreshes the target_letta_tool from the database
|
|
526
|
-
# We only do this on name match to confirm that the agent state contains a specific tool with the right name
|
|
527
|
-
target_letta_tool = ToolManager().get_tool_by_name(tool_name=function_name, actor=self.user)
|
|
528
|
-
break
|
|
529
|
-
|
|
530
|
-
if not target_letta_tool:
|
|
531
|
-
error_msg = f"No function named {function_name}"
|
|
532
|
-
function_response = "None" # more like "never ran?"
|
|
533
|
-
messages = self._handle_function_error_response(
|
|
534
|
-
error_msg, tool_call_id, function_name, function_args, function_response, messages, group_id=group_id
|
|
535
|
-
)
|
|
536
|
-
return messages, False, True # force a heartbeat to allow agent to handle error
|
|
537
|
-
|
|
538
|
-
# Failure case 2: function name is OK, but function args are bad JSON
|
|
539
|
-
try:
|
|
540
|
-
raw_function_args = function_call.arguments
|
|
541
|
-
function_args = parse_json(raw_function_args)
|
|
542
|
-
if not isinstance(function_args, dict):
|
|
543
|
-
raise ValueError(f"Function arguments are not a dictionary: {function_args} (raw={raw_function_args})")
|
|
544
|
-
except Exception as e:
|
|
545
|
-
print(e)
|
|
546
|
-
error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
|
|
547
|
-
function_response = "None" # more like "never ran?"
|
|
548
|
-
messages = self._handle_function_error_response(
|
|
549
|
-
error_msg, tool_call_id, function_name, function_args, function_response, messages, group_id=group_id
|
|
550
|
-
)
|
|
551
|
-
return messages, False, True # force a heartbeat to allow agent to handle error
|
|
552
|
-
|
|
553
|
-
# Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
|
|
554
|
-
if INNER_THOUGHTS_KWARG in function_args:
|
|
555
|
-
response_message.content = function_args.pop(INNER_THOUGHTS_KWARG)
|
|
556
|
-
# The content if then internal monologue, not chat
|
|
557
|
-
if response_message.content and not nonnull_content:
|
|
558
|
-
self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
|
|
559
|
-
chunk_index += 1
|
|
560
|
-
|
|
561
|
-
# (Still parsing function args)
|
|
562
|
-
# Handle requests for immediate heartbeat
|
|
563
|
-
heartbeat_request = function_args.pop("request_heartbeat", None)
|
|
564
|
-
|
|
565
|
-
# Edge case: heartbeat_request is returned as a stringified boolean, we will attempt to parse:
|
|
566
|
-
if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
|
|
567
|
-
heartbeat_request = True
|
|
568
|
-
|
|
569
|
-
if heartbeat_request is None:
|
|
570
|
-
heartbeat_request = False
|
|
571
|
-
|
|
572
|
-
if not isinstance(heartbeat_request, bool):
|
|
573
|
-
self.logger.warning(
|
|
574
|
-
f"{CLI_WARNING_PREFIX}'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}"
|
|
575
|
-
)
|
|
576
|
-
heartbeat_request = False
|
|
577
|
-
|
|
578
|
-
# Failure case 3: function failed during execution
|
|
579
|
-
# NOTE: the msg_obj associated with the "Running " message is the prior assistant message, not the function/tool role message
|
|
580
|
-
# this is because the function/tool role message is only created once the function/tool has executed/returned
|
|
581
|
-
|
|
582
|
-
# handle cases where we return a json message
|
|
583
|
-
if "message" in function_args:
|
|
584
|
-
function_args["message"] = str(function_args.get("message", ""))
|
|
585
|
-
self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
|
|
586
|
-
chunk_index = 0 # reset chunk index after assistant message
|
|
587
|
-
try:
|
|
588
|
-
# handle tool execution (sandbox) and state updates
|
|
589
|
-
log_telemetry(
|
|
590
|
-
self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
|
|
591
|
-
)
|
|
592
|
-
log_event(
|
|
593
|
-
"tool_call_initiated",
|
|
594
|
-
attributes={
|
|
595
|
-
"function_name": function_name,
|
|
596
|
-
"target_letta_tool": target_letta_tool.model_dump(),
|
|
597
|
-
**{f"function_args.{k}": v for k, v in function_args.items()},
|
|
598
|
-
},
|
|
599
|
-
)
|
|
600
|
-
|
|
601
|
-
tool_execution_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
|
|
602
|
-
function_response = tool_execution_result.func_return
|
|
603
|
-
|
|
604
|
-
log_event(
|
|
605
|
-
"tool_call_ended",
|
|
606
|
-
attributes={
|
|
607
|
-
"function_response": function_response,
|
|
608
|
-
"tool_execution_result": tool_execution_result.model_dump(),
|
|
609
|
-
},
|
|
610
|
-
)
|
|
611
|
-
log_telemetry(
|
|
612
|
-
self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
|
|
613
|
-
)
|
|
614
|
-
|
|
615
|
-
if tool_execution_result and tool_execution_result.status == "error":
|
|
616
|
-
tool_return = ToolReturn(
|
|
617
|
-
status=tool_execution_result.status, stdout=tool_execution_result.stdout, stderr=tool_execution_result.stderr
|
|
618
|
-
)
|
|
619
|
-
messages = self._handle_function_error_response(
|
|
620
|
-
function_response,
|
|
621
|
-
tool_call_id,
|
|
622
|
-
function_name,
|
|
623
|
-
function_args,
|
|
624
|
-
function_response,
|
|
625
|
-
messages,
|
|
626
|
-
[tool_return],
|
|
627
|
-
group_id=group_id,
|
|
628
|
-
)
|
|
629
|
-
return messages, False, True # force a heartbeat to allow agent to handle error
|
|
630
|
-
|
|
631
|
-
# handle trunction
|
|
632
|
-
if function_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
|
|
633
|
-
# with certain functions we rely on the paging mechanism to handle overflow
|
|
634
|
-
truncate = False
|
|
635
|
-
else:
|
|
636
|
-
# but by default, we add a truncation safeguard to prevent bad functions from
|
|
637
|
-
# overflow the agent context window
|
|
638
|
-
truncate = True
|
|
639
|
-
|
|
640
|
-
# get the function response limit
|
|
641
|
-
return_char_limit = target_letta_tool.return_char_limit
|
|
642
|
-
function_response_string = validate_function_response(
|
|
643
|
-
function_response, return_char_limit=return_char_limit, truncate=truncate
|
|
644
|
-
)
|
|
645
|
-
function_args.pop("self", None)
|
|
646
|
-
function_response = package_function_response(True, function_response_string, self.agent_state.timezone)
|
|
647
|
-
function_failed = False
|
|
648
|
-
except Exception as e:
|
|
649
|
-
function_args.pop("self", None)
|
|
650
|
-
# error_msg = f"Error calling function {function_name} with args {function_args}: {str(e)}"
|
|
651
|
-
# Less detailed - don't provide full args, idea is that it should be in recent context so no need (just adds noise)
|
|
652
|
-
error_msg = get_friendly_error_msg(function_name=function_name, exception_name=type(e).__name__, exception_message=str(e))
|
|
653
|
-
error_msg_user = f"{error_msg}\n{traceback.format_exc()}"
|
|
654
|
-
self.logger.error(error_msg_user)
|
|
655
|
-
messages = self._handle_function_error_response(
|
|
656
|
-
error_msg,
|
|
657
|
-
tool_call_id,
|
|
658
|
-
function_name,
|
|
659
|
-
function_args,
|
|
660
|
-
function_response,
|
|
661
|
-
messages,
|
|
662
|
-
[ToolReturn(status="error", stderr=[error_msg_user])],
|
|
663
|
-
include_function_failed_message=True,
|
|
664
|
-
group_id=group_id,
|
|
665
|
-
)
|
|
666
|
-
return messages, False, True # force a heartbeat to allow agent to handle error
|
|
667
|
-
|
|
668
|
-
# Step 4: check if function response is an error
|
|
669
|
-
if function_response_string.startswith(ERROR_MESSAGE_PREFIX):
|
|
670
|
-
error_msg = function_response_string
|
|
671
|
-
tool_return = ToolReturn(
|
|
672
|
-
status=tool_execution_result.status,
|
|
673
|
-
stdout=tool_execution_result.stdout,
|
|
674
|
-
stderr=tool_execution_result.stderr,
|
|
675
|
-
)
|
|
676
|
-
messages = self._handle_function_error_response(
|
|
677
|
-
error_msg,
|
|
678
|
-
tool_call_id,
|
|
679
|
-
function_name,
|
|
680
|
-
function_args,
|
|
681
|
-
function_response,
|
|
682
|
-
messages,
|
|
683
|
-
[tool_return],
|
|
684
|
-
include_function_failed_message=True,
|
|
685
|
-
group_id=group_id,
|
|
686
|
-
)
|
|
687
|
-
return messages, False, True # force a heartbeat to allow agent to handle error
|
|
688
|
-
|
|
689
|
-
# If no failures happened along the way: ...
|
|
690
|
-
# Step 5: send the info on the function call and function response to GPT
|
|
691
|
-
tool_return = ToolReturn(
|
|
692
|
-
status=tool_execution_result.status,
|
|
693
|
-
stdout=tool_execution_result.stdout,
|
|
694
|
-
stderr=tool_execution_result.stderr,
|
|
695
|
-
)
|
|
696
|
-
messages.append(
|
|
697
|
-
Message(
|
|
698
|
-
agent_id=self.agent_state.id,
|
|
699
|
-
# Base info OpenAI-style
|
|
700
|
-
model=self.model,
|
|
701
|
-
role="tool",
|
|
702
|
-
name=function_name, # NOTE: when role is 'tool', the 'name' is the function name, not agent name
|
|
703
|
-
content=[TextContent(text=function_response)],
|
|
704
|
-
tool_call_id=tool_call_id,
|
|
705
|
-
# Letta extras
|
|
706
|
-
tool_returns=[tool_return],
|
|
707
|
-
group_id=group_id,
|
|
708
|
-
)
|
|
709
|
-
) # extend conversation with function response
|
|
710
|
-
self.interface.function_message(f"Ran {function_name}({function_args})", msg_obj=messages[-1], chunk_index=chunk_index)
|
|
711
|
-
self.interface.function_message(f"Success: {function_response_string}", msg_obj=messages[-1], chunk_index=chunk_index)
|
|
712
|
-
chunk_index += 1
|
|
713
|
-
self.last_function_response = function_response
|
|
714
|
-
|
|
715
|
-
else:
|
|
716
|
-
# Standard non-function reply
|
|
717
|
-
messages.append(
|
|
718
|
-
Message.dict_to_message(
|
|
719
|
-
id=response_message_id,
|
|
720
|
-
agent_id=self.agent_state.id,
|
|
721
|
-
model=self.model,
|
|
722
|
-
openai_message_dict=response_message.model_dump(),
|
|
723
|
-
name=self.agent_state.name,
|
|
724
|
-
group_id=group_id,
|
|
725
|
-
)
|
|
726
|
-
) # extend conversation with assistant's reply
|
|
727
|
-
self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
|
|
728
|
-
chunk_index += 1
|
|
729
|
-
heartbeat_request = False
|
|
730
|
-
function_failed = False
|
|
731
|
-
|
|
732
|
-
# rebuild memory
|
|
733
|
-
# TODO: @charles please check this
|
|
734
|
-
self.agent_state = self.agent_manager.rebuild_system_prompt(agent_id=self.agent_state.id, actor=self.user)
|
|
735
|
-
|
|
736
|
-
# Update ToolRulesSolver state with last called function
|
|
737
|
-
self.tool_rules_solver.register_tool_call(function_name)
|
|
738
|
-
# Update heartbeat request according to provided tool rules
|
|
739
|
-
if self.tool_rules_solver.has_children_tools(function_name):
|
|
740
|
-
heartbeat_request = True
|
|
741
|
-
elif self.tool_rules_solver.is_terminal_tool(function_name):
|
|
742
|
-
heartbeat_request = False
|
|
743
|
-
|
|
744
|
-
# if continue tool rule, then must request a heartbeat
|
|
745
|
-
# TODO: dont even include heartbeats in the args
|
|
746
|
-
if self.tool_rules_solver.is_continue_tool(function_name):
|
|
747
|
-
heartbeat_request = True
|
|
748
|
-
|
|
749
|
-
log_telemetry(self.logger, "_handle_ai_response finish")
|
|
750
|
-
return messages, heartbeat_request, function_failed
|
|
751
|
-
|
|
752
|
-
@trace_method
|
|
753
|
-
def step(
|
|
754
|
-
self,
|
|
755
|
-
input_messages: List[MessageCreate],
|
|
756
|
-
# additional args
|
|
757
|
-
chaining: bool = True,
|
|
758
|
-
max_chaining_steps: Optional[int] = None,
|
|
759
|
-
put_inner_thoughts_first: bool = True,
|
|
760
|
-
**kwargs,
|
|
761
|
-
) -> LettaUsageStatistics:
|
|
762
|
-
"""Run Agent.step in a loop, handling chaining via heartbeat requests and function failures"""
|
|
763
|
-
# Defensively clear the tool rules solver history
|
|
764
|
-
# Usually this would be extraneous as Agent loop is re-loaded on every message send
|
|
765
|
-
# But just to be safe
|
|
766
|
-
self.tool_rules_solver.clear_tool_history()
|
|
767
|
-
|
|
768
|
-
# Convert MessageCreate objects to Message objects
|
|
769
|
-
next_input_messages = convert_message_creates_to_messages(input_messages, self.agent_state.id, self.agent_state.timezone)
|
|
770
|
-
counter = 0
|
|
771
|
-
total_usage = UsageStatistics()
|
|
772
|
-
step_count = 0
|
|
773
|
-
function_failed = False
|
|
774
|
-
steps_messages = []
|
|
775
|
-
while True:
|
|
776
|
-
kwargs["first_message"] = False
|
|
777
|
-
kwargs["step_count"] = step_count
|
|
778
|
-
kwargs["last_function_failed"] = function_failed
|
|
779
|
-
step_response = self.inner_step(
|
|
780
|
-
messages=next_input_messages,
|
|
781
|
-
put_inner_thoughts_first=put_inner_thoughts_first,
|
|
782
|
-
**kwargs,
|
|
783
|
-
)
|
|
784
|
-
|
|
785
|
-
heartbeat_request = step_response.heartbeat_request
|
|
786
|
-
function_failed = step_response.function_failed
|
|
787
|
-
token_warning = step_response.in_context_memory_warning
|
|
788
|
-
usage = step_response.usage
|
|
789
|
-
steps_messages.append(step_response.messages)
|
|
790
|
-
|
|
791
|
-
step_count += 1
|
|
792
|
-
total_usage += usage
|
|
793
|
-
counter += 1
|
|
794
|
-
self.interface.step_complete()
|
|
795
|
-
|
|
796
|
-
# logger.debug("Saving agent state")
|
|
797
|
-
# save updated state
|
|
798
|
-
save_agent(self)
|
|
799
|
-
|
|
800
|
-
# Chain stops
|
|
801
|
-
if not chaining:
|
|
802
|
-
self.logger.info("No chaining, stopping after one step")
|
|
803
|
-
break
|
|
804
|
-
elif max_chaining_steps is not None and counter > max_chaining_steps:
|
|
805
|
-
self.logger.info(f"Hit max chaining steps, stopping after {counter} steps")
|
|
806
|
-
break
|
|
807
|
-
# Chain handlers
|
|
808
|
-
elif token_warning and summarizer_settings.send_memory_warning_message:
|
|
809
|
-
assert self.agent_state.created_by_id is not None
|
|
810
|
-
next_input_messages = [
|
|
811
|
-
Message.dict_to_message(
|
|
812
|
-
agent_id=self.agent_state.id,
|
|
813
|
-
model=self.model,
|
|
814
|
-
openai_message_dict={
|
|
815
|
-
"role": "user", # TODO: change to system?
|
|
816
|
-
"content": get_token_limit_warning(),
|
|
817
|
-
},
|
|
818
|
-
),
|
|
819
|
-
]
|
|
820
|
-
continue # always chain
|
|
821
|
-
elif function_failed:
|
|
822
|
-
assert self.agent_state.created_by_id is not None
|
|
823
|
-
next_input_messages = [
|
|
824
|
-
Message.dict_to_message(
|
|
825
|
-
agent_id=self.agent_state.id,
|
|
826
|
-
model=self.model,
|
|
827
|
-
openai_message_dict={
|
|
828
|
-
"role": "user", # TODO: change to system?
|
|
829
|
-
"content": get_heartbeat(self.agent_state.timezone, FUNC_FAILED_HEARTBEAT_MESSAGE),
|
|
830
|
-
},
|
|
831
|
-
)
|
|
832
|
-
]
|
|
833
|
-
continue # always chain
|
|
834
|
-
elif heartbeat_request:
|
|
835
|
-
assert self.agent_state.created_by_id is not None
|
|
836
|
-
next_input_messages = [
|
|
837
|
-
Message.dict_to_message(
|
|
838
|
-
agent_id=self.agent_state.id,
|
|
839
|
-
model=self.model,
|
|
840
|
-
openai_message_dict={
|
|
841
|
-
"role": "user", # TODO: change to system?
|
|
842
|
-
"content": get_heartbeat(self.agent_state.timezone, REQ_HEARTBEAT_MESSAGE),
|
|
843
|
-
},
|
|
844
|
-
)
|
|
845
|
-
]
|
|
846
|
-
continue # always chain
|
|
847
|
-
# Letta no-op / yield
|
|
848
|
-
else:
|
|
849
|
-
break
|
|
850
|
-
|
|
851
|
-
if self.agent_state.message_buffer_autoclear:
|
|
852
|
-
self.logger.info("Autoclearing message buffer")
|
|
853
|
-
self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
|
|
854
|
-
|
|
855
|
-
return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count, steps_messages=steps_messages)
|
|
856
|
-
|
|
857
|
-
def inner_step(
|
|
858
|
-
self,
|
|
859
|
-
messages: List[Message],
|
|
860
|
-
first_message: bool = False,
|
|
861
|
-
first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
|
|
862
|
-
skip_verify: bool = False,
|
|
863
|
-
stream: bool = False, # TODO move to config?
|
|
864
|
-
step_count: Optional[int] = None,
|
|
865
|
-
metadata: Optional[dict] = None,
|
|
866
|
-
summarize_attempt_count: int = 0,
|
|
867
|
-
last_function_failed: bool = False,
|
|
868
|
-
put_inner_thoughts_first: bool = True,
|
|
869
|
-
) -> AgentStepResponse:
|
|
870
|
-
"""Runs a single step in the agent loop (generates at most one LLM call)"""
|
|
871
|
-
try:
|
|
872
|
-
# Extract job_id from metadata if present
|
|
873
|
-
job_id = metadata.get("job_id") if metadata else None
|
|
874
|
-
|
|
875
|
-
# Declare step_id for the given step to be used as the step is processing.
|
|
876
|
-
step_id = generate_step_id()
|
|
877
|
-
|
|
878
|
-
# Step 0: update core memory
|
|
879
|
-
# only pulling latest block data if shared memory is being used
|
|
880
|
-
current_persisted_memory = Memory(
|
|
881
|
-
blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
|
|
882
|
-
file_blocks=self.agent_state.memory.file_blocks,
|
|
883
|
-
agent_type=self.agent_state.agent_type,
|
|
884
|
-
) # read blocks from DB
|
|
885
|
-
self.update_memory_if_changed(current_persisted_memory)
|
|
886
|
-
|
|
887
|
-
# Step 1: add user message
|
|
888
|
-
if not all(isinstance(m, Message) for m in messages):
|
|
889
|
-
raise ValueError(f"messages should be a list of Message, got {[type(m) for m in messages]}")
|
|
890
|
-
|
|
891
|
-
in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
|
|
892
|
-
input_message_sequence = in_context_messages + messages
|
|
893
|
-
|
|
894
|
-
if (
|
|
895
|
-
len(input_message_sequence) > 1
|
|
896
|
-
and input_message_sequence[-1].role != "user"
|
|
897
|
-
and input_message_sequence[-1].group_id is None
|
|
898
|
-
):
|
|
899
|
-
self.logger.warning(f"{CLI_WARNING_PREFIX}Attempting to run ChatCompletion without user as the last message in the queue")
|
|
900
|
-
|
|
901
|
-
# Step 2: send the conversation and available functions to the LLM
|
|
902
|
-
response = self._get_ai_reply(
|
|
903
|
-
message_sequence=input_message_sequence,
|
|
904
|
-
first_message=first_message,
|
|
905
|
-
stream=stream,
|
|
906
|
-
step_count=step_count,
|
|
907
|
-
last_function_failed=last_function_failed,
|
|
908
|
-
put_inner_thoughts_first=put_inner_thoughts_first,
|
|
909
|
-
step_id=step_id,
|
|
910
|
-
)
|
|
911
|
-
if not response:
|
|
912
|
-
# EDGE CASE: Function call failed AND there's no tools left for agent to call -> return early
|
|
913
|
-
return AgentStepResponse(
|
|
914
|
-
messages=input_message_sequence,
|
|
915
|
-
heartbeat_request=False,
|
|
916
|
-
function_failed=False, # NOTE: this is different from other function fails. We force to return early
|
|
917
|
-
in_context_memory_warning=False,
|
|
918
|
-
usage=UsageStatistics(),
|
|
919
|
-
)
|
|
920
|
-
|
|
921
|
-
# Step 3: check if LLM wanted to call a function
|
|
922
|
-
# (if yes) Step 4: call the function
|
|
923
|
-
# (if yes) Step 5: send the info on the function call and function response to LLM
|
|
924
|
-
response_message = response.choices[0].message
|
|
925
|
-
|
|
926
|
-
response_message.model_copy() # TODO why are we copying here?
|
|
927
|
-
all_response_messages, heartbeat_request, function_failed = self._handle_ai_response(
|
|
928
|
-
response_message,
|
|
929
|
-
# TODO this is kind of hacky, find a better way to handle this
|
|
930
|
-
# the only time we set up message creation ahead of time is when streaming is on
|
|
931
|
-
response_message_id=response.id if stream else None,
|
|
932
|
-
group_id=input_message_sequence[-1].group_id,
|
|
933
|
-
)
|
|
934
|
-
|
|
935
|
-
# Step 6: extend the message history
|
|
936
|
-
if len(messages) > 0:
|
|
937
|
-
all_new_messages = messages + all_response_messages
|
|
938
|
-
else:
|
|
939
|
-
all_new_messages = all_response_messages
|
|
940
|
-
|
|
941
|
-
if self.save_last_response:
|
|
942
|
-
self.last_response_messages = all_response_messages
|
|
943
|
-
|
|
944
|
-
# Check the memory pressure and potentially issue a memory pressure warning
|
|
945
|
-
current_total_tokens = response.usage.total_tokens
|
|
946
|
-
active_memory_warning = False
|
|
947
|
-
|
|
948
|
-
# We can't do summarize logic properly if context_window is undefined
|
|
949
@@ removed lines 949-1073 @@
-            if self.agent_state.llm_config.context_window is None:
-                # Fallback if for some reason context_window is missing, just set to the default
-                print(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
-                print(f"{self.agent_state}")
-                self.agent_state.llm_config.context_window = (
-                    LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
-                )
-
-            if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window):
-                logger.warning(
-                    f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
-                )
-
-                log_event(
-                    name="memory_pressure_warning",
-                    attributes={
-                        "current_total_tokens": current_total_tokens,
-                        "context_window_limit": self.agent_state.llm_config.context_window,
-                    },
-                )
-                # Only deliver the alert if we haven't already (this period)
-                if not self.agent_alerted_about_memory_pressure:
-                    active_memory_warning = True
-                    self.agent_alerted_about_memory_pressure = True  # it's up to the outer loop to handle this
-
-            else:
-                logger.info(
-                    f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
-                )
-
-            # Log step - this must happen before messages are persisted
-            step = self.step_manager.log_step(
-                actor=self.user,
-                agent_id=self.agent_state.id,
-                provider_name=self.agent_state.llm_config.model_endpoint_type,
-                provider_category=self.agent_state.llm_config.provider_category or "base",
-                model=self.agent_state.llm_config.model,
-                model_endpoint=self.agent_state.llm_config.model_endpoint,
-                context_window_limit=self.agent_state.llm_config.context_window,
-                usage=response.usage,
-                provider_id=self.provider_manager.get_provider_id_from_name(
-                    self.agent_state.llm_config.provider_name,
-                    actor=self.user,
-                ),
-                job_id=job_id,
-                step_id=step_id,
-                project_id=self.agent_state.project_id,
-                status=StepStatus.SUCCESS,  # Set to SUCCESS since we're logging after successful completion
-            )
-            for message in all_new_messages:
-                message.step_id = step.id
-
-            # Persisting into Messages
-            self.agent_state = self.agent_manager.append_to_in_context_messages(
-                all_new_messages, agent_id=self.agent_state.id, actor=self.user
-            )
-            if job_id:
-                for message in all_new_messages:
-                    if message.role != "user":
-                        self.job_manager.add_message_to_job(
-                            job_id=job_id,
-                            message_id=message.id,
-                            actor=self.user,
-                        )
-
-            return AgentStepResponse(
-                messages=all_new_messages,
-                heartbeat_request=heartbeat_request,
-                function_failed=function_failed,
-                in_context_memory_warning=active_memory_warning,
-                usage=response.usage,
-            )
-
-        except Exception as e:
-            logger.error(f"step() failed\nmessages = {messages}\nerror = {e}")
-
-            # If we got a context alert, try trimming the messages length, then try again
-            if is_context_overflow_error(e):
-                in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-
-                # TODO: this is a patch to resolve immediate issues, should be removed once the summarizer is fixes
-                if self.agent_state.message_buffer_autoclear:
-                    # no calling the summarizer in this case
-                    logger.error(
-                        f"step() failed with an exception that looks like a context window overflow, but message buffer is set to autoclear, so skipping: '{str(e)}'"
-                    )
-                    raise e
-
-                if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
-                    logger.warning(
-                        f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
-                    )
-                    # A separate API call to run a summarizer
-                    self.summarize_messages_inplace()
-
-                    # Try step again
-                    return self.inner_step(
-                        messages=messages,
-                        first_message=first_message,
-                        first_message_retry_limit=first_message_retry_limit,
-                        skip_verify=skip_verify,
-                        stream=stream,
-                        metadata=metadata,
-                        summarize_attempt_count=summarize_attempt_count + 1,
-                    )
-                else:
-                    err_msg = f"Ran summarizer {summarize_attempt_count - 1} times for agent id={self.agent_state.id}, but messages are still overflowing the context window."
-                    token_counts = (get_token_counts_for_messages(in_context_messages),)
-                    logger.error(err_msg)
-                    logger.error(f"num_in_context_messages: {len(self.agent_state.message_ids)}")
-                    logger.error(f"token_counts: {token_counts}")
-                    raise ContextWindowExceededError(
-                        err_msg,
-                        details={
-                            "num_in_context_messages": len(self.agent_state.message_ids),
-                            "in_context_messages_text": [m.content for m in in_context_messages],
-                            "token_counts": token_counts,
-                        },
-                    )
-
-            else:
-                logger.error(f"step() failed with an unrecognized exception: '{str(e)}'")
-                traceback.print_exc()
-                raise e
-
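The removed block above closes out the legacy inner_step(): it logs the step, persists the new messages, and, when the provider reports a context-window overflow, summarizes the in-context history and calls inner_step() again with an incremented summarize_attempt_count until max_summarizer_retries is exhausted. A minimal standalone sketch of that retry-on-overflow control flow (every name below is an illustrative stand-in, not a Letta API):

class ContextOverflowError(Exception):
    """Raised by the toy LLM call when the prompt no longer fits."""


def call_llm(messages: list) -> str:
    # Pretend the context window holds roughly 200 characters.
    if sum(len(m) for m in messages) > 200:
        raise ContextOverflowError("context window exceeded")
    return "ok"


def summarize(messages: list) -> list:
    # Collapse everything except the two most recent messages into one summary message.
    return [f"[summary of {len(messages) - 2} earlier messages]"] + messages[-2:]


def step(messages: list, attempt: int = 0, max_retries: int = 3) -> str:
    try:
        return call_llm(messages)
    except ContextOverflowError:
        if attempt >= max_retries:
            raise
        # Summarize in place, then retry the same step with a bumped attempt counter.
        return step(summarize(messages), attempt + 1, max_retries)


print(step([f"user/assistant turn number {i}" for i in range(30)]))  # -> "ok" after one summarization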
@@ removed lines 1074-1106 @@
-    def step_user_message(self, user_message_str: str, **kwargs) -> AgentStepResponse:
-        """Takes a basic user message string, turns it into a stringified JSON with extra metadata, then sends it to the agent
-
-        Example:
-        -> user_message_str = 'hi'
-        -> {'message': 'hi', 'type': 'user_message', ...}
-        -> json.dumps(...)
-        -> agent.step(messages=[Message(role='user', text=...)])
-        """
-        # Wrap with metadata, dumps to JSON
-        assert user_message_str and isinstance(user_message_str, str), (
-            f"user_message_str should be a non-empty string, got {type(user_message_str)}"
-        )
-        user_message_json_str = package_user_message(user_message_str, self.agent_state.timezone)
-
-        # Validate JSON via save/load
-        user_message = validate_json(user_message_json_str)
-        cleaned_user_message_text, name = strip_name_field_from_user_message(user_message)
-
-        # Turn into a dict
-        openai_message_dict = {"role": "user", "content": cleaned_user_message_text, "name": name}
-
-        # Create the associated Message object (in the database)
-        assert self.agent_state.created_by_id is not None, "User ID is not set"
-        user_message = Message.dict_to_message(
-            agent_id=self.agent_state.id,
-            model=self.model,
-            openai_message_dict=openai_message_dict,
-            # created_at=timestamp,
-        )
-
-        return self.inner_step(messages=[user_message], **kwargs)
-
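step_user_message() above wraps a raw user string in a JSON envelope and round-trips it through the JSON validators before stepping the agent. A rough, self-contained illustration of that packaging (the envelope fields here are assumptions; the real envelope comes from package_user_message):

import json
from datetime import datetime, timezone


def package_user_message(text: str) -> str:
    # Hypothetical envelope; the real helper adds its own metadata fields.
    envelope = {
        "type": "user_message",
        "message": text,
        "time": datetime.now(timezone.utc).isoformat(),
    }
    return json.dumps(envelope)


def validate_json(payload: str) -> str:
    # Round-trip through json so malformed input fails loudly before it reaches the agent.
    return json.dumps(dict(json.loads(payload)))


print(validate_json(package_user_message("hi")))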
@@ removed lines 1107-1188 @@
-    def summarize_messages_inplace(self):
-        in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-        in_context_messages_openai_no_system = in_context_messages_openai[1:]
-        token_counts = get_token_counts_for_messages(in_context_messages)
-        logger.info(f"System message token count={token_counts[0]}")
-        logger.info(f"token_counts_no_system={token_counts[1:]}")
-
-        if in_context_messages_openai[0]["role"] != "system":
-            raise RuntimeError(f"in_context_messages_openai[0] should be system (instead got {in_context_messages_openai[0]})")
-
-        # If at this point there's nothing to summarize, throw an error
-        if len(in_context_messages_openai_no_system) == 0:
-            raise ContextWindowExceededError(
-                "Not enough messages to compress for summarization",
-                details={
-                    "num_candidate_messages": len(in_context_messages_openai_no_system),
-                    "num_total_messages": len(in_context_messages_openai),
-                },
-            )
-
-        cutoff = calculate_summarizer_cutoff(in_context_messages=in_context_messages, token_counts=token_counts, logger=logger)
-        message_sequence_to_summarize = in_context_messages[1:cutoff]  # do NOT get rid of the system message
-        logger.info(f"Attempting to summarize {len(message_sequence_to_summarize)} messages of {len(in_context_messages)}")
-
-        # We can't do summarize logic properly if context_window is undefined
-        if self.agent_state.llm_config.context_window is None:
-            # Fallback if for some reason context_window is missing, just set to the default
-            logger.warning(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}")
-            self.agent_state.llm_config.context_window = (
-                LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
-            )
-
-        summary = summarize_messages(
-            agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize, actor=self.user
-        )
-        logger.info(f"Got summary: {summary}")
-
-        # Metadata that's useful for the agent to see
-        all_time_message_count = self.message_manager.size(agent_id=self.agent_state.id, actor=self.user)
-        remaining_message_count = 1 + len(in_context_messages) - cutoff  # System + remaining
-        hidden_message_count = all_time_message_count - remaining_message_count
-        summary_message_count = len(message_sequence_to_summarize)
-        summary_message = package_summarize_message(
-            summary, summary_message_count, hidden_message_count, all_time_message_count, self.agent_state.timezone
-        )
-        logger.info(f"Packaged into message: {summary_message}")
-
-        prior_len = len(in_context_messages_openai)
-        self.agent_state = self.agent_manager.trim_older_in_context_messages(num=cutoff, agent_id=self.agent_state.id, actor=self.user)
-        packed_summary_message = {"role": "user", "content": summary_message}
-        # Prepend the summary
-        self.agent_state = self.agent_manager.prepend_to_in_context_messages(
-            messages=[
-                Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict=packed_summary_message,
-                )
-            ],
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        # reset alert
-        self.agent_alerted_about_memory_pressure = False
-        curr_in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-
-        current_token_count = sum(get_token_counts_for_messages(curr_in_context_messages))
-        logger.info(f"Ran summarizer, messages length {prior_len} -> {len(curr_in_context_messages)}")
-        logger.info(f"Summarizer brought down total token count from {sum(token_counts)} -> {current_token_count}")
-        log_event(
-            name="summarization",
-            attributes={
-                "prior_length": prior_len,
-                "current_length": len(curr_in_context_messages),
-                "prior_token_count": sum(token_counts),
-                "current_token_count": current_token_count,
-                "context_window_limit": self.agent_state.llm_config.context_window,
-            },
-        )
-
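summarize_messages_inplace() above picks a cutoff, summarizes everything between the system message and that cutoff, trims those messages, and prepends the packaged summary. A compact sketch of the same trim-and-prepend idea (token counting and the summarizer itself are stubbed out; all names are illustrative):

def count_tokens(text: str) -> int:
    return max(1, len(text) // 4)  # crude stand-in for a real tokenizer


def pick_cutoff(messages: list, budget: int) -> int:
    # Walk forward from the oldest non-system message until roughly half the budget is freed.
    freed, cutoff = 0, 1
    for i, msg in enumerate(messages[1:], start=1):
        freed += count_tokens(msg)
        cutoff = i + 1
        if freed >= budget // 2:
            break
    return cutoff


def summarize_inplace(messages: list, budget: int) -> list:
    cutoff = pick_cutoff(messages, budget)
    to_summarize = messages[1:cutoff]          # never summarize the system message
    summary = f"[summary of {len(to_summarize)} earlier messages]"
    return [messages[0], summary] + messages[cutoff:]  # system + summary + remainder


history = ["SYSTEM PROMPT"] + [f"turn {i}: " + "x" * 80 for i in range(12)]
print(summarize_inplace(history, budget=400))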
@@ removed lines 1189-1301 @@
-    def add_function(self, function_name: str) -> str:
-        # TODO: refactor
-        raise NotImplementedError
-
-    def remove_function(self, function_name: str) -> str:
-        # TODO: refactor
-        raise NotImplementedError
-
-    def migrate_embedding(self, embedding_config: EmbeddingConfig):
-        """Migrate the agent to a new embedding"""
-        # TODO: archival memory
-
-        # TODO: recall memory
-        raise NotImplementedError()
-
-    def get_context_window(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-
-        system_prompt = self.agent_state.system  # TODO is this the current system or the initial system?
-        num_tokens_system = count_tokens(system_prompt)
-        core_memory = self.agent_state.memory.compile()
-        num_tokens_core_memory = count_tokens(core_memory)
-
-        # Grab the in-context messages
-        # conversion of messages to OpenAI dict format, which is passed to the token counter
-        in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory = count_tokens(text_content)
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
-                if len(in_context_messages_openai) > 2
-                else 0
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory = 0
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
-                if len(in_context_messages_openai) > 1
-                else 0
-            )
-
-        agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
-        message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
-        external_memory_summary = PromptGenerator.compile_memory_metadata_block(
-            memory_edit_timestamp=get_utc_time(),
-            timezone=self.agent_state.timezone,
-            previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
-            archival_memory_size=self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id),
-        )
-        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
-
-        # tokens taken up by function definitions
-        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
-        if agent_state_tool_jsons:
-            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions = 0
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=agent_manager_passage_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
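get_context_window() above reports usage by counting each component separately (system prompt, core memory, optional summary, messages, tool schemas) and summing them into a single total. A minimal illustration of that accounting with a crude stand-in tokenizer (nothing here is a Letta helper):

from typing import List, Optional


def count_tokens(text: str) -> int:
    return max(1, len(text) // 4)  # crude stand-in for a real tokenizer


def context_window_breakdown(system_prompt: str, core_memory: str, summary: Optional[str], messages: List[str]) -> dict:
    parts = {
        "system": count_tokens(system_prompt),
        "core_memory": count_tokens(core_memory),
        "summary": count_tokens(summary) if summary else 0,
        "messages": sum(count_tokens(m) for m in messages),
    }
    parts["total"] = sum(parts.values())
    return parts


print(context_window_breakdown("You are a helpful agent.", "<persona>...</persona>", None, ["hi", "hello there!"]))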
@@ removed lines 1302-1591 @@
-    async def get_context_window_async(self) -> ContextWindowOverview:
-        if settings.environment == "PRODUCTION" and model_settings.anthropic_api_key:
-            return await self.get_context_window_from_anthropic_async()
-        return await self.get_context_window_from_tiktoken_async()
-
-    async def get_context_window_from_tiktoken_async(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-        # Grab the in-context messages
-        in_context_messages = await self.message_manager.get_messages_by_ids_async(
-            message_ids=self.agent_state.message_ids, actor=self.user
-        )
-
-        # conversion of messages to OpenAI dict format, which is passed to the token counter
-        in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
-
-        # Extract system, memory and external summary
-        if (
-            len(in_context_messages) > 0
-            and in_context_messages[0].role == MessageRole.system
-            and in_context_messages[0].content
-            and len(in_context_messages[0].content) == 1
-            and isinstance(in_context_messages[0].content[0], TextContent)
-        ):
-            system_message = in_context_messages[0].content[0].text
-
-            external_memory_marker_pos = system_message.find("###")
-            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
-            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
-                system_prompt = system_message[:external_memory_marker_pos].strip()
-                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
-                core_memory = system_message[core_memory_marker_pos:].strip()
-            else:
-                # if no markers found, put everything in system message
-                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
-                system_prompt = system_message
-                external_memory_summary = ""
-                core_memory = ""
-        else:
-            # if no system message, fall back on agent's system prompt
-            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
-            system_prompt = self.agent_state.system
-            external_memory_summary = ""
-            core_memory = ""
-
-        num_tokens_system = count_tokens(system_prompt)
-        num_tokens_core_memory = count_tokens(core_memory)
-        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory = count_tokens(text_content)
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
-                if len(in_context_messages_openai) > 2
-                else 0
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory = 0
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
-                if len(in_context_messages_openai) > 1
-                else 0
-            )
-
-        # tokens taken up by function definitions
-        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
-        if agent_state_tool_jsons:
-            available_functions_definitions = [OpenAITool(type="function", function=f) for f in agent_state_tool_jsons]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions = 0
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        passage_manager_size = await self.passage_manager.agent_passage_size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-        message_manager_size = await self.message_manager.size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=passage_manager_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
-    async def get_context_window_from_anthropic_async(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-        anthropic_client = LLMClient.create(provider_type=ProviderType.anthropic, actor=self.user)
-        model = self.agent_state.llm_config.model if self.agent_state.llm_config.model_endpoint_type == "anthropic" else None
-
-        # Grab the in-context messages
-        in_context_messages = await self.message_manager.get_messages_by_ids_async(
-            message_ids=self.agent_state.message_ids, actor=self.user
-        )
-
-        # conversion of messages to anthropic dict format, which is passed to the token counter
-        in_context_messages_anthropic = Message.to_anthropic_dicts_from_list(in_context_messages)
-
-        # Extract system, memory and external summary
-        if (
-            len(in_context_messages) > 0
-            and in_context_messages[0].role == MessageRole.system
-            and in_context_messages[0].content
-            and len(in_context_messages[0].content) == 1
-            and isinstance(in_context_messages[0].content[0], TextContent)
-        ):
-            system_message = in_context_messages[0].content[0].text
-
-            external_memory_marker_pos = system_message.find("###")
-            core_memory_marker_pos = system_message.find("<", external_memory_marker_pos)
-            if external_memory_marker_pos != -1 and core_memory_marker_pos != -1:
-                system_prompt = system_message[:external_memory_marker_pos].strip()
-                external_memory_summary = system_message[external_memory_marker_pos:core_memory_marker_pos].strip()
-                core_memory = system_message[core_memory_marker_pos:].strip()
-            else:
-                # if no markers found, put everything in system message
-                self.logger.info("No markers found in system message, core_memory and external_memory_summary will not be loaded")
-                system_prompt = system_message
-                external_memory_summary = ""
-                core_memory = ""
-        else:
-            # if no system message, fall back on agent's system prompt
-            self.logger.info("No system message found in history, core_memory and external_memory_summary will not be loaded")
-            system_prompt = self.agent_state.system
-            external_memory_summary = ""
-            core_memory = ""
-
-        num_tokens_system_coroutine = anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": system_prompt}])
-        num_tokens_core_memory_coroutine = (
-            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": core_memory}])
-            if core_memory
-            else asyncio.sleep(0, result=0)
-        )
-        num_tokens_external_memory_summary_coroutine = (
-            anthropic_client.count_tokens(model=model, messages=[{"role": "user", "content": external_memory_summary}])
-            if external_memory_summary
-            else asyncio.sleep(0, result=0)
-        )
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and in_context_messages[1].content
-            and len(in_context_messages[1].content) == 1
-            and isinstance(in_context_messages[1].content[0], TextContent)
-            # TODO remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].content[0].text
-        ):
-            # Summary message exists
-            text_content = in_context_messages[1].content[0].text
-            assert text_content is not None
-            summary_memory = text_content
-            num_tokens_summary_memory_coroutine = anthropic_client.count_tokens(
-                model=model, messages=[{"role": "user", "content": summary_memory}]
-            )
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages_coroutine = (
-                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[2:])
-                if len(in_context_messages_anthropic) > 2
-                else asyncio.sleep(0, result=0)
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory_coroutine = asyncio.sleep(0, result=0)
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages_coroutine = (
-                anthropic_client.count_tokens(model=model, messages=in_context_messages_anthropic[1:])
-                if len(in_context_messages_anthropic) > 1
-                else asyncio.sleep(0, result=0)
-            )
-
-        # tokens taken up by function definitions
-        if self.agent_state.tools and len(self.agent_state.tools) > 0:
-            available_functions_definitions = [OpenAITool(type="function", function=f.json_schema) for f in self.agent_state.tools]
-            num_tokens_available_functions_definitions_coroutine = anthropic_client.count_tokens(
-                model=model,
-                tools=available_functions_definitions,
-            )
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions_coroutine = asyncio.sleep(0, result=0)
-
-        (
-            num_tokens_system,
-            num_tokens_core_memory,
-            num_tokens_external_memory_summary,
-            num_tokens_summary_memory,
-            num_tokens_messages,
-            num_tokens_available_functions_definitions,
-        ) = await asyncio.gather(
-            num_tokens_system_coroutine,
-            num_tokens_core_memory_coroutine,
-            num_tokens_external_memory_summary_coroutine,
-            num_tokens_summary_memory_coroutine,
-            num_tokens_messages_coroutine,
-            num_tokens_available_functions_definitions_coroutine,
-        )
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        passage_manager_size = await self.passage_manager.agent_passage_size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-        message_manager_size = await self.message_manager.size_async(
-            agent_id=self.agent_state.id,
-            actor=self.user,
-        )
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_archival_memory=passage_manager_size,
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
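The async variants above fan out one token-counting coroutine per component and await them together; components that are empty fall back to asyncio.sleep(0, result=0) so the single asyncio.gather call still lines up positionally. A small self-contained sketch of that pattern (count_remote_tokens is a fake stand-in for a provider's token-counting endpoint):

import asyncio
from typing import List, Optional


async def count_remote_tokens(text: str) -> int:
    await asyncio.sleep(0.01)  # stand-in for a network round trip
    return max(1, len(text) // 4)


async def breakdown(system_prompt: str, core_memory: str, summary: Optional[str]) -> List[int]:
    # Empty components resolve to 0 via asyncio.sleep(0, result=0), mirroring the removed code.
    return await asyncio.gather(
        count_remote_tokens(system_prompt),
        count_remote_tokens(core_memory) if core_memory else asyncio.sleep(0, result=0),
        count_remote_tokens(summary) if summary else asyncio.sleep(0, result=0),
    )


print(asyncio.run(breakdown("You are a helpful agent.", "", None)))  # e.g. [6, 0, 0]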
@@ removed lines 1592-1705 @@
-    def count_tokens(self) -> int:
-        """Count the tokens in the current context window"""
-        context_window_breakdown = self.get_context_window()
-        return context_window_breakdown.context_window_size_current
-
-    # TODO: Refactor into separate class v.s. large if/elses here
-    def execute_tool_and_persist_state(self, function_name: str, function_args: dict, target_letta_tool: Tool) -> ToolExecutionResult:
-        """
-        Execute tool modifications and persist the state of the agent.
-        Note: only some agent state modifications will be persisted, such as data in the AgentState ORM and block data
-        """
-        # TODO: add agent manager here
-        orig_memory_str = self.agent_state.memory.compile()
-
-        # TODO: need to have an AgentState object that actually has full access to the block data
-        # this is because the sandbox tools need to be able to access block.value to edit this data
-        try:
-            if target_letta_tool.tool_type == ToolType.LETTA_CORE:
-                # base tools are allowed to access the `Agent` object and run on the database
-                callable_func = get_function_from_module(LETTA_CORE_TOOL_MODULE_NAME, function_name)
-                function_args["self"] = self  # need to attach self to arg since it's dynamically linked
-                function_response = callable_func(**function_args)
-            elif target_letta_tool.tool_type == ToolType.LETTA_MULTI_AGENT_CORE:
-                callable_func = get_function_from_module(LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name)
-                function_args["self"] = self  # need to attach self to arg since it's dynamically linked
-                function_response = callable_func(**function_args)
-            elif target_letta_tool.tool_type == ToolType.LETTA_MEMORY_CORE or target_letta_tool.tool_type == ToolType.LETTA_SLEEPTIME_CORE:
-                callable_func = get_function_from_module(LETTA_CORE_TOOL_MODULE_NAME, function_name)
-                agent_state_copy = self.agent_state.__deepcopy__()
-                function_args["agent_state"] = agent_state_copy  # need to attach self to arg since it's dynamically linked
-                function_response = callable_func(**function_args)
-                self.ensure_read_only_block_not_modified(
-                    new_memory=agent_state_copy.memory
-                )  # memory editing tools cannot edit read-only blocks
-                self.update_memory_if_changed(agent_state_copy.memory)
-            elif target_letta_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
-                action_name = generate_composio_action_from_func_name(target_letta_tool.name)
-                # Get entity ID from the agent_state
-                entity_id = None
-                for env_var in self.agent_state.secrets:
-                    if env_var.key == COMPOSIO_ENTITY_ENV_VAR_KEY:
-                        entity_id = env_var.value
-                # Get composio_api_key
-                composio_api_key = get_composio_api_key(actor=self.user, logger=self.logger)
-                function_response = execute_composio_action(
-                    action_name=action_name, args=function_args, api_key=composio_api_key, entity_id=entity_id
-                )
-            elif target_letta_tool.tool_type == ToolType.EXTERNAL_MCP:
-                # Get the server name from the tool tag
-                # TODO make a property instead?
-                server_name = target_letta_tool.tags[0].split(":")[1]
-
-                # Get the MCPClient from the server's handle
-                # TODO these don't get raised properly
-                if not self.mcp_clients:
-                    raise ValueError("No MCP client available to use")
-                if server_name not in self.mcp_clients:
-                    raise ValueError(f"Unknown MCP server name: {server_name}")
-                mcp_client = self.mcp_clients[server_name]
-
-                # Check that tool exists
-                available_tools = mcp_client.list_tools()
-                available_tool_names = [t.name for t in available_tools]
-                if function_name not in available_tool_names:
-                    raise ValueError(
-                        f"{function_name} is not available in MCP server {server_name}. Please check your `~/.letta/mcp_config.json` file."
-                    )
-
-                function_response, is_error = mcp_client.execute_tool(tool_name=function_name, tool_args=function_args)
-                return ToolExecutionResult(
-                    status="error" if is_error else "success",
-                    func_return=function_response,
-                )
-            else:
-                try:
-                    # Parse the source code to extract function annotations
-                    annotations = get_function_annotations_from_source(target_letta_tool.source_code, function_name)
-                    # Coerce the function arguments to the correct types based on the annotations
-                    function_args = coerce_dict_args_by_annotations(function_args, annotations)
-                except ValueError as e:
-                    self.logger.debug(f"Error coercing function arguments: {e}")
-
-                # execute tool in a sandbox
-                # TODO: allow agent_state to specify which sandbox to execute tools in
-                # TODO: This is only temporary, can remove after we publish a pip package with this object
-                agent_state_copy = self.agent_state.__deepcopy__()
-                agent_state_copy.tools = []
-                agent_state_copy.tool_rules = []
-
-                tool_execution_result = ToolExecutionSandbox(function_name, function_args, self.user, tool_object=target_letta_tool).run(
-                    agent_state=agent_state_copy
-                )
-                assert orig_memory_str == self.agent_state.memory.compile(), "Memory should not be modified in a sandbox tool"
-                if tool_execution_result.agent_state is not None:
-                    self.update_memory_if_changed(tool_execution_result.agent_state.memory)
-                return tool_execution_result
-        except Exception as e:
-            # Need to catch error here, or else trunction wont happen
-            # TODO: modify to function execution error
-            function_response = get_friendly_error_msg(
-                function_name=function_name, exception_name=type(e).__name__, exception_message=str(e)
-            )
-            return ToolExecutionResult(
-                status="error",
-                func_return=function_response,
-                stderr=[traceback.format_exc()],
-            )
-
-        return ToolExecutionResult(
-            status="success",
-            func_return=function_response,
-        )
-
-
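execute_tool_and_persist_state() above dispatches on the tool type and always normalizes the outcome into a ToolExecutionResult with a status and a return value, even when the tool raises. A condensed sketch of that dispatch-and-normalize shape (the registry, types, and messages here are invented for illustration):

from dataclasses import dataclass
from typing import Any, Callable


@dataclass
class ToolResult:
    status: str          # "success" or "error"
    func_return: Any


EXECUTORS: dict = {
    "core": lambda args: f"core tool ran with {args}",
    "sandbox": lambda args: f"sandboxed tool ran with {args}",
}


def execute_tool(tool_type: str, args: dict) -> ToolResult:
    try:
        executor: Callable = EXECUTORS[tool_type]  # unknown types raise KeyError -> error result
        return ToolResult(status="success", func_return=executor(args))
    except Exception as e:
        return ToolResult(status="error", func_return=f"{type(e).__name__}: {e}")


print(execute_tool("core", {"x": 1}))
print(execute_tool("mcp", {}))  # no executor registered -> error result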
@@ removed lines 1706-1758 @@
-def save_agent(agent: Agent):
-    """Save agent to metadata store"""
-    agent_state = agent.agent_state
-    assert isinstance(agent_state.memory, Memory), f"Memory is not a Memory object: {type(agent_state.memory)}"
-
-    # TODO: move this to agent manager
-    # TODO: Completely strip out metadata
-    # convert to persisted model
-    agent_manager = AgentManager()
-    update_agent = UpdateAgent(
-        name=agent_state.name,
-        tool_ids=[t.id for t in agent_state.tools],
-        source_ids=[s.id for s in agent_state.sources],
-        block_ids=[b.id for b in agent_state.memory.blocks],
-        tags=agent_state.tags,
-        system=agent_state.system,
-        tool_rules=agent_state.tool_rules,
-        llm_config=agent_state.llm_config,
-        embedding_config=agent_state.embedding_config,
-        message_ids=agent_state.message_ids,
-        description=agent_state.description,
-        metadata=agent_state.metadata,
-        # TODO: Add this back in later
-        # tool_exec_environment_variables=agent_state.get_agent_env_vars_as_dict(),
-    )
-    agent_manager.update_agent(agent_id=agent_state.id, agent_update=update_agent, actor=agent.user)
-
-
-def strip_name_field_from_user_message(user_message_text: str) -> Tuple[str, Optional[str]]:
-    """If 'name' exists in the JSON string, remove it and return the cleaned text + name value"""
-    try:
-        user_message_json = dict(json_loads(user_message_text))
-        # Special handling for AutoGen messages with 'name' field
-        # Treat 'name' as a special field
-        # If it exists in the input message, elevate it to the 'message' level
-        name = user_message_json.pop("name", None)
-        clean_message = json_dumps(user_message_json)
-        return clean_message, name
-
-    except Exception as e:
-        print(f"{CLI_WARNING_PREFIX}handling of 'name' field failed with: {e}")
-        raise e
-
-
-def validate_json(user_message_text: str) -> str:
-    """Make sure that the user input message is valid JSON"""
-    try:
-        user_message_json = dict(json_loads(user_message_text))
-        user_message_json_val = json_dumps(user_message_json)
-        return user_message_json_val
-    except Exception as e:
-        print(f"{CLI_WARNING_PREFIX}couldn't parse user input message as JSON: {e}")
-        raise e
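The trailing helpers handle the AutoGen-style 'name' field: pop it out of the JSON payload and return both the cleaned message and the name. A small illustration using only the standard library (the function name is shortened here for the example):

import json
from typing import Optional, Tuple


def strip_name_field(user_message_text: str) -> Tuple[str, Optional[str]]:
    payload = dict(json.loads(user_message_text))
    name = payload.pop("name", None)      # absent -> None, message left untouched
    return json.dumps(payload), name


print(strip_name_field('{"message": "hi", "name": "autogen-user"}'))
print(strip_name_field('{"message": "hi"}'))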