letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
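The listing above and the per-file diff below come from comparing the two published wheels. As an illustrative aside (not part of the generated report), a comparison like this can be reproduced locally with only the Python standard library; the two wheel filenames below are assumptions based on the versions named above, so substitute the files you actually downloaded from the registry.

```python
# Illustrative sketch only: diff the Python sources of two downloaded wheels.
import difflib
import zipfile

OLD_WHEEL = "letta_nightly-0.11.6.dev20250903104037-py3-none-any.whl"  # assumed filename
NEW_WHEEL = "letta_nightly-0.11.7.dev20250904045700-py3-none-any.whl"  # assumed filename


def python_sources(wheel_path: str) -> dict[str, list[str]]:
    """Map each .py member of a wheel (a zip archive) to its decoded lines."""
    with zipfile.ZipFile(wheel_path) as wheel:
        return {
            name: wheel.read(name).decode("utf-8", errors="replace").splitlines(keepends=True)
            for name in wheel.namelist()
            if name.endswith(".py")
        }


old_files = python_sources(OLD_WHEEL)
new_files = python_sources(NEW_WHEEL)
for name in sorted(set(old_files) | set(new_files)):
    # unified_diff yields nothing for identical files, so unchanged modules print no output.
    diff = difflib.unified_diff(
        old_files.get(name, []),
        new_files.get(name, []),
        fromfile=f"old/{name}",
        tofile=f"new/{name}",
    )
    print("".join(diff), end="")
```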
letta/agents/letta_agent.py
CHANGED
@@ -40,7 +40,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.llm_config import LLMConfig
-from letta.schemas.message import Message,
+from letta.schemas.message import Message, MessageCreateBase
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.step import StepProgression
@@ -48,7 +48,7 @@ from letta.schemas.step_metrics import StepMetrics
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
-from letta.server.rest_api.utils import create_letta_messages_from_llm_response
+from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
 from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
@@ -164,7 +164,7 @@ class LettaAgent(BaseAgent):
 @trace_method
 async def step(
 self,
-input_messages: list[
+input_messages: list[MessageCreateBase],
 max_steps: int = DEFAULT_MAX_STEPS,
 run_id: str | None = None,
 use_assistant_message: bool = True,
@@ -203,7 +203,7 @@ class LettaAgent(BaseAgent):
 @trace_method
 async def step_stream_no_tokens(
 self,
-input_messages: list[
+input_messages: list[MessageCreateBase],
 max_steps: int = DEFAULT_MAX_STEPS,
 use_assistant_message: bool = True,
 request_start_timestamp_ns: int | None = None,
@@ -218,6 +218,7 @@ class LettaAgent(BaseAgent):
 input_messages, agent_state, self.message_manager, self.actor
 )
 initial_messages = new_in_context_messages
+in_context_messages = current_in_context_messages
 tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
 llm_client = LLMClient.create(
 provider_type=agent_state.llm_config.model_endpoint_type,
@@ -233,137 +234,34 @@
 request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
 for i in range(max_steps):
-
-
-
-logger.info(f"Agent execution cancelled for run {self.current_run_id}")
-yield f"data: {stop_reason.model_dump_json()}\n\n"
-break
-
-step_id = generate_step_id()
-step_start = get_utc_timestamp_ns()
-agent_step_span = tracer.start_span("agent_step", start_time=step_start)
-agent_step_span.set_attributes({"step_id": step_id})
-
-step_progression = StepProgression.START
-should_continue = False
-step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
-
-# Create step early with PENDING status
-logged_step = await self.step_manager.log_step_async(
-actor=self.actor,
-agent_id=agent_state.id,
-provider_name=agent_state.llm_config.model_endpoint_type,
-provider_category=agent_state.llm_config.provider_category or "base",
-model=agent_state.llm_config.model,
-model_endpoint=agent_state.llm_config.model_endpoint,
-context_window_limit=agent_state.llm_config.context_window,
-usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
-provider_id=None,
-job_id=self.current_run_id if self.current_run_id else None,
-step_id=step_id,
-project_id=agent_state.project_id,
-status=StepStatus.PENDING,
-)
-# Only use step_id in messages if step was actually created
-effective_step_id = step_id if logged_step else None
-
-try:
-request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
-await self._build_and_request_from_llm(
-current_in_context_messages,
-new_in_context_messages,
-agent_state,
-llm_client,
-tool_rules_solver,
-agent_step_span,
-step_metrics,
-)
-)
-in_context_messages = current_in_context_messages + new_in_context_messages
-
-step_progression = StepProgression.RESPONSE_RECEIVED
-log_event("agent.stream_no_tokens.llm_response.received") # [3^]
-
-response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
-
-# update usage
-usage.step_count += 1
-usage.completion_tokens += response.usage.completion_tokens
-usage.prompt_tokens += response.usage.prompt_tokens
-usage.total_tokens += response.usage.total_tokens
-MetricRegistry().message_output_tokens.record(
-response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-)
-
-if not response.choices[0].message.tool_calls:
-stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
-raise ValueError("No tool calls found in response, model must make a tool call")
-tool_call = response.choices[0].message.tool_calls[0]
-if response.choices[0].message.reasoning_content:
-reasoning = [
-ReasoningContent(
-reasoning=response.choices[0].message.reasoning_content,
-is_native=True,
-signature=response.choices[0].message.reasoning_content_signature,
-)
-]
-elif response.choices[0].message.omitted_reasoning_content:
-reasoning = [OmittedReasoningContent()]
-elif response.choices[0].message.content:
-reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
-else:
-self.logger.info("No reasoning content found.")
-reasoning = None
-
+if in_context_messages[-1].role == "approval":
+approval_request_message = in_context_messages[-1]
+step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
 persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
-
-
+approval_request_message.tool_calls[0],
+[], # TODO: update this
 agent_state,
 tool_rules_solver,
-
-reasoning_content=
-step_id=
+usage,
+reasoning_content=approval_request_message.content,
+step_id=approval_request_message.step_id,
 initial_messages=initial_messages,
-agent_step_span=agent_step_span,
 is_final_step=(i == max_steps - 1),
 step_metrics=step_metrics,
+run_id=self.current_run_id,
+is_approval=input_messages[0].approve,
+is_denial=input_messages[0].approve == False,
+denial_reason=input_messages[0].reason,
 )
-step_progression = StepProgression.STEP_LOGGED
-
-# Update step with actual usage now that we have it (if step was created)
-if logged_step:
-await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
-
-# TODO (cliandy): handle message contexts with larger refactor and dedupe logic
 new_message_idx = len(initial_messages) if initial_messages else 0
 self.response_messages.extend(persisted_messages[new_message_idx:])
 new_in_context_messages.extend(persisted_messages[new_message_idx:])
 initial_messages = None
-
-
-# log step time
-now = get_utc_timestamp_ns()
-step_ns = now - step_start
-agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
-agent_step_span.end()
-
-# Log LLM Trace
-if settings.track_provider_trace:
-await self.telemetry_manager.create_provider_trace_async(
-actor=self.actor,
-provider_trace_create=ProviderTraceCreate(
-request_json=request_data,
-response_json=response_data,
-step_id=step_id, # Use original step_id for telemetry
-organization_id=self.actor.organization_id,
-),
-)
-step_progression = StepProgression.LOGGED_TRACE
+in_context_messages = current_in_context_messages + new_in_context_messages
 
 # stream step
 # TODO: improve TTFT
-filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+filter_user_messages = [m for m in persisted_messages if m.role != "user" and m.role != "approval"]
 letta_messages = Message.to_letta_messages_from_list(
 filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
 )
@@ -371,104 +269,262 @@ class LettaAgent(BaseAgent):
|
|
371
269
|
for message in letta_messages:
|
372
270
|
if include_return_message_types is None or message.message_type in include_return_message_types:
|
373
271
|
yield f"data: {message.model_dump_json()}\n\n"
|
272
|
+
else:
|
273
|
+
# Check for job cancellation at the start of each step
|
274
|
+
if await self._check_run_cancellation():
|
275
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
|
276
|
+
logger.info(f"Agent execution cancelled for run {self.current_run_id}")
|
277
|
+
yield f"data: {stop_reason.model_dump_json()}\n\n"
|
278
|
+
break
|
279
|
+
|
280
|
+
step_id = generate_step_id()
|
281
|
+
step_start = get_utc_timestamp_ns()
|
282
|
+
agent_step_span = tracer.start_span("agent_step", start_time=step_start)
|
283
|
+
agent_step_span.set_attributes({"step_id": step_id})
|
284
|
+
|
285
|
+
step_progression = StepProgression.START
|
286
|
+
should_continue = False
|
287
|
+
step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
|
288
|
+
|
289
|
+
# Create step early with PENDING status
|
290
|
+
logged_step = await self.step_manager.log_step_async(
|
291
|
+
actor=self.actor,
|
292
|
+
agent_id=agent_state.id,
|
293
|
+
provider_name=agent_state.llm_config.model_endpoint_type,
|
294
|
+
provider_category=agent_state.llm_config.provider_category or "base",
|
295
|
+
model=agent_state.llm_config.model,
|
296
|
+
model_endpoint=agent_state.llm_config.model_endpoint,
|
297
|
+
context_window_limit=agent_state.llm_config.context_window,
|
298
|
+
usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
|
299
|
+
provider_id=None,
|
300
|
+
job_id=self.current_run_id if self.current_run_id else None,
|
301
|
+
step_id=step_id,
|
302
|
+
project_id=agent_state.project_id,
|
303
|
+
status=StepStatus.PENDING,
|
304
|
+
)
|
305
|
+
# Only use step_id in messages if step was actually created
|
306
|
+
effective_step_id = step_id if logged_step else None
|
307
|
+
|
308
|
+
try:
|
309
|
+
(
|
310
|
+
request_data,
|
311
|
+
response_data,
|
312
|
+
current_in_context_messages,
|
313
|
+
new_in_context_messages,
|
314
|
+
valid_tool_names,
|
315
|
+
) = await self._build_and_request_from_llm(
|
316
|
+
current_in_context_messages,
|
317
|
+
new_in_context_messages,
|
318
|
+
agent_state,
|
319
|
+
llm_client,
|
320
|
+
tool_rules_solver,
|
321
|
+
agent_step_span,
|
322
|
+
step_metrics,
|
323
|
+
)
|
324
|
+
in_context_messages = current_in_context_messages + new_in_context_messages
|
374
325
|
|
375
|
-
|
376
|
-
|
326
|
+
step_progression = StepProgression.RESPONSE_RECEIVED
|
327
|
+
log_event("agent.stream_no_tokens.llm_response.received") # [3^]
|
377
328
|
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
329
|
+
try:
|
330
|
+
response = llm_client.convert_response_to_chat_completion(
|
331
|
+
response_data, in_context_messages, agent_state.llm_config
|
332
|
+
)
|
333
|
+
except ValueError as e:
|
334
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
335
|
+
raise e
|
336
|
+
|
337
|
+
# update usage
|
338
|
+
usage.step_count += 1
|
339
|
+
usage.completion_tokens += response.usage.completion_tokens
|
340
|
+
usage.prompt_tokens += response.usage.prompt_tokens
|
341
|
+
usage.total_tokens += response.usage.total_tokens
|
342
|
+
MetricRegistry().message_output_tokens.record(
|
343
|
+
response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
|
386
344
|
)
|
387
345
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
|
399
|
-
raise ValueError(f"Invalid Stop Reason: {stop_reason}")
|
400
|
-
|
401
|
-
# Send error stop reason to client and re-raise
|
402
|
-
yield f"data: {stop_reason.model_dump_json()}\n\n", 500
|
403
|
-
raise
|
404
|
-
|
405
|
-
# Update step if it needs to be updated
|
406
|
-
finally:
|
407
|
-
if step_progression == StepProgression.FINISHED and should_continue:
|
408
|
-
continue
|
409
|
-
|
410
|
-
self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
|
411
|
-
self.logger.info("Running final update. Step Progression: %s", step_progression)
|
412
|
-
try:
|
413
|
-
if step_progression == StepProgression.FINISHED and not should_continue:
|
414
|
-
# Successfully completed - update with final usage and stop reason
|
415
|
-
if stop_reason is None:
|
416
|
-
stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
417
|
-
# Note: step already updated with success status after _handle_ai_response
|
418
|
-
if logged_step:
|
419
|
-
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
420
|
-
break
|
421
|
-
|
422
|
-
# Handle error cases
|
423
|
-
if step_progression < StepProgression.STEP_LOGGED:
|
424
|
-
# Error occurred before step was fully logged
|
425
|
-
import traceback
|
426
|
-
|
427
|
-
if logged_step:
|
428
|
-
await self.step_manager.update_step_error_async(
|
429
|
-
actor=self.actor,
|
430
|
-
step_id=step_id, # Use original step_id for telemetry
|
431
|
-
error_type=type(e).__name__ if "e" in locals() else "Unknown",
|
432
|
-
error_message=str(e) if "e" in locals() else "Unknown error",
|
433
|
-
error_traceback=traceback.format_exc(),
|
434
|
-
stop_reason=stop_reason,
|
346
|
+
if not response.choices[0].message.tool_calls:
|
347
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
|
348
|
+
raise ValueError("No tool calls found in response, model must make a tool call")
|
349
|
+
tool_call = response.choices[0].message.tool_calls[0]
|
350
|
+
if response.choices[0].message.reasoning_content:
|
351
|
+
reasoning = [
|
352
|
+
ReasoningContent(
|
353
|
+
reasoning=response.choices[0].message.reasoning_content,
|
354
|
+
is_native=True,
|
355
|
+
signature=response.choices[0].message.reasoning_content_signature,
|
435
356
|
)
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
|
444
|
-
elif step_progression <= StepProgression.LOGGED_TRACE:
|
445
|
-
if stop_reason is None:
|
446
|
-
self.logger.error("Error in step after logging step")
|
447
|
-
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
448
|
-
if logged_step:
|
449
|
-
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
357
|
+
]
|
358
|
+
elif response.choices[0].message.omitted_reasoning_content:
|
359
|
+
reasoning = [OmittedReasoningContent()]
|
360
|
+
elif response.choices[0].message.content:
|
361
|
+
reasoning = [
|
362
|
+
TextContent(text=response.choices[0].message.content)
|
363
|
+
] # reasoning placed into content for legacy reasons
|
450
364
|
else:
|
451
|
-
self.logger.
|
365
|
+
self.logger.info("No reasoning content found.")
|
366
|
+
reasoning = None
|
452
367
|
|
453
|
-
|
454
|
-
|
368
|
+
persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
|
369
|
+
tool_call,
|
370
|
+
valid_tool_names,
|
371
|
+
agent_state,
|
372
|
+
tool_rules_solver,
|
373
|
+
response.usage,
|
374
|
+
reasoning_content=reasoning,
|
375
|
+
step_id=effective_step_id,
|
376
|
+
initial_messages=initial_messages,
|
377
|
+
agent_step_span=agent_step_span,
|
378
|
+
is_final_step=(i == max_steps - 1),
|
379
|
+
step_metrics=step_metrics,
|
380
|
+
)
|
381
|
+
step_progression = StepProgression.STEP_LOGGED
|
382
|
+
|
383
|
+
# Update step with actual usage now that we have it (if step was created)
|
384
|
+
if logged_step:
|
385
|
+
await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
|
386
|
+
|
387
|
+
# TODO (cliandy): handle message contexts with larger refactor and dedupe logic
|
388
|
+
new_message_idx = len(initial_messages) if initial_messages else 0
|
389
|
+
self.response_messages.extend(persisted_messages[new_message_idx:])
|
390
|
+
new_in_context_messages.extend(persisted_messages[new_message_idx:])
|
391
|
+
initial_messages = None
|
392
|
+
log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
|
393
|
+
|
394
|
+
# log step time
|
395
|
+
now = get_utc_timestamp_ns()
|
396
|
+
step_ns = now - step_start
|
397
|
+
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
|
398
|
+
agent_step_span.end()
|
399
|
+
|
400
|
+
# Log LLM Trace
|
401
|
+
if settings.track_provider_trace:
|
402
|
+
await self.telemetry_manager.create_provider_trace_async(
|
403
|
+
actor=self.actor,
|
404
|
+
provider_trace_create=ProviderTraceCreate(
|
405
|
+
request_json=request_data,
|
406
|
+
response_json=response_data,
|
407
|
+
step_id=step_id, # Use original step_id for telemetry
|
408
|
+
organization_id=self.actor.organization_id,
|
409
|
+
),
|
410
|
+
)
|
411
|
+
step_progression = StepProgression.LOGGED_TRACE
|
412
|
+
|
413
|
+
# stream step
|
414
|
+
# TODO: improve TTFT
|
415
|
+
filter_user_messages = [m for m in persisted_messages if m.role != "user"]
|
416
|
+
letta_messages = Message.to_letta_messages_from_list(
|
417
|
+
filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
|
418
|
+
)
|
419
|
+
letta_messages = [m for m in letta_messages if m.message_type != "approval_response_message"]
|
420
|
+
|
421
|
+
for message in letta_messages:
|
422
|
+
if include_return_message_types is None or message.message_type in include_return_message_types:
|
423
|
+
yield f"data: {message.model_dump_json()}\n\n"
|
424
|
+
|
425
|
+
MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
|
426
|
+
step_progression = StepProgression.FINISHED
|
455
427
|
|
456
|
-
# Record
|
457
|
-
if logged_step and step_metrics
|
458
|
-
#
|
459
|
-
step_metrics.step_ns =
|
428
|
+
# Record step metrics for successful completion
|
429
|
+
if logged_step and step_metrics:
|
430
|
+
# Set the step_ns that was already calculated
|
431
|
+
step_metrics.step_ns = step_ns
|
460
432
|
await self._record_step_metrics(
|
461
433
|
step_id=step_id,
|
462
434
|
agent_state=agent_state,
|
463
435
|
step_metrics=step_metrics,
|
464
|
-
job_id=locals().get("run_id", self.current_run_id),
|
465
436
|
)
|
466
437
|
|
467
438
|
except Exception as e:
|
468
|
-
|
439
|
+
# Handle any unexpected errors during step processing
|
440
|
+
self.logger.error(f"Error during step processing: {e}")
|
441
|
+
job_update_metadata = {"error": str(e)}
|
442
|
+
|
443
|
+
# This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
|
444
|
+
if not stop_reason:
|
445
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
446
|
+
elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
|
447
|
+
self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
|
448
|
+
elif stop_reason.stop_reason not in (
|
449
|
+
StopReasonType.no_tool_call,
|
450
|
+
StopReasonType.invalid_tool_call,
|
451
|
+
StopReasonType.invalid_llm_response,
|
452
|
+
):
|
453
|
+
self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
|
454
|
+
|
455
|
+
# Send error stop reason to client and re-raise
|
456
|
+
yield f"data: {stop_reason.model_dump_json()}\n\n", 500
|
457
|
+
raise
|
469
458
|
|
470
|
-
|
471
|
-
|
459
|
+
# Update step if it needs to be updated
|
460
|
+
finally:
|
461
|
+
if step_progression == StepProgression.FINISHED and should_continue:
|
462
|
+
continue
|
463
|
+
|
464
|
+
self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
|
465
|
+
self.logger.info("Running final update. Step Progression: %s", step_progression)
|
466
|
+
try:
|
467
|
+
if step_progression == StepProgression.FINISHED and not should_continue:
|
468
|
+
# Successfully completed - update with final usage and stop reason
|
469
|
+
if stop_reason is None:
|
470
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
471
|
+
# Note: step already updated with success status after _handle_ai_response
|
472
|
+
if logged_step:
|
473
|
+
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
474
|
+
break
|
475
|
+
|
476
|
+
# Handle error cases
|
477
|
+
if step_progression < StepProgression.STEP_LOGGED:
|
478
|
+
# Error occurred before step was fully logged
|
479
|
+
import traceback
|
480
|
+
|
481
|
+
if logged_step:
|
482
|
+
await self.step_manager.update_step_error_async(
|
483
|
+
actor=self.actor,
|
484
|
+
step_id=step_id, # Use original step_id for telemetry
|
485
|
+
error_type=type(e).__name__ if "e" in locals() else "Unknown",
|
486
|
+
error_message=str(e) if "e" in locals() else "Unknown error",
|
487
|
+
error_traceback=traceback.format_exc(),
|
488
|
+
stop_reason=stop_reason,
|
489
|
+
)
|
490
|
+
|
491
|
+
if step_progression <= StepProgression.RESPONSE_RECEIVED:
|
492
|
+
# TODO (cliandy): persist response if we get it back
|
493
|
+
if settings.track_errored_messages and initial_messages:
|
494
|
+
for message in initial_messages:
|
495
|
+
message.is_err = True
|
496
|
+
message.step_id = effective_step_id
|
497
|
+
await self.message_manager.create_many_messages_async(
|
498
|
+
initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
|
499
|
+
)
|
500
|
+
elif step_progression <= StepProgression.LOGGED_TRACE:
|
501
|
+
if stop_reason is None:
|
502
|
+
self.logger.error("Error in step after logging step")
|
503
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
504
|
+
if logged_step:
|
505
|
+
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
506
|
+
else:
|
507
|
+
self.logger.error("Invalid StepProgression value")
|
508
|
+
|
509
|
+
if settings.track_stop_reason:
|
510
|
+
await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
|
511
|
+
|
512
|
+
# Record partial step metrics on failure (capture whatever timing data we have)
|
513
|
+
if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
|
514
|
+
# Calculate total step time up to the failure point
|
515
|
+
step_metrics.step_ns = get_utc_timestamp_ns() - step_start
|
516
|
+
await self._record_step_metrics(
|
517
|
+
step_id=step_id,
|
518
|
+
agent_state=agent_state,
|
519
|
+
step_metrics=step_metrics,
|
520
|
+
job_id=locals().get("run_id", self.current_run_id),
|
521
|
+
)
|
522
|
+
|
523
|
+
except Exception as e:
|
524
|
+
self.logger.error("Failed to update step: %s", e)
|
525
|
+
|
526
|
+
if not should_continue:
|
527
|
+
break
|
472
528
|
|
473
529
|
# Extend the in context message ids
|
474
530
|
if not agent_state.message_buffer_autoclear:
|
@@ -489,7 +545,7 @@ class LettaAgent(BaseAgent):
 async def _step(
 self,
 agent_state: AgentState,
-input_messages: list[
+input_messages: list[MessageCreateBase],
 max_steps: int = DEFAULT_MAX_STEPS,
 run_id: str | None = None,
 request_start_timestamp_ns: int | None = None,
@@ -506,6 +562,7 @@ class LettaAgent(BaseAgent):
 input_messages, agent_state, self.message_manager, self.actor
 )
 initial_messages = new_in_context_messages
+in_context_messages = current_in_context_messages
 tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
 llm_client = LLMClient.create(
 provider_type=agent_state.llm_config.model_endpoint_type,
@@ -521,53 +578,83 @@ class LettaAgent(BaseAgent):
 job_update_metadata = None
 usage = LettaUsageStatistics()
 for i in range(max_steps):
-
-
-
-
-
-
-
+if in_context_messages[-1].role == "approval":
+approval_request_message = in_context_messages[-1]
+step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
+persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+approval_request_message.tool_calls[0],
+[], # TODO: update this
+agent_state,
+tool_rules_solver,
+usage,
+reasoning_content=approval_request_message.content,
+step_id=approval_request_message.step_id,
+initial_messages=initial_messages,
+is_final_step=(i == max_steps - 1),
+step_metrics=step_metrics,
+run_id=run_id or self.current_run_id,
+is_approval=input_messages[0].approve,
+is_denial=input_messages[0].approve == False,
+denial_reason=input_messages[0].reason,
 )
-
+new_message_idx = len(initial_messages) if initial_messages else 0
+self.response_messages.extend(persisted_messages[new_message_idx:])
+new_in_context_messages.extend(persisted_messages[new_message_idx:])
+initial_messages = None
+in_context_messages = current_in_context_messages + new_in_context_messages
+else:
+# If dry run, build request data and return it without making LLM call
+if dry_run:
+request_data, valid_tool_names = await self._create_llm_request_data_async(
+llm_client=llm_client,
+in_context_messages=current_in_context_messages + new_in_context_messages,
+agent_state=agent_state,
+tool_rules_solver=tool_rules_solver,
+)
+return request_data
 
-
-
-
-
-
+# Check for job cancellation at the start of each step
+if await self._check_run_cancellation():
+stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+break
 
-
-
-
-
+step_id = generate_step_id()
+step_start = get_utc_timestamp_ns()
+agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+agent_step_span.set_attributes({"step_id": step_id})
 
-
-
-
+step_progression = StepProgression.START
+should_continue = False
+step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# Create step early with PENDING status
+logged_step = await self.step_manager.log_step_async(
+actor=self.actor,
+agent_id=agent_state.id,
+provider_name=agent_state.llm_config.model_endpoint_type,
+provider_category=agent_state.llm_config.provider_category or "base",
+model=agent_state.llm_config.model,
+model_endpoint=agent_state.llm_config.model_endpoint,
+context_window_limit=agent_state.llm_config.context_window,
+usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+provider_id=None,
+job_id=run_id if run_id else self.current_run_id,
+step_id=step_id,
+project_id=agent_state.project_id,
+status=StepStatus.PENDING,
+)
+# Only use step_id in messages if step was actually created
+effective_step_id = step_id if logged_step else None
 
-
-
-
+try:
+(
+request_data,
+response_data,
+current_in_context_messages,
+new_in_context_messages,
+valid_tool_names,
+) = await self._build_and_request_from_llm(
 current_in_context_messages,
 new_in_context_messages,
 agent_state,
@@ -576,180 +663,193 @@ class LettaAgent(BaseAgent):
|
|
576
663
|
agent_step_span,
|
577
664
|
step_metrics,
|
578
665
|
)
|
579
|
-
|
580
|
-
in_context_messages = current_in_context_messages + new_in_context_messages
|
581
|
-
|
582
|
-
step_progression = StepProgression.RESPONSE_RECEIVED
|
583
|
-
log_event("agent.step.llm_response.received") # [3^]
|
584
|
-
|
585
|
-
response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
|
666
|
+
in_context_messages = current_in_context_messages + new_in_context_messages
|
586
667
|
|
587
|
-
|
588
|
-
|
589
|
-
usage.prompt_tokens += response.usage.prompt_tokens
|
590
|
-
usage.total_tokens += response.usage.total_tokens
|
591
|
-
usage.run_ids = [run_id] if run_id else None
|
592
|
-
MetricRegistry().message_output_tokens.record(
|
593
|
-
response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
|
594
|
-
)
|
668
|
+
step_progression = StepProgression.RESPONSE_RECEIVED
|
669
|
+
log_event("agent.step.llm_response.received") # [3^]
|
595
670
|
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
tool_call = response.choices[0].message.tool_calls[0]
|
600
|
-
if response.choices[0].message.reasoning_content:
|
601
|
-
reasoning = [
|
602
|
-
ReasoningContent(
|
603
|
-
reasoning=response.choices[0].message.reasoning_content,
|
604
|
-
is_native=True,
|
605
|
-
signature=response.choices[0].message.reasoning_content_signature,
|
671
|
+
try:
|
672
|
+
response = llm_client.convert_response_to_chat_completion(
|
673
|
+
response_data, in_context_messages, agent_state.llm_config
|
606
674
|
)
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
valid_tool_names,
|
619
|
-
agent_state,
|
620
|
-
tool_rules_solver,
|
621
|
-
response.usage,
|
622
|
-
reasoning_content=reasoning,
|
623
|
-
step_id=effective_step_id,
|
624
|
-
initial_messages=initial_messages,
|
625
|
-
agent_step_span=agent_step_span,
|
626
|
-
is_final_step=(i == max_steps - 1),
|
627
|
-
run_id=run_id,
|
628
|
-
step_metrics=step_metrics,
|
629
|
-
)
|
630
|
-
step_progression = StepProgression.STEP_LOGGED
|
631
|
-
|
632
|
-
# Update step with actual usage now that we have it (if step was created)
|
633
|
-
if logged_step:
|
634
|
-
await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
|
635
|
-
|
636
|
-
new_message_idx = len(initial_messages) if initial_messages else 0
|
637
|
-
self.response_messages.extend(persisted_messages[new_message_idx:])
|
638
|
-
new_in_context_messages.extend(persisted_messages[new_message_idx:])
|
639
|
-
|
640
|
-
initial_messages = None
|
641
|
-
log_event("agent.step.llm_response.processed") # [4^]
|
642
|
-
|
643
|
-
# log step time
|
644
|
-
now = get_utc_timestamp_ns()
|
645
|
-
step_ns = now - step_start
|
646
|
-
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
|
647
|
-
agent_step_span.end()
|
648
|
-
|
649
|
-
# Log LLM Trace
|
650
|
-
if settings.track_provider_trace:
|
651
|
-
await self.telemetry_manager.create_provider_trace_async(
|
652
|
-
actor=self.actor,
|
653
|
-
provider_trace_create=ProviderTraceCreate(
|
654
|
-
request_json=request_data,
|
655
|
-
response_json=response_data,
|
656
|
-
step_id=step_id, # Use original step_id for telemetry
|
657
|
-
organization_id=self.actor.organization_id,
|
658
|
-
),
|
675
|
+
except ValueError as e:
|
676
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
677
|
+
raise e
|
678
|
+
|
679
|
+
usage.step_count += 1
|
680
|
+
usage.completion_tokens += response.usage.completion_tokens
|
681
|
+
usage.prompt_tokens += response.usage.prompt_tokens
|
682
|
+
usage.total_tokens += response.usage.total_tokens
|
683
|
+
usage.run_ids = [run_id] if run_id else None
|
684
|
+
MetricRegistry().message_output_tokens.record(
|
685
|
+
response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
|
659
686
|
)
|
660
|
-
step_progression = StepProgression.LOGGED_TRACE
|
661
687
|
|
662
|
-
|
663
|
-
|
688
|
+
if not response.choices[0].message.tool_calls:
|
689
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
|
690
|
+
raise ValueError("No tool calls found in response, model must make a tool call")
|
691
|
+
tool_call = response.choices[0].message.tool_calls[0]
|
692
|
+
if response.choices[0].message.reasoning_content:
|
693
|
+
reasoning = [
|
694
|
+
ReasoningContent(
|
695
|
+
reasoning=response.choices[0].message.reasoning_content,
|
696
|
+
is_native=True,
|
697
|
+
signature=response.choices[0].message.reasoning_content_signature,
|
698
|
+
)
|
699
|
+
]
|
700
|
+
elif response.choices[0].message.content:
|
701
|
+
reasoning = [
|
702
|
+
TextContent(text=response.choices[0].message.content)
|
703
|
+
] # reasoning placed into content for legacy reasons
|
704
|
+
elif response.choices[0].message.omitted_reasoning_content:
|
705
|
+
reasoning = [OmittedReasoningContent()]
|
706
|
+
else:
|
707
|
+
self.logger.info("No reasoning content found.")
|
708
|
+
reasoning = None
|
664
709
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
710
|
+
persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
|
711
|
+
tool_call,
|
712
|
+
valid_tool_names,
|
713
|
+
agent_state,
|
714
|
+
tool_rules_solver,
|
715
|
+
response.usage,
|
716
|
+
reasoning_content=reasoning,
|
717
|
+
step_id=effective_step_id,
|
718
|
+
initial_messages=initial_messages,
|
719
|
+
agent_step_span=agent_step_span,
|
720
|
+
is_final_step=(i == max_steps - 1),
|
721
|
+
run_id=run_id,
|
672
722
|
step_metrics=step_metrics,
|
673
|
-
job_id=run_id if run_id else self.current_run_id,
|
674
723
|
)
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
702
|
-
if logged_step:
|
703
|
-
await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
|
704
|
-
break
|
705
|
-
|
706
|
-
# Handle error cases
|
707
|
-
if step_progression < StepProgression.STEP_LOGGED:
|
708
|
-
# Error occurred before step was fully logged
|
709
|
-
import traceback
|
710
|
-
|
711
|
-
if logged_step:
|
712
|
-
await self.step_manager.update_step_error_async(
|
713
|
-
actor=self.actor,
|
724
|
+
step_progression = StepProgression.STEP_LOGGED
|
725
|
+
|
726
|
+
# Update step with actual usage now that we have it (if step was created)
|
727
|
+
if logged_step:
|
728
|
+
await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
|
729
|
+
|
730
|
+
new_message_idx = len(initial_messages) if initial_messages else 0
|
731
|
+
self.response_messages.extend(persisted_messages[new_message_idx:])
|
732
|
+
new_in_context_messages.extend(persisted_messages[new_message_idx:])
|
733
|
+
|
734
|
+
initial_messages = None
|
735
|
+
log_event("agent.step.llm_response.processed") # [4^]
|
736
|
+
|
737
|
+
# log step time
|
738
|
+
now = get_utc_timestamp_ns()
|
739
|
+
step_ns = now - step_start
|
740
|
+
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
|
741
|
+
agent_step_span.end()
|
742
|
+
|
743
|
+
# Log LLM Trace
|
744
|
+
if settings.track_provider_trace:
|
745
|
+
await self.telemetry_manager.create_provider_trace_async(
|
746
|
+
actor=self.actor,
|
747
|
+
provider_trace_create=ProviderTraceCreate(
|
748
|
+
request_json=request_data,
|
749
|
+
response_json=response_data,
|
714
750
|
step_id=step_id, # Use original step_id for telemetry
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
)
|
720
|
-
|
721
|
-
if step_progression <= StepProgression.RESPONSE_RECEIVED:
|
722
|
-
# TODO (cliandy): persist response if we get it back
|
723
|
-
if settings.track_errored_messages and initial_messages:
|
724
|
-
for message in initial_messages:
|
725
|
-
message.is_err = True
|
726
|
-
message.step_id = effective_step_id
|
727
|
-
await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
|
728
|
-
elif step_progression <= StepProgression.LOGGED_TRACE:
|
729
|
-
if stop_reason is None:
|
730
|
-
self.logger.error("Error in step after logging step")
|
731
|
-
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
732
|
-
if logged_step:
|
733
|
-
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
734
|
-
else:
|
735
|
-
self.logger.error("Invalid StepProgression value")
|
751
|
+
organization_id=self.actor.organization_id,
|
752
|
+
),
|
753
|
+
)
|
754
|
+
step_progression = StepProgression.LOGGED_TRACE
|
736
755
|
|
737
|
-
|
738
|
-
|
756
|
+
MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
|
757
|
+
step_progression = StepProgression.FINISHED
|
739
758
|
|
740
|
-
# Record
|
741
|
-
if logged_step and step_metrics
|
742
|
-
#
|
743
|
-
step_metrics.step_ns =
|
759
|
+
# Record step metrics for successful completion
|
760
|
+
if logged_step and step_metrics:
|
761
|
+
# Set the step_ns that was already calculated
|
762
|
+
step_metrics.step_ns = step_ns
|
744
763
|
await self._record_step_metrics(
|
745
764
|
step_id=step_id,
|
746
765
|
agent_state=agent_state,
|
747
766
|
step_metrics=step_metrics,
|
748
|
-
job_id=
|
767
|
+
job_id=run_id if run_id else self.current_run_id,
|
749
768
|
)
|
750
769
|
|
751
770
|
except Exception as e:
|
752
|
-
|
771
|
+
# Handle any unexpected errors during step processing
|
772
|
+
self.logger.error(f"Error during step processing: {e}")
|
773
|
+
job_update_metadata = {"error": str(e)}
|
774
|
+
|
775
|
+
# This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
|
776
|
+
if not stop_reason:
|
777
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
778
|
+
elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
|
779
|
+
self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
|
780
|
+
elif stop_reason.stop_reason not in (
|
781
|
+
StopReasonType.no_tool_call,
|
782
|
+
StopReasonType.invalid_tool_call,
|
783
|
+
StopReasonType.invalid_llm_response,
|
784
|
+
):
|
785
|
+
self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
|
786
|
+
raise
|
787
|
+
|
788
|
+
# Update step if it needs to be updated
|
789
|
+
finally:
|
790
|
+
if step_progression == StepProgression.FINISHED and should_continue:
|
791
|
+
continue
|
792
|
+
|
793
|
+
self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
|
794
|
+
self.logger.info("Running final update. Step Progression: %s", step_progression)
|
795
|
+
try:
|
796
|
+
if step_progression == StepProgression.FINISHED and not should_continue:
|
797
|
+
# Successfully completed - update with final usage and stop reason
|
798
|
+
if stop_reason is None:
|
799
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
|
800
|
+
if logged_step:
|
801
|
+
await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
|
802
|
+
break
|
803
|
+
|
804
|
+
# Handle error cases
|
805
|
+
if step_progression < StepProgression.STEP_LOGGED:
|
806
|
+
# Error occurred before step was fully logged
|
807
|
+
import traceback
|
808
|
+
|
809
|
+
if logged_step:
|
810
|
+
await self.step_manager.update_step_error_async(
|
811
|
+
actor=self.actor,
|
812
|
+
step_id=step_id, # Use original step_id for telemetry
|
813
|
+
error_type=type(e).__name__ if "e" in locals() else "Unknown",
|
814
|
+
error_message=str(e) if "e" in locals() else "Unknown error",
|
815
|
+
error_traceback=traceback.format_exc(),
|
816
|
+
stop_reason=stop_reason,
|
817
|
+
)
|
818
|
+
|
819
|
+
if step_progression <= StepProgression.RESPONSE_RECEIVED:
|
820
|
+
# TODO (cliandy): persist response if we get it back
|
821
|
+
if settings.track_errored_messages and initial_messages:
|
822
|
+
for message in initial_messages:
|
823
|
+
message.is_err = True
|
824
|
+
message.step_id = effective_step_id
|
825
|
+
await self.message_manager.create_many_messages_async(
|
826
|
+
initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
|
827
|
+
)
|
828
|
+
elif step_progression <= StepProgression.LOGGED_TRACE:
|
829
|
+
if stop_reason is None:
|
830
|
+
self.logger.error("Error in step after logging step")
|
831
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
832
|
+
if logged_step:
|
833
|
+
await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
|
834
|
+
else:
|
835
|
+
self.logger.error("Invalid StepProgression value")
|
836
|
+
|
837
|
+
if settings.track_stop_reason:
|
838
|
+
await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
|
839
|
+
|
840
|
+
# Record partial step metrics on failure (capture whatever timing data we have)
|
841
|
+
if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
|
842
|
+
# Calculate total step time up to the failure point
|
843
|
+
step_metrics.step_ns = get_utc_timestamp_ns() - step_start
|
844
|
+
await self._record_step_metrics(
|
845
|
+
step_id=step_id,
|
846
|
+
agent_state=agent_state,
|
847
|
+
step_metrics=step_metrics,
|
848
|
+
job_id=locals().get("run_id", self.current_run_id),
|
849
|
+
)
|
850
|
+
|
851
|
+
except Exception as e:
|
852
|
+
self.logger.error("Failed to update step: %s", e)
|
753
853
|
|
754
854
|
if not should_continue:
|
755
855
|
break
|
@@ -783,7 +883,7 @@ class LettaAgent(BaseAgent):
 @trace_method
 async def step_stream(
 self,
-input_messages: list[
+input_messages: list[MessageCreateBase],
 max_steps: int = DEFAULT_MAX_STEPS,
 use_assistant_message: bool = True,
 request_start_timestamp_ns: int | None = None,
@@ -806,6 +906,7 @@ class LettaAgent(BaseAgent):
 input_messages, agent_state, self.message_manager, self.actor
 )
 initial_messages = new_in_context_messages
+in_context_messages = current_in_context_messages
 
 tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
 llm_client = LLMClient.create(
@@ -822,219 +923,30 @@ class LettaAgent(BaseAgent):
|
|
822
923
|
request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
|
823
924
|
|
824
925
|
for i in range(max_steps):
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
|
829
|
-
logger.info(f"Agent execution cancelled for run {self.current_run_id}")
|
830
|
-
yield f"data: {stop_reason.model_dump_json()}\n\n"
|
831
|
-
break
|
832
|
-
|
833
|
-
step_start = get_utc_timestamp_ns()
|
834
|
-
agent_step_span = tracer.start_span("agent_step", start_time=step_start)
|
835
|
-
agent_step_span.set_attributes({"step_id": step_id})
|
836
|
-
|
837
|
-
step_progression = StepProgression.START
|
838
|
-
should_continue = False
|
839
|
-
step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
|
840
|
-
|
841
|
-
# Create step early with PENDING status
|
842
|
-
logged_step = await self.step_manager.log_step_async(
|
843
|
-
actor=self.actor,
|
844
|
-
agent_id=agent_state.id,
|
845
|
-
provider_name=agent_state.llm_config.model_endpoint_type,
|
846
|
-
provider_category=agent_state.llm_config.provider_category or "base",
|
847
|
-
model=agent_state.llm_config.model,
|
848
|
-
model_endpoint=agent_state.llm_config.model_endpoint,
|
849
|
-
context_window_limit=agent_state.llm_config.context_window,
|
850
|
-
usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
|
851
|
-
provider_id=None,
|
852
|
-
job_id=self.current_run_id if self.current_run_id else None,
|
853
|
-
step_id=step_id,
|
854
|
-
project_id=agent_state.project_id,
|
855
|
-
status=StepStatus.PENDING,
|
856
|
-
)
|
857
|
-
# Only use step_id in messages if step was actually created
|
858
|
-
effective_step_id = step_id if logged_step else None
|
859
|
-
|
860
|
-
try:
|
861
|
-
(
|
862
|
-
request_data,
|
863
|
-
stream,
|
864
|
-
current_in_context_messages,
|
865
|
-
new_in_context_messages,
|
866
|
-
valid_tool_names,
|
867
|
-
provider_request_start_timestamp_ns,
|
868
|
-
) = await self._build_and_request_from_llm_streaming(
869 -   first_chunk,
870 -   agent_step_span,
871 -   request_start_timestamp_ns,
872 -   current_in_context_messages,
873 -   new_in_context_messages,
874 -   agent_state,
875 -   llm_client,
876 -   tool_rules_solver,
877 -   )
878 -
879 -   step_progression = StepProgression.STREAM_RECEIVED
880 -   log_event("agent.stream.llm_response.received") # [3^]
881 -
882 -   # TODO: THIS IS INCREDIBLY UGLY
883 -   # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
884 -   if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
885 -   interface = AnthropicStreamingInterface(
886 -   use_assistant_message=use_assistant_message,
887 -   put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
888 -   )
889 -   elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
890 -   interface = OpenAIStreamingInterface(
891 -   use_assistant_message=use_assistant_message,
892 -   is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
893 -   messages=current_in_context_messages + new_in_context_messages,
894 -   tools=request_data.get("tools", []),
895 -   )
896 -   else:
897 -   raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
898 -
899 -   async for chunk in interface.process(
900 -   stream,
901 -   ttft_span=request_span,
902 -   ):
903 -   # Measure TTFT (trace, metric, and db). This should be consolidated.
904 -   if first_chunk and request_span is not None:
905 -   now = get_utc_timestamp_ns()
906 -   ttft_ns = now - request_start_timestamp_ns
907 -
908 -   request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
909 -   metric_attributes = get_ctx_attributes()
910 -   metric_attributes["model.name"] = agent_state.llm_config.model
911 -   MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
912 -
913 -   if self.current_run_id and self.job_manager:
914 -   await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
915 -
916 -   first_chunk = False
917 -
918 -   if include_return_message_types is None or chunk.message_type in include_return_message_types:
919 -   # filter down returned data
920 -   yield f"data: {chunk.model_dump_json()}\n\n"
921 -
922 -   stream_end_time_ns = get_utc_timestamp_ns()
923 -
924 -   # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
925 -   if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
926 -   logger.warning(
927 -   f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
928 -   )
929 -   interface.input_tokens = interface.fallback_input_tokens
930 -   interface.output_tokens = interface.fallback_output_tokens
931 -
932 -   usage.step_count += 1
933 -   usage.completion_tokens += interface.output_tokens
934 -   usage.prompt_tokens += interface.input_tokens
935 -   usage.total_tokens += interface.input_tokens + interface.output_tokens
936 -   MetricRegistry().message_output_tokens.record(
937 -   usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
938 -   )
939 -
940 -   # log LLM request time
941 -   llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
942 -   step_metrics.llm_request_ns = llm_request_ns
943 -
944 -   llm_request_ms = ns_to_ms(llm_request_ns)
945 -   agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
946 -   MetricRegistry().llm_execution_time_ms_histogram.record(
947 -   llm_request_ms,
948 -   dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
949 -   )
950 -
951 -   # Process resulting stream content
952 -   try:
953 -   tool_call = interface.get_tool_call_object()
954 -   except ValueError as e:
955 -   stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
956 -   raise e
957 -   except Exception as e:
958 -   stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
959 -   raise e
960 -   reasoning_content = interface.get_reasoning_content()
926 +   if in_context_messages[-1].role == "approval":
927 +   approval_request_message = in_context_messages[-1]
928 +   step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
961 929   persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
962 -
963 -
930 +   approval_request_message.tool_calls[0],
931 +   [], # TODO: update this
964 932   agent_state,
965 933   tool_rules_solver,
966-969 -
970 -   ),
971 -   reasoning_content=reasoning_content,
972 -   pre_computed_assistant_message_id=interface.letta_message_id,
973 -   step_id=effective_step_id,
974 -   initial_messages=initial_messages,
975 -   agent_step_span=agent_step_span,
934 +   usage,
935 +   reasoning_content=approval_request_message.content,
936 +   step_id=approval_request_message.step_id,
937 +   initial_messages=new_in_context_messages,
976 938   is_final_step=(i == max_steps - 1),
977 939   step_metrics=step_metrics,
940 +   run_id=self.current_run_id,
941 +   is_approval=input_messages[0].approve,
942 +   is_denial=input_messages[0].approve == False,
943 +   denial_reason=input_messages[0].reason,
978 944   )
979 -   step_progression = StepProgression.STEP_LOGGED
980 -
981 -   # Update step with actual usage now that we have it (if step was created)
982 -   if logged_step:
983 -   await self.step_manager.update_step_success_async(
984 -   self.actor,
985 -   step_id,
986 -   UsageStatistics(
987 -   completion_tokens=usage.completion_tokens,
988 -   prompt_tokens=usage.prompt_tokens,
989 -   total_tokens=usage.total_tokens,
990 -   ),
991 -   stop_reason,
992 -   )
993 -
994 945   new_message_idx = len(initial_messages) if initial_messages else 0
995 946   self.response_messages.extend(persisted_messages[new_message_idx:])
996 947   new_in_context_messages.extend(persisted_messages[new_message_idx:])
997 -
998 948   initial_messages = None
999 -
1000 -   # log total step time
1001 -   now = get_utc_timestamp_ns()
1002 -   step_ns = now - step_start
1003 -   agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
1004 -   agent_step_span.end()
1005 -
1006 -   # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
1007 -   # log_event("agent.stream.llm_response.processed") # [4^]
1008 -
1009 -   # Log LLM Trace
1010 -   # We are piecing together the streamed response here.
1011 -   # Content here does not match the actual response schema as streams come in chunks.
1012 -   if settings.track_provider_trace:
1013 -   await self.telemetry_manager.create_provider_trace_async(
1014 -   actor=self.actor,
1015 -   provider_trace_create=ProviderTraceCreate(
1016 -   request_json=request_data,
1017 -   response_json={
1018 -   "content": {
1019 -   "tool_call": tool_call.model_dump_json(),
1020 -   "reasoning": [content.model_dump_json() for content in reasoning_content],
1021 -   },
1022 -   "id": interface.message_id,
1023 -   "model": interface.model,
1024 -   "role": "assistant",
1025 -   # "stop_reason": "",
1026 -   # "stop_sequence": None,
1027 -   "type": "message",
1028 -   "usage": {
1029 -   "input_tokens": usage.prompt_tokens,
1030 -   "output_tokens": usage.completion_tokens,
1031 -   },
1032 -   },
1033 -   step_id=step_id, # Use original step_id for telemetry
1034 -   organization_id=self.actor.organization_id,
1035 -   ),
1036 -   )
1037 -   step_progression = StepProgression.LOGGED_TRACE
949 +   in_context_messages = current_in_context_messages + new_in_context_messages
1038 950
1039 951   # yields tool response as this is handled from Letta and not the response from the LLM provider
1040 952   tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
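Note on the hunk above: when the previous turn ended with an approval request, the rewritten loop no longer issues a fresh LLM request on the first iteration; it replays the tool call stored on that approval-request message and forwards the user's approve/deny decision into _handle_ai_response. The sketch below is a minimal, self-contained illustration of that branching, using hypothetical message fields (role, tool_calls, approve, reason); it is not the letta API.

from __future__ import annotations
from dataclasses import dataclass, field

@dataclass
class PendingToolCall:
    name: str
    arguments: dict

@dataclass
class Msg:
    role: str                              # "user", "assistant", "approval", ...
    tool_calls: list = field(default_factory=list)
    approve: bool | None = None            # set on incoming approval responses
    reason: str | None = None              # optional denial reason

def next_action(in_context: list[Msg], incoming: Msg) -> dict:
    """Decide whether to replay a pending tool call or ask the LLM for a new one."""
    last = in_context[-1] if in_context else None
    if last is not None and last.role == "approval":
        # Replay the stored call; the caller decides between execution and a denial message.
        return {
            "kind": "resume_approval",
            "tool_call": last.tool_calls[0],
            "is_approval": incoming.approve is True,
            "is_denial": incoming.approve is False,
            "denial_reason": incoming.reason,
        }
    # Otherwise fall through to a normal LLM request for this step.
    return {"kind": "llm_request"}

if __name__ == "__main__":
    pending = Msg(role="approval", tool_calls=[PendingToolCall("web_search", {"q": "letta"})])
    decision = Msg(role="user", approve=False, reason="not now")
    print(next_action([pending], decision))  # resume_approval with is_denial=True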
@@ -1042,103 +954,238 @@ class LettaAgent(BaseAgent):
1042 954   # Apply message type filtering if specified
1043 955   if include_return_message_types is None or tool_return.message_type in include_return_message_types:
1044 956   yield f"data: {tool_return.model_dump_json()}\n\n"
957 +   else:
958 +   step_id = generate_step_id()
959 +   # Check for job cancellation at the start of each step
960 +   if await self._check_run_cancellation():
961 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
962 +   logger.info(f"Agent execution cancelled for run {self.current_run_id}")
963 +   yield f"data: {stop_reason.model_dump_json()}\n\n"
964 +   break
965 +
966 +   step_start = get_utc_timestamp_ns()
967 +   agent_step_span = tracer.start_span("agent_step", start_time=step_start)
968 +   agent_step_span.set_attributes({"step_id": step_id})
969 +
970 +   step_progression = StepProgression.START
971 +   should_continue = False
972 +   step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
973 +
974 +   # Create step early with PENDING status
975 +   logged_step = await self.step_manager.log_step_async(
976 +   actor=self.actor,
977 +   agent_id=agent_state.id,
978 +   provider_name=agent_state.llm_config.model_endpoint_type,
979 +   provider_category=agent_state.llm_config.provider_category or "base",
980 +   model=agent_state.llm_config.model,
981 +   model_endpoint=agent_state.llm_config.model_endpoint,
982 +   context_window_limit=agent_state.llm_config.context_window,
983 +   usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
984 +   provider_id=None,
985 +   job_id=self.current_run_id if self.current_run_id else None,
986 +   step_id=step_id,
987 +   project_id=agent_state.project_id,
988 +   status=StepStatus.PENDING,
989 +   )
990 +   # Only use step_id in messages if step was actually created
991 +   effective_step_id = step_id if logged_step else None
1045 992
1046-1048 -
993 +   try:
994 +   (
995 +   request_data,
996 +   stream,
997 +   current_in_context_messages,
998 +   new_in_context_messages,
999 +   valid_tool_names,
1000 +   provider_request_start_timestamp_ns,
1001 +   ) = await self._build_and_request_from_llm_streaming(
1002 +   first_chunk,
1003 +   agent_step_span,
1004 +   request_start_timestamp_ns,
1005 +   current_in_context_messages,
1006 +   new_in_context_messages,
1007 +   agent_state,
1008 +   llm_client,
1009 +   tool_rules_solver,
1010 +   )
1049 1011
1050-1054 -
1012 +   step_progression = StepProgression.STREAM_RECEIVED
1013 +   log_event("agent.stream.llm_response.received") # [3^]
1014 +
1015 +   # TODO: THIS IS INCREDIBLY UGLY
1016 +   # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
1017 +   if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
1018 +   interface = AnthropicStreamingInterface(
1019 +   use_assistant_message=use_assistant_message,
1020 +   put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
1021 +   )
1022 +   elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
1023 +   interface = OpenAIStreamingInterface(
1024 +   use_assistant_message=use_assistant_message,
1025 +   is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
1026 +   messages=current_in_context_messages + new_in_context_messages,
1027 +   tools=request_data.get("tools", []),
1028 +   put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
1029 +   )
1030 +   else:
1031 +   raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
1055 1032
1056 -
1057 -
1033 +   async for chunk in interface.process(
1034 +   stream,
1035 +   ttft_span=request_span,
1036 +   ):
1037 +   # Measure TTFT (trace, metric, and db). This should be consolidated.
1038 +   if first_chunk and request_span is not None:
1039 +   now = get_utc_timestamp_ns()
1040 +   ttft_ns = now - request_start_timestamp_ns
1058 1041
1059-1064 -
1042 +   request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
1043 +   metric_attributes = get_ctx_attributes()
1044 +   metric_attributes["model.name"] = agent_state.llm_config.model
1045 +   MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
1046 +
1047 +   if self.current_run_id and self.job_manager:
1048 +   await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
1049 +
1050 +   first_chunk = False
1051 +
1052 +   if include_return_message_types is None or chunk.message_type in include_return_message_types:
1053 +   # filter down returned data
1054 +   yield f"data: {chunk.model_dump_json()}\n\n"
1055 +
1056 +   stream_end_time_ns = get_utc_timestamp_ns()
1057 +
1058 +   # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
1059 +   if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
1060 +   logger.warning(
1061 +   f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
1065 1062   )
1066 -
1067 -
1063 +   interface.input_tokens = interface.fallback_input_tokens
1064 +   interface.output_tokens = interface.fallback_output_tokens
1065 +
1066 +   usage.step_count += 1
1067 +   usage.completion_tokens += interface.output_tokens
1068 +   usage.prompt_tokens += interface.input_tokens
1069 +   usage.total_tokens += interface.input_tokens + interface.output_tokens
1070 +   MetricRegistry().message_output_tokens.record(
1071 +   usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
1072 +   )
1068 1073
1069-1110 -
1074 +   # log LLM request time
1075 +   llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
1076 +   step_metrics.llm_request_ns = llm_request_ns
1077 +
1078 +   llm_request_ms = ns_to_ms(llm_request_ns)
1079 +   agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
1080 +   MetricRegistry().llm_execution_time_ms_histogram.record(
1081 +   llm_request_ms,
1082 +   dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
1083 +   )
1084 +
1085 +   # Process resulting stream content
1086 +   try:
1087 +   tool_call = interface.get_tool_call_object()
1088 +   except ValueError as e:
1089 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
1090 +   raise e
1091 +   except Exception as e:
1092 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
1093 +   raise e
1094 +   reasoning_content = interface.get_reasoning_content()
1095 +   persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
1096 +   tool_call,
1097 +   valid_tool_names,
1098 +   agent_state,
1099 +   tool_rules_solver,
1100 +   UsageStatistics(
1101 +   completion_tokens=usage.completion_tokens,
1102 +   prompt_tokens=usage.prompt_tokens,
1103 +   total_tokens=usage.total_tokens,
1104 +   ),
1105 +   reasoning_content=reasoning_content,
1106 +   pre_computed_assistant_message_id=interface.letta_message_id,
1107 +   step_id=effective_step_id,
1108 +   initial_messages=initial_messages,
1109 +   agent_step_span=agent_step_span,
1110 +   is_final_step=(i == max_steps - 1),
1111 +   step_metrics=step_metrics,
1112 +   )
1113 +   step_progression = StepProgression.STEP_LOGGED
1114 +
1115 +   # Update step with actual usage now that we have it (if step was created)
1116 +   if logged_step:
1117 +   await self.step_manager.update_step_success_async(
1118 +   self.actor,
1119 +   step_id,
1120 +   UsageStatistics(
1121 +   completion_tokens=usage.completion_tokens,
1122 +   prompt_tokens=usage.prompt_tokens,
1123 +   total_tokens=usage.total_tokens,
1124 +   ),
1125 +   stop_reason,
1126 +   )
1127 +
1128 +   new_message_idx = len(initial_messages) if initial_messages else 0
1129 +   self.response_messages.extend(persisted_messages[new_message_idx:])
1130 +   new_in_context_messages.extend(persisted_messages[new_message_idx:])
1131 +
1132 +   initial_messages = None
1133 +
1134 +   # log total step time
1135 +   now = get_utc_timestamp_ns()
1136 +   step_ns = now - step_start
1137 +   agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
1138 +   agent_step_span.end()
1139 +
1140 +   # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
1141 +   # log_event("agent.stream.llm_response.processed") # [4^]
1142 +
1143 +   # Log LLM Trace
1144 +   # We are piecing together the streamed response here.
1145 +   # Content here does not match the actual response schema as streams come in chunks.
1146 +   if settings.track_provider_trace:
1147 +   await self.telemetry_manager.create_provider_trace_async(
1148 +   actor=self.actor,
1149 +   provider_trace_create=ProviderTraceCreate(
1150 +   request_json=request_data,
1151 +   response_json={
1152 +   "content": {
1153 +   "tool_call": tool_call.model_dump_json(),
1154 +   "reasoning": [content.model_dump_json() for content in reasoning_content],
1155 +   },
1156 +   "id": interface.message_id,
1157 +   "model": interface.model,
1158 +   "role": "assistant",
1159 +   # "stop_reason": "",
1160 +   # "stop_sequence": None,
1161 +   "type": "message",
1162 +   "usage": {
1163 +   "input_tokens": usage.prompt_tokens,
1164 +   "output_tokens": usage.completion_tokens,
1165 +   },
1166 +   },
1111 1167   step_id=step_id, # Use original step_id for telemetry
1112-1115 -
1116 -   )
1168 +   organization_id=self.actor.organization_id,
1169 +   ),
1170 +   )
1171 +   step_progression = StepProgression.LOGGED_TRACE
1117 1172
1118-1123 -
1124 -   elif step_progression <= StepProgression.LOGGED_TRACE:
1125 -   if stop_reason is None:
1126 -   self.logger.error("Error in step after logging step")
1127 -   stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
1128 -   if logged_step:
1129 -   await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
1130 -   else:
1131 -   self.logger.error("Invalid StepProgression value")
1173 +   # yields tool response as this is handled from Letta and not the response from the LLM provider
1174 +   tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
1175 +   if not (use_assistant_message and tool_return.name == "send_message"):
1176 +   # Apply message type filtering if specified
1177 +   if include_return_message_types is None or tool_return.message_type in include_return_message_types:
1178 +   yield f"data: {tool_return.model_dump_json()}\n\n"
1132 1179
1133 -   #
1134 -
1135 -
1180 +   # TODO (cliandy): consolidate and expand with trace
1181 +   MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
1182 +   step_progression = StepProgression.FINISHED
1136 1183
1137 -   # Record
1138 -   if logged_step and step_metrics
1184 +   # Record step metrics for successful completion
1185 +   if logged_step and step_metrics:
1139 1186   try:
1140 -   #
1141 -   step_metrics.step_ns =
1187 +   # Set the step_ns that was already calculated
1188 +   step_metrics.step_ns = step_ns
1142 1189
1143 1190   # Get context attributes for project and template IDs
1144 1191   ctx_attrs = get_ctx_attributes()
@@ -1148,16 +1195,109 @@ class LettaAgent(BaseAgent):
1148 1195   agent_state=agent_state,
1149 1196   step_metrics=step_metrics,
1150 1197   ctx_attrs=ctx_attrs,
1151 -   job_id=
1198 +   job_id=self.current_run_id,
1152 1199   )
1153 1200   except Exception as metrics_error:
1154 1201   self.logger.warning(f"Failed to record step metrics: {metrics_error}")
1155 1202
1156 1203   except Exception as e:
1157 -
1204 +   # Handle any unexpected errors during step processing
1205 +   self.logger.error(f"Error during step processing: {e}")
1206 +   job_update_metadata = {"error": str(e)}
1207 +
1208 +   # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
1209 +   if not stop_reason:
1210 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
1211 +   elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
1212 +   self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
1213 +   elif stop_reason.stop_reason not in (
1214 +   StopReasonType.no_tool_call,
1215 +   StopReasonType.invalid_tool_call,
1216 +   StopReasonType.invalid_llm_response,
1217 +   ):
1218 +   self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
1219 +
1220 +   # Send error stop reason to client and re-raise with expected response code
1221 +   yield f"data: {stop_reason.model_dump_json()}\n\n", 500
1222 +   raise
1158 1223
1159 -
1160 -
1224 +   # Update step if it needs to be updated
1225 +   finally:
1226 +   if step_progression == StepProgression.FINISHED and should_continue:
1227 +   continue
1228 +
1229 +   self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
1230 +   self.logger.info("Running final update. Step Progression: %s", step_progression)
1231 +   try:
1232 +   if step_progression == StepProgression.FINISHED and not should_continue:
1233 +   # Successfully completed - update with final usage and stop reason
1234 +   if stop_reason is None:
1235 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
1236 +   # Note: step already updated with success status after _handle_ai_response
1237 +   if logged_step:
1238 +   await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
1239 +   break
1240 +
1241 +   # Handle error cases
1242 +   if step_progression < StepProgression.STEP_LOGGED:
1243 +   # Error occurred before step was fully logged
1244 +   import traceback
1245 +
1246 +   if logged_step:
1247 +   await self.step_manager.update_step_error_async(
1248 +   actor=self.actor,
1249 +   step_id=step_id, # Use original step_id for telemetry
1250 +   error_type=type(e).__name__ if "e" in locals() else "Unknown",
1251 +   error_message=str(e) if "e" in locals() else "Unknown error",
1252 +   error_traceback=traceback.format_exc(),
1253 +   stop_reason=stop_reason,
1254 +   )
1255 +
1256 +   if step_progression <= StepProgression.STREAM_RECEIVED:
1257 +   if first_chunk and settings.track_errored_messages and initial_messages:
1258 +   for message in initial_messages:
1259 +   message.is_err = True
1260 +   message.step_id = effective_step_id
1261 +   await self.message_manager.create_many_messages_async(
1262 +   initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
1263 +   )
1264 +   elif step_progression <= StepProgression.LOGGED_TRACE:
1265 +   if stop_reason is None:
1266 +   self.logger.error("Error in step after logging step")
1267 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
1268 +   if logged_step:
1269 +   await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
1270 +   else:
1271 +   self.logger.error("Invalid StepProgression value")
1272 +
1273 +   # Do tracking for failure cases. Can consolidate with success conditions later.
1274 +   if settings.track_stop_reason:
1275 +   await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
1276 +
1277 +   # Record partial step metrics on failure (capture whatever timing data we have)
1278 +   if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
1279 +   try:
1280 +   # Calculate total step time up to the failure point
1281 +   step_metrics.step_ns = get_utc_timestamp_ns() - step_start
1282 +
1283 +   # Get context attributes for project and template IDs
1284 +   ctx_attrs = get_ctx_attributes()
1285 +
1286 +   await self._record_step_metrics(
1287 +   step_id=step_id,
1288 +   agent_state=agent_state,
1289 +   step_metrics=step_metrics,
1290 +   ctx_attrs=ctx_attrs,
1291 +   job_id=locals().get("run_id", self.current_run_id),
1292 +   )
1293 +   except Exception as metrics_error:
1294 +   self.logger.warning(f"Failed to record step metrics: {metrics_error}")
1295 +
1296 +   except Exception as e:
1297 +   self.logger.error("Failed to update step: %s", e)
1298 +
1299 +   if not should_continue:
1300 +   break
1161 1301   # Extend the in context message ids
1162 1302   if not agent_state.message_buffer_autoclear:
1163 1303   await self._rebuild_context_window(
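Note on the hunk above: each step now records an ordered StepProgression checkpoint and runs its cleanup in a finally block, so the error path can tell how far the step got before failing and pick the right rollback action. The following is a minimal standalone sketch of that pattern, using a simplified stand-in enum rather than letta's actual StepProgression values.

from enum import IntEnum

class Progress(IntEnum):
    # Ordered checkpoints, so "how far did we get?" is a simple comparison.
    START = 0
    STREAM_RECEIVED = 1
    STEP_LOGGED = 2
    LOGGED_TRACE = 3
    FINISHED = 4

def run_step(stages: list) -> None:
    progress = Progress.START
    try:
        for stage in stages:
            stage()                      # may raise at any checkpoint
            progress = Progress(progress + 1)
    except Exception as exc:
        print(f"step failed after {progress.name}: {exc}")
        raise
    finally:
        # Cleanup keyed off the furthest checkpoint reached.
        if progress < Progress.STEP_LOGGED:
            print("rolling back: step was never fully logged")
        elif progress < Progress.FINISHED:
            print("marking step with an error stop reason")
        else:
            print("step finished cleanly")

if __name__ == "__main__":
    run_step([lambda: None, lambda: None, lambda: None, lambda: None])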
@@ -1494,14 +1634,46 @@ class LettaAgent(BaseAgent):
1494 1634   is_final_step: bool | None = None,
1495 1635   run_id: str | None = None,
1496 1636   step_metrics: StepMetrics = None,
1637 +   is_approval: bool | None = None,
1638 +   is_denial: bool | None = None,
1639 +   denial_reason: str | None = None,
1497 1640   ) -> tuple[list[Message], bool, LettaStopReason | None]:
1498 1641   """
1499 1642   Handle the final AI response once streaming completes, execute / validate the
1500 1643   tool call, decide whether we should keep stepping, and persist state.
1501 1644   """
1645 +   tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
1646 +
1647 +   if is_denial:
1648 +   continue_stepping = True
1649 +   stop_reason = None
1650 +   tool_call_messages = create_letta_messages_from_llm_response(
1651 +   agent_id=agent_state.id,
1652 +   model=agent_state.llm_config.model,
1653 +   function_name="",
1654 +   function_arguments={},
1655 +   tool_execution_result=ToolExecutionResult(status="error"),
1656 +   tool_call_id=tool_call_id,
1657 +   function_call_success=False,
1658 +   function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
1659 +   timezone=agent_state.timezone,
1660 +   actor=self.actor,
1661 +   continue_stepping=continue_stepping,
1662 +   heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
1663 +   reasoning_content=None,
1664 +   pre_computed_assistant_message_id=None,
1665 +   step_id=step_id,
1666 +   is_approval_response=True,
1667 +   )
1668 +   messages_to_persist = (initial_messages or []) + tool_call_messages
1669 +   persisted_messages = await self.message_manager.create_many_messages_async(
1670 +   messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
1671 +   )
1672 +   return persisted_messages, continue_stepping, stop_reason
1673 +
1502 1674   # 1. Parse and validate the tool-call envelope
1503 1675   tool_call_name: str = tool_call.function.name
1504 -
1676 +
1505 1677   tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
1506 1678   request_heartbeat: bool = _pop_heartbeat(tool_args)
1507 1679   tool_args.pop(INNER_THOUGHTS_KWARG, None)
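Note on the hunk above: _handle_ai_response gains is_approval, is_denial and denial_reason parameters, and a denied call returns early with a synthetic error tool result instead of executing the tool. A rough sketch of that idea follows, using an illustrative dict shape rather than letta's Message schema; the field names here are hypothetical.

from __future__ import annotations
from datetime import datetime, timezone

def denial_tool_result(tool_call_id: str, denial_reason: str | None) -> dict:
    """Build a synthetic 'tool' message for a denied tool call (illustrative shape only)."""
    return {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "status": "error",
        "content": f"Error: request to call tool denied. User reason: {denial_reason}",
        "created_at": datetime.now(timezone.utc).isoformat(),
    }

if __name__ == "__main__":
    print(denial_tool_result("call_12345678", "tool looks unsafe"))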
@@ -1515,77 +1687,99 @@ class LettaAgent(BaseAgent):
1515 1687   request_heartbeat=request_heartbeat,
1516 1688   )
1517 1689
1518-1528 -
1529 -   agent_step_span=agent_step_span,
1690 +   if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
1691 +   approval_message = create_approval_request_message_from_llm_response(
1692 +   agent_id=agent_state.id,
1693 +   model=agent_state.llm_config.model,
1694 +   function_name=tool_call_name,
1695 +   function_arguments=tool_args,
1696 +   tool_call_id=tool_call_id,
1697 +   actor=self.actor,
1698 +   continue_stepping=request_heartbeat,
1699 +   reasoning_content=reasoning_content,
1700 +   pre_computed_assistant_message_id=pre_computed_assistant_message_id,
1530 1701   step_id=step_id,
1531 1702   )
1532 -
1703 +   messages_to_persist = (initial_messages or []) + [approval_message]
1704 +   continue_stepping = False
1705 +   stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
1706 +   else:
1707 +   # 2. Execute the tool (or synthesize an error result if disallowed)
1708 +   tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
1709 +   if tool_rule_violated:
1710 +   tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
1711 +   else:
1712 +   # Track tool execution time
1713 +   tool_start_time = get_utc_timestamp_ns()
1714 +   tool_execution_result = await self._execute_tool(
1715 +   tool_name=tool_call_name,
1716 +   tool_args=tool_args,
1717 +   agent_state=agent_state,
1718 +   agent_step_span=agent_step_span,
1719 +   step_id=step_id,
1720 +   )
1721 +   tool_end_time = get_utc_timestamp_ns()
1533 1722
1534 -
1535 -
1723 +   # Store tool execution time in metrics
1724 +   step_metrics.tool_execution_ns = tool_end_time - tool_start_time
1536 1725
1537-1539 -
1726 +   log_telemetry(
1727 +   self.logger,
1728 +   "_handle_ai_response execute tool finish",
1729 +   tool_execution_result=tool_execution_result,
1730 +   tool_call_id=tool_call_id,
1731 +   )
1540 1732
1541-1556 -
1733 +   # 3. Prepare the function-response payload
1734 +   truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
1735 +   return_char_limit = next(
1736 +   (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
1737 +   None,
1738 +   )
1739 +   function_response_string = validate_function_response(
1740 +   tool_execution_result.func_return,
1741 +   return_char_limit=return_char_limit,
1742 +   truncate=truncate,
1743 +   )
1744 +   self.last_function_response = package_function_response(
1745 +   was_success=tool_execution_result.success_flag,
1746 +   response_string=function_response_string,
1747 +   timezone=agent_state.timezone,
1748 +   )
1557 1749
1558-1566 -
1750 +   # 4. Decide whether to keep stepping (focal section simplified)
1751 +   continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
1752 +   agent_state=agent_state,
1753 +   request_heartbeat=request_heartbeat,
1754 +   tool_call_name=tool_call_name,
1755 +   tool_rule_violated=tool_rule_violated,
1756 +   tool_rules_solver=tool_rules_solver,
1757 +   is_final_step=is_final_step,
1758 +   )
1567 1759
1568-1585 -
1760 +   # 5. Create messages (step was already created at the beginning)
1761 +   tool_call_messages = create_letta_messages_from_llm_response(
1762 +   agent_id=agent_state.id,
1763 +   model=agent_state.llm_config.model,
1764 +   function_name=tool_call_name,
1765 +   function_arguments=tool_args,
1766 +   tool_execution_result=tool_execution_result,
1767 +   tool_call_id=tool_call_id,
1768 +   function_call_success=tool_execution_result.success_flag,
1769 +   function_response=function_response_string,
1770 +   timezone=agent_state.timezone,
1771 +   actor=self.actor,
1772 +   continue_stepping=continue_stepping,
1773 +   heartbeat_reason=heartbeat_reason,
1774 +   reasoning_content=reasoning_content,
1775 +   pre_computed_assistant_message_id=pre_computed_assistant_message_id,
1776 +   step_id=step_id,
1777 +   is_approval_response=is_approval or is_denial,
1778 +   )
1779 +   messages_to_persist = (initial_messages or []) + tool_call_messages
1586 1780
1587 1781   persisted_messages = await self.message_manager.create_many_messages_async(
1588 -
1782 +   messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
1589 1783   )
1590 1784
1591 1785   if run_id:
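Note on the hunk above: before executing a tool call, the new code asks the tool-rule solver whether the tool requires approval; if it does, and the call has not already been approved, it persists an approval request and stops stepping with a requires_approval stop reason rather than executing. A toy sketch of that gating decision, with hypothetical names:

def gate_tool_call(tool_name: str, requires_approval: set, is_approval: bool) -> str:
    """Return the action for a proposed tool call: execute now or pause for approval."""
    if not is_approval and tool_name in requires_approval:
        return "emit_approval_request_and_stop"
    return "execute_tool"

if __name__ == "__main__":
    print(gate_tool_call("delete_file", {"delete_file"}, is_approval=False))  # pause for approval
    print(gate_tool_call("delete_file", {"delete_file"}, is_approval=True))   # execute
    print(gate_tool_call("web_search", {"delete_file"}, is_approval=False))   # execute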
@@ -1606,7 +1800,6 @@ class LettaAgent(BaseAgent):
1606 1800   tool_rules_solver: ToolRulesSolver,
1607 1801   is_final_step: bool | None,
1608 1802   ) -> tuple[bool, str | None, LettaStopReason | None]:
1609 -
1610 1803   continue_stepping = request_heartbeat
1611 1804   heartbeat_reason: str | None = None
1612 1805   stop_reason: LettaStopReason | None = None
@@ -1638,9 +1831,7 @@ class LettaAgent(BaseAgent):
1638 1831   uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
1639 1832   if not continue_stepping and uncalled:
1640 1833   continue_stepping = True
1641 -   heartbeat_reason = (
1642 -   f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [" f"{', '.join(uncalled)}] to be called still."
1643 -   )
1834 +   heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
1644 1835
1645 1836   stop_reason = None # reset – we’re still going
1646 1837